module @module {
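  // Per-device parameter globals for an 8-way tensor-parallel model
  // (devices @__device_0 through @__device_7). The dimensions are consistent
  // with a Llama-3-8B-style configuration (vocab 128256, hidden 4096,
  // FFN 14336). The token embedding and norm weights are replicated: each
  // device gets its own `$N`-suffixed global, all reading the same named
  // parameter. The large projection weights are pre-sharded on disk: each
  // device loads its own `.shard.N` parameter, pinned to that device via
  // `iree.abi.affinity`.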
  util.global private @__auto.token_embd.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xf16>
  util.global private @__auto.token_embd.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xf16>
  util.global private @__auto.token_embd.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xf16>
  util.global private @__auto.token_embd.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xf16>
  util.global private @__auto.token_embd.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xf16>
  util.global private @__auto.token_embd.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xf16>
  util.global private @__auto.token_embd.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xf16>
  util.global private @__auto.token_embd.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xf16>
  util.global private @__auto.blk.0.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xf32>
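  // blk.0 attention projections, sharded along the head (output) dimension:
  // 8 x 512 = 4096 query rows (four 128-wide heads per device) and
  // 8 x 128 = 1024 K/V rows (one KV head per device), consistent with
  // grouped-query attention with 32 query heads and 8 KV heads.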
  util.global private @__auto.blk.0.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.0.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.0.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.0.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.0.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.0.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.0.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.0.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.0.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.0.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.0.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.0.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.0.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.0.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.0.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.0.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.0.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.0.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.0.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.0.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.0.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.0.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.0.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.0.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.0.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.0.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.0.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.0.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.0.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.0.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.0.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.0.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.0.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.0.attn_v.weight.shard.7"> : tensor<128x4096xf16>
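  // The output projection is instead sharded along its input (reduction)
  // dimension (8 x 512 = 4096 columns), so each device produces a partial
  // 4096-wide result; the cross-device reduction that would combine those
  // partials is not visible in this excerpt.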
  util.global private @__auto.blk.0.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.0.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.0.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.0.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.0.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.0.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.0.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.0.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.0.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.0.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.0.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.0.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.0.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.0.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.0.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.0.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.0.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.0.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.0.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.0.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.0.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.0.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.0.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.0.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.0.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.0.ffn_norm.weight"> : tensor<4096xf32>
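  // blk.0 feed-forward weights follow the same split as attention: ffn_gate
  // and ffn_up are sharded along the output dimension (8 x 1792 = 14336),
  // while ffn_down is sharded along its input (reduction) dimension
  // (4096 x 1792 per device).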
  util.global private @__auto.blk.0.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.0.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.0.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.0.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.0.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.0.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.0.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.0.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.0.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.0.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.0.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.0.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.0.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.0.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.0.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.0.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.0.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.0.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.0.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.0.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.0.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.0.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.0.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.0.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.0.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.0.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.0.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.0.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.0.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.0.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.0.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.0.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.0.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
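  // blk.1 and later repeat the same per-block layout: replicated norm
  // weights plus 8-way sharded attention and feed-forward projections.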
  util.global private @__auto.blk.1.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.1.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.1.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.1.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.1.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.1.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.1.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.1.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.1.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.1.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.1.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.1.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.1.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.1.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.1.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.1.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.1.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.1.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.1.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.1.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.1.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.1.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.1.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.1.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.1.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.1.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.1.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.1.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.1.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.1.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.1.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.1.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.1.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.1.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.1.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.1.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.1.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.1.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.1.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.1.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.1.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.1.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.1.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.1.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.1.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.1.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.1.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.1.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.1.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.1.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.1.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.1.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.1.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.1.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.1.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.1.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.1.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.1.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.1.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.1.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.1.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.1.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.1.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.1.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.1.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.1.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.1.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.1.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.1.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.1.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.1.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.1.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.1.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.1.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.1.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.1.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.1.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.1.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.1.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.1.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.1.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.1.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.1.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.1.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.1.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.1.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.1.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.1.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.1.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.1.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.1.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.1.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.1.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.1.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.1.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.1.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.1.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.1.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.1.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.2.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.2.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.2.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.2.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.2.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.2.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.2.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.2.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.2.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.2.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.2.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.2.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.2.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.2.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.2.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.2.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.2.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.2.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.2.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.2.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.2.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.2.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.2.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.2.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.2.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.2.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.2.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.2.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.2.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.2.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.2.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.2.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.2.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.2.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.2.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.2.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.2.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.2.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.2.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.2.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.2.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.2.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.2.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.2.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.2.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.2.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.2.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.2.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.2.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.2.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.2.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.2.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.2.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.2.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.2.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.2.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.2.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.2.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.2.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.2.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.2.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.2.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.2.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.2.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.2.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.2.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.2.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.2.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.2.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.2.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.2.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.2.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.2.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.2.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.2.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.2.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.2.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.2.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.2.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.2.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.2.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.2.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.2.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.2.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.2.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.2.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.2.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.2.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.2.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.2.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.2.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.2.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.2.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.2.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.2.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.2.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.2.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.2.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.2.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.3.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.3.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.3.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.3.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.3.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.3.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.3.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.3.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.3.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.3.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.3.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.3.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.3.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.3.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.3.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.3.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.3.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.3.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.3.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.3.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.3.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.3.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.3.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.3.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.3.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.3.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.3.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.3.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.3.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.3.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.3.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.3.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.3.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.3.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.3.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.3.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.3.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.3.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.3.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.3.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.3.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.3.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.3.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.3.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.3.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.3.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.3.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.3.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.3.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.3.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.3.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.3.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.3.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.3.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.3.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.3.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.3.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.3.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.3.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.3.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.3.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.3.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.3.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.3.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.3.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.3.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.3.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.3.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.3.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.3.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.3.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.3.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.3.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.3.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.3.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.3.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.3.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.3.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.3.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.3.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.3.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.3.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.3.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.3.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.3.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.3.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.3.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.3.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.3.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.3.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.3.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.3.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.3.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.3.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.3.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.3.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.3.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.3.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.3.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
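  // Editorial note, inferred from the declarations themselves: every transformer
  // block below repeats one layout. The 4096-element norm weights are replicated,
  // one copy per device (the $1..$7 suffixed globals all read the same named
  // parameter). The projection weights are instead pre-split into 8 shards, with
  // shard N pinned to @__device_N: attn_q as 8 x 512 rows (= 4096 total),
  // attn_k/attn_v as 8 x 128 rows (= 1024), ffn_gate/ffn_up as 8 x 1792 rows
  // (= 14336), while attn_output (4096x512 per shard) and ffn_down (4096x1792
  // per shard) are sharded along their input (column) dimension.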
  util.global private @__auto.blk.4.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.4.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.4.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.4.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.4.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.4.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.4.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.4.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.4.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.4.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.4.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.4.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.4.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.4.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.4.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.4.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.4.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.4.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.4.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.4.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.4.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.4.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.4.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.4.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.4.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.4.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.4.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.4.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.4.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.4.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.4.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.4.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.4.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.4.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.4.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.4.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.4.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.4.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.4.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.4.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.4.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.4.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.4.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.4.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.4.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.4.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.4.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.4.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.4.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.4.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.4.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.4.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.4.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.4.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.4.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.4.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.4.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.4.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.4.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.4.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.4.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.4.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.4.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.4.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.4.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.4.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.4.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.4.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.4.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.4.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.4.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.4.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.4.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.4.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.4.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.4.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.4.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.4.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.4.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.4.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.4.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.4.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.4.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.4.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.4.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.4.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.4.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.4.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.4.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.4.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.4.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.4.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.4.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.4.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.4.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.4.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.4.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.4.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.4.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.5.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.5.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.5.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.5.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.5.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.5.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.5.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.5.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.5.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.5.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.5.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.5.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.5.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.5.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.5.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.5.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.5.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.5.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.5.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.5.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.5.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.5.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.5.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.5.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.5.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.5.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.5.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.5.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.5.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.5.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.5.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.5.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.5.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.5.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.5.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.5.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.5.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.5.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.5.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.5.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.5.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.5.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.5.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.5.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.5.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.5.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.5.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.5.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.5.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.5.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.5.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.5.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.5.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.5.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.5.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.5.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.5.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.5.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.5.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.5.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.5.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.5.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.5.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.5.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.5.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.5.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.5.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.5.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.5.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.5.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.5.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.5.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.5.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.5.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.5.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.5.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.5.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.5.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.5.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.5.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.5.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.5.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.5.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.5.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.5.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.5.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.5.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.5.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.5.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.5.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.5.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.5.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.5.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.5.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.5.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.5.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.5.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.5.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.5.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.6.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.6.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.6.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.6.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.6.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.6.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.6.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.6.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.6.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.6.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.6.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.6.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.6.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.6.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.6.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.6.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.6.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.6.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.6.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.6.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.6.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.6.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.6.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.6.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.6.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.6.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.6.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.6.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.6.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.6.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.6.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.6.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.6.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.6.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.6.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.6.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.6.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.6.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.6.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.6.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.6.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.6.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.6.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.6.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.6.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.6.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.6.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.6.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.6.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.6.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.6.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.6.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.6.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.6.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.6.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.6.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.6.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.6.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.6.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.6.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.6.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.6.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.6.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.6.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.6.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.6.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.6.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.6.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.6.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.6.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.6.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.6.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.6.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.6.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.6.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.6.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.6.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.6.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.6.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.6.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.6.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.6.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.6.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.6.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.6.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.6.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.6.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.6.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.6.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.6.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.6.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.6.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.6.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.6.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.6.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.6.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.6.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.6.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.6.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.7.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.7.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.7.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.7.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.7.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.7.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.7.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.7.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.7.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.7.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.7.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.7.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.7.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.7.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.7.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.7.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.7.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.7.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.7.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.7.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.7.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.7.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.7.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.7.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.7.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.7.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.7.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.7.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.7.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.7.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.7.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.7.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.7.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.7.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.7.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.7.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.7.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.7.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.7.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.7.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.7.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.7.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.7.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.7.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.7.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.7.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.7.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.7.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.7.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.7.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.7.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.7.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.7.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.7.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.7.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.7.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.7.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.7.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.7.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.7.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.7.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.7.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.7.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.7.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.7.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.7.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.7.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.7.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.7.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.7.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.7.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.7.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.7.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.7.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.7.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.7.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.7.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.7.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.7.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.7.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.7.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.7.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.7.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.7.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.7.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.7.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.7.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.7.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.7.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.7.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.7.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.7.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.7.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.7.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.7.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.7.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.7.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.7.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.7.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
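  // blk.8 parameters: attn_norm/ffn_norm are replicated ($1..$7 are copies of one named parameter, each pinned to a device); attn_q/k/v/output and ffn_gate/up/down are distinct per-device ".shard.N" parameters.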
  util.global private @__auto.blk.8.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.8.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.8.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.8.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.8.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.8.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.8.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.8.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.8.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.8.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.8.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.8.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.8.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.8.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.8.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.8.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.8.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.8.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.8.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.8.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.8.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.8.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.8.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.8.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.8.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.8.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.8.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.8.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.8.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.8.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.8.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.8.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.8.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.8.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.8.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.8.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.8.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.8.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.8.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.8.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.8.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.8.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.8.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.8.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.8.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.8.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.8.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.8.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.8.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.8.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.8.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.8.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.8.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.8.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.8.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.8.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.8.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.8.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.8.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.8.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.8.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.8.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.8.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.8.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.8.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.8.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.8.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.8.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.8.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.8.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.8.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.8.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.8.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.8.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.8.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.8.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.8.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.8.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.8.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.8.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.8.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.8.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.8.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.8.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.8.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.8.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.8.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.8.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.8.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.8.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.8.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.8.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.8.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.8.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.8.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.8.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.8.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.8.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.8.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
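  // blk.9: same replication/sharding layout as blk.8.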
  util.global private @__auto.blk.9.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.9.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.9.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.9.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.9.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.9.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.9.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.9.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.9.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.9.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.9.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.9.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.9.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.9.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.9.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.9.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.9.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.9.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.9.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.9.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.9.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.9.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.9.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.9.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.9.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.9.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.9.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.9.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.9.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.9.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.9.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.9.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.9.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.9.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.9.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.9.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.9.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.9.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.9.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.9.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.9.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.9.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.9.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.9.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.9.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.9.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.9.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.9.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.9.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.9.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.9.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.9.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.9.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.9.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.9.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.9.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.9.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.9.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.9.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.9.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.9.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.9.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.9.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.9.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.9.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.9.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.9.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.9.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.9.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.9.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.9.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.9.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.9.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.9.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.9.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.9.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.9.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.9.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.9.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.9.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.9.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.9.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.9.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.9.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.9.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.9.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.9.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.9.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.9.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.9.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.9.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.9.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.9.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.9.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.9.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.9.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.9.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.9.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.9.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
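  // blk.10: same replication/sharding layout as blk.8.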
  util.global private @__auto.blk.10.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.10.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.10.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.10.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.10.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.10.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.10.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.10.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.10.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.10.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.10.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.10.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.10.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.10.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.10.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.10.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.10.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.10.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.10.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.10.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.10.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.10.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.10.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.10.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.10.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.10.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.10.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.10.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.10.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.10.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.10.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.10.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.10.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.10.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.10.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.10.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.10.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.10.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.10.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.10.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.10.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.10.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.10.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.10.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.10.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.10.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.10.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.10.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.10.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.10.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.10.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.10.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.10.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.10.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.10.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.10.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.10.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.10.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.10.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.10.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.10.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.10.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.10.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.10.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.10.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.10.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.10.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.10.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.10.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.10.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.10.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.10.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.10.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.10.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.10.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.10.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.10.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.10.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.10.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.10.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.10.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.10.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.10.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.10.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.10.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.10.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.10.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.10.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.10.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.10.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.10.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.10.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.10.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.10.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.10.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.10.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.10.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.10.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.10.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
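  // blk.11: same replication/sharding layout as blk.8.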
  util.global private @__auto.blk.11.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.11.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.11.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.11.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.11.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.11.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.11.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.11.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.11.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.11.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.11.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.11.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.11.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.11.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.11.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.11.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.11.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.11.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.11.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.11.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.11.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.11.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.11.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.11.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.11.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.11.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.11.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.11.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.11.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.11.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.11.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.11.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.11.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.11.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.11.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.11.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.11.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.11.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.11.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.11.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.11.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.11.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.11.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.11.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.11.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.11.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.11.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.11.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.11.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.11.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.11.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.11.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.11.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.11.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.11.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.11.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.11.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.11.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.11.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.11.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.11.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.11.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.11.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.11.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.11.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.11.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.11.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.11.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.11.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.11.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.11.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.11.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.11.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.11.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.11.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.11.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.11.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.11.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.11.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.11.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.11.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.11.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.11.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.11.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.11.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.11.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.11.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.11.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.11.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.11.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.11.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.11.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.11.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.11.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.11.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.11.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.11.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.11.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.11.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
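  // Note: the globals above and below repeat one per-block pattern for an
  // 8-way tensor-parallel layout. The f32 norm vectors are replicated on all
  // eight devices (the $1..$7 globals all load the same "blk.N.*_norm.weight"
  // parameter), while the f16 matmul weights are pre-split into eight
  // per-device shards ("...weight.shard.K", pinned to @__device_K through
  // iree.abi.affinity). The shard shapes are consistent with the full
  // dimensions divided by 8 (an inference from the shapes, not stated in the
  // parameter file itself):
  //   attn_q      512x4096   (4096/8 rows)
  //   attn_k/v    128x4096   (1024/8 rows, suggesting grouped-query attention)
  //   attn_output 4096x512   (4096/8 columns)
  //   ffn_gate/up 1792x4096  (14336/8 rows)
  //   ffn_down    4096x1792  (14336/8 columns)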
  util.global private @__auto.blk.12.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.12.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.12.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.12.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.12.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.12.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.12.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.12.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.12.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.12.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.12.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.12.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.12.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.12.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.12.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.12.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.12.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.12.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.12.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.12.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.12.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.12.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.12.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.12.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.12.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.12.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.12.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.12.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.12.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.12.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.12.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.12.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.12.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.12.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.12.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.12.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.12.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.12.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.12.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.12.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.12.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.12.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.12.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.12.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.12.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.12.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.12.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.12.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.12.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.12.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.12.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.12.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.12.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.12.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.12.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.12.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.12.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.12.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.12.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.12.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.12.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.12.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.12.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.12.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.12.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.12.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.12.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.12.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.12.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.12.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.12.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.12.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.12.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.12.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.12.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.12.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.12.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.12.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.12.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.12.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.12.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.12.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.12.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.12.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.12.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.12.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.12.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.12.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.12.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.12.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.12.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.12.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.12.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.12.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.12.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.12.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.12.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.12.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.12.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.13.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.13.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.13.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.13.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.13.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.13.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.13.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.13.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.13.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.13.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.13.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.13.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.13.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.13.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.13.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.13.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.13.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.13.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.13.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.13.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.13.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.13.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.13.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.13.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.13.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.13.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.13.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.13.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.13.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.13.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.13.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.13.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.13.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.13.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.13.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.13.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.13.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.13.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.13.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.13.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.13.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.13.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.13.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.13.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.13.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.13.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.13.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.13.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.13.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.13.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.13.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.13.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.13.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.13.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.13.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.13.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.13.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.13.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.13.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.13.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.13.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.13.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.13.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.13.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.13.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.13.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.13.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.13.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.13.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.13.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.13.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.13.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.13.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.13.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.13.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.13.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.13.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.13.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.13.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.13.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.13.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.13.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.13.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.13.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.13.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.13.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.13.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.13.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.13.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.13.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.13.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.13.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.13.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.13.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.13.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.13.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.13.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.13.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.13.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.14.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.14.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.14.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.14.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.14.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.14.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.14.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.14.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.14.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.14.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.14.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.14.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.14.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.14.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.14.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.14.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.14.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.14.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.14.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.14.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.14.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.14.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.14.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.14.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.14.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.14.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.14.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.14.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.14.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.14.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.14.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.14.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.14.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.14.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.14.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.14.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.14.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.14.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.14.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.14.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.14.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.14.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.14.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.14.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.14.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.14.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.14.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.14.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.14.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.14.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.14.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.14.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.14.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.14.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.14.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.14.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.14.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.14.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.14.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.14.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.14.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.14.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.14.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.14.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.14.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.14.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.14.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.14.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.14.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.14.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.14.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.14.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.14.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.14.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.14.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.14.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.14.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.14.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.14.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.14.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.14.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.14.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.14.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.14.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.14.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.14.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.14.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.14.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.14.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.14.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.14.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.14.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.14.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.14.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.14.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.14.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.14.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.14.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.14.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.15.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.15.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.15.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.15.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.15.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.15.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.15.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.15.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.15.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.15.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.15.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.15.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.15.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.15.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.15.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.15.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.15.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.15.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.15.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.15.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.15.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.15.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.15.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.15.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.15.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.15.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.15.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.15.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.15.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.15.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.15.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.15.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.15.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.15.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.15.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.15.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.15.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.15.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.15.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.15.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.15.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.15.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.15.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.15.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.15.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.15.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.15.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.15.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.15.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.15.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.15.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.15.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.15.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.15.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.15.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.15.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.15.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.15.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.15.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.15.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.15.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.15.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.15.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.15.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.15.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.15.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.15.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.15.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.15.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.15.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.15.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.15.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.15.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.15.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.15.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.15.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.15.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.15.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.15.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.15.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.15.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.15.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.15.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.15.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.15.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.15.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.15.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.15.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.15.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.15.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.15.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.15.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.15.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.15.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.15.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.15.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.15.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.15.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.15.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.16.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.16.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.16.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.16.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.16.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.16.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.16.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.16.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.16.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.16.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.16.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.16.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.16.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.16.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.16.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.16.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.16.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.16.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.16.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.16.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.16.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.16.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.16.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.16.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.16.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.16.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.16.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.16.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.16.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.16.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.16.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.16.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.16.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.16.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.16.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.16.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.16.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.16.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.16.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.16.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.16.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.16.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.16.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.16.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.16.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.16.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.16.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.16.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.16.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.16.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.16.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.16.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.16.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.16.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.16.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.16.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.16.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.16.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.16.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.16.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.16.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.16.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.16.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.16.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.16.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.16.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.16.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.16.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.16.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.16.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.16.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.16.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.16.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.16.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.16.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.16.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.16.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.16.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.16.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.16.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.16.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.16.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.16.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.16.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.16.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.16.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.16.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.16.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.16.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.16.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.16.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.16.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.16.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.16.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.16.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.16.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.16.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.16.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.16.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.17.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.17.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.17.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.17.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.17.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.17.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.17.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.17.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.17.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.17.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.17.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.17.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.17.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.17.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.17.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.17.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.17.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.17.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.17.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.17.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.17.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.17.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.17.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.17.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.17.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.17.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.17.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.17.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.17.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.17.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.17.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.17.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.17.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.17.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.17.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.17.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.17.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.17.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.17.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.17.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.17.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.17.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.17.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.17.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.17.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.17.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.17.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.17.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.17.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.17.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.17.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.17.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.17.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.17.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.17.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.17.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.17.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.17.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.17.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.17.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.17.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.17.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.17.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.17.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.17.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.17.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.17.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.17.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.17.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.17.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.17.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.17.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.17.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.17.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.17.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.17.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.17.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.17.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.17.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.17.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.17.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.17.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.17.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.17.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.17.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.17.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.17.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.17.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.17.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.17.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.17.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.17.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.17.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.17.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.17.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.17.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.17.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.17.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.17.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.18.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.18.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.18.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.18.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.18.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.18.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.18.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.18.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.18.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.18.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.18.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.18.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.18.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.18.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.18.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.18.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.18.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.18.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.18.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.18.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.18.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.18.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.18.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.18.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.18.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.18.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.18.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.18.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.18.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.18.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.18.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.18.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.18.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.18.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.18.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.18.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.18.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.18.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.18.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.18.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.18.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.18.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.18.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.18.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.18.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.18.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.18.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.18.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.18.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.18.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.18.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.18.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.18.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.18.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.18.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.18.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.18.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.18.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.18.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.18.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.18.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.18.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.18.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.18.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.18.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.18.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.18.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.18.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.18.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.18.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.18.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.18.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.18.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.18.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.18.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.18.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.18.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.18.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.18.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.18.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.18.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.18.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.18.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.18.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.18.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.18.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.18.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.18.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.18.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.18.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.18.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.18.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.18.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.18.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.18.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.18.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.18.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.18.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.18.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.19.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.19.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.19.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.19.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.19.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.19.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.19.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.19.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.19.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.19.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.19.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.19.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.19.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.19.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.19.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.19.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.19.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.19.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.19.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.19.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.19.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.19.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.19.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.19.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.19.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.19.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.19.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.19.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.19.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.19.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.19.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.19.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.19.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.19.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.19.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.19.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.19.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.19.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.19.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.19.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.19.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.19.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.19.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.19.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.19.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.19.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.19.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.19.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.19.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.19.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.19.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.19.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.19.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.19.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.19.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.19.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.19.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.19.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.19.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.19.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.19.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.19.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.19.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.19.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.19.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.19.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.19.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.19.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.19.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.19.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.19.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.19.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.19.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.19.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.19.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.19.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.19.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.19.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.19.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.19.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.19.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.19.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.19.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.19.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.19.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.19.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.19.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.19.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.19.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.19.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.19.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.19.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.19.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.19.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.19.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.19.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.19.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.19.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.19.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
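  // The per-block layout repeats from here on: normalization weights
  // (tensor<4096xf32>) are replicated, one `$N`-suffixed global per device,
  // all bound to the same named parameter; projection weights are instead
  // pre-sharded on disk, with each "...shard.N" global loading a distinct
  // parameter slice pinned to @__device_N through its iree.abi.affinity
  // attribute.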
  util.global private @__auto.blk.20.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.20.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.20.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.20.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.20.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.20.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.20.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.20.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.20.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.20.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.20.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.20.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.20.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.20.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.20.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.20.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.20.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.20.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.20.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.20.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.20.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.20.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.20.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.20.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.20.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.20.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.20.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.20.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.20.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.20.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.20.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.20.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.20.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.20.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.20.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.20.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.20.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.20.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.20.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.20.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.20.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.20.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.20.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.20.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.20.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.20.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.20.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.20.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.20.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.20.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.20.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.20.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.20.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.20.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.20.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.20.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.20.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.20.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.20.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.20.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.20.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.20.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.20.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.20.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.20.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.20.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.20.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.20.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.20.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.20.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.20.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.20.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.20.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.20.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.20.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.20.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.20.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.20.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.20.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.20.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.20.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.20.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.20.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.20.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.20.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.20.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.20.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.20.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.20.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.20.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.20.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.20.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.20.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.20.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.20.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.20.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.20.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.20.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.20.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
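  // The shard shapes are consistent with eight-way tensor parallelism over a
  // 4096-wide model: attn_q splits 4096 output rows into 8 x 512, attn_k and
  // attn_v split a narrower 1024-row KV projection into 8 x 128 (as in
  // grouped-query attention), attn_output is column-split into 8 x 512, and
  // the FFN splits its 14336-wide hidden dimension into 8 x 1792 (gate/up
  // row-split, down column-split).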
  util.global private @__auto.blk.21.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.21.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.21.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.21.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.21.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.21.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.21.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.21.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.21.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.21.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.21.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.21.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.21.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.21.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.21.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.21.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.21.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.21.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.21.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.21.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.21.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.21.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.21.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.21.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.21.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.21.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.21.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.21.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.21.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.21.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.21.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.21.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.21.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.21.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.21.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.21.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.21.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.21.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.21.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.21.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.21.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.21.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.21.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.21.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.21.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.21.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.21.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.21.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.21.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.21.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.21.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.21.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.21.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.21.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.21.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.21.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.21.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.21.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.21.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.21.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.21.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.21.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.21.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.21.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.21.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.21.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.21.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.21.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.21.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.21.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.21.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.21.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.21.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.21.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.21.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.21.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.21.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.21.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.21.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.21.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.21.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.21.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.21.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.21.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.21.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.21.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.21.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.21.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.21.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.21.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.21.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.21.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.21.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.21.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.21.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.21.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.21.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.21.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.21.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
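  // A minimal sketch of how one of these globals would be consumed
  // (hypothetical function and operand names; the actual compute functions
  // appear later in the module):
  //
  //   func.func @example_q_proj_shard0(%x: tensor<?x4096xf16>) {
  //     // Loads the device-0 query shard declared above; the value stays
  //     // resident on @__device_0, and ops consuming it are expected to be
  //     // scheduled on that same device.
  //     %w = util.global.load @__auto.blk.21.attn_q.weight.shard.0 : tensor<512x4096xf16>
  //     ...
  //   }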
  util.global private @__auto.blk.22.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.22.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.22.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.22.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.22.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.22.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.22.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.22.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.22.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.22.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.22.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.22.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.22.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.22.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.22.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.22.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.22.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.22.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.22.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.22.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.22.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.22.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.22.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.22.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.22.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.22.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.22.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.22.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.22.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.22.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.22.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.22.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.22.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.22.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.22.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.22.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.22.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.22.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.22.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.22.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.22.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.22.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.22.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.22.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.22.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.22.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.22.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.22.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.22.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.22.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.22.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.22.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.22.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.22.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.22.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.22.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.22.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.22.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.22.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.22.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.22.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.22.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.22.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.22.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.22.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.22.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.22.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.22.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.22.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.22.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.22.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.22.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.22.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.22.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.22.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.22.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.22.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.22.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.22.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.22.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.22.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.22.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.22.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.22.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.22.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.22.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.22.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.22.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.22.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.22.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.22.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.22.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.22.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.22.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.22.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.22.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.22.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.22.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.22.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
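  // All of these globals resolve through the "model" scope of
  // #stream.parameter.named, so the tensors are not embedded in the module:
  // the runtime binds them from an external parameter archive (for example,
  // via iree-run-module's --parameters=model=<file> flag), loading each shard
  // directly onto its pinned device.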
  util.global private @__auto.blk.23.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.23.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.23.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.23.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.23.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.23.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.23.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.23.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.23.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.23.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.23.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.23.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.23.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.23.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.23.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.23.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.23.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.23.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.23.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.23.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.23.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.23.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.23.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.23.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.23.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.23.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.23.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.23.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.23.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.23.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.23.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.23.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.23.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.23.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.23.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.23.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.23.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.23.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.23.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.23.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.23.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.23.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.23.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.23.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.23.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.23.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.23.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.23.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.23.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.23.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.23.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.23.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.23.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.23.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.23.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.23.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.23.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.23.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.23.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.23.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.23.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.23.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.23.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.23.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.23.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.23.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.23.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.23.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.23.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.23.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.23.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.23.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.23.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.23.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.23.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.23.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.23.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.23.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.23.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.23.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.23.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.23.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.23.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.23.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.23.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.23.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.23.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.23.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.23.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.23.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.23.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.23.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.23.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.23.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.23.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.23.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.23.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.23.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.23.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
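  // Reader note (annotation, not emitted by the compiler): the per-block parameter
  // layout below repeats identically for each transformer block. All shapes and
  // affinities are as declared; the arithmetic is derived from the visible shapes.
  //   *.attn_norm.weight / *.ffn_norm.weight : tensor<4096xf32>, one full copy per
  //     device ($1..$7 suffixes are per-device duplicates of the same named parameter).
  //   *.attn_q.weight.shard.N      : tensor<512x4096xf16>   (8 shards; 8 x 512  = 4096)
  //   *.attn_k.weight.shard.N      : tensor<128x4096xf16>   (8 shards; 8 x 128  = 1024)
  //   *.attn_v.weight.shard.N      : tensor<128x4096xf16>   (8 shards; 8 x 128  = 1024)
  //   *.attn_output.weight.shard.N : tensor<4096x512xf16>   (8 shards along dim 1)
  //   *.ffn_gate.weight.shard.N    : tensor<1792x4096xf16>  (8 shards; 8 x 1792 = 14336)
  //   *.ffn_up.weight.shard.N      : tensor<1792x4096xf16>  (8 shards; 8 x 1792 = 14336)
  //   *.ffn_down.weight.shard.N    : tensor<4096x1792xf16>  (8 shards along dim 1)
  // Shard N is pinned via {iree.abi.affinity = #hal.device.promise<@__device_N>}.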
  util.global private @__auto.blk.24.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.24.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.24.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.24.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.24.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.24.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.24.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.24.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.24.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.24.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.24.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.24.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.24.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.24.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.24.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.24.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.24.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.24.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.24.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.24.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.24.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.24.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.24.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.24.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.24.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.24.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.24.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.24.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.24.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.24.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.24.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.24.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.24.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.24.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.24.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.24.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.24.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.24.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.24.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.24.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.24.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.24.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.24.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.24.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.24.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.24.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.24.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.24.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.24.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.24.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.24.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.24.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.24.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.24.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.24.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.24.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.24.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.24.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.24.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.24.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.24.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.24.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.24.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.24.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.24.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.24.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.24.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.24.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.24.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.24.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.24.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.24.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.24.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.24.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.24.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.24.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.24.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.24.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.24.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.24.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.24.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.24.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.24.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.24.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.24.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.24.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.24.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.24.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.24.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.24.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.24.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.24.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.24.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.24.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.24.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.24.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.24.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.24.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.24.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.25.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.25.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.25.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.25.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.25.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.25.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.25.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.25.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.25.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.25.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.25.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.25.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.25.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.25.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.25.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.25.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.25.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.25.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.25.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.25.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.25.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.25.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.25.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.25.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.25.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.25.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.25.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.25.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.25.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.25.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.25.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.25.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.25.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.25.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.25.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.25.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.25.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.25.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.25.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.25.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.25.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.25.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.25.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.25.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.25.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.25.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.25.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.25.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.25.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.25.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.25.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.25.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.25.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.25.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.25.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.25.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.25.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.25.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.25.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.25.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.25.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.25.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.25.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.25.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.25.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.25.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.25.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.25.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.25.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.25.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.25.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.25.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.25.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.25.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.25.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.25.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.25.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.25.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.25.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.25.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.25.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.25.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.25.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.25.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.25.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.25.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.25.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.25.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.25.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.25.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.25.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.25.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.25.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.25.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.25.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.25.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.25.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.25.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.25.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.26.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.26.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.26.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.26.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.26.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.26.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.26.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.26.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.26.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.26.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.26.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.26.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.26.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.26.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.26.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.26.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.26.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.26.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.26.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.26.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.26.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.26.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.26.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.26.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.26.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.26.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.26.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.26.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.26.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.26.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.26.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.26.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.26.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.26.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.26.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.26.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.26.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.26.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.26.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.26.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.26.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.26.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.26.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.26.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.26.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.26.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.26.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.26.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.26.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.26.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.26.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.26.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.26.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.26.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.26.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.26.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.26.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.26.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.26.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.26.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.26.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.26.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.26.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.26.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.26.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.26.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.26.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.26.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.26.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.26.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.26.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.26.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.26.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.26.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.26.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.26.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.26.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.26.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.26.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.26.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.26.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.26.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.26.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.26.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.26.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.26.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.26.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.26.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.26.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.26.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.26.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.26.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.26.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.26.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.26.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.26.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.26.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.26.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.26.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.27.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.27.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.27.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.27.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.27.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.27.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.27.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.27.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.27.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.27.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.27.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.27.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.27.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.27.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.27.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.27.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.27.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.27.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.27.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.27.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.27.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.27.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.27.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.27.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.27.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.27.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.27.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.27.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.27.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.27.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.27.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.27.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.27.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.27.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.27.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.27.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.27.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.27.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.27.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.27.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.27.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.27.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.27.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.27.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.27.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.27.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.27.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.27.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.27.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.27.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.27.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.27.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.27.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.27.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.27.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.27.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.27.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.27.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.27.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.27.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.27.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.27.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.27.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.27.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.27.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.27.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.27.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.27.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.27.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.27.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.27.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.27.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.27.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.27.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.27.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.27.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.27.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.27.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.27.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.27.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.27.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.27.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.27.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.27.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.27.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.27.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.27.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.27.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.27.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.27.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.27.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.27.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.27.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.27.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.27.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.27.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.27.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.27.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.27.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.28.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.28.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.28.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.28.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.28.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.28.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.28.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.28.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.28.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.28.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.28.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.28.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.28.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.28.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.28.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.28.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.28.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.28.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.28.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.28.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.28.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.28.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.28.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.28.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.28.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.28.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.28.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.28.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.28.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.28.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.28.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.28.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.28.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.28.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.28.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.28.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.28.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.28.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.28.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.28.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.28.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.28.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.28.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.28.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.28.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.28.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.28.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.28.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.28.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.28.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.28.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.28.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.28.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.28.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.28.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.28.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.28.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.28.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.28.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.28.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.28.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.28.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.28.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.28.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.28.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.28.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.28.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.28.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.28.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.28.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.28.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.28.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.28.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.28.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.28.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.28.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.28.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.28.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.28.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.28.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.28.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.28.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.28.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.28.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.28.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.28.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.28.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.28.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.28.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.28.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.28.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.28.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.28.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.28.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.28.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.28.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.28.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.28.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.28.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.29.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.29.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.29.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.29.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.29.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.29.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.29.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.29.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.29.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.29.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.29.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.29.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.29.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.29.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.29.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.29.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.29.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.29.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.29.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.29.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.29.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.29.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.29.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.29.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.29.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.29.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.29.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.29.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.29.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.29.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.29.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.29.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.29.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.29.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.29.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.29.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.29.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.29.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.29.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.29.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.29.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.29.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.29.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.29.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.29.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.29.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.29.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.29.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.29.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.29.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.29.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.29.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.29.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.29.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.29.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.29.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.29.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.29.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.29.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.29.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.29.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.29.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.29.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.29.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.29.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.29.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.29.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.29.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.29.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.29.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.29.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.29.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.29.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.29.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.29.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.29.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.29.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.29.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.29.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.29.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.29.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.29.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.29.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.29.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.29.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.29.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.29.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.29.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.29.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.29.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.29.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.29.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.29.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.29.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.29.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.29.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.29.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.29.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.29.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.30.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.30.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.30.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.30.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.30.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.30.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.30.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.30.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.30.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.30.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.30.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.30.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.30.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.30.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.30.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.30.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.30.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.30.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.30.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.30.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.30.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.30.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.30.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.30.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.30.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.30.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.30.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.30.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.30.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.30.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.30.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.30.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.30.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.30.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.30.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.30.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.30.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.30.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.30.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.30.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.30.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.30.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.30.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.30.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.30.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.30.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.30.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.30.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.30.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.30.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.30.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.30.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.30.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.30.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.30.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.30.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.30.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.30.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.30.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.30.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.30.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.30.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.30.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.30.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.30.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.30.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.30.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.30.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.30.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.30.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.30.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.30.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.30.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.30.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.30.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.30.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.30.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.30.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.30.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.30.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.30.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.30.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.30.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.30.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.30.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.30.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.30.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.30.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.30.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.30.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.30.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.30.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.30.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.30.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.30.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.30.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.30.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.30.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.30.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.31.attn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.31.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.attn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.31.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.attn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.31.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.attn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.31.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.attn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.31.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.attn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.31.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.attn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.31.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.attn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.31.attn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.attn_q.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.31.attn_q.weight.shard.0"> : tensor<512x4096xf16>
  util.global private @__auto.blk.31.attn_q.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.31.attn_q.weight.shard.1"> : tensor<512x4096xf16>
  util.global private @__auto.blk.31.attn_q.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.31.attn_q.weight.shard.2"> : tensor<512x4096xf16>
  util.global private @__auto.blk.31.attn_q.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.31.attn_q.weight.shard.3"> : tensor<512x4096xf16>
  util.global private @__auto.blk.31.attn_q.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.31.attn_q.weight.shard.4"> : tensor<512x4096xf16>
  util.global private @__auto.blk.31.attn_q.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.31.attn_q.weight.shard.5"> : tensor<512x4096xf16>
  util.global private @__auto.blk.31.attn_q.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.31.attn_q.weight.shard.6"> : tensor<512x4096xf16>
  util.global private @__auto.blk.31.attn_q.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.31.attn_q.weight.shard.7"> : tensor<512x4096xf16>
  util.global private @__auto.blk.31.attn_k.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.31.attn_k.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_k.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.31.attn_k.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_k.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.31.attn_k.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_k.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.31.attn_k.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_k.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.31.attn_k.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_k.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.31.attn_k.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_k.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.31.attn_k.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_k.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.31.attn_k.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_v.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.31.attn_v.weight.shard.0"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_v.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.31.attn_v.weight.shard.1"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_v.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.31.attn_v.weight.shard.2"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_v.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.31.attn_v.weight.shard.3"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_v.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.31.attn_v.weight.shard.4"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_v.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.31.attn_v.weight.shard.5"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_v.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.31.attn_v.weight.shard.6"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_v.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.31.attn_v.weight.shard.7"> : tensor<128x4096xf16>
  util.global private @__auto.blk.31.attn_output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.31.attn_output.weight.shard.0"> : tensor<4096x512xf16>
  util.global private @__auto.blk.31.attn_output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.31.attn_output.weight.shard.1"> : tensor<4096x512xf16>
  util.global private @__auto.blk.31.attn_output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.31.attn_output.weight.shard.2"> : tensor<4096x512xf16>
  util.global private @__auto.blk.31.attn_output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.31.attn_output.weight.shard.3"> : tensor<4096x512xf16>
  util.global private @__auto.blk.31.attn_output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.31.attn_output.weight.shard.4"> : tensor<4096x512xf16>
  util.global private @__auto.blk.31.attn_output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.31.attn_output.weight.shard.5"> : tensor<4096x512xf16>
  util.global private @__auto.blk.31.attn_output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.31.attn_output.weight.shard.6"> : tensor<4096x512xf16>
  util.global private @__auto.blk.31.attn_output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.31.attn_output.weight.shard.7"> : tensor<4096x512xf16>
  util.global private @__auto.blk.31.ffn_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.31.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.ffn_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.31.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.ffn_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.31.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.ffn_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.31.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.ffn_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.31.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.ffn_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.31.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.ffn_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.31.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.ffn_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.31.ffn_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.blk.31.ffn_gate.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.31.ffn_gate.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_gate.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.31.ffn_gate.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_gate.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.31.ffn_gate.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_gate.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.31.ffn_gate.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_gate.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.31.ffn_gate.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_gate.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.31.ffn_gate.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_gate.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.31.ffn_gate.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_gate.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.31.ffn_gate.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_up.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.31.ffn_up.weight.shard.0"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_up.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.31.ffn_up.weight.shard.1"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_up.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.31.ffn_up.weight.shard.2"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_up.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.31.ffn_up.weight.shard.3"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_up.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.31.ffn_up.weight.shard.4"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_up.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.31.ffn_up.weight.shard.5"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_up.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.31.ffn_up.weight.shard.6"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_up.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.31.ffn_up.weight.shard.7"> : tensor<1792x4096xf16>
  util.global private @__auto.blk.31.ffn_down.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"blk.31.ffn_down.weight.shard.0"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.31.ffn_down.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"blk.31.ffn_down.weight.shard.1"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.31.ffn_down.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"blk.31.ffn_down.weight.shard.2"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.31.ffn_down.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"blk.31.ffn_down.weight.shard.3"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.31.ffn_down.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"blk.31.ffn_down.weight.shard.4"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.31.ffn_down.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"blk.31.ffn_down.weight.shard.5"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.31.ffn_down.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"blk.31.ffn_down.weight.shard.6"> : tensor<4096x1792xf16>
  util.global private @__auto.blk.31.ffn_down.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"blk.31.ffn_down.weight.shard.7"> : tensor<4096x1792xf16>
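  // Model head: the output norm weight is replicated per device, while the
  // output projection is split into eight 128256x512 shards (evidently the
  // 4096 hidden dimension of a 128256x4096 matrix divided eight ways).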
  util.global private @__auto.output_norm.weight {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"output_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.output_norm.weight$1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"output_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.output_norm.weight$2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"output_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.output_norm.weight$3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"output_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.output_norm.weight$4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"output_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.output_norm.weight$5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"output_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.output_norm.weight$6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"output_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.output_norm.weight$7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"output_norm.weight"> : tensor<4096xf32>
  util.global private @__auto.output.weight.shard.0 {iree.abi.affinity = #hal.device.promise<@__device_0>} = #stream.parameter.named<"model"::"output.weight.shard.0"> : tensor<128256x512xf16>
  util.global private @__auto.output.weight.shard.1 {iree.abi.affinity = #hal.device.promise<@__device_1>} = #stream.parameter.named<"model"::"output.weight.shard.1"> : tensor<128256x512xf16>
  util.global private @__auto.output.weight.shard.2 {iree.abi.affinity = #hal.device.promise<@__device_2>} = #stream.parameter.named<"model"::"output.weight.shard.2"> : tensor<128256x512xf16>
  util.global private @__auto.output.weight.shard.3 {iree.abi.affinity = #hal.device.promise<@__device_3>} = #stream.parameter.named<"model"::"output.weight.shard.3"> : tensor<128256x512xf16>
  util.global private @__auto.output.weight.shard.4 {iree.abi.affinity = #hal.device.promise<@__device_4>} = #stream.parameter.named<"model"::"output.weight.shard.4"> : tensor<128256x512xf16>
  util.global private @__auto.output.weight.shard.5 {iree.abi.affinity = #hal.device.promise<@__device_5>} = #stream.parameter.named<"model"::"output.weight.shard.5"> : tensor<128256x512xf16>
  util.global private @__auto.output.weight.shard.6 {iree.abi.affinity = #hal.device.promise<@__device_6>} = #stream.parameter.named<"model"::"output.weight.shard.6"> : tensor<128256x512xf16>
  util.global private @__auto.output.weight.shard.7 {iree.abi.affinity = #hal.device.promise<@__device_7>} = #stream.parameter.named<"model"::"output.weight.shard.7"> : tensor<128256x512xf16>
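  // Prefill entrypoint for batch size 4. %arg0 appears to hold the token ids,
  // %arg1 the per-sequence lengths, and %arg2 the page/block table, all pinned
  // to @__device_0; %arg3..%arg10 are presumably the mutable per-device
  // KV-cache slabs, one [?,131072] f16 tensor per device (note the
  // !torch.tensor vs. !torch.vtensor distinction marking them as in-place
  // buffers rather than value-semantics tensors).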
  func.func @prefill_bs4(%arg0: !torch.vtensor<[4,?],si64> {iree.abi.affinity = #hal.device.promise<@__device_0>}, %arg1: !torch.vtensor<[4],si64> {iree.abi.affinity = #hal.device.promise<@__device_0>}, %arg2: !torch.vtensor<[4,?],si64> {iree.abi.affinity = #hal.device.promise<@__device_0>}, %arg3: !torch.tensor<[?,131072],f16> {iree.abi.affinity = #hal.device.promise<@__device_0>}, %arg4: !torch.tensor<[?,131072],f16> {iree.abi.affinity = #hal.device.promise<@__device_1>}, %arg5: !torch.tensor<[?,131072],f16> {iree.abi.affinity = #hal.device.promise<@__device_2>}, %arg6: !torch.tensor<[?,131072],f16> {iree.abi.affinity = #hal.device.promise<@__device_3>}, %arg7: !torch.tensor<[?,131072],f16> {iree.abi.affinity = #hal.device.promise<@__device_4>}, %arg8: !torch.tensor<[?,131072],f16> {iree.abi.affinity = #hal.device.promise<@__device_5>}, %arg9: !torch.tensor<[?,131072],f16> {iree.abi.affinity = #hal.device.promise<@__device_6>}, %arg10: !torch.tensor<[?,131072],f16> {iree.abi.affinity = #hal.device.promise<@__device_7>}) -> !torch.vtensor<[4,?,128256],f16> attributes {torch.assume_strict_symbolic_shapes} {
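    // Function prologue: every parameter global is materialized with the same
    // two-op pattern, a util.global.load producing a builtin tensor followed by
    // torch_c.from_builtin_tensor casting it to the !torch.vtensor type the
    // torch-dialect ops consume. The replicated token embedding is loaded once
    // per device (%0..%7).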
    %__auto.token_embd.weight = util.global.load @__auto.token_embd.weight : tensor<128256x4096xf16>
    %0 = torch_c.from_builtin_tensor %__auto.token_embd.weight : tensor<128256x4096xf16> -> !torch.vtensor<[128256,4096],f16>
    %__auto.token_embd.weight$1 = util.global.load @__auto.token_embd.weight$1 : tensor<128256x4096xf16>
    %1 = torch_c.from_builtin_tensor %__auto.token_embd.weight$1 : tensor<128256x4096xf16> -> !torch.vtensor<[128256,4096],f16>
    %__auto.token_embd.weight$2 = util.global.load @__auto.token_embd.weight$2 : tensor<128256x4096xf16>
    %2 = torch_c.from_builtin_tensor %__auto.token_embd.weight$2 : tensor<128256x4096xf16> -> !torch.vtensor<[128256,4096],f16>
    %__auto.token_embd.weight$3 = util.global.load @__auto.token_embd.weight$3 : tensor<128256x4096xf16>
    %3 = torch_c.from_builtin_tensor %__auto.token_embd.weight$3 : tensor<128256x4096xf16> -> !torch.vtensor<[128256,4096],f16>
    %__auto.token_embd.weight$4 = util.global.load @__auto.token_embd.weight$4 : tensor<128256x4096xf16>
    %4 = torch_c.from_builtin_tensor %__auto.token_embd.weight$4 : tensor<128256x4096xf16> -> !torch.vtensor<[128256,4096],f16>
    %__auto.token_embd.weight$5 = util.global.load @__auto.token_embd.weight$5 : tensor<128256x4096xf16>
    %5 = torch_c.from_builtin_tensor %__auto.token_embd.weight$5 : tensor<128256x4096xf16> -> !torch.vtensor<[128256,4096],f16>
    %__auto.token_embd.weight$6 = util.global.load @__auto.token_embd.weight$6 : tensor<128256x4096xf16>
    %6 = torch_c.from_builtin_tensor %__auto.token_embd.weight$6 : tensor<128256x4096xf16> -> !torch.vtensor<[128256,4096],f16>
    %__auto.token_embd.weight$7 = util.global.load @__auto.token_embd.weight$7 : tensor<128256x4096xf16>
    %7 = torch_c.from_builtin_tensor %__auto.token_embd.weight$7 : tensor<128256x4096xf16> -> !torch.vtensor<[128256,4096],f16>
    %__auto.blk.0.attn_norm.weight = util.global.load @__auto.blk.0.attn_norm.weight : tensor<4096xf32>
    %8 = torch_c.from_builtin_tensor %__auto.blk.0.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.attn_norm.weight$1 = util.global.load @__auto.blk.0.attn_norm.weight$1 : tensor<4096xf32>
    %9 = torch_c.from_builtin_tensor %__auto.blk.0.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.attn_norm.weight$2 = util.global.load @__auto.blk.0.attn_norm.weight$2 : tensor<4096xf32>
    %10 = torch_c.from_builtin_tensor %__auto.blk.0.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.attn_norm.weight$3 = util.global.load @__auto.blk.0.attn_norm.weight$3 : tensor<4096xf32>
    %11 = torch_c.from_builtin_tensor %__auto.blk.0.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.attn_norm.weight$4 = util.global.load @__auto.blk.0.attn_norm.weight$4 : tensor<4096xf32>
    %12 = torch_c.from_builtin_tensor %__auto.blk.0.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.attn_norm.weight$5 = util.global.load @__auto.blk.0.attn_norm.weight$5 : tensor<4096xf32>
    %13 = torch_c.from_builtin_tensor %__auto.blk.0.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.attn_norm.weight$6 = util.global.load @__auto.blk.0.attn_norm.weight$6 : tensor<4096xf32>
    %14 = torch_c.from_builtin_tensor %__auto.blk.0.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.attn_norm.weight$7 = util.global.load @__auto.blk.0.attn_norm.weight$7 : tensor<4096xf32>
    %15 = torch_c.from_builtin_tensor %__auto.blk.0.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
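    // Sharded projections follow: unlike the replicated norm weights above,
    // each "*.shard.N" global is a distinct slice of the original parameter
    // (e.g. eight 512x4096 Q-projection shards, evidently a 4096x4096 weight
    // split along its output dimension). This load/cast sequence repeats for
    // every block through blk.31.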
    %__auto.blk.0.attn_q.weight.shard.0 = util.global.load @__auto.blk.0.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %16 = torch_c.from_builtin_tensor %__auto.blk.0.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.0.attn_q.weight.shard.1 = util.global.load @__auto.blk.0.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %17 = torch_c.from_builtin_tensor %__auto.blk.0.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.0.attn_q.weight.shard.2 = util.global.load @__auto.blk.0.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %18 = torch_c.from_builtin_tensor %__auto.blk.0.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.0.attn_q.weight.shard.3 = util.global.load @__auto.blk.0.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %19 = torch_c.from_builtin_tensor %__auto.blk.0.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.0.attn_q.weight.shard.4 = util.global.load @__auto.blk.0.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %20 = torch_c.from_builtin_tensor %__auto.blk.0.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.0.attn_q.weight.shard.5 = util.global.load @__auto.blk.0.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %21 = torch_c.from_builtin_tensor %__auto.blk.0.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.0.attn_q.weight.shard.6 = util.global.load @__auto.blk.0.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %22 = torch_c.from_builtin_tensor %__auto.blk.0.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.0.attn_q.weight.shard.7 = util.global.load @__auto.blk.0.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %23 = torch_c.from_builtin_tensor %__auto.blk.0.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.0.attn_k.weight.shard.0 = util.global.load @__auto.blk.0.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %24 = torch_c.from_builtin_tensor %__auto.blk.0.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_k.weight.shard.1 = util.global.load @__auto.blk.0.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %25 = torch_c.from_builtin_tensor %__auto.blk.0.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_k.weight.shard.2 = util.global.load @__auto.blk.0.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %26 = torch_c.from_builtin_tensor %__auto.blk.0.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_k.weight.shard.3 = util.global.load @__auto.blk.0.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %27 = torch_c.from_builtin_tensor %__auto.blk.0.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_k.weight.shard.4 = util.global.load @__auto.blk.0.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %28 = torch_c.from_builtin_tensor %__auto.blk.0.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_k.weight.shard.5 = util.global.load @__auto.blk.0.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %29 = torch_c.from_builtin_tensor %__auto.blk.0.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_k.weight.shard.6 = util.global.load @__auto.blk.0.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %30 = torch_c.from_builtin_tensor %__auto.blk.0.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_k.weight.shard.7 = util.global.load @__auto.blk.0.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %31 = torch_c.from_builtin_tensor %__auto.blk.0.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_v.weight.shard.0 = util.global.load @__auto.blk.0.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %32 = torch_c.from_builtin_tensor %__auto.blk.0.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_v.weight.shard.1 = util.global.load @__auto.blk.0.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %33 = torch_c.from_builtin_tensor %__auto.blk.0.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_v.weight.shard.2 = util.global.load @__auto.blk.0.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %34 = torch_c.from_builtin_tensor %__auto.blk.0.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_v.weight.shard.3 = util.global.load @__auto.blk.0.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %35 = torch_c.from_builtin_tensor %__auto.blk.0.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_v.weight.shard.4 = util.global.load @__auto.blk.0.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %36 = torch_c.from_builtin_tensor %__auto.blk.0.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_v.weight.shard.5 = util.global.load @__auto.blk.0.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %37 = torch_c.from_builtin_tensor %__auto.blk.0.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_v.weight.shard.6 = util.global.load @__auto.blk.0.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %38 = torch_c.from_builtin_tensor %__auto.blk.0.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_v.weight.shard.7 = util.global.load @__auto.blk.0.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %39 = torch_c.from_builtin_tensor %__auto.blk.0.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.0.attn_output.weight.shard.0 = util.global.load @__auto.blk.0.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %40 = torch_c.from_builtin_tensor %__auto.blk.0.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.0.attn_output.weight.shard.1 = util.global.load @__auto.blk.0.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %41 = torch_c.from_builtin_tensor %__auto.blk.0.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.0.attn_output.weight.shard.2 = util.global.load @__auto.blk.0.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %42 = torch_c.from_builtin_tensor %__auto.blk.0.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.0.attn_output.weight.shard.3 = util.global.load @__auto.blk.0.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %43 = torch_c.from_builtin_tensor %__auto.blk.0.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.0.attn_output.weight.shard.4 = util.global.load @__auto.blk.0.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %44 = torch_c.from_builtin_tensor %__auto.blk.0.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.0.attn_output.weight.shard.5 = util.global.load @__auto.blk.0.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %45 = torch_c.from_builtin_tensor %__auto.blk.0.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.0.attn_output.weight.shard.6 = util.global.load @__auto.blk.0.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %46 = torch_c.from_builtin_tensor %__auto.blk.0.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.0.attn_output.weight.shard.7 = util.global.load @__auto.blk.0.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %47 = torch_c.from_builtin_tensor %__auto.blk.0.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.0.ffn_norm.weight = util.global.load @__auto.blk.0.ffn_norm.weight : tensor<4096xf32>
    %48 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.ffn_norm.weight$1 = util.global.load @__auto.blk.0.ffn_norm.weight$1 : tensor<4096xf32>
    %49 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.ffn_norm.weight$2 = util.global.load @__auto.blk.0.ffn_norm.weight$2 : tensor<4096xf32>
    %50 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.ffn_norm.weight$3 = util.global.load @__auto.blk.0.ffn_norm.weight$3 : tensor<4096xf32>
    %51 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.ffn_norm.weight$4 = util.global.load @__auto.blk.0.ffn_norm.weight$4 : tensor<4096xf32>
    %52 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.ffn_norm.weight$5 = util.global.load @__auto.blk.0.ffn_norm.weight$5 : tensor<4096xf32>
    %53 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.ffn_norm.weight$6 = util.global.load @__auto.blk.0.ffn_norm.weight$6 : tensor<4096xf32>
    %54 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.ffn_norm.weight$7 = util.global.load @__auto.blk.0.ffn_norm.weight$7 : tensor<4096xf32>
    %55 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.0.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.0.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %56 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.0.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %57 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.0.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %58 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.0.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %59 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.0.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %60 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.0.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %61 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.0.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %62 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.0.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %63 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_up.weight.shard.0 = util.global.load @__auto.blk.0.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %64 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_up.weight.shard.1 = util.global.load @__auto.blk.0.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %65 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_up.weight.shard.2 = util.global.load @__auto.blk.0.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %66 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_up.weight.shard.3 = util.global.load @__auto.blk.0.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %67 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_up.weight.shard.4 = util.global.load @__auto.blk.0.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %68 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_up.weight.shard.5 = util.global.load @__auto.blk.0.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %69 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_up.weight.shard.6 = util.global.load @__auto.blk.0.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %70 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_up.weight.shard.7 = util.global.load @__auto.blk.0.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %71 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.0.ffn_down.weight.shard.0 = util.global.load @__auto.blk.0.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %72 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.0.ffn_down.weight.shard.1 = util.global.load @__auto.blk.0.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %73 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.0.ffn_down.weight.shard.2 = util.global.load @__auto.blk.0.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %74 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.0.ffn_down.weight.shard.3 = util.global.load @__auto.blk.0.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %75 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.0.ffn_down.weight.shard.4 = util.global.load @__auto.blk.0.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %76 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.0.ffn_down.weight.shard.5 = util.global.load @__auto.blk.0.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %77 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.0.ffn_down.weight.shard.6 = util.global.load @__auto.blk.0.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %78 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.0.ffn_down.weight.shard.7 = util.global.load @__auto.blk.0.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %79 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.1.attn_norm.weight = util.global.load @__auto.blk.1.attn_norm.weight : tensor<4096xf32>
    %80 = torch_c.from_builtin_tensor %__auto.blk.1.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.attn_norm.weight$1 = util.global.load @__auto.blk.1.attn_norm.weight$1 : tensor<4096xf32>
    %81 = torch_c.from_builtin_tensor %__auto.blk.1.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.attn_norm.weight$2 = util.global.load @__auto.blk.1.attn_norm.weight$2 : tensor<4096xf32>
    %82 = torch_c.from_builtin_tensor %__auto.blk.1.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.attn_norm.weight$3 = util.global.load @__auto.blk.1.attn_norm.weight$3 : tensor<4096xf32>
    %83 = torch_c.from_builtin_tensor %__auto.blk.1.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.attn_norm.weight$4 = util.global.load @__auto.blk.1.attn_norm.weight$4 : tensor<4096xf32>
    %84 = torch_c.from_builtin_tensor %__auto.blk.1.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.attn_norm.weight$5 = util.global.load @__auto.blk.1.attn_norm.weight$5 : tensor<4096xf32>
    %85 = torch_c.from_builtin_tensor %__auto.blk.1.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.attn_norm.weight$6 = util.global.load @__auto.blk.1.attn_norm.weight$6 : tensor<4096xf32>
    %86 = torch_c.from_builtin_tensor %__auto.blk.1.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.attn_norm.weight$7 = util.global.load @__auto.blk.1.attn_norm.weight$7 : tensor<4096xf32>
    %87 = torch_c.from_builtin_tensor %__auto.blk.1.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.attn_q.weight.shard.0 = util.global.load @__auto.blk.1.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %88 = torch_c.from_builtin_tensor %__auto.blk.1.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.1.attn_q.weight.shard.1 = util.global.load @__auto.blk.1.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %89 = torch_c.from_builtin_tensor %__auto.blk.1.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.1.attn_q.weight.shard.2 = util.global.load @__auto.blk.1.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %90 = torch_c.from_builtin_tensor %__auto.blk.1.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.1.attn_q.weight.shard.3 = util.global.load @__auto.blk.1.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %91 = torch_c.from_builtin_tensor %__auto.blk.1.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.1.attn_q.weight.shard.4 = util.global.load @__auto.blk.1.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %92 = torch_c.from_builtin_tensor %__auto.blk.1.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.1.attn_q.weight.shard.5 = util.global.load @__auto.blk.1.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %93 = torch_c.from_builtin_tensor %__auto.blk.1.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.1.attn_q.weight.shard.6 = util.global.load @__auto.blk.1.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %94 = torch_c.from_builtin_tensor %__auto.blk.1.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.1.attn_q.weight.shard.7 = util.global.load @__auto.blk.1.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %95 = torch_c.from_builtin_tensor %__auto.blk.1.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.1.attn_k.weight.shard.0 = util.global.load @__auto.blk.1.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %96 = torch_c.from_builtin_tensor %__auto.blk.1.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_k.weight.shard.1 = util.global.load @__auto.blk.1.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %97 = torch_c.from_builtin_tensor %__auto.blk.1.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_k.weight.shard.2 = util.global.load @__auto.blk.1.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %98 = torch_c.from_builtin_tensor %__auto.blk.1.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_k.weight.shard.3 = util.global.load @__auto.blk.1.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %99 = torch_c.from_builtin_tensor %__auto.blk.1.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_k.weight.shard.4 = util.global.load @__auto.blk.1.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %100 = torch_c.from_builtin_tensor %__auto.blk.1.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_k.weight.shard.5 = util.global.load @__auto.blk.1.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %101 = torch_c.from_builtin_tensor %__auto.blk.1.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_k.weight.shard.6 = util.global.load @__auto.blk.1.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %102 = torch_c.from_builtin_tensor %__auto.blk.1.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_k.weight.shard.7 = util.global.load @__auto.blk.1.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %103 = torch_c.from_builtin_tensor %__auto.blk.1.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_v.weight.shard.0 = util.global.load @__auto.blk.1.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %104 = torch_c.from_builtin_tensor %__auto.blk.1.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_v.weight.shard.1 = util.global.load @__auto.blk.1.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %105 = torch_c.from_builtin_tensor %__auto.blk.1.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_v.weight.shard.2 = util.global.load @__auto.blk.1.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %106 = torch_c.from_builtin_tensor %__auto.blk.1.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_v.weight.shard.3 = util.global.load @__auto.blk.1.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %107 = torch_c.from_builtin_tensor %__auto.blk.1.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_v.weight.shard.4 = util.global.load @__auto.blk.1.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %108 = torch_c.from_builtin_tensor %__auto.blk.1.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_v.weight.shard.5 = util.global.load @__auto.blk.1.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %109 = torch_c.from_builtin_tensor %__auto.blk.1.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_v.weight.shard.6 = util.global.load @__auto.blk.1.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %110 = torch_c.from_builtin_tensor %__auto.blk.1.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_v.weight.shard.7 = util.global.load @__auto.blk.1.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %111 = torch_c.from_builtin_tensor %__auto.blk.1.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.1.attn_output.weight.shard.0 = util.global.load @__auto.blk.1.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %112 = torch_c.from_builtin_tensor %__auto.blk.1.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.1.attn_output.weight.shard.1 = util.global.load @__auto.blk.1.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %113 = torch_c.from_builtin_tensor %__auto.blk.1.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.1.attn_output.weight.shard.2 = util.global.load @__auto.blk.1.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %114 = torch_c.from_builtin_tensor %__auto.blk.1.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.1.attn_output.weight.shard.3 = util.global.load @__auto.blk.1.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %115 = torch_c.from_builtin_tensor %__auto.blk.1.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.1.attn_output.weight.shard.4 = util.global.load @__auto.blk.1.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %116 = torch_c.from_builtin_tensor %__auto.blk.1.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.1.attn_output.weight.shard.5 = util.global.load @__auto.blk.1.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %117 = torch_c.from_builtin_tensor %__auto.blk.1.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.1.attn_output.weight.shard.6 = util.global.load @__auto.blk.1.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %118 = torch_c.from_builtin_tensor %__auto.blk.1.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.1.attn_output.weight.shard.7 = util.global.load @__auto.blk.1.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %119 = torch_c.from_builtin_tensor %__auto.blk.1.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.1.ffn_norm.weight = util.global.load @__auto.blk.1.ffn_norm.weight : tensor<4096xf32>
    %120 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.ffn_norm.weight$1 = util.global.load @__auto.blk.1.ffn_norm.weight$1 : tensor<4096xf32>
    %121 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.ffn_norm.weight$2 = util.global.load @__auto.blk.1.ffn_norm.weight$2 : tensor<4096xf32>
    %122 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.ffn_norm.weight$3 = util.global.load @__auto.blk.1.ffn_norm.weight$3 : tensor<4096xf32>
    %123 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.ffn_norm.weight$4 = util.global.load @__auto.blk.1.ffn_norm.weight$4 : tensor<4096xf32>
    %124 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.ffn_norm.weight$5 = util.global.load @__auto.blk.1.ffn_norm.weight$5 : tensor<4096xf32>
    %125 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.ffn_norm.weight$6 = util.global.load @__auto.blk.1.ffn_norm.weight$6 : tensor<4096xf32>
    %126 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.ffn_norm.weight$7 = util.global.load @__auto.blk.1.ffn_norm.weight$7 : tensor<4096xf32>
    %127 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.1.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.1.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %128 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.1.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %129 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.1.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %130 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.1.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %131 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.1.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %132 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.1.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %133 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.1.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %134 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.1.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %135 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_up.weight.shard.0 = util.global.load @__auto.blk.1.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %136 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_up.weight.shard.1 = util.global.load @__auto.blk.1.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %137 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_up.weight.shard.2 = util.global.load @__auto.blk.1.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %138 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_up.weight.shard.3 = util.global.load @__auto.blk.1.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %139 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_up.weight.shard.4 = util.global.load @__auto.blk.1.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %140 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_up.weight.shard.5 = util.global.load @__auto.blk.1.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %141 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_up.weight.shard.6 = util.global.load @__auto.blk.1.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %142 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_up.weight.shard.7 = util.global.load @__auto.blk.1.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %143 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.1.ffn_down.weight.shard.0 = util.global.load @__auto.blk.1.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %144 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.1.ffn_down.weight.shard.1 = util.global.load @__auto.blk.1.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %145 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.1.ffn_down.weight.shard.2 = util.global.load @__auto.blk.1.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %146 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.1.ffn_down.weight.shard.3 = util.global.load @__auto.blk.1.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %147 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.1.ffn_down.weight.shard.4 = util.global.load @__auto.blk.1.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %148 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.1.ffn_down.weight.shard.5 = util.global.load @__auto.blk.1.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %149 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.1.ffn_down.weight.shard.6 = util.global.load @__auto.blk.1.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %150 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.1.ffn_down.weight.shard.7 = util.global.load @__auto.blk.1.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %151 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.2.attn_norm.weight = util.global.load @__auto.blk.2.attn_norm.weight : tensor<4096xf32>
    %152 = torch_c.from_builtin_tensor %__auto.blk.2.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.attn_norm.weight$1 = util.global.load @__auto.blk.2.attn_norm.weight$1 : tensor<4096xf32>
    %153 = torch_c.from_builtin_tensor %__auto.blk.2.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.attn_norm.weight$2 = util.global.load @__auto.blk.2.attn_norm.weight$2 : tensor<4096xf32>
    %154 = torch_c.from_builtin_tensor %__auto.blk.2.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.attn_norm.weight$3 = util.global.load @__auto.blk.2.attn_norm.weight$3 : tensor<4096xf32>
    %155 = torch_c.from_builtin_tensor %__auto.blk.2.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.attn_norm.weight$4 = util.global.load @__auto.blk.2.attn_norm.weight$4 : tensor<4096xf32>
    %156 = torch_c.from_builtin_tensor %__auto.blk.2.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.attn_norm.weight$5 = util.global.load @__auto.blk.2.attn_norm.weight$5 : tensor<4096xf32>
    %157 = torch_c.from_builtin_tensor %__auto.blk.2.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.attn_norm.weight$6 = util.global.load @__auto.blk.2.attn_norm.weight$6 : tensor<4096xf32>
    %158 = torch_c.from_builtin_tensor %__auto.blk.2.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.attn_norm.weight$7 = util.global.load @__auto.blk.2.attn_norm.weight$7 : tensor<4096xf32>
    %159 = torch_c.from_builtin_tensor %__auto.blk.2.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.attn_q.weight.shard.0 = util.global.load @__auto.blk.2.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %160 = torch_c.from_builtin_tensor %__auto.blk.2.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.2.attn_q.weight.shard.1 = util.global.load @__auto.blk.2.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %161 = torch_c.from_builtin_tensor %__auto.blk.2.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.2.attn_q.weight.shard.2 = util.global.load @__auto.blk.2.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %162 = torch_c.from_builtin_tensor %__auto.blk.2.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.2.attn_q.weight.shard.3 = util.global.load @__auto.blk.2.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %163 = torch_c.from_builtin_tensor %__auto.blk.2.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.2.attn_q.weight.shard.4 = util.global.load @__auto.blk.2.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %164 = torch_c.from_builtin_tensor %__auto.blk.2.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.2.attn_q.weight.shard.5 = util.global.load @__auto.blk.2.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %165 = torch_c.from_builtin_tensor %__auto.blk.2.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.2.attn_q.weight.shard.6 = util.global.load @__auto.blk.2.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %166 = torch_c.from_builtin_tensor %__auto.blk.2.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.2.attn_q.weight.shard.7 = util.global.load @__auto.blk.2.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %167 = torch_c.from_builtin_tensor %__auto.blk.2.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
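    // blk.2 K projection shards: 128x4096 each, 1024 rows total across the
    // 8 devices -- a 4:1 Q-to-KV row ratio, consistent with grouped-query
    // attention (e.g. 4 query heads and 1 KV head per device at head_dim 128).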
    %__auto.blk.2.attn_k.weight.shard.0 = util.global.load @__auto.blk.2.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %168 = torch_c.from_builtin_tensor %__auto.blk.2.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_k.weight.shard.1 = util.global.load @__auto.blk.2.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %169 = torch_c.from_builtin_tensor %__auto.blk.2.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_k.weight.shard.2 = util.global.load @__auto.blk.2.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %170 = torch_c.from_builtin_tensor %__auto.blk.2.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_k.weight.shard.3 = util.global.load @__auto.blk.2.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %171 = torch_c.from_builtin_tensor %__auto.blk.2.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_k.weight.shard.4 = util.global.load @__auto.blk.2.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %172 = torch_c.from_builtin_tensor %__auto.blk.2.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_k.weight.shard.5 = util.global.load @__auto.blk.2.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %173 = torch_c.from_builtin_tensor %__auto.blk.2.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_k.weight.shard.6 = util.global.load @__auto.blk.2.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %174 = torch_c.from_builtin_tensor %__auto.blk.2.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_k.weight.shard.7 = util.global.load @__auto.blk.2.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %175 = torch_c.from_builtin_tensor %__auto.blk.2.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
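    // blk.2 V projection shards: same 128x4096 layout as K.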
    %__auto.blk.2.attn_v.weight.shard.0 = util.global.load @__auto.blk.2.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %176 = torch_c.from_builtin_tensor %__auto.blk.2.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_v.weight.shard.1 = util.global.load @__auto.blk.2.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %177 = torch_c.from_builtin_tensor %__auto.blk.2.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_v.weight.shard.2 = util.global.load @__auto.blk.2.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %178 = torch_c.from_builtin_tensor %__auto.blk.2.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_v.weight.shard.3 = util.global.load @__auto.blk.2.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %179 = torch_c.from_builtin_tensor %__auto.blk.2.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_v.weight.shard.4 = util.global.load @__auto.blk.2.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %180 = torch_c.from_builtin_tensor %__auto.blk.2.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_v.weight.shard.5 = util.global.load @__auto.blk.2.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %181 = torch_c.from_builtin_tensor %__auto.blk.2.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_v.weight.shard.6 = util.global.load @__auto.blk.2.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %182 = torch_c.from_builtin_tensor %__auto.blk.2.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.2.attn_v.weight.shard.7 = util.global.load @__auto.blk.2.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %183 = torch_c.from_builtin_tensor %__auto.blk.2.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
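    // blk.2 attention output projection shards: 4096x512 each. Here the
    // 512-sized axis is the per-device input slice, so each device likely
    // produces a partial 4096-wide result that is later combined across
    // devices (the usual tensor-parallel reduction pattern).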
    %__auto.blk.2.attn_output.weight.shard.0 = util.global.load @__auto.blk.2.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %184 = torch_c.from_builtin_tensor %__auto.blk.2.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.2.attn_output.weight.shard.1 = util.global.load @__auto.blk.2.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %185 = torch_c.from_builtin_tensor %__auto.blk.2.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.2.attn_output.weight.shard.2 = util.global.load @__auto.blk.2.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %186 = torch_c.from_builtin_tensor %__auto.blk.2.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.2.attn_output.weight.shard.3 = util.global.load @__auto.blk.2.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %187 = torch_c.from_builtin_tensor %__auto.blk.2.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.2.attn_output.weight.shard.4 = util.global.load @__auto.blk.2.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %188 = torch_c.from_builtin_tensor %__auto.blk.2.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.2.attn_output.weight.shard.5 = util.global.load @__auto.blk.2.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %189 = torch_c.from_builtin_tensor %__auto.blk.2.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.2.attn_output.weight.shard.6 = util.global.load @__auto.blk.2.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %190 = torch_c.from_builtin_tensor %__auto.blk.2.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.2.attn_output.weight.shard.7 = util.global.load @__auto.blk.2.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %191 = torch_c.from_builtin_tensor %__auto.blk.2.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
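    // blk.2 FFN norm weight, replicated across the 8 devices ($1..$7).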
    %__auto.blk.2.ffn_norm.weight = util.global.load @__auto.blk.2.ffn_norm.weight : tensor<4096xf32>
    %192 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.ffn_norm.weight$1 = util.global.load @__auto.blk.2.ffn_norm.weight$1 : tensor<4096xf32>
    %193 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.ffn_norm.weight$2 = util.global.load @__auto.blk.2.ffn_norm.weight$2 : tensor<4096xf32>
    %194 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.ffn_norm.weight$3 = util.global.load @__auto.blk.2.ffn_norm.weight$3 : tensor<4096xf32>
    %195 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.ffn_norm.weight$4 = util.global.load @__auto.blk.2.ffn_norm.weight$4 : tensor<4096xf32>
    %196 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.ffn_norm.weight$5 = util.global.load @__auto.blk.2.ffn_norm.weight$5 : tensor<4096xf32>
    %197 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.ffn_norm.weight$6 = util.global.load @__auto.blk.2.ffn_norm.weight$6 : tensor<4096xf32>
    %198 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.2.ffn_norm.weight$7 = util.global.load @__auto.blk.2.ffn_norm.weight$7 : tensor<4096xf32>
    %199 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
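    // blk.2 FFN gate projection shards: 1792x4096 each,
    // 8 x 1792 = 14336 intermediate features in total.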
    %__auto.blk.2.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.2.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %200 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.2.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %201 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.2.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %202 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.2.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %203 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.2.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %204 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.2.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %205 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.2.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %206 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.2.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %207 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
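    // blk.2 FFN up projection shards: same 1792x4096 layout as the gate.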
    %__auto.blk.2.ffn_up.weight.shard.0 = util.global.load @__auto.blk.2.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %208 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_up.weight.shard.1 = util.global.load @__auto.blk.2.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %209 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_up.weight.shard.2 = util.global.load @__auto.blk.2.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %210 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_up.weight.shard.3 = util.global.load @__auto.blk.2.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %211 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_up.weight.shard.4 = util.global.load @__auto.blk.2.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %212 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_up.weight.shard.5 = util.global.load @__auto.blk.2.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %213 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_up.weight.shard.6 = util.global.load @__auto.blk.2.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %214 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.2.ffn_up.weight.shard.7 = util.global.load @__auto.blk.2.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %215 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
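    // blk.2 FFN down projection shards: 4096x1792 each, sharded along the
    // 14336-wide reduction axis; as with attn_output, each device presumably
    // computes a partial sum to be reduced across devices.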
    %__auto.blk.2.ffn_down.weight.shard.0 = util.global.load @__auto.blk.2.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %216 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.2.ffn_down.weight.shard.1 = util.global.load @__auto.blk.2.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %217 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.2.ffn_down.weight.shard.2 = util.global.load @__auto.blk.2.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %218 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.2.ffn_down.weight.shard.3 = util.global.load @__auto.blk.2.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %219 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.2.ffn_down.weight.shard.4 = util.global.load @__auto.blk.2.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %220 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.2.ffn_down.weight.shard.5 = util.global.load @__auto.blk.2.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %221 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.2.ffn_down.weight.shard.6 = util.global.load @__auto.blk.2.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %222 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.2.ffn_down.weight.shard.7 = util.global.load @__auto.blk.2.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %223 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
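    // blk.3 parameters: identical per-tensor layout and sharding as blk.2.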
    %__auto.blk.3.attn_norm.weight = util.global.load @__auto.blk.3.attn_norm.weight : tensor<4096xf32>
    %224 = torch_c.from_builtin_tensor %__auto.blk.3.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.attn_norm.weight$1 = util.global.load @__auto.blk.3.attn_norm.weight$1 : tensor<4096xf32>
    %225 = torch_c.from_builtin_tensor %__auto.blk.3.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.attn_norm.weight$2 = util.global.load @__auto.blk.3.attn_norm.weight$2 : tensor<4096xf32>
    %226 = torch_c.from_builtin_tensor %__auto.blk.3.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.attn_norm.weight$3 = util.global.load @__auto.blk.3.attn_norm.weight$3 : tensor<4096xf32>
    %227 = torch_c.from_builtin_tensor %__auto.blk.3.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.attn_norm.weight$4 = util.global.load @__auto.blk.3.attn_norm.weight$4 : tensor<4096xf32>
    %228 = torch_c.from_builtin_tensor %__auto.blk.3.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.attn_norm.weight$5 = util.global.load @__auto.blk.3.attn_norm.weight$5 : tensor<4096xf32>
    %229 = torch_c.from_builtin_tensor %__auto.blk.3.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.attn_norm.weight$6 = util.global.load @__auto.blk.3.attn_norm.weight$6 : tensor<4096xf32>
    %230 = torch_c.from_builtin_tensor %__auto.blk.3.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.attn_norm.weight$7 = util.global.load @__auto.blk.3.attn_norm.weight$7 : tensor<4096xf32>
    %231 = torch_c.from_builtin_tensor %__auto.blk.3.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.attn_q.weight.shard.0 = util.global.load @__auto.blk.3.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %232 = torch_c.from_builtin_tensor %__auto.blk.3.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.3.attn_q.weight.shard.1 = util.global.load @__auto.blk.3.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %233 = torch_c.from_builtin_tensor %__auto.blk.3.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.3.attn_q.weight.shard.2 = util.global.load @__auto.blk.3.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %234 = torch_c.from_builtin_tensor %__auto.blk.3.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.3.attn_q.weight.shard.3 = util.global.load @__auto.blk.3.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %235 = torch_c.from_builtin_tensor %__auto.blk.3.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.3.attn_q.weight.shard.4 = util.global.load @__auto.blk.3.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %236 = torch_c.from_builtin_tensor %__auto.blk.3.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.3.attn_q.weight.shard.5 = util.global.load @__auto.blk.3.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %237 = torch_c.from_builtin_tensor %__auto.blk.3.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.3.attn_q.weight.shard.6 = util.global.load @__auto.blk.3.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %238 = torch_c.from_builtin_tensor %__auto.blk.3.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.3.attn_q.weight.shard.7 = util.global.load @__auto.blk.3.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %239 = torch_c.from_builtin_tensor %__auto.blk.3.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.3.attn_k.weight.shard.0 = util.global.load @__auto.blk.3.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %240 = torch_c.from_builtin_tensor %__auto.blk.3.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_k.weight.shard.1 = util.global.load @__auto.blk.3.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %241 = torch_c.from_builtin_tensor %__auto.blk.3.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_k.weight.shard.2 = util.global.load @__auto.blk.3.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %242 = torch_c.from_builtin_tensor %__auto.blk.3.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_k.weight.shard.3 = util.global.load @__auto.blk.3.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %243 = torch_c.from_builtin_tensor %__auto.blk.3.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_k.weight.shard.4 = util.global.load @__auto.blk.3.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %244 = torch_c.from_builtin_tensor %__auto.blk.3.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_k.weight.shard.5 = util.global.load @__auto.blk.3.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %245 = torch_c.from_builtin_tensor %__auto.blk.3.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_k.weight.shard.6 = util.global.load @__auto.blk.3.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %246 = torch_c.from_builtin_tensor %__auto.blk.3.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_k.weight.shard.7 = util.global.load @__auto.blk.3.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %247 = torch_c.from_builtin_tensor %__auto.blk.3.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_v.weight.shard.0 = util.global.load @__auto.blk.3.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %248 = torch_c.from_builtin_tensor %__auto.blk.3.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_v.weight.shard.1 = util.global.load @__auto.blk.3.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %249 = torch_c.from_builtin_tensor %__auto.blk.3.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_v.weight.shard.2 = util.global.load @__auto.blk.3.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %250 = torch_c.from_builtin_tensor %__auto.blk.3.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_v.weight.shard.3 = util.global.load @__auto.blk.3.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %251 = torch_c.from_builtin_tensor %__auto.blk.3.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_v.weight.shard.4 = util.global.load @__auto.blk.3.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %252 = torch_c.from_builtin_tensor %__auto.blk.3.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_v.weight.shard.5 = util.global.load @__auto.blk.3.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %253 = torch_c.from_builtin_tensor %__auto.blk.3.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_v.weight.shard.6 = util.global.load @__auto.blk.3.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %254 = torch_c.from_builtin_tensor %__auto.blk.3.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_v.weight.shard.7 = util.global.load @__auto.blk.3.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %255 = torch_c.from_builtin_tensor %__auto.blk.3.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.3.attn_output.weight.shard.0 = util.global.load @__auto.blk.3.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %256 = torch_c.from_builtin_tensor %__auto.blk.3.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.3.attn_output.weight.shard.1 = util.global.load @__auto.blk.3.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %257 = torch_c.from_builtin_tensor %__auto.blk.3.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.3.attn_output.weight.shard.2 = util.global.load @__auto.blk.3.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %258 = torch_c.from_builtin_tensor %__auto.blk.3.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.3.attn_output.weight.shard.3 = util.global.load @__auto.blk.3.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %259 = torch_c.from_builtin_tensor %__auto.blk.3.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.3.attn_output.weight.shard.4 = util.global.load @__auto.blk.3.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %260 = torch_c.from_builtin_tensor %__auto.blk.3.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.3.attn_output.weight.shard.5 = util.global.load @__auto.blk.3.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %261 = torch_c.from_builtin_tensor %__auto.blk.3.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.3.attn_output.weight.shard.6 = util.global.load @__auto.blk.3.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %262 = torch_c.from_builtin_tensor %__auto.blk.3.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.3.attn_output.weight.shard.7 = util.global.load @__auto.blk.3.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %263 = torch_c.from_builtin_tensor %__auto.blk.3.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.3.ffn_norm.weight = util.global.load @__auto.blk.3.ffn_norm.weight : tensor<4096xf32>
    %264 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.ffn_norm.weight$1 = util.global.load @__auto.blk.3.ffn_norm.weight$1 : tensor<4096xf32>
    %265 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.ffn_norm.weight$2 = util.global.load @__auto.blk.3.ffn_norm.weight$2 : tensor<4096xf32>
    %266 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.ffn_norm.weight$3 = util.global.load @__auto.blk.3.ffn_norm.weight$3 : tensor<4096xf32>
    %267 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.ffn_norm.weight$4 = util.global.load @__auto.blk.3.ffn_norm.weight$4 : tensor<4096xf32>
    %268 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.ffn_norm.weight$5 = util.global.load @__auto.blk.3.ffn_norm.weight$5 : tensor<4096xf32>
    %269 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.ffn_norm.weight$6 = util.global.load @__auto.blk.3.ffn_norm.weight$6 : tensor<4096xf32>
    %270 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.ffn_norm.weight$7 = util.global.load @__auto.blk.3.ffn_norm.weight$7 : tensor<4096xf32>
    %271 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.3.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.3.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %272 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.3.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %273 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.3.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %274 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.3.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %275 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.3.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %276 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.3.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %277 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.3.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %278 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.3.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %279 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_up.weight.shard.0 = util.global.load @__auto.blk.3.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %280 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_up.weight.shard.1 = util.global.load @__auto.blk.3.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %281 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_up.weight.shard.2 = util.global.load @__auto.blk.3.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %282 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_up.weight.shard.3 = util.global.load @__auto.blk.3.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %283 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_up.weight.shard.4 = util.global.load @__auto.blk.3.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %284 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_up.weight.shard.5 = util.global.load @__auto.blk.3.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %285 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_up.weight.shard.6 = util.global.load @__auto.blk.3.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %286 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_up.weight.shard.7 = util.global.load @__auto.blk.3.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %287 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.3.ffn_down.weight.shard.0 = util.global.load @__auto.blk.3.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %288 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.3.ffn_down.weight.shard.1 = util.global.load @__auto.blk.3.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %289 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.3.ffn_down.weight.shard.2 = util.global.load @__auto.blk.3.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %290 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.3.ffn_down.weight.shard.3 = util.global.load @__auto.blk.3.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %291 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.3.ffn_down.weight.shard.4 = util.global.load @__auto.blk.3.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %292 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.3.ffn_down.weight.shard.5 = util.global.load @__auto.blk.3.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %293 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.3.ffn_down.weight.shard.6 = util.global.load @__auto.blk.3.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %294 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.3.ffn_down.weight.shard.7 = util.global.load @__auto.blk.3.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %295 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
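    // blk.4 parameters.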
    %__auto.blk.4.attn_norm.weight = util.global.load @__auto.blk.4.attn_norm.weight : tensor<4096xf32>
    %296 = torch_c.from_builtin_tensor %__auto.blk.4.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.attn_norm.weight$1 = util.global.load @__auto.blk.4.attn_norm.weight$1 : tensor<4096xf32>
    %297 = torch_c.from_builtin_tensor %__auto.blk.4.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.attn_norm.weight$2 = util.global.load @__auto.blk.4.attn_norm.weight$2 : tensor<4096xf32>
    %298 = torch_c.from_builtin_tensor %__auto.blk.4.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.attn_norm.weight$3 = util.global.load @__auto.blk.4.attn_norm.weight$3 : tensor<4096xf32>
    %299 = torch_c.from_builtin_tensor %__auto.blk.4.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.attn_norm.weight$4 = util.global.load @__auto.blk.4.attn_norm.weight$4 : tensor<4096xf32>
    %300 = torch_c.from_builtin_tensor %__auto.blk.4.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.attn_norm.weight$5 = util.global.load @__auto.blk.4.attn_norm.weight$5 : tensor<4096xf32>
    %301 = torch_c.from_builtin_tensor %__auto.blk.4.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.attn_norm.weight$6 = util.global.load @__auto.blk.4.attn_norm.weight$6 : tensor<4096xf32>
    %302 = torch_c.from_builtin_tensor %__auto.blk.4.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.attn_norm.weight$7 = util.global.load @__auto.blk.4.attn_norm.weight$7 : tensor<4096xf32>
    %303 = torch_c.from_builtin_tensor %__auto.blk.4.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.attn_q.weight.shard.0 = util.global.load @__auto.blk.4.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %304 = torch_c.from_builtin_tensor %__auto.blk.4.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.4.attn_q.weight.shard.1 = util.global.load @__auto.blk.4.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %305 = torch_c.from_builtin_tensor %__auto.blk.4.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.4.attn_q.weight.shard.2 = util.global.load @__auto.blk.4.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %306 = torch_c.from_builtin_tensor %__auto.blk.4.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.4.attn_q.weight.shard.3 = util.global.load @__auto.blk.4.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %307 = torch_c.from_builtin_tensor %__auto.blk.4.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.4.attn_q.weight.shard.4 = util.global.load @__auto.blk.4.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %308 = torch_c.from_builtin_tensor %__auto.blk.4.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.4.attn_q.weight.shard.5 = util.global.load @__auto.blk.4.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %309 = torch_c.from_builtin_tensor %__auto.blk.4.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.4.attn_q.weight.shard.6 = util.global.load @__auto.blk.4.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %310 = torch_c.from_builtin_tensor %__auto.blk.4.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.4.attn_q.weight.shard.7 = util.global.load @__auto.blk.4.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %311 = torch_c.from_builtin_tensor %__auto.blk.4.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.4.attn_k.weight.shard.0 = util.global.load @__auto.blk.4.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %312 = torch_c.from_builtin_tensor %__auto.blk.4.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_k.weight.shard.1 = util.global.load @__auto.blk.4.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %313 = torch_c.from_builtin_tensor %__auto.blk.4.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_k.weight.shard.2 = util.global.load @__auto.blk.4.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %314 = torch_c.from_builtin_tensor %__auto.blk.4.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_k.weight.shard.3 = util.global.load @__auto.blk.4.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %315 = torch_c.from_builtin_tensor %__auto.blk.4.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_k.weight.shard.4 = util.global.load @__auto.blk.4.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %316 = torch_c.from_builtin_tensor %__auto.blk.4.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_k.weight.shard.5 = util.global.load @__auto.blk.4.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %317 = torch_c.from_builtin_tensor %__auto.blk.4.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_k.weight.shard.6 = util.global.load @__auto.blk.4.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %318 = torch_c.from_builtin_tensor %__auto.blk.4.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_k.weight.shard.7 = util.global.load @__auto.blk.4.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %319 = torch_c.from_builtin_tensor %__auto.blk.4.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_v.weight.shard.0 = util.global.load @__auto.blk.4.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %320 = torch_c.from_builtin_tensor %__auto.blk.4.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_v.weight.shard.1 = util.global.load @__auto.blk.4.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %321 = torch_c.from_builtin_tensor %__auto.blk.4.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_v.weight.shard.2 = util.global.load @__auto.blk.4.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %322 = torch_c.from_builtin_tensor %__auto.blk.4.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_v.weight.shard.3 = util.global.load @__auto.blk.4.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %323 = torch_c.from_builtin_tensor %__auto.blk.4.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_v.weight.shard.4 = util.global.load @__auto.blk.4.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %324 = torch_c.from_builtin_tensor %__auto.blk.4.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_v.weight.shard.5 = util.global.load @__auto.blk.4.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %325 = torch_c.from_builtin_tensor %__auto.blk.4.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_v.weight.shard.6 = util.global.load @__auto.blk.4.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %326 = torch_c.from_builtin_tensor %__auto.blk.4.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_v.weight.shard.7 = util.global.load @__auto.blk.4.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %327 = torch_c.from_builtin_tensor %__auto.blk.4.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.4.attn_output.weight.shard.0 = util.global.load @__auto.blk.4.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %328 = torch_c.from_builtin_tensor %__auto.blk.4.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.4.attn_output.weight.shard.1 = util.global.load @__auto.blk.4.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %329 = torch_c.from_builtin_tensor %__auto.blk.4.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.4.attn_output.weight.shard.2 = util.global.load @__auto.blk.4.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %330 = torch_c.from_builtin_tensor %__auto.blk.4.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.4.attn_output.weight.shard.3 = util.global.load @__auto.blk.4.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %331 = torch_c.from_builtin_tensor %__auto.blk.4.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.4.attn_output.weight.shard.4 = util.global.load @__auto.blk.4.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %332 = torch_c.from_builtin_tensor %__auto.blk.4.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.4.attn_output.weight.shard.5 = util.global.load @__auto.blk.4.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %333 = torch_c.from_builtin_tensor %__auto.blk.4.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.4.attn_output.weight.shard.6 = util.global.load @__auto.blk.4.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %334 = torch_c.from_builtin_tensor %__auto.blk.4.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.4.attn_output.weight.shard.7 = util.global.load @__auto.blk.4.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %335 = torch_c.from_builtin_tensor %__auto.blk.4.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.4.ffn_norm.weight = util.global.load @__auto.blk.4.ffn_norm.weight : tensor<4096xf32>
    %336 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.ffn_norm.weight$1 = util.global.load @__auto.blk.4.ffn_norm.weight$1 : tensor<4096xf32>
    %337 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.ffn_norm.weight$2 = util.global.load @__auto.blk.4.ffn_norm.weight$2 : tensor<4096xf32>
    %338 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.ffn_norm.weight$3 = util.global.load @__auto.blk.4.ffn_norm.weight$3 : tensor<4096xf32>
    %339 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.ffn_norm.weight$4 = util.global.load @__auto.blk.4.ffn_norm.weight$4 : tensor<4096xf32>
    %340 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.ffn_norm.weight$5 = util.global.load @__auto.blk.4.ffn_norm.weight$5 : tensor<4096xf32>
    %341 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.ffn_norm.weight$6 = util.global.load @__auto.blk.4.ffn_norm.weight$6 : tensor<4096xf32>
    %342 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.ffn_norm.weight$7 = util.global.load @__auto.blk.4.ffn_norm.weight$7 : tensor<4096xf32>
    %343 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.4.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.4.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %344 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.4.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %345 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.4.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %346 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.4.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %347 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.4.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %348 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.4.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %349 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.4.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %350 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.4.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %351 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_up.weight.shard.0 = util.global.load @__auto.blk.4.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %352 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_up.weight.shard.1 = util.global.load @__auto.blk.4.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %353 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_up.weight.shard.2 = util.global.load @__auto.blk.4.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %354 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_up.weight.shard.3 = util.global.load @__auto.blk.4.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %355 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_up.weight.shard.4 = util.global.load @__auto.blk.4.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %356 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_up.weight.shard.5 = util.global.load @__auto.blk.4.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %357 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_up.weight.shard.6 = util.global.load @__auto.blk.4.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %358 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_up.weight.shard.7 = util.global.load @__auto.blk.4.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %359 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.4.ffn_down.weight.shard.0 = util.global.load @__auto.blk.4.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %360 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.4.ffn_down.weight.shard.1 = util.global.load @__auto.blk.4.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %361 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.4.ffn_down.weight.shard.2 = util.global.load @__auto.blk.4.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %362 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.4.ffn_down.weight.shard.3 = util.global.load @__auto.blk.4.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %363 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.4.ffn_down.weight.shard.4 = util.global.load @__auto.blk.4.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %364 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.4.ffn_down.weight.shard.5 = util.global.load @__auto.blk.4.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %365 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.4.ffn_down.weight.shard.6 = util.global.load @__auto.blk.4.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %366 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.4.ffn_down.weight.shard.7 = util.global.load @__auto.blk.4.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %367 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
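    // blk.5 parameters.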
    %__auto.blk.5.attn_norm.weight = util.global.load @__auto.blk.5.attn_norm.weight : tensor<4096xf32>
    %368 = torch_c.from_builtin_tensor %__auto.blk.5.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.attn_norm.weight$1 = util.global.load @__auto.blk.5.attn_norm.weight$1 : tensor<4096xf32>
    %369 = torch_c.from_builtin_tensor %__auto.blk.5.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.attn_norm.weight$2 = util.global.load @__auto.blk.5.attn_norm.weight$2 : tensor<4096xf32>
    %370 = torch_c.from_builtin_tensor %__auto.blk.5.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.attn_norm.weight$3 = util.global.load @__auto.blk.5.attn_norm.weight$3 : tensor<4096xf32>
    %371 = torch_c.from_builtin_tensor %__auto.blk.5.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.attn_norm.weight$4 = util.global.load @__auto.blk.5.attn_norm.weight$4 : tensor<4096xf32>
    %372 = torch_c.from_builtin_tensor %__auto.blk.5.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.attn_norm.weight$5 = util.global.load @__auto.blk.5.attn_norm.weight$5 : tensor<4096xf32>
    %373 = torch_c.from_builtin_tensor %__auto.blk.5.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.attn_norm.weight$6 = util.global.load @__auto.blk.5.attn_norm.weight$6 : tensor<4096xf32>
    %374 = torch_c.from_builtin_tensor %__auto.blk.5.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.attn_norm.weight$7 = util.global.load @__auto.blk.5.attn_norm.weight$7 : tensor<4096xf32>
    %375 = torch_c.from_builtin_tensor %__auto.blk.5.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.attn_q.weight.shard.0 = util.global.load @__auto.blk.5.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %376 = torch_c.from_builtin_tensor %__auto.blk.5.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.5.attn_q.weight.shard.1 = util.global.load @__auto.blk.5.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %377 = torch_c.from_builtin_tensor %__auto.blk.5.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.5.attn_q.weight.shard.2 = util.global.load @__auto.blk.5.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %378 = torch_c.from_builtin_tensor %__auto.blk.5.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.5.attn_q.weight.shard.3 = util.global.load @__auto.blk.5.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %379 = torch_c.from_builtin_tensor %__auto.blk.5.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.5.attn_q.weight.shard.4 = util.global.load @__auto.blk.5.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %380 = torch_c.from_builtin_tensor %__auto.blk.5.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.5.attn_q.weight.shard.5 = util.global.load @__auto.blk.5.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %381 = torch_c.from_builtin_tensor %__auto.blk.5.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.5.attn_q.weight.shard.6 = util.global.load @__auto.blk.5.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %382 = torch_c.from_builtin_tensor %__auto.blk.5.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.5.attn_q.weight.shard.7 = util.global.load @__auto.blk.5.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %383 = torch_c.from_builtin_tensor %__auto.blk.5.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.5.attn_k.weight.shard.0 = util.global.load @__auto.blk.5.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %384 = torch_c.from_builtin_tensor %__auto.blk.5.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_k.weight.shard.1 = util.global.load @__auto.blk.5.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %385 = torch_c.from_builtin_tensor %__auto.blk.5.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_k.weight.shard.2 = util.global.load @__auto.blk.5.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %386 = torch_c.from_builtin_tensor %__auto.blk.5.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_k.weight.shard.3 = util.global.load @__auto.blk.5.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %387 = torch_c.from_builtin_tensor %__auto.blk.5.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_k.weight.shard.4 = util.global.load @__auto.blk.5.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %388 = torch_c.from_builtin_tensor %__auto.blk.5.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_k.weight.shard.5 = util.global.load @__auto.blk.5.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %389 = torch_c.from_builtin_tensor %__auto.blk.5.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_k.weight.shard.6 = util.global.load @__auto.blk.5.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %390 = torch_c.from_builtin_tensor %__auto.blk.5.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_k.weight.shard.7 = util.global.load @__auto.blk.5.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %391 = torch_c.from_builtin_tensor %__auto.blk.5.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_v.weight.shard.0 = util.global.load @__auto.blk.5.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %392 = torch_c.from_builtin_tensor %__auto.blk.5.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_v.weight.shard.1 = util.global.load @__auto.blk.5.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %393 = torch_c.from_builtin_tensor %__auto.blk.5.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_v.weight.shard.2 = util.global.load @__auto.blk.5.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %394 = torch_c.from_builtin_tensor %__auto.blk.5.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_v.weight.shard.3 = util.global.load @__auto.blk.5.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %395 = torch_c.from_builtin_tensor %__auto.blk.5.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_v.weight.shard.4 = util.global.load @__auto.blk.5.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %396 = torch_c.from_builtin_tensor %__auto.blk.5.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_v.weight.shard.5 = util.global.load @__auto.blk.5.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %397 = torch_c.from_builtin_tensor %__auto.blk.5.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_v.weight.shard.6 = util.global.load @__auto.blk.5.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %398 = torch_c.from_builtin_tensor %__auto.blk.5.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.5.attn_v.weight.shard.7 = util.global.load @__auto.blk.5.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %399 = torch_c.from_builtin_tensor %__auto.blk.5.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
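    // Editor's note: the attn_output shards are tensor<4096x512xf16>, so the
    // output projection is split along its 4096-wide input (reduction)
    // dimension, 512 columns per device. The per-device matmuls therefore
    // presumably feed a cross-device sum/all-reduce later in the function.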
    %__auto.blk.5.attn_output.weight.shard.0 = util.global.load @__auto.blk.5.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %400 = torch_c.from_builtin_tensor %__auto.blk.5.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.5.attn_output.weight.shard.1 = util.global.load @__auto.blk.5.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %401 = torch_c.from_builtin_tensor %__auto.blk.5.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.5.attn_output.weight.shard.2 = util.global.load @__auto.blk.5.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %402 = torch_c.from_builtin_tensor %__auto.blk.5.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.5.attn_output.weight.shard.3 = util.global.load @__auto.blk.5.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %403 = torch_c.from_builtin_tensor %__auto.blk.5.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.5.attn_output.weight.shard.4 = util.global.load @__auto.blk.5.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %404 = torch_c.from_builtin_tensor %__auto.blk.5.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.5.attn_output.weight.shard.5 = util.global.load @__auto.blk.5.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %405 = torch_c.from_builtin_tensor %__auto.blk.5.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.5.attn_output.weight.shard.6 = util.global.load @__auto.blk.5.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %406 = torch_c.from_builtin_tensor %__auto.blk.5.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.5.attn_output.weight.shard.7 = util.global.load @__auto.blk.5.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %407 = torch_c.from_builtin_tensor %__auto.blk.5.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
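    // Editor's note: ffn_norm, like attn_norm, is not sharded. The unsuffixed
    // and $1..$7 globals are full replicas of the same parameter, one per
    // device affinity, whereas the ".shard.N" globals hold distinct slices.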
    %__auto.blk.5.ffn_norm.weight = util.global.load @__auto.blk.5.ffn_norm.weight : tensor<4096xf32>
    %408 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.ffn_norm.weight$1 = util.global.load @__auto.blk.5.ffn_norm.weight$1 : tensor<4096xf32>
    %409 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.ffn_norm.weight$2 = util.global.load @__auto.blk.5.ffn_norm.weight$2 : tensor<4096xf32>
    %410 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.ffn_norm.weight$3 = util.global.load @__auto.blk.5.ffn_norm.weight$3 : tensor<4096xf32>
    %411 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.ffn_norm.weight$4 = util.global.load @__auto.blk.5.ffn_norm.weight$4 : tensor<4096xf32>
    %412 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.ffn_norm.weight$5 = util.global.load @__auto.blk.5.ffn_norm.weight$5 : tensor<4096xf32>
    %413 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.ffn_norm.weight$6 = util.global.load @__auto.blk.5.ffn_norm.weight$6 : tensor<4096xf32>
    %414 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.5.ffn_norm.weight$7 = util.global.load @__auto.blk.5.ffn_norm.weight$7 : tensor<4096xf32>
    %415 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
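    // Editor's note: the FFN weights are 8-way sharded; ffn_gate / ffn_up as
    // 1792-row slices (presumably of 14336x4096 projections, since
    // 8 x 1792 = 14336), and ffn_down (below) as 1792-column slices of the
    // 4096x14336 projection, again split along the reduction dimension.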
    %__auto.blk.5.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.5.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %416 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.5.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %417 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.5.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %418 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.5.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %419 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.5.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %420 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.5.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %421 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.5.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %422 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.5.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %423 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_up.weight.shard.0 = util.global.load @__auto.blk.5.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %424 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_up.weight.shard.1 = util.global.load @__auto.blk.5.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %425 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_up.weight.shard.2 = util.global.load @__auto.blk.5.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %426 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_up.weight.shard.3 = util.global.load @__auto.blk.5.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %427 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_up.weight.shard.4 = util.global.load @__auto.blk.5.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %428 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_up.weight.shard.5 = util.global.load @__auto.blk.5.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %429 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_up.weight.shard.6 = util.global.load @__auto.blk.5.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %430 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_up.weight.shard.7 = util.global.load @__auto.blk.5.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %431 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.5.ffn_down.weight.shard.0 = util.global.load @__auto.blk.5.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %432 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.5.ffn_down.weight.shard.1 = util.global.load @__auto.blk.5.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %433 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.5.ffn_down.weight.shard.2 = util.global.load @__auto.blk.5.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %434 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.5.ffn_down.weight.shard.3 = util.global.load @__auto.blk.5.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %435 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.5.ffn_down.weight.shard.4 = util.global.load @__auto.blk.5.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %436 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.5.ffn_down.weight.shard.5 = util.global.load @__auto.blk.5.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %437 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.5.ffn_down.weight.shard.6 = util.global.load @__auto.blk.5.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %438 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.5.ffn_down.weight.shard.7 = util.global.load @__auto.blk.5.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %439 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
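    // blk.6 parameter loads (same replicated-norm / 8-way-sharded layout as blk.5).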
    %__auto.blk.6.attn_norm.weight = util.global.load @__auto.blk.6.attn_norm.weight : tensor<4096xf32>
    %440 = torch_c.from_builtin_tensor %__auto.blk.6.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.attn_norm.weight$1 = util.global.load @__auto.blk.6.attn_norm.weight$1 : tensor<4096xf32>
    %441 = torch_c.from_builtin_tensor %__auto.blk.6.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.attn_norm.weight$2 = util.global.load @__auto.blk.6.attn_norm.weight$2 : tensor<4096xf32>
    %442 = torch_c.from_builtin_tensor %__auto.blk.6.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.attn_norm.weight$3 = util.global.load @__auto.blk.6.attn_norm.weight$3 : tensor<4096xf32>
    %443 = torch_c.from_builtin_tensor %__auto.blk.6.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.attn_norm.weight$4 = util.global.load @__auto.blk.6.attn_norm.weight$4 : tensor<4096xf32>
    %444 = torch_c.from_builtin_tensor %__auto.blk.6.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.attn_norm.weight$5 = util.global.load @__auto.blk.6.attn_norm.weight$5 : tensor<4096xf32>
    %445 = torch_c.from_builtin_tensor %__auto.blk.6.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.attn_norm.weight$6 = util.global.load @__auto.blk.6.attn_norm.weight$6 : tensor<4096xf32>
    %446 = torch_c.from_builtin_tensor %__auto.blk.6.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.attn_norm.weight$7 = util.global.load @__auto.blk.6.attn_norm.weight$7 : tensor<4096xf32>
    %447 = torch_c.from_builtin_tensor %__auto.blk.6.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.attn_q.weight.shard.0 = util.global.load @__auto.blk.6.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %448 = torch_c.from_builtin_tensor %__auto.blk.6.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.6.attn_q.weight.shard.1 = util.global.load @__auto.blk.6.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %449 = torch_c.from_builtin_tensor %__auto.blk.6.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.6.attn_q.weight.shard.2 = util.global.load @__auto.blk.6.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %450 = torch_c.from_builtin_tensor %__auto.blk.6.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.6.attn_q.weight.shard.3 = util.global.load @__auto.blk.6.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %451 = torch_c.from_builtin_tensor %__auto.blk.6.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.6.attn_q.weight.shard.4 = util.global.load @__auto.blk.6.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %452 = torch_c.from_builtin_tensor %__auto.blk.6.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.6.attn_q.weight.shard.5 = util.global.load @__auto.blk.6.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %453 = torch_c.from_builtin_tensor %__auto.blk.6.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.6.attn_q.weight.shard.6 = util.global.load @__auto.blk.6.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %454 = torch_c.from_builtin_tensor %__auto.blk.6.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.6.attn_q.weight.shard.7 = util.global.load @__auto.blk.6.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %455 = torch_c.from_builtin_tensor %__auto.blk.6.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.6.attn_k.weight.shard.0 = util.global.load @__auto.blk.6.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %456 = torch_c.from_builtin_tensor %__auto.blk.6.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_k.weight.shard.1 = util.global.load @__auto.blk.6.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %457 = torch_c.from_builtin_tensor %__auto.blk.6.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_k.weight.shard.2 = util.global.load @__auto.blk.6.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %458 = torch_c.from_builtin_tensor %__auto.blk.6.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_k.weight.shard.3 = util.global.load @__auto.blk.6.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %459 = torch_c.from_builtin_tensor %__auto.blk.6.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_k.weight.shard.4 = util.global.load @__auto.blk.6.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %460 = torch_c.from_builtin_tensor %__auto.blk.6.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_k.weight.shard.5 = util.global.load @__auto.blk.6.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %461 = torch_c.from_builtin_tensor %__auto.blk.6.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_k.weight.shard.6 = util.global.load @__auto.blk.6.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %462 = torch_c.from_builtin_tensor %__auto.blk.6.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_k.weight.shard.7 = util.global.load @__auto.blk.6.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %463 = torch_c.from_builtin_tensor %__auto.blk.6.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_v.weight.shard.0 = util.global.load @__auto.blk.6.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %464 = torch_c.from_builtin_tensor %__auto.blk.6.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_v.weight.shard.1 = util.global.load @__auto.blk.6.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %465 = torch_c.from_builtin_tensor %__auto.blk.6.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_v.weight.shard.2 = util.global.load @__auto.blk.6.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %466 = torch_c.from_builtin_tensor %__auto.blk.6.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_v.weight.shard.3 = util.global.load @__auto.blk.6.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %467 = torch_c.from_builtin_tensor %__auto.blk.6.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_v.weight.shard.4 = util.global.load @__auto.blk.6.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %468 = torch_c.from_builtin_tensor %__auto.blk.6.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_v.weight.shard.5 = util.global.load @__auto.blk.6.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %469 = torch_c.from_builtin_tensor %__auto.blk.6.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_v.weight.shard.6 = util.global.load @__auto.blk.6.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %470 = torch_c.from_builtin_tensor %__auto.blk.6.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_v.weight.shard.7 = util.global.load @__auto.blk.6.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %471 = torch_c.from_builtin_tensor %__auto.blk.6.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.6.attn_output.weight.shard.0 = util.global.load @__auto.blk.6.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %472 = torch_c.from_builtin_tensor %__auto.blk.6.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.6.attn_output.weight.shard.1 = util.global.load @__auto.blk.6.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %473 = torch_c.from_builtin_tensor %__auto.blk.6.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.6.attn_output.weight.shard.2 = util.global.load @__auto.blk.6.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %474 = torch_c.from_builtin_tensor %__auto.blk.6.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.6.attn_output.weight.shard.3 = util.global.load @__auto.blk.6.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %475 = torch_c.from_builtin_tensor %__auto.blk.6.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.6.attn_output.weight.shard.4 = util.global.load @__auto.blk.6.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %476 = torch_c.from_builtin_tensor %__auto.blk.6.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.6.attn_output.weight.shard.5 = util.global.load @__auto.blk.6.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %477 = torch_c.from_builtin_tensor %__auto.blk.6.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.6.attn_output.weight.shard.6 = util.global.load @__auto.blk.6.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %478 = torch_c.from_builtin_tensor %__auto.blk.6.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.6.attn_output.weight.shard.7 = util.global.load @__auto.blk.6.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %479 = torch_c.from_builtin_tensor %__auto.blk.6.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.6.ffn_norm.weight = util.global.load @__auto.blk.6.ffn_norm.weight : tensor<4096xf32>
    %480 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.ffn_norm.weight$1 = util.global.load @__auto.blk.6.ffn_norm.weight$1 : tensor<4096xf32>
    %481 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.ffn_norm.weight$2 = util.global.load @__auto.blk.6.ffn_norm.weight$2 : tensor<4096xf32>
    %482 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.ffn_norm.weight$3 = util.global.load @__auto.blk.6.ffn_norm.weight$3 : tensor<4096xf32>
    %483 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.ffn_norm.weight$4 = util.global.load @__auto.blk.6.ffn_norm.weight$4 : tensor<4096xf32>
    %484 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.ffn_norm.weight$5 = util.global.load @__auto.blk.6.ffn_norm.weight$5 : tensor<4096xf32>
    %485 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.ffn_norm.weight$6 = util.global.load @__auto.blk.6.ffn_norm.weight$6 : tensor<4096xf32>
    %486 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.ffn_norm.weight$7 = util.global.load @__auto.blk.6.ffn_norm.weight$7 : tensor<4096xf32>
    %487 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.6.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.6.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %488 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.6.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %489 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.6.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %490 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.6.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %491 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.6.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %492 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.6.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %493 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.6.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %494 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.6.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %495 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_up.weight.shard.0 = util.global.load @__auto.blk.6.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %496 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_up.weight.shard.1 = util.global.load @__auto.blk.6.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %497 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_up.weight.shard.2 = util.global.load @__auto.blk.6.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %498 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_up.weight.shard.3 = util.global.load @__auto.blk.6.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %499 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_up.weight.shard.4 = util.global.load @__auto.blk.6.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %500 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_up.weight.shard.5 = util.global.load @__auto.blk.6.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %501 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_up.weight.shard.6 = util.global.load @__auto.blk.6.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %502 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_up.weight.shard.7 = util.global.load @__auto.blk.6.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %503 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.6.ffn_down.weight.shard.0 = util.global.load @__auto.blk.6.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %504 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.6.ffn_down.weight.shard.1 = util.global.load @__auto.blk.6.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %505 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.6.ffn_down.weight.shard.2 = util.global.load @__auto.blk.6.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %506 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.6.ffn_down.weight.shard.3 = util.global.load @__auto.blk.6.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %507 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.6.ffn_down.weight.shard.4 = util.global.load @__auto.blk.6.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %508 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.6.ffn_down.weight.shard.5 = util.global.load @__auto.blk.6.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %509 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.6.ffn_down.weight.shard.6 = util.global.load @__auto.blk.6.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %510 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.6.ffn_down.weight.shard.7 = util.global.load @__auto.blk.6.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %511 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
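    // blk.7 parameter loads (same layout as blk.5).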
    %__auto.blk.7.attn_norm.weight = util.global.load @__auto.blk.7.attn_norm.weight : tensor<4096xf32>
    %512 = torch_c.from_builtin_tensor %__auto.blk.7.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.attn_norm.weight$1 = util.global.load @__auto.blk.7.attn_norm.weight$1 : tensor<4096xf32>
    %513 = torch_c.from_builtin_tensor %__auto.blk.7.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.attn_norm.weight$2 = util.global.load @__auto.blk.7.attn_norm.weight$2 : tensor<4096xf32>
    %514 = torch_c.from_builtin_tensor %__auto.blk.7.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.attn_norm.weight$3 = util.global.load @__auto.blk.7.attn_norm.weight$3 : tensor<4096xf32>
    %515 = torch_c.from_builtin_tensor %__auto.blk.7.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.attn_norm.weight$4 = util.global.load @__auto.blk.7.attn_norm.weight$4 : tensor<4096xf32>
    %516 = torch_c.from_builtin_tensor %__auto.blk.7.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.attn_norm.weight$5 = util.global.load @__auto.blk.7.attn_norm.weight$5 : tensor<4096xf32>
    %517 = torch_c.from_builtin_tensor %__auto.blk.7.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.attn_norm.weight$6 = util.global.load @__auto.blk.7.attn_norm.weight$6 : tensor<4096xf32>
    %518 = torch_c.from_builtin_tensor %__auto.blk.7.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.attn_norm.weight$7 = util.global.load @__auto.blk.7.attn_norm.weight$7 : tensor<4096xf32>
    %519 = torch_c.from_builtin_tensor %__auto.blk.7.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.attn_q.weight.shard.0 = util.global.load @__auto.blk.7.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %520 = torch_c.from_builtin_tensor %__auto.blk.7.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.7.attn_q.weight.shard.1 = util.global.load @__auto.blk.7.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %521 = torch_c.from_builtin_tensor %__auto.blk.7.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.7.attn_q.weight.shard.2 = util.global.load @__auto.blk.7.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %522 = torch_c.from_builtin_tensor %__auto.blk.7.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.7.attn_q.weight.shard.3 = util.global.load @__auto.blk.7.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %523 = torch_c.from_builtin_tensor %__auto.blk.7.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.7.attn_q.weight.shard.4 = util.global.load @__auto.blk.7.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %524 = torch_c.from_builtin_tensor %__auto.blk.7.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.7.attn_q.weight.shard.5 = util.global.load @__auto.blk.7.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %525 = torch_c.from_builtin_tensor %__auto.blk.7.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.7.attn_q.weight.shard.6 = util.global.load @__auto.blk.7.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %526 = torch_c.from_builtin_tensor %__auto.blk.7.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.7.attn_q.weight.shard.7 = util.global.load @__auto.blk.7.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %527 = torch_c.from_builtin_tensor %__auto.blk.7.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.7.attn_k.weight.shard.0 = util.global.load @__auto.blk.7.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %528 = torch_c.from_builtin_tensor %__auto.blk.7.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_k.weight.shard.1 = util.global.load @__auto.blk.7.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %529 = torch_c.from_builtin_tensor %__auto.blk.7.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_k.weight.shard.2 = util.global.load @__auto.blk.7.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %530 = torch_c.from_builtin_tensor %__auto.blk.7.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_k.weight.shard.3 = util.global.load @__auto.blk.7.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %531 = torch_c.from_builtin_tensor %__auto.blk.7.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_k.weight.shard.4 = util.global.load @__auto.blk.7.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %532 = torch_c.from_builtin_tensor %__auto.blk.7.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_k.weight.shard.5 = util.global.load @__auto.blk.7.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %533 = torch_c.from_builtin_tensor %__auto.blk.7.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_k.weight.shard.6 = util.global.load @__auto.blk.7.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %534 = torch_c.from_builtin_tensor %__auto.blk.7.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_k.weight.shard.7 = util.global.load @__auto.blk.7.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %535 = torch_c.from_builtin_tensor %__auto.blk.7.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_v.weight.shard.0 = util.global.load @__auto.blk.7.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %536 = torch_c.from_builtin_tensor %__auto.blk.7.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_v.weight.shard.1 = util.global.load @__auto.blk.7.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %537 = torch_c.from_builtin_tensor %__auto.blk.7.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_v.weight.shard.2 = util.global.load @__auto.blk.7.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %538 = torch_c.from_builtin_tensor %__auto.blk.7.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_v.weight.shard.3 = util.global.load @__auto.blk.7.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %539 = torch_c.from_builtin_tensor %__auto.blk.7.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_v.weight.shard.4 = util.global.load @__auto.blk.7.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %540 = torch_c.from_builtin_tensor %__auto.blk.7.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_v.weight.shard.5 = util.global.load @__auto.blk.7.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %541 = torch_c.from_builtin_tensor %__auto.blk.7.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_v.weight.shard.6 = util.global.load @__auto.blk.7.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %542 = torch_c.from_builtin_tensor %__auto.blk.7.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_v.weight.shard.7 = util.global.load @__auto.blk.7.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %543 = torch_c.from_builtin_tensor %__auto.blk.7.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.7.attn_output.weight.shard.0 = util.global.load @__auto.blk.7.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %544 = torch_c.from_builtin_tensor %__auto.blk.7.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.7.attn_output.weight.shard.1 = util.global.load @__auto.blk.7.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %545 = torch_c.from_builtin_tensor %__auto.blk.7.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.7.attn_output.weight.shard.2 = util.global.load @__auto.blk.7.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %546 = torch_c.from_builtin_tensor %__auto.blk.7.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.7.attn_output.weight.shard.3 = util.global.load @__auto.blk.7.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %547 = torch_c.from_builtin_tensor %__auto.blk.7.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.7.attn_output.weight.shard.4 = util.global.load @__auto.blk.7.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %548 = torch_c.from_builtin_tensor %__auto.blk.7.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.7.attn_output.weight.shard.5 = util.global.load @__auto.blk.7.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %549 = torch_c.from_builtin_tensor %__auto.blk.7.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.7.attn_output.weight.shard.6 = util.global.load @__auto.blk.7.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %550 = torch_c.from_builtin_tensor %__auto.blk.7.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.7.attn_output.weight.shard.7 = util.global.load @__auto.blk.7.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %551 = torch_c.from_builtin_tensor %__auto.blk.7.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.7.ffn_norm.weight = util.global.load @__auto.blk.7.ffn_norm.weight : tensor<4096xf32>
    %552 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.ffn_norm.weight$1 = util.global.load @__auto.blk.7.ffn_norm.weight$1 : tensor<4096xf32>
    %553 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.ffn_norm.weight$2 = util.global.load @__auto.blk.7.ffn_norm.weight$2 : tensor<4096xf32>
    %554 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.ffn_norm.weight$3 = util.global.load @__auto.blk.7.ffn_norm.weight$3 : tensor<4096xf32>
    %555 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.ffn_norm.weight$4 = util.global.load @__auto.blk.7.ffn_norm.weight$4 : tensor<4096xf32>
    %556 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.ffn_norm.weight$5 = util.global.load @__auto.blk.7.ffn_norm.weight$5 : tensor<4096xf32>
    %557 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.ffn_norm.weight$6 = util.global.load @__auto.blk.7.ffn_norm.weight$6 : tensor<4096xf32>
    %558 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.ffn_norm.weight$7 = util.global.load @__auto.blk.7.ffn_norm.weight$7 : tensor<4096xf32>
    %559 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.7.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.7.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %560 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.7.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %561 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.7.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %562 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.7.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %563 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.7.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %564 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.7.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %565 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.7.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %566 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.7.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %567 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_up.weight.shard.0 = util.global.load @__auto.blk.7.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %568 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_up.weight.shard.1 = util.global.load @__auto.blk.7.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %569 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_up.weight.shard.2 = util.global.load @__auto.blk.7.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %570 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_up.weight.shard.3 = util.global.load @__auto.blk.7.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %571 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_up.weight.shard.4 = util.global.load @__auto.blk.7.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %572 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_up.weight.shard.5 = util.global.load @__auto.blk.7.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %573 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_up.weight.shard.6 = util.global.load @__auto.blk.7.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %574 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_up.weight.shard.7 = util.global.load @__auto.blk.7.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %575 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.7.ffn_down.weight.shard.0 = util.global.load @__auto.blk.7.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %576 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.7.ffn_down.weight.shard.1 = util.global.load @__auto.blk.7.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %577 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.7.ffn_down.weight.shard.2 = util.global.load @__auto.blk.7.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %578 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.7.ffn_down.weight.shard.3 = util.global.load @__auto.blk.7.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %579 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.7.ffn_down.weight.shard.4 = util.global.load @__auto.blk.7.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %580 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.7.ffn_down.weight.shard.5 = util.global.load @__auto.blk.7.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %581 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.7.ffn_down.weight.shard.6 = util.global.load @__auto.blk.7.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %582 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.7.ffn_down.weight.shard.7 = util.global.load @__auto.blk.7.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %583 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
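    // blk.8 parameter loads (same layout as blk.5).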
    %__auto.blk.8.attn_norm.weight = util.global.load @__auto.blk.8.attn_norm.weight : tensor<4096xf32>
    %584 = torch_c.from_builtin_tensor %__auto.blk.8.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.attn_norm.weight$1 = util.global.load @__auto.blk.8.attn_norm.weight$1 : tensor<4096xf32>
    %585 = torch_c.from_builtin_tensor %__auto.blk.8.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.attn_norm.weight$2 = util.global.load @__auto.blk.8.attn_norm.weight$2 : tensor<4096xf32>
    %586 = torch_c.from_builtin_tensor %__auto.blk.8.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.attn_norm.weight$3 = util.global.load @__auto.blk.8.attn_norm.weight$3 : tensor<4096xf32>
    %587 = torch_c.from_builtin_tensor %__auto.blk.8.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.attn_norm.weight$4 = util.global.load @__auto.blk.8.attn_norm.weight$4 : tensor<4096xf32>
    %588 = torch_c.from_builtin_tensor %__auto.blk.8.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.attn_norm.weight$5 = util.global.load @__auto.blk.8.attn_norm.weight$5 : tensor<4096xf32>
    %589 = torch_c.from_builtin_tensor %__auto.blk.8.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.attn_norm.weight$6 = util.global.load @__auto.blk.8.attn_norm.weight$6 : tensor<4096xf32>
    %590 = torch_c.from_builtin_tensor %__auto.blk.8.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.attn_norm.weight$7 = util.global.load @__auto.blk.8.attn_norm.weight$7 : tensor<4096xf32>
    %591 = torch_c.from_builtin_tensor %__auto.blk.8.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.attn_q.weight.shard.0 = util.global.load @__auto.blk.8.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %592 = torch_c.from_builtin_tensor %__auto.blk.8.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.8.attn_q.weight.shard.1 = util.global.load @__auto.blk.8.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %593 = torch_c.from_builtin_tensor %__auto.blk.8.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.8.attn_q.weight.shard.2 = util.global.load @__auto.blk.8.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %594 = torch_c.from_builtin_tensor %__auto.blk.8.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.8.attn_q.weight.shard.3 = util.global.load @__auto.blk.8.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %595 = torch_c.from_builtin_tensor %__auto.blk.8.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.8.attn_q.weight.shard.4 = util.global.load @__auto.blk.8.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %596 = torch_c.from_builtin_tensor %__auto.blk.8.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.8.attn_q.weight.shard.5 = util.global.load @__auto.blk.8.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %597 = torch_c.from_builtin_tensor %__auto.blk.8.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.8.attn_q.weight.shard.6 = util.global.load @__auto.blk.8.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %598 = torch_c.from_builtin_tensor %__auto.blk.8.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.8.attn_q.weight.shard.7 = util.global.load @__auto.blk.8.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %599 = torch_c.from_builtin_tensor %__auto.blk.8.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.8.attn_k.weight.shard.0 = util.global.load @__auto.blk.8.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %600 = torch_c.from_builtin_tensor %__auto.blk.8.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_k.weight.shard.1 = util.global.load @__auto.blk.8.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %601 = torch_c.from_builtin_tensor %__auto.blk.8.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_k.weight.shard.2 = util.global.load @__auto.blk.8.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %602 = torch_c.from_builtin_tensor %__auto.blk.8.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_k.weight.shard.3 = util.global.load @__auto.blk.8.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %603 = torch_c.from_builtin_tensor %__auto.blk.8.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_k.weight.shard.4 = util.global.load @__auto.blk.8.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %604 = torch_c.from_builtin_tensor %__auto.blk.8.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_k.weight.shard.5 = util.global.load @__auto.blk.8.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %605 = torch_c.from_builtin_tensor %__auto.blk.8.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_k.weight.shard.6 = util.global.load @__auto.blk.8.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %606 = torch_c.from_builtin_tensor %__auto.blk.8.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_k.weight.shard.7 = util.global.load @__auto.blk.8.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %607 = torch_c.from_builtin_tensor %__auto.blk.8.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_v.weight.shard.0 = util.global.load @__auto.blk.8.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %608 = torch_c.from_builtin_tensor %__auto.blk.8.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_v.weight.shard.1 = util.global.load @__auto.blk.8.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %609 = torch_c.from_builtin_tensor %__auto.blk.8.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_v.weight.shard.2 = util.global.load @__auto.blk.8.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %610 = torch_c.from_builtin_tensor %__auto.blk.8.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_v.weight.shard.3 = util.global.load @__auto.blk.8.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %611 = torch_c.from_builtin_tensor %__auto.blk.8.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_v.weight.shard.4 = util.global.load @__auto.blk.8.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %612 = torch_c.from_builtin_tensor %__auto.blk.8.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_v.weight.shard.5 = util.global.load @__auto.blk.8.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %613 = torch_c.from_builtin_tensor %__auto.blk.8.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_v.weight.shard.6 = util.global.load @__auto.blk.8.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %614 = torch_c.from_builtin_tensor %__auto.blk.8.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_v.weight.shard.7 = util.global.load @__auto.blk.8.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %615 = torch_c.from_builtin_tensor %__auto.blk.8.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.8.attn_output.weight.shard.0 = util.global.load @__auto.blk.8.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %616 = torch_c.from_builtin_tensor %__auto.blk.8.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.8.attn_output.weight.shard.1 = util.global.load @__auto.blk.8.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %617 = torch_c.from_builtin_tensor %__auto.blk.8.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.8.attn_output.weight.shard.2 = util.global.load @__auto.blk.8.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %618 = torch_c.from_builtin_tensor %__auto.blk.8.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.8.attn_output.weight.shard.3 = util.global.load @__auto.blk.8.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %619 = torch_c.from_builtin_tensor %__auto.blk.8.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.8.attn_output.weight.shard.4 = util.global.load @__auto.blk.8.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %620 = torch_c.from_builtin_tensor %__auto.blk.8.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.8.attn_output.weight.shard.5 = util.global.load @__auto.blk.8.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %621 = torch_c.from_builtin_tensor %__auto.blk.8.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.8.attn_output.weight.shard.6 = util.global.load @__auto.blk.8.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %622 = torch_c.from_builtin_tensor %__auto.blk.8.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.8.attn_output.weight.shard.7 = util.global.load @__auto.blk.8.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %623 = torch_c.from_builtin_tensor %__auto.blk.8.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.8.ffn_norm.weight = util.global.load @__auto.blk.8.ffn_norm.weight : tensor<4096xf32>
    %624 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.ffn_norm.weight$1 = util.global.load @__auto.blk.8.ffn_norm.weight$1 : tensor<4096xf32>
    %625 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.ffn_norm.weight$2 = util.global.load @__auto.blk.8.ffn_norm.weight$2 : tensor<4096xf32>
    %626 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.ffn_norm.weight$3 = util.global.load @__auto.blk.8.ffn_norm.weight$3 : tensor<4096xf32>
    %627 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.ffn_norm.weight$4 = util.global.load @__auto.blk.8.ffn_norm.weight$4 : tensor<4096xf32>
    %628 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.ffn_norm.weight$5 = util.global.load @__auto.blk.8.ffn_norm.weight$5 : tensor<4096xf32>
    %629 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.ffn_norm.weight$6 = util.global.load @__auto.blk.8.ffn_norm.weight$6 : tensor<4096xf32>
    %630 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.ffn_norm.weight$7 = util.global.load @__auto.blk.8.ffn_norm.weight$7 : tensor<4096xf32>
    %631 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.8.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.8.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %632 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.8.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %633 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.8.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %634 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.8.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %635 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.8.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %636 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.8.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %637 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.8.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %638 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.8.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %639 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_up.weight.shard.0 = util.global.load @__auto.blk.8.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %640 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_up.weight.shard.1 = util.global.load @__auto.blk.8.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %641 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_up.weight.shard.2 = util.global.load @__auto.blk.8.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %642 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_up.weight.shard.3 = util.global.load @__auto.blk.8.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %643 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_up.weight.shard.4 = util.global.load @__auto.blk.8.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %644 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_up.weight.shard.5 = util.global.load @__auto.blk.8.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %645 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_up.weight.shard.6 = util.global.load @__auto.blk.8.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %646 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_up.weight.shard.7 = util.global.load @__auto.blk.8.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %647 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.8.ffn_down.weight.shard.0 = util.global.load @__auto.blk.8.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %648 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.8.ffn_down.weight.shard.1 = util.global.load @__auto.blk.8.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %649 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.8.ffn_down.weight.shard.2 = util.global.load @__auto.blk.8.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %650 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.8.ffn_down.weight.shard.3 = util.global.load @__auto.blk.8.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %651 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.8.ffn_down.weight.shard.4 = util.global.load @__auto.blk.8.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %652 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.8.ffn_down.weight.shard.5 = util.global.load @__auto.blk.8.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %653 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.8.ffn_down.weight.shard.6 = util.global.load @__auto.blk.8.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %654 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.8.ffn_down.weight.shard.7 = util.global.load @__auto.blk.8.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %655 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
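    // blk.9: same per-layer parameter staging as the preceding blocks. Each
    // util.global.load materializes a device-affine parameter as a builtin
    // tensor, and torch_c.from_builtin_tensor bridges it into the Torch
    // dialect as a !torch.vtensor. The norm weights (attn_norm/ffn_norm and
    // their $1-$7 variants) are full 4096-element f32 copies, one per device;
    // the projection weights are split 8 ways, consistent with tensor-parallel
    // sharding: attn_q as 8 x 512x4096 and attn_k/attn_v as 8 x 128x4096
    // (row shards), attn_output as 8 x 4096x512 and ffn_down as 8 x 4096x1792
    // (column shards), ffn_gate/ffn_up as 8 x 1792x4096 (row shards).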
    %__auto.blk.9.attn_norm.weight = util.global.load @__auto.blk.9.attn_norm.weight : tensor<4096xf32>
    %656 = torch_c.from_builtin_tensor %__auto.blk.9.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.attn_norm.weight$1 = util.global.load @__auto.blk.9.attn_norm.weight$1 : tensor<4096xf32>
    %657 = torch_c.from_builtin_tensor %__auto.blk.9.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.attn_norm.weight$2 = util.global.load @__auto.blk.9.attn_norm.weight$2 : tensor<4096xf32>
    %658 = torch_c.from_builtin_tensor %__auto.blk.9.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.attn_norm.weight$3 = util.global.load @__auto.blk.9.attn_norm.weight$3 : tensor<4096xf32>
    %659 = torch_c.from_builtin_tensor %__auto.blk.9.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.attn_norm.weight$4 = util.global.load @__auto.blk.9.attn_norm.weight$4 : tensor<4096xf32>
    %660 = torch_c.from_builtin_tensor %__auto.blk.9.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.attn_norm.weight$5 = util.global.load @__auto.blk.9.attn_norm.weight$5 : tensor<4096xf32>
    %661 = torch_c.from_builtin_tensor %__auto.blk.9.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.attn_norm.weight$6 = util.global.load @__auto.blk.9.attn_norm.weight$6 : tensor<4096xf32>
    %662 = torch_c.from_builtin_tensor %__auto.blk.9.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.attn_norm.weight$7 = util.global.load @__auto.blk.9.attn_norm.weight$7 : tensor<4096xf32>
    %663 = torch_c.from_builtin_tensor %__auto.blk.9.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.attn_q.weight.shard.0 = util.global.load @__auto.blk.9.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %664 = torch_c.from_builtin_tensor %__auto.blk.9.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.9.attn_q.weight.shard.1 = util.global.load @__auto.blk.9.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %665 = torch_c.from_builtin_tensor %__auto.blk.9.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.9.attn_q.weight.shard.2 = util.global.load @__auto.blk.9.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %666 = torch_c.from_builtin_tensor %__auto.blk.9.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.9.attn_q.weight.shard.3 = util.global.load @__auto.blk.9.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %667 = torch_c.from_builtin_tensor %__auto.blk.9.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.9.attn_q.weight.shard.4 = util.global.load @__auto.blk.9.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %668 = torch_c.from_builtin_tensor %__auto.blk.9.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.9.attn_q.weight.shard.5 = util.global.load @__auto.blk.9.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %669 = torch_c.from_builtin_tensor %__auto.blk.9.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.9.attn_q.weight.shard.6 = util.global.load @__auto.blk.9.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %670 = torch_c.from_builtin_tensor %__auto.blk.9.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.9.attn_q.weight.shard.7 = util.global.load @__auto.blk.9.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %671 = torch_c.from_builtin_tensor %__auto.blk.9.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.9.attn_k.weight.shard.0 = util.global.load @__auto.blk.9.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %672 = torch_c.from_builtin_tensor %__auto.blk.9.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_k.weight.shard.1 = util.global.load @__auto.blk.9.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %673 = torch_c.from_builtin_tensor %__auto.blk.9.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_k.weight.shard.2 = util.global.load @__auto.blk.9.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %674 = torch_c.from_builtin_tensor %__auto.blk.9.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_k.weight.shard.3 = util.global.load @__auto.blk.9.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %675 = torch_c.from_builtin_tensor %__auto.blk.9.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_k.weight.shard.4 = util.global.load @__auto.blk.9.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %676 = torch_c.from_builtin_tensor %__auto.blk.9.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_k.weight.shard.5 = util.global.load @__auto.blk.9.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %677 = torch_c.from_builtin_tensor %__auto.blk.9.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_k.weight.shard.6 = util.global.load @__auto.blk.9.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %678 = torch_c.from_builtin_tensor %__auto.blk.9.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_k.weight.shard.7 = util.global.load @__auto.blk.9.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %679 = torch_c.from_builtin_tensor %__auto.blk.9.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_v.weight.shard.0 = util.global.load @__auto.blk.9.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %680 = torch_c.from_builtin_tensor %__auto.blk.9.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_v.weight.shard.1 = util.global.load @__auto.blk.9.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %681 = torch_c.from_builtin_tensor %__auto.blk.9.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_v.weight.shard.2 = util.global.load @__auto.blk.9.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %682 = torch_c.from_builtin_tensor %__auto.blk.9.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_v.weight.shard.3 = util.global.load @__auto.blk.9.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %683 = torch_c.from_builtin_tensor %__auto.blk.9.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_v.weight.shard.4 = util.global.load @__auto.blk.9.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %684 = torch_c.from_builtin_tensor %__auto.blk.9.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_v.weight.shard.5 = util.global.load @__auto.blk.9.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %685 = torch_c.from_builtin_tensor %__auto.blk.9.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_v.weight.shard.6 = util.global.load @__auto.blk.9.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %686 = torch_c.from_builtin_tensor %__auto.blk.9.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_v.weight.shard.7 = util.global.load @__auto.blk.9.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %687 = torch_c.from_builtin_tensor %__auto.blk.9.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.9.attn_output.weight.shard.0 = util.global.load @__auto.blk.9.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %688 = torch_c.from_builtin_tensor %__auto.blk.9.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.9.attn_output.weight.shard.1 = util.global.load @__auto.blk.9.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %689 = torch_c.from_builtin_tensor %__auto.blk.9.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.9.attn_output.weight.shard.2 = util.global.load @__auto.blk.9.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %690 = torch_c.from_builtin_tensor %__auto.blk.9.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.9.attn_output.weight.shard.3 = util.global.load @__auto.blk.9.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %691 = torch_c.from_builtin_tensor %__auto.blk.9.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.9.attn_output.weight.shard.4 = util.global.load @__auto.blk.9.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %692 = torch_c.from_builtin_tensor %__auto.blk.9.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.9.attn_output.weight.shard.5 = util.global.load @__auto.blk.9.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %693 = torch_c.from_builtin_tensor %__auto.blk.9.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.9.attn_output.weight.shard.6 = util.global.load @__auto.blk.9.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %694 = torch_c.from_builtin_tensor %__auto.blk.9.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.9.attn_output.weight.shard.7 = util.global.load @__auto.blk.9.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %695 = torch_c.from_builtin_tensor %__auto.blk.9.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.9.ffn_norm.weight = util.global.load @__auto.blk.9.ffn_norm.weight : tensor<4096xf32>
    %696 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.ffn_norm.weight$1 = util.global.load @__auto.blk.9.ffn_norm.weight$1 : tensor<4096xf32>
    %697 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.ffn_norm.weight$2 = util.global.load @__auto.blk.9.ffn_norm.weight$2 : tensor<4096xf32>
    %698 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.ffn_norm.weight$3 = util.global.load @__auto.blk.9.ffn_norm.weight$3 : tensor<4096xf32>
    %699 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.ffn_norm.weight$4 = util.global.load @__auto.blk.9.ffn_norm.weight$4 : tensor<4096xf32>
    %700 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.ffn_norm.weight$5 = util.global.load @__auto.blk.9.ffn_norm.weight$5 : tensor<4096xf32>
    %701 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.ffn_norm.weight$6 = util.global.load @__auto.blk.9.ffn_norm.weight$6 : tensor<4096xf32>
    %702 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.ffn_norm.weight$7 = util.global.load @__auto.blk.9.ffn_norm.weight$7 : tensor<4096xf32>
    %703 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.9.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.9.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %704 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.9.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %705 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.9.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %706 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.9.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %707 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.9.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %708 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.9.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %709 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.9.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %710 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.9.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %711 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_up.weight.shard.0 = util.global.load @__auto.blk.9.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %712 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_up.weight.shard.1 = util.global.load @__auto.blk.9.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %713 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_up.weight.shard.2 = util.global.load @__auto.blk.9.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %714 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_up.weight.shard.3 = util.global.load @__auto.blk.9.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %715 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_up.weight.shard.4 = util.global.load @__auto.blk.9.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %716 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_up.weight.shard.5 = util.global.load @__auto.blk.9.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %717 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_up.weight.shard.6 = util.global.load @__auto.blk.9.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %718 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_up.weight.shard.7 = util.global.load @__auto.blk.9.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %719 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.9.ffn_down.weight.shard.0 = util.global.load @__auto.blk.9.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %720 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.9.ffn_down.weight.shard.1 = util.global.load @__auto.blk.9.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %721 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.9.ffn_down.weight.shard.2 = util.global.load @__auto.blk.9.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %722 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.9.ffn_down.weight.shard.3 = util.global.load @__auto.blk.9.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %723 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.9.ffn_down.weight.shard.4 = util.global.load @__auto.blk.9.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %724 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.9.ffn_down.weight.shard.5 = util.global.load @__auto.blk.9.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %725 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.9.ffn_down.weight.shard.6 = util.global.load @__auto.blk.9.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %726 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.9.ffn_down.weight.shard.7 = util.global.load @__auto.blk.9.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %727 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
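    // blk.10: repeats the staging pattern above (replicated norm weights,
    // 8-way sharded attention and FFN projection weights).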
    %__auto.blk.10.attn_norm.weight = util.global.load @__auto.blk.10.attn_norm.weight : tensor<4096xf32>
    %728 = torch_c.from_builtin_tensor %__auto.blk.10.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.attn_norm.weight$1 = util.global.load @__auto.blk.10.attn_norm.weight$1 : tensor<4096xf32>
    %729 = torch_c.from_builtin_tensor %__auto.blk.10.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.attn_norm.weight$2 = util.global.load @__auto.blk.10.attn_norm.weight$2 : tensor<4096xf32>
    %730 = torch_c.from_builtin_tensor %__auto.blk.10.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.attn_norm.weight$3 = util.global.load @__auto.blk.10.attn_norm.weight$3 : tensor<4096xf32>
    %731 = torch_c.from_builtin_tensor %__auto.blk.10.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.attn_norm.weight$4 = util.global.load @__auto.blk.10.attn_norm.weight$4 : tensor<4096xf32>
    %732 = torch_c.from_builtin_tensor %__auto.blk.10.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.attn_norm.weight$5 = util.global.load @__auto.blk.10.attn_norm.weight$5 : tensor<4096xf32>
    %733 = torch_c.from_builtin_tensor %__auto.blk.10.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.attn_norm.weight$6 = util.global.load @__auto.blk.10.attn_norm.weight$6 : tensor<4096xf32>
    %734 = torch_c.from_builtin_tensor %__auto.blk.10.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.attn_norm.weight$7 = util.global.load @__auto.blk.10.attn_norm.weight$7 : tensor<4096xf32>
    %735 = torch_c.from_builtin_tensor %__auto.blk.10.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.attn_q.weight.shard.0 = util.global.load @__auto.blk.10.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %736 = torch_c.from_builtin_tensor %__auto.blk.10.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.10.attn_q.weight.shard.1 = util.global.load @__auto.blk.10.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %737 = torch_c.from_builtin_tensor %__auto.blk.10.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.10.attn_q.weight.shard.2 = util.global.load @__auto.blk.10.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %738 = torch_c.from_builtin_tensor %__auto.blk.10.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.10.attn_q.weight.shard.3 = util.global.load @__auto.blk.10.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %739 = torch_c.from_builtin_tensor %__auto.blk.10.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.10.attn_q.weight.shard.4 = util.global.load @__auto.blk.10.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %740 = torch_c.from_builtin_tensor %__auto.blk.10.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.10.attn_q.weight.shard.5 = util.global.load @__auto.blk.10.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %741 = torch_c.from_builtin_tensor %__auto.blk.10.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.10.attn_q.weight.shard.6 = util.global.load @__auto.blk.10.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %742 = torch_c.from_builtin_tensor %__auto.blk.10.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.10.attn_q.weight.shard.7 = util.global.load @__auto.blk.10.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %743 = torch_c.from_builtin_tensor %__auto.blk.10.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.10.attn_k.weight.shard.0 = util.global.load @__auto.blk.10.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %744 = torch_c.from_builtin_tensor %__auto.blk.10.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_k.weight.shard.1 = util.global.load @__auto.blk.10.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %745 = torch_c.from_builtin_tensor %__auto.blk.10.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_k.weight.shard.2 = util.global.load @__auto.blk.10.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %746 = torch_c.from_builtin_tensor %__auto.blk.10.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_k.weight.shard.3 = util.global.load @__auto.blk.10.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %747 = torch_c.from_builtin_tensor %__auto.blk.10.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_k.weight.shard.4 = util.global.load @__auto.blk.10.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %748 = torch_c.from_builtin_tensor %__auto.blk.10.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_k.weight.shard.5 = util.global.load @__auto.blk.10.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %749 = torch_c.from_builtin_tensor %__auto.blk.10.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_k.weight.shard.6 = util.global.load @__auto.blk.10.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %750 = torch_c.from_builtin_tensor %__auto.blk.10.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_k.weight.shard.7 = util.global.load @__auto.blk.10.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %751 = torch_c.from_builtin_tensor %__auto.blk.10.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_v.weight.shard.0 = util.global.load @__auto.blk.10.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %752 = torch_c.from_builtin_tensor %__auto.blk.10.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_v.weight.shard.1 = util.global.load @__auto.blk.10.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %753 = torch_c.from_builtin_tensor %__auto.blk.10.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_v.weight.shard.2 = util.global.load @__auto.blk.10.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %754 = torch_c.from_builtin_tensor %__auto.blk.10.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_v.weight.shard.3 = util.global.load @__auto.blk.10.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %755 = torch_c.from_builtin_tensor %__auto.blk.10.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_v.weight.shard.4 = util.global.load @__auto.blk.10.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %756 = torch_c.from_builtin_tensor %__auto.blk.10.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_v.weight.shard.5 = util.global.load @__auto.blk.10.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %757 = torch_c.from_builtin_tensor %__auto.blk.10.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_v.weight.shard.6 = util.global.load @__auto.blk.10.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %758 = torch_c.from_builtin_tensor %__auto.blk.10.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_v.weight.shard.7 = util.global.load @__auto.blk.10.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %759 = torch_c.from_builtin_tensor %__auto.blk.10.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.10.attn_output.weight.shard.0 = util.global.load @__auto.blk.10.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %760 = torch_c.from_builtin_tensor %__auto.blk.10.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.10.attn_output.weight.shard.1 = util.global.load @__auto.blk.10.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %761 = torch_c.from_builtin_tensor %__auto.blk.10.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.10.attn_output.weight.shard.2 = util.global.load @__auto.blk.10.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %762 = torch_c.from_builtin_tensor %__auto.blk.10.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.10.attn_output.weight.shard.3 = util.global.load @__auto.blk.10.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %763 = torch_c.from_builtin_tensor %__auto.blk.10.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.10.attn_output.weight.shard.4 = util.global.load @__auto.blk.10.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %764 = torch_c.from_builtin_tensor %__auto.blk.10.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.10.attn_output.weight.shard.5 = util.global.load @__auto.blk.10.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %765 = torch_c.from_builtin_tensor %__auto.blk.10.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.10.attn_output.weight.shard.6 = util.global.load @__auto.blk.10.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %766 = torch_c.from_builtin_tensor %__auto.blk.10.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.10.attn_output.weight.shard.7 = util.global.load @__auto.blk.10.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %767 = torch_c.from_builtin_tensor %__auto.blk.10.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.10.ffn_norm.weight = util.global.load @__auto.blk.10.ffn_norm.weight : tensor<4096xf32>
    %768 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.ffn_norm.weight$1 = util.global.load @__auto.blk.10.ffn_norm.weight$1 : tensor<4096xf32>
    %769 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.ffn_norm.weight$2 = util.global.load @__auto.blk.10.ffn_norm.weight$2 : tensor<4096xf32>
    %770 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.ffn_norm.weight$3 = util.global.load @__auto.blk.10.ffn_norm.weight$3 : tensor<4096xf32>
    %771 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.ffn_norm.weight$4 = util.global.load @__auto.blk.10.ffn_norm.weight$4 : tensor<4096xf32>
    %772 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.ffn_norm.weight$5 = util.global.load @__auto.blk.10.ffn_norm.weight$5 : tensor<4096xf32>
    %773 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.ffn_norm.weight$6 = util.global.load @__auto.blk.10.ffn_norm.weight$6 : tensor<4096xf32>
    %774 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.ffn_norm.weight$7 = util.global.load @__auto.blk.10.ffn_norm.weight$7 : tensor<4096xf32>
    %775 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.10.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.10.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %776 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.10.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %777 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.10.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %778 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.10.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %779 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.10.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %780 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.10.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %781 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.10.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %782 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.10.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %783 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_up.weight.shard.0 = util.global.load @__auto.blk.10.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %784 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_up.weight.shard.1 = util.global.load @__auto.blk.10.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %785 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_up.weight.shard.2 = util.global.load @__auto.blk.10.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %786 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_up.weight.shard.3 = util.global.load @__auto.blk.10.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %787 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_up.weight.shard.4 = util.global.load @__auto.blk.10.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %788 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_up.weight.shard.5 = util.global.load @__auto.blk.10.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %789 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_up.weight.shard.6 = util.global.load @__auto.blk.10.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %790 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_up.weight.shard.7 = util.global.load @__auto.blk.10.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %791 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.10.ffn_down.weight.shard.0 = util.global.load @__auto.blk.10.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %792 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.10.ffn_down.weight.shard.1 = util.global.load @__auto.blk.10.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %793 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.10.ffn_down.weight.shard.2 = util.global.load @__auto.blk.10.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %794 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.10.ffn_down.weight.shard.3 = util.global.load @__auto.blk.10.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %795 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.10.ffn_down.weight.shard.4 = util.global.load @__auto.blk.10.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %796 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.10.ffn_down.weight.shard.5 = util.global.load @__auto.blk.10.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %797 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.10.ffn_down.weight.shard.6 = util.global.load @__auto.blk.10.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %798 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.10.ffn_down.weight.shard.7 = util.global.load @__auto.blk.10.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %799 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
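    // blk.11: same staging pattern; shard shapes match blk.9 and blk.10.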
    %__auto.blk.11.attn_norm.weight = util.global.load @__auto.blk.11.attn_norm.weight : tensor<4096xf32>
    %800 = torch_c.from_builtin_tensor %__auto.blk.11.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.attn_norm.weight$1 = util.global.load @__auto.blk.11.attn_norm.weight$1 : tensor<4096xf32>
    %801 = torch_c.from_builtin_tensor %__auto.blk.11.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.attn_norm.weight$2 = util.global.load @__auto.blk.11.attn_norm.weight$2 : tensor<4096xf32>
    %802 = torch_c.from_builtin_tensor %__auto.blk.11.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.attn_norm.weight$3 = util.global.load @__auto.blk.11.attn_norm.weight$3 : tensor<4096xf32>
    %803 = torch_c.from_builtin_tensor %__auto.blk.11.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.attn_norm.weight$4 = util.global.load @__auto.blk.11.attn_norm.weight$4 : tensor<4096xf32>
    %804 = torch_c.from_builtin_tensor %__auto.blk.11.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.attn_norm.weight$5 = util.global.load @__auto.blk.11.attn_norm.weight$5 : tensor<4096xf32>
    %805 = torch_c.from_builtin_tensor %__auto.blk.11.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.attn_norm.weight$6 = util.global.load @__auto.blk.11.attn_norm.weight$6 : tensor<4096xf32>
    %806 = torch_c.from_builtin_tensor %__auto.blk.11.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.attn_norm.weight$7 = util.global.load @__auto.blk.11.attn_norm.weight$7 : tensor<4096xf32>
    %807 = torch_c.from_builtin_tensor %__auto.blk.11.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.attn_q.weight.shard.0 = util.global.load @__auto.blk.11.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %808 = torch_c.from_builtin_tensor %__auto.blk.11.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.11.attn_q.weight.shard.1 = util.global.load @__auto.blk.11.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %809 = torch_c.from_builtin_tensor %__auto.blk.11.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.11.attn_q.weight.shard.2 = util.global.load @__auto.blk.11.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %810 = torch_c.from_builtin_tensor %__auto.blk.11.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.11.attn_q.weight.shard.3 = util.global.load @__auto.blk.11.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %811 = torch_c.from_builtin_tensor %__auto.blk.11.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.11.attn_q.weight.shard.4 = util.global.load @__auto.blk.11.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %812 = torch_c.from_builtin_tensor %__auto.blk.11.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.11.attn_q.weight.shard.5 = util.global.load @__auto.blk.11.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %813 = torch_c.from_builtin_tensor %__auto.blk.11.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.11.attn_q.weight.shard.6 = util.global.load @__auto.blk.11.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %814 = torch_c.from_builtin_tensor %__auto.blk.11.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.11.attn_q.weight.shard.7 = util.global.load @__auto.blk.11.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %815 = torch_c.from_builtin_tensor %__auto.blk.11.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.11.attn_k.weight.shard.0 = util.global.load @__auto.blk.11.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %816 = torch_c.from_builtin_tensor %__auto.blk.11.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_k.weight.shard.1 = util.global.load @__auto.blk.11.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %817 = torch_c.from_builtin_tensor %__auto.blk.11.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_k.weight.shard.2 = util.global.load @__auto.blk.11.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %818 = torch_c.from_builtin_tensor %__auto.blk.11.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_k.weight.shard.3 = util.global.load @__auto.blk.11.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %819 = torch_c.from_builtin_tensor %__auto.blk.11.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_k.weight.shard.4 = util.global.load @__auto.blk.11.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %820 = torch_c.from_builtin_tensor %__auto.blk.11.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_k.weight.shard.5 = util.global.load @__auto.blk.11.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %821 = torch_c.from_builtin_tensor %__auto.blk.11.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_k.weight.shard.6 = util.global.load @__auto.blk.11.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %822 = torch_c.from_builtin_tensor %__auto.blk.11.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_k.weight.shard.7 = util.global.load @__auto.blk.11.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %823 = torch_c.from_builtin_tensor %__auto.blk.11.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_v.weight.shard.0 = util.global.load @__auto.blk.11.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %824 = torch_c.from_builtin_tensor %__auto.blk.11.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_v.weight.shard.1 = util.global.load @__auto.blk.11.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %825 = torch_c.from_builtin_tensor %__auto.blk.11.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_v.weight.shard.2 = util.global.load @__auto.blk.11.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %826 = torch_c.from_builtin_tensor %__auto.blk.11.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_v.weight.shard.3 = util.global.load @__auto.blk.11.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %827 = torch_c.from_builtin_tensor %__auto.blk.11.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_v.weight.shard.4 = util.global.load @__auto.blk.11.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %828 = torch_c.from_builtin_tensor %__auto.blk.11.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_v.weight.shard.5 = util.global.load @__auto.blk.11.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %829 = torch_c.from_builtin_tensor %__auto.blk.11.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_v.weight.shard.6 = util.global.load @__auto.blk.11.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %830 = torch_c.from_builtin_tensor %__auto.blk.11.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_v.weight.shard.7 = util.global.load @__auto.blk.11.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %831 = torch_c.from_builtin_tensor %__auto.blk.11.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.11.attn_output.weight.shard.0 = util.global.load @__auto.blk.11.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %832 = torch_c.from_builtin_tensor %__auto.blk.11.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.11.attn_output.weight.shard.1 = util.global.load @__auto.blk.11.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %833 = torch_c.from_builtin_tensor %__auto.blk.11.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.11.attn_output.weight.shard.2 = util.global.load @__auto.blk.11.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %834 = torch_c.from_builtin_tensor %__auto.blk.11.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.11.attn_output.weight.shard.3 = util.global.load @__auto.blk.11.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %835 = torch_c.from_builtin_tensor %__auto.blk.11.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.11.attn_output.weight.shard.4 = util.global.load @__auto.blk.11.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %836 = torch_c.from_builtin_tensor %__auto.blk.11.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.11.attn_output.weight.shard.5 = util.global.load @__auto.blk.11.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %837 = torch_c.from_builtin_tensor %__auto.blk.11.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.11.attn_output.weight.shard.6 = util.global.load @__auto.blk.11.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %838 = torch_c.from_builtin_tensor %__auto.blk.11.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.11.attn_output.weight.shard.7 = util.global.load @__auto.blk.11.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %839 = torch_c.from_builtin_tensor %__auto.blk.11.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.11.ffn_norm.weight = util.global.load @__auto.blk.11.ffn_norm.weight : tensor<4096xf32>
    %840 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.ffn_norm.weight$1 = util.global.load @__auto.blk.11.ffn_norm.weight$1 : tensor<4096xf32>
    %841 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.ffn_norm.weight$2 = util.global.load @__auto.blk.11.ffn_norm.weight$2 : tensor<4096xf32>
    %842 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.ffn_norm.weight$3 = util.global.load @__auto.blk.11.ffn_norm.weight$3 : tensor<4096xf32>
    %843 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.ffn_norm.weight$4 = util.global.load @__auto.blk.11.ffn_norm.weight$4 : tensor<4096xf32>
    %844 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.ffn_norm.weight$5 = util.global.load @__auto.blk.11.ffn_norm.weight$5 : tensor<4096xf32>
    %845 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.ffn_norm.weight$6 = util.global.load @__auto.blk.11.ffn_norm.weight$6 : tensor<4096xf32>
    %846 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.ffn_norm.weight$7 = util.global.load @__auto.blk.11.ffn_norm.weight$7 : tensor<4096xf32>
    %847 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.11.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.11.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %848 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.11.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %849 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.11.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %850 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.11.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %851 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.11.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %852 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.11.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %853 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.11.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %854 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.11.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %855 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_up.weight.shard.0 = util.global.load @__auto.blk.11.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %856 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_up.weight.shard.1 = util.global.load @__auto.blk.11.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %857 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_up.weight.shard.2 = util.global.load @__auto.blk.11.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %858 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_up.weight.shard.3 = util.global.load @__auto.blk.11.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %859 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_up.weight.shard.4 = util.global.load @__auto.blk.11.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %860 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_up.weight.shard.5 = util.global.load @__auto.blk.11.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %861 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_up.weight.shard.6 = util.global.load @__auto.blk.11.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %862 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_up.weight.shard.7 = util.global.load @__auto.blk.11.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %863 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.11.ffn_down.weight.shard.0 = util.global.load @__auto.blk.11.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %864 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.11.ffn_down.weight.shard.1 = util.global.load @__auto.blk.11.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %865 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.11.ffn_down.weight.shard.2 = util.global.load @__auto.blk.11.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %866 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.11.ffn_down.weight.shard.3 = util.global.load @__auto.blk.11.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %867 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.11.ffn_down.weight.shard.4 = util.global.load @__auto.blk.11.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %868 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.11.ffn_down.weight.shard.5 = util.global.load @__auto.blk.11.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %869 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.11.ffn_down.weight.shard.6 = util.global.load @__auto.blk.11.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %870 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.11.ffn_down.weight.shard.7 = util.global.load @__auto.blk.11.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %871 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
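    // Parameters for transformer block 12. The section repeats the same two-op
    // idiom per tensor: util.global.load reads a module-level global, and
    // torch_c.from_builtin_tensor rebinds the builtin tensor as a !torch.vtensor
    // of identical shape and element type for the Torch-dialect ops that follow.
    // Norm weights carry per-copy suffixes ($1..$7, apparently one replica per
    // shard), while projection weights are split into explicit
    // .shard.0-.shard.7 globals.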
    %__auto.blk.12.attn_norm.weight = util.global.load @__auto.blk.12.attn_norm.weight : tensor<4096xf32>
    %872 = torch_c.from_builtin_tensor %__auto.blk.12.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.attn_norm.weight$1 = util.global.load @__auto.blk.12.attn_norm.weight$1 : tensor<4096xf32>
    %873 = torch_c.from_builtin_tensor %__auto.blk.12.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.attn_norm.weight$2 = util.global.load @__auto.blk.12.attn_norm.weight$2 : tensor<4096xf32>
    %874 = torch_c.from_builtin_tensor %__auto.blk.12.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.attn_norm.weight$3 = util.global.load @__auto.blk.12.attn_norm.weight$3 : tensor<4096xf32>
    %875 = torch_c.from_builtin_tensor %__auto.blk.12.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.attn_norm.weight$4 = util.global.load @__auto.blk.12.attn_norm.weight$4 : tensor<4096xf32>
    %876 = torch_c.from_builtin_tensor %__auto.blk.12.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.attn_norm.weight$5 = util.global.load @__auto.blk.12.attn_norm.weight$5 : tensor<4096xf32>
    %877 = torch_c.from_builtin_tensor %__auto.blk.12.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.attn_norm.weight$6 = util.global.load @__auto.blk.12.attn_norm.weight$6 : tensor<4096xf32>
    %878 = torch_c.from_builtin_tensor %__auto.blk.12.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.attn_norm.weight$7 = util.global.load @__auto.blk.12.attn_norm.weight$7 : tensor<4096xf32>
    %879 = torch_c.from_builtin_tensor %__auto.blk.12.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.attn_q.weight.shard.0 = util.global.load @__auto.blk.12.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %880 = torch_c.from_builtin_tensor %__auto.blk.12.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.12.attn_q.weight.shard.1 = util.global.load @__auto.blk.12.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %881 = torch_c.from_builtin_tensor %__auto.blk.12.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.12.attn_q.weight.shard.2 = util.global.load @__auto.blk.12.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %882 = torch_c.from_builtin_tensor %__auto.blk.12.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.12.attn_q.weight.shard.3 = util.global.load @__auto.blk.12.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %883 = torch_c.from_builtin_tensor %__auto.blk.12.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.12.attn_q.weight.shard.4 = util.global.load @__auto.blk.12.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %884 = torch_c.from_builtin_tensor %__auto.blk.12.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.12.attn_q.weight.shard.5 = util.global.load @__auto.blk.12.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %885 = torch_c.from_builtin_tensor %__auto.blk.12.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.12.attn_q.weight.shard.6 = util.global.load @__auto.blk.12.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %886 = torch_c.from_builtin_tensor %__auto.blk.12.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.12.attn_q.weight.shard.7 = util.global.load @__auto.blk.12.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %887 = torch_c.from_builtin_tensor %__auto.blk.12.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.12.attn_k.weight.shard.0 = util.global.load @__auto.blk.12.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %888 = torch_c.from_builtin_tensor %__auto.blk.12.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_k.weight.shard.1 = util.global.load @__auto.blk.12.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %889 = torch_c.from_builtin_tensor %__auto.blk.12.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_k.weight.shard.2 = util.global.load @__auto.blk.12.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %890 = torch_c.from_builtin_tensor %__auto.blk.12.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_k.weight.shard.3 = util.global.load @__auto.blk.12.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %891 = torch_c.from_builtin_tensor %__auto.blk.12.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_k.weight.shard.4 = util.global.load @__auto.blk.12.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %892 = torch_c.from_builtin_tensor %__auto.blk.12.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_k.weight.shard.5 = util.global.load @__auto.blk.12.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %893 = torch_c.from_builtin_tensor %__auto.blk.12.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_k.weight.shard.6 = util.global.load @__auto.blk.12.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %894 = torch_c.from_builtin_tensor %__auto.blk.12.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_k.weight.shard.7 = util.global.load @__auto.blk.12.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %895 = torch_c.from_builtin_tensor %__auto.blk.12.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_v.weight.shard.0 = util.global.load @__auto.blk.12.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %896 = torch_c.from_builtin_tensor %__auto.blk.12.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_v.weight.shard.1 = util.global.load @__auto.blk.12.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %897 = torch_c.from_builtin_tensor %__auto.blk.12.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_v.weight.shard.2 = util.global.load @__auto.blk.12.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %898 = torch_c.from_builtin_tensor %__auto.blk.12.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_v.weight.shard.3 = util.global.load @__auto.blk.12.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %899 = torch_c.from_builtin_tensor %__auto.blk.12.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_v.weight.shard.4 = util.global.load @__auto.blk.12.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %900 = torch_c.from_builtin_tensor %__auto.blk.12.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_v.weight.shard.5 = util.global.load @__auto.blk.12.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %901 = torch_c.from_builtin_tensor %__auto.blk.12.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_v.weight.shard.6 = util.global.load @__auto.blk.12.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %902 = torch_c.from_builtin_tensor %__auto.blk.12.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.12.attn_v.weight.shard.7 = util.global.load @__auto.blk.12.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %903 = torch_c.from_builtin_tensor %__auto.blk.12.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
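    // Output-projection shards for block 12 follow. Per shard, Q spans 512 rows
    // while K/V span 128; summed over the 8 shards this gives 4096 Q and 1024
    // K/V columns, a ratio suggesting grouped-query attention (inferred from
    // the shard shapes alone).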
    %__auto.blk.12.attn_output.weight.shard.0 = util.global.load @__auto.blk.12.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %904 = torch_c.from_builtin_tensor %__auto.blk.12.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.12.attn_output.weight.shard.1 = util.global.load @__auto.blk.12.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %905 = torch_c.from_builtin_tensor %__auto.blk.12.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.12.attn_output.weight.shard.2 = util.global.load @__auto.blk.12.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %906 = torch_c.from_builtin_tensor %__auto.blk.12.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.12.attn_output.weight.shard.3 = util.global.load @__auto.blk.12.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %907 = torch_c.from_builtin_tensor %__auto.blk.12.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.12.attn_output.weight.shard.4 = util.global.load @__auto.blk.12.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %908 = torch_c.from_builtin_tensor %__auto.blk.12.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.12.attn_output.weight.shard.5 = util.global.load @__auto.blk.12.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %909 = torch_c.from_builtin_tensor %__auto.blk.12.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.12.attn_output.weight.shard.6 = util.global.load @__auto.blk.12.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %910 = torch_c.from_builtin_tensor %__auto.blk.12.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.12.attn_output.weight.shard.7 = util.global.load @__auto.blk.12.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %911 = torch_c.from_builtin_tensor %__auto.blk.12.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.12.ffn_norm.weight = util.global.load @__auto.blk.12.ffn_norm.weight : tensor<4096xf32>
    %912 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.ffn_norm.weight$1 = util.global.load @__auto.blk.12.ffn_norm.weight$1 : tensor<4096xf32>
    %913 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.ffn_norm.weight$2 = util.global.load @__auto.blk.12.ffn_norm.weight$2 : tensor<4096xf32>
    %914 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.ffn_norm.weight$3 = util.global.load @__auto.blk.12.ffn_norm.weight$3 : tensor<4096xf32>
    %915 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.ffn_norm.weight$4 = util.global.load @__auto.blk.12.ffn_norm.weight$4 : tensor<4096xf32>
    %916 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.ffn_norm.weight$5 = util.global.load @__auto.blk.12.ffn_norm.weight$5 : tensor<4096xf32>
    %917 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.ffn_norm.weight$6 = util.global.load @__auto.blk.12.ffn_norm.weight$6 : tensor<4096xf32>
    %918 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.ffn_norm.weight$7 = util.global.load @__auto.blk.12.ffn_norm.weight$7 : tensor<4096xf32>
    %919 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.12.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.12.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %920 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.12.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %921 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.12.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %922 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.12.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %923 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.12.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %924 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.12.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %925 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.12.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %926 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.12.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %927 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_up.weight.shard.0 = util.global.load @__auto.blk.12.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %928 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_up.weight.shard.1 = util.global.load @__auto.blk.12.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %929 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_up.weight.shard.2 = util.global.load @__auto.blk.12.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %930 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_up.weight.shard.3 = util.global.load @__auto.blk.12.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %931 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_up.weight.shard.4 = util.global.load @__auto.blk.12.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %932 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_up.weight.shard.5 = util.global.load @__auto.blk.12.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %933 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_up.weight.shard.6 = util.global.load @__auto.blk.12.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %934 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_up.weight.shard.7 = util.global.load @__auto.blk.12.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %935 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.12.ffn_down.weight.shard.0 = util.global.load @__auto.blk.12.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %936 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.12.ffn_down.weight.shard.1 = util.global.load @__auto.blk.12.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %937 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.12.ffn_down.weight.shard.2 = util.global.load @__auto.blk.12.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %938 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.12.ffn_down.weight.shard.3 = util.global.load @__auto.blk.12.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %939 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.12.ffn_down.weight.shard.4 = util.global.load @__auto.blk.12.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %940 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.12.ffn_down.weight.shard.5 = util.global.load @__auto.blk.12.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %941 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.12.ffn_down.weight.shard.6 = util.global.load @__auto.blk.12.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %942 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.12.ffn_down.weight.shard.7 = util.global.load @__auto.blk.12.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %943 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
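    // Parameters for transformer block 13 (same per-tensor layout as block 12).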
    %__auto.blk.13.attn_norm.weight = util.global.load @__auto.blk.13.attn_norm.weight : tensor<4096xf32>
    %944 = torch_c.from_builtin_tensor %__auto.blk.13.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.attn_norm.weight$1 = util.global.load @__auto.blk.13.attn_norm.weight$1 : tensor<4096xf32>
    %945 = torch_c.from_builtin_tensor %__auto.blk.13.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.attn_norm.weight$2 = util.global.load @__auto.blk.13.attn_norm.weight$2 : tensor<4096xf32>
    %946 = torch_c.from_builtin_tensor %__auto.blk.13.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.attn_norm.weight$3 = util.global.load @__auto.blk.13.attn_norm.weight$3 : tensor<4096xf32>
    %947 = torch_c.from_builtin_tensor %__auto.blk.13.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.attn_norm.weight$4 = util.global.load @__auto.blk.13.attn_norm.weight$4 : tensor<4096xf32>
    %948 = torch_c.from_builtin_tensor %__auto.blk.13.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.attn_norm.weight$5 = util.global.load @__auto.blk.13.attn_norm.weight$5 : tensor<4096xf32>
    %949 = torch_c.from_builtin_tensor %__auto.blk.13.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.attn_norm.weight$6 = util.global.load @__auto.blk.13.attn_norm.weight$6 : tensor<4096xf32>
    %950 = torch_c.from_builtin_tensor %__auto.blk.13.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.attn_norm.weight$7 = util.global.load @__auto.blk.13.attn_norm.weight$7 : tensor<4096xf32>
    %951 = torch_c.from_builtin_tensor %__auto.blk.13.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.attn_q.weight.shard.0 = util.global.load @__auto.blk.13.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %952 = torch_c.from_builtin_tensor %__auto.blk.13.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.13.attn_q.weight.shard.1 = util.global.load @__auto.blk.13.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %953 = torch_c.from_builtin_tensor %__auto.blk.13.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.13.attn_q.weight.shard.2 = util.global.load @__auto.blk.13.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %954 = torch_c.from_builtin_tensor %__auto.blk.13.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.13.attn_q.weight.shard.3 = util.global.load @__auto.blk.13.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %955 = torch_c.from_builtin_tensor %__auto.blk.13.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.13.attn_q.weight.shard.4 = util.global.load @__auto.blk.13.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %956 = torch_c.from_builtin_tensor %__auto.blk.13.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.13.attn_q.weight.shard.5 = util.global.load @__auto.blk.13.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %957 = torch_c.from_builtin_tensor %__auto.blk.13.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.13.attn_q.weight.shard.6 = util.global.load @__auto.blk.13.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %958 = torch_c.from_builtin_tensor %__auto.blk.13.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.13.attn_q.weight.shard.7 = util.global.load @__auto.blk.13.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %959 = torch_c.from_builtin_tensor %__auto.blk.13.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.13.attn_k.weight.shard.0 = util.global.load @__auto.blk.13.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %960 = torch_c.from_builtin_tensor %__auto.blk.13.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_k.weight.shard.1 = util.global.load @__auto.blk.13.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %961 = torch_c.from_builtin_tensor %__auto.blk.13.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_k.weight.shard.2 = util.global.load @__auto.blk.13.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %962 = torch_c.from_builtin_tensor %__auto.blk.13.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_k.weight.shard.3 = util.global.load @__auto.blk.13.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %963 = torch_c.from_builtin_tensor %__auto.blk.13.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_k.weight.shard.4 = util.global.load @__auto.blk.13.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %964 = torch_c.from_builtin_tensor %__auto.blk.13.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_k.weight.shard.5 = util.global.load @__auto.blk.13.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %965 = torch_c.from_builtin_tensor %__auto.blk.13.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_k.weight.shard.6 = util.global.load @__auto.blk.13.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %966 = torch_c.from_builtin_tensor %__auto.blk.13.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_k.weight.shard.7 = util.global.load @__auto.blk.13.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %967 = torch_c.from_builtin_tensor %__auto.blk.13.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_v.weight.shard.0 = util.global.load @__auto.blk.13.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %968 = torch_c.from_builtin_tensor %__auto.blk.13.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_v.weight.shard.1 = util.global.load @__auto.blk.13.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %969 = torch_c.from_builtin_tensor %__auto.blk.13.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_v.weight.shard.2 = util.global.load @__auto.blk.13.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %970 = torch_c.from_builtin_tensor %__auto.blk.13.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_v.weight.shard.3 = util.global.load @__auto.blk.13.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %971 = torch_c.from_builtin_tensor %__auto.blk.13.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_v.weight.shard.4 = util.global.load @__auto.blk.13.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %972 = torch_c.from_builtin_tensor %__auto.blk.13.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_v.weight.shard.5 = util.global.load @__auto.blk.13.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %973 = torch_c.from_builtin_tensor %__auto.blk.13.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_v.weight.shard.6 = util.global.load @__auto.blk.13.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %974 = torch_c.from_builtin_tensor %__auto.blk.13.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_v.weight.shard.7 = util.global.load @__auto.blk.13.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %975 = torch_c.from_builtin_tensor %__auto.blk.13.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.13.attn_output.weight.shard.0 = util.global.load @__auto.blk.13.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %976 = torch_c.from_builtin_tensor %__auto.blk.13.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.13.attn_output.weight.shard.1 = util.global.load @__auto.blk.13.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %977 = torch_c.from_builtin_tensor %__auto.blk.13.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.13.attn_output.weight.shard.2 = util.global.load @__auto.blk.13.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %978 = torch_c.from_builtin_tensor %__auto.blk.13.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.13.attn_output.weight.shard.3 = util.global.load @__auto.blk.13.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %979 = torch_c.from_builtin_tensor %__auto.blk.13.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.13.attn_output.weight.shard.4 = util.global.load @__auto.blk.13.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %980 = torch_c.from_builtin_tensor %__auto.blk.13.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.13.attn_output.weight.shard.5 = util.global.load @__auto.blk.13.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %981 = torch_c.from_builtin_tensor %__auto.blk.13.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.13.attn_output.weight.shard.6 = util.global.load @__auto.blk.13.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %982 = torch_c.from_builtin_tensor %__auto.blk.13.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.13.attn_output.weight.shard.7 = util.global.load @__auto.blk.13.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %983 = torch_c.from_builtin_tensor %__auto.blk.13.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.13.ffn_norm.weight = util.global.load @__auto.blk.13.ffn_norm.weight : tensor<4096xf32>
    %984 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.ffn_norm.weight$1 = util.global.load @__auto.blk.13.ffn_norm.weight$1 : tensor<4096xf32>
    %985 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.ffn_norm.weight$2 = util.global.load @__auto.blk.13.ffn_norm.weight$2 : tensor<4096xf32>
    %986 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.ffn_norm.weight$3 = util.global.load @__auto.blk.13.ffn_norm.weight$3 : tensor<4096xf32>
    %987 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.ffn_norm.weight$4 = util.global.load @__auto.blk.13.ffn_norm.weight$4 : tensor<4096xf32>
    %988 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.ffn_norm.weight$5 = util.global.load @__auto.blk.13.ffn_norm.weight$5 : tensor<4096xf32>
    %989 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.ffn_norm.weight$6 = util.global.load @__auto.blk.13.ffn_norm.weight$6 : tensor<4096xf32>
    %990 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.ffn_norm.weight$7 = util.global.load @__auto.blk.13.ffn_norm.weight$7 : tensor<4096xf32>
    %991 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.13.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.13.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %992 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.13.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %993 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.13.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %994 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.13.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %995 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.13.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %996 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.13.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %997 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.13.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %998 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.13.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %999 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_up.weight.shard.0 = util.global.load @__auto.blk.13.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1000 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_up.weight.shard.1 = util.global.load @__auto.blk.13.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1001 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_up.weight.shard.2 = util.global.load @__auto.blk.13.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1002 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_up.weight.shard.3 = util.global.load @__auto.blk.13.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1003 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_up.weight.shard.4 = util.global.load @__auto.blk.13.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1004 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_up.weight.shard.5 = util.global.load @__auto.blk.13.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1005 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_up.weight.shard.6 = util.global.load @__auto.blk.13.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1006 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_up.weight.shard.7 = util.global.load @__auto.blk.13.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1007 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.13.ffn_down.weight.shard.0 = util.global.load @__auto.blk.13.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1008 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.13.ffn_down.weight.shard.1 = util.global.load @__auto.blk.13.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1009 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.13.ffn_down.weight.shard.2 = util.global.load @__auto.blk.13.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1010 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.13.ffn_down.weight.shard.3 = util.global.load @__auto.blk.13.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1011 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.13.ffn_down.weight.shard.4 = util.global.load @__auto.blk.13.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1012 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.13.ffn_down.weight.shard.5 = util.global.load @__auto.blk.13.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1013 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.13.ffn_down.weight.shard.6 = util.global.load @__auto.blk.13.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1014 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.13.ffn_down.weight.shard.7 = util.global.load @__auto.blk.13.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1015 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
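    // Parameters for transformer block 14 (same per-tensor layout as block 12).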
    %__auto.blk.14.attn_norm.weight = util.global.load @__auto.blk.14.attn_norm.weight : tensor<4096xf32>
    %1016 = torch_c.from_builtin_tensor %__auto.blk.14.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.attn_norm.weight$1 = util.global.load @__auto.blk.14.attn_norm.weight$1 : tensor<4096xf32>
    %1017 = torch_c.from_builtin_tensor %__auto.blk.14.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.attn_norm.weight$2 = util.global.load @__auto.blk.14.attn_norm.weight$2 : tensor<4096xf32>
    %1018 = torch_c.from_builtin_tensor %__auto.blk.14.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.attn_norm.weight$3 = util.global.load @__auto.blk.14.attn_norm.weight$3 : tensor<4096xf32>
    %1019 = torch_c.from_builtin_tensor %__auto.blk.14.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.attn_norm.weight$4 = util.global.load @__auto.blk.14.attn_norm.weight$4 : tensor<4096xf32>
    %1020 = torch_c.from_builtin_tensor %__auto.blk.14.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.attn_norm.weight$5 = util.global.load @__auto.blk.14.attn_norm.weight$5 : tensor<4096xf32>
    %1021 = torch_c.from_builtin_tensor %__auto.blk.14.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.attn_norm.weight$6 = util.global.load @__auto.blk.14.attn_norm.weight$6 : tensor<4096xf32>
    %1022 = torch_c.from_builtin_tensor %__auto.blk.14.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.attn_norm.weight$7 = util.global.load @__auto.blk.14.attn_norm.weight$7 : tensor<4096xf32>
    %1023 = torch_c.from_builtin_tensor %__auto.blk.14.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.attn_q.weight.shard.0 = util.global.load @__auto.blk.14.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1024 = torch_c.from_builtin_tensor %__auto.blk.14.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.14.attn_q.weight.shard.1 = util.global.load @__auto.blk.14.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1025 = torch_c.from_builtin_tensor %__auto.blk.14.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.14.attn_q.weight.shard.2 = util.global.load @__auto.blk.14.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1026 = torch_c.from_builtin_tensor %__auto.blk.14.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.14.attn_q.weight.shard.3 = util.global.load @__auto.blk.14.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1027 = torch_c.from_builtin_tensor %__auto.blk.14.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.14.attn_q.weight.shard.4 = util.global.load @__auto.blk.14.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1028 = torch_c.from_builtin_tensor %__auto.blk.14.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.14.attn_q.weight.shard.5 = util.global.load @__auto.blk.14.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1029 = torch_c.from_builtin_tensor %__auto.blk.14.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.14.attn_q.weight.shard.6 = util.global.load @__auto.blk.14.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1030 = torch_c.from_builtin_tensor %__auto.blk.14.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.14.attn_q.weight.shard.7 = util.global.load @__auto.blk.14.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1031 = torch_c.from_builtin_tensor %__auto.blk.14.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.14.attn_k.weight.shard.0 = util.global.load @__auto.blk.14.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1032 = torch_c.from_builtin_tensor %__auto.blk.14.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_k.weight.shard.1 = util.global.load @__auto.blk.14.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1033 = torch_c.from_builtin_tensor %__auto.blk.14.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_k.weight.shard.2 = util.global.load @__auto.blk.14.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1034 = torch_c.from_builtin_tensor %__auto.blk.14.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_k.weight.shard.3 = util.global.load @__auto.blk.14.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1035 = torch_c.from_builtin_tensor %__auto.blk.14.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_k.weight.shard.4 = util.global.load @__auto.blk.14.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1036 = torch_c.from_builtin_tensor %__auto.blk.14.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_k.weight.shard.5 = util.global.load @__auto.blk.14.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1037 = torch_c.from_builtin_tensor %__auto.blk.14.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_k.weight.shard.6 = util.global.load @__auto.blk.14.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1038 = torch_c.from_builtin_tensor %__auto.blk.14.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_k.weight.shard.7 = util.global.load @__auto.blk.14.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1039 = torch_c.from_builtin_tensor %__auto.blk.14.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_v.weight.shard.0 = util.global.load @__auto.blk.14.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1040 = torch_c.from_builtin_tensor %__auto.blk.14.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_v.weight.shard.1 = util.global.load @__auto.blk.14.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1041 = torch_c.from_builtin_tensor %__auto.blk.14.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_v.weight.shard.2 = util.global.load @__auto.blk.14.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1042 = torch_c.from_builtin_tensor %__auto.blk.14.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_v.weight.shard.3 = util.global.load @__auto.blk.14.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1043 = torch_c.from_builtin_tensor %__auto.blk.14.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_v.weight.shard.4 = util.global.load @__auto.blk.14.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1044 = torch_c.from_builtin_tensor %__auto.blk.14.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_v.weight.shard.5 = util.global.load @__auto.blk.14.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1045 = torch_c.from_builtin_tensor %__auto.blk.14.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_v.weight.shard.6 = util.global.load @__auto.blk.14.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1046 = torch_c.from_builtin_tensor %__auto.blk.14.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_v.weight.shard.7 = util.global.load @__auto.blk.14.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1047 = torch_c.from_builtin_tensor %__auto.blk.14.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.14.attn_output.weight.shard.0 = util.global.load @__auto.blk.14.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1048 = torch_c.from_builtin_tensor %__auto.blk.14.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.14.attn_output.weight.shard.1 = util.global.load @__auto.blk.14.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1049 = torch_c.from_builtin_tensor %__auto.blk.14.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.14.attn_output.weight.shard.2 = util.global.load @__auto.blk.14.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1050 = torch_c.from_builtin_tensor %__auto.blk.14.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.14.attn_output.weight.shard.3 = util.global.load @__auto.blk.14.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1051 = torch_c.from_builtin_tensor %__auto.blk.14.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.14.attn_output.weight.shard.4 = util.global.load @__auto.blk.14.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1052 = torch_c.from_builtin_tensor %__auto.blk.14.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.14.attn_output.weight.shard.5 = util.global.load @__auto.blk.14.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1053 = torch_c.from_builtin_tensor %__auto.blk.14.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.14.attn_output.weight.shard.6 = util.global.load @__auto.blk.14.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1054 = torch_c.from_builtin_tensor %__auto.blk.14.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.14.attn_output.weight.shard.7 = util.global.load @__auto.blk.14.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1055 = torch_c.from_builtin_tensor %__auto.blk.14.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.14.ffn_norm.weight = util.global.load @__auto.blk.14.ffn_norm.weight : tensor<4096xf32>
    %1056 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.ffn_norm.weight$1 = util.global.load @__auto.blk.14.ffn_norm.weight$1 : tensor<4096xf32>
    %1057 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.ffn_norm.weight$2 = util.global.load @__auto.blk.14.ffn_norm.weight$2 : tensor<4096xf32>
    %1058 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.ffn_norm.weight$3 = util.global.load @__auto.blk.14.ffn_norm.weight$3 : tensor<4096xf32>
    %1059 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.ffn_norm.weight$4 = util.global.load @__auto.blk.14.ffn_norm.weight$4 : tensor<4096xf32>
    %1060 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.ffn_norm.weight$5 = util.global.load @__auto.blk.14.ffn_norm.weight$5 : tensor<4096xf32>
    %1061 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.ffn_norm.weight$6 = util.global.load @__auto.blk.14.ffn_norm.weight$6 : tensor<4096xf32>
    %1062 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.ffn_norm.weight$7 = util.global.load @__auto.blk.14.ffn_norm.weight$7 : tensor<4096xf32>
    %1063 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.14.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.14.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1064 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.14.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1065 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.14.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1066 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.14.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1067 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.14.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1068 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.14.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1069 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.14.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1070 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.14.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1071 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_up.weight.shard.0 = util.global.load @__auto.blk.14.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1072 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_up.weight.shard.1 = util.global.load @__auto.blk.14.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1073 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_up.weight.shard.2 = util.global.load @__auto.blk.14.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1074 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_up.weight.shard.3 = util.global.load @__auto.blk.14.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1075 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_up.weight.shard.4 = util.global.load @__auto.blk.14.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1076 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_up.weight.shard.5 = util.global.load @__auto.blk.14.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1077 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_up.weight.shard.6 = util.global.load @__auto.blk.14.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1078 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_up.weight.shard.7 = util.global.load @__auto.blk.14.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1079 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.14.ffn_down.weight.shard.0 = util.global.load @__auto.blk.14.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1080 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.14.ffn_down.weight.shard.1 = util.global.load @__auto.blk.14.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1081 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.14.ffn_down.weight.shard.2 = util.global.load @__auto.blk.14.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1082 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.14.ffn_down.weight.shard.3 = util.global.load @__auto.blk.14.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1083 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.14.ffn_down.weight.shard.4 = util.global.load @__auto.blk.14.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1084 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.14.ffn_down.weight.shard.5 = util.global.load @__auto.blk.14.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1085 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.14.ffn_down.weight.shard.6 = util.global.load @__auto.blk.14.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1086 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.14.ffn_down.weight.shard.7 = util.global.load @__auto.blk.14.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1087 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
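    // Transformer block 15: same load pattern as block 14. Eight replicated
    // copies of each norm weight (tensor<4096xf32>, base name plus $1..$7),
    // followed by 8-way sharded f16 projections: attn_q shards are 512x4096
    // (8 x 512 = 4096), attn_k/attn_v shards are 128x4096 (8 x 128 = 1024,
    // fewer KV rows than Q rows, consistent with grouped-query attention),
    // and attn_output shards are 4096x512, split along the second dimension.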
    %__auto.blk.15.attn_norm.weight = util.global.load @__auto.blk.15.attn_norm.weight : tensor<4096xf32>
    %1088 = torch_c.from_builtin_tensor %__auto.blk.15.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.attn_norm.weight$1 = util.global.load @__auto.blk.15.attn_norm.weight$1 : tensor<4096xf32>
    %1089 = torch_c.from_builtin_tensor %__auto.blk.15.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.attn_norm.weight$2 = util.global.load @__auto.blk.15.attn_norm.weight$2 : tensor<4096xf32>
    %1090 = torch_c.from_builtin_tensor %__auto.blk.15.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.attn_norm.weight$3 = util.global.load @__auto.blk.15.attn_norm.weight$3 : tensor<4096xf32>
    %1091 = torch_c.from_builtin_tensor %__auto.blk.15.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.attn_norm.weight$4 = util.global.load @__auto.blk.15.attn_norm.weight$4 : tensor<4096xf32>
    %1092 = torch_c.from_builtin_tensor %__auto.blk.15.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.attn_norm.weight$5 = util.global.load @__auto.blk.15.attn_norm.weight$5 : tensor<4096xf32>
    %1093 = torch_c.from_builtin_tensor %__auto.blk.15.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.attn_norm.weight$6 = util.global.load @__auto.blk.15.attn_norm.weight$6 : tensor<4096xf32>
    %1094 = torch_c.from_builtin_tensor %__auto.blk.15.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.attn_norm.weight$7 = util.global.load @__auto.blk.15.attn_norm.weight$7 : tensor<4096xf32>
    %1095 = torch_c.from_builtin_tensor %__auto.blk.15.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.attn_q.weight.shard.0 = util.global.load @__auto.blk.15.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1096 = torch_c.from_builtin_tensor %__auto.blk.15.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.15.attn_q.weight.shard.1 = util.global.load @__auto.blk.15.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1097 = torch_c.from_builtin_tensor %__auto.blk.15.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.15.attn_q.weight.shard.2 = util.global.load @__auto.blk.15.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1098 = torch_c.from_builtin_tensor %__auto.blk.15.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.15.attn_q.weight.shard.3 = util.global.load @__auto.blk.15.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1099 = torch_c.from_builtin_tensor %__auto.blk.15.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.15.attn_q.weight.shard.4 = util.global.load @__auto.blk.15.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1100 = torch_c.from_builtin_tensor %__auto.blk.15.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.15.attn_q.weight.shard.5 = util.global.load @__auto.blk.15.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1101 = torch_c.from_builtin_tensor %__auto.blk.15.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.15.attn_q.weight.shard.6 = util.global.load @__auto.blk.15.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1102 = torch_c.from_builtin_tensor %__auto.blk.15.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.15.attn_q.weight.shard.7 = util.global.load @__auto.blk.15.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1103 = torch_c.from_builtin_tensor %__auto.blk.15.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.15.attn_k.weight.shard.0 = util.global.load @__auto.blk.15.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1104 = torch_c.from_builtin_tensor %__auto.blk.15.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_k.weight.shard.1 = util.global.load @__auto.blk.15.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1105 = torch_c.from_builtin_tensor %__auto.blk.15.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_k.weight.shard.2 = util.global.load @__auto.blk.15.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1106 = torch_c.from_builtin_tensor %__auto.blk.15.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_k.weight.shard.3 = util.global.load @__auto.blk.15.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1107 = torch_c.from_builtin_tensor %__auto.blk.15.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_k.weight.shard.4 = util.global.load @__auto.blk.15.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1108 = torch_c.from_builtin_tensor %__auto.blk.15.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_k.weight.shard.5 = util.global.load @__auto.blk.15.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1109 = torch_c.from_builtin_tensor %__auto.blk.15.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_k.weight.shard.6 = util.global.load @__auto.blk.15.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1110 = torch_c.from_builtin_tensor %__auto.blk.15.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_k.weight.shard.7 = util.global.load @__auto.blk.15.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1111 = torch_c.from_builtin_tensor %__auto.blk.15.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_v.weight.shard.0 = util.global.load @__auto.blk.15.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1112 = torch_c.from_builtin_tensor %__auto.blk.15.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_v.weight.shard.1 = util.global.load @__auto.blk.15.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1113 = torch_c.from_builtin_tensor %__auto.blk.15.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_v.weight.shard.2 = util.global.load @__auto.blk.15.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1114 = torch_c.from_builtin_tensor %__auto.blk.15.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_v.weight.shard.3 = util.global.load @__auto.blk.15.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1115 = torch_c.from_builtin_tensor %__auto.blk.15.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_v.weight.shard.4 = util.global.load @__auto.blk.15.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1116 = torch_c.from_builtin_tensor %__auto.blk.15.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_v.weight.shard.5 = util.global.load @__auto.blk.15.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1117 = torch_c.from_builtin_tensor %__auto.blk.15.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_v.weight.shard.6 = util.global.load @__auto.blk.15.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1118 = torch_c.from_builtin_tensor %__auto.blk.15.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_v.weight.shard.7 = util.global.load @__auto.blk.15.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1119 = torch_c.from_builtin_tensor %__auto.blk.15.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.15.attn_output.weight.shard.0 = util.global.load @__auto.blk.15.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1120 = torch_c.from_builtin_tensor %__auto.blk.15.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.15.attn_output.weight.shard.1 = util.global.load @__auto.blk.15.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1121 = torch_c.from_builtin_tensor %__auto.blk.15.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.15.attn_output.weight.shard.2 = util.global.load @__auto.blk.15.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1122 = torch_c.from_builtin_tensor %__auto.blk.15.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.15.attn_output.weight.shard.3 = util.global.load @__auto.blk.15.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1123 = torch_c.from_builtin_tensor %__auto.blk.15.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.15.attn_output.weight.shard.4 = util.global.load @__auto.blk.15.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1124 = torch_c.from_builtin_tensor %__auto.blk.15.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.15.attn_output.weight.shard.5 = util.global.load @__auto.blk.15.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1125 = torch_c.from_builtin_tensor %__auto.blk.15.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.15.attn_output.weight.shard.6 = util.global.load @__auto.blk.15.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1126 = torch_c.from_builtin_tensor %__auto.blk.15.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.15.attn_output.weight.shard.7 = util.global.load @__auto.blk.15.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1127 = torch_c.from_builtin_tensor %__auto.blk.15.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.15.ffn_norm.weight = util.global.load @__auto.blk.15.ffn_norm.weight : tensor<4096xf32>
    %1128 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.ffn_norm.weight$1 = util.global.load @__auto.blk.15.ffn_norm.weight$1 : tensor<4096xf32>
    %1129 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.ffn_norm.weight$2 = util.global.load @__auto.blk.15.ffn_norm.weight$2 : tensor<4096xf32>
    %1130 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.ffn_norm.weight$3 = util.global.load @__auto.blk.15.ffn_norm.weight$3 : tensor<4096xf32>
    %1131 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.ffn_norm.weight$4 = util.global.load @__auto.blk.15.ffn_norm.weight$4 : tensor<4096xf32>
    %1132 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.ffn_norm.weight$5 = util.global.load @__auto.blk.15.ffn_norm.weight$5 : tensor<4096xf32>
    %1133 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.ffn_norm.weight$6 = util.global.load @__auto.blk.15.ffn_norm.weight$6 : tensor<4096xf32>
    %1134 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.ffn_norm.weight$7 = util.global.load @__auto.blk.15.ffn_norm.weight$7 : tensor<4096xf32>
    %1135 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.15.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.15.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1136 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.15.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1137 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.15.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1138 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.15.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1139 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.15.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1140 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.15.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1141 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.15.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1142 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.15.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1143 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_up.weight.shard.0 = util.global.load @__auto.blk.15.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1144 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_up.weight.shard.1 = util.global.load @__auto.blk.15.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1145 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_up.weight.shard.2 = util.global.load @__auto.blk.15.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1146 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_up.weight.shard.3 = util.global.load @__auto.blk.15.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1147 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_up.weight.shard.4 = util.global.load @__auto.blk.15.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1148 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_up.weight.shard.5 = util.global.load @__auto.blk.15.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1149 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_up.weight.shard.6 = util.global.load @__auto.blk.15.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1150 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_up.weight.shard.7 = util.global.load @__auto.blk.15.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1151 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.15.ffn_down.weight.shard.0 = util.global.load @__auto.blk.15.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1152 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.15.ffn_down.weight.shard.1 = util.global.load @__auto.blk.15.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1153 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.15.ffn_down.weight.shard.2 = util.global.load @__auto.blk.15.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1154 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.15.ffn_down.weight.shard.3 = util.global.load @__auto.blk.15.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1155 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.15.ffn_down.weight.shard.4 = util.global.load @__auto.blk.15.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1156 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.15.ffn_down.weight.shard.5 = util.global.load @__auto.blk.15.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1157 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.15.ffn_down.weight.shard.6 = util.global.load @__auto.blk.15.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1158 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.15.ffn_down.weight.shard.7 = util.global.load @__auto.blk.15.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1159 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
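    // Transformer block 16: identical load sequence (norm copies, then
    // attn_q/k/v/output and ffn_gate/up/down shards 0-7).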
    %__auto.blk.16.attn_norm.weight = util.global.load @__auto.blk.16.attn_norm.weight : tensor<4096xf32>
    %1160 = torch_c.from_builtin_tensor %__auto.blk.16.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.attn_norm.weight$1 = util.global.load @__auto.blk.16.attn_norm.weight$1 : tensor<4096xf32>
    %1161 = torch_c.from_builtin_tensor %__auto.blk.16.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.attn_norm.weight$2 = util.global.load @__auto.blk.16.attn_norm.weight$2 : tensor<4096xf32>
    %1162 = torch_c.from_builtin_tensor %__auto.blk.16.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.attn_norm.weight$3 = util.global.load @__auto.blk.16.attn_norm.weight$3 : tensor<4096xf32>
    %1163 = torch_c.from_builtin_tensor %__auto.blk.16.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.attn_norm.weight$4 = util.global.load @__auto.blk.16.attn_norm.weight$4 : tensor<4096xf32>
    %1164 = torch_c.from_builtin_tensor %__auto.blk.16.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.attn_norm.weight$5 = util.global.load @__auto.blk.16.attn_norm.weight$5 : tensor<4096xf32>
    %1165 = torch_c.from_builtin_tensor %__auto.blk.16.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.attn_norm.weight$6 = util.global.load @__auto.blk.16.attn_norm.weight$6 : tensor<4096xf32>
    %1166 = torch_c.from_builtin_tensor %__auto.blk.16.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.attn_norm.weight$7 = util.global.load @__auto.blk.16.attn_norm.weight$7 : tensor<4096xf32>
    %1167 = torch_c.from_builtin_tensor %__auto.blk.16.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.attn_q.weight.shard.0 = util.global.load @__auto.blk.16.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1168 = torch_c.from_builtin_tensor %__auto.blk.16.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.16.attn_q.weight.shard.1 = util.global.load @__auto.blk.16.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1169 = torch_c.from_builtin_tensor %__auto.blk.16.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.16.attn_q.weight.shard.2 = util.global.load @__auto.blk.16.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1170 = torch_c.from_builtin_tensor %__auto.blk.16.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.16.attn_q.weight.shard.3 = util.global.load @__auto.blk.16.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1171 = torch_c.from_builtin_tensor %__auto.blk.16.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.16.attn_q.weight.shard.4 = util.global.load @__auto.blk.16.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1172 = torch_c.from_builtin_tensor %__auto.blk.16.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.16.attn_q.weight.shard.5 = util.global.load @__auto.blk.16.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1173 = torch_c.from_builtin_tensor %__auto.blk.16.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.16.attn_q.weight.shard.6 = util.global.load @__auto.blk.16.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1174 = torch_c.from_builtin_tensor %__auto.blk.16.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.16.attn_q.weight.shard.7 = util.global.load @__auto.blk.16.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1175 = torch_c.from_builtin_tensor %__auto.blk.16.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.16.attn_k.weight.shard.0 = util.global.load @__auto.blk.16.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1176 = torch_c.from_builtin_tensor %__auto.blk.16.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_k.weight.shard.1 = util.global.load @__auto.blk.16.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1177 = torch_c.from_builtin_tensor %__auto.blk.16.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_k.weight.shard.2 = util.global.load @__auto.blk.16.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1178 = torch_c.from_builtin_tensor %__auto.blk.16.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_k.weight.shard.3 = util.global.load @__auto.blk.16.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1179 = torch_c.from_builtin_tensor %__auto.blk.16.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_k.weight.shard.4 = util.global.load @__auto.blk.16.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1180 = torch_c.from_builtin_tensor %__auto.blk.16.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_k.weight.shard.5 = util.global.load @__auto.blk.16.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1181 = torch_c.from_builtin_tensor %__auto.blk.16.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_k.weight.shard.6 = util.global.load @__auto.blk.16.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1182 = torch_c.from_builtin_tensor %__auto.blk.16.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_k.weight.shard.7 = util.global.load @__auto.blk.16.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1183 = torch_c.from_builtin_tensor %__auto.blk.16.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_v.weight.shard.0 = util.global.load @__auto.blk.16.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1184 = torch_c.from_builtin_tensor %__auto.blk.16.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_v.weight.shard.1 = util.global.load @__auto.blk.16.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1185 = torch_c.from_builtin_tensor %__auto.blk.16.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_v.weight.shard.2 = util.global.load @__auto.blk.16.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1186 = torch_c.from_builtin_tensor %__auto.blk.16.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_v.weight.shard.3 = util.global.load @__auto.blk.16.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1187 = torch_c.from_builtin_tensor %__auto.blk.16.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_v.weight.shard.4 = util.global.load @__auto.blk.16.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1188 = torch_c.from_builtin_tensor %__auto.blk.16.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_v.weight.shard.5 = util.global.load @__auto.blk.16.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1189 = torch_c.from_builtin_tensor %__auto.blk.16.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_v.weight.shard.6 = util.global.load @__auto.blk.16.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1190 = torch_c.from_builtin_tensor %__auto.blk.16.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_v.weight.shard.7 = util.global.load @__auto.blk.16.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1191 = torch_c.from_builtin_tensor %__auto.blk.16.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.16.attn_output.weight.shard.0 = util.global.load @__auto.blk.16.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1192 = torch_c.from_builtin_tensor %__auto.blk.16.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.16.attn_output.weight.shard.1 = util.global.load @__auto.blk.16.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1193 = torch_c.from_builtin_tensor %__auto.blk.16.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.16.attn_output.weight.shard.2 = util.global.load @__auto.blk.16.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1194 = torch_c.from_builtin_tensor %__auto.blk.16.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.16.attn_output.weight.shard.3 = util.global.load @__auto.blk.16.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1195 = torch_c.from_builtin_tensor %__auto.blk.16.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.16.attn_output.weight.shard.4 = util.global.load @__auto.blk.16.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1196 = torch_c.from_builtin_tensor %__auto.blk.16.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.16.attn_output.weight.shard.5 = util.global.load @__auto.blk.16.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1197 = torch_c.from_builtin_tensor %__auto.blk.16.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.16.attn_output.weight.shard.6 = util.global.load @__auto.blk.16.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1198 = torch_c.from_builtin_tensor %__auto.blk.16.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.16.attn_output.weight.shard.7 = util.global.load @__auto.blk.16.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1199 = torch_c.from_builtin_tensor %__auto.blk.16.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.16.ffn_norm.weight = util.global.load @__auto.blk.16.ffn_norm.weight : tensor<4096xf32>
    %1200 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.ffn_norm.weight$1 = util.global.load @__auto.blk.16.ffn_norm.weight$1 : tensor<4096xf32>
    %1201 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.ffn_norm.weight$2 = util.global.load @__auto.blk.16.ffn_norm.weight$2 : tensor<4096xf32>
    %1202 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.ffn_norm.weight$3 = util.global.load @__auto.blk.16.ffn_norm.weight$3 : tensor<4096xf32>
    %1203 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.ffn_norm.weight$4 = util.global.load @__auto.blk.16.ffn_norm.weight$4 : tensor<4096xf32>
    %1204 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.ffn_norm.weight$5 = util.global.load @__auto.blk.16.ffn_norm.weight$5 : tensor<4096xf32>
    %1205 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.ffn_norm.weight$6 = util.global.load @__auto.blk.16.ffn_norm.weight$6 : tensor<4096xf32>
    %1206 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.ffn_norm.weight$7 = util.global.load @__auto.blk.16.ffn_norm.weight$7 : tensor<4096xf32>
    %1207 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.16.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.16.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1208 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.16.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1209 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.16.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1210 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.16.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1211 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.16.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1212 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.16.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1213 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.16.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1214 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.16.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1215 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_up.weight.shard.0 = util.global.load @__auto.blk.16.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1216 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_up.weight.shard.1 = util.global.load @__auto.blk.16.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1217 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_up.weight.shard.2 = util.global.load @__auto.blk.16.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1218 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_up.weight.shard.3 = util.global.load @__auto.blk.16.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1219 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_up.weight.shard.4 = util.global.load @__auto.blk.16.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1220 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_up.weight.shard.5 = util.global.load @__auto.blk.16.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1221 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_up.weight.shard.6 = util.global.load @__auto.blk.16.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1222 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_up.weight.shard.7 = util.global.load @__auto.blk.16.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1223 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.16.ffn_down.weight.shard.0 = util.global.load @__auto.blk.16.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1224 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.16.ffn_down.weight.shard.1 = util.global.load @__auto.blk.16.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1225 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.16.ffn_down.weight.shard.2 = util.global.load @__auto.blk.16.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1226 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.16.ffn_down.weight.shard.3 = util.global.load @__auto.blk.16.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1227 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.16.ffn_down.weight.shard.4 = util.global.load @__auto.blk.16.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1228 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.16.ffn_down.weight.shard.5 = util.global.load @__auto.blk.16.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1229 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.16.ffn_down.weight.shard.6 = util.global.load @__auto.blk.16.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1230 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.16.ffn_down.weight.shard.7 = util.global.load @__auto.blk.16.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1231 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
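    // Transformer block 17: identical load sequence.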
    %__auto.blk.17.attn_norm.weight = util.global.load @__auto.blk.17.attn_norm.weight : tensor<4096xf32>
    %1232 = torch_c.from_builtin_tensor %__auto.blk.17.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.attn_norm.weight$1 = util.global.load @__auto.blk.17.attn_norm.weight$1 : tensor<4096xf32>
    %1233 = torch_c.from_builtin_tensor %__auto.blk.17.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.attn_norm.weight$2 = util.global.load @__auto.blk.17.attn_norm.weight$2 : tensor<4096xf32>
    %1234 = torch_c.from_builtin_tensor %__auto.blk.17.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.attn_norm.weight$3 = util.global.load @__auto.blk.17.attn_norm.weight$3 : tensor<4096xf32>
    %1235 = torch_c.from_builtin_tensor %__auto.blk.17.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.attn_norm.weight$4 = util.global.load @__auto.blk.17.attn_norm.weight$4 : tensor<4096xf32>
    %1236 = torch_c.from_builtin_tensor %__auto.blk.17.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.attn_norm.weight$5 = util.global.load @__auto.blk.17.attn_norm.weight$5 : tensor<4096xf32>
    %1237 = torch_c.from_builtin_tensor %__auto.blk.17.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.attn_norm.weight$6 = util.global.load @__auto.blk.17.attn_norm.weight$6 : tensor<4096xf32>
    %1238 = torch_c.from_builtin_tensor %__auto.blk.17.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.attn_norm.weight$7 = util.global.load @__auto.blk.17.attn_norm.weight$7 : tensor<4096xf32>
    %1239 = torch_c.from_builtin_tensor %__auto.blk.17.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.attn_q.weight.shard.0 = util.global.load @__auto.blk.17.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1240 = torch_c.from_builtin_tensor %__auto.blk.17.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.17.attn_q.weight.shard.1 = util.global.load @__auto.blk.17.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1241 = torch_c.from_builtin_tensor %__auto.blk.17.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.17.attn_q.weight.shard.2 = util.global.load @__auto.blk.17.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1242 = torch_c.from_builtin_tensor %__auto.blk.17.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.17.attn_q.weight.shard.3 = util.global.load @__auto.blk.17.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1243 = torch_c.from_builtin_tensor %__auto.blk.17.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.17.attn_q.weight.shard.4 = util.global.load @__auto.blk.17.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1244 = torch_c.from_builtin_tensor %__auto.blk.17.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.17.attn_q.weight.shard.5 = util.global.load @__auto.blk.17.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1245 = torch_c.from_builtin_tensor %__auto.blk.17.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.17.attn_q.weight.shard.6 = util.global.load @__auto.blk.17.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1246 = torch_c.from_builtin_tensor %__auto.blk.17.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.17.attn_q.weight.shard.7 = util.global.load @__auto.blk.17.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1247 = torch_c.from_builtin_tensor %__auto.blk.17.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.17.attn_k.weight.shard.0 = util.global.load @__auto.blk.17.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1248 = torch_c.from_builtin_tensor %__auto.blk.17.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_k.weight.shard.1 = util.global.load @__auto.blk.17.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1249 = torch_c.from_builtin_tensor %__auto.blk.17.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_k.weight.shard.2 = util.global.load @__auto.blk.17.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1250 = torch_c.from_builtin_tensor %__auto.blk.17.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_k.weight.shard.3 = util.global.load @__auto.blk.17.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1251 = torch_c.from_builtin_tensor %__auto.blk.17.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_k.weight.shard.4 = util.global.load @__auto.blk.17.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1252 = torch_c.from_builtin_tensor %__auto.blk.17.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_k.weight.shard.5 = util.global.load @__auto.blk.17.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1253 = torch_c.from_builtin_tensor %__auto.blk.17.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_k.weight.shard.6 = util.global.load @__auto.blk.17.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1254 = torch_c.from_builtin_tensor %__auto.blk.17.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_k.weight.shard.7 = util.global.load @__auto.blk.17.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1255 = torch_c.from_builtin_tensor %__auto.blk.17.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_v.weight.shard.0 = util.global.load @__auto.blk.17.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1256 = torch_c.from_builtin_tensor %__auto.blk.17.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_v.weight.shard.1 = util.global.load @__auto.blk.17.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1257 = torch_c.from_builtin_tensor %__auto.blk.17.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_v.weight.shard.2 = util.global.load @__auto.blk.17.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1258 = torch_c.from_builtin_tensor %__auto.blk.17.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_v.weight.shard.3 = util.global.load @__auto.blk.17.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1259 = torch_c.from_builtin_tensor %__auto.blk.17.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_v.weight.shard.4 = util.global.load @__auto.blk.17.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1260 = torch_c.from_builtin_tensor %__auto.blk.17.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_v.weight.shard.5 = util.global.load @__auto.blk.17.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1261 = torch_c.from_builtin_tensor %__auto.blk.17.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_v.weight.shard.6 = util.global.load @__auto.blk.17.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1262 = torch_c.from_builtin_tensor %__auto.blk.17.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_v.weight.shard.7 = util.global.load @__auto.blk.17.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1263 = torch_c.from_builtin_tensor %__auto.blk.17.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.17.attn_output.weight.shard.0 = util.global.load @__auto.blk.17.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1264 = torch_c.from_builtin_tensor %__auto.blk.17.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.17.attn_output.weight.shard.1 = util.global.load @__auto.blk.17.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1265 = torch_c.from_builtin_tensor %__auto.blk.17.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.17.attn_output.weight.shard.2 = util.global.load @__auto.blk.17.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1266 = torch_c.from_builtin_tensor %__auto.blk.17.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.17.attn_output.weight.shard.3 = util.global.load @__auto.blk.17.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1267 = torch_c.from_builtin_tensor %__auto.blk.17.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.17.attn_output.weight.shard.4 = util.global.load @__auto.blk.17.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1268 = torch_c.from_builtin_tensor %__auto.blk.17.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.17.attn_output.weight.shard.5 = util.global.load @__auto.blk.17.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1269 = torch_c.from_builtin_tensor %__auto.blk.17.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.17.attn_output.weight.shard.6 = util.global.load @__auto.blk.17.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1270 = torch_c.from_builtin_tensor %__auto.blk.17.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.17.attn_output.weight.shard.7 = util.global.load @__auto.blk.17.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1271 = torch_c.from_builtin_tensor %__auto.blk.17.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.17.ffn_norm.weight = util.global.load @__auto.blk.17.ffn_norm.weight : tensor<4096xf32>
    %1272 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.ffn_norm.weight$1 = util.global.load @__auto.blk.17.ffn_norm.weight$1 : tensor<4096xf32>
    %1273 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.ffn_norm.weight$2 = util.global.load @__auto.blk.17.ffn_norm.weight$2 : tensor<4096xf32>
    %1274 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.ffn_norm.weight$3 = util.global.load @__auto.blk.17.ffn_norm.weight$3 : tensor<4096xf32>
    %1275 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.ffn_norm.weight$4 = util.global.load @__auto.blk.17.ffn_norm.weight$4 : tensor<4096xf32>
    %1276 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.ffn_norm.weight$5 = util.global.load @__auto.blk.17.ffn_norm.weight$5 : tensor<4096xf32>
    %1277 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.ffn_norm.weight$6 = util.global.load @__auto.blk.17.ffn_norm.weight$6 : tensor<4096xf32>
    %1278 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.ffn_norm.weight$7 = util.global.load @__auto.blk.17.ffn_norm.weight$7 : tensor<4096xf32>
    %1279 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.17.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.17.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1280 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.17.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1281 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.17.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1282 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.17.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1283 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.17.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1284 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.17.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1285 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.17.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1286 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.17.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1287 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_up.weight.shard.0 = util.global.load @__auto.blk.17.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1288 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_up.weight.shard.1 = util.global.load @__auto.blk.17.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1289 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_up.weight.shard.2 = util.global.load @__auto.blk.17.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1290 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_up.weight.shard.3 = util.global.load @__auto.blk.17.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1291 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_up.weight.shard.4 = util.global.load @__auto.blk.17.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1292 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_up.weight.shard.5 = util.global.load @__auto.blk.17.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1293 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_up.weight.shard.6 = util.global.load @__auto.blk.17.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1294 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_up.weight.shard.7 = util.global.load @__auto.blk.17.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1295 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.17.ffn_down.weight.shard.0 = util.global.load @__auto.blk.17.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1296 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.17.ffn_down.weight.shard.1 = util.global.load @__auto.blk.17.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1297 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.17.ffn_down.weight.shard.2 = util.global.load @__auto.blk.17.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1298 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.17.ffn_down.weight.shard.3 = util.global.load @__auto.blk.17.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1299 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.17.ffn_down.weight.shard.4 = util.global.load @__auto.blk.17.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1300 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.17.ffn_down.weight.shard.5 = util.global.load @__auto.blk.17.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1301 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.17.ffn_down.weight.shard.6 = util.global.load @__auto.blk.17.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1302 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.17.ffn_down.weight.shard.7 = util.global.load @__auto.blk.17.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1303 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
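    // blk.18: per-device copies of attn_norm ($1..$7 alongside the base global) followed by q/k/v/output and ffn gate/up/down weights in shards 0..7.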
    %__auto.blk.18.attn_norm.weight = util.global.load @__auto.blk.18.attn_norm.weight : tensor<4096xf32>
    %1304 = torch_c.from_builtin_tensor %__auto.blk.18.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.attn_norm.weight$1 = util.global.load @__auto.blk.18.attn_norm.weight$1 : tensor<4096xf32>
    %1305 = torch_c.from_builtin_tensor %__auto.blk.18.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.attn_norm.weight$2 = util.global.load @__auto.blk.18.attn_norm.weight$2 : tensor<4096xf32>
    %1306 = torch_c.from_builtin_tensor %__auto.blk.18.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.attn_norm.weight$3 = util.global.load @__auto.blk.18.attn_norm.weight$3 : tensor<4096xf32>
    %1307 = torch_c.from_builtin_tensor %__auto.blk.18.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.attn_norm.weight$4 = util.global.load @__auto.blk.18.attn_norm.weight$4 : tensor<4096xf32>
    %1308 = torch_c.from_builtin_tensor %__auto.blk.18.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.attn_norm.weight$5 = util.global.load @__auto.blk.18.attn_norm.weight$5 : tensor<4096xf32>
    %1309 = torch_c.from_builtin_tensor %__auto.blk.18.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.attn_norm.weight$6 = util.global.load @__auto.blk.18.attn_norm.weight$6 : tensor<4096xf32>
    %1310 = torch_c.from_builtin_tensor %__auto.blk.18.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.attn_norm.weight$7 = util.global.load @__auto.blk.18.attn_norm.weight$7 : tensor<4096xf32>
    %1311 = torch_c.from_builtin_tensor %__auto.blk.18.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.attn_q.weight.shard.0 = util.global.load @__auto.blk.18.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1312 = torch_c.from_builtin_tensor %__auto.blk.18.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.18.attn_q.weight.shard.1 = util.global.load @__auto.blk.18.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1313 = torch_c.from_builtin_tensor %__auto.blk.18.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.18.attn_q.weight.shard.2 = util.global.load @__auto.blk.18.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1314 = torch_c.from_builtin_tensor %__auto.blk.18.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.18.attn_q.weight.shard.3 = util.global.load @__auto.blk.18.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1315 = torch_c.from_builtin_tensor %__auto.blk.18.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.18.attn_q.weight.shard.4 = util.global.load @__auto.blk.18.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1316 = torch_c.from_builtin_tensor %__auto.blk.18.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.18.attn_q.weight.shard.5 = util.global.load @__auto.blk.18.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1317 = torch_c.from_builtin_tensor %__auto.blk.18.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.18.attn_q.weight.shard.6 = util.global.load @__auto.blk.18.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1318 = torch_c.from_builtin_tensor %__auto.blk.18.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.18.attn_q.weight.shard.7 = util.global.load @__auto.blk.18.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1319 = torch_c.from_builtin_tensor %__auto.blk.18.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.18.attn_k.weight.shard.0 = util.global.load @__auto.blk.18.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1320 = torch_c.from_builtin_tensor %__auto.blk.18.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_k.weight.shard.1 = util.global.load @__auto.blk.18.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1321 = torch_c.from_builtin_tensor %__auto.blk.18.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_k.weight.shard.2 = util.global.load @__auto.blk.18.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1322 = torch_c.from_builtin_tensor %__auto.blk.18.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_k.weight.shard.3 = util.global.load @__auto.blk.18.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1323 = torch_c.from_builtin_tensor %__auto.blk.18.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_k.weight.shard.4 = util.global.load @__auto.blk.18.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1324 = torch_c.from_builtin_tensor %__auto.blk.18.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_k.weight.shard.5 = util.global.load @__auto.blk.18.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1325 = torch_c.from_builtin_tensor %__auto.blk.18.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_k.weight.shard.6 = util.global.load @__auto.blk.18.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1326 = torch_c.from_builtin_tensor %__auto.blk.18.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_k.weight.shard.7 = util.global.load @__auto.blk.18.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1327 = torch_c.from_builtin_tensor %__auto.blk.18.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_v.weight.shard.0 = util.global.load @__auto.blk.18.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1328 = torch_c.from_builtin_tensor %__auto.blk.18.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_v.weight.shard.1 = util.global.load @__auto.blk.18.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1329 = torch_c.from_builtin_tensor %__auto.blk.18.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_v.weight.shard.2 = util.global.load @__auto.blk.18.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1330 = torch_c.from_builtin_tensor %__auto.blk.18.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_v.weight.shard.3 = util.global.load @__auto.blk.18.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1331 = torch_c.from_builtin_tensor %__auto.blk.18.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_v.weight.shard.4 = util.global.load @__auto.blk.18.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1332 = torch_c.from_builtin_tensor %__auto.blk.18.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_v.weight.shard.5 = util.global.load @__auto.blk.18.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1333 = torch_c.from_builtin_tensor %__auto.blk.18.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_v.weight.shard.6 = util.global.load @__auto.blk.18.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1334 = torch_c.from_builtin_tensor %__auto.blk.18.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_v.weight.shard.7 = util.global.load @__auto.blk.18.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1335 = torch_c.from_builtin_tensor %__auto.blk.18.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.18.attn_output.weight.shard.0 = util.global.load @__auto.blk.18.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1336 = torch_c.from_builtin_tensor %__auto.blk.18.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.18.attn_output.weight.shard.1 = util.global.load @__auto.blk.18.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1337 = torch_c.from_builtin_tensor %__auto.blk.18.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.18.attn_output.weight.shard.2 = util.global.load @__auto.blk.18.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1338 = torch_c.from_builtin_tensor %__auto.blk.18.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.18.attn_output.weight.shard.3 = util.global.load @__auto.blk.18.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1339 = torch_c.from_builtin_tensor %__auto.blk.18.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.18.attn_output.weight.shard.4 = util.global.load @__auto.blk.18.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1340 = torch_c.from_builtin_tensor %__auto.blk.18.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.18.attn_output.weight.shard.5 = util.global.load @__auto.blk.18.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1341 = torch_c.from_builtin_tensor %__auto.blk.18.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.18.attn_output.weight.shard.6 = util.global.load @__auto.blk.18.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1342 = torch_c.from_builtin_tensor %__auto.blk.18.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.18.attn_output.weight.shard.7 = util.global.load @__auto.blk.18.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1343 = torch_c.from_builtin_tensor %__auto.blk.18.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.18.ffn_norm.weight = util.global.load @__auto.blk.18.ffn_norm.weight : tensor<4096xf32>
    %1344 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.ffn_norm.weight$1 = util.global.load @__auto.blk.18.ffn_norm.weight$1 : tensor<4096xf32>
    %1345 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.ffn_norm.weight$2 = util.global.load @__auto.blk.18.ffn_norm.weight$2 : tensor<4096xf32>
    %1346 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.ffn_norm.weight$3 = util.global.load @__auto.blk.18.ffn_norm.weight$3 : tensor<4096xf32>
    %1347 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.ffn_norm.weight$4 = util.global.load @__auto.blk.18.ffn_norm.weight$4 : tensor<4096xf32>
    %1348 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.ffn_norm.weight$5 = util.global.load @__auto.blk.18.ffn_norm.weight$5 : tensor<4096xf32>
    %1349 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.ffn_norm.weight$6 = util.global.load @__auto.blk.18.ffn_norm.weight$6 : tensor<4096xf32>
    %1350 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.ffn_norm.weight$7 = util.global.load @__auto.blk.18.ffn_norm.weight$7 : tensor<4096xf32>
    %1351 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.18.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.18.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1352 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.18.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1353 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.18.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1354 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.18.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1355 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.18.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1356 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.18.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1357 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.18.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1358 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.18.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1359 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_up.weight.shard.0 = util.global.load @__auto.blk.18.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1360 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_up.weight.shard.1 = util.global.load @__auto.blk.18.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1361 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_up.weight.shard.2 = util.global.load @__auto.blk.18.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1362 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_up.weight.shard.3 = util.global.load @__auto.blk.18.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1363 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_up.weight.shard.4 = util.global.load @__auto.blk.18.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1364 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_up.weight.shard.5 = util.global.load @__auto.blk.18.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1365 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_up.weight.shard.6 = util.global.load @__auto.blk.18.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1366 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_up.weight.shard.7 = util.global.load @__auto.blk.18.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1367 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.18.ffn_down.weight.shard.0 = util.global.load @__auto.blk.18.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1368 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.18.ffn_down.weight.shard.1 = util.global.load @__auto.blk.18.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1369 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.18.ffn_down.weight.shard.2 = util.global.load @__auto.blk.18.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1370 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.18.ffn_down.weight.shard.3 = util.global.load @__auto.blk.18.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1371 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.18.ffn_down.weight.shard.4 = util.global.load @__auto.blk.18.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1372 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.18.ffn_down.weight.shard.5 = util.global.load @__auto.blk.18.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1373 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.18.ffn_down.weight.shard.6 = util.global.load @__auto.blk.18.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1374 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.18.ffn_down.weight.shard.7 = util.global.load @__auto.blk.18.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1375 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
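    // blk.19: same load pattern as blk.18.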
    %__auto.blk.19.attn_norm.weight = util.global.load @__auto.blk.19.attn_norm.weight : tensor<4096xf32>
    %1376 = torch_c.from_builtin_tensor %__auto.blk.19.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.attn_norm.weight$1 = util.global.load @__auto.blk.19.attn_norm.weight$1 : tensor<4096xf32>
    %1377 = torch_c.from_builtin_tensor %__auto.blk.19.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.attn_norm.weight$2 = util.global.load @__auto.blk.19.attn_norm.weight$2 : tensor<4096xf32>
    %1378 = torch_c.from_builtin_tensor %__auto.blk.19.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.attn_norm.weight$3 = util.global.load @__auto.blk.19.attn_norm.weight$3 : tensor<4096xf32>
    %1379 = torch_c.from_builtin_tensor %__auto.blk.19.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.attn_norm.weight$4 = util.global.load @__auto.blk.19.attn_norm.weight$4 : tensor<4096xf32>
    %1380 = torch_c.from_builtin_tensor %__auto.blk.19.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.attn_norm.weight$5 = util.global.load @__auto.blk.19.attn_norm.weight$5 : tensor<4096xf32>
    %1381 = torch_c.from_builtin_tensor %__auto.blk.19.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.attn_norm.weight$6 = util.global.load @__auto.blk.19.attn_norm.weight$6 : tensor<4096xf32>
    %1382 = torch_c.from_builtin_tensor %__auto.blk.19.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.attn_norm.weight$7 = util.global.load @__auto.blk.19.attn_norm.weight$7 : tensor<4096xf32>
    %1383 = torch_c.from_builtin_tensor %__auto.blk.19.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.attn_q.weight.shard.0 = util.global.load @__auto.blk.19.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1384 = torch_c.from_builtin_tensor %__auto.blk.19.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.19.attn_q.weight.shard.1 = util.global.load @__auto.blk.19.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1385 = torch_c.from_builtin_tensor %__auto.blk.19.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.19.attn_q.weight.shard.2 = util.global.load @__auto.blk.19.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1386 = torch_c.from_builtin_tensor %__auto.blk.19.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.19.attn_q.weight.shard.3 = util.global.load @__auto.blk.19.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1387 = torch_c.from_builtin_tensor %__auto.blk.19.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.19.attn_q.weight.shard.4 = util.global.load @__auto.blk.19.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1388 = torch_c.from_builtin_tensor %__auto.blk.19.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.19.attn_q.weight.shard.5 = util.global.load @__auto.blk.19.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1389 = torch_c.from_builtin_tensor %__auto.blk.19.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.19.attn_q.weight.shard.6 = util.global.load @__auto.blk.19.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1390 = torch_c.from_builtin_tensor %__auto.blk.19.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.19.attn_q.weight.shard.7 = util.global.load @__auto.blk.19.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1391 = torch_c.from_builtin_tensor %__auto.blk.19.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.19.attn_k.weight.shard.0 = util.global.load @__auto.blk.19.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1392 = torch_c.from_builtin_tensor %__auto.blk.19.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_k.weight.shard.1 = util.global.load @__auto.blk.19.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1393 = torch_c.from_builtin_tensor %__auto.blk.19.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_k.weight.shard.2 = util.global.load @__auto.blk.19.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1394 = torch_c.from_builtin_tensor %__auto.blk.19.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_k.weight.shard.3 = util.global.load @__auto.blk.19.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1395 = torch_c.from_builtin_tensor %__auto.blk.19.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_k.weight.shard.4 = util.global.load @__auto.blk.19.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1396 = torch_c.from_builtin_tensor %__auto.blk.19.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_k.weight.shard.5 = util.global.load @__auto.blk.19.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1397 = torch_c.from_builtin_tensor %__auto.blk.19.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_k.weight.shard.6 = util.global.load @__auto.blk.19.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1398 = torch_c.from_builtin_tensor %__auto.blk.19.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_k.weight.shard.7 = util.global.load @__auto.blk.19.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1399 = torch_c.from_builtin_tensor %__auto.blk.19.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_v.weight.shard.0 = util.global.load @__auto.blk.19.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1400 = torch_c.from_builtin_tensor %__auto.blk.19.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_v.weight.shard.1 = util.global.load @__auto.blk.19.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1401 = torch_c.from_builtin_tensor %__auto.blk.19.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_v.weight.shard.2 = util.global.load @__auto.blk.19.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1402 = torch_c.from_builtin_tensor %__auto.blk.19.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_v.weight.shard.3 = util.global.load @__auto.blk.19.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1403 = torch_c.from_builtin_tensor %__auto.blk.19.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_v.weight.shard.4 = util.global.load @__auto.blk.19.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1404 = torch_c.from_builtin_tensor %__auto.blk.19.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_v.weight.shard.5 = util.global.load @__auto.blk.19.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1405 = torch_c.from_builtin_tensor %__auto.blk.19.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_v.weight.shard.6 = util.global.load @__auto.blk.19.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1406 = torch_c.from_builtin_tensor %__auto.blk.19.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_v.weight.shard.7 = util.global.load @__auto.blk.19.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1407 = torch_c.from_builtin_tensor %__auto.blk.19.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.19.attn_output.weight.shard.0 = util.global.load @__auto.blk.19.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1408 = torch_c.from_builtin_tensor %__auto.blk.19.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.19.attn_output.weight.shard.1 = util.global.load @__auto.blk.19.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1409 = torch_c.from_builtin_tensor %__auto.blk.19.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.19.attn_output.weight.shard.2 = util.global.load @__auto.blk.19.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1410 = torch_c.from_builtin_tensor %__auto.blk.19.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.19.attn_output.weight.shard.3 = util.global.load @__auto.blk.19.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1411 = torch_c.from_builtin_tensor %__auto.blk.19.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.19.attn_output.weight.shard.4 = util.global.load @__auto.blk.19.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1412 = torch_c.from_builtin_tensor %__auto.blk.19.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.19.attn_output.weight.shard.5 = util.global.load @__auto.blk.19.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1413 = torch_c.from_builtin_tensor %__auto.blk.19.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.19.attn_output.weight.shard.6 = util.global.load @__auto.blk.19.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1414 = torch_c.from_builtin_tensor %__auto.blk.19.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.19.attn_output.weight.shard.7 = util.global.load @__auto.blk.19.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1415 = torch_c.from_builtin_tensor %__auto.blk.19.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.19.ffn_norm.weight = util.global.load @__auto.blk.19.ffn_norm.weight : tensor<4096xf32>
    %1416 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.ffn_norm.weight$1 = util.global.load @__auto.blk.19.ffn_norm.weight$1 : tensor<4096xf32>
    %1417 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.ffn_norm.weight$2 = util.global.load @__auto.blk.19.ffn_norm.weight$2 : tensor<4096xf32>
    %1418 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.ffn_norm.weight$3 = util.global.load @__auto.blk.19.ffn_norm.weight$3 : tensor<4096xf32>
    %1419 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.ffn_norm.weight$4 = util.global.load @__auto.blk.19.ffn_norm.weight$4 : tensor<4096xf32>
    %1420 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.ffn_norm.weight$5 = util.global.load @__auto.blk.19.ffn_norm.weight$5 : tensor<4096xf32>
    %1421 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.ffn_norm.weight$6 = util.global.load @__auto.blk.19.ffn_norm.weight$6 : tensor<4096xf32>
    %1422 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.ffn_norm.weight$7 = util.global.load @__auto.blk.19.ffn_norm.weight$7 : tensor<4096xf32>
    %1423 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.19.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.19.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1424 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.19.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1425 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.19.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1426 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.19.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1427 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.19.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1428 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.19.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1429 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.19.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1430 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.19.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1431 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_up.weight.shard.0 = util.global.load @__auto.blk.19.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1432 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_up.weight.shard.1 = util.global.load @__auto.blk.19.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1433 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_up.weight.shard.2 = util.global.load @__auto.blk.19.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1434 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_up.weight.shard.3 = util.global.load @__auto.blk.19.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1435 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_up.weight.shard.4 = util.global.load @__auto.blk.19.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1436 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_up.weight.shard.5 = util.global.load @__auto.blk.19.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1437 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_up.weight.shard.6 = util.global.load @__auto.blk.19.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1438 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_up.weight.shard.7 = util.global.load @__auto.blk.19.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1439 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.19.ffn_down.weight.shard.0 = util.global.load @__auto.blk.19.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1440 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.19.ffn_down.weight.shard.1 = util.global.load @__auto.blk.19.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1441 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.19.ffn_down.weight.shard.2 = util.global.load @__auto.blk.19.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1442 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.19.ffn_down.weight.shard.3 = util.global.load @__auto.blk.19.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1443 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.19.ffn_down.weight.shard.4 = util.global.load @__auto.blk.19.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1444 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.19.ffn_down.weight.shard.5 = util.global.load @__auto.blk.19.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1445 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.19.ffn_down.weight.shard.6 = util.global.load @__auto.blk.19.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1446 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.19.ffn_down.weight.shard.7 = util.global.load @__auto.blk.19.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1447 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
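    // blk.20: same load pattern.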
    %__auto.blk.20.attn_norm.weight = util.global.load @__auto.blk.20.attn_norm.weight : tensor<4096xf32>
    %1448 = torch_c.from_builtin_tensor %__auto.blk.20.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.attn_norm.weight$1 = util.global.load @__auto.blk.20.attn_norm.weight$1 : tensor<4096xf32>
    %1449 = torch_c.from_builtin_tensor %__auto.blk.20.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.attn_norm.weight$2 = util.global.load @__auto.blk.20.attn_norm.weight$2 : tensor<4096xf32>
    %1450 = torch_c.from_builtin_tensor %__auto.blk.20.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.attn_norm.weight$3 = util.global.load @__auto.blk.20.attn_norm.weight$3 : tensor<4096xf32>
    %1451 = torch_c.from_builtin_tensor %__auto.blk.20.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.attn_norm.weight$4 = util.global.load @__auto.blk.20.attn_norm.weight$4 : tensor<4096xf32>
    %1452 = torch_c.from_builtin_tensor %__auto.blk.20.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.attn_norm.weight$5 = util.global.load @__auto.blk.20.attn_norm.weight$5 : tensor<4096xf32>
    %1453 = torch_c.from_builtin_tensor %__auto.blk.20.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.attn_norm.weight$6 = util.global.load @__auto.blk.20.attn_norm.weight$6 : tensor<4096xf32>
    %1454 = torch_c.from_builtin_tensor %__auto.blk.20.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.attn_norm.weight$7 = util.global.load @__auto.blk.20.attn_norm.weight$7 : tensor<4096xf32>
    %1455 = torch_c.from_builtin_tensor %__auto.blk.20.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.attn_q.weight.shard.0 = util.global.load @__auto.blk.20.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1456 = torch_c.from_builtin_tensor %__auto.blk.20.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.20.attn_q.weight.shard.1 = util.global.load @__auto.blk.20.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1457 = torch_c.from_builtin_tensor %__auto.blk.20.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.20.attn_q.weight.shard.2 = util.global.load @__auto.blk.20.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1458 = torch_c.from_builtin_tensor %__auto.blk.20.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.20.attn_q.weight.shard.3 = util.global.load @__auto.blk.20.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1459 = torch_c.from_builtin_tensor %__auto.blk.20.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.20.attn_q.weight.shard.4 = util.global.load @__auto.blk.20.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1460 = torch_c.from_builtin_tensor %__auto.blk.20.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.20.attn_q.weight.shard.5 = util.global.load @__auto.blk.20.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1461 = torch_c.from_builtin_tensor %__auto.blk.20.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.20.attn_q.weight.shard.6 = util.global.load @__auto.blk.20.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1462 = torch_c.from_builtin_tensor %__auto.blk.20.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.20.attn_q.weight.shard.7 = util.global.load @__auto.blk.20.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1463 = torch_c.from_builtin_tensor %__auto.blk.20.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.20.attn_k.weight.shard.0 = util.global.load @__auto.blk.20.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1464 = torch_c.from_builtin_tensor %__auto.blk.20.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_k.weight.shard.1 = util.global.load @__auto.blk.20.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1465 = torch_c.from_builtin_tensor %__auto.blk.20.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_k.weight.shard.2 = util.global.load @__auto.blk.20.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1466 = torch_c.from_builtin_tensor %__auto.blk.20.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_k.weight.shard.3 = util.global.load @__auto.blk.20.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1467 = torch_c.from_builtin_tensor %__auto.blk.20.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_k.weight.shard.4 = util.global.load @__auto.blk.20.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1468 = torch_c.from_builtin_tensor %__auto.blk.20.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_k.weight.shard.5 = util.global.load @__auto.blk.20.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1469 = torch_c.from_builtin_tensor %__auto.blk.20.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_k.weight.shard.6 = util.global.load @__auto.blk.20.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1470 = torch_c.from_builtin_tensor %__auto.blk.20.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_k.weight.shard.7 = util.global.load @__auto.blk.20.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1471 = torch_c.from_builtin_tensor %__auto.blk.20.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_v.weight.shard.0 = util.global.load @__auto.blk.20.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1472 = torch_c.from_builtin_tensor %__auto.blk.20.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_v.weight.shard.1 = util.global.load @__auto.blk.20.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1473 = torch_c.from_builtin_tensor %__auto.blk.20.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_v.weight.shard.2 = util.global.load @__auto.blk.20.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1474 = torch_c.from_builtin_tensor %__auto.blk.20.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_v.weight.shard.3 = util.global.load @__auto.blk.20.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1475 = torch_c.from_builtin_tensor %__auto.blk.20.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_v.weight.shard.4 = util.global.load @__auto.blk.20.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1476 = torch_c.from_builtin_tensor %__auto.blk.20.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_v.weight.shard.5 = util.global.load @__auto.blk.20.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1477 = torch_c.from_builtin_tensor %__auto.blk.20.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_v.weight.shard.6 = util.global.load @__auto.blk.20.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1478 = torch_c.from_builtin_tensor %__auto.blk.20.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_v.weight.shard.7 = util.global.load @__auto.blk.20.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1479 = torch_c.from_builtin_tensor %__auto.blk.20.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.20.attn_output.weight.shard.0 = util.global.load @__auto.blk.20.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1480 = torch_c.from_builtin_tensor %__auto.blk.20.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.20.attn_output.weight.shard.1 = util.global.load @__auto.blk.20.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1481 = torch_c.from_builtin_tensor %__auto.blk.20.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.20.attn_output.weight.shard.2 = util.global.load @__auto.blk.20.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1482 = torch_c.from_builtin_tensor %__auto.blk.20.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.20.attn_output.weight.shard.3 = util.global.load @__auto.blk.20.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1483 = torch_c.from_builtin_tensor %__auto.blk.20.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.20.attn_output.weight.shard.4 = util.global.load @__auto.blk.20.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1484 = torch_c.from_builtin_tensor %__auto.blk.20.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.20.attn_output.weight.shard.5 = util.global.load @__auto.blk.20.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1485 = torch_c.from_builtin_tensor %__auto.blk.20.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.20.attn_output.weight.shard.6 = util.global.load @__auto.blk.20.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1486 = torch_c.from_builtin_tensor %__auto.blk.20.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.20.attn_output.weight.shard.7 = util.global.load @__auto.blk.20.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1487 = torch_c.from_builtin_tensor %__auto.blk.20.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.20.ffn_norm.weight = util.global.load @__auto.blk.20.ffn_norm.weight : tensor<4096xf32>
    %1488 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.ffn_norm.weight$1 = util.global.load @__auto.blk.20.ffn_norm.weight$1 : tensor<4096xf32>
    %1489 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.ffn_norm.weight$2 = util.global.load @__auto.blk.20.ffn_norm.weight$2 : tensor<4096xf32>
    %1490 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.ffn_norm.weight$3 = util.global.load @__auto.blk.20.ffn_norm.weight$3 : tensor<4096xf32>
    %1491 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.ffn_norm.weight$4 = util.global.load @__auto.blk.20.ffn_norm.weight$4 : tensor<4096xf32>
    %1492 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.ffn_norm.weight$5 = util.global.load @__auto.blk.20.ffn_norm.weight$5 : tensor<4096xf32>
    %1493 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.ffn_norm.weight$6 = util.global.load @__auto.blk.20.ffn_norm.weight$6 : tensor<4096xf32>
    %1494 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.ffn_norm.weight$7 = util.global.load @__auto.blk.20.ffn_norm.weight$7 : tensor<4096xf32>
    %1495 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.20.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.20.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1496 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.20.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1497 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.20.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1498 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.20.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1499 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.20.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1500 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.20.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1501 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.20.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1502 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.20.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1503 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_up.weight.shard.0 = util.global.load @__auto.blk.20.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1504 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_up.weight.shard.1 = util.global.load @__auto.blk.20.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1505 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_up.weight.shard.2 = util.global.load @__auto.blk.20.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1506 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_up.weight.shard.3 = util.global.load @__auto.blk.20.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1507 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_up.weight.shard.4 = util.global.load @__auto.blk.20.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1508 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_up.weight.shard.5 = util.global.load @__auto.blk.20.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1509 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_up.weight.shard.6 = util.global.load @__auto.blk.20.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1510 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_up.weight.shard.7 = util.global.load @__auto.blk.20.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1511 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.20.ffn_down.weight.shard.0 = util.global.load @__auto.blk.20.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1512 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.20.ffn_down.weight.shard.1 = util.global.load @__auto.blk.20.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1513 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.20.ffn_down.weight.shard.2 = util.global.load @__auto.blk.20.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1514 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.20.ffn_down.weight.shard.3 = util.global.load @__auto.blk.20.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1515 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.20.ffn_down.weight.shard.4 = util.global.load @__auto.blk.20.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1516 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.20.ffn_down.weight.shard.5 = util.global.load @__auto.blk.20.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1517 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.20.ffn_down.weight.shard.6 = util.global.load @__auto.blk.20.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1518 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.20.ffn_down.weight.shard.7 = util.global.load @__auto.blk.20.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1519 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
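    // blk.21: loads continue in the same pattern.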
    %__auto.blk.21.attn_norm.weight = util.global.load @__auto.blk.21.attn_norm.weight : tensor<4096xf32>
    %1520 = torch_c.from_builtin_tensor %__auto.blk.21.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.attn_norm.weight$1 = util.global.load @__auto.blk.21.attn_norm.weight$1 : tensor<4096xf32>
    %1521 = torch_c.from_builtin_tensor %__auto.blk.21.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.attn_norm.weight$2 = util.global.load @__auto.blk.21.attn_norm.weight$2 : tensor<4096xf32>
    %1522 = torch_c.from_builtin_tensor %__auto.blk.21.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.attn_norm.weight$3 = util.global.load @__auto.blk.21.attn_norm.weight$3 : tensor<4096xf32>
    %1523 = torch_c.from_builtin_tensor %__auto.blk.21.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.attn_norm.weight$4 = util.global.load @__auto.blk.21.attn_norm.weight$4 : tensor<4096xf32>
    %1524 = torch_c.from_builtin_tensor %__auto.blk.21.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.attn_norm.weight$5 = util.global.load @__auto.blk.21.attn_norm.weight$5 : tensor<4096xf32>
    %1525 = torch_c.from_builtin_tensor %__auto.blk.21.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.attn_norm.weight$6 = util.global.load @__auto.blk.21.attn_norm.weight$6 : tensor<4096xf32>
    %1526 = torch_c.from_builtin_tensor %__auto.blk.21.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.attn_norm.weight$7 = util.global.load @__auto.blk.21.attn_norm.weight$7 : tensor<4096xf32>
    %1527 = torch_c.from_builtin_tensor %__auto.blk.21.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.attn_q.weight.shard.0 = util.global.load @__auto.blk.21.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1528 = torch_c.from_builtin_tensor %__auto.blk.21.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.21.attn_q.weight.shard.1 = util.global.load @__auto.blk.21.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1529 = torch_c.from_builtin_tensor %__auto.blk.21.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.21.attn_q.weight.shard.2 = util.global.load @__auto.blk.21.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1530 = torch_c.from_builtin_tensor %__auto.blk.21.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.21.attn_q.weight.shard.3 = util.global.load @__auto.blk.21.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1531 = torch_c.from_builtin_tensor %__auto.blk.21.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.21.attn_q.weight.shard.4 = util.global.load @__auto.blk.21.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1532 = torch_c.from_builtin_tensor %__auto.blk.21.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.21.attn_q.weight.shard.5 = util.global.load @__auto.blk.21.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1533 = torch_c.from_builtin_tensor %__auto.blk.21.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.21.attn_q.weight.shard.6 = util.global.load @__auto.blk.21.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1534 = torch_c.from_builtin_tensor %__auto.blk.21.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.21.attn_q.weight.shard.7 = util.global.load @__auto.blk.21.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1535 = torch_c.from_builtin_tensor %__auto.blk.21.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.21.attn_k.weight.shard.0 = util.global.load @__auto.blk.21.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1536 = torch_c.from_builtin_tensor %__auto.blk.21.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_k.weight.shard.1 = util.global.load @__auto.blk.21.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1537 = torch_c.from_builtin_tensor %__auto.blk.21.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_k.weight.shard.2 = util.global.load @__auto.blk.21.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1538 = torch_c.from_builtin_tensor %__auto.blk.21.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_k.weight.shard.3 = util.global.load @__auto.blk.21.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1539 = torch_c.from_builtin_tensor %__auto.blk.21.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_k.weight.shard.4 = util.global.load @__auto.blk.21.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1540 = torch_c.from_builtin_tensor %__auto.blk.21.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_k.weight.shard.5 = util.global.load @__auto.blk.21.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1541 = torch_c.from_builtin_tensor %__auto.blk.21.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_k.weight.shard.6 = util.global.load @__auto.blk.21.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1542 = torch_c.from_builtin_tensor %__auto.blk.21.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_k.weight.shard.7 = util.global.load @__auto.blk.21.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1543 = torch_c.from_builtin_tensor %__auto.blk.21.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_v.weight.shard.0 = util.global.load @__auto.blk.21.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1544 = torch_c.from_builtin_tensor %__auto.blk.21.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_v.weight.shard.1 = util.global.load @__auto.blk.21.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1545 = torch_c.from_builtin_tensor %__auto.blk.21.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_v.weight.shard.2 = util.global.load @__auto.blk.21.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1546 = torch_c.from_builtin_tensor %__auto.blk.21.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_v.weight.shard.3 = util.global.load @__auto.blk.21.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1547 = torch_c.from_builtin_tensor %__auto.blk.21.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_v.weight.shard.4 = util.global.load @__auto.blk.21.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1548 = torch_c.from_builtin_tensor %__auto.blk.21.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_v.weight.shard.5 = util.global.load @__auto.blk.21.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1549 = torch_c.from_builtin_tensor %__auto.blk.21.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_v.weight.shard.6 = util.global.load @__auto.blk.21.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1550 = torch_c.from_builtin_tensor %__auto.blk.21.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_v.weight.shard.7 = util.global.load @__auto.blk.21.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1551 = torch_c.from_builtin_tensor %__auto.blk.21.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.21.attn_output.weight.shard.0 = util.global.load @__auto.blk.21.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1552 = torch_c.from_builtin_tensor %__auto.blk.21.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.21.attn_output.weight.shard.1 = util.global.load @__auto.blk.21.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1553 = torch_c.from_builtin_tensor %__auto.blk.21.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.21.attn_output.weight.shard.2 = util.global.load @__auto.blk.21.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1554 = torch_c.from_builtin_tensor %__auto.blk.21.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.21.attn_output.weight.shard.3 = util.global.load @__auto.blk.21.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1555 = torch_c.from_builtin_tensor %__auto.blk.21.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.21.attn_output.weight.shard.4 = util.global.load @__auto.blk.21.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1556 = torch_c.from_builtin_tensor %__auto.blk.21.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.21.attn_output.weight.shard.5 = util.global.load @__auto.blk.21.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1557 = torch_c.from_builtin_tensor %__auto.blk.21.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.21.attn_output.weight.shard.6 = util.global.load @__auto.blk.21.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1558 = torch_c.from_builtin_tensor %__auto.blk.21.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.21.attn_output.weight.shard.7 = util.global.load @__auto.blk.21.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1559 = torch_c.from_builtin_tensor %__auto.blk.21.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.21.ffn_norm.weight = util.global.load @__auto.blk.21.ffn_norm.weight : tensor<4096xf32>
    %1560 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.ffn_norm.weight$1 = util.global.load @__auto.blk.21.ffn_norm.weight$1 : tensor<4096xf32>
    %1561 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.ffn_norm.weight$2 = util.global.load @__auto.blk.21.ffn_norm.weight$2 : tensor<4096xf32>
    %1562 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.ffn_norm.weight$3 = util.global.load @__auto.blk.21.ffn_norm.weight$3 : tensor<4096xf32>
    %1563 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.ffn_norm.weight$4 = util.global.load @__auto.blk.21.ffn_norm.weight$4 : tensor<4096xf32>
    %1564 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.ffn_norm.weight$5 = util.global.load @__auto.blk.21.ffn_norm.weight$5 : tensor<4096xf32>
    %1565 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.ffn_norm.weight$6 = util.global.load @__auto.blk.21.ffn_norm.weight$6 : tensor<4096xf32>
    %1566 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.ffn_norm.weight$7 = util.global.load @__auto.blk.21.ffn_norm.weight$7 : tensor<4096xf32>
    %1567 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.21.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.21.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1568 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.21.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1569 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.21.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1570 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.21.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1571 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.21.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1572 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.21.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1573 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.21.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1574 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.21.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1575 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_up.weight.shard.0 = util.global.load @__auto.blk.21.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1576 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_up.weight.shard.1 = util.global.load @__auto.blk.21.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1577 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_up.weight.shard.2 = util.global.load @__auto.blk.21.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1578 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_up.weight.shard.3 = util.global.load @__auto.blk.21.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1579 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_up.weight.shard.4 = util.global.load @__auto.blk.21.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1580 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_up.weight.shard.5 = util.global.load @__auto.blk.21.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1581 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_up.weight.shard.6 = util.global.load @__auto.blk.21.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1582 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_up.weight.shard.7 = util.global.load @__auto.blk.21.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1583 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.21.ffn_down.weight.shard.0 = util.global.load @__auto.blk.21.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1584 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.21.ffn_down.weight.shard.1 = util.global.load @__auto.blk.21.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1585 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.21.ffn_down.weight.shard.2 = util.global.load @__auto.blk.21.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1586 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.21.ffn_down.weight.shard.3 = util.global.load @__auto.blk.21.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1587 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.21.ffn_down.weight.shard.4 = util.global.load @__auto.blk.21.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1588 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.21.ffn_down.weight.shard.5 = util.global.load @__auto.blk.21.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1589 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.21.ffn_down.weight.shard.6 = util.global.load @__auto.blk.21.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1590 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.21.ffn_down.weight.shard.7 = util.global.load @__auto.blk.21.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1591 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
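    // blk.22: same load/convert sequence as blk.21 (replicated norms; 8-way
    // sharded attention and FFN projections).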
    %__auto.blk.22.attn_norm.weight = util.global.load @__auto.blk.22.attn_norm.weight : tensor<4096xf32>
    %1592 = torch_c.from_builtin_tensor %__auto.blk.22.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.attn_norm.weight$1 = util.global.load @__auto.blk.22.attn_norm.weight$1 : tensor<4096xf32>
    %1593 = torch_c.from_builtin_tensor %__auto.blk.22.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.attn_norm.weight$2 = util.global.load @__auto.blk.22.attn_norm.weight$2 : tensor<4096xf32>
    %1594 = torch_c.from_builtin_tensor %__auto.blk.22.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.attn_norm.weight$3 = util.global.load @__auto.blk.22.attn_norm.weight$3 : tensor<4096xf32>
    %1595 = torch_c.from_builtin_tensor %__auto.blk.22.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.attn_norm.weight$4 = util.global.load @__auto.blk.22.attn_norm.weight$4 : tensor<4096xf32>
    %1596 = torch_c.from_builtin_tensor %__auto.blk.22.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.attn_norm.weight$5 = util.global.load @__auto.blk.22.attn_norm.weight$5 : tensor<4096xf32>
    %1597 = torch_c.from_builtin_tensor %__auto.blk.22.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.attn_norm.weight$6 = util.global.load @__auto.blk.22.attn_norm.weight$6 : tensor<4096xf32>
    %1598 = torch_c.from_builtin_tensor %__auto.blk.22.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.attn_norm.weight$7 = util.global.load @__auto.blk.22.attn_norm.weight$7 : tensor<4096xf32>
    %1599 = torch_c.from_builtin_tensor %__auto.blk.22.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.attn_q.weight.shard.0 = util.global.load @__auto.blk.22.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1600 = torch_c.from_builtin_tensor %__auto.blk.22.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.22.attn_q.weight.shard.1 = util.global.load @__auto.blk.22.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1601 = torch_c.from_builtin_tensor %__auto.blk.22.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.22.attn_q.weight.shard.2 = util.global.load @__auto.blk.22.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1602 = torch_c.from_builtin_tensor %__auto.blk.22.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.22.attn_q.weight.shard.3 = util.global.load @__auto.blk.22.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1603 = torch_c.from_builtin_tensor %__auto.blk.22.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.22.attn_q.weight.shard.4 = util.global.load @__auto.blk.22.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1604 = torch_c.from_builtin_tensor %__auto.blk.22.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.22.attn_q.weight.shard.5 = util.global.load @__auto.blk.22.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1605 = torch_c.from_builtin_tensor %__auto.blk.22.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.22.attn_q.weight.shard.6 = util.global.load @__auto.blk.22.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1606 = torch_c.from_builtin_tensor %__auto.blk.22.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.22.attn_q.weight.shard.7 = util.global.load @__auto.blk.22.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1607 = torch_c.from_builtin_tensor %__auto.blk.22.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.22.attn_k.weight.shard.0 = util.global.load @__auto.blk.22.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1608 = torch_c.from_builtin_tensor %__auto.blk.22.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_k.weight.shard.1 = util.global.load @__auto.blk.22.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1609 = torch_c.from_builtin_tensor %__auto.blk.22.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_k.weight.shard.2 = util.global.load @__auto.blk.22.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1610 = torch_c.from_builtin_tensor %__auto.blk.22.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_k.weight.shard.3 = util.global.load @__auto.blk.22.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1611 = torch_c.from_builtin_tensor %__auto.blk.22.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_k.weight.shard.4 = util.global.load @__auto.blk.22.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1612 = torch_c.from_builtin_tensor %__auto.blk.22.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_k.weight.shard.5 = util.global.load @__auto.blk.22.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1613 = torch_c.from_builtin_tensor %__auto.blk.22.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_k.weight.shard.6 = util.global.load @__auto.blk.22.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1614 = torch_c.from_builtin_tensor %__auto.blk.22.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_k.weight.shard.7 = util.global.load @__auto.blk.22.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1615 = torch_c.from_builtin_tensor %__auto.blk.22.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_v.weight.shard.0 = util.global.load @__auto.blk.22.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1616 = torch_c.from_builtin_tensor %__auto.blk.22.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_v.weight.shard.1 = util.global.load @__auto.blk.22.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1617 = torch_c.from_builtin_tensor %__auto.blk.22.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_v.weight.shard.2 = util.global.load @__auto.blk.22.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1618 = torch_c.from_builtin_tensor %__auto.blk.22.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_v.weight.shard.3 = util.global.load @__auto.blk.22.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1619 = torch_c.from_builtin_tensor %__auto.blk.22.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_v.weight.shard.4 = util.global.load @__auto.blk.22.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1620 = torch_c.from_builtin_tensor %__auto.blk.22.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_v.weight.shard.5 = util.global.load @__auto.blk.22.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1621 = torch_c.from_builtin_tensor %__auto.blk.22.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_v.weight.shard.6 = util.global.load @__auto.blk.22.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1622 = torch_c.from_builtin_tensor %__auto.blk.22.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_v.weight.shard.7 = util.global.load @__auto.blk.22.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1623 = torch_c.from_builtin_tensor %__auto.blk.22.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.22.attn_output.weight.shard.0 = util.global.load @__auto.blk.22.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1624 = torch_c.from_builtin_tensor %__auto.blk.22.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.22.attn_output.weight.shard.1 = util.global.load @__auto.blk.22.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1625 = torch_c.from_builtin_tensor %__auto.blk.22.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.22.attn_output.weight.shard.2 = util.global.load @__auto.blk.22.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1626 = torch_c.from_builtin_tensor %__auto.blk.22.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.22.attn_output.weight.shard.3 = util.global.load @__auto.blk.22.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1627 = torch_c.from_builtin_tensor %__auto.blk.22.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.22.attn_output.weight.shard.4 = util.global.load @__auto.blk.22.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1628 = torch_c.from_builtin_tensor %__auto.blk.22.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.22.attn_output.weight.shard.5 = util.global.load @__auto.blk.22.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1629 = torch_c.from_builtin_tensor %__auto.blk.22.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.22.attn_output.weight.shard.6 = util.global.load @__auto.blk.22.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1630 = torch_c.from_builtin_tensor %__auto.blk.22.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.22.attn_output.weight.shard.7 = util.global.load @__auto.blk.22.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1631 = torch_c.from_builtin_tensor %__auto.blk.22.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.22.ffn_norm.weight = util.global.load @__auto.blk.22.ffn_norm.weight : tensor<4096xf32>
    %1632 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.ffn_norm.weight$1 = util.global.load @__auto.blk.22.ffn_norm.weight$1 : tensor<4096xf32>
    %1633 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.ffn_norm.weight$2 = util.global.load @__auto.blk.22.ffn_norm.weight$2 : tensor<4096xf32>
    %1634 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.ffn_norm.weight$3 = util.global.load @__auto.blk.22.ffn_norm.weight$3 : tensor<4096xf32>
    %1635 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.ffn_norm.weight$4 = util.global.load @__auto.blk.22.ffn_norm.weight$4 : tensor<4096xf32>
    %1636 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.ffn_norm.weight$5 = util.global.load @__auto.blk.22.ffn_norm.weight$5 : tensor<4096xf32>
    %1637 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.ffn_norm.weight$6 = util.global.load @__auto.blk.22.ffn_norm.weight$6 : tensor<4096xf32>
    %1638 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.ffn_norm.weight$7 = util.global.load @__auto.blk.22.ffn_norm.weight$7 : tensor<4096xf32>
    %1639 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.22.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.22.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1640 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.22.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1641 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.22.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1642 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.22.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1643 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.22.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1644 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.22.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1645 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.22.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1646 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.22.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1647 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_up.weight.shard.0 = util.global.load @__auto.blk.22.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1648 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_up.weight.shard.1 = util.global.load @__auto.blk.22.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1649 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_up.weight.shard.2 = util.global.load @__auto.blk.22.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1650 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_up.weight.shard.3 = util.global.load @__auto.blk.22.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1651 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_up.weight.shard.4 = util.global.load @__auto.blk.22.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1652 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_up.weight.shard.5 = util.global.load @__auto.blk.22.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1653 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_up.weight.shard.6 = util.global.load @__auto.blk.22.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1654 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_up.weight.shard.7 = util.global.load @__auto.blk.22.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1655 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.22.ffn_down.weight.shard.0 = util.global.load @__auto.blk.22.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1656 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.22.ffn_down.weight.shard.1 = util.global.load @__auto.blk.22.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1657 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.22.ffn_down.weight.shard.2 = util.global.load @__auto.blk.22.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1658 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.22.ffn_down.weight.shard.3 = util.global.load @__auto.blk.22.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1659 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.22.ffn_down.weight.shard.4 = util.global.load @__auto.blk.22.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1660 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.22.ffn_down.weight.shard.5 = util.global.load @__auto.blk.22.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1661 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.22.ffn_down.weight.shard.6 = util.global.load @__auto.blk.22.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1662 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.22.ffn_down.weight.shard.7 = util.global.load @__auto.blk.22.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1663 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
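    // blk.23: identical structure; only the block index and SSA numbering change.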
    %__auto.blk.23.attn_norm.weight = util.global.load @__auto.blk.23.attn_norm.weight : tensor<4096xf32>
    %1664 = torch_c.from_builtin_tensor %__auto.blk.23.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.attn_norm.weight$1 = util.global.load @__auto.blk.23.attn_norm.weight$1 : tensor<4096xf32>
    %1665 = torch_c.from_builtin_tensor %__auto.blk.23.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.attn_norm.weight$2 = util.global.load @__auto.blk.23.attn_norm.weight$2 : tensor<4096xf32>
    %1666 = torch_c.from_builtin_tensor %__auto.blk.23.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.attn_norm.weight$3 = util.global.load @__auto.blk.23.attn_norm.weight$3 : tensor<4096xf32>
    %1667 = torch_c.from_builtin_tensor %__auto.blk.23.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.attn_norm.weight$4 = util.global.load @__auto.blk.23.attn_norm.weight$4 : tensor<4096xf32>
    %1668 = torch_c.from_builtin_tensor %__auto.blk.23.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.attn_norm.weight$5 = util.global.load @__auto.blk.23.attn_norm.weight$5 : tensor<4096xf32>
    %1669 = torch_c.from_builtin_tensor %__auto.blk.23.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.attn_norm.weight$6 = util.global.load @__auto.blk.23.attn_norm.weight$6 : tensor<4096xf32>
    %1670 = torch_c.from_builtin_tensor %__auto.blk.23.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.attn_norm.weight$7 = util.global.load @__auto.blk.23.attn_norm.weight$7 : tensor<4096xf32>
    %1671 = torch_c.from_builtin_tensor %__auto.blk.23.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.attn_q.weight.shard.0 = util.global.load @__auto.blk.23.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1672 = torch_c.from_builtin_tensor %__auto.blk.23.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.23.attn_q.weight.shard.1 = util.global.load @__auto.blk.23.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1673 = torch_c.from_builtin_tensor %__auto.blk.23.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.23.attn_q.weight.shard.2 = util.global.load @__auto.blk.23.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1674 = torch_c.from_builtin_tensor %__auto.blk.23.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.23.attn_q.weight.shard.3 = util.global.load @__auto.blk.23.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1675 = torch_c.from_builtin_tensor %__auto.blk.23.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.23.attn_q.weight.shard.4 = util.global.load @__auto.blk.23.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1676 = torch_c.from_builtin_tensor %__auto.blk.23.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.23.attn_q.weight.shard.5 = util.global.load @__auto.blk.23.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1677 = torch_c.from_builtin_tensor %__auto.blk.23.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.23.attn_q.weight.shard.6 = util.global.load @__auto.blk.23.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1678 = torch_c.from_builtin_tensor %__auto.blk.23.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.23.attn_q.weight.shard.7 = util.global.load @__auto.blk.23.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1679 = torch_c.from_builtin_tensor %__auto.blk.23.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.23.attn_k.weight.shard.0 = util.global.load @__auto.blk.23.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1680 = torch_c.from_builtin_tensor %__auto.blk.23.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_k.weight.shard.1 = util.global.load @__auto.blk.23.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1681 = torch_c.from_builtin_tensor %__auto.blk.23.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_k.weight.shard.2 = util.global.load @__auto.blk.23.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1682 = torch_c.from_builtin_tensor %__auto.blk.23.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_k.weight.shard.3 = util.global.load @__auto.blk.23.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1683 = torch_c.from_builtin_tensor %__auto.blk.23.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_k.weight.shard.4 = util.global.load @__auto.blk.23.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1684 = torch_c.from_builtin_tensor %__auto.blk.23.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_k.weight.shard.5 = util.global.load @__auto.blk.23.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1685 = torch_c.from_builtin_tensor %__auto.blk.23.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_k.weight.shard.6 = util.global.load @__auto.blk.23.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1686 = torch_c.from_builtin_tensor %__auto.blk.23.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_k.weight.shard.7 = util.global.load @__auto.blk.23.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1687 = torch_c.from_builtin_tensor %__auto.blk.23.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_v.weight.shard.0 = util.global.load @__auto.blk.23.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1688 = torch_c.from_builtin_tensor %__auto.blk.23.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_v.weight.shard.1 = util.global.load @__auto.blk.23.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1689 = torch_c.from_builtin_tensor %__auto.blk.23.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_v.weight.shard.2 = util.global.load @__auto.blk.23.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1690 = torch_c.from_builtin_tensor %__auto.blk.23.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_v.weight.shard.3 = util.global.load @__auto.blk.23.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1691 = torch_c.from_builtin_tensor %__auto.blk.23.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_v.weight.shard.4 = util.global.load @__auto.blk.23.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1692 = torch_c.from_builtin_tensor %__auto.blk.23.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_v.weight.shard.5 = util.global.load @__auto.blk.23.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1693 = torch_c.from_builtin_tensor %__auto.blk.23.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_v.weight.shard.6 = util.global.load @__auto.blk.23.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1694 = torch_c.from_builtin_tensor %__auto.blk.23.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_v.weight.shard.7 = util.global.load @__auto.blk.23.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1695 = torch_c.from_builtin_tensor %__auto.blk.23.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.23.attn_output.weight.shard.0 = util.global.load @__auto.blk.23.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1696 = torch_c.from_builtin_tensor %__auto.blk.23.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.23.attn_output.weight.shard.1 = util.global.load @__auto.blk.23.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1697 = torch_c.from_builtin_tensor %__auto.blk.23.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.23.attn_output.weight.shard.2 = util.global.load @__auto.blk.23.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1698 = torch_c.from_builtin_tensor %__auto.blk.23.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.23.attn_output.weight.shard.3 = util.global.load @__auto.blk.23.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1699 = torch_c.from_builtin_tensor %__auto.blk.23.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.23.attn_output.weight.shard.4 = util.global.load @__auto.blk.23.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1700 = torch_c.from_builtin_tensor %__auto.blk.23.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.23.attn_output.weight.shard.5 = util.global.load @__auto.blk.23.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1701 = torch_c.from_builtin_tensor %__auto.blk.23.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.23.attn_output.weight.shard.6 = util.global.load @__auto.blk.23.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1702 = torch_c.from_builtin_tensor %__auto.blk.23.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.23.attn_output.weight.shard.7 = util.global.load @__auto.blk.23.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1703 = torch_c.from_builtin_tensor %__auto.blk.23.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.23.ffn_norm.weight = util.global.load @__auto.blk.23.ffn_norm.weight : tensor<4096xf32>
    %1704 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.ffn_norm.weight$1 = util.global.load @__auto.blk.23.ffn_norm.weight$1 : tensor<4096xf32>
    %1705 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.ffn_norm.weight$2 = util.global.load @__auto.blk.23.ffn_norm.weight$2 : tensor<4096xf32>
    %1706 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.ffn_norm.weight$3 = util.global.load @__auto.blk.23.ffn_norm.weight$3 : tensor<4096xf32>
    %1707 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.ffn_norm.weight$4 = util.global.load @__auto.blk.23.ffn_norm.weight$4 : tensor<4096xf32>
    %1708 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.ffn_norm.weight$5 = util.global.load @__auto.blk.23.ffn_norm.weight$5 : tensor<4096xf32>
    %1709 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.ffn_norm.weight$6 = util.global.load @__auto.blk.23.ffn_norm.weight$6 : tensor<4096xf32>
    %1710 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.ffn_norm.weight$7 = util.global.load @__auto.blk.23.ffn_norm.weight$7 : tensor<4096xf32>
    %1711 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.23.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.23.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1712 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.23.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1713 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.23.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1714 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.23.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1715 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.23.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1716 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.23.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1717 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.23.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1718 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.23.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1719 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_up.weight.shard.0 = util.global.load @__auto.blk.23.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1720 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_up.weight.shard.1 = util.global.load @__auto.blk.23.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1721 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_up.weight.shard.2 = util.global.load @__auto.blk.23.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1722 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_up.weight.shard.3 = util.global.load @__auto.blk.23.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1723 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_up.weight.shard.4 = util.global.load @__auto.blk.23.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1724 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_up.weight.shard.5 = util.global.load @__auto.blk.23.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1725 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_up.weight.shard.6 = util.global.load @__auto.blk.23.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1726 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_up.weight.shard.7 = util.global.load @__auto.blk.23.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1727 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.23.ffn_down.weight.shard.0 = util.global.load @__auto.blk.23.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1728 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.23.ffn_down.weight.shard.1 = util.global.load @__auto.blk.23.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1729 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.23.ffn_down.weight.shard.2 = util.global.load @__auto.blk.23.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1730 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.23.ffn_down.weight.shard.3 = util.global.load @__auto.blk.23.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1731 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.23.ffn_down.weight.shard.4 = util.global.load @__auto.blk.23.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1732 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.23.ffn_down.weight.shard.5 = util.global.load @__auto.blk.23.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1733 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.23.ffn_down.weight.shard.6 = util.global.load @__auto.blk.23.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1734 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.23.ffn_down.weight.shard.7 = util.global.load @__auto.blk.23.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1735 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
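    // blk.24: same per-block pattern (replicated norms; q/k/v, output, and FFN
    // weights loaded shard-by-shard).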
    %__auto.blk.24.attn_norm.weight = util.global.load @__auto.blk.24.attn_norm.weight : tensor<4096xf32>
    %1736 = torch_c.from_builtin_tensor %__auto.blk.24.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.attn_norm.weight$1 = util.global.load @__auto.blk.24.attn_norm.weight$1 : tensor<4096xf32>
    %1737 = torch_c.from_builtin_tensor %__auto.blk.24.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.attn_norm.weight$2 = util.global.load @__auto.blk.24.attn_norm.weight$2 : tensor<4096xf32>
    %1738 = torch_c.from_builtin_tensor %__auto.blk.24.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.attn_norm.weight$3 = util.global.load @__auto.blk.24.attn_norm.weight$3 : tensor<4096xf32>
    %1739 = torch_c.from_builtin_tensor %__auto.blk.24.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.attn_norm.weight$4 = util.global.load @__auto.blk.24.attn_norm.weight$4 : tensor<4096xf32>
    %1740 = torch_c.from_builtin_tensor %__auto.blk.24.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.attn_norm.weight$5 = util.global.load @__auto.blk.24.attn_norm.weight$5 : tensor<4096xf32>
    %1741 = torch_c.from_builtin_tensor %__auto.blk.24.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.attn_norm.weight$6 = util.global.load @__auto.blk.24.attn_norm.weight$6 : tensor<4096xf32>
    %1742 = torch_c.from_builtin_tensor %__auto.blk.24.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.attn_norm.weight$7 = util.global.load @__auto.blk.24.attn_norm.weight$7 : tensor<4096xf32>
    %1743 = torch_c.from_builtin_tensor %__auto.blk.24.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.attn_q.weight.shard.0 = util.global.load @__auto.blk.24.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1744 = torch_c.from_builtin_tensor %__auto.blk.24.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.24.attn_q.weight.shard.1 = util.global.load @__auto.blk.24.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1745 = torch_c.from_builtin_tensor %__auto.blk.24.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.24.attn_q.weight.shard.2 = util.global.load @__auto.blk.24.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1746 = torch_c.from_builtin_tensor %__auto.blk.24.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.24.attn_q.weight.shard.3 = util.global.load @__auto.blk.24.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1747 = torch_c.from_builtin_tensor %__auto.blk.24.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.24.attn_q.weight.shard.4 = util.global.load @__auto.blk.24.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1748 = torch_c.from_builtin_tensor %__auto.blk.24.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.24.attn_q.weight.shard.5 = util.global.load @__auto.blk.24.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1749 = torch_c.from_builtin_tensor %__auto.blk.24.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.24.attn_q.weight.shard.6 = util.global.load @__auto.blk.24.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1750 = torch_c.from_builtin_tensor %__auto.blk.24.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.24.attn_q.weight.shard.7 = util.global.load @__auto.blk.24.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1751 = torch_c.from_builtin_tensor %__auto.blk.24.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.24.attn_k.weight.shard.0 = util.global.load @__auto.blk.24.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1752 = torch_c.from_builtin_tensor %__auto.blk.24.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_k.weight.shard.1 = util.global.load @__auto.blk.24.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1753 = torch_c.from_builtin_tensor %__auto.blk.24.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_k.weight.shard.2 = util.global.load @__auto.blk.24.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1754 = torch_c.from_builtin_tensor %__auto.blk.24.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_k.weight.shard.3 = util.global.load @__auto.blk.24.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1755 = torch_c.from_builtin_tensor %__auto.blk.24.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_k.weight.shard.4 = util.global.load @__auto.blk.24.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1756 = torch_c.from_builtin_tensor %__auto.blk.24.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_k.weight.shard.5 = util.global.load @__auto.blk.24.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1757 = torch_c.from_builtin_tensor %__auto.blk.24.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_k.weight.shard.6 = util.global.load @__auto.blk.24.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1758 = torch_c.from_builtin_tensor %__auto.blk.24.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_k.weight.shard.7 = util.global.load @__auto.blk.24.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1759 = torch_c.from_builtin_tensor %__auto.blk.24.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_v.weight.shard.0 = util.global.load @__auto.blk.24.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1760 = torch_c.from_builtin_tensor %__auto.blk.24.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_v.weight.shard.1 = util.global.load @__auto.blk.24.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1761 = torch_c.from_builtin_tensor %__auto.blk.24.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_v.weight.shard.2 = util.global.load @__auto.blk.24.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1762 = torch_c.from_builtin_tensor %__auto.blk.24.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_v.weight.shard.3 = util.global.load @__auto.blk.24.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1763 = torch_c.from_builtin_tensor %__auto.blk.24.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_v.weight.shard.4 = util.global.load @__auto.blk.24.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1764 = torch_c.from_builtin_tensor %__auto.blk.24.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_v.weight.shard.5 = util.global.load @__auto.blk.24.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1765 = torch_c.from_builtin_tensor %__auto.blk.24.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_v.weight.shard.6 = util.global.load @__auto.blk.24.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1766 = torch_c.from_builtin_tensor %__auto.blk.24.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_v.weight.shard.7 = util.global.load @__auto.blk.24.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1767 = torch_c.from_builtin_tensor %__auto.blk.24.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.24.attn_output.weight.shard.0 = util.global.load @__auto.blk.24.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1768 = torch_c.from_builtin_tensor %__auto.blk.24.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.24.attn_output.weight.shard.1 = util.global.load @__auto.blk.24.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1769 = torch_c.from_builtin_tensor %__auto.blk.24.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.24.attn_output.weight.shard.2 = util.global.load @__auto.blk.24.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1770 = torch_c.from_builtin_tensor %__auto.blk.24.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.24.attn_output.weight.shard.3 = util.global.load @__auto.blk.24.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1771 = torch_c.from_builtin_tensor %__auto.blk.24.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.24.attn_output.weight.shard.4 = util.global.load @__auto.blk.24.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1772 = torch_c.from_builtin_tensor %__auto.blk.24.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.24.attn_output.weight.shard.5 = util.global.load @__auto.blk.24.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1773 = torch_c.from_builtin_tensor %__auto.blk.24.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.24.attn_output.weight.shard.6 = util.global.load @__auto.blk.24.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1774 = torch_c.from_builtin_tensor %__auto.blk.24.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.24.attn_output.weight.shard.7 = util.global.load @__auto.blk.24.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1775 = torch_c.from_builtin_tensor %__auto.blk.24.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.24.ffn_norm.weight = util.global.load @__auto.blk.24.ffn_norm.weight : tensor<4096xf32>
    %1776 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.ffn_norm.weight$1 = util.global.load @__auto.blk.24.ffn_norm.weight$1 : tensor<4096xf32>
    %1777 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.ffn_norm.weight$2 = util.global.load @__auto.blk.24.ffn_norm.weight$2 : tensor<4096xf32>
    %1778 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.ffn_norm.weight$3 = util.global.load @__auto.blk.24.ffn_norm.weight$3 : tensor<4096xf32>
    %1779 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.ffn_norm.weight$4 = util.global.load @__auto.blk.24.ffn_norm.weight$4 : tensor<4096xf32>
    %1780 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.ffn_norm.weight$5 = util.global.load @__auto.blk.24.ffn_norm.weight$5 : tensor<4096xf32>
    %1781 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.ffn_norm.weight$6 = util.global.load @__auto.blk.24.ffn_norm.weight$6 : tensor<4096xf32>
    %1782 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.ffn_norm.weight$7 = util.global.load @__auto.blk.24.ffn_norm.weight$7 : tensor<4096xf32>
    %1783 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.24.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.24.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1784 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.24.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1785 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.24.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1786 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.24.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1787 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.24.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1788 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.24.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1789 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.24.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1790 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.24.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1791 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_up.weight.shard.0 = util.global.load @__auto.blk.24.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1792 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_up.weight.shard.1 = util.global.load @__auto.blk.24.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1793 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_up.weight.shard.2 = util.global.load @__auto.blk.24.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1794 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_up.weight.shard.3 = util.global.load @__auto.blk.24.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1795 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_up.weight.shard.4 = util.global.load @__auto.blk.24.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1796 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_up.weight.shard.5 = util.global.load @__auto.blk.24.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1797 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_up.weight.shard.6 = util.global.load @__auto.blk.24.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1798 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_up.weight.shard.7 = util.global.load @__auto.blk.24.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1799 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.24.ffn_down.weight.shard.0 = util.global.load @__auto.blk.24.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1800 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.24.ffn_down.weight.shard.1 = util.global.load @__auto.blk.24.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1801 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.24.ffn_down.weight.shard.2 = util.global.load @__auto.blk.24.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1802 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.24.ffn_down.weight.shard.3 = util.global.load @__auto.blk.24.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1803 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.24.ffn_down.weight.shard.4 = util.global.load @__auto.blk.24.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1804 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.24.ffn_down.weight.shard.5 = util.global.load @__auto.blk.24.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1805 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.24.ffn_down.weight.shard.6 = util.global.load @__auto.blk.24.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1806 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.24.ffn_down.weight.shard.7 = util.global.load @__auto.blk.24.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1807 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
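    // Block 25: same replicated-norm and 8-way-shard load/convert pattern as block 24.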
    %__auto.blk.25.attn_norm.weight = util.global.load @__auto.blk.25.attn_norm.weight : tensor<4096xf32>
    %1808 = torch_c.from_builtin_tensor %__auto.blk.25.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.attn_norm.weight$1 = util.global.load @__auto.blk.25.attn_norm.weight$1 : tensor<4096xf32>
    %1809 = torch_c.from_builtin_tensor %__auto.blk.25.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.attn_norm.weight$2 = util.global.load @__auto.blk.25.attn_norm.weight$2 : tensor<4096xf32>
    %1810 = torch_c.from_builtin_tensor %__auto.blk.25.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.attn_norm.weight$3 = util.global.load @__auto.blk.25.attn_norm.weight$3 : tensor<4096xf32>
    %1811 = torch_c.from_builtin_tensor %__auto.blk.25.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.attn_norm.weight$4 = util.global.load @__auto.blk.25.attn_norm.weight$4 : tensor<4096xf32>
    %1812 = torch_c.from_builtin_tensor %__auto.blk.25.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.attn_norm.weight$5 = util.global.load @__auto.blk.25.attn_norm.weight$5 : tensor<4096xf32>
    %1813 = torch_c.from_builtin_tensor %__auto.blk.25.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.attn_norm.weight$6 = util.global.load @__auto.blk.25.attn_norm.weight$6 : tensor<4096xf32>
    %1814 = torch_c.from_builtin_tensor %__auto.blk.25.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.attn_norm.weight$7 = util.global.load @__auto.blk.25.attn_norm.weight$7 : tensor<4096xf32>
    %1815 = torch_c.from_builtin_tensor %__auto.blk.25.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.attn_q.weight.shard.0 = util.global.load @__auto.blk.25.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1816 = torch_c.from_builtin_tensor %__auto.blk.25.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.25.attn_q.weight.shard.1 = util.global.load @__auto.blk.25.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1817 = torch_c.from_builtin_tensor %__auto.blk.25.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.25.attn_q.weight.shard.2 = util.global.load @__auto.blk.25.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1818 = torch_c.from_builtin_tensor %__auto.blk.25.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.25.attn_q.weight.shard.3 = util.global.load @__auto.blk.25.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1819 = torch_c.from_builtin_tensor %__auto.blk.25.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.25.attn_q.weight.shard.4 = util.global.load @__auto.blk.25.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1820 = torch_c.from_builtin_tensor %__auto.blk.25.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.25.attn_q.weight.shard.5 = util.global.load @__auto.blk.25.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1821 = torch_c.from_builtin_tensor %__auto.blk.25.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.25.attn_q.weight.shard.6 = util.global.load @__auto.blk.25.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1822 = torch_c.from_builtin_tensor %__auto.blk.25.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.25.attn_q.weight.shard.7 = util.global.load @__auto.blk.25.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1823 = torch_c.from_builtin_tensor %__auto.blk.25.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.25.attn_k.weight.shard.0 = util.global.load @__auto.blk.25.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1824 = torch_c.from_builtin_tensor %__auto.blk.25.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_k.weight.shard.1 = util.global.load @__auto.blk.25.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1825 = torch_c.from_builtin_tensor %__auto.blk.25.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_k.weight.shard.2 = util.global.load @__auto.blk.25.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1826 = torch_c.from_builtin_tensor %__auto.blk.25.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_k.weight.shard.3 = util.global.load @__auto.blk.25.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1827 = torch_c.from_builtin_tensor %__auto.blk.25.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_k.weight.shard.4 = util.global.load @__auto.blk.25.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1828 = torch_c.from_builtin_tensor %__auto.blk.25.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_k.weight.shard.5 = util.global.load @__auto.blk.25.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1829 = torch_c.from_builtin_tensor %__auto.blk.25.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_k.weight.shard.6 = util.global.load @__auto.blk.25.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1830 = torch_c.from_builtin_tensor %__auto.blk.25.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_k.weight.shard.7 = util.global.load @__auto.blk.25.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1831 = torch_c.from_builtin_tensor %__auto.blk.25.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_v.weight.shard.0 = util.global.load @__auto.blk.25.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1832 = torch_c.from_builtin_tensor %__auto.blk.25.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_v.weight.shard.1 = util.global.load @__auto.blk.25.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1833 = torch_c.from_builtin_tensor %__auto.blk.25.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_v.weight.shard.2 = util.global.load @__auto.blk.25.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1834 = torch_c.from_builtin_tensor %__auto.blk.25.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_v.weight.shard.3 = util.global.load @__auto.blk.25.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1835 = torch_c.from_builtin_tensor %__auto.blk.25.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_v.weight.shard.4 = util.global.load @__auto.blk.25.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1836 = torch_c.from_builtin_tensor %__auto.blk.25.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_v.weight.shard.5 = util.global.load @__auto.blk.25.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1837 = torch_c.from_builtin_tensor %__auto.blk.25.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_v.weight.shard.6 = util.global.load @__auto.blk.25.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1838 = torch_c.from_builtin_tensor %__auto.blk.25.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_v.weight.shard.7 = util.global.load @__auto.blk.25.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1839 = torch_c.from_builtin_tensor %__auto.blk.25.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.25.attn_output.weight.shard.0 = util.global.load @__auto.blk.25.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1840 = torch_c.from_builtin_tensor %__auto.blk.25.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.25.attn_output.weight.shard.1 = util.global.load @__auto.blk.25.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1841 = torch_c.from_builtin_tensor %__auto.blk.25.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.25.attn_output.weight.shard.2 = util.global.load @__auto.blk.25.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1842 = torch_c.from_builtin_tensor %__auto.blk.25.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.25.attn_output.weight.shard.3 = util.global.load @__auto.blk.25.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1843 = torch_c.from_builtin_tensor %__auto.blk.25.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.25.attn_output.weight.shard.4 = util.global.load @__auto.blk.25.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1844 = torch_c.from_builtin_tensor %__auto.blk.25.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.25.attn_output.weight.shard.5 = util.global.load @__auto.blk.25.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1845 = torch_c.from_builtin_tensor %__auto.blk.25.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.25.attn_output.weight.shard.6 = util.global.load @__auto.blk.25.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1846 = torch_c.from_builtin_tensor %__auto.blk.25.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.25.attn_output.weight.shard.7 = util.global.load @__auto.blk.25.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1847 = torch_c.from_builtin_tensor %__auto.blk.25.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.25.ffn_norm.weight = util.global.load @__auto.blk.25.ffn_norm.weight : tensor<4096xf32>
    %1848 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.ffn_norm.weight$1 = util.global.load @__auto.blk.25.ffn_norm.weight$1 : tensor<4096xf32>
    %1849 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.ffn_norm.weight$2 = util.global.load @__auto.blk.25.ffn_norm.weight$2 : tensor<4096xf32>
    %1850 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.ffn_norm.weight$3 = util.global.load @__auto.blk.25.ffn_norm.weight$3 : tensor<4096xf32>
    %1851 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.ffn_norm.weight$4 = util.global.load @__auto.blk.25.ffn_norm.weight$4 : tensor<4096xf32>
    %1852 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.ffn_norm.weight$5 = util.global.load @__auto.blk.25.ffn_norm.weight$5 : tensor<4096xf32>
    %1853 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.ffn_norm.weight$6 = util.global.load @__auto.blk.25.ffn_norm.weight$6 : tensor<4096xf32>
    %1854 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.ffn_norm.weight$7 = util.global.load @__auto.blk.25.ffn_norm.weight$7 : tensor<4096xf32>
    %1855 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.25.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.25.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1856 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.25.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1857 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.25.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1858 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.25.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1859 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.25.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1860 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.25.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1861 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.25.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1862 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.25.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1863 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_up.weight.shard.0 = util.global.load @__auto.blk.25.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1864 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_up.weight.shard.1 = util.global.load @__auto.blk.25.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1865 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_up.weight.shard.2 = util.global.load @__auto.blk.25.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1866 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_up.weight.shard.3 = util.global.load @__auto.blk.25.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1867 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_up.weight.shard.4 = util.global.load @__auto.blk.25.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1868 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_up.weight.shard.5 = util.global.load @__auto.blk.25.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1869 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_up.weight.shard.6 = util.global.load @__auto.blk.25.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1870 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_up.weight.shard.7 = util.global.load @__auto.blk.25.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1871 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.25.ffn_down.weight.shard.0 = util.global.load @__auto.blk.25.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1872 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.25.ffn_down.weight.shard.1 = util.global.load @__auto.blk.25.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1873 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.25.ffn_down.weight.shard.2 = util.global.load @__auto.blk.25.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1874 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.25.ffn_down.weight.shard.3 = util.global.load @__auto.blk.25.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1875 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.25.ffn_down.weight.shard.4 = util.global.load @__auto.blk.25.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1876 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.25.ffn_down.weight.shard.5 = util.global.load @__auto.blk.25.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1877 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.25.ffn_down.weight.shard.6 = util.global.load @__auto.blk.25.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1878 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.25.ffn_down.weight.shard.7 = util.global.load @__auto.blk.25.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1879 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
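    // Block 26 parameter loads.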
    %__auto.blk.26.attn_norm.weight = util.global.load @__auto.blk.26.attn_norm.weight : tensor<4096xf32>
    %1880 = torch_c.from_builtin_tensor %__auto.blk.26.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.attn_norm.weight$1 = util.global.load @__auto.blk.26.attn_norm.weight$1 : tensor<4096xf32>
    %1881 = torch_c.from_builtin_tensor %__auto.blk.26.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.attn_norm.weight$2 = util.global.load @__auto.blk.26.attn_norm.weight$2 : tensor<4096xf32>
    %1882 = torch_c.from_builtin_tensor %__auto.blk.26.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.attn_norm.weight$3 = util.global.load @__auto.blk.26.attn_norm.weight$3 : tensor<4096xf32>
    %1883 = torch_c.from_builtin_tensor %__auto.blk.26.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.attn_norm.weight$4 = util.global.load @__auto.blk.26.attn_norm.weight$4 : tensor<4096xf32>
    %1884 = torch_c.from_builtin_tensor %__auto.blk.26.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.attn_norm.weight$5 = util.global.load @__auto.blk.26.attn_norm.weight$5 : tensor<4096xf32>
    %1885 = torch_c.from_builtin_tensor %__auto.blk.26.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.attn_norm.weight$6 = util.global.load @__auto.blk.26.attn_norm.weight$6 : tensor<4096xf32>
    %1886 = torch_c.from_builtin_tensor %__auto.blk.26.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.attn_norm.weight$7 = util.global.load @__auto.blk.26.attn_norm.weight$7 : tensor<4096xf32>
    %1887 = torch_c.from_builtin_tensor %__auto.blk.26.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.attn_q.weight.shard.0 = util.global.load @__auto.blk.26.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1888 = torch_c.from_builtin_tensor %__auto.blk.26.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.26.attn_q.weight.shard.1 = util.global.load @__auto.blk.26.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1889 = torch_c.from_builtin_tensor %__auto.blk.26.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.26.attn_q.weight.shard.2 = util.global.load @__auto.blk.26.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1890 = torch_c.from_builtin_tensor %__auto.blk.26.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.26.attn_q.weight.shard.3 = util.global.load @__auto.blk.26.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1891 = torch_c.from_builtin_tensor %__auto.blk.26.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.26.attn_q.weight.shard.4 = util.global.load @__auto.blk.26.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1892 = torch_c.from_builtin_tensor %__auto.blk.26.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.26.attn_q.weight.shard.5 = util.global.load @__auto.blk.26.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1893 = torch_c.from_builtin_tensor %__auto.blk.26.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.26.attn_q.weight.shard.6 = util.global.load @__auto.blk.26.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1894 = torch_c.from_builtin_tensor %__auto.blk.26.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.26.attn_q.weight.shard.7 = util.global.load @__auto.blk.26.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1895 = torch_c.from_builtin_tensor %__auto.blk.26.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.26.attn_k.weight.shard.0 = util.global.load @__auto.blk.26.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1896 = torch_c.from_builtin_tensor %__auto.blk.26.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_k.weight.shard.1 = util.global.load @__auto.blk.26.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1897 = torch_c.from_builtin_tensor %__auto.blk.26.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_k.weight.shard.2 = util.global.load @__auto.blk.26.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1898 = torch_c.from_builtin_tensor %__auto.blk.26.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_k.weight.shard.3 = util.global.load @__auto.blk.26.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1899 = torch_c.from_builtin_tensor %__auto.blk.26.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_k.weight.shard.4 = util.global.load @__auto.blk.26.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1900 = torch_c.from_builtin_tensor %__auto.blk.26.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_k.weight.shard.5 = util.global.load @__auto.blk.26.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1901 = torch_c.from_builtin_tensor %__auto.blk.26.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_k.weight.shard.6 = util.global.load @__auto.blk.26.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1902 = torch_c.from_builtin_tensor %__auto.blk.26.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_k.weight.shard.7 = util.global.load @__auto.blk.26.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1903 = torch_c.from_builtin_tensor %__auto.blk.26.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_v.weight.shard.0 = util.global.load @__auto.blk.26.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1904 = torch_c.from_builtin_tensor %__auto.blk.26.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_v.weight.shard.1 = util.global.load @__auto.blk.26.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1905 = torch_c.from_builtin_tensor %__auto.blk.26.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_v.weight.shard.2 = util.global.load @__auto.blk.26.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1906 = torch_c.from_builtin_tensor %__auto.blk.26.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_v.weight.shard.3 = util.global.load @__auto.blk.26.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1907 = torch_c.from_builtin_tensor %__auto.blk.26.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_v.weight.shard.4 = util.global.load @__auto.blk.26.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1908 = torch_c.from_builtin_tensor %__auto.blk.26.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_v.weight.shard.5 = util.global.load @__auto.blk.26.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1909 = torch_c.from_builtin_tensor %__auto.blk.26.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_v.weight.shard.6 = util.global.load @__auto.blk.26.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1910 = torch_c.from_builtin_tensor %__auto.blk.26.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_v.weight.shard.7 = util.global.load @__auto.blk.26.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1911 = torch_c.from_builtin_tensor %__auto.blk.26.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.26.attn_output.weight.shard.0 = util.global.load @__auto.blk.26.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1912 = torch_c.from_builtin_tensor %__auto.blk.26.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.26.attn_output.weight.shard.1 = util.global.load @__auto.blk.26.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1913 = torch_c.from_builtin_tensor %__auto.blk.26.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.26.attn_output.weight.shard.2 = util.global.load @__auto.blk.26.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1914 = torch_c.from_builtin_tensor %__auto.blk.26.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.26.attn_output.weight.shard.3 = util.global.load @__auto.blk.26.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1915 = torch_c.from_builtin_tensor %__auto.blk.26.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.26.attn_output.weight.shard.4 = util.global.load @__auto.blk.26.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1916 = torch_c.from_builtin_tensor %__auto.blk.26.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.26.attn_output.weight.shard.5 = util.global.load @__auto.blk.26.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1917 = torch_c.from_builtin_tensor %__auto.blk.26.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.26.attn_output.weight.shard.6 = util.global.load @__auto.blk.26.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1918 = torch_c.from_builtin_tensor %__auto.blk.26.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.26.attn_output.weight.shard.7 = util.global.load @__auto.blk.26.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1919 = torch_c.from_builtin_tensor %__auto.blk.26.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.26.ffn_norm.weight = util.global.load @__auto.blk.26.ffn_norm.weight : tensor<4096xf32>
    %1920 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.ffn_norm.weight$1 = util.global.load @__auto.blk.26.ffn_norm.weight$1 : tensor<4096xf32>
    %1921 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.ffn_norm.weight$2 = util.global.load @__auto.blk.26.ffn_norm.weight$2 : tensor<4096xf32>
    %1922 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.ffn_norm.weight$3 = util.global.load @__auto.blk.26.ffn_norm.weight$3 : tensor<4096xf32>
    %1923 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.ffn_norm.weight$4 = util.global.load @__auto.blk.26.ffn_norm.weight$4 : tensor<4096xf32>
    %1924 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.ffn_norm.weight$5 = util.global.load @__auto.blk.26.ffn_norm.weight$5 : tensor<4096xf32>
    %1925 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.ffn_norm.weight$6 = util.global.load @__auto.blk.26.ffn_norm.weight$6 : tensor<4096xf32>
    %1926 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.ffn_norm.weight$7 = util.global.load @__auto.blk.26.ffn_norm.weight$7 : tensor<4096xf32>
    %1927 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.26.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.26.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %1928 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.26.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %1929 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.26.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %1930 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.26.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %1931 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.26.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %1932 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.26.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %1933 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.26.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %1934 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.26.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %1935 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_up.weight.shard.0 = util.global.load @__auto.blk.26.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %1936 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_up.weight.shard.1 = util.global.load @__auto.blk.26.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %1937 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_up.weight.shard.2 = util.global.load @__auto.blk.26.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %1938 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_up.weight.shard.3 = util.global.load @__auto.blk.26.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %1939 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_up.weight.shard.4 = util.global.load @__auto.blk.26.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %1940 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_up.weight.shard.5 = util.global.load @__auto.blk.26.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %1941 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_up.weight.shard.6 = util.global.load @__auto.blk.26.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %1942 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_up.weight.shard.7 = util.global.load @__auto.blk.26.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %1943 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.26.ffn_down.weight.shard.0 = util.global.load @__auto.blk.26.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %1944 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.26.ffn_down.weight.shard.1 = util.global.load @__auto.blk.26.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %1945 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.26.ffn_down.weight.shard.2 = util.global.load @__auto.blk.26.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %1946 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.26.ffn_down.weight.shard.3 = util.global.load @__auto.blk.26.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %1947 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.26.ffn_down.weight.shard.4 = util.global.load @__auto.blk.26.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %1948 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.26.ffn_down.weight.shard.5 = util.global.load @__auto.blk.26.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %1949 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.26.ffn_down.weight.shard.6 = util.global.load @__auto.blk.26.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %1950 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.26.ffn_down.weight.shard.7 = util.global.load @__auto.blk.26.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %1951 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
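    // Block 27 parameter loads.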
    %__auto.blk.27.attn_norm.weight = util.global.load @__auto.blk.27.attn_norm.weight : tensor<4096xf32>
    %1952 = torch_c.from_builtin_tensor %__auto.blk.27.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.attn_norm.weight$1 = util.global.load @__auto.blk.27.attn_norm.weight$1 : tensor<4096xf32>
    %1953 = torch_c.from_builtin_tensor %__auto.blk.27.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.attn_norm.weight$2 = util.global.load @__auto.blk.27.attn_norm.weight$2 : tensor<4096xf32>
    %1954 = torch_c.from_builtin_tensor %__auto.blk.27.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.attn_norm.weight$3 = util.global.load @__auto.blk.27.attn_norm.weight$3 : tensor<4096xf32>
    %1955 = torch_c.from_builtin_tensor %__auto.blk.27.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.attn_norm.weight$4 = util.global.load @__auto.blk.27.attn_norm.weight$4 : tensor<4096xf32>
    %1956 = torch_c.from_builtin_tensor %__auto.blk.27.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.attn_norm.weight$5 = util.global.load @__auto.blk.27.attn_norm.weight$5 : tensor<4096xf32>
    %1957 = torch_c.from_builtin_tensor %__auto.blk.27.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.attn_norm.weight$6 = util.global.load @__auto.blk.27.attn_norm.weight$6 : tensor<4096xf32>
    %1958 = torch_c.from_builtin_tensor %__auto.blk.27.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.attn_norm.weight$7 = util.global.load @__auto.blk.27.attn_norm.weight$7 : tensor<4096xf32>
    %1959 = torch_c.from_builtin_tensor %__auto.blk.27.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.attn_q.weight.shard.0 = util.global.load @__auto.blk.27.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %1960 = torch_c.from_builtin_tensor %__auto.blk.27.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.27.attn_q.weight.shard.1 = util.global.load @__auto.blk.27.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %1961 = torch_c.from_builtin_tensor %__auto.blk.27.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.27.attn_q.weight.shard.2 = util.global.load @__auto.blk.27.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %1962 = torch_c.from_builtin_tensor %__auto.blk.27.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.27.attn_q.weight.shard.3 = util.global.load @__auto.blk.27.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %1963 = torch_c.from_builtin_tensor %__auto.blk.27.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.27.attn_q.weight.shard.4 = util.global.load @__auto.blk.27.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %1964 = torch_c.from_builtin_tensor %__auto.blk.27.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.27.attn_q.weight.shard.5 = util.global.load @__auto.blk.27.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %1965 = torch_c.from_builtin_tensor %__auto.blk.27.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.27.attn_q.weight.shard.6 = util.global.load @__auto.blk.27.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %1966 = torch_c.from_builtin_tensor %__auto.blk.27.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.27.attn_q.weight.shard.7 = util.global.load @__auto.blk.27.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %1967 = torch_c.from_builtin_tensor %__auto.blk.27.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.27.attn_k.weight.shard.0 = util.global.load @__auto.blk.27.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %1968 = torch_c.from_builtin_tensor %__auto.blk.27.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_k.weight.shard.1 = util.global.load @__auto.blk.27.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %1969 = torch_c.from_builtin_tensor %__auto.blk.27.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_k.weight.shard.2 = util.global.load @__auto.blk.27.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %1970 = torch_c.from_builtin_tensor %__auto.blk.27.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_k.weight.shard.3 = util.global.load @__auto.blk.27.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %1971 = torch_c.from_builtin_tensor %__auto.blk.27.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_k.weight.shard.4 = util.global.load @__auto.blk.27.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %1972 = torch_c.from_builtin_tensor %__auto.blk.27.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_k.weight.shard.5 = util.global.load @__auto.blk.27.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %1973 = torch_c.from_builtin_tensor %__auto.blk.27.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_k.weight.shard.6 = util.global.load @__auto.blk.27.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %1974 = torch_c.from_builtin_tensor %__auto.blk.27.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_k.weight.shard.7 = util.global.load @__auto.blk.27.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %1975 = torch_c.from_builtin_tensor %__auto.blk.27.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_v.weight.shard.0 = util.global.load @__auto.blk.27.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %1976 = torch_c.from_builtin_tensor %__auto.blk.27.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_v.weight.shard.1 = util.global.load @__auto.blk.27.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %1977 = torch_c.from_builtin_tensor %__auto.blk.27.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_v.weight.shard.2 = util.global.load @__auto.blk.27.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %1978 = torch_c.from_builtin_tensor %__auto.blk.27.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_v.weight.shard.3 = util.global.load @__auto.blk.27.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %1979 = torch_c.from_builtin_tensor %__auto.blk.27.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_v.weight.shard.4 = util.global.load @__auto.blk.27.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %1980 = torch_c.from_builtin_tensor %__auto.blk.27.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_v.weight.shard.5 = util.global.load @__auto.blk.27.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %1981 = torch_c.from_builtin_tensor %__auto.blk.27.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_v.weight.shard.6 = util.global.load @__auto.blk.27.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %1982 = torch_c.from_builtin_tensor %__auto.blk.27.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_v.weight.shard.7 = util.global.load @__auto.blk.27.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %1983 = torch_c.from_builtin_tensor %__auto.blk.27.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.27.attn_output.weight.shard.0 = util.global.load @__auto.blk.27.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %1984 = torch_c.from_builtin_tensor %__auto.blk.27.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.27.attn_output.weight.shard.1 = util.global.load @__auto.blk.27.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %1985 = torch_c.from_builtin_tensor %__auto.blk.27.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.27.attn_output.weight.shard.2 = util.global.load @__auto.blk.27.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %1986 = torch_c.from_builtin_tensor %__auto.blk.27.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.27.attn_output.weight.shard.3 = util.global.load @__auto.blk.27.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %1987 = torch_c.from_builtin_tensor %__auto.blk.27.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.27.attn_output.weight.shard.4 = util.global.load @__auto.blk.27.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %1988 = torch_c.from_builtin_tensor %__auto.blk.27.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.27.attn_output.weight.shard.5 = util.global.load @__auto.blk.27.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %1989 = torch_c.from_builtin_tensor %__auto.blk.27.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.27.attn_output.weight.shard.6 = util.global.load @__auto.blk.27.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %1990 = torch_c.from_builtin_tensor %__auto.blk.27.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.27.attn_output.weight.shard.7 = util.global.load @__auto.blk.27.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %1991 = torch_c.from_builtin_tensor %__auto.blk.27.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.27.ffn_norm.weight = util.global.load @__auto.blk.27.ffn_norm.weight : tensor<4096xf32>
    %1992 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.ffn_norm.weight$1 = util.global.load @__auto.blk.27.ffn_norm.weight$1 : tensor<4096xf32>
    %1993 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.ffn_norm.weight$2 = util.global.load @__auto.blk.27.ffn_norm.weight$2 : tensor<4096xf32>
    %1994 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.ffn_norm.weight$3 = util.global.load @__auto.blk.27.ffn_norm.weight$3 : tensor<4096xf32>
    %1995 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.ffn_norm.weight$4 = util.global.load @__auto.blk.27.ffn_norm.weight$4 : tensor<4096xf32>
    %1996 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.ffn_norm.weight$5 = util.global.load @__auto.blk.27.ffn_norm.weight$5 : tensor<4096xf32>
    %1997 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.ffn_norm.weight$6 = util.global.load @__auto.blk.27.ffn_norm.weight$6 : tensor<4096xf32>
    %1998 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.ffn_norm.weight$7 = util.global.load @__auto.blk.27.ffn_norm.weight$7 : tensor<4096xf32>
    %1999 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.27.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.27.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %2000 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.27.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %2001 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.27.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %2002 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.27.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %2003 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.27.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %2004 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.27.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %2005 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.27.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %2006 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.27.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %2007 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_up.weight.shard.0 = util.global.load @__auto.blk.27.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %2008 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_up.weight.shard.1 = util.global.load @__auto.blk.27.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %2009 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_up.weight.shard.2 = util.global.load @__auto.blk.27.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %2010 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_up.weight.shard.3 = util.global.load @__auto.blk.27.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %2011 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_up.weight.shard.4 = util.global.load @__auto.blk.27.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %2012 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_up.weight.shard.5 = util.global.load @__auto.blk.27.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %2013 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_up.weight.shard.6 = util.global.load @__auto.blk.27.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %2014 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_up.weight.shard.7 = util.global.load @__auto.blk.27.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %2015 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.27.ffn_down.weight.shard.0 = util.global.load @__auto.blk.27.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %2016 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.27.ffn_down.weight.shard.1 = util.global.load @__auto.blk.27.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %2017 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.27.ffn_down.weight.shard.2 = util.global.load @__auto.blk.27.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %2018 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.27.ffn_down.weight.shard.3 = util.global.load @__auto.blk.27.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %2019 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.27.ffn_down.weight.shard.4 = util.global.load @__auto.blk.27.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %2020 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.27.ffn_down.weight.shard.5 = util.global.load @__auto.blk.27.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %2021 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.27.ffn_down.weight.shard.6 = util.global.load @__auto.blk.27.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %2022 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.27.ffn_down.weight.shard.7 = util.global.load @__auto.blk.27.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %2023 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
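    // Transformer block 28: same load-and-convert pattern as the preceding blocks.
    // Each parameter global is loaded per device and wrapped as a !torch.vtensor via
    // torch_c.from_builtin_tensor. Norm weights (f32) are replicated, one `$N` copy
    // per device; projection weights (f16) are split 8 ways, consistent with the
    // shapes above: q shards of 512 rows (4096/8), k/v shards of 128 rows (1024/8),
    // and ffn gate/up shards of 1792 rows (14336/8), with attn_output and ffn_down
    // sharded along the reduction dimension instead.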
    %__auto.blk.28.attn_norm.weight = util.global.load @__auto.blk.28.attn_norm.weight : tensor<4096xf32>
    %2024 = torch_c.from_builtin_tensor %__auto.blk.28.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.attn_norm.weight$1 = util.global.load @__auto.blk.28.attn_norm.weight$1 : tensor<4096xf32>
    %2025 = torch_c.from_builtin_tensor %__auto.blk.28.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.attn_norm.weight$2 = util.global.load @__auto.blk.28.attn_norm.weight$2 : tensor<4096xf32>
    %2026 = torch_c.from_builtin_tensor %__auto.blk.28.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.attn_norm.weight$3 = util.global.load @__auto.blk.28.attn_norm.weight$3 : tensor<4096xf32>
    %2027 = torch_c.from_builtin_tensor %__auto.blk.28.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.attn_norm.weight$4 = util.global.load @__auto.blk.28.attn_norm.weight$4 : tensor<4096xf32>
    %2028 = torch_c.from_builtin_tensor %__auto.blk.28.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.attn_norm.weight$5 = util.global.load @__auto.blk.28.attn_norm.weight$5 : tensor<4096xf32>
    %2029 = torch_c.from_builtin_tensor %__auto.blk.28.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.attn_norm.weight$6 = util.global.load @__auto.blk.28.attn_norm.weight$6 : tensor<4096xf32>
    %2030 = torch_c.from_builtin_tensor %__auto.blk.28.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.attn_norm.weight$7 = util.global.load @__auto.blk.28.attn_norm.weight$7 : tensor<4096xf32>
    %2031 = torch_c.from_builtin_tensor %__auto.blk.28.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.attn_q.weight.shard.0 = util.global.load @__auto.blk.28.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %2032 = torch_c.from_builtin_tensor %__auto.blk.28.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.28.attn_q.weight.shard.1 = util.global.load @__auto.blk.28.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %2033 = torch_c.from_builtin_tensor %__auto.blk.28.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.28.attn_q.weight.shard.2 = util.global.load @__auto.blk.28.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %2034 = torch_c.from_builtin_tensor %__auto.blk.28.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.28.attn_q.weight.shard.3 = util.global.load @__auto.blk.28.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %2035 = torch_c.from_builtin_tensor %__auto.blk.28.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.28.attn_q.weight.shard.4 = util.global.load @__auto.blk.28.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %2036 = torch_c.from_builtin_tensor %__auto.blk.28.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.28.attn_q.weight.shard.5 = util.global.load @__auto.blk.28.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %2037 = torch_c.from_builtin_tensor %__auto.blk.28.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.28.attn_q.weight.shard.6 = util.global.load @__auto.blk.28.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %2038 = torch_c.from_builtin_tensor %__auto.blk.28.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.28.attn_q.weight.shard.7 = util.global.load @__auto.blk.28.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %2039 = torch_c.from_builtin_tensor %__auto.blk.28.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.28.attn_k.weight.shard.0 = util.global.load @__auto.blk.28.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %2040 = torch_c.from_builtin_tensor %__auto.blk.28.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_k.weight.shard.1 = util.global.load @__auto.blk.28.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %2041 = torch_c.from_builtin_tensor %__auto.blk.28.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_k.weight.shard.2 = util.global.load @__auto.blk.28.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %2042 = torch_c.from_builtin_tensor %__auto.blk.28.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_k.weight.shard.3 = util.global.load @__auto.blk.28.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %2043 = torch_c.from_builtin_tensor %__auto.blk.28.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_k.weight.shard.4 = util.global.load @__auto.blk.28.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %2044 = torch_c.from_builtin_tensor %__auto.blk.28.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_k.weight.shard.5 = util.global.load @__auto.blk.28.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %2045 = torch_c.from_builtin_tensor %__auto.blk.28.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_k.weight.shard.6 = util.global.load @__auto.blk.28.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %2046 = torch_c.from_builtin_tensor %__auto.blk.28.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_k.weight.shard.7 = util.global.load @__auto.blk.28.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %2047 = torch_c.from_builtin_tensor %__auto.blk.28.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_v.weight.shard.0 = util.global.load @__auto.blk.28.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %2048 = torch_c.from_builtin_tensor %__auto.blk.28.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_v.weight.shard.1 = util.global.load @__auto.blk.28.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %2049 = torch_c.from_builtin_tensor %__auto.blk.28.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_v.weight.shard.2 = util.global.load @__auto.blk.28.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %2050 = torch_c.from_builtin_tensor %__auto.blk.28.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_v.weight.shard.3 = util.global.load @__auto.blk.28.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %2051 = torch_c.from_builtin_tensor %__auto.blk.28.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_v.weight.shard.4 = util.global.load @__auto.blk.28.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %2052 = torch_c.from_builtin_tensor %__auto.blk.28.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_v.weight.shard.5 = util.global.load @__auto.blk.28.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %2053 = torch_c.from_builtin_tensor %__auto.blk.28.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_v.weight.shard.6 = util.global.load @__auto.blk.28.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %2054 = torch_c.from_builtin_tensor %__auto.blk.28.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_v.weight.shard.7 = util.global.load @__auto.blk.28.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %2055 = torch_c.from_builtin_tensor %__auto.blk.28.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.28.attn_output.weight.shard.0 = util.global.load @__auto.blk.28.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %2056 = torch_c.from_builtin_tensor %__auto.blk.28.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.28.attn_output.weight.shard.1 = util.global.load @__auto.blk.28.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %2057 = torch_c.from_builtin_tensor %__auto.blk.28.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.28.attn_output.weight.shard.2 = util.global.load @__auto.blk.28.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %2058 = torch_c.from_builtin_tensor %__auto.blk.28.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.28.attn_output.weight.shard.3 = util.global.load @__auto.blk.28.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %2059 = torch_c.from_builtin_tensor %__auto.blk.28.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.28.attn_output.weight.shard.4 = util.global.load @__auto.blk.28.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %2060 = torch_c.from_builtin_tensor %__auto.blk.28.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.28.attn_output.weight.shard.5 = util.global.load @__auto.blk.28.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %2061 = torch_c.from_builtin_tensor %__auto.blk.28.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.28.attn_output.weight.shard.6 = util.global.load @__auto.blk.28.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %2062 = torch_c.from_builtin_tensor %__auto.blk.28.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.28.attn_output.weight.shard.7 = util.global.load @__auto.blk.28.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %2063 = torch_c.from_builtin_tensor %__auto.blk.28.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.28.ffn_norm.weight = util.global.load @__auto.blk.28.ffn_norm.weight : tensor<4096xf32>
    %2064 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.ffn_norm.weight$1 = util.global.load @__auto.blk.28.ffn_norm.weight$1 : tensor<4096xf32>
    %2065 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.ffn_norm.weight$2 = util.global.load @__auto.blk.28.ffn_norm.weight$2 : tensor<4096xf32>
    %2066 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.ffn_norm.weight$3 = util.global.load @__auto.blk.28.ffn_norm.weight$3 : tensor<4096xf32>
    %2067 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.ffn_norm.weight$4 = util.global.load @__auto.blk.28.ffn_norm.weight$4 : tensor<4096xf32>
    %2068 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.ffn_norm.weight$5 = util.global.load @__auto.blk.28.ffn_norm.weight$5 : tensor<4096xf32>
    %2069 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.ffn_norm.weight$6 = util.global.load @__auto.blk.28.ffn_norm.weight$6 : tensor<4096xf32>
    %2070 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.ffn_norm.weight$7 = util.global.load @__auto.blk.28.ffn_norm.weight$7 : tensor<4096xf32>
    %2071 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.28.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.28.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %2072 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.28.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %2073 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.28.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %2074 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.28.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %2075 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.28.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %2076 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.28.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %2077 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.28.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %2078 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.28.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %2079 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_up.weight.shard.0 = util.global.load @__auto.blk.28.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %2080 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_up.weight.shard.1 = util.global.load @__auto.blk.28.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %2081 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_up.weight.shard.2 = util.global.load @__auto.blk.28.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %2082 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_up.weight.shard.3 = util.global.load @__auto.blk.28.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %2083 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_up.weight.shard.4 = util.global.load @__auto.blk.28.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %2084 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_up.weight.shard.5 = util.global.load @__auto.blk.28.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %2085 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_up.weight.shard.6 = util.global.load @__auto.blk.28.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %2086 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_up.weight.shard.7 = util.global.load @__auto.blk.28.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %2087 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.28.ffn_down.weight.shard.0 = util.global.load @__auto.blk.28.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %2088 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.28.ffn_down.weight.shard.1 = util.global.load @__auto.blk.28.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %2089 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.28.ffn_down.weight.shard.2 = util.global.load @__auto.blk.28.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %2090 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.28.ffn_down.weight.shard.3 = util.global.load @__auto.blk.28.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %2091 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.28.ffn_down.weight.shard.4 = util.global.load @__auto.blk.28.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %2092 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.28.ffn_down.weight.shard.5 = util.global.load @__auto.blk.28.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %2093 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.28.ffn_down.weight.shard.6 = util.global.load @__auto.blk.28.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %2094 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.28.ffn_down.weight.shard.7 = util.global.load @__auto.blk.28.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %2095 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
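    // Transformer block 29: identical load/convert sequence (replicated attn_norm
    // and ffn_norm copies, plus 8-way q/k/v/output and ffn gate/up/down shards).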
    %__auto.blk.29.attn_norm.weight = util.global.load @__auto.blk.29.attn_norm.weight : tensor<4096xf32>
    %2096 = torch_c.from_builtin_tensor %__auto.blk.29.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.attn_norm.weight$1 = util.global.load @__auto.blk.29.attn_norm.weight$1 : tensor<4096xf32>
    %2097 = torch_c.from_builtin_tensor %__auto.blk.29.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.attn_norm.weight$2 = util.global.load @__auto.blk.29.attn_norm.weight$2 : tensor<4096xf32>
    %2098 = torch_c.from_builtin_tensor %__auto.blk.29.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.attn_norm.weight$3 = util.global.load @__auto.blk.29.attn_norm.weight$3 : tensor<4096xf32>
    %2099 = torch_c.from_builtin_tensor %__auto.blk.29.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.attn_norm.weight$4 = util.global.load @__auto.blk.29.attn_norm.weight$4 : tensor<4096xf32>
    %2100 = torch_c.from_builtin_tensor %__auto.blk.29.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.attn_norm.weight$5 = util.global.load @__auto.blk.29.attn_norm.weight$5 : tensor<4096xf32>
    %2101 = torch_c.from_builtin_tensor %__auto.blk.29.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.attn_norm.weight$6 = util.global.load @__auto.blk.29.attn_norm.weight$6 : tensor<4096xf32>
    %2102 = torch_c.from_builtin_tensor %__auto.blk.29.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.attn_norm.weight$7 = util.global.load @__auto.blk.29.attn_norm.weight$7 : tensor<4096xf32>
    %2103 = torch_c.from_builtin_tensor %__auto.blk.29.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.attn_q.weight.shard.0 = util.global.load @__auto.blk.29.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %2104 = torch_c.from_builtin_tensor %__auto.blk.29.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.29.attn_q.weight.shard.1 = util.global.load @__auto.blk.29.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %2105 = torch_c.from_builtin_tensor %__auto.blk.29.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.29.attn_q.weight.shard.2 = util.global.load @__auto.blk.29.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %2106 = torch_c.from_builtin_tensor %__auto.blk.29.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.29.attn_q.weight.shard.3 = util.global.load @__auto.blk.29.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %2107 = torch_c.from_builtin_tensor %__auto.blk.29.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.29.attn_q.weight.shard.4 = util.global.load @__auto.blk.29.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %2108 = torch_c.from_builtin_tensor %__auto.blk.29.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.29.attn_q.weight.shard.5 = util.global.load @__auto.blk.29.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %2109 = torch_c.from_builtin_tensor %__auto.blk.29.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.29.attn_q.weight.shard.6 = util.global.load @__auto.blk.29.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %2110 = torch_c.from_builtin_tensor %__auto.blk.29.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.29.attn_q.weight.shard.7 = util.global.load @__auto.blk.29.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %2111 = torch_c.from_builtin_tensor %__auto.blk.29.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.29.attn_k.weight.shard.0 = util.global.load @__auto.blk.29.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %2112 = torch_c.from_builtin_tensor %__auto.blk.29.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_k.weight.shard.1 = util.global.load @__auto.blk.29.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %2113 = torch_c.from_builtin_tensor %__auto.blk.29.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_k.weight.shard.2 = util.global.load @__auto.blk.29.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %2114 = torch_c.from_builtin_tensor %__auto.blk.29.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_k.weight.shard.3 = util.global.load @__auto.blk.29.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %2115 = torch_c.from_builtin_tensor %__auto.blk.29.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_k.weight.shard.4 = util.global.load @__auto.blk.29.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %2116 = torch_c.from_builtin_tensor %__auto.blk.29.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_k.weight.shard.5 = util.global.load @__auto.blk.29.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %2117 = torch_c.from_builtin_tensor %__auto.blk.29.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_k.weight.shard.6 = util.global.load @__auto.blk.29.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %2118 = torch_c.from_builtin_tensor %__auto.blk.29.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_k.weight.shard.7 = util.global.load @__auto.blk.29.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %2119 = torch_c.from_builtin_tensor %__auto.blk.29.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_v.weight.shard.0 = util.global.load @__auto.blk.29.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %2120 = torch_c.from_builtin_tensor %__auto.blk.29.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_v.weight.shard.1 = util.global.load @__auto.blk.29.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %2121 = torch_c.from_builtin_tensor %__auto.blk.29.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_v.weight.shard.2 = util.global.load @__auto.blk.29.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %2122 = torch_c.from_builtin_tensor %__auto.blk.29.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_v.weight.shard.3 = util.global.load @__auto.blk.29.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %2123 = torch_c.from_builtin_tensor %__auto.blk.29.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_v.weight.shard.4 = util.global.load @__auto.blk.29.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %2124 = torch_c.from_builtin_tensor %__auto.blk.29.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_v.weight.shard.5 = util.global.load @__auto.blk.29.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %2125 = torch_c.from_builtin_tensor %__auto.blk.29.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_v.weight.shard.6 = util.global.load @__auto.blk.29.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %2126 = torch_c.from_builtin_tensor %__auto.blk.29.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_v.weight.shard.7 = util.global.load @__auto.blk.29.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %2127 = torch_c.from_builtin_tensor %__auto.blk.29.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.29.attn_output.weight.shard.0 = util.global.load @__auto.blk.29.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %2128 = torch_c.from_builtin_tensor %__auto.blk.29.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.29.attn_output.weight.shard.1 = util.global.load @__auto.blk.29.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %2129 = torch_c.from_builtin_tensor %__auto.blk.29.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.29.attn_output.weight.shard.2 = util.global.load @__auto.blk.29.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %2130 = torch_c.from_builtin_tensor %__auto.blk.29.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.29.attn_output.weight.shard.3 = util.global.load @__auto.blk.29.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %2131 = torch_c.from_builtin_tensor %__auto.blk.29.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.29.attn_output.weight.shard.4 = util.global.load @__auto.blk.29.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %2132 = torch_c.from_builtin_tensor %__auto.blk.29.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.29.attn_output.weight.shard.5 = util.global.load @__auto.blk.29.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %2133 = torch_c.from_builtin_tensor %__auto.blk.29.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.29.attn_output.weight.shard.6 = util.global.load @__auto.blk.29.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %2134 = torch_c.from_builtin_tensor %__auto.blk.29.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.29.attn_output.weight.shard.7 = util.global.load @__auto.blk.29.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %2135 = torch_c.from_builtin_tensor %__auto.blk.29.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.29.ffn_norm.weight = util.global.load @__auto.blk.29.ffn_norm.weight : tensor<4096xf32>
    %2136 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.ffn_norm.weight$1 = util.global.load @__auto.blk.29.ffn_norm.weight$1 : tensor<4096xf32>
    %2137 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.ffn_norm.weight$2 = util.global.load @__auto.blk.29.ffn_norm.weight$2 : tensor<4096xf32>
    %2138 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.ffn_norm.weight$3 = util.global.load @__auto.blk.29.ffn_norm.weight$3 : tensor<4096xf32>
    %2139 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.ffn_norm.weight$4 = util.global.load @__auto.blk.29.ffn_norm.weight$4 : tensor<4096xf32>
    %2140 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.ffn_norm.weight$5 = util.global.load @__auto.blk.29.ffn_norm.weight$5 : tensor<4096xf32>
    %2141 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.ffn_norm.weight$6 = util.global.load @__auto.blk.29.ffn_norm.weight$6 : tensor<4096xf32>
    %2142 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.ffn_norm.weight$7 = util.global.load @__auto.blk.29.ffn_norm.weight$7 : tensor<4096xf32>
    %2143 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.29.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.29.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %2144 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.29.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %2145 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.29.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %2146 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.29.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %2147 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.29.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %2148 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.29.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %2149 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.29.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %2150 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.29.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %2151 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_up.weight.shard.0 = util.global.load @__auto.blk.29.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %2152 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_up.weight.shard.1 = util.global.load @__auto.blk.29.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %2153 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_up.weight.shard.2 = util.global.load @__auto.blk.29.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %2154 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_up.weight.shard.3 = util.global.load @__auto.blk.29.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %2155 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_up.weight.shard.4 = util.global.load @__auto.blk.29.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %2156 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_up.weight.shard.5 = util.global.load @__auto.blk.29.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %2157 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_up.weight.shard.6 = util.global.load @__auto.blk.29.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %2158 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_up.weight.shard.7 = util.global.load @__auto.blk.29.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %2159 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.29.ffn_down.weight.shard.0 = util.global.load @__auto.blk.29.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %2160 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.29.ffn_down.weight.shard.1 = util.global.load @__auto.blk.29.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %2161 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.29.ffn_down.weight.shard.2 = util.global.load @__auto.blk.29.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %2162 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.29.ffn_down.weight.shard.3 = util.global.load @__auto.blk.29.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %2163 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.29.ffn_down.weight.shard.4 = util.global.load @__auto.blk.29.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %2164 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.29.ffn_down.weight.shard.5 = util.global.load @__auto.blk.29.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %2165 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.29.ffn_down.weight.shard.6 = util.global.load @__auto.blk.29.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %2166 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.29.ffn_down.weight.shard.7 = util.global.load @__auto.blk.29.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %2167 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
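    // Transformer block 30: identical load/convert sequence (replicated norm
    // weights, 8-way sharded attention and feed-forward projection weights).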
    %__auto.blk.30.attn_norm.weight = util.global.load @__auto.blk.30.attn_norm.weight : tensor<4096xf32>
    %2168 = torch_c.from_builtin_tensor %__auto.blk.30.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.attn_norm.weight$1 = util.global.load @__auto.blk.30.attn_norm.weight$1 : tensor<4096xf32>
    %2169 = torch_c.from_builtin_tensor %__auto.blk.30.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.attn_norm.weight$2 = util.global.load @__auto.blk.30.attn_norm.weight$2 : tensor<4096xf32>
    %2170 = torch_c.from_builtin_tensor %__auto.blk.30.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.attn_norm.weight$3 = util.global.load @__auto.blk.30.attn_norm.weight$3 : tensor<4096xf32>
    %2171 = torch_c.from_builtin_tensor %__auto.blk.30.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.attn_norm.weight$4 = util.global.load @__auto.blk.30.attn_norm.weight$4 : tensor<4096xf32>
    %2172 = torch_c.from_builtin_tensor %__auto.blk.30.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.attn_norm.weight$5 = util.global.load @__auto.blk.30.attn_norm.weight$5 : tensor<4096xf32>
    %2173 = torch_c.from_builtin_tensor %__auto.blk.30.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.attn_norm.weight$6 = util.global.load @__auto.blk.30.attn_norm.weight$6 : tensor<4096xf32>
    %2174 = torch_c.from_builtin_tensor %__auto.blk.30.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.attn_norm.weight$7 = util.global.load @__auto.blk.30.attn_norm.weight$7 : tensor<4096xf32>
    %2175 = torch_c.from_builtin_tensor %__auto.blk.30.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.attn_q.weight.shard.0 = util.global.load @__auto.blk.30.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %2176 = torch_c.from_builtin_tensor %__auto.blk.30.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.30.attn_q.weight.shard.1 = util.global.load @__auto.blk.30.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %2177 = torch_c.from_builtin_tensor %__auto.blk.30.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.30.attn_q.weight.shard.2 = util.global.load @__auto.blk.30.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %2178 = torch_c.from_builtin_tensor %__auto.blk.30.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.30.attn_q.weight.shard.3 = util.global.load @__auto.blk.30.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %2179 = torch_c.from_builtin_tensor %__auto.blk.30.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.30.attn_q.weight.shard.4 = util.global.load @__auto.blk.30.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %2180 = torch_c.from_builtin_tensor %__auto.blk.30.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.30.attn_q.weight.shard.5 = util.global.load @__auto.blk.30.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %2181 = torch_c.from_builtin_tensor %__auto.blk.30.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.30.attn_q.weight.shard.6 = util.global.load @__auto.blk.30.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %2182 = torch_c.from_builtin_tensor %__auto.blk.30.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.30.attn_q.weight.shard.7 = util.global.load @__auto.blk.30.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %2183 = torch_c.from_builtin_tensor %__auto.blk.30.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.30.attn_k.weight.shard.0 = util.global.load @__auto.blk.30.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %2184 = torch_c.from_builtin_tensor %__auto.blk.30.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_k.weight.shard.1 = util.global.load @__auto.blk.30.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %2185 = torch_c.from_builtin_tensor %__auto.blk.30.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_k.weight.shard.2 = util.global.load @__auto.blk.30.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %2186 = torch_c.from_builtin_tensor %__auto.blk.30.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_k.weight.shard.3 = util.global.load @__auto.blk.30.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %2187 = torch_c.from_builtin_tensor %__auto.blk.30.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_k.weight.shard.4 = util.global.load @__auto.blk.30.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %2188 = torch_c.from_builtin_tensor %__auto.blk.30.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_k.weight.shard.5 = util.global.load @__auto.blk.30.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %2189 = torch_c.from_builtin_tensor %__auto.blk.30.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_k.weight.shard.6 = util.global.load @__auto.blk.30.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %2190 = torch_c.from_builtin_tensor %__auto.blk.30.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_k.weight.shard.7 = util.global.load @__auto.blk.30.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %2191 = torch_c.from_builtin_tensor %__auto.blk.30.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_v.weight.shard.0 = util.global.load @__auto.blk.30.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %2192 = torch_c.from_builtin_tensor %__auto.blk.30.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_v.weight.shard.1 = util.global.load @__auto.blk.30.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %2193 = torch_c.from_builtin_tensor %__auto.blk.30.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_v.weight.shard.2 = util.global.load @__auto.blk.30.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %2194 = torch_c.from_builtin_tensor %__auto.blk.30.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_v.weight.shard.3 = util.global.load @__auto.blk.30.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %2195 = torch_c.from_builtin_tensor %__auto.blk.30.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_v.weight.shard.4 = util.global.load @__auto.blk.30.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %2196 = torch_c.from_builtin_tensor %__auto.blk.30.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_v.weight.shard.5 = util.global.load @__auto.blk.30.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %2197 = torch_c.from_builtin_tensor %__auto.blk.30.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_v.weight.shard.6 = util.global.load @__auto.blk.30.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %2198 = torch_c.from_builtin_tensor %__auto.blk.30.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_v.weight.shard.7 = util.global.load @__auto.blk.30.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %2199 = torch_c.from_builtin_tensor %__auto.blk.30.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.30.attn_output.weight.shard.0 = util.global.load @__auto.blk.30.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %2200 = torch_c.from_builtin_tensor %__auto.blk.30.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.30.attn_output.weight.shard.1 = util.global.load @__auto.blk.30.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %2201 = torch_c.from_builtin_tensor %__auto.blk.30.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.30.attn_output.weight.shard.2 = util.global.load @__auto.blk.30.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %2202 = torch_c.from_builtin_tensor %__auto.blk.30.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.30.attn_output.weight.shard.3 = util.global.load @__auto.blk.30.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %2203 = torch_c.from_builtin_tensor %__auto.blk.30.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.30.attn_output.weight.shard.4 = util.global.load @__auto.blk.30.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %2204 = torch_c.from_builtin_tensor %__auto.blk.30.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.30.attn_output.weight.shard.5 = util.global.load @__auto.blk.30.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %2205 = torch_c.from_builtin_tensor %__auto.blk.30.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.30.attn_output.weight.shard.6 = util.global.load @__auto.blk.30.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %2206 = torch_c.from_builtin_tensor %__auto.blk.30.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.30.attn_output.weight.shard.7 = util.global.load @__auto.blk.30.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %2207 = torch_c.from_builtin_tensor %__auto.blk.30.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.30.ffn_norm.weight = util.global.load @__auto.blk.30.ffn_norm.weight : tensor<4096xf32>
    %2208 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.ffn_norm.weight$1 = util.global.load @__auto.blk.30.ffn_norm.weight$1 : tensor<4096xf32>
    %2209 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.ffn_norm.weight$2 = util.global.load @__auto.blk.30.ffn_norm.weight$2 : tensor<4096xf32>
    %2210 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.ffn_norm.weight$3 = util.global.load @__auto.blk.30.ffn_norm.weight$3 : tensor<4096xf32>
    %2211 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.ffn_norm.weight$4 = util.global.load @__auto.blk.30.ffn_norm.weight$4 : tensor<4096xf32>
    %2212 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.ffn_norm.weight$5 = util.global.load @__auto.blk.30.ffn_norm.weight$5 : tensor<4096xf32>
    %2213 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.ffn_norm.weight$6 = util.global.load @__auto.blk.30.ffn_norm.weight$6 : tensor<4096xf32>
    %2214 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.ffn_norm.weight$7 = util.global.load @__auto.blk.30.ffn_norm.weight$7 : tensor<4096xf32>
    %2215 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.30.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.30.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %2216 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.30.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %2217 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.30.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %2218 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.30.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %2219 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.30.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %2220 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.30.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %2221 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.30.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %2222 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.30.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %2223 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_up.weight.shard.0 = util.global.load @__auto.blk.30.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %2224 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_up.weight.shard.1 = util.global.load @__auto.blk.30.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %2225 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_up.weight.shard.2 = util.global.load @__auto.blk.30.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %2226 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_up.weight.shard.3 = util.global.load @__auto.blk.30.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %2227 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_up.weight.shard.4 = util.global.load @__auto.blk.30.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %2228 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_up.weight.shard.5 = util.global.load @__auto.blk.30.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %2229 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_up.weight.shard.6 = util.global.load @__auto.blk.30.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %2230 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_up.weight.shard.7 = util.global.load @__auto.blk.30.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %2231 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.30.ffn_down.weight.shard.0 = util.global.load @__auto.blk.30.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %2232 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.30.ffn_down.weight.shard.1 = util.global.load @__auto.blk.30.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %2233 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.30.ffn_down.weight.shard.2 = util.global.load @__auto.blk.30.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %2234 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.30.ffn_down.weight.shard.3 = util.global.load @__auto.blk.30.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %2235 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.30.ffn_down.weight.shard.4 = util.global.load @__auto.blk.30.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %2236 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.30.ffn_down.weight.shard.5 = util.global.load @__auto.blk.30.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %2237 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.30.ffn_down.weight.shard.6 = util.global.load @__auto.blk.30.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %2238 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.30.ffn_down.weight.shard.7 = util.global.load @__auto.blk.30.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %2239 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
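    // blk.31: parameters for the final transformer block follow, in the same
    // 8-way layout as the preceding blocks: norm weights replicated once per
    // device ($1..$7 suffixes), projection weights split into .shard.0..7.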
    %__auto.blk.31.attn_norm.weight = util.global.load @__auto.blk.31.attn_norm.weight : tensor<4096xf32>
    %2240 = torch_c.from_builtin_tensor %__auto.blk.31.attn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.attn_norm.weight$1 = util.global.load @__auto.blk.31.attn_norm.weight$1 : tensor<4096xf32>
    %2241 = torch_c.from_builtin_tensor %__auto.blk.31.attn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.attn_norm.weight$2 = util.global.load @__auto.blk.31.attn_norm.weight$2 : tensor<4096xf32>
    %2242 = torch_c.from_builtin_tensor %__auto.blk.31.attn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.attn_norm.weight$3 = util.global.load @__auto.blk.31.attn_norm.weight$3 : tensor<4096xf32>
    %2243 = torch_c.from_builtin_tensor %__auto.blk.31.attn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.attn_norm.weight$4 = util.global.load @__auto.blk.31.attn_norm.weight$4 : tensor<4096xf32>
    %2244 = torch_c.from_builtin_tensor %__auto.blk.31.attn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.attn_norm.weight$5 = util.global.load @__auto.blk.31.attn_norm.weight$5 : tensor<4096xf32>
    %2245 = torch_c.from_builtin_tensor %__auto.blk.31.attn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.attn_norm.weight$6 = util.global.load @__auto.blk.31.attn_norm.weight$6 : tensor<4096xf32>
    %2246 = torch_c.from_builtin_tensor %__auto.blk.31.attn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.attn_norm.weight$7 = util.global.load @__auto.blk.31.attn_norm.weight$7 : tensor<4096xf32>
    %2247 = torch_c.from_builtin_tensor %__auto.blk.31.attn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.attn_q.weight.shard.0 = util.global.load @__auto.blk.31.attn_q.weight.shard.0 : tensor<512x4096xf16>
    %2248 = torch_c.from_builtin_tensor %__auto.blk.31.attn_q.weight.shard.0 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.31.attn_q.weight.shard.1 = util.global.load @__auto.blk.31.attn_q.weight.shard.1 : tensor<512x4096xf16>
    %2249 = torch_c.from_builtin_tensor %__auto.blk.31.attn_q.weight.shard.1 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.31.attn_q.weight.shard.2 = util.global.load @__auto.blk.31.attn_q.weight.shard.2 : tensor<512x4096xf16>
    %2250 = torch_c.from_builtin_tensor %__auto.blk.31.attn_q.weight.shard.2 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.31.attn_q.weight.shard.3 = util.global.load @__auto.blk.31.attn_q.weight.shard.3 : tensor<512x4096xf16>
    %2251 = torch_c.from_builtin_tensor %__auto.blk.31.attn_q.weight.shard.3 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.31.attn_q.weight.shard.4 = util.global.load @__auto.blk.31.attn_q.weight.shard.4 : tensor<512x4096xf16>
    %2252 = torch_c.from_builtin_tensor %__auto.blk.31.attn_q.weight.shard.4 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.31.attn_q.weight.shard.5 = util.global.load @__auto.blk.31.attn_q.weight.shard.5 : tensor<512x4096xf16>
    %2253 = torch_c.from_builtin_tensor %__auto.blk.31.attn_q.weight.shard.5 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.31.attn_q.weight.shard.6 = util.global.load @__auto.blk.31.attn_q.weight.shard.6 : tensor<512x4096xf16>
    %2254 = torch_c.from_builtin_tensor %__auto.blk.31.attn_q.weight.shard.6 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.31.attn_q.weight.shard.7 = util.global.load @__auto.blk.31.attn_q.weight.shard.7 : tensor<512x4096xf16>
    %2255 = torch_c.from_builtin_tensor %__auto.blk.31.attn_q.weight.shard.7 : tensor<512x4096xf16> -> !torch.vtensor<[512,4096],f16>
    %__auto.blk.31.attn_k.weight.shard.0 = util.global.load @__auto.blk.31.attn_k.weight.shard.0 : tensor<128x4096xf16>
    %2256 = torch_c.from_builtin_tensor %__auto.blk.31.attn_k.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_k.weight.shard.1 = util.global.load @__auto.blk.31.attn_k.weight.shard.1 : tensor<128x4096xf16>
    %2257 = torch_c.from_builtin_tensor %__auto.blk.31.attn_k.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_k.weight.shard.2 = util.global.load @__auto.blk.31.attn_k.weight.shard.2 : tensor<128x4096xf16>
    %2258 = torch_c.from_builtin_tensor %__auto.blk.31.attn_k.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_k.weight.shard.3 = util.global.load @__auto.blk.31.attn_k.weight.shard.3 : tensor<128x4096xf16>
    %2259 = torch_c.from_builtin_tensor %__auto.blk.31.attn_k.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_k.weight.shard.4 = util.global.load @__auto.blk.31.attn_k.weight.shard.4 : tensor<128x4096xf16>
    %2260 = torch_c.from_builtin_tensor %__auto.blk.31.attn_k.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_k.weight.shard.5 = util.global.load @__auto.blk.31.attn_k.weight.shard.5 : tensor<128x4096xf16>
    %2261 = torch_c.from_builtin_tensor %__auto.blk.31.attn_k.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_k.weight.shard.6 = util.global.load @__auto.blk.31.attn_k.weight.shard.6 : tensor<128x4096xf16>
    %2262 = torch_c.from_builtin_tensor %__auto.blk.31.attn_k.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_k.weight.shard.7 = util.global.load @__auto.blk.31.attn_k.weight.shard.7 : tensor<128x4096xf16>
    %2263 = torch_c.from_builtin_tensor %__auto.blk.31.attn_k.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_v.weight.shard.0 = util.global.load @__auto.blk.31.attn_v.weight.shard.0 : tensor<128x4096xf16>
    %2264 = torch_c.from_builtin_tensor %__auto.blk.31.attn_v.weight.shard.0 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_v.weight.shard.1 = util.global.load @__auto.blk.31.attn_v.weight.shard.1 : tensor<128x4096xf16>
    %2265 = torch_c.from_builtin_tensor %__auto.blk.31.attn_v.weight.shard.1 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_v.weight.shard.2 = util.global.load @__auto.blk.31.attn_v.weight.shard.2 : tensor<128x4096xf16>
    %2266 = torch_c.from_builtin_tensor %__auto.blk.31.attn_v.weight.shard.2 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_v.weight.shard.3 = util.global.load @__auto.blk.31.attn_v.weight.shard.3 : tensor<128x4096xf16>
    %2267 = torch_c.from_builtin_tensor %__auto.blk.31.attn_v.weight.shard.3 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_v.weight.shard.4 = util.global.load @__auto.blk.31.attn_v.weight.shard.4 : tensor<128x4096xf16>
    %2268 = torch_c.from_builtin_tensor %__auto.blk.31.attn_v.weight.shard.4 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_v.weight.shard.5 = util.global.load @__auto.blk.31.attn_v.weight.shard.5 : tensor<128x4096xf16>
    %2269 = torch_c.from_builtin_tensor %__auto.blk.31.attn_v.weight.shard.5 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_v.weight.shard.6 = util.global.load @__auto.blk.31.attn_v.weight.shard.6 : tensor<128x4096xf16>
    %2270 = torch_c.from_builtin_tensor %__auto.blk.31.attn_v.weight.shard.6 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_v.weight.shard.7 = util.global.load @__auto.blk.31.attn_v.weight.shard.7 : tensor<128x4096xf16>
    %2271 = torch_c.from_builtin_tensor %__auto.blk.31.attn_v.weight.shard.7 : tensor<128x4096xf16> -> !torch.vtensor<[128,4096],f16>
    %__auto.blk.31.attn_output.weight.shard.0 = util.global.load @__auto.blk.31.attn_output.weight.shard.0 : tensor<4096x512xf16>
    %2272 = torch_c.from_builtin_tensor %__auto.blk.31.attn_output.weight.shard.0 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.31.attn_output.weight.shard.1 = util.global.load @__auto.blk.31.attn_output.weight.shard.1 : tensor<4096x512xf16>
    %2273 = torch_c.from_builtin_tensor %__auto.blk.31.attn_output.weight.shard.1 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.31.attn_output.weight.shard.2 = util.global.load @__auto.blk.31.attn_output.weight.shard.2 : tensor<4096x512xf16>
    %2274 = torch_c.from_builtin_tensor %__auto.blk.31.attn_output.weight.shard.2 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.31.attn_output.weight.shard.3 = util.global.load @__auto.blk.31.attn_output.weight.shard.3 : tensor<4096x512xf16>
    %2275 = torch_c.from_builtin_tensor %__auto.blk.31.attn_output.weight.shard.3 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.31.attn_output.weight.shard.4 = util.global.load @__auto.blk.31.attn_output.weight.shard.4 : tensor<4096x512xf16>
    %2276 = torch_c.from_builtin_tensor %__auto.blk.31.attn_output.weight.shard.4 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.31.attn_output.weight.shard.5 = util.global.load @__auto.blk.31.attn_output.weight.shard.5 : tensor<4096x512xf16>
    %2277 = torch_c.from_builtin_tensor %__auto.blk.31.attn_output.weight.shard.5 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.31.attn_output.weight.shard.6 = util.global.load @__auto.blk.31.attn_output.weight.shard.6 : tensor<4096x512xf16>
    %2278 = torch_c.from_builtin_tensor %__auto.blk.31.attn_output.weight.shard.6 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.31.attn_output.weight.shard.7 = util.global.load @__auto.blk.31.attn_output.weight.shard.7 : tensor<4096x512xf16>
    %2279 = torch_c.from_builtin_tensor %__auto.blk.31.attn_output.weight.shard.7 : tensor<4096x512xf16> -> !torch.vtensor<[4096,512],f16>
    %__auto.blk.31.ffn_norm.weight = util.global.load @__auto.blk.31.ffn_norm.weight : tensor<4096xf32>
    %2280 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.ffn_norm.weight$1 = util.global.load @__auto.blk.31.ffn_norm.weight$1 : tensor<4096xf32>
    %2281 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.ffn_norm.weight$2 = util.global.load @__auto.blk.31.ffn_norm.weight$2 : tensor<4096xf32>
    %2282 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.ffn_norm.weight$3 = util.global.load @__auto.blk.31.ffn_norm.weight$3 : tensor<4096xf32>
    %2283 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.ffn_norm.weight$4 = util.global.load @__auto.blk.31.ffn_norm.weight$4 : tensor<4096xf32>
    %2284 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.ffn_norm.weight$5 = util.global.load @__auto.blk.31.ffn_norm.weight$5 : tensor<4096xf32>
    %2285 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.ffn_norm.weight$6 = util.global.load @__auto.blk.31.ffn_norm.weight$6 : tensor<4096xf32>
    %2286 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.ffn_norm.weight$7 = util.global.load @__auto.blk.31.ffn_norm.weight$7 : tensor<4096xf32>
    %2287 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.blk.31.ffn_gate.weight.shard.0 = util.global.load @__auto.blk.31.ffn_gate.weight.shard.0 : tensor<1792x4096xf16>
    %2288 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_gate.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_gate.weight.shard.1 = util.global.load @__auto.blk.31.ffn_gate.weight.shard.1 : tensor<1792x4096xf16>
    %2289 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_gate.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_gate.weight.shard.2 = util.global.load @__auto.blk.31.ffn_gate.weight.shard.2 : tensor<1792x4096xf16>
    %2290 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_gate.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_gate.weight.shard.3 = util.global.load @__auto.blk.31.ffn_gate.weight.shard.3 : tensor<1792x4096xf16>
    %2291 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_gate.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_gate.weight.shard.4 = util.global.load @__auto.blk.31.ffn_gate.weight.shard.4 : tensor<1792x4096xf16>
    %2292 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_gate.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_gate.weight.shard.5 = util.global.load @__auto.blk.31.ffn_gate.weight.shard.5 : tensor<1792x4096xf16>
    %2293 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_gate.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_gate.weight.shard.6 = util.global.load @__auto.blk.31.ffn_gate.weight.shard.6 : tensor<1792x4096xf16>
    %2294 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_gate.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_gate.weight.shard.7 = util.global.load @__auto.blk.31.ffn_gate.weight.shard.7 : tensor<1792x4096xf16>
    %2295 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_gate.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_up.weight.shard.0 = util.global.load @__auto.blk.31.ffn_up.weight.shard.0 : tensor<1792x4096xf16>
    %2296 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_up.weight.shard.0 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_up.weight.shard.1 = util.global.load @__auto.blk.31.ffn_up.weight.shard.1 : tensor<1792x4096xf16>
    %2297 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_up.weight.shard.1 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_up.weight.shard.2 = util.global.load @__auto.blk.31.ffn_up.weight.shard.2 : tensor<1792x4096xf16>
    %2298 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_up.weight.shard.2 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_up.weight.shard.3 = util.global.load @__auto.blk.31.ffn_up.weight.shard.3 : tensor<1792x4096xf16>
    %2299 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_up.weight.shard.3 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_up.weight.shard.4 = util.global.load @__auto.blk.31.ffn_up.weight.shard.4 : tensor<1792x4096xf16>
    %2300 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_up.weight.shard.4 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_up.weight.shard.5 = util.global.load @__auto.blk.31.ffn_up.weight.shard.5 : tensor<1792x4096xf16>
    %2301 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_up.weight.shard.5 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_up.weight.shard.6 = util.global.load @__auto.blk.31.ffn_up.weight.shard.6 : tensor<1792x4096xf16>
    %2302 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_up.weight.shard.6 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_up.weight.shard.7 = util.global.load @__auto.blk.31.ffn_up.weight.shard.7 : tensor<1792x4096xf16>
    %2303 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_up.weight.shard.7 : tensor<1792x4096xf16> -> !torch.vtensor<[1792,4096],f16>
    %__auto.blk.31.ffn_down.weight.shard.0 = util.global.load @__auto.blk.31.ffn_down.weight.shard.0 : tensor<4096x1792xf16>
    %2304 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_down.weight.shard.0 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.31.ffn_down.weight.shard.1 = util.global.load @__auto.blk.31.ffn_down.weight.shard.1 : tensor<4096x1792xf16>
    %2305 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_down.weight.shard.1 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.31.ffn_down.weight.shard.2 = util.global.load @__auto.blk.31.ffn_down.weight.shard.2 : tensor<4096x1792xf16>
    %2306 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_down.weight.shard.2 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.31.ffn_down.weight.shard.3 = util.global.load @__auto.blk.31.ffn_down.weight.shard.3 : tensor<4096x1792xf16>
    %2307 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_down.weight.shard.3 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.31.ffn_down.weight.shard.4 = util.global.load @__auto.blk.31.ffn_down.weight.shard.4 : tensor<4096x1792xf16>
    %2308 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_down.weight.shard.4 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.31.ffn_down.weight.shard.5 = util.global.load @__auto.blk.31.ffn_down.weight.shard.5 : tensor<4096x1792xf16>
    %2309 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_down.weight.shard.5 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.31.ffn_down.weight.shard.6 = util.global.load @__auto.blk.31.ffn_down.weight.shard.6 : tensor<4096x1792xf16>
    %2310 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_down.weight.shard.6 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
    %__auto.blk.31.ffn_down.weight.shard.7 = util.global.load @__auto.blk.31.ffn_down.weight.shard.7 : tensor<4096x1792xf16>
    %2311 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_down.weight.shard.7 : tensor<4096x1792xf16> -> !torch.vtensor<[4096,1792],f16>
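    // End of per-block parameters. Next come the final RMSNorm weight
    // (output_norm, replicated per device) and the LM head (output.weight),
    // sharded along the hidden dimension into eight 128256x512 f16 pieces.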
    %__auto.output_norm.weight = util.global.load @__auto.output_norm.weight : tensor<4096xf32>
    %2312 = torch_c.from_builtin_tensor %__auto.output_norm.weight : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.output_norm.weight$1 = util.global.load @__auto.output_norm.weight$1 : tensor<4096xf32>
    %2313 = torch_c.from_builtin_tensor %__auto.output_norm.weight$1 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.output_norm.weight$2 = util.global.load @__auto.output_norm.weight$2 : tensor<4096xf32>
    %2314 = torch_c.from_builtin_tensor %__auto.output_norm.weight$2 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.output_norm.weight$3 = util.global.load @__auto.output_norm.weight$3 : tensor<4096xf32>
    %2315 = torch_c.from_builtin_tensor %__auto.output_norm.weight$3 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.output_norm.weight$4 = util.global.load @__auto.output_norm.weight$4 : tensor<4096xf32>
    %2316 = torch_c.from_builtin_tensor %__auto.output_norm.weight$4 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.output_norm.weight$5 = util.global.load @__auto.output_norm.weight$5 : tensor<4096xf32>
    %2317 = torch_c.from_builtin_tensor %__auto.output_norm.weight$5 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.output_norm.weight$6 = util.global.load @__auto.output_norm.weight$6 : tensor<4096xf32>
    %2318 = torch_c.from_builtin_tensor %__auto.output_norm.weight$6 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.output_norm.weight$7 = util.global.load @__auto.output_norm.weight$7 : tensor<4096xf32>
    %2319 = torch_c.from_builtin_tensor %__auto.output_norm.weight$7 : tensor<4096xf32> -> !torch.vtensor<[4096],f32>
    %__auto.output.weight.shard.0 = util.global.load @__auto.output.weight.shard.0 : tensor<128256x512xf16>
    %2320 = torch_c.from_builtin_tensor %__auto.output.weight.shard.0 : tensor<128256x512xf16> -> !torch.vtensor<[128256,512],f16>
    %__auto.output.weight.shard.1 = util.global.load @__auto.output.weight.shard.1 : tensor<128256x512xf16>
    %2321 = torch_c.from_builtin_tensor %__auto.output.weight.shard.1 : tensor<128256x512xf16> -> !torch.vtensor<[128256,512],f16>
    %__auto.output.weight.shard.2 = util.global.load @__auto.output.weight.shard.2 : tensor<128256x512xf16>
    %2322 = torch_c.from_builtin_tensor %__auto.output.weight.shard.2 : tensor<128256x512xf16> -> !torch.vtensor<[128256,512],f16>
    %__auto.output.weight.shard.3 = util.global.load @__auto.output.weight.shard.3 : tensor<128256x512xf16>
    %2323 = torch_c.from_builtin_tensor %__auto.output.weight.shard.3 : tensor<128256x512xf16> -> !torch.vtensor<[128256,512],f16>
    %__auto.output.weight.shard.4 = util.global.load @__auto.output.weight.shard.4 : tensor<128256x512xf16>
    %2324 = torch_c.from_builtin_tensor %__auto.output.weight.shard.4 : tensor<128256x512xf16> -> !torch.vtensor<[128256,512],f16>
    %__auto.output.weight.shard.5 = util.global.load @__auto.output.weight.shard.5 : tensor<128256x512xf16>
    %2325 = torch_c.from_builtin_tensor %__auto.output.weight.shard.5 : tensor<128256x512xf16> -> !torch.vtensor<[128256,512],f16>
    %__auto.output.weight.shard.6 = util.global.load @__auto.output.weight.shard.6 : tensor<128256x512xf16>
    %2326 = torch_c.from_builtin_tensor %__auto.output.weight.shard.6 : tensor<128256x512xf16> -> !torch.vtensor<[128256,512],f16>
    %__auto.output.weight.shard.7 = util.global.load @__auto.output.weight.shard.7 : tensor<128256x512xf16>
    %2327 = torch_c.from_builtin_tensor %__auto.output.weight.shard.7 : tensor<128256x512xf16> -> !torch.vtensor<[128256,512],f16>
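    // %arg3..%arg10 are one ?x131072 f16 buffer per device; given the sharded
    // layout above, these are presumably the per-device KV-cache pages (the
    // dynamic leading dim s2 is bound below).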
    %2328 = torch.copy.to_vtensor %arg3 : !torch.vtensor<[?,131072],f16>
    %2329 = torch.copy.to_vtensor %arg4 : !torch.vtensor<[?,131072],f16>
    %2330 = torch.copy.to_vtensor %arg5 : !torch.vtensor<[?,131072],f16>
    %2331 = torch.copy.to_vtensor %arg6 : !torch.vtensor<[?,131072],f16>
    %2332 = torch.copy.to_vtensor %arg7 : !torch.vtensor<[?,131072],f16>
    %2333 = torch.copy.to_vtensor %arg8 : !torch.vtensor<[?,131072],f16>
    %2334 = torch.copy.to_vtensor %arg9 : !torch.vtensor<[?,131072],f16>
    %2335 = torch.copy.to_vtensor %arg10 : !torch.vtensor<[?,131072],f16>
    %2336 = torch.symbolic_int "s1" {min_val = 2, max_val = 8191} : !torch.int
    %2337 = torch.symbolic_int "s2" {min_val = 2, max_val = 9223372036854775806} : !torch.int
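    // Two symbolic dims drive every dynamic shape in this function:
    //   s1 (2..8191): sequence-related extent; %arg0 is 4 x (s1*16) token ids
    //                 and %arg2 is 4 x s1, consistent with 16-token pages.
    //   s2:           leading (page-count) dim of the eight cache buffers.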
    torch.bind_symbolic_shape %arg0, [%2336], affine_map<()[s0] -> (4, s0 * 16)> : !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %arg2, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2328, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %2329, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %2330, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %2331, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %2332, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %2333, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %2334, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %2335, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
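    // Replicate the batch of token ids (%arg0, 4 x s1*16) across all eight
    // devices: each flow.tensor.transfer below materializes a copy on one
    // #hal.device.promise target, then re-binds the symbolic shape.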
    %2338 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1 = arith.constant 1 : index
    %dim = tensor.dim %2338, %c1 : tensor<4x?xi64>
    %2339 = flow.tensor.transfer %2338 : tensor<4x?xi64>{%dim} to #hal.device.promise<@__device_0>
    %2340 = torch_c.from_builtin_tensor %2339 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2340, [%2336], affine_map<()[s0] -> (4, s0 * 16)> : !torch.vtensor<[4,?],si64>
    %2341 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_0 = arith.constant 1 : index
    %dim_1 = tensor.dim %2341, %c1_0 : tensor<4x?xi64>
    %2342 = flow.tensor.transfer %2341 : tensor<4x?xi64>{%dim_1} to #hal.device.promise<@__device_1>
    %2343 = torch_c.from_builtin_tensor %2342 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2343, [%2336], affine_map<()[s0] -> (4, s0 * 16)> : !torch.vtensor<[4,?],si64>
    %2344 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_2 = arith.constant 1 : index
    %dim_3 = tensor.dim %2344, %c1_2 : tensor<4x?xi64>
    %2345 = flow.tensor.transfer %2344 : tensor<4x?xi64>{%dim_3} to #hal.device.promise<@__device_2>
    %2346 = torch_c.from_builtin_tensor %2345 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2346, [%2336], affine_map<()[s0] -> (4, s0 * 16)> : !torch.vtensor<[4,?],si64>
    %2347 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_4 = arith.constant 1 : index
    %dim_5 = tensor.dim %2347, %c1_4 : tensor<4x?xi64>
    %2348 = flow.tensor.transfer %2347 : tensor<4x?xi64>{%dim_5} to #hal.device.promise<@__device_3>
    %2349 = torch_c.from_builtin_tensor %2348 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2349, [%2336], affine_map<()[s0] -> (4, s0 * 16)> : !torch.vtensor<[4,?],si64>
    %2350 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_6 = arith.constant 1 : index
    %dim_7 = tensor.dim %2350, %c1_6 : tensor<4x?xi64>
    %2351 = flow.tensor.transfer %2350 : tensor<4x?xi64>{%dim_7} to #hal.device.promise<@__device_4>
    %2352 = torch_c.from_builtin_tensor %2351 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2352, [%2336], affine_map<()[s0] -> (4, s0 * 16)> : !torch.vtensor<[4,?],si64>
    %2353 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_8 = arith.constant 1 : index
    %dim_9 = tensor.dim %2353, %c1_8 : tensor<4x?xi64>
    %2354 = flow.tensor.transfer %2353 : tensor<4x?xi64>{%dim_9} to #hal.device.promise<@__device_5>
    %2355 = torch_c.from_builtin_tensor %2354 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2355, [%2336], affine_map<()[s0] -> (4, s0 * 16)> : !torch.vtensor<[4,?],si64>
    %2356 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_10 = arith.constant 1 : index
    %dim_11 = tensor.dim %2356, %c1_10 : tensor<4x?xi64>
    %2357 = flow.tensor.transfer %2356 : tensor<4x?xi64>{%dim_11} to #hal.device.promise<@__device_6>
    %2358 = torch_c.from_builtin_tensor %2357 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2358, [%2336], affine_map<()[s0] -> (4, s0 * 16)> : !torch.vtensor<[4,?],si64>
    %2359 = torch_c.to_builtin_tensor %arg0 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_12 = arith.constant 1 : index
    %dim_13 = tensor.dim %2359, %c1_12 : tensor<4x?xi64>
    %2360 = flow.tensor.transfer %2359 : tensor<4x?xi64>{%dim_13} to #hal.device.promise<@__device_7>
    %2361 = torch_c.from_builtin_tensor %2360 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2361, [%2336], affine_map<()[s0] -> (4, s0 * 16)> : !torch.vtensor<[4,?],si64>
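    // Same replication for %arg2 (4 x s1; likely per-sequence page indices,
    // given the s1-vs-s1*16 relationship noted above): one transfer per device.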
    %2362 = torch_c.to_builtin_tensor %arg2 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_14 = arith.constant 1 : index
    %dim_15 = tensor.dim %2362, %c1_14 : tensor<4x?xi64>
    %2363 = flow.tensor.transfer %2362 : tensor<4x?xi64>{%dim_15} to #hal.device.promise<@__device_0>
    %2364 = torch_c.from_builtin_tensor %2363 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2364, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %2365 = torch_c.to_builtin_tensor %arg2 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_16 = arith.constant 1 : index
    %dim_17 = tensor.dim %2365, %c1_16 : tensor<4x?xi64>
    %2366 = flow.tensor.transfer %2365 : tensor<4x?xi64>{%dim_17} to #hal.device.promise<@__device_1>
    %2367 = torch_c.from_builtin_tensor %2366 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2367, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %2368 = torch_c.to_builtin_tensor %arg2 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_18 = arith.constant 1 : index
    %dim_19 = tensor.dim %2368, %c1_18 : tensor<4x?xi64>
    %2369 = flow.tensor.transfer %2368 : tensor<4x?xi64>{%dim_19} to #hal.device.promise<@__device_2>
    %2370 = torch_c.from_builtin_tensor %2369 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2370, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %2371 = torch_c.to_builtin_tensor %arg2 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_20 = arith.constant 1 : index
    %dim_21 = tensor.dim %2371, %c1_20 : tensor<4x?xi64>
    %2372 = flow.tensor.transfer %2371 : tensor<4x?xi64>{%dim_21} to #hal.device.promise<@__device_3>
    %2373 = torch_c.from_builtin_tensor %2372 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2373, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %2374 = torch_c.to_builtin_tensor %arg2 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_22 = arith.constant 1 : index
    %dim_23 = tensor.dim %2374, %c1_22 : tensor<4x?xi64>
    %2375 = flow.tensor.transfer %2374 : tensor<4x?xi64>{%dim_23} to #hal.device.promise<@__device_4>
    %2376 = torch_c.from_builtin_tensor %2375 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2376, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %2377 = torch_c.to_builtin_tensor %arg2 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_24 = arith.constant 1 : index
    %dim_25 = tensor.dim %2377, %c1_24 : tensor<4x?xi64>
    %2378 = flow.tensor.transfer %2377 : tensor<4x?xi64>{%dim_25} to #hal.device.promise<@__device_5>
    %2379 = torch_c.from_builtin_tensor %2378 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2379, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %2380 = torch_c.to_builtin_tensor %arg2 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_26 = arith.constant 1 : index
    %dim_27 = tensor.dim %2380, %c1_26 : tensor<4x?xi64>
    %2381 = flow.tensor.transfer %2380 : tensor<4x?xi64>{%dim_27} to #hal.device.promise<@__device_6>
    %2382 = torch_c.from_builtin_tensor %2381 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2382, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %2383 = torch_c.to_builtin_tensor %arg2 : !torch.vtensor<[4,?],si64> -> tensor<4x?xi64>
    %c1_28 = arith.constant 1 : index
    %dim_29 = tensor.dim %2383, %c1_28 : tensor<4x?xi64>
    %2384 = flow.tensor.transfer %2383 : tensor<4x?xi64>{%dim_29} to #hal.device.promise<@__device_7>
    %2385 = torch_c.from_builtin_tensor %2384 : tensor<4x?xi64> -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %2385, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
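    // Per-device embedding lookup: each device gathers rows of its local copy
    // of the 128256x4096 f16 token-embedding table (%0..%7) with its local
    // token ids, yielding eight replicated 4 x (s1*16) x 4096 activations.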
    %int-1 = torch.constant.int -1
    %false = torch.constant.bool false
    %false_30 = torch.constant.bool false
    %2386 = torch.aten.embedding %0, %2340, %int-1, %false, %false_30 : !torch.vtensor<[128256,4096],f16>, !torch.vtensor<[4,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int-1_31 = torch.constant.int -1
    %false_32 = torch.constant.bool false
    %false_33 = torch.constant.bool false
    %2387 = torch.aten.embedding %1, %2343, %int-1_31, %false_32, %false_33 : !torch.vtensor<[128256,4096],f16>, !torch.vtensor<[4,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int-1_34 = torch.constant.int -1
    %false_35 = torch.constant.bool false
    %false_36 = torch.constant.bool false
    %2388 = torch.aten.embedding %2, %2346, %int-1_34, %false_35, %false_36 : !torch.vtensor<[128256,4096],f16>, !torch.vtensor<[4,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int-1_37 = torch.constant.int -1
    %false_38 = torch.constant.bool false
    %false_39 = torch.constant.bool false
    %2389 = torch.aten.embedding %3, %2349, %int-1_37, %false_38, %false_39 : !torch.vtensor<[128256,4096],f16>, !torch.vtensor<[4,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int-1_40 = torch.constant.int -1
    %false_41 = torch.constant.bool false
    %false_42 = torch.constant.bool false
    %2390 = torch.aten.embedding %4, %2352, %int-1_40, %false_41, %false_42 : !torch.vtensor<[128256,4096],f16>, !torch.vtensor<[4,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int-1_43 = torch.constant.int -1
    %false_44 = torch.constant.bool false
    %false_45 = torch.constant.bool false
    %2391 = torch.aten.embedding %5, %2355, %int-1_43, %false_44, %false_45 : !torch.vtensor<[128256,4096],f16>, !torch.vtensor<[4,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int-1_46 = torch.constant.int -1
    %false_47 = torch.constant.bool false
    %false_48 = torch.constant.bool false
    %2392 = torch.aten.embedding %6, %2358, %int-1_46, %false_47, %false_48 : !torch.vtensor<[128256,4096],f16>, !torch.vtensor<[4,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int-1_49 = torch.constant.int -1
    %false_50 = torch.constant.bool false
    %false_51 = torch.constant.bool false
    %2393 = torch.aten.embedding %7, %2361, %int-1_49, %false_50, %false_51 : !torch.vtensor<[128256,4096],f16>, !torch.vtensor<[4,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
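    // What follows is the first RMSNorm, computed redundantly on every device:
    // upcast to f32, then x * rsqrt(mean(x^2, dim=-1) + eps).
    // Step 1: widen the f16 embeddings to f32 (torch dtype code 6) for a
    // numerically stable reduction.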
    %int6 = torch.constant.int 6
    %2394 = torch.prims.convert_element_type %2386, %int6 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_52 = torch.constant.int 6
    %2395 = torch.prims.convert_element_type %2387, %int6_52 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53 = torch.constant.int 6
    %2396 = torch.prims.convert_element_type %2388, %int6_53 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_54 = torch.constant.int 6
    %2397 = torch.prims.convert_element_type %2389, %int6_54 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55 = torch.constant.int 6
    %2398 = torch.prims.convert_element_type %2390, %int6_55 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_56 = torch.constant.int 6
    %2399 = torch.prims.convert_element_type %2391, %int6_56 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57 = torch.constant.int 6
    %2400 = torch.prims.convert_element_type %2392, %int6_57 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_58 = torch.constant.int 6
    %2401 = torch.prims.convert_element_type %2393, %int6_58 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
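    // Step 2: square each element (x^2) ahead of the mean-of-squares.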
    %int2 = torch.constant.int 2
    %2402 = torch.aten.pow.Tensor_Scalar %2394, %int2 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_59 = torch.constant.int 2
    %2403 = torch.aten.pow.Tensor_Scalar %2395, %int2_59 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_60 = torch.constant.int 2
    %2404 = torch.aten.pow.Tensor_Scalar %2396, %int2_60 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_61 = torch.constant.int 2
    %2405 = torch.aten.pow.Tensor_Scalar %2397, %int2_61 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_62 = torch.constant.int 2
    %2406 = torch.aten.pow.Tensor_Scalar %2398, %int2_62 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_63 = torch.constant.int 2
    %2407 = torch.aten.pow.Tensor_Scalar %2399, %int2_63 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_64 = torch.constant.int 2
    %2408 = torch.aten.pow.Tensor_Scalar %2400, %int2_64 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_65 = torch.constant.int 2
    %2409 = torch.aten.pow.Tensor_Scalar %2401, %int2_65 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
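    // Step 3: mean of x^2 over the hidden dim (dim -1, keepdim = true),
    // giving one 4 x (s1*16) x 1 statistic per device.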
    %int-1_66 = torch.constant.int -1
    %2410 = torch.prim.ListConstruct %int-1_66 : (!torch.int) -> !torch.list<int>
    %true = torch.constant.bool true
    %none = torch.constant.none
    %2411 = torch.aten.mean.dim %2402, %2410, %true, %none : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_67 = torch.constant.int -1
    %2412 = torch.prim.ListConstruct %int-1_67 : (!torch.int) -> !torch.list<int>
    %true_68 = torch.constant.bool true
    %none_69 = torch.constant.none
    %2413 = torch.aten.mean.dim %2403, %2412, %true_68, %none_69 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_70 = torch.constant.int -1
    %2414 = torch.prim.ListConstruct %int-1_70 : (!torch.int) -> !torch.list<int>
    %true_71 = torch.constant.bool true
    %none_72 = torch.constant.none
    %2415 = torch.aten.mean.dim %2404, %2414, %true_71, %none_72 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_73 = torch.constant.int -1
    %2416 = torch.prim.ListConstruct %int-1_73 : (!torch.int) -> !torch.list<int>
    %true_74 = torch.constant.bool true
    %none_75 = torch.constant.none
    %2417 = torch.aten.mean.dim %2405, %2416, %true_74, %none_75 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_76 = torch.constant.int -1
    %2418 = torch.prim.ListConstruct %int-1_76 : (!torch.int) -> !torch.list<int>
    %true_77 = torch.constant.bool true
    %none_78 = torch.constant.none
    %2419 = torch.aten.mean.dim %2406, %2418, %true_77, %none_78 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_79 = torch.constant.int -1
    %2420 = torch.prim.ListConstruct %int-1_79 : (!torch.int) -> !torch.list<int>
    %true_80 = torch.constant.bool true
    %none_81 = torch.constant.none
    %2421 = torch.aten.mean.dim %2407, %2420, %true_80, %none_81 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_82 = torch.constant.int -1
    %2422 = torch.prim.ListConstruct %int-1_82 : (!torch.int) -> !torch.list<int>
    %true_83 = torch.constant.bool true
    %none_84 = torch.constant.none
    %2423 = torch.aten.mean.dim %2408, %2422, %true_83, %none_84 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_85 = torch.constant.int -1
    %2424 = torch.prim.ListConstruct %int-1_85 : (!torch.int) -> !torch.list<int>
    %true_86 = torch.constant.bool true
    %none_87 = torch.constant.none
    %2425 = torch.aten.mean.dim %2409, %2424, %true_86, %none_87 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
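    // Step 4: add the epsilon (~1e-5; 9.9999997e-6 is its nearest f32 value)
    // to guard the upcoming rsqrt against a zero mean.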
    %float9.999990e-06 = torch.constant.float 9.9999997473787516E-6
    %int1 = torch.constant.int 1
    %2426 = torch.aten.add.Scalar %2411, %float9.999990e-06, %int1 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_88 = torch.constant.float 9.9999997473787516E-6
    %int1_89 = torch.constant.int 1
    %2427 = torch.aten.add.Scalar %2413, %float9.999990e-06_88, %int1_89 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_90 = torch.constant.float 9.9999997473787516E-6
    %int1_91 = torch.constant.int 1
    %2428 = torch.aten.add.Scalar %2415, %float9.999990e-06_90, %int1_91 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_92 = torch.constant.float 9.9999997473787516E-6
    %int1_93 = torch.constant.int 1
    %2429 = torch.aten.add.Scalar %2417, %float9.999990e-06_92, %int1_93 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_94 = torch.constant.float 9.9999997473787516E-6
    %int1_95 = torch.constant.int 1
    %2430 = torch.aten.add.Scalar %2419, %float9.999990e-06_94, %int1_95 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_96 = torch.constant.float 9.9999997473787516E-6
    %int1_97 = torch.constant.int 1
    %2431 = torch.aten.add.Scalar %2421, %float9.999990e-06_96, %int1_97 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_98 = torch.constant.float 9.9999997473787516E-6
    %int1_99 = torch.constant.int 1
    %2432 = torch.aten.add.Scalar %2423, %float9.999990e-06_98, %int1_99 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_100 = torch.constant.float 9.9999997473787516E-6
    %int1_101 = torch.constant.int 1
    %2433 = torch.aten.add.Scalar %2425, %float9.999990e-06_100, %int1_101 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
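    // %2434..%2441: rsqrt of (mean(x^2) + eps) for each shard.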
    %2434 = torch.aten.rsqrt %2426 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %2435 = torch.aten.rsqrt %2427 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %2436 = torch.aten.rsqrt %2428 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %2437 = torch.aten.rsqrt %2429 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %2438 = torch.aten.rsqrt %2430 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %2439 = torch.aten.rsqrt %2431 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %2440 = torch.aten.rsqrt %2432 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %2441 = torch.aten.rsqrt %2433 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %2441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
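    // %2442..%2449: scale the f32 activations (%2394..%2401) by the per-token
    // rsqrt factors, broadcasting [4,?,1] across the 4096 hidden dim.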
    %2442 = torch.aten.mul.Tensor %2394, %2434 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2443 = torch.aten.mul.Tensor %2395, %2435 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2444 = torch.aten.mul.Tensor %2396, %2436 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2445 = torch.aten.mul.Tensor %2397, %2437 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2446 = torch.aten.mul.Tensor %2398, %2438 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2447 = torch.aten.mul.Tensor %2399, %2439 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2448 = torch.aten.mul.Tensor %2400, %2440 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2449 = torch.aten.mul.Tensor %2401, %2441 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
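    // %2450..%2457: elementwise multiply by the [4096]-f32 norm weights
    // %8..%15 -- presumably the per-device copies of blk.0.attn_norm.weight.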
    %2450 = torch.aten.mul.Tensor %8, %2442 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2451 = torch.aten.mul.Tensor %9, %2443 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2452 = torch.aten.mul.Tensor %10, %2444 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2453 = torch.aten.mul.Tensor %11, %2445 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2454 = torch.aten.mul.Tensor %12, %2446 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2455 = torch.aten.mul.Tensor %13, %2447 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2456 = torch.aten.mul.Tensor %14, %2448 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %2457 = torch.aten.mul.Tensor %15, %2449 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %2457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
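    // %2458..%2465: cast the normalized activations back to f16 (dtype code 5)
    // ahead of the attention projection matmuls.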
    %int5 = torch.constant.int 5
    %2458 = torch.prims.convert_element_type %2450, %int5 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_102 = torch.constant.int 5
    %2459 = torch.prims.convert_element_type %2451, %int5_102 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_103 = torch.constant.int 5
    %2460 = torch.prims.convert_element_type %2452, %int5_103 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_104 = torch.constant.int 5
    %2461 = torch.prims.convert_element_type %2453, %int5_104 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_105 = torch.constant.int 5
    %2462 = torch.prims.convert_element_type %2454, %int5_105 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_106 = torch.constant.int 5
    %2463 = torch.prims.convert_element_type %2455, %int5_106 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_107 = torch.constant.int 5
    %2464 = torch.prims.convert_element_type %2456, %int5_107 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_108 = torch.constant.int 5
    %2465 = torch.prims.convert_element_type %2457, %int5_108 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %2465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
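    // %2467..%2481: transpose each per-device [512,4096] weight shard to
    // [4096,512]; the shape matches the attn_q.weight shards, so these are
    // apparently the Q projection weights readied for plain mm.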
    %int1_109 = torch.constant.int 1
    %int0 = torch.constant.int 0
    %2466 = torch.prim.ListConstruct %int1_109, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
    %2467 = torch.aten.permute %16, %2466 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_110 = torch.constant.int 1
    %int0_111 = torch.constant.int 0
    %2468 = torch.prim.ListConstruct %int1_110, %int0_111 : (!torch.int, !torch.int) -> !torch.list<int>
    %2469 = torch.aten.permute %17, %2468 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_112 = torch.constant.int 1
    %int0_113 = torch.constant.int 0
    %2470 = torch.prim.ListConstruct %int1_112, %int0_113 : (!torch.int, !torch.int) -> !torch.list<int>
    %2471 = torch.aten.permute %18, %2470 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_114 = torch.constant.int 1
    %int0_115 = torch.constant.int 0
    %2472 = torch.prim.ListConstruct %int1_114, %int0_115 : (!torch.int, !torch.int) -> !torch.list<int>
    %2473 = torch.aten.permute %19, %2472 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_116 = torch.constant.int 1
    %int0_117 = torch.constant.int 0
    %2474 = torch.prim.ListConstruct %int1_116, %int0_117 : (!torch.int, !torch.int) -> !torch.list<int>
    %2475 = torch.aten.permute %20, %2474 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_118 = torch.constant.int 1
    %int0_119 = torch.constant.int 0
    %2476 = torch.prim.ListConstruct %int1_118, %int0_119 : (!torch.int, !torch.int) -> !torch.list<int>
    %2477 = torch.aten.permute %21, %2476 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_120 = torch.constant.int 1
    %int0_121 = torch.constant.int 0
    %2478 = torch.prim.ListConstruct %int1_120, %int0_121 : (!torch.int, !torch.int) -> !torch.list<int>
    %2479 = torch.aten.permute %22, %2478 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_122 = torch.constant.int 1
    %int0_123 = torch.constant.int 0
    %2480 = torch.prim.ListConstruct %int1_122, %int0_123 : (!torch.int, !torch.int) -> !torch.list<int>
    %2481 = torch.aten.permute %23, %2480 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
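    // Q projection, repeated once per device: %2482 reads the dynamic sequence
    // length from %arg0, each [4,?,4096] input is flattened to [4*s,4096],
    // multiplied with its [4096,512] shard, and viewed back to [4,?,512].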
    %int1_124 = torch.constant.int 1
    %2482 = torch.aten.size.int %arg0, %int1_124 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.int
    %int4 = torch.constant.int 4
    %2483 = torch.aten.mul.int %int4, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096 = torch.constant.int 4096
    %2484 = torch.prim.ListConstruct %2483, %int4096 : (!torch.int, !torch.int) -> !torch.list<int>
    %2485 = torch.aten.view %2458, %2484 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2485, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2486 = torch.aten.mm %2485, %2467 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %2486, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_125 = torch.constant.int 4
    %int512 = torch.constant.int 512
    %2487 = torch.prim.ListConstruct %int4_125, %2482, %int512 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2488 = torch.aten.view %2486, %2487 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %2488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_126 = torch.constant.int 4
    %2489 = torch.aten.mul.int %int4_126, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_127 = torch.constant.int 4096
    %2490 = torch.prim.ListConstruct %2489, %int4096_127 : (!torch.int, !torch.int) -> !torch.list<int>
    %2491 = torch.aten.view %2459, %2490 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2491, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2492 = torch.aten.mm %2491, %2469 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %2492, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_128 = torch.constant.int 4
    %int512_129 = torch.constant.int 512
    %2493 = torch.prim.ListConstruct %int4_128, %2482, %int512_129 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2494 = torch.aten.view %2492, %2493 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %2494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_130 = torch.constant.int 4
    %2495 = torch.aten.mul.int %int4_130, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_131 = torch.constant.int 4096
    %2496 = torch.prim.ListConstruct %2495, %int4096_131 : (!torch.int, !torch.int) -> !torch.list<int>
    %2497 = torch.aten.view %2460, %2496 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2497, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2498 = torch.aten.mm %2497, %2471 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %2498, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_132 = torch.constant.int 4
    %int512_133 = torch.constant.int 512
    %2499 = torch.prim.ListConstruct %int4_132, %2482, %int512_133 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2500 = torch.aten.view %2498, %2499 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %2500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_134 = torch.constant.int 4
    %2501 = torch.aten.mul.int %int4_134, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_135 = torch.constant.int 4096
    %2502 = torch.prim.ListConstruct %2501, %int4096_135 : (!torch.int, !torch.int) -> !torch.list<int>
    %2503 = torch.aten.view %2461, %2502 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2503, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2504 = torch.aten.mm %2503, %2473 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %2504, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_136 = torch.constant.int 4
    %int512_137 = torch.constant.int 512
    %2505 = torch.prim.ListConstruct %int4_136, %2482, %int512_137 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2506 = torch.aten.view %2504, %2505 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %2506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_138 = torch.constant.int 4
    %2507 = torch.aten.mul.int %int4_138, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_139 = torch.constant.int 4096
    %2508 = torch.prim.ListConstruct %2507, %int4096_139 : (!torch.int, !torch.int) -> !torch.list<int>
    %2509 = torch.aten.view %2462, %2508 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2509, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2510 = torch.aten.mm %2509, %2475 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %2510, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_140 = torch.constant.int 4
    %int512_141 = torch.constant.int 512
    %2511 = torch.prim.ListConstruct %int4_140, %2482, %int512_141 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2512 = torch.aten.view %2510, %2511 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %2512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_142 = torch.constant.int 4
    %2513 = torch.aten.mul.int %int4_142, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_143 = torch.constant.int 4096
    %2514 = torch.prim.ListConstruct %2513, %int4096_143 : (!torch.int, !torch.int) -> !torch.list<int>
    %2515 = torch.aten.view %2463, %2514 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2515, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2516 = torch.aten.mm %2515, %2477 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %2516, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_144 = torch.constant.int 4
    %int512_145 = torch.constant.int 512
    %2517 = torch.prim.ListConstruct %int4_144, %2482, %int512_145 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2518 = torch.aten.view %2516, %2517 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %2518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_146 = torch.constant.int 4
    %2519 = torch.aten.mul.int %int4_146, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_147 = torch.constant.int 4096
    %2520 = torch.prim.ListConstruct %2519, %int4096_147 : (!torch.int, !torch.int) -> !torch.list<int>
    %2521 = torch.aten.view %2464, %2520 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2521, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2522 = torch.aten.mm %2521, %2479 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %2522, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_148 = torch.constant.int 4
    %int512_149 = torch.constant.int 512
    %2523 = torch.prim.ListConstruct %int4_148, %2482, %int512_149 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2524 = torch.aten.view %2522, %2523 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %2524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_150 = torch.constant.int 4
    %2525 = torch.aten.mul.int %int4_150, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_151 = torch.constant.int 4096
    %2526 = torch.prim.ListConstruct %2525, %int4096_151 : (!torch.int, !torch.int) -> !torch.list<int>
    %2527 = torch.aten.view %2465, %2526 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2527, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2528 = torch.aten.mm %2527, %2481 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %2528, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_152 = torch.constant.int 4
    %int512_153 = torch.constant.int 512
    %2529 = torch.prim.ListConstruct %int4_152, %2482, %int512_153 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2530 = torch.aten.view %2528, %2529 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %2530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
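    // %2532..%2546: transpose a set of [128,4096] shards (%24..%31) to
    // [4096,128]; given the 128-wide output these are likely the per-device
    // attn_k weights.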
    %int1_154 = torch.constant.int 1
    %int0_155 = torch.constant.int 0
    %2531 = torch.prim.ListConstruct %int1_154, %int0_155 : (!torch.int, !torch.int) -> !torch.list<int>
    %2532 = torch.aten.permute %24, %2531 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_156 = torch.constant.int 1
    %int0_157 = torch.constant.int 0
    %2533 = torch.prim.ListConstruct %int1_156, %int0_157 : (!torch.int, !torch.int) -> !torch.list<int>
    %2534 = torch.aten.permute %25, %2533 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_158 = torch.constant.int 1
    %int0_159 = torch.constant.int 0
    %2535 = torch.prim.ListConstruct %int1_158, %int0_159 : (!torch.int, !torch.int) -> !torch.list<int>
    %2536 = torch.aten.permute %26, %2535 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_160 = torch.constant.int 1
    %int0_161 = torch.constant.int 0
    %2537 = torch.prim.ListConstruct %int1_160, %int0_161 : (!torch.int, !torch.int) -> !torch.list<int>
    %2538 = torch.aten.permute %27, %2537 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_162 = torch.constant.int 1
    %int0_163 = torch.constant.int 0
    %2539 = torch.prim.ListConstruct %int1_162, %int0_163 : (!torch.int, !torch.int) -> !torch.list<int>
    %2540 = torch.aten.permute %28, %2539 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_164 = torch.constant.int 1
    %int0_165 = torch.constant.int 0
    %2541 = torch.prim.ListConstruct %int1_164, %int0_165 : (!torch.int, !torch.int) -> !torch.list<int>
    %2542 = torch.aten.permute %29, %2541 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_166 = torch.constant.int 1
    %int0_167 = torch.constant.int 0
    %2543 = torch.prim.ListConstruct %int1_166, %int0_167 : (!torch.int, !torch.int) -> !torch.list<int>
    %2544 = torch.aten.permute %30, %2543 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_168 = torch.constant.int 1
    %int0_169 = torch.constant.int 0
    %2545 = torch.prim.ListConstruct %int1_168, %int0_169 : (!torch.int, !torch.int) -> !torch.list<int>
    %2546 = torch.aten.permute %31, %2545 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
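    // K projection, once per device: the same flatten / mm / view pattern,
    // producing [4,?,128] per shard.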
    %int4_170 = torch.constant.int 4
    %2547 = torch.aten.mul.int %int4_170, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_171 = torch.constant.int 4096
    %2548 = torch.prim.ListConstruct %2547, %int4096_171 : (!torch.int, !torch.int) -> !torch.list<int>
    %2549 = torch.aten.view %2458, %2548 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2549, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2550 = torch.aten.mm %2549, %2532 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2550, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_172 = torch.constant.int 4
    %int128 = torch.constant.int 128
    %2551 = torch.prim.ListConstruct %int4_172, %2482, %int128 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2552 = torch.aten.view %2550, %2551 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_173 = torch.constant.int 4
    %2553 = torch.aten.mul.int %int4_173, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_174 = torch.constant.int 4096
    %2554 = torch.prim.ListConstruct %2553, %int4096_174 : (!torch.int, !torch.int) -> !torch.list<int>
    %2555 = torch.aten.view %2459, %2554 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2555, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2556 = torch.aten.mm %2555, %2534 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2556, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_175 = torch.constant.int 4
    %int128_176 = torch.constant.int 128
    %2557 = torch.prim.ListConstruct %int4_175, %2482, %int128_176 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2558 = torch.aten.view %2556, %2557 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_177 = torch.constant.int 4
    %2559 = torch.aten.mul.int %int4_177, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_178 = torch.constant.int 4096
    %2560 = torch.prim.ListConstruct %2559, %int4096_178 : (!torch.int, !torch.int) -> !torch.list<int>
    %2561 = torch.aten.view %2460, %2560 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2561, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2562 = torch.aten.mm %2561, %2536 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2562, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_179 = torch.constant.int 4
    %int128_180 = torch.constant.int 128
    %2563 = torch.prim.ListConstruct %int4_179, %2482, %int128_180 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2564 = torch.aten.view %2562, %2563 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_181 = torch.constant.int 4
    %2565 = torch.aten.mul.int %int4_181, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_182 = torch.constant.int 4096
    %2566 = torch.prim.ListConstruct %2565, %int4096_182 : (!torch.int, !torch.int) -> !torch.list<int>
    %2567 = torch.aten.view %2461, %2566 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2567, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2568 = torch.aten.mm %2567, %2538 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2568, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_183 = torch.constant.int 4
    %int128_184 = torch.constant.int 128
    %2569 = torch.prim.ListConstruct %int4_183, %2482, %int128_184 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2570 = torch.aten.view %2568, %2569 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_185 = torch.constant.int 4
    %2571 = torch.aten.mul.int %int4_185, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_186 = torch.constant.int 4096
    %2572 = torch.prim.ListConstruct %2571, %int4096_186 : (!torch.int, !torch.int) -> !torch.list<int>
    %2573 = torch.aten.view %2462, %2572 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2573, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2574 = torch.aten.mm %2573, %2540 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2574, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_187 = torch.constant.int 4
    %int128_188 = torch.constant.int 128
    %2575 = torch.prim.ListConstruct %int4_187, %2482, %int128_188 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2576 = torch.aten.view %2574, %2575 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_189 = torch.constant.int 4
    %2577 = torch.aten.mul.int %int4_189, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_190 = torch.constant.int 4096
    %2578 = torch.prim.ListConstruct %2577, %int4096_190 : (!torch.int, !torch.int) -> !torch.list<int>
    %2579 = torch.aten.view %2463, %2578 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2579, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2580 = torch.aten.mm %2579, %2542 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2580, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_191 = torch.constant.int 4
    %int128_192 = torch.constant.int 128
    %2581 = torch.prim.ListConstruct %int4_191, %2482, %int128_192 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2582 = torch.aten.view %2580, %2581 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_193 = torch.constant.int 4
    %2583 = torch.aten.mul.int %int4_193, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_194 = torch.constant.int 4096
    %2584 = torch.prim.ListConstruct %2583, %int4096_194 : (!torch.int, !torch.int) -> !torch.list<int>
    %2585 = torch.aten.view %2464, %2584 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2585, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2586 = torch.aten.mm %2585, %2544 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2586, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_195 = torch.constant.int 4
    %int128_196 = torch.constant.int 128
    %2587 = torch.prim.ListConstruct %int4_195, %2482, %int128_196 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2588 = torch.aten.view %2586, %2587 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_197 = torch.constant.int 4
    %2589 = torch.aten.mul.int %int4_197, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_198 = torch.constant.int 4096
    %2590 = torch.prim.ListConstruct %2589, %int4096_198 : (!torch.int, !torch.int) -> !torch.list<int>
    %2591 = torch.aten.view %2465, %2590 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2591, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2592 = torch.aten.mm %2591, %2546 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2592, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_199 = torch.constant.int 4
    %int128_200 = torch.constant.int 128
    %2593 = torch.prim.ListConstruct %int4_199, %2482, %int128_200 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2594 = torch.aten.view %2592, %2593 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
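    // %2596..%2610: transpose a second set of [128,4096] shards (%32..%39) to
    // [4096,128], likely the per-device attn_v weights.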
    %int1_201 = torch.constant.int 1
    %int0_202 = torch.constant.int 0
    %2595 = torch.prim.ListConstruct %int1_201, %int0_202 : (!torch.int, !torch.int) -> !torch.list<int>
    %2596 = torch.aten.permute %32, %2595 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_203 = torch.constant.int 1
    %int0_204 = torch.constant.int 0
    %2597 = torch.prim.ListConstruct %int1_203, %int0_204 : (!torch.int, !torch.int) -> !torch.list<int>
    %2598 = torch.aten.permute %33, %2597 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_205 = torch.constant.int 1
    %int0_206 = torch.constant.int 0
    %2599 = torch.prim.ListConstruct %int1_205, %int0_206 : (!torch.int, !torch.int) -> !torch.list<int>
    %2600 = torch.aten.permute %34, %2599 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_207 = torch.constant.int 1
    %int0_208 = torch.constant.int 0
    %2601 = torch.prim.ListConstruct %int1_207, %int0_208 : (!torch.int, !torch.int) -> !torch.list<int>
    %2602 = torch.aten.permute %35, %2601 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_209 = torch.constant.int 1
    %int0_210 = torch.constant.int 0
    %2603 = torch.prim.ListConstruct %int1_209, %int0_210 : (!torch.int, !torch.int) -> !torch.list<int>
    %2604 = torch.aten.permute %36, %2603 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_211 = torch.constant.int 1
    %int0_212 = torch.constant.int 0
    %2605 = torch.prim.ListConstruct %int1_211, %int0_212 : (!torch.int, !torch.int) -> !torch.list<int>
    %2606 = torch.aten.permute %37, %2605 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_213 = torch.constant.int 1
    %int0_214 = torch.constant.int 0
    %2607 = torch.prim.ListConstruct %int1_213, %int0_214 : (!torch.int, !torch.int) -> !torch.list<int>
    %2608 = torch.aten.permute %38, %2607 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_215 = torch.constant.int 1
    %int0_216 = torch.constant.int 0
    %2609 = torch.prim.ListConstruct %int1_215, %int0_216 : (!torch.int, !torch.int) -> !torch.list<int>
    %2610 = torch.aten.permute %39, %2609 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
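    // V projection, once per device: flatten to [4*s,4096], mm with the
    // [4096,128] shard, view back to [4,?,128].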
    %int4_217 = torch.constant.int 4
    %2611 = torch.aten.mul.int %int4_217, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_218 = torch.constant.int 4096
    %2612 = torch.prim.ListConstruct %2611, %int4096_218 : (!torch.int, !torch.int) -> !torch.list<int>
    %2613 = torch.aten.view %2458, %2612 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2613, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2614 = torch.aten.mm %2613, %2596 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2614, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_219 = torch.constant.int 4
    %int128_220 = torch.constant.int 128
    %2615 = torch.prim.ListConstruct %int4_219, %2482, %int128_220 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2616 = torch.aten.view %2614, %2615 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_221 = torch.constant.int 4
    %2617 = torch.aten.mul.int %int4_221, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_222 = torch.constant.int 4096
    %2618 = torch.prim.ListConstruct %2617, %int4096_222 : (!torch.int, !torch.int) -> !torch.list<int>
    %2619 = torch.aten.view %2459, %2618 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2619, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2620 = torch.aten.mm %2619, %2598 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2620, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_223 = torch.constant.int 4
    %int128_224 = torch.constant.int 128
    %2621 = torch.prim.ListConstruct %int4_223, %2482, %int128_224 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2622 = torch.aten.view %2620, %2621 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_225 = torch.constant.int 4
    %2623 = torch.aten.mul.int %int4_225, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_226 = torch.constant.int 4096
    %2624 = torch.prim.ListConstruct %2623, %int4096_226 : (!torch.int, !torch.int) -> !torch.list<int>
    %2625 = torch.aten.view %2460, %2624 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2625, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2626 = torch.aten.mm %2625, %2600 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2626, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_227 = torch.constant.int 4
    %int128_228 = torch.constant.int 128
    %2627 = torch.prim.ListConstruct %int4_227, %2482, %int128_228 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2628 = torch.aten.view %2626, %2627 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_229 = torch.constant.int 4
    %2629 = torch.aten.mul.int %int4_229, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_230 = torch.constant.int 4096
    %2630 = torch.prim.ListConstruct %2629, %int4096_230 : (!torch.int, !torch.int) -> !torch.list<int>
    %2631 = torch.aten.view %2461, %2630 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2631, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2632 = torch.aten.mm %2631, %2602 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2632, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_231 = torch.constant.int 4
    %int128_232 = torch.constant.int 128
    %2633 = torch.prim.ListConstruct %int4_231, %2482, %int128_232 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2634 = torch.aten.view %2632, %2633 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_233 = torch.constant.int 4
    %2635 = torch.aten.mul.int %int4_233, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_234 = torch.constant.int 4096
    %2636 = torch.prim.ListConstruct %2635, %int4096_234 : (!torch.int, !torch.int) -> !torch.list<int>
    %2637 = torch.aten.view %2462, %2636 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2637, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2638 = torch.aten.mm %2637, %2604 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2638, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_235 = torch.constant.int 4
    %int128_236 = torch.constant.int 128
    %2639 = torch.prim.ListConstruct %int4_235, %2482, %int128_236 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2640 = torch.aten.view %2638, %2639 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_237 = torch.constant.int 4
    %2641 = torch.aten.mul.int %int4_237, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_238 = torch.constant.int 4096
    %2642 = torch.prim.ListConstruct %2641, %int4096_238 : (!torch.int, !torch.int) -> !torch.list<int>
    %2643 = torch.aten.view %2463, %2642 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2643, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2644 = torch.aten.mm %2643, %2606 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2644, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_239 = torch.constant.int 4
    %int128_240 = torch.constant.int 128
    %2645 = torch.prim.ListConstruct %int4_239, %2482, %int128_240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2646 = torch.aten.view %2644, %2645 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_241 = torch.constant.int 4
    %2647 = torch.aten.mul.int %int4_241, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_242 = torch.constant.int 4096
    %2648 = torch.prim.ListConstruct %2647, %int4096_242 : (!torch.int, !torch.int) -> !torch.list<int>
    %2649 = torch.aten.view %2464, %2648 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2649, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2650 = torch.aten.mm %2649, %2608 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2650, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_243 = torch.constant.int 4
    %int128_244 = torch.constant.int 128
    %2651 = torch.prim.ListConstruct %int4_243, %2482, %int128_244 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2652 = torch.aten.view %2650, %2651 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_245 = torch.constant.int 4
    %2653 = torch.aten.mul.int %int4_245, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_246 = torch.constant.int 4096
    %2654 = torch.prim.ListConstruct %2653, %int4096_246 : (!torch.int, !torch.int) -> !torch.list<int>
    %2655 = torch.aten.view %2465, %2654 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %2655, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %2656 = torch.aten.mm %2655, %2610 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %2656, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_247 = torch.constant.int 4
    %int128_248 = torch.constant.int 128
    %2657 = torch.prim.ListConstruct %int4_247, %2482, %int128_248 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2658 = torch.aten.view %2656, %2657 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %2658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
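    // %2660..%2674: reshape each Q shard from [4,?,512] to [4,?,4,128] --
    // four 128-dim query heads per device shard.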
    %int4_249 = torch.constant.int 4
    %int4_250 = torch.constant.int 4
    %int128_251 = torch.constant.int 128
    %2659 = torch.prim.ListConstruct %int4_249, %2482, %int4_250, %int128_251 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2660 = torch.aten.view %2488, %2659 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_252 = torch.constant.int 4
    %int4_253 = torch.constant.int 4
    %int128_254 = torch.constant.int 128
    %2661 = torch.prim.ListConstruct %int4_252, %2482, %int4_253, %int128_254 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2662 = torch.aten.view %2494, %2661 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_255 = torch.constant.int 4
    %int4_256 = torch.constant.int 4
    %int128_257 = torch.constant.int 128
    %2663 = torch.prim.ListConstruct %int4_255, %2482, %int4_256, %int128_257 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2664 = torch.aten.view %2500, %2663 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_258 = torch.constant.int 4
    %int4_259 = torch.constant.int 4
    %int128_260 = torch.constant.int 128
    %2665 = torch.prim.ListConstruct %int4_258, %2482, %int4_259, %int128_260 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2666 = torch.aten.view %2506, %2665 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_261 = torch.constant.int 4
    %int4_262 = torch.constant.int 4
    %int128_263 = torch.constant.int 128
    %2667 = torch.prim.ListConstruct %int4_261, %2482, %int4_262, %int128_263 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2668 = torch.aten.view %2512, %2667 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_264 = torch.constant.int 4
    %int4_265 = torch.constant.int 4
    %int128_266 = torch.constant.int 128
    %2669 = torch.prim.ListConstruct %int4_264, %2482, %int4_265, %int128_266 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2670 = torch.aten.view %2518, %2669 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_267 = torch.constant.int 4
    %int4_268 = torch.constant.int 4
    %int128_269 = torch.constant.int 128
    %2671 = torch.prim.ListConstruct %int4_267, %2482, %int4_268, %int128_269 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2672 = torch.aten.view %2524, %2671 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_270 = torch.constant.int 4
    %int4_271 = torch.constant.int 4
    %int128_272 = torch.constant.int 128
    %2673 = torch.prim.ListConstruct %int4_270, %2482, %int4_271, %int128_272 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2674 = torch.aten.view %2530, %2673 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
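    // %2676..%2690: reshape the K shards to [4,?,1,128]; a single KV head per
    // device against four Q heads suggests grouped-query attention.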
    %int4_273 = torch.constant.int 4
    %int1_274 = torch.constant.int 1
    %int128_275 = torch.constant.int 128
    %2675 = torch.prim.ListConstruct %int4_273, %2482, %int1_274, %int128_275 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2676 = torch.aten.view %2552, %2675 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_276 = torch.constant.int 4
    %int1_277 = torch.constant.int 1
    %int128_278 = torch.constant.int 128
    %2677 = torch.prim.ListConstruct %int4_276, %2482, %int1_277, %int128_278 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2678 = torch.aten.view %2558, %2677 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_279 = torch.constant.int 4
    %int1_280 = torch.constant.int 1
    %int128_281 = torch.constant.int 128
    %2679 = torch.prim.ListConstruct %int4_279, %2482, %int1_280, %int128_281 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2680 = torch.aten.view %2564, %2679 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_282 = torch.constant.int 4
    %int1_283 = torch.constant.int 1
    %int128_284 = torch.constant.int 128
    %2681 = torch.prim.ListConstruct %int4_282, %2482, %int1_283, %int128_284 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2682 = torch.aten.view %2570, %2681 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_285 = torch.constant.int 4
    %int1_286 = torch.constant.int 1
    %int128_287 = torch.constant.int 128
    %2683 = torch.prim.ListConstruct %int4_285, %2482, %int1_286, %int128_287 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2684 = torch.aten.view %2576, %2683 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_288 = torch.constant.int 4
    %int1_289 = torch.constant.int 1
    %int128_290 = torch.constant.int 128
    %2685 = torch.prim.ListConstruct %int4_288, %2482, %int1_289, %int128_290 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2686 = torch.aten.view %2582, %2685 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_291 = torch.constant.int 4
    %int1_292 = torch.constant.int 1
    %int128_293 = torch.constant.int 128
    %2687 = torch.prim.ListConstruct %int4_291, %2482, %int1_292, %int128_293 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2688 = torch.aten.view %2588, %2687 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_294 = torch.constant.int 4
    %int1_295 = torch.constant.int 1
    %int128_296 = torch.constant.int 128
    %2689 = torch.prim.ListConstruct %int4_294, %2482, %int1_295, %int128_296 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2690 = torch.aten.view %2594, %2689 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
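    // %2692..%2706: the same [4,?,1,128] reshape applied to the V shards.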
    %int4_297 = torch.constant.int 4
    %int1_298 = torch.constant.int 1
    %int128_299 = torch.constant.int 128
    %2691 = torch.prim.ListConstruct %int4_297, %2482, %int1_298, %int128_299 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2692 = torch.aten.view %2616, %2691 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_300 = torch.constant.int 4
    %int1_301 = torch.constant.int 1
    %int128_302 = torch.constant.int 128
    %2693 = torch.prim.ListConstruct %int4_300, %2482, %int1_301, %int128_302 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2694 = torch.aten.view %2622, %2693 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_303 = torch.constant.int 4
    %int1_304 = torch.constant.int 1
    %int128_305 = torch.constant.int 128
    %2695 = torch.prim.ListConstruct %int4_303, %2482, %int1_304, %int128_305 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2696 = torch.aten.view %2628, %2695 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_306 = torch.constant.int 4
    %int1_307 = torch.constant.int 1
    %int128_308 = torch.constant.int 128
    %2697 = torch.prim.ListConstruct %int4_306, %2482, %int1_307, %int128_308 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2698 = torch.aten.view %2634, %2697 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_309 = torch.constant.int 4
    %int1_310 = torch.constant.int 1
    %int128_311 = torch.constant.int 128
    %2699 = torch.prim.ListConstruct %int4_309, %2482, %int1_310, %int128_311 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2700 = torch.aten.view %2640, %2699 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_312 = torch.constant.int 4
    %int1_313 = torch.constant.int 1
    %int128_314 = torch.constant.int 128
    %2701 = torch.prim.ListConstruct %int4_312, %2482, %int1_313, %int128_314 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2702 = torch.aten.view %2646, %2701 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_315 = torch.constant.int 4
    %int1_316 = torch.constant.int 1
    %int128_317 = torch.constant.int 128
    %2703 = torch.prim.ListConstruct %int4_315, %2482, %int1_316, %int128_317 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2704 = torch.aten.view %2652, %2703 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_318 = torch.constant.int 4
    %int1_319 = torch.constant.int 1
    %int128_320 = torch.constant.int 128
    %2705 = torch.prim.ListConstruct %int4_318, %2482, %int1_319, %int128_320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %2706 = torch.aten.view %2658, %2705 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
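    // Build the RoPE rotation table on the host (cpu): positions p = 0..131071
    // and inv_freq[i] = 1 / 500000^(2i/128) for i = 0..63, giving
    // freqs[p, i] = p * inv_freq[i] and the table cos(freqs) + j*sin(freqs)
    // as a 131072x64 complex<f32> tensor. (theta = 5e5 and head_dim = 128 are
    // taken directly from the constants below; the mul by 1.0 is a no-op scale.)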
    %int131072 = torch.constant.int 131072
    %none_321 = torch.constant.none
    %none_322 = torch.constant.none
    %cpu = torch.constant.device "cpu"
    %false_323 = torch.constant.bool false
    %2707 = torch.aten.arange %int131072, %none_321, %none_322, %cpu, %false_323 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_324 = torch.constant.int 0
    %int128_325 = torch.constant.int 128
    %int2_326 = torch.constant.int 2
    %none_327 = torch.constant.none
    %none_328 = torch.constant.none
    %cpu_329 = torch.constant.device "cpu"
    %false_330 = torch.constant.bool false
    %2708 = torch.aten.arange.start_step %int0_324, %int128_325, %int2_326, %none_327, %none_328, %cpu_329, %false_330 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_331 = torch.constant.int 0
    %int0_332 = torch.constant.int 0
    %int64 = torch.constant.int 64
    %int1_333 = torch.constant.int 1
    %2709 = torch.aten.slice.Tensor %2708, %int0_331, %int0_332, %int64, %int1_333 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_334 = torch.constant.int 6
    %2710 = torch.prims.convert_element_type %2709, %int6_334 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_335 = torch.constant.int 128
    %2711 = torch.aten.div.Scalar %2710, %int128_335 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05 = torch.constant.float 5.000000e+05
    %2712 = torch.aten.pow.Scalar %float5.000000e05, %2711 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %2713 = torch.aten.reciprocal %2712 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00 = torch.constant.float 1.000000e+00
    %2714 = torch.aten.mul.Scalar %2713, %float1.000000e00 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_336 = torch.constant.int 131072
    %int1_337 = torch.constant.int 1
    %2715 = torch.prim.ListConstruct %int131072_336, %int1_337 : (!torch.int, !torch.int) -> !torch.list<int>
    %2716 = torch.aten.view %2707, %2715 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %2717 = torch.aten.mul.Tensor %2716, %2714 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %2718 = torch.aten.cos %2717 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %2719 = torch.aten.sin %2717 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %2720 = torch.aten.complex %2718, %2719 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
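    // Replicate the 131072x64 complex table to all 8 devices: each
    // to_builtin_tensor / flow.tensor.transfer / from_builtin_tensor triple pins
    // one copy to a #hal.device.promise so the per-device math below stays local.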
    %2721 = torch_c.to_builtin_tensor %2720 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2722 = flow.tensor.transfer %2721 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %2723 = torch_c.from_builtin_tensor %2722 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2724 = torch_c.to_builtin_tensor %2720 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2725 = flow.tensor.transfer %2724 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %2726 = torch_c.from_builtin_tensor %2725 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2727 = torch_c.to_builtin_tensor %2720 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2728 = flow.tensor.transfer %2727 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %2729 = torch_c.from_builtin_tensor %2728 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2730 = torch_c.to_builtin_tensor %2720 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2731 = flow.tensor.transfer %2730 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %2732 = torch_c.from_builtin_tensor %2731 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2733 = torch_c.to_builtin_tensor %2720 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2734 = flow.tensor.transfer %2733 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %2735 = torch_c.from_builtin_tensor %2734 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2736 = torch_c.to_builtin_tensor %2720 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2737 = flow.tensor.transfer %2736 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %2738 = torch_c.from_builtin_tensor %2737 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2739 = torch_c.to_builtin_tensor %2720 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2740 = flow.tensor.transfer %2739 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %2741 = torch_c.from_builtin_tensor %2740 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2742 = torch_c.to_builtin_tensor %2720 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2743 = flow.tensor.transfer %2742 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %2744 = torch_c.from_builtin_tensor %2743 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
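    // Device 0: slice the table to the runtime sequence length (s0 * 16),
    // unsqueeze it to [1, seq, 1, 64], bitcast the f16 activations
    // [4, seq, 4, 128] to [4, seq, 4, 64] complex<f16> (adjacent f16 pairs
    // become real/imag), multiply by the table, bitcast back to interleaved
    // f32, and truncate to f16. The same pattern repeats for devices 1-7 below.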
    %int1_338 = torch.constant.int 1
    %2745 = torch.aten.size.int %2488, %int1_338 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_339 = torch.constant.int 0
    %2746 = torch.aten.add.int %int0_339, %2745 : !torch.int, !torch.int -> !torch.int
    %int0_340 = torch.constant.int 0
    %int0_341 = torch.constant.int 0
    %int1_342 = torch.constant.int 1
    %2747 = torch.aten.slice.Tensor %2723, %int0_340, %int0_341, %2746, %int1_342 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2747, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_343 = torch.constant.int 1
    %int0_344 = torch.constant.int 0
    %int9223372036854775807 = torch.constant.int 9223372036854775807
    %int1_345 = torch.constant.int 1
    %2748 = torch.aten.slice.Tensor %2747, %int1_343, %int0_344, %int9223372036854775807, %int1_345 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2748, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_346 = torch.constant.int 0
    %2749 = torch.aten.unsqueeze %2748, %int0_346 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2749, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_347 = torch.constant.int 2
    %2750 = torch.aten.unsqueeze %2749, %int2_347 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2750, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3 = torch.constant.int 3
    %int0_348 = torch.constant.int 0
    %int9223372036854775807_349 = torch.constant.int 9223372036854775807
    %int1_350 = torch.constant.int 1
    %2751 = torch.aten.slice.Tensor %2750, %int3, %int0_348, %int9223372036854775807_349, %int1_350 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2751, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2752 = torch_c.to_builtin_tensor %2660 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_351 = arith.constant 1 : index
    %dim_352 = tensor.dim %2752, %c1_351 : tensor<4x?x4x128xf16>
    %2753 = flow.tensor.bitcast %2752 : tensor<4x?x4x128xf16>{%dim_352} -> tensor<4x?x4x64xcomplex<f16>>{%dim_352}
    %2754 = torch_c.from_builtin_tensor %2753 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %2754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %2755 = torch.aten.mul.Tensor %2754, %2751 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %2755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %2756 = torch_c.to_builtin_tensor %2755 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_353 = arith.constant 1 : index
    %dim_354 = tensor.dim %2756, %c1_353 : tensor<4x?x4x64xcomplex<f32>>
    %2757 = flow.tensor.bitcast %2756 : tensor<4x?x4x64xcomplex<f32>>{%dim_354} -> tensor<4x?x4x128xf32>{%dim_354}
    %2758 = torch_c.from_builtin_tensor %2757 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %2758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_355 = torch.constant.int 5
    %2759 = torch.prims.convert_element_type %2758, %int5_355 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
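    // Device 1: same RoPE application on this device's 4-head shard.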
    %int1_356 = torch.constant.int 1
    %2760 = torch.aten.size.int %2494, %int1_356 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_357 = torch.constant.int 0
    %2761 = torch.aten.add.int %int0_357, %2760 : !torch.int, !torch.int -> !torch.int
    %int0_358 = torch.constant.int 0
    %int0_359 = torch.constant.int 0
    %int1_360 = torch.constant.int 1
    %2762 = torch.aten.slice.Tensor %2726, %int0_358, %int0_359, %2761, %int1_360 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2762, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_361 = torch.constant.int 1
    %int0_362 = torch.constant.int 0
    %int9223372036854775807_363 = torch.constant.int 9223372036854775807
    %int1_364 = torch.constant.int 1
    %2763 = torch.aten.slice.Tensor %2762, %int1_361, %int0_362, %int9223372036854775807_363, %int1_364 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2763, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_365 = torch.constant.int 0
    %2764 = torch.aten.unsqueeze %2763, %int0_365 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2764, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_366 = torch.constant.int 2
    %2765 = torch.aten.unsqueeze %2764, %int2_366 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2765, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_367 = torch.constant.int 3
    %int0_368 = torch.constant.int 0
    %int9223372036854775807_369 = torch.constant.int 9223372036854775807
    %int1_370 = torch.constant.int 1
    %2766 = torch.aten.slice.Tensor %2765, %int3_367, %int0_368, %int9223372036854775807_369, %int1_370 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2766, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2767 = torch_c.to_builtin_tensor %2662 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_371 = arith.constant 1 : index
    %dim_372 = tensor.dim %2767, %c1_371 : tensor<4x?x4x128xf16>
    %2768 = flow.tensor.bitcast %2767 : tensor<4x?x4x128xf16>{%dim_372} -> tensor<4x?x4x64xcomplex<f16>>{%dim_372}
    %2769 = torch_c.from_builtin_tensor %2768 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %2769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %2770 = torch.aten.mul.Tensor %2769, %2766 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %2770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %2771 = torch_c.to_builtin_tensor %2770 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_373 = arith.constant 1 : index
    %dim_374 = tensor.dim %2771, %c1_373 : tensor<4x?x4x64xcomplex<f32>>
    %2772 = flow.tensor.bitcast %2771 : tensor<4x?x4x64xcomplex<f32>>{%dim_374} -> tensor<4x?x4x128xf32>{%dim_374}
    %2773 = torch_c.from_builtin_tensor %2772 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %2773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_375 = torch.constant.int 5
    %2774 = torch.prims.convert_element_type %2773, %int5_375 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
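    // Device 2: same pattern.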
    %int1_376 = torch.constant.int 1
    %2775 = torch.aten.size.int %2500, %int1_376 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_377 = torch.constant.int 0
    %2776 = torch.aten.add.int %int0_377, %2775 : !torch.int, !torch.int -> !torch.int
    %int0_378 = torch.constant.int 0
    %int0_379 = torch.constant.int 0
    %int1_380 = torch.constant.int 1
    %2777 = torch.aten.slice.Tensor %2729, %int0_378, %int0_379, %2776, %int1_380 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2777, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_381 = torch.constant.int 1
    %int0_382 = torch.constant.int 0
    %int9223372036854775807_383 = torch.constant.int 9223372036854775807
    %int1_384 = torch.constant.int 1
    %2778 = torch.aten.slice.Tensor %2777, %int1_381, %int0_382, %int9223372036854775807_383, %int1_384 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2778, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_385 = torch.constant.int 0
    %2779 = torch.aten.unsqueeze %2778, %int0_385 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2779, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_386 = torch.constant.int 2
    %2780 = torch.aten.unsqueeze %2779, %int2_386 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2780, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_387 = torch.constant.int 3
    %int0_388 = torch.constant.int 0
    %int9223372036854775807_389 = torch.constant.int 9223372036854775807
    %int1_390 = torch.constant.int 1
    %2781 = torch.aten.slice.Tensor %2780, %int3_387, %int0_388, %int9223372036854775807_389, %int1_390 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2781, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2782 = torch_c.to_builtin_tensor %2664 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_391 = arith.constant 1 : index
    %dim_392 = tensor.dim %2782, %c1_391 : tensor<4x?x4x128xf16>
    %2783 = flow.tensor.bitcast %2782 : tensor<4x?x4x128xf16>{%dim_392} -> tensor<4x?x4x64xcomplex<f16>>{%dim_392}
    %2784 = torch_c.from_builtin_tensor %2783 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %2784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %2785 = torch.aten.mul.Tensor %2784, %2781 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %2785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %2786 = torch_c.to_builtin_tensor %2785 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_393 = arith.constant 1 : index
    %dim_394 = tensor.dim %2786, %c1_393 : tensor<4x?x4x64xcomplex<f32>>
    %2787 = flow.tensor.bitcast %2786 : tensor<4x?x4x64xcomplex<f32>>{%dim_394} -> tensor<4x?x4x128xf32>{%dim_394}
    %2788 = torch_c.from_builtin_tensor %2787 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %2788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_395 = torch.constant.int 5
    %2789 = torch.prims.convert_element_type %2788, %int5_395 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
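    // Device 3: same pattern.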
    %int1_396 = torch.constant.int 1
    %2790 = torch.aten.size.int %2506, %int1_396 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_397 = torch.constant.int 0
    %2791 = torch.aten.add.int %int0_397, %2790 : !torch.int, !torch.int -> !torch.int
    %int0_398 = torch.constant.int 0
    %int0_399 = torch.constant.int 0
    %int1_400 = torch.constant.int 1
    %2792 = torch.aten.slice.Tensor %2732, %int0_398, %int0_399, %2791, %int1_400 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2792, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_401 = torch.constant.int 1
    %int0_402 = torch.constant.int 0
    %int9223372036854775807_403 = torch.constant.int 9223372036854775807
    %int1_404 = torch.constant.int 1
    %2793 = torch.aten.slice.Tensor %2792, %int1_401, %int0_402, %int9223372036854775807_403, %int1_404 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2793, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_405 = torch.constant.int 0
    %2794 = torch.aten.unsqueeze %2793, %int0_405 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2794, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_406 = torch.constant.int 2
    %2795 = torch.aten.unsqueeze %2794, %int2_406 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2795, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_407 = torch.constant.int 3
    %int0_408 = torch.constant.int 0
    %int9223372036854775807_409 = torch.constant.int 9223372036854775807
    %int1_410 = torch.constant.int 1
    %2796 = torch.aten.slice.Tensor %2795, %int3_407, %int0_408, %int9223372036854775807_409, %int1_410 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2796, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2797 = torch_c.to_builtin_tensor %2666 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_411 = arith.constant 1 : index
    %dim_412 = tensor.dim %2797, %c1_411 : tensor<4x?x4x128xf16>
    %2798 = flow.tensor.bitcast %2797 : tensor<4x?x4x128xf16>{%dim_412} -> tensor<4x?x4x64xcomplex<f16>>{%dim_412}
    %2799 = torch_c.from_builtin_tensor %2798 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %2799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %2800 = torch.aten.mul.Tensor %2799, %2796 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %2800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %2801 = torch_c.to_builtin_tensor %2800 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_413 = arith.constant 1 : index
    %dim_414 = tensor.dim %2801, %c1_413 : tensor<4x?x4x64xcomplex<f32>>
    %2802 = flow.tensor.bitcast %2801 : tensor<4x?x4x64xcomplex<f32>>{%dim_414} -> tensor<4x?x4x128xf32>{%dim_414}
    %2803 = torch_c.from_builtin_tensor %2802 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %2803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_415 = torch.constant.int 5
    %2804 = torch.prims.convert_element_type %2803, %int5_415 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
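    // Device 4: same pattern.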
    %int1_416 = torch.constant.int 1
    %2805 = torch.aten.size.int %2512, %int1_416 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_417 = torch.constant.int 0
    %2806 = torch.aten.add.int %int0_417, %2805 : !torch.int, !torch.int -> !torch.int
    %int0_418 = torch.constant.int 0
    %int0_419 = torch.constant.int 0
    %int1_420 = torch.constant.int 1
    %2807 = torch.aten.slice.Tensor %2735, %int0_418, %int0_419, %2806, %int1_420 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2807, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_421 = torch.constant.int 1
    %int0_422 = torch.constant.int 0
    %int9223372036854775807_423 = torch.constant.int 9223372036854775807
    %int1_424 = torch.constant.int 1
    %2808 = torch.aten.slice.Tensor %2807, %int1_421, %int0_422, %int9223372036854775807_423, %int1_424 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2808, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_425 = torch.constant.int 0
    %2809 = torch.aten.unsqueeze %2808, %int0_425 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2809, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_426 = torch.constant.int 2
    %2810 = torch.aten.unsqueeze %2809, %int2_426 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2810, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_427 = torch.constant.int 3
    %int0_428 = torch.constant.int 0
    %int9223372036854775807_429 = torch.constant.int 9223372036854775807
    %int1_430 = torch.constant.int 1
    %2811 = torch.aten.slice.Tensor %2810, %int3_427, %int0_428, %int9223372036854775807_429, %int1_430 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2811, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2812 = torch_c.to_builtin_tensor %2668 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_431 = arith.constant 1 : index
    %dim_432 = tensor.dim %2812, %c1_431 : tensor<4x?x4x128xf16>
    %2813 = flow.tensor.bitcast %2812 : tensor<4x?x4x128xf16>{%dim_432} -> tensor<4x?x4x64xcomplex<f16>>{%dim_432}
    %2814 = torch_c.from_builtin_tensor %2813 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %2814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %2815 = torch.aten.mul.Tensor %2814, %2811 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %2815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %2816 = torch_c.to_builtin_tensor %2815 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_433 = arith.constant 1 : index
    %dim_434 = tensor.dim %2816, %c1_433 : tensor<4x?x4x64xcomplex<f32>>
    %2817 = flow.tensor.bitcast %2816 : tensor<4x?x4x64xcomplex<f32>>{%dim_434} -> tensor<4x?x4x128xf32>{%dim_434}
    %2818 = torch_c.from_builtin_tensor %2817 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %2818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_435 = torch.constant.int 5
    %2819 = torch.prims.convert_element_type %2818, %int5_435 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
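    // Device 5: same pattern.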
    %int1_436 = torch.constant.int 1
    %2820 = torch.aten.size.int %2518, %int1_436 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_437 = torch.constant.int 0
    %2821 = torch.aten.add.int %int0_437, %2820 : !torch.int, !torch.int -> !torch.int
    %int0_438 = torch.constant.int 0
    %int0_439 = torch.constant.int 0
    %int1_440 = torch.constant.int 1
    %2822 = torch.aten.slice.Tensor %2738, %int0_438, %int0_439, %2821, %int1_440 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2822, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_441 = torch.constant.int 1
    %int0_442 = torch.constant.int 0
    %int9223372036854775807_443 = torch.constant.int 9223372036854775807
    %int1_444 = torch.constant.int 1
    %2823 = torch.aten.slice.Tensor %2822, %int1_441, %int0_442, %int9223372036854775807_443, %int1_444 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2823, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_445 = torch.constant.int 0
    %2824 = torch.aten.unsqueeze %2823, %int0_445 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2824, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_446 = torch.constant.int 2
    %2825 = torch.aten.unsqueeze %2824, %int2_446 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2825, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_447 = torch.constant.int 3
    %int0_448 = torch.constant.int 0
    %int9223372036854775807_449 = torch.constant.int 9223372036854775807
    %int1_450 = torch.constant.int 1
    %2826 = torch.aten.slice.Tensor %2825, %int3_447, %int0_448, %int9223372036854775807_449, %int1_450 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2826, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2827 = torch_c.to_builtin_tensor %2670 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_451 = arith.constant 1 : index
    %dim_452 = tensor.dim %2827, %c1_451 : tensor<4x?x4x128xf16>
    %2828 = flow.tensor.bitcast %2827 : tensor<4x?x4x128xf16>{%dim_452} -> tensor<4x?x4x64xcomplex<f16>>{%dim_452}
    %2829 = torch_c.from_builtin_tensor %2828 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %2829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %2830 = torch.aten.mul.Tensor %2829, %2826 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %2830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %2831 = torch_c.to_builtin_tensor %2830 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_453 = arith.constant 1 : index
    %dim_454 = tensor.dim %2831, %c1_453 : tensor<4x?x4x64xcomplex<f32>>
    %2832 = flow.tensor.bitcast %2831 : tensor<4x?x4x64xcomplex<f32>>{%dim_454} -> tensor<4x?x4x128xf32>{%dim_454}
    %2833 = torch_c.from_builtin_tensor %2832 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %2833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_455 = torch.constant.int 5
    %2834 = torch.prims.convert_element_type %2833, %int5_455 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
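    // Device 6: same pattern.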
    %int1_456 = torch.constant.int 1
    %2835 = torch.aten.size.int %2524, %int1_456 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_457 = torch.constant.int 0
    %2836 = torch.aten.add.int %int0_457, %2835 : !torch.int, !torch.int -> !torch.int
    %int0_458 = torch.constant.int 0
    %int0_459 = torch.constant.int 0
    %int1_460 = torch.constant.int 1
    %2837 = torch.aten.slice.Tensor %2741, %int0_458, %int0_459, %2836, %int1_460 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2837, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_461 = torch.constant.int 1
    %int0_462 = torch.constant.int 0
    %int9223372036854775807_463 = torch.constant.int 9223372036854775807
    %int1_464 = torch.constant.int 1
    %2838 = torch.aten.slice.Tensor %2837, %int1_461, %int0_462, %int9223372036854775807_463, %int1_464 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2838, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_465 = torch.constant.int 0
    %2839 = torch.aten.unsqueeze %2838, %int0_465 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2839, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_466 = torch.constant.int 2
    %2840 = torch.aten.unsqueeze %2839, %int2_466 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2840, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_467 = torch.constant.int 3
    %int0_468 = torch.constant.int 0
    %int9223372036854775807_469 = torch.constant.int 9223372036854775807
    %int1_470 = torch.constant.int 1
    %2841 = torch.aten.slice.Tensor %2840, %int3_467, %int0_468, %int9223372036854775807_469, %int1_470 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2841, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2842 = torch_c.to_builtin_tensor %2672 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_471 = arith.constant 1 : index
    %dim_472 = tensor.dim %2842, %c1_471 : tensor<4x?x4x128xf16>
    %2843 = flow.tensor.bitcast %2842 : tensor<4x?x4x128xf16>{%dim_472} -> tensor<4x?x4x64xcomplex<f16>>{%dim_472}
    %2844 = torch_c.from_builtin_tensor %2843 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %2844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %2845 = torch.aten.mul.Tensor %2844, %2841 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %2845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %2846 = torch_c.to_builtin_tensor %2845 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_473 = arith.constant 1 : index
    %dim_474 = tensor.dim %2846, %c1_473 : tensor<4x?x4x64xcomplex<f32>>
    %2847 = flow.tensor.bitcast %2846 : tensor<4x?x4x64xcomplex<f32>>{%dim_474} -> tensor<4x?x4x128xf32>{%dim_474}
    %2848 = torch_c.from_builtin_tensor %2847 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %2848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_475 = torch.constant.int 5
    %2849 = torch.prims.convert_element_type %2848, %int5_475 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
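    // Device 7: same pattern.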
    %int1_476 = torch.constant.int 1
    %2850 = torch.aten.size.int %2530, %int1_476 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_477 = torch.constant.int 0
    %2851 = torch.aten.add.int %int0_477, %2850 : !torch.int, !torch.int -> !torch.int
    %int0_478 = torch.constant.int 0
    %int0_479 = torch.constant.int 0
    %int1_480 = torch.constant.int 1
    %2852 = torch.aten.slice.Tensor %2744, %int0_478, %int0_479, %2851, %int1_480 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2852, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_481 = torch.constant.int 1
    %int0_482 = torch.constant.int 0
    %int9223372036854775807_483 = torch.constant.int 9223372036854775807
    %int1_484 = torch.constant.int 1
    %2853 = torch.aten.slice.Tensor %2852, %int1_481, %int0_482, %int9223372036854775807_483, %int1_484 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2853, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_485 = torch.constant.int 0
    %2854 = torch.aten.unsqueeze %2853, %int0_485 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2854, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_486 = torch.constant.int 2
    %2855 = torch.aten.unsqueeze %2854, %int2_486 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2855, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_487 = torch.constant.int 3
    %int0_488 = torch.constant.int 0
    %int9223372036854775807_489 = torch.constant.int 9223372036854775807
    %int1_490 = torch.constant.int 1
    %2856 = torch.aten.slice.Tensor %2855, %int3_487, %int0_488, %int9223372036854775807_489, %int1_490 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2856, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2857 = torch_c.to_builtin_tensor %2674 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_491 = arith.constant 1 : index
    %dim_492 = tensor.dim %2857, %c1_491 : tensor<4x?x4x128xf16>
    %2858 = flow.tensor.bitcast %2857 : tensor<4x?x4x128xf16>{%dim_492} -> tensor<4x?x4x64xcomplex<f16>>{%dim_492}
    %2859 = torch_c.from_builtin_tensor %2858 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %2859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %2860 = torch.aten.mul.Tensor %2859, %2856 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %2860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %2861 = torch_c.to_builtin_tensor %2860 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_493 = arith.constant 1 : index
    %dim_494 = tensor.dim %2861, %c1_493 : tensor<4x?x4x64xcomplex<f32>>
    %2862 = flow.tensor.bitcast %2861 : tensor<4x?x4x64xcomplex<f32>>{%dim_494} -> tensor<4x?x4x128xf32>{%dim_494}
    %2863 = torch_c.from_builtin_tensor %2862 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %2863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_495 = torch.constant.int 5
    %2864 = torch.prims.convert_element_type %2863, %int5_495 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %2864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
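    // Rebuild the same rotation table for the single-head [4, seq, 1, 128]
    // shards (presumably the K heads of the GQA layout, one per device); the
    // constants match the table above, so this is a duplicate computation.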
    %int131072_496 = torch.constant.int 131072
    %none_497 = torch.constant.none
    %none_498 = torch.constant.none
    %cpu_499 = torch.constant.device "cpu"
    %false_500 = torch.constant.bool false
    %2865 = torch.aten.arange %int131072_496, %none_497, %none_498, %cpu_499, %false_500 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_501 = torch.constant.int 0
    %int128_502 = torch.constant.int 128
    %int2_503 = torch.constant.int 2
    %none_504 = torch.constant.none
    %none_505 = torch.constant.none
    %cpu_506 = torch.constant.device "cpu"
    %false_507 = torch.constant.bool false
    %2866 = torch.aten.arange.start_step %int0_501, %int128_502, %int2_503, %none_504, %none_505, %cpu_506, %false_507 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_508 = torch.constant.int 0
    %int0_509 = torch.constant.int 0
    %int64_510 = torch.constant.int 64
    %int1_511 = torch.constant.int 1
    %2867 = torch.aten.slice.Tensor %2866, %int0_508, %int0_509, %int64_510, %int1_511 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_512 = torch.constant.int 6
    %2868 = torch.prims.convert_element_type %2867, %int6_512 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_513 = torch.constant.int 128
    %2869 = torch.aten.div.Scalar %2868, %int128_513 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_514 = torch.constant.float 5.000000e+05
    %2870 = torch.aten.pow.Scalar %float5.000000e05_514, %2869 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %2871 = torch.aten.reciprocal %2870 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_515 = torch.constant.float 1.000000e+00
    %2872 = torch.aten.mul.Scalar %2871, %float1.000000e00_515 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_516 = torch.constant.int 131072
    %int1_517 = torch.constant.int 1
    %2873 = torch.prim.ListConstruct %int131072_516, %int1_517 : (!torch.int, !torch.int) -> !torch.list<int>
    %2874 = torch.aten.view %2865, %2873 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %2875 = torch.aten.mul.Tensor %2874, %2872 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %2876 = torch.aten.cos %2875 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %2877 = torch.aten.sin %2875 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %2878 = torch.aten.complex %2876, %2877 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
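    // Replicate the second table across the 8 devices, as before.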
    %2879 = torch_c.to_builtin_tensor %2878 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2880 = flow.tensor.transfer %2879 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %2881 = torch_c.from_builtin_tensor %2880 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2882 = torch_c.to_builtin_tensor %2878 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2883 = flow.tensor.transfer %2882 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %2884 = torch_c.from_builtin_tensor %2883 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2885 = torch_c.to_builtin_tensor %2878 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2886 = flow.tensor.transfer %2885 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %2887 = torch_c.from_builtin_tensor %2886 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2888 = torch_c.to_builtin_tensor %2878 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2889 = flow.tensor.transfer %2888 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %2890 = torch_c.from_builtin_tensor %2889 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2891 = torch_c.to_builtin_tensor %2878 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2892 = flow.tensor.transfer %2891 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %2893 = torch_c.from_builtin_tensor %2892 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2894 = torch_c.to_builtin_tensor %2878 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2895 = flow.tensor.transfer %2894 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %2896 = torch_c.from_builtin_tensor %2895 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2897 = torch_c.to_builtin_tensor %2878 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2898 = flow.tensor.transfer %2897 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %2899 = torch_c.from_builtin_tensor %2898 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %2900 = torch_c.to_builtin_tensor %2878 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %2901 = flow.tensor.transfer %2900 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %2902 = torch_c.from_builtin_tensor %2901 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
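    // Device 0: apply the rotation to its single-head shard, using the same
    // bitcast-to-complex<f16> / multiply / bitcast-back / truncate-to-f16 steps.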
    %int1_518 = torch.constant.int 1
    %2903 = torch.aten.size.int %2552, %int1_518 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_519 = torch.constant.int 0
    %2904 = torch.aten.add.int %int0_519, %2903 : !torch.int, !torch.int -> !torch.int
    %int0_520 = torch.constant.int 0
    %int0_521 = torch.constant.int 0
    %int1_522 = torch.constant.int 1
    %2905 = torch.aten.slice.Tensor %2881, %int0_520, %int0_521, %2904, %int1_522 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2905, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_523 = torch.constant.int 1
    %int0_524 = torch.constant.int 0
    %int9223372036854775807_525 = torch.constant.int 9223372036854775807
    %int1_526 = torch.constant.int 1
    %2906 = torch.aten.slice.Tensor %2905, %int1_523, %int0_524, %int9223372036854775807_525, %int1_526 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2906, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_527 = torch.constant.int 0
    %2907 = torch.aten.unsqueeze %2906, %int0_527 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2907, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_528 = torch.constant.int 2
    %2908 = torch.aten.unsqueeze %2907, %int2_528 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2908, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_529 = torch.constant.int 3
    %int0_530 = torch.constant.int 0
    %int9223372036854775807_531 = torch.constant.int 9223372036854775807
    %int1_532 = torch.constant.int 1
    %2909 = torch.aten.slice.Tensor %2908, %int3_529, %int0_530, %int9223372036854775807_531, %int1_532 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2909, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2910 = torch_c.to_builtin_tensor %2676 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_533 = arith.constant 1 : index
    %dim_534 = tensor.dim %2910, %c1_533 : tensor<4x?x1x128xf16>
    %2911 = flow.tensor.bitcast %2910 : tensor<4x?x1x128xf16>{%dim_534} -> tensor<4x?x1x64xcomplex<f16>>{%dim_534}
    %2912 = torch_c.from_builtin_tensor %2911 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %2912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %2913 = torch.aten.mul.Tensor %2912, %2909 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %2914 = torch_c.to_builtin_tensor %2913 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_535 = arith.constant 1 : index
    %dim_536 = tensor.dim %2914, %c1_535 : tensor<4x?x1x64xcomplex<f32>>
    %2915 = flow.tensor.bitcast %2914 : tensor<4x?x1x64xcomplex<f32>>{%dim_536} -> tensor<4x?x1x128xf32>{%dim_536}
    %2916 = torch_c.from_builtin_tensor %2915 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %2916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_537 = torch.constant.int 5
    %2917 = torch.prims.convert_element_type %2916, %int5_537 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
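    // Device 1: same single-head application.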
    %int1_538 = torch.constant.int 1
    %2918 = torch.aten.size.int %2558, %int1_538 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_539 = torch.constant.int 0
    %2919 = torch.aten.add.int %int0_539, %2918 : !torch.int, !torch.int -> !torch.int
    %int0_540 = torch.constant.int 0
    %int0_541 = torch.constant.int 0
    %int1_542 = torch.constant.int 1
    %2920 = torch.aten.slice.Tensor %2884, %int0_540, %int0_541, %2919, %int1_542 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2920, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_543 = torch.constant.int 1
    %int0_544 = torch.constant.int 0
    %int9223372036854775807_545 = torch.constant.int 9223372036854775807
    %int1_546 = torch.constant.int 1
    %2921 = torch.aten.slice.Tensor %2920, %int1_543, %int0_544, %int9223372036854775807_545, %int1_546 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2921, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_547 = torch.constant.int 0
    %2922 = torch.aten.unsqueeze %2921, %int0_547 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2922, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_548 = torch.constant.int 2
    %2923 = torch.aten.unsqueeze %2922, %int2_548 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2923, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_549 = torch.constant.int 3
    %int0_550 = torch.constant.int 0
    %int9223372036854775807_551 = torch.constant.int 9223372036854775807
    %int1_552 = torch.constant.int 1
    %2924 = torch.aten.slice.Tensor %2923, %int3_549, %int0_550, %int9223372036854775807_551, %int1_552 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2924, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2925 = torch_c.to_builtin_tensor %2678 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_553 = arith.constant 1 : index
    %dim_554 = tensor.dim %2925, %c1_553 : tensor<4x?x1x128xf16>
    %2926 = flow.tensor.bitcast %2925 : tensor<4x?x1x128xf16>{%dim_554} -> tensor<4x?x1x64xcomplex<f16>>{%dim_554}
    %2927 = torch_c.from_builtin_tensor %2926 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %2927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %2928 = torch.aten.mul.Tensor %2927, %2924 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %2929 = torch_c.to_builtin_tensor %2928 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_555 = arith.constant 1 : index
    %dim_556 = tensor.dim %2929, %c1_555 : tensor<4x?x1x64xcomplex<f32>>
    %2930 = flow.tensor.bitcast %2929 : tensor<4x?x1x64xcomplex<f32>>{%dim_556} -> tensor<4x?x1x128xf32>{%dim_556}
    %2931 = torch_c.from_builtin_tensor %2930 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %2931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_557 = torch.constant.int 5
    %2932 = torch.prims.convert_element_type %2931, %int5_557 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
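    // Device 2: same single-head application.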
    %int1_558 = torch.constant.int 1
    %2933 = torch.aten.size.int %2564, %int1_558 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_559 = torch.constant.int 0
    %2934 = torch.aten.add.int %int0_559, %2933 : !torch.int, !torch.int -> !torch.int
    %int0_560 = torch.constant.int 0
    %int0_561 = torch.constant.int 0
    %int1_562 = torch.constant.int 1
    %2935 = torch.aten.slice.Tensor %2887, %int0_560, %int0_561, %2934, %int1_562 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2935, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_563 = torch.constant.int 1
    %int0_564 = torch.constant.int 0
    %int9223372036854775807_565 = torch.constant.int 9223372036854775807
    %int1_566 = torch.constant.int 1
    %2936 = torch.aten.slice.Tensor %2935, %int1_563, %int0_564, %int9223372036854775807_565, %int1_566 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2936, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_567 = torch.constant.int 0
    %2937 = torch.aten.unsqueeze %2936, %int0_567 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2937, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_568 = torch.constant.int 2
    %2938 = torch.aten.unsqueeze %2937, %int2_568 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2938, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_569 = torch.constant.int 3
    %int0_570 = torch.constant.int 0
    %int9223372036854775807_571 = torch.constant.int 9223372036854775807
    %int1_572 = torch.constant.int 1
    %2939 = torch.aten.slice.Tensor %2938, %int3_569, %int0_570, %int9223372036854775807_571, %int1_572 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2939, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2940 = torch_c.to_builtin_tensor %2680 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_573 = arith.constant 1 : index
    %dim_574 = tensor.dim %2940, %c1_573 : tensor<4x?x1x128xf16>
    %2941 = flow.tensor.bitcast %2940 : tensor<4x?x1x128xf16>{%dim_574} -> tensor<4x?x1x64xcomplex<f16>>{%dim_574}
    %2942 = torch_c.from_builtin_tensor %2941 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %2942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %2943 = torch.aten.mul.Tensor %2942, %2939 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %2944 = torch_c.to_builtin_tensor %2943 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_575 = arith.constant 1 : index
    %dim_576 = tensor.dim %2944, %c1_575 : tensor<4x?x1x64xcomplex<f32>>
    %2945 = flow.tensor.bitcast %2944 : tensor<4x?x1x64xcomplex<f32>>{%dim_576} -> tensor<4x?x1x128xf32>{%dim_576}
    %2946 = torch_c.from_builtin_tensor %2945 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %2946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_577 = torch.constant.int 5
    %2947 = torch.prims.convert_element_type %2946, %int5_577 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
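    // The same slice-table / bitcast / complex-multiply sequence repeats below
    // once per remaining device shard (inputs %2682, %2684, %2686, %2688,
    // %2690), producing the rotated shards %2962 through %3022.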
    %int1_578 = torch.constant.int 1
    %2948 = torch.aten.size.int %2570, %int1_578 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_579 = torch.constant.int 0
    %2949 = torch.aten.add.int %int0_579, %2948 : !torch.int, !torch.int -> !torch.int
    %int0_580 = torch.constant.int 0
    %int0_581 = torch.constant.int 0
    %int1_582 = torch.constant.int 1
    %2950 = torch.aten.slice.Tensor %2890, %int0_580, %int0_581, %2949, %int1_582 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2950, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_583 = torch.constant.int 1
    %int0_584 = torch.constant.int 0
    %int9223372036854775807_585 = torch.constant.int 9223372036854775807
    %int1_586 = torch.constant.int 1
    %2951 = torch.aten.slice.Tensor %2950, %int1_583, %int0_584, %int9223372036854775807_585, %int1_586 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2951, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_587 = torch.constant.int 0
    %2952 = torch.aten.unsqueeze %2951, %int0_587 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2952, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_588 = torch.constant.int 2
    %2953 = torch.aten.unsqueeze %2952, %int2_588 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2953, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_589 = torch.constant.int 3
    %int0_590 = torch.constant.int 0
    %int9223372036854775807_591 = torch.constant.int 9223372036854775807
    %int1_592 = torch.constant.int 1
    %2954 = torch.aten.slice.Tensor %2953, %int3_589, %int0_590, %int9223372036854775807_591, %int1_592 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2954, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2955 = torch_c.to_builtin_tensor %2682 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_593 = arith.constant 1 : index
    %dim_594 = tensor.dim %2955, %c1_593 : tensor<4x?x1x128xf16>
    %2956 = flow.tensor.bitcast %2955 : tensor<4x?x1x128xf16>{%dim_594} -> tensor<4x?x1x64xcomplex<f16>>{%dim_594}
    %2957 = torch_c.from_builtin_tensor %2956 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %2957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %2958 = torch.aten.mul.Tensor %2957, %2954 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %2959 = torch_c.to_builtin_tensor %2958 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_595 = arith.constant 1 : index
    %dim_596 = tensor.dim %2959, %c1_595 : tensor<4x?x1x64xcomplex<f32>>
    %2960 = flow.tensor.bitcast %2959 : tensor<4x?x1x64xcomplex<f32>>{%dim_596} -> tensor<4x?x1x128xf32>{%dim_596}
    %2961 = torch_c.from_builtin_tensor %2960 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %2961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_597 = torch.constant.int 5
    %2962 = torch.prims.convert_element_type %2961, %int5_597 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_598 = torch.constant.int 1
    %2963 = torch.aten.size.int %2576, %int1_598 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_599 = torch.constant.int 0
    %2964 = torch.aten.add.int %int0_599, %2963 : !torch.int, !torch.int -> !torch.int
    %int0_600 = torch.constant.int 0
    %int0_601 = torch.constant.int 0
    %int1_602 = torch.constant.int 1
    %2965 = torch.aten.slice.Tensor %2893, %int0_600, %int0_601, %2964, %int1_602 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2965, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_603 = torch.constant.int 1
    %int0_604 = torch.constant.int 0
    %int9223372036854775807_605 = torch.constant.int 9223372036854775807
    %int1_606 = torch.constant.int 1
    %2966 = torch.aten.slice.Tensor %2965, %int1_603, %int0_604, %int9223372036854775807_605, %int1_606 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2966, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_607 = torch.constant.int 0
    %2967 = torch.aten.unsqueeze %2966, %int0_607 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2967, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_608 = torch.constant.int 2
    %2968 = torch.aten.unsqueeze %2967, %int2_608 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2968, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_609 = torch.constant.int 3
    %int0_610 = torch.constant.int 0
    %int9223372036854775807_611 = torch.constant.int 9223372036854775807
    %int1_612 = torch.constant.int 1
    %2969 = torch.aten.slice.Tensor %2968, %int3_609, %int0_610, %int9223372036854775807_611, %int1_612 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2969, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2970 = torch_c.to_builtin_tensor %2684 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_613 = arith.constant 1 : index
    %dim_614 = tensor.dim %2970, %c1_613 : tensor<4x?x1x128xf16>
    %2971 = flow.tensor.bitcast %2970 : tensor<4x?x1x128xf16>{%dim_614} -> tensor<4x?x1x64xcomplex<f16>>{%dim_614}
    %2972 = torch_c.from_builtin_tensor %2971 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %2972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %2973 = torch.aten.mul.Tensor %2972, %2969 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %2974 = torch_c.to_builtin_tensor %2973 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_615 = arith.constant 1 : index
    %dim_616 = tensor.dim %2974, %c1_615 : tensor<4x?x1x64xcomplex<f32>>
    %2975 = flow.tensor.bitcast %2974 : tensor<4x?x1x64xcomplex<f32>>{%dim_616} -> tensor<4x?x1x128xf32>{%dim_616}
    %2976 = torch_c.from_builtin_tensor %2975 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %2976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_617 = torch.constant.int 5
    %2977 = torch.prims.convert_element_type %2976, %int5_617 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_618 = torch.constant.int 1
    %2978 = torch.aten.size.int %2582, %int1_618 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_619 = torch.constant.int 0
    %2979 = torch.aten.add.int %int0_619, %2978 : !torch.int, !torch.int -> !torch.int
    %int0_620 = torch.constant.int 0
    %int0_621 = torch.constant.int 0
    %int1_622 = torch.constant.int 1
    %2980 = torch.aten.slice.Tensor %2896, %int0_620, %int0_621, %2979, %int1_622 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2980, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_623 = torch.constant.int 1
    %int0_624 = torch.constant.int 0
    %int9223372036854775807_625 = torch.constant.int 9223372036854775807
    %int1_626 = torch.constant.int 1
    %2981 = torch.aten.slice.Tensor %2980, %int1_623, %int0_624, %int9223372036854775807_625, %int1_626 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2981, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_627 = torch.constant.int 0
    %2982 = torch.aten.unsqueeze %2981, %int0_627 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2982, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_628 = torch.constant.int 2
    %2983 = torch.aten.unsqueeze %2982, %int2_628 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2983, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_629 = torch.constant.int 3
    %int0_630 = torch.constant.int 0
    %int9223372036854775807_631 = torch.constant.int 9223372036854775807
    %int1_632 = torch.constant.int 1
    %2984 = torch.aten.slice.Tensor %2983, %int3_629, %int0_630, %int9223372036854775807_631, %int1_632 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2984, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %2985 = torch_c.to_builtin_tensor %2686 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_633 = arith.constant 1 : index
    %dim_634 = tensor.dim %2985, %c1_633 : tensor<4x?x1x128xf16>
    %2986 = flow.tensor.bitcast %2985 : tensor<4x?x1x128xf16>{%dim_634} -> tensor<4x?x1x64xcomplex<f16>>{%dim_634}
    %2987 = torch_c.from_builtin_tensor %2986 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %2987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %2988 = torch.aten.mul.Tensor %2987, %2984 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %2989 = torch_c.to_builtin_tensor %2988 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_635 = arith.constant 1 : index
    %dim_636 = tensor.dim %2989, %c1_635 : tensor<4x?x1x64xcomplex<f32>>
    %2990 = flow.tensor.bitcast %2989 : tensor<4x?x1x64xcomplex<f32>>{%dim_636} -> tensor<4x?x1x128xf32>{%dim_636}
    %2991 = torch_c.from_builtin_tensor %2990 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %2991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_637 = torch.constant.int 5
    %2992 = torch.prims.convert_element_type %2991, %int5_637 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %2992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_638 = torch.constant.int 1
    %2993 = torch.aten.size.int %2588, %int1_638 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_639 = torch.constant.int 0
    %2994 = torch.aten.add.int %int0_639, %2993 : !torch.int, !torch.int -> !torch.int
    %int0_640 = torch.constant.int 0
    %int0_641 = torch.constant.int 0
    %int1_642 = torch.constant.int 1
    %2995 = torch.aten.slice.Tensor %2899, %int0_640, %int0_641, %2994, %int1_642 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2995, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_643 = torch.constant.int 1
    %int0_644 = torch.constant.int 0
    %int9223372036854775807_645 = torch.constant.int 9223372036854775807
    %int1_646 = torch.constant.int 1
    %2996 = torch.aten.slice.Tensor %2995, %int1_643, %int0_644, %int9223372036854775807_645, %int1_646 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %2996, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_647 = torch.constant.int 0
    %2997 = torch.aten.unsqueeze %2996, %int0_647 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %2997, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_648 = torch.constant.int 2
    %2998 = torch.aten.unsqueeze %2997, %int2_648 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2998, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_649 = torch.constant.int 3
    %int0_650 = torch.constant.int 0
    %int9223372036854775807_651 = torch.constant.int 9223372036854775807
    %int1_652 = torch.constant.int 1
    %2999 = torch.aten.slice.Tensor %2998, %int3_649, %int0_650, %int9223372036854775807_651, %int1_652 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %2999, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %3000 = torch_c.to_builtin_tensor %2688 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_653 = arith.constant 1 : index
    %dim_654 = tensor.dim %3000, %c1_653 : tensor<4x?x1x128xf16>
    %3001 = flow.tensor.bitcast %3000 : tensor<4x?x1x128xf16>{%dim_654} -> tensor<4x?x1x64xcomplex<f16>>{%dim_654}
    %3002 = torch_c.from_builtin_tensor %3001 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %3002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %3003 = torch.aten.mul.Tensor %3002, %2999 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %3003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %3004 = torch_c.to_builtin_tensor %3003 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_655 = arith.constant 1 : index
    %dim_656 = tensor.dim %3004, %c1_655 : tensor<4x?x1x64xcomplex<f32>>
    %3005 = flow.tensor.bitcast %3004 : tensor<4x?x1x64xcomplex<f32>>{%dim_656} -> tensor<4x?x1x128xf32>{%dim_656}
    %3006 = torch_c.from_builtin_tensor %3005 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %3006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_657 = torch.constant.int 5
    %3007 = torch.prims.convert_element_type %3006, %int5_657 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %3007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_658 = torch.constant.int 1
    %3008 = torch.aten.size.int %2594, %int1_658 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_659 = torch.constant.int 0
    %3009 = torch.aten.add.int %int0_659, %3008 : !torch.int, !torch.int -> !torch.int
    %int0_660 = torch.constant.int 0
    %int0_661 = torch.constant.int 0
    %int1_662 = torch.constant.int 1
    %3010 = torch.aten.slice.Tensor %2902, %int0_660, %int0_661, %3009, %int1_662 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %3010, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_663 = torch.constant.int 1
    %int0_664 = torch.constant.int 0
    %int9223372036854775807_665 = torch.constant.int 9223372036854775807
    %int1_666 = torch.constant.int 1
    %3011 = torch.aten.slice.Tensor %3010, %int1_663, %int0_664, %int9223372036854775807_665, %int1_666 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %3011, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_667 = torch.constant.int 0
    %3012 = torch.aten.unsqueeze %3011, %int0_667 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %3012, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_668 = torch.constant.int 2
    %3013 = torch.aten.unsqueeze %3012, %int2_668 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %3013, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_669 = torch.constant.int 3
    %int0_670 = torch.constant.int 0
    %int9223372036854775807_671 = torch.constant.int 9223372036854775807
    %int1_672 = torch.constant.int 1
    %3014 = torch.aten.slice.Tensor %3013, %int3_669, %int0_670, %int9223372036854775807_671, %int1_672 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %3014, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %3015 = torch_c.to_builtin_tensor %2690 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_673 = arith.constant 1 : index
    %dim_674 = tensor.dim %3015, %c1_673 : tensor<4x?x1x128xf16>
    %3016 = flow.tensor.bitcast %3015 : tensor<4x?x1x128xf16>{%dim_674} -> tensor<4x?x1x64xcomplex<f16>>{%dim_674}
    %3017 = torch_c.from_builtin_tensor %3016 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %3017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %3018 = torch.aten.mul.Tensor %3017, %3014 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %3018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %3019 = torch_c.to_builtin_tensor %3018 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_675 = arith.constant 1 : index
    %dim_676 = tensor.dim %3019, %c1_675 : tensor<4x?x1x64xcomplex<f32>>
    %3020 = flow.tensor.bitcast %3019 : tensor<4x?x1x64xcomplex<f32>>{%dim_676} -> tensor<4x?x1x128xf32>{%dim_676}
    %3021 = torch_c.from_builtin_tensor %3020 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %3021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_677 = torch.constant.int 5
    %3022 = torch.prims.convert_element_type %3021, %int5_677 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %3022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
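    // Paged KV-cache handling: each per-device cache buffer %2328..%2335 of
    // shape [?, 131072] f16 is viewed as [pages, 32, 2, 16, 1, 128]. The factor
    // meanings are inferred, not stated in the IR: 32 transformer blocks,
    // 2 entries (K and V), 16 tokens per page, 1 KV head per shard, and a head
    // dim of 128 (32 * 2 * 16 * 1 * 128 = 131072).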
    %int0_678 = torch.constant.int 0
    %3023 = torch.aten.size.int %2328, %int0_678 : !torch.vtensor<[?,131072],f16>, !torch.int -> !torch.int
    %int32 = torch.constant.int 32
    %int2_679 = torch.constant.int 2
    %int16 = torch.constant.int 16
    %int1_680 = torch.constant.int 1
    %int128_681 = torch.constant.int 128
    %3024 = torch.prim.ListConstruct %3023, %int32, %int2_679, %int16, %int1_680, %int128_681 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3025 = torch.aten.view %2328, %3024 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3025, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int0_682 = torch.constant.int 0
    %3026 = torch.aten.size.int %2329, %int0_682 : !torch.vtensor<[?,131072],f16>, !torch.int -> !torch.int
    %int32_683 = torch.constant.int 32
    %int2_684 = torch.constant.int 2
    %int16_685 = torch.constant.int 16
    %int1_686 = torch.constant.int 1
    %int128_687 = torch.constant.int 128
    %3027 = torch.prim.ListConstruct %3026, %int32_683, %int2_684, %int16_685, %int1_686, %int128_687 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3028 = torch.aten.view %2329, %3027 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3028, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int0_688 = torch.constant.int 0
    %3029 = torch.aten.size.int %2330, %int0_688 : !torch.vtensor<[?,131072],f16>, !torch.int -> !torch.int
    %int32_689 = torch.constant.int 32
    %int2_690 = torch.constant.int 2
    %int16_691 = torch.constant.int 16
    %int1_692 = torch.constant.int 1
    %int128_693 = torch.constant.int 128
    %3030 = torch.prim.ListConstruct %3029, %int32_689, %int2_690, %int16_691, %int1_692, %int128_693 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3031 = torch.aten.view %2330, %3030 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3031, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int0_694 = torch.constant.int 0
    %3032 = torch.aten.size.int %2331, %int0_694 : !torch.vtensor<[?,131072],f16>, !torch.int -> !torch.int
    %int32_695 = torch.constant.int 32
    %int2_696 = torch.constant.int 2
    %int16_697 = torch.constant.int 16
    %int1_698 = torch.constant.int 1
    %int128_699 = torch.constant.int 128
    %3033 = torch.prim.ListConstruct %3032, %int32_695, %int2_696, %int16_697, %int1_698, %int128_699 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3034 = torch.aten.view %2331, %3033 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3034, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int0_700 = torch.constant.int 0
    %3035 = torch.aten.size.int %2332, %int0_700 : !torch.vtensor<[?,131072],f16>, !torch.int -> !torch.int
    %int32_701 = torch.constant.int 32
    %int2_702 = torch.constant.int 2
    %int16_703 = torch.constant.int 16
    %int1_704 = torch.constant.int 1
    %int128_705 = torch.constant.int 128
    %3036 = torch.prim.ListConstruct %3035, %int32_701, %int2_702, %int16_703, %int1_704, %int128_705 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3037 = torch.aten.view %2332, %3036 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3037, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int0_706 = torch.constant.int 0
    %3038 = torch.aten.size.int %2333, %int0_706 : !torch.vtensor<[?,131072],f16>, !torch.int -> !torch.int
    %int32_707 = torch.constant.int 32
    %int2_708 = torch.constant.int 2
    %int16_709 = torch.constant.int 16
    %int1_710 = torch.constant.int 1
    %int128_711 = torch.constant.int 128
    %3039 = torch.prim.ListConstruct %3038, %int32_707, %int2_708, %int16_709, %int1_710, %int128_711 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3040 = torch.aten.view %2333, %3039 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3040, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int0_712 = torch.constant.int 0
    %3041 = torch.aten.size.int %2334, %int0_712 : !torch.vtensor<[?,131072],f16>, !torch.int -> !torch.int
    %int32_713 = torch.constant.int 32
    %int2_714 = torch.constant.int 2
    %int16_715 = torch.constant.int 16
    %int1_716 = torch.constant.int 1
    %int128_717 = torch.constant.int 128
    %3042 = torch.prim.ListConstruct %3041, %int32_713, %int2_714, %int16_715, %int1_716, %int128_717 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3043 = torch.aten.view %2334, %3042 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3043, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int0_718 = torch.constant.int 0
    %3044 = torch.aten.size.int %2335, %int0_718 : !torch.vtensor<[?,131072],f16>, !torch.int -> !torch.int
    %int32_719 = torch.constant.int 32
    %int2_720 = torch.constant.int 2
    %int16_721 = torch.constant.int 16
    %int1_722 = torch.constant.int 1
    %int128_723 = torch.constant.int 128
    %3045 = torch.prim.ListConstruct %3044, %int32_719, %int2_720, %int16_721, %int1_722, %int128_723 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3046 = torch.aten.view %2335, %3045 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3046, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
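    // Each paged view is then flattened to [pages * 64, 16, 1, 128], i.e. one
    // row per (page, block, K/V) slot, since 32 blocks * 2 entries = 64 slots
    // per page.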
    %int32_724 = torch.constant.int 32
    %3047 = torch.aten.mul.int %3023, %int32_724 : !torch.int, !torch.int -> !torch.int
    %int2_725 = torch.constant.int 2
    %3048 = torch.aten.mul.int %3047, %int2_725 : !torch.int, !torch.int -> !torch.int
    %int16_726 = torch.constant.int 16
    %int1_727 = torch.constant.int 1
    %int128_728 = torch.constant.int 128
    %3049 = torch.prim.ListConstruct %3048, %int16_726, %int1_727, %int128_728 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3050 = torch.aten.view %3025, %3049 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3050, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_729 = torch.constant.int 32
    %3051 = torch.aten.mul.int %3026, %int32_729 : !torch.int, !torch.int -> !torch.int
    %int2_730 = torch.constant.int 2
    %3052 = torch.aten.mul.int %3051, %int2_730 : !torch.int, !torch.int -> !torch.int
    %int16_731 = torch.constant.int 16
    %int1_732 = torch.constant.int 1
    %int128_733 = torch.constant.int 128
    %3053 = torch.prim.ListConstruct %3052, %int16_731, %int1_732, %int128_733 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3054 = torch.aten.view %3028, %3053 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3054, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_734 = torch.constant.int 32
    %3055 = torch.aten.mul.int %3029, %int32_734 : !torch.int, !torch.int -> !torch.int
    %int2_735 = torch.constant.int 2
    %3056 = torch.aten.mul.int %3055, %int2_735 : !torch.int, !torch.int -> !torch.int
    %int16_736 = torch.constant.int 16
    %int1_737 = torch.constant.int 1
    %int128_738 = torch.constant.int 128
    %3057 = torch.prim.ListConstruct %3056, %int16_736, %int1_737, %int128_738 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3058 = torch.aten.view %3031, %3057 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3058, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_739 = torch.constant.int 32
    %3059 = torch.aten.mul.int %3032, %int32_739 : !torch.int, !torch.int -> !torch.int
    %int2_740 = torch.constant.int 2
    %3060 = torch.aten.mul.int %3059, %int2_740 : !torch.int, !torch.int -> !torch.int
    %int16_741 = torch.constant.int 16
    %int1_742 = torch.constant.int 1
    %int128_743 = torch.constant.int 128
    %3061 = torch.prim.ListConstruct %3060, %int16_741, %int1_742, %int128_743 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3062 = torch.aten.view %3034, %3061 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3062, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_744 = torch.constant.int 32
    %3063 = torch.aten.mul.int %3035, %int32_744 : !torch.int, !torch.int -> !torch.int
    %int2_745 = torch.constant.int 2
    %3064 = torch.aten.mul.int %3063, %int2_745 : !torch.int, !torch.int -> !torch.int
    %int16_746 = torch.constant.int 16
    %int1_747 = torch.constant.int 1
    %int128_748 = torch.constant.int 128
    %3065 = torch.prim.ListConstruct %3064, %int16_746, %int1_747, %int128_748 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3066 = torch.aten.view %3037, %3065 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3066, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_749 = torch.constant.int 32
    %3067 = torch.aten.mul.int %3038, %int32_749 : !torch.int, !torch.int -> !torch.int
    %int2_750 = torch.constant.int 2
    %3068 = torch.aten.mul.int %3067, %int2_750 : !torch.int, !torch.int -> !torch.int
    %int16_751 = torch.constant.int 16
    %int1_752 = torch.constant.int 1
    %int128_753 = torch.constant.int 128
    %3069 = torch.prim.ListConstruct %3068, %int16_751, %int1_752, %int128_753 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3070 = torch.aten.view %3040, %3069 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3070, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_754 = torch.constant.int 32
    %3071 = torch.aten.mul.int %3041, %int32_754 : !torch.int, !torch.int -> !torch.int
    %int2_755 = torch.constant.int 2
    %3072 = torch.aten.mul.int %3071, %int2_755 : !torch.int, !torch.int -> !torch.int
    %int16_756 = torch.constant.int 16
    %int1_757 = torch.constant.int 1
    %int128_758 = torch.constant.int 128
    %3073 = torch.prim.ListConstruct %3072, %int16_756, %int1_757, %int128_758 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3074 = torch.aten.view %3043, %3073 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3074, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_759 = torch.constant.int 32
    %3075 = torch.aten.mul.int %3044, %int32_759 : !torch.int, !torch.int -> !torch.int
    %int2_760 = torch.constant.int 2
    %3076 = torch.aten.mul.int %3075, %int2_760 : !torch.int, !torch.int -> !torch.int
    %int16_761 = torch.constant.int 16
    %int1_762 = torch.constant.int 1
    %int128_763 = torch.constant.int 128
    %3077 = torch.prim.ListConstruct %3076, %int16_761, %int1_762, %int128_763 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3078 = torch.aten.view %3046, %3077 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3078, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
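    // Index arithmetic: the per-device page-id tensors %2364..%2385 ([4, ?]
    // si64) are scaled by 64 (slots per page) and offset by 0, presumably
    // selecting the K slot of block 0 within each page; the offset would
    // differ for other blocks and for V entries.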
    %int64_764 = torch.constant.int 64
    %3079 = torch.aten.mul.Scalar %2364, %int64_764 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3079, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_765 = torch.constant.int 64
    %3080 = torch.aten.mul.Scalar %2367, %int64_765 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3080, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_766 = torch.constant.int 64
    %3081 = torch.aten.mul.Scalar %2370, %int64_766 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3081, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_767 = torch.constant.int 64
    %3082 = torch.aten.mul.Scalar %2373, %int64_767 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3082, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_768 = torch.constant.int 64
    %3083 = torch.aten.mul.Scalar %2376, %int64_768 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3083, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_769 = torch.constant.int 64
    %3084 = torch.aten.mul.Scalar %2379, %int64_769 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3084, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_770 = torch.constant.int 64
    %3085 = torch.aten.mul.Scalar %2382, %int64_770 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3085, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_771 = torch.constant.int 64
    %3086 = torch.aten.mul.Scalar %2385, %int64_771 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3086, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int0_772 = torch.constant.int 0
    %int1_773 = torch.constant.int 1
    %3087 = torch.aten.add.Scalar %3079, %int0_772, %int1_773 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3087, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int0_774 = torch.constant.int 0
    %int1_775 = torch.constant.int 1
    %3088 = torch.aten.add.Scalar %3080, %int0_774, %int1_775 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3088, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int0_776 = torch.constant.int 0
    %int1_777 = torch.constant.int 1
    %3089 = torch.aten.add.Scalar %3081, %int0_776, %int1_777 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3089, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int0_778 = torch.constant.int 0
    %int1_779 = torch.constant.int 1
    %3090 = torch.aten.add.Scalar %3082, %int0_778, %int1_779 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3090, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int0_780 = torch.constant.int 0
    %int1_781 = torch.constant.int 1
    %3091 = torch.aten.add.Scalar %3083, %int0_780, %int1_781 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3091, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int0_782 = torch.constant.int 0
    %int1_783 = torch.constant.int 1
    %3092 = torch.aten.add.Scalar %3084, %int0_782, %int1_783 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3092, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int0_784 = torch.constant.int 0
    %int1_785 = torch.constant.int 1
    %3093 = torch.aten.add.Scalar %3085, %int0_784, %int1_785 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3093, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int0_786 = torch.constant.int 0
    %int1_787 = torch.constant.int 1
    %3094 = torch.aten.add.Scalar %3086, %int0_786, %int1_787 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3094, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
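    // %3095 reads the per-sequence page count from %arg2 ([4, ?] si64). The
    // rotated key shards (%2917 .. %3022, each [4, seq, 1, 128] f16 with
    // seq = pages * 16) are re-viewed as [4, pages, 16, 1, 128], splitting the
    // sequence dimension into whole cache pages.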
    %int1_788 = torch.constant.int 1
    %3095 = torch.aten.size.int %arg2, %int1_788 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.int
    %int4_789 = torch.constant.int 4
    %int16_790 = torch.constant.int 16
    %int1_791 = torch.constant.int 1
    %int128_792 = torch.constant.int 128
    %3096 = torch.prim.ListConstruct %int4_789, %3095, %int16_790, %int1_791, %int128_792 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3097 = torch.aten.view %2917, %3096 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3097, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_793 = torch.constant.int 4
    %int16_794 = torch.constant.int 16
    %int1_795 = torch.constant.int 1
    %int128_796 = torch.constant.int 128
    %3098 = torch.prim.ListConstruct %int4_793, %3095, %int16_794, %int1_795, %int128_796 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3099 = torch.aten.view %2932, %3098 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3099, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_797 = torch.constant.int 4
    %int16_798 = torch.constant.int 16
    %int1_799 = torch.constant.int 1
    %int128_800 = torch.constant.int 128
    %3100 = torch.prim.ListConstruct %int4_797, %3095, %int16_798, %int1_799, %int128_800 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3101 = torch.aten.view %2947, %3100 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3101, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_801 = torch.constant.int 4
    %int16_802 = torch.constant.int 16
    %int1_803 = torch.constant.int 1
    %int128_804 = torch.constant.int 128
    %3102 = torch.prim.ListConstruct %int4_801, %3095, %int16_802, %int1_803, %int128_804 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3103 = torch.aten.view %2962, %3102 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3103, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_805 = torch.constant.int 4
    %int16_806 = torch.constant.int 16
    %int1_807 = torch.constant.int 1
    %int128_808 = torch.constant.int 128
    %3104 = torch.prim.ListConstruct %int4_805, %3095, %int16_806, %int1_807, %int128_808 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3105 = torch.aten.view %2977, %3104 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3105, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_809 = torch.constant.int 4
    %int16_810 = torch.constant.int 16
    %int1_811 = torch.constant.int 1
    %int128_812 = torch.constant.int 128
    %3106 = torch.prim.ListConstruct %int4_809, %3095, %int16_810, %int1_811, %int128_812 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3107 = torch.aten.view %2992, %3106 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3107, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_813 = torch.constant.int 4
    %int16_814 = torch.constant.int 16
    %int1_815 = torch.constant.int 1
    %int128_816 = torch.constant.int 128
    %3108 = torch.prim.ListConstruct %int4_813, %3095, %int16_814, %int1_815, %int128_816 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3109 = torch.aten.view %3007, %3108 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3109, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_817 = torch.constant.int 4
    %int16_818 = torch.constant.int 16
    %int1_819 = torch.constant.int 1
    %int128_820 = torch.constant.int 128
    %3110 = torch.prim.ListConstruct %int4_817, %3095, %int16_818, %int1_819, %int128_820 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3111 = torch.aten.view %3022, %3110 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3111, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
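    // The page-granular key views are flattened over the batch dimension to
    // [4 * pages, 16, 1, 128], matching the row layout of the flattened cache
    // slabs above.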
    %int4_821 = torch.constant.int 4
    %3112 = torch.aten.mul.int %int4_821, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_822 = torch.constant.int 16
    %int1_823 = torch.constant.int 1
    %int128_824 = torch.constant.int 128
    %3113 = torch.prim.ListConstruct %3112, %int16_822, %int1_823, %int128_824 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3114 = torch.aten.view %3097, %3113 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3114, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_825 = torch.constant.int 4
    %3115 = torch.aten.mul.int %int4_825, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_826 = torch.constant.int 16
    %int1_827 = torch.constant.int 1
    %int128_828 = torch.constant.int 128
    %3116 = torch.prim.ListConstruct %3115, %int16_826, %int1_827, %int128_828 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3117 = torch.aten.view %3099, %3116 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3117, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_829 = torch.constant.int 4
    %3118 = torch.aten.mul.int %int4_829, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_830 = torch.constant.int 16
    %int1_831 = torch.constant.int 1
    %int128_832 = torch.constant.int 128
    %3119 = torch.prim.ListConstruct %3118, %int16_830, %int1_831, %int128_832 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3120 = torch.aten.view %3101, %3119 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3120, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_833 = torch.constant.int 4
    %3121 = torch.aten.mul.int %int4_833, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_834 = torch.constant.int 16
    %int1_835 = torch.constant.int 1
    %int128_836 = torch.constant.int 128
    %3122 = torch.prim.ListConstruct %3121, %int16_834, %int1_835, %int128_836 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3123 = torch.aten.view %3103, %3122 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3123, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_837 = torch.constant.int 4
    %3124 = torch.aten.mul.int %int4_837, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_838 = torch.constant.int 16
    %int1_839 = torch.constant.int 1
    %int128_840 = torch.constant.int 128
    %3125 = torch.prim.ListConstruct %3124, %int16_838, %int1_839, %int128_840 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3126 = torch.aten.view %3105, %3125 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3126, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_841 = torch.constant.int 4
    %3127 = torch.aten.mul.int %int4_841, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_842 = torch.constant.int 16
    %int1_843 = torch.constant.int 1
    %int128_844 = torch.constant.int 128
    %3128 = torch.prim.ListConstruct %3127, %int16_842, %int1_843, %int128_844 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3129 = torch.aten.view %3107, %3128 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3129, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_845 = torch.constant.int 4
    %3130 = torch.aten.mul.int %int4_845, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_846 = torch.constant.int 16
    %int1_847 = torch.constant.int 1
    %int128_848 = torch.constant.int 128
    %3131 = torch.prim.ListConstruct %3130, %int16_846, %int1_847, %int128_848 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3132 = torch.aten.view %3109, %3131 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3132, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_849 = torch.constant.int 4
    %3133 = torch.aten.mul.int %int4_849, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_850 = torch.constant.int 16
    %int1_851 = torch.constant.int 1
    %int128_852 = torch.constant.int 128
    %3134 = torch.prim.ListConstruct %3133, %int16_850, %int1_851, %int128_852 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3135 = torch.aten.view %3111, %3134 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3135, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
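    // The [4, pages] slot-index tensors %3087..%3094 are flattened to 1-D
    // [4 * pages] vectors, one index per flattened key row; these presumably
    // feed a subsequent scatter (index_put) into the cache slabs.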
    %int4_853 = torch.constant.int 4
    %3136 = torch.aten.mul.int %int4_853, %3095 : !torch.int, !torch.int -> !torch.int
    %3137 = torch.prim.ListConstruct %3136 : (!torch.int) -> !torch.list<int>
    %3138 = torch.aten.view %3087, %3137 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3138, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_854 = torch.constant.int 4
    %3139 = torch.aten.mul.int %int4_854, %3095 : !torch.int, !torch.int -> !torch.int
    %3140 = torch.prim.ListConstruct %3139 : (!torch.int) -> !torch.list<int>
    %3141 = torch.aten.view %3088, %3140 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3141, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_855 = torch.constant.int 4
    %3142 = torch.aten.mul.int %int4_855, %3095 : !torch.int, !torch.int -> !torch.int
    %3143 = torch.prim.ListConstruct %3142 : (!torch.int) -> !torch.list<int>
    %3144 = torch.aten.view %3089, %3143 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3144, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_856 = torch.constant.int 4
    %3145 = torch.aten.mul.int %int4_856, %3095 : !torch.int, !torch.int -> !torch.int
    %3146 = torch.prim.ListConstruct %3145 : (!torch.int) -> !torch.list<int>
    %3147 = torch.aten.view %3090, %3146 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3147, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_857 = torch.constant.int 4
    %3148 = torch.aten.mul.int %int4_857, %3095 : !torch.int, !torch.int -> !torch.int
    %3149 = torch.prim.ListConstruct %3148 : (!torch.int) -> !torch.list<int>
    %3150 = torch.aten.view %3091, %3149 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3150, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_858 = torch.constant.int 4
    %3151 = torch.aten.mul.int %int4_858, %3095 : !torch.int, !torch.int -> !torch.int
    %3152 = torch.prim.ListConstruct %3151 : (!torch.int) -> !torch.list<int>
    %3153 = torch.aten.view %3092, %3152 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3153, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_859 = torch.constant.int 4
    %3154 = torch.aten.mul.int %int4_859, %3095 : !torch.int, !torch.int -> !torch.int
    %3155 = torch.prim.ListConstruct %3154 : (!torch.int) -> !torch.list<int>
    %3156 = torch.aten.view %3093, %3155 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3156, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_860 = torch.constant.int 4
    %3157 = torch.aten.mul.int %int4_860, %3095 : !torch.int, !torch.int -> !torch.int
    %3158 = torch.prim.ListConstruct %3157 : (!torch.int) -> !torch.list<int>
    %3159 = torch.aten.view %3094, %3158 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3159, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
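    // The same page-granular reshape is applied to %2692..%2706, presumably
    // the corresponding value shards ([4, seq, 1, 128] f16), viewing each as
    // [4, pages, 16, 1, 128].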
    %int4_861 = torch.constant.int 4
    %int16_862 = torch.constant.int 16
    %int1_863 = torch.constant.int 1
    %int128_864 = torch.constant.int 128
    %3160 = torch.prim.ListConstruct %int4_861, %3095, %int16_862, %int1_863, %int128_864 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3161 = torch.aten.view %2692, %3160 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3161, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_865 = torch.constant.int 4
    %int16_866 = torch.constant.int 16
    %int1_867 = torch.constant.int 1
    %int128_868 = torch.constant.int 128
    %3162 = torch.prim.ListConstruct %int4_865, %3095, %int16_866, %int1_867, %int128_868 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3163 = torch.aten.view %2694, %3162 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3163, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_869 = torch.constant.int 4
    %int16_870 = torch.constant.int 16
    %int1_871 = torch.constant.int 1
    %int128_872 = torch.constant.int 128
    %3164 = torch.prim.ListConstruct %int4_869, %3095, %int16_870, %int1_871, %int128_872 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3165 = torch.aten.view %2696, %3164 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3165, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_873 = torch.constant.int 4
    %int16_874 = torch.constant.int 16
    %int1_875 = torch.constant.int 1
    %int128_876 = torch.constant.int 128
    %3166 = torch.prim.ListConstruct %int4_873, %3095, %int16_874, %int1_875, %int128_876 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3167 = torch.aten.view %2698, %3166 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3167, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_877 = torch.constant.int 4
    %int16_878 = torch.constant.int 16
    %int1_879 = torch.constant.int 1
    %int128_880 = torch.constant.int 128
    %3168 = torch.prim.ListConstruct %int4_877, %3095, %int16_878, %int1_879, %int128_880 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3169 = torch.aten.view %2700, %3168 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3169, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_881 = torch.constant.int 4
    %int16_882 = torch.constant.int 16
    %int1_883 = torch.constant.int 1
    %int128_884 = torch.constant.int 128
    %3170 = torch.prim.ListConstruct %int4_881, %3095, %int16_882, %int1_883, %int128_884 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3171 = torch.aten.view %2702, %3170 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3171, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_885 = torch.constant.int 4
    %int16_886 = torch.constant.int 16
    %int1_887 = torch.constant.int 1
    %int128_888 = torch.constant.int 128
    %3172 = torch.prim.ListConstruct %int4_885, %3095, %int16_886, %int1_887, %int128_888 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3173 = torch.aten.view %2704, %3172 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3173, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_889 = torch.constant.int 4
    %int16_890 = torch.constant.int 16
    %int1_891 = torch.constant.int 1
    %int128_892 = torch.constant.int 128
    %3174 = torch.prim.ListConstruct %int4_889, %3095, %int16_890, %int1_891, %int128_892 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3175 = torch.aten.view %2706, %3174 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %3175, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
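    // The value views are likewise flattened to [4 * pages, 16, 1, 128] rows,
    // mirroring the key flattening above.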
    %int4_893 = torch.constant.int 4
    %3176 = torch.aten.mul.int %int4_893, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_894 = torch.constant.int 16
    %int1_895 = torch.constant.int 1
    %int128_896 = torch.constant.int 128
    %3177 = torch.prim.ListConstruct %3176, %int16_894, %int1_895, %int128_896 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3178 = torch.aten.view %3161, %3177 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3178, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_897 = torch.constant.int 4
    %3179 = torch.aten.mul.int %int4_897, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_898 = torch.constant.int 16
    %int1_899 = torch.constant.int 1
    %int128_900 = torch.constant.int 128
    %3180 = torch.prim.ListConstruct %3179, %int16_898, %int1_899, %int128_900 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3181 = torch.aten.view %3163, %3180 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3181, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_901 = torch.constant.int 4
    %3182 = torch.aten.mul.int %int4_901, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_902 = torch.constant.int 16
    %int1_903 = torch.constant.int 1
    %int128_904 = torch.constant.int 128
    %3183 = torch.prim.ListConstruct %3182, %int16_902, %int1_903, %int128_904 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3184 = torch.aten.view %3165, %3183 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3184, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_905 = torch.constant.int 4
    %3185 = torch.aten.mul.int %int4_905, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_906 = torch.constant.int 16
    %int1_907 = torch.constant.int 1
    %int128_908 = torch.constant.int 128
    %3186 = torch.prim.ListConstruct %3185, %int16_906, %int1_907, %int128_908 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3187 = torch.aten.view %3167, %3186 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3187, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_909 = torch.constant.int 4
    %3188 = torch.aten.mul.int %int4_909, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_910 = torch.constant.int 16
    %int1_911 = torch.constant.int 1
    %int128_912 = torch.constant.int 128
    %3189 = torch.prim.ListConstruct %3188, %int16_910, %int1_911, %int128_912 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3190 = torch.aten.view %3169, %3189 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3190, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_913 = torch.constant.int 4
    %3191 = torch.aten.mul.int %int4_913, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_914 = torch.constant.int 16
    %int1_915 = torch.constant.int 1
    %int128_916 = torch.constant.int 128
    %3192 = torch.prim.ListConstruct %3191, %int16_914, %int1_915, %int128_916 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3193 = torch.aten.view %3171, %3192 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3193, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_917 = torch.constant.int 4
    %3194 = torch.aten.mul.int %int4_917, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_918 = torch.constant.int 16
    %int1_919 = torch.constant.int 1
    %int128_920 = torch.constant.int 128
    %3195 = torch.prim.ListConstruct %3194, %int16_918, %int1_919, %int128_920 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3196 = torch.aten.view %3173, %3195 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3196, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_921 = torch.constant.int 4
    %3197 = torch.aten.mul.int %int4_921, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_922 = torch.constant.int 16
    %int1_923 = torch.constant.int 1
    %int128_924 = torch.constant.int 128
    %3198 = torch.prim.ListConstruct %3197, %int16_922, %int1_923, %int128_924 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3199 = torch.aten.view %3175, %3198 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3199, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
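    // %3087..%3094 appear to hold each device's flat cache-slot indices for the K partition;
    // adding 1 presumably selects the adjacent V partition in the
    // [pages, 32 layers, 2 (K/V), 16, 1, 128] cache layout seen below.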
    %int1_925 = torch.constant.int 1
    %int1_926 = torch.constant.int 1
    %3200 = torch.aten.add.Scalar %3087, %int1_925, %int1_926 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3200, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_927 = torch.constant.int 1
    %int1_928 = torch.constant.int 1
    %3201 = torch.aten.add.Scalar %3088, %int1_927, %int1_928 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3201, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_929 = torch.constant.int 1
    %int1_930 = torch.constant.int 1
    %3202 = torch.aten.add.Scalar %3089, %int1_929, %int1_930 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3202, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_931 = torch.constant.int 1
    %int1_932 = torch.constant.int 1
    %3203 = torch.aten.add.Scalar %3090, %int1_931, %int1_932 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3203, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_933 = torch.constant.int 1
    %int1_934 = torch.constant.int 1
    %3204 = torch.aten.add.Scalar %3091, %int1_933, %int1_934 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3204, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_935 = torch.constant.int 1
    %int1_936 = torch.constant.int 1
    %3205 = torch.aten.add.Scalar %3092, %int1_935, %int1_936 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3205, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_937 = torch.constant.int 1
    %int1_938 = torch.constant.int 1
    %3206 = torch.aten.add.Scalar %3093, %int1_937, %int1_938 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3206, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_939 = torch.constant.int 1
    %int1_940 = torch.constant.int 1
    %3207 = torch.aten.add.Scalar %3094, %int1_939, %int1_940 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %3207, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
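    // Flatten each device's [4,?] V-slot index tensor into a single [?] vector of length 4*s0.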
    %int4_941 = torch.constant.int 4
    %3208 = torch.aten.mul.int %int4_941, %3095 : !torch.int, !torch.int -> !torch.int
    %3209 = torch.prim.ListConstruct %3208 : (!torch.int) -> !torch.list<int>
    %3210 = torch.aten.view %3200, %3209 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3210, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_942 = torch.constant.int 4
    %3211 = torch.aten.mul.int %int4_942, %3095 : !torch.int, !torch.int -> !torch.int
    %3212 = torch.prim.ListConstruct %3211 : (!torch.int) -> !torch.list<int>
    %3213 = torch.aten.view %3201, %3212 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3213, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_943 = torch.constant.int 4
    %3214 = torch.aten.mul.int %int4_943, %3095 : !torch.int, !torch.int -> !torch.int
    %3215 = torch.prim.ListConstruct %3214 : (!torch.int) -> !torch.list<int>
    %3216 = torch.aten.view %3202, %3215 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3216, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_944 = torch.constant.int 4
    %3217 = torch.aten.mul.int %int4_944, %3095 : !torch.int, !torch.int -> !torch.int
    %3218 = torch.prim.ListConstruct %3217 : (!torch.int) -> !torch.list<int>
    %3219 = torch.aten.view %3203, %3218 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3219, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_945 = torch.constant.int 4
    %3220 = torch.aten.mul.int %int4_945, %3095 : !torch.int, !torch.int -> !torch.int
    %3221 = torch.prim.ListConstruct %3220 : (!torch.int) -> !torch.list<int>
    %3222 = torch.aten.view %3204, %3221 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3222, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_946 = torch.constant.int 4
    %3223 = torch.aten.mul.int %int4_946, %3095 : !torch.int, !torch.int -> !torch.int
    %3224 = torch.prim.ListConstruct %3223 : (!torch.int) -> !torch.list<int>
    %3225 = torch.aten.view %3205, %3224 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3225, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_947 = torch.constant.int 4
    %3226 = torch.aten.mul.int %int4_947, %3095 : !torch.int, !torch.int -> !torch.int
    %3227 = torch.prim.ListConstruct %3226 : (!torch.int) -> !torch.list<int>
    %3228 = torch.aten.view %3206, %3227 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3228, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_948 = torch.constant.int 4
    %3229 = torch.aten.mul.int %int4_948, %3095 : !torch.int, !torch.int -> !torch.int
    %3230 = torch.prim.ListConstruct %3229 : (!torch.int) -> !torch.list<int>
    %3231 = torch.aten.view %3207, %3230 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3231, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
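    // Concatenate each device's K-slot indices (%3138, %3141, ..., computed earlier) with the
    // V-slot indices just built, giving 8*s0 scatter destinations per device.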
    %3232 = torch.prim.ListConstruct %3138, %3210 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_949 = torch.constant.int 0
    %3233 = torch.aten.cat %3232, %int0_949 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3233, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %3234 = torch.prim.ListConstruct %3141, %3213 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_950 = torch.constant.int 0
    %3235 = torch.aten.cat %3234, %int0_950 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3235, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %3236 = torch.prim.ListConstruct %3144, %3216 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_951 = torch.constant.int 0
    %3237 = torch.aten.cat %3236, %int0_951 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3237, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %3238 = torch.prim.ListConstruct %3147, %3219 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_952 = torch.constant.int 0
    %3239 = torch.aten.cat %3238, %int0_952 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3239, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %3240 = torch.prim.ListConstruct %3150, %3222 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_953 = torch.constant.int 0
    %3241 = torch.aten.cat %3240, %int0_953 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3241, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %3242 = torch.prim.ListConstruct %3153, %3225 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_954 = torch.constant.int 0
    %3243 = torch.aten.cat %3242, %int0_954 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3243, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %3244 = torch.prim.ListConstruct %3156, %3228 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_955 = torch.constant.int 0
    %3245 = torch.aten.cat %3244, %int0_955 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3245, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %3246 = torch.prim.ListConstruct %3159, %3231 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_956 = torch.constant.int 0
    %3247 = torch.aten.cat %3246, %int0_956 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %3247, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
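    // Concatenate the matching payload rows in the same order: the K page rows
    // (%3114, %3117, ...) followed by the V page rows produced above.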
    %3248 = torch.prim.ListConstruct %3114, %3178 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_957 = torch.constant.int 0
    %3249 = torch.aten.cat %3248, %int0_957 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3249, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %3250 = torch.prim.ListConstruct %3117, %3181 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_958 = torch.constant.int 0
    %3251 = torch.aten.cat %3250, %int0_958 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3251, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %3252 = torch.prim.ListConstruct %3120, %3184 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_959 = torch.constant.int 0
    %3253 = torch.aten.cat %3252, %int0_959 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3253, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %3254 = torch.prim.ListConstruct %3123, %3187 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_960 = torch.constant.int 0
    %3255 = torch.aten.cat %3254, %int0_960 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3255, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %3256 = torch.prim.ListConstruct %3126, %3190 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_961 = torch.constant.int 0
    %3257 = torch.aten.cat %3256, %int0_961 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3257, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %3258 = torch.prim.ListConstruct %3129, %3193 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_962 = torch.constant.int 0
    %3259 = torch.aten.cat %3258, %int0_962 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3259, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %3260 = torch.prim.ListConstruct %3132, %3196 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_963 = torch.constant.int 0
    %3261 = torch.aten.cat %3260, %int0_963 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3261, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %3262 = torch.prim.ListConstruct %3135, %3199 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_964 = torch.constant.int 0
    %3263 = torch.aten.cat %3262, %int0_964 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3263, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
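    // Scatter the combined K/V rows into each device's flat cache view (%3050, %3054, ...)
    // via index_put (accumulate = false), then reinterpret the result as
    // [pages, 32, 2, 16, 1, 128] and finally as the [pages, 131072] page table
    // (32 layers x 2 partitions x 16 slots x 128 dims = 131072 f16 elements per page).
    // A rough PyTorch analogue of one device's update (names hypothetical):
    //   idx  = torch.cat([k_slots, v_slots])        # [8*s0] flat slot ids
    //   rows = torch.cat([k_pages, v_pages])        # [8*s0, 16, 1, 128]
    //   flat = flat.index_put([idx], rows)          # out-of-place scatter
    //   cache = flat.view(n_pages, 32, 2, 16, 1, 128).view(n_pages, 131072)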
    %3264 = torch.prim.ListConstruct %3233 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_965 = torch.constant.bool false
    %3265 = torch.aten.index_put %3050, %3264, %3249, %false_965 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3265, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_966 = torch.constant.int 32
    %int2_967 = torch.constant.int 2
    %int16_968 = torch.constant.int 16
    %int1_969 = torch.constant.int 1
    %int128_970 = torch.constant.int 128
    %3266 = torch.prim.ListConstruct %3023, %int32_966, %int2_967, %int16_968, %int1_969, %int128_970 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3267 = torch.aten.view %3265, %3266 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3267, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_971 = torch.constant.int 131072
    %3268 = torch.prim.ListConstruct %3023, %int131072_971 : (!torch.int, !torch.int) -> !torch.list<int>
    %3269 = torch.aten.view %3267, %3268 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %3269, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %3270 = torch.prim.ListConstruct %3235 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_972 = torch.constant.bool false
    %3271 = torch.aten.index_put %3054, %3270, %3251, %false_972 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3271, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_973 = torch.constant.int 32
    %int2_974 = torch.constant.int 2
    %int16_975 = torch.constant.int 16
    %int1_976 = torch.constant.int 1
    %int128_977 = torch.constant.int 128
    %3272 = torch.prim.ListConstruct %3026, %int32_973, %int2_974, %int16_975, %int1_976, %int128_977 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3273 = torch.aten.view %3271, %3272 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3273, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_978 = torch.constant.int 131072
    %3274 = torch.prim.ListConstruct %3026, %int131072_978 : (!torch.int, !torch.int) -> !torch.list<int>
    %3275 = torch.aten.view %3273, %3274 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %3275, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %3276 = torch.prim.ListConstruct %3237 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_979 = torch.constant.bool false
    %3277 = torch.aten.index_put %3058, %3276, %3253, %false_979 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3277, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_980 = torch.constant.int 32
    %int2_981 = torch.constant.int 2
    %int16_982 = torch.constant.int 16
    %int1_983 = torch.constant.int 1
    %int128_984 = torch.constant.int 128
    %3278 = torch.prim.ListConstruct %3029, %int32_980, %int2_981, %int16_982, %int1_983, %int128_984 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3279 = torch.aten.view %3277, %3278 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3279, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_985 = torch.constant.int 131072
    %3280 = torch.prim.ListConstruct %3029, %int131072_985 : (!torch.int, !torch.int) -> !torch.list<int>
    %3281 = torch.aten.view %3279, %3280 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %3281, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %3282 = torch.prim.ListConstruct %3239 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_986 = torch.constant.bool false
    %3283 = torch.aten.index_put %3062, %3282, %3255, %false_986 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3283, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_987 = torch.constant.int 32
    %int2_988 = torch.constant.int 2
    %int16_989 = torch.constant.int 16
    %int1_990 = torch.constant.int 1
    %int128_991 = torch.constant.int 128
    %3284 = torch.prim.ListConstruct %3032, %int32_987, %int2_988, %int16_989, %int1_990, %int128_991 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3285 = torch.aten.view %3283, %3284 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3285, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_992 = torch.constant.int 131072
    %3286 = torch.prim.ListConstruct %3032, %int131072_992 : (!torch.int, !torch.int) -> !torch.list<int>
    %3287 = torch.aten.view %3285, %3286 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %3287, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %3288 = torch.prim.ListConstruct %3241 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_993 = torch.constant.bool false
    %3289 = torch.aten.index_put %3066, %3288, %3257, %false_993 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3289, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_994 = torch.constant.int 32
    %int2_995 = torch.constant.int 2
    %int16_996 = torch.constant.int 16
    %int1_997 = torch.constant.int 1
    %int128_998 = torch.constant.int 128
    %3290 = torch.prim.ListConstruct %3035, %int32_994, %int2_995, %int16_996, %int1_997, %int128_998 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3291 = torch.aten.view %3289, %3290 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3291, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_999 = torch.constant.int 131072
    %3292 = torch.prim.ListConstruct %3035, %int131072_999 : (!torch.int, !torch.int) -> !torch.list<int>
    %3293 = torch.aten.view %3291, %3292 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %3293, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %3294 = torch.prim.ListConstruct %3243 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_1000 = torch.constant.bool false
    %3295 = torch.aten.index_put %3070, %3294, %3259, %false_1000 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3295, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_1001 = torch.constant.int 32
    %int2_1002 = torch.constant.int 2
    %int16_1003 = torch.constant.int 16
    %int1_1004 = torch.constant.int 1
    %int128_1005 = torch.constant.int 128
    %3296 = torch.prim.ListConstruct %3038, %int32_1001, %int2_1002, %int16_1003, %int1_1004, %int128_1005 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3297 = torch.aten.view %3295, %3296 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3297, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_1006 = torch.constant.int 131072
    %3298 = torch.prim.ListConstruct %3038, %int131072_1006 : (!torch.int, !torch.int) -> !torch.list<int>
    %3299 = torch.aten.view %3297, %3298 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %3299, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %3300 = torch.prim.ListConstruct %3245 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_1007 = torch.constant.bool false
    %3301 = torch.aten.index_put %3074, %3300, %3261, %false_1007 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3301, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_1008 = torch.constant.int 32
    %int2_1009 = torch.constant.int 2
    %int16_1010 = torch.constant.int 16
    %int1_1011 = torch.constant.int 1
    %int128_1012 = torch.constant.int 128
    %3302 = torch.prim.ListConstruct %3041, %int32_1008, %int2_1009, %int16_1010, %int1_1011, %int128_1012 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3303 = torch.aten.view %3301, %3302 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3303, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_1013 = torch.constant.int 131072
    %3304 = torch.prim.ListConstruct %3041, %int131072_1013 : (!torch.int, !torch.int) -> !torch.list<int>
    %3305 = torch.aten.view %3303, %3304 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %3305, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %3306 = torch.prim.ListConstruct %3247 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_1014 = torch.constant.bool false
    %3307 = torch.aten.index_put %3078, %3306, %3263, %false_1014 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %3307, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_1015 = torch.constant.int 32
    %int2_1016 = torch.constant.int 2
    %int16_1017 = torch.constant.int 16
    %int1_1018 = torch.constant.int 1
    %int128_1019 = torch.constant.int 128
    %3308 = torch.prim.ListConstruct %3044, %int32_1015, %int2_1016, %int16_1017, %int1_1018, %int128_1019 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3309 = torch.aten.view %3307, %3308 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %3309, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_1020 = torch.constant.int 131072
    %3310 = torch.prim.ListConstruct %3044, %int131072_1020 : (!torch.int, !torch.int) -> !torch.list<int>
    %3311 = torch.aten.view %3309, %3310 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %3311, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
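    // Cache update done; attention setup begins. Each device's K shard (%2917..%3022,
    // apparently the post-RoPE keys: [4, seq, 1, 128], a single kv head) gains a broadcast
    // axis at dim -2 ahead of the grouped-query expansion.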
    %int-2 = torch.constant.int -2
    %3312 = torch.aten.unsqueeze %2917, %int-2 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1021 = torch.constant.int -2
    %3313 = torch.aten.unsqueeze %2932, %int-2_1021 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1022 = torch.constant.int -2
    %3314 = torch.aten.unsqueeze %2947, %int-2_1022 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1023 = torch.constant.int -2
    %3315 = torch.aten.unsqueeze %2962, %int-2_1023 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1024 = torch.constant.int -2
    %3316 = torch.aten.unsqueeze %2977, %int-2_1024 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1025 = torch.constant.int -2
    %3317 = torch.aten.unsqueeze %2992, %int-2_1025 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1026 = torch.constant.int -2
    %3318 = torch.aten.unsqueeze %3007, %int-2_1026 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1027 = torch.constant.int -2
    %3319 = torch.aten.unsqueeze %3022, %int-2_1027 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
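    // Expand the singleton axis to 4: with 8-way sharding, each device serves 4 of the
    // 32 query heads from its single kv head, so K is replicated 4x.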
    %int4_1028 = torch.constant.int 4
    %int1_1029 = torch.constant.int 1
    %int4_1030 = torch.constant.int 4
    %int128_1031 = torch.constant.int 128
    %3320 = torch.prim.ListConstruct %int4_1028, %2903, %int1_1029, %int4_1030, %int128_1031 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1032 = torch.constant.bool false
    %3321 = torch.aten.expand %3312, %3320, %false_1032 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1033 = torch.constant.int 4
    %int1_1034 = torch.constant.int 1
    %int4_1035 = torch.constant.int 4
    %int128_1036 = torch.constant.int 128
    %3322 = torch.prim.ListConstruct %int4_1033, %2903, %int1_1034, %int4_1035, %int128_1036 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1037 = torch.constant.bool false
    %3323 = torch.aten.expand %3313, %3322, %false_1037 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1038 = torch.constant.int 4
    %int1_1039 = torch.constant.int 1
    %int4_1040 = torch.constant.int 4
    %int128_1041 = torch.constant.int 128
    %3324 = torch.prim.ListConstruct %int4_1038, %2903, %int1_1039, %int4_1040, %int128_1041 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1042 = torch.constant.bool false
    %3325 = torch.aten.expand %3314, %3324, %false_1042 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1043 = torch.constant.int 4
    %int1_1044 = torch.constant.int 1
    %int4_1045 = torch.constant.int 4
    %int128_1046 = torch.constant.int 128
    %3326 = torch.prim.ListConstruct %int4_1043, %2903, %int1_1044, %int4_1045, %int128_1046 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1047 = torch.constant.bool false
    %3327 = torch.aten.expand %3315, %3326, %false_1047 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1048 = torch.constant.int 4
    %int1_1049 = torch.constant.int 1
    %int4_1050 = torch.constant.int 4
    %int128_1051 = torch.constant.int 128
    %3328 = torch.prim.ListConstruct %int4_1048, %2903, %int1_1049, %int4_1050, %int128_1051 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1052 = torch.constant.bool false
    %3329 = torch.aten.expand %3316, %3328, %false_1052 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1053 = torch.constant.int 4
    %int1_1054 = torch.constant.int 1
    %int4_1055 = torch.constant.int 4
    %int128_1056 = torch.constant.int 128
    %3330 = torch.prim.ListConstruct %int4_1053, %2903, %int1_1054, %int4_1055, %int128_1056 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1057 = torch.constant.bool false
    %3331 = torch.aten.expand %3317, %3330, %false_1057 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1058 = torch.constant.int 4
    %int1_1059 = torch.constant.int 1
    %int4_1060 = torch.constant.int 4
    %int128_1061 = torch.constant.int 128
    %3332 = torch.prim.ListConstruct %int4_1058, %2903, %int1_1059, %int4_1060, %int128_1061 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1062 = torch.constant.bool false
    %3333 = torch.aten.expand %3318, %3332, %false_1062 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1063 = torch.constant.int 4
    %int1_1064 = torch.constant.int 1
    %int4_1065 = torch.constant.int 4
    %int128_1066 = torch.constant.int 128
    %3334 = torch.prim.ListConstruct %int4_1063, %2903, %int1_1064, %int4_1065, %int128_1066 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1067 = torch.constant.bool false
    %3335 = torch.aten.expand %3319, %3334, %false_1067 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
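    // Collapse the broadcast axis, yielding [4, seq, 4, 128] K per device. The
    // unsqueeze/expand/view triple is the usual copy-free head-repeat trick; roughly,
    // in PyTorch (hypothetical): k.unsqueeze(-2).expand(4, s, 1, 4, 128).reshape(4, s, 4, 128)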
    %int4_1068 = torch.constant.int 4
    %int4_1069 = torch.constant.int 4
    %int128_1070 = torch.constant.int 128
    %3336 = torch.prim.ListConstruct %int4_1068, %2903, %int4_1069, %int128_1070 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3337 = torch.aten.view %3321, %3336 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1071 = torch.constant.int 4
    %int4_1072 = torch.constant.int 4
    %int128_1073 = torch.constant.int 128
    %3338 = torch.prim.ListConstruct %int4_1071, %2903, %int4_1072, %int128_1073 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3339 = torch.aten.view %3323, %3338 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1074 = torch.constant.int 4
    %int4_1075 = torch.constant.int 4
    %int128_1076 = torch.constant.int 128
    %3340 = torch.prim.ListConstruct %int4_1074, %2903, %int4_1075, %int128_1076 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3341 = torch.aten.view %3325, %3340 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1077 = torch.constant.int 4
    %int4_1078 = torch.constant.int 4
    %int128_1079 = torch.constant.int 128
    %3342 = torch.prim.ListConstruct %int4_1077, %2903, %int4_1078, %int128_1079 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3343 = torch.aten.view %3327, %3342 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1080 = torch.constant.int 4
    %int4_1081 = torch.constant.int 4
    %int128_1082 = torch.constant.int 128
    %3344 = torch.prim.ListConstruct %int4_1080, %2903, %int4_1081, %int128_1082 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3345 = torch.aten.view %3329, %3344 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1083 = torch.constant.int 4
    %int4_1084 = torch.constant.int 4
    %int128_1085 = torch.constant.int 128
    %3346 = torch.prim.ListConstruct %int4_1083, %2903, %int4_1084, %int128_1085 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3347 = torch.aten.view %3331, %3346 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1086 = torch.constant.int 4
    %int4_1087 = torch.constant.int 4
    %int128_1088 = torch.constant.int 128
    %3348 = torch.prim.ListConstruct %int4_1086, %2903, %int4_1087, %int128_1088 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3349 = torch.aten.view %3333, %3348 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1089 = torch.constant.int 4
    %int4_1090 = torch.constant.int 4
    %int128_1091 = torch.constant.int 128
    %3350 = torch.prim.ListConstruct %int4_1089, %2903, %int4_1090, %int128_1091 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3351 = torch.aten.view %3335, %3350 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
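    // The same broadcast sequence is now applied to the V shards (%2692..%2706).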
    %int-2_1092 = torch.constant.int -2
    %3352 = torch.aten.unsqueeze %2692, %int-2_1092 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1093 = torch.constant.int -2
    %3353 = torch.aten.unsqueeze %2694, %int-2_1093 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1094 = torch.constant.int -2
    %3354 = torch.aten.unsqueeze %2696, %int-2_1094 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1095 = torch.constant.int -2
    %3355 = torch.aten.unsqueeze %2698, %int-2_1095 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1096 = torch.constant.int -2
    %3356 = torch.aten.unsqueeze %2700, %int-2_1096 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1097 = torch.constant.int -2
    %3357 = torch.aten.unsqueeze %2702, %int-2_1097 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1098 = torch.constant.int -2
    %3358 = torch.aten.unsqueeze %2704, %int-2_1098 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_1099 = torch.constant.int -2
    %3359 = torch.aten.unsqueeze %2706, %int-2_1099 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %3359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
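    // The dynamic sequence length is re-queried from %2616, then each V shard is
    // expanded to 4 heads exactly as K was.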
    %int1_1100 = torch.constant.int 1
    %3360 = torch.aten.size.int %2616, %int1_1100 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_1101 = torch.constant.int 4
    %int1_1102 = torch.constant.int 1
    %int4_1103 = torch.constant.int 4
    %int128_1104 = torch.constant.int 128
    %3361 = torch.prim.ListConstruct %int4_1101, %3360, %int1_1102, %int4_1103, %int128_1104 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1105 = torch.constant.bool false
    %3362 = torch.aten.expand %3352, %3361, %false_1105 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1106 = torch.constant.int 4
    %int1_1107 = torch.constant.int 1
    %int4_1108 = torch.constant.int 4
    %int128_1109 = torch.constant.int 128
    %3363 = torch.prim.ListConstruct %int4_1106, %3360, %int1_1107, %int4_1108, %int128_1109 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1110 = torch.constant.bool false
    %3364 = torch.aten.expand %3353, %3363, %false_1110 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1111 = torch.constant.int 4
    %int1_1112 = torch.constant.int 1
    %int4_1113 = torch.constant.int 4
    %int128_1114 = torch.constant.int 128
    %3365 = torch.prim.ListConstruct %int4_1111, %3360, %int1_1112, %int4_1113, %int128_1114 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1115 = torch.constant.bool false
    %3366 = torch.aten.expand %3354, %3365, %false_1115 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1116 = torch.constant.int 4
    %int1_1117 = torch.constant.int 1
    %int4_1118 = torch.constant.int 4
    %int128_1119 = torch.constant.int 128
    %3367 = torch.prim.ListConstruct %int4_1116, %3360, %int1_1117, %int4_1118, %int128_1119 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1120 = torch.constant.bool false
    %3368 = torch.aten.expand %3355, %3367, %false_1120 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1121 = torch.constant.int 4
    %int1_1122 = torch.constant.int 1
    %int4_1123 = torch.constant.int 4
    %int128_1124 = torch.constant.int 128
    %3369 = torch.prim.ListConstruct %int4_1121, %3360, %int1_1122, %int4_1123, %int128_1124 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1125 = torch.constant.bool false
    %3370 = torch.aten.expand %3356, %3369, %false_1125 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1126 = torch.constant.int 4
    %int1_1127 = torch.constant.int 1
    %int4_1128 = torch.constant.int 4
    %int128_1129 = torch.constant.int 128
    %3371 = torch.prim.ListConstruct %int4_1126, %3360, %int1_1127, %int4_1128, %int128_1129 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1130 = torch.constant.bool false
    %3372 = torch.aten.expand %3357, %3371, %false_1130 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1131 = torch.constant.int 4
    %int1_1132 = torch.constant.int 1
    %int4_1133 = torch.constant.int 4
    %int128_1134 = torch.constant.int 128
    %3373 = torch.prim.ListConstruct %int4_1131, %3360, %int1_1132, %int4_1133, %int128_1134 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1135 = torch.constant.bool false
    %3374 = torch.aten.expand %3358, %3373, %false_1135 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_1136 = torch.constant.int 4
    %int1_1137 = torch.constant.int 1
    %int4_1138 = torch.constant.int 4
    %int128_1139 = torch.constant.int 128
    %3375 = torch.prim.ListConstruct %int4_1136, %3360, %int1_1137, %int4_1138, %int128_1139 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_1140 = torch.constant.bool false
    %3376 = torch.aten.expand %3359, %3375, %false_1140 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %3376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
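    // Collapse the V broadcast axis to [4, seq, 4, 128] per device.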
    %int4_1141 = torch.constant.int 4
    %int4_1142 = torch.constant.int 4
    %int128_1143 = torch.constant.int 128
    %3377 = torch.prim.ListConstruct %int4_1141, %3360, %int4_1142, %int128_1143 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3378 = torch.aten.view %3362, %3377 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1144 = torch.constant.int 4
    %int4_1145 = torch.constant.int 4
    %int128_1146 = torch.constant.int 128
    %3379 = torch.prim.ListConstruct %int4_1144, %3360, %int4_1145, %int128_1146 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3380 = torch.aten.view %3364, %3379 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1147 = torch.constant.int 4
    %int4_1148 = torch.constant.int 4
    %int128_1149 = torch.constant.int 128
    %3381 = torch.prim.ListConstruct %int4_1147, %3360, %int4_1148, %int128_1149 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3382 = torch.aten.view %3366, %3381 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1150 = torch.constant.int 4
    %int4_1151 = torch.constant.int 4
    %int128_1152 = torch.constant.int 128
    %3383 = torch.prim.ListConstruct %int4_1150, %3360, %int4_1151, %int128_1152 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3384 = torch.aten.view %3368, %3383 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1153 = torch.constant.int 4
    %int4_1154 = torch.constant.int 4
    %int128_1155 = torch.constant.int 128
    %3385 = torch.prim.ListConstruct %int4_1153, %3360, %int4_1154, %int128_1155 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3386 = torch.aten.view %3370, %3385 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1156 = torch.constant.int 4
    %int4_1157 = torch.constant.int 4
    %int128_1158 = torch.constant.int 128
    %3387 = torch.prim.ListConstruct %int4_1156, %3360, %int4_1157, %int128_1158 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3388 = torch.aten.view %3372, %3387 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1159 = torch.constant.int 4
    %int4_1160 = torch.constant.int 4
    %int128_1161 = torch.constant.int 128
    %3389 = torch.prim.ListConstruct %int4_1159, %3360, %int4_1160, %int128_1161 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3390 = torch.aten.view %3374, %3389 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_1162 = torch.constant.int 4
    %int4_1163 = torch.constant.int 4
    %int128_1164 = torch.constant.int 128
    %3391 = torch.prim.ListConstruct %int4_1162, %3360, %int4_1163, %int128_1164 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3392 = torch.aten.view %3376, %3391 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
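    // Finally, what appear to be the Q shards (%2759..%2864), the expanded K, and the
    // expanded V are each transposed from [batch, seq, heads, dim] to
    // [batch, heads, seq, dim] ([4, 4, seq, 128]), the layout expected by
    // scaled dot-product attention.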
    %int1_1165 = torch.constant.int 1
    %int2_1166 = torch.constant.int 2
    %3393 = torch.aten.transpose.int %2759, %int1_1165, %int2_1166 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3393, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1167 = torch.constant.int 1
    %int2_1168 = torch.constant.int 2
    %3394 = torch.aten.transpose.int %2774, %int1_1167, %int2_1168 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3394, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1169 = torch.constant.int 1
    %int2_1170 = torch.constant.int 2
    %3395 = torch.aten.transpose.int %2789, %int1_1169, %int2_1170 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3395, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1171 = torch.constant.int 1
    %int2_1172 = torch.constant.int 2
    %3396 = torch.aten.transpose.int %2804, %int1_1171, %int2_1172 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3396, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1173 = torch.constant.int 1
    %int2_1174 = torch.constant.int 2
    %3397 = torch.aten.transpose.int %2819, %int1_1173, %int2_1174 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3397, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1175 = torch.constant.int 1
    %int2_1176 = torch.constant.int 2
    %3398 = torch.aten.transpose.int %2834, %int1_1175, %int2_1176 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3398, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1177 = torch.constant.int 1
    %int2_1178 = torch.constant.int 2
    %3399 = torch.aten.transpose.int %2849, %int1_1177, %int2_1178 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3399, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1179 = torch.constant.int 1
    %int2_1180 = torch.constant.int 2
    %3400 = torch.aten.transpose.int %2864, %int1_1179, %int2_1180 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3400, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1181 = torch.constant.int 1
    %int2_1182 = torch.constant.int 2
    %3401 = torch.aten.transpose.int %3337, %int1_1181, %int2_1182 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3401, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1183 = torch.constant.int 1
    %int2_1184 = torch.constant.int 2
    %3402 = torch.aten.transpose.int %3339, %int1_1183, %int2_1184 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3402, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1185 = torch.constant.int 1
    %int2_1186 = torch.constant.int 2
    %3403 = torch.aten.transpose.int %3341, %int1_1185, %int2_1186 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3403, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1187 = torch.constant.int 1
    %int2_1188 = torch.constant.int 2
    %3404 = torch.aten.transpose.int %3343, %int1_1187, %int2_1188 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3404, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1189 = torch.constant.int 1
    %int2_1190 = torch.constant.int 2
    %3405 = torch.aten.transpose.int %3345, %int1_1189, %int2_1190 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3405, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1191 = torch.constant.int 1
    %int2_1192 = torch.constant.int 2
    %3406 = torch.aten.transpose.int %3347, %int1_1191, %int2_1192 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3406, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1193 = torch.constant.int 1
    %int2_1194 = torch.constant.int 2
    %3407 = torch.aten.transpose.int %3349, %int1_1193, %int2_1194 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3407, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1195 = torch.constant.int 1
    %int2_1196 = torch.constant.int 2
    %3408 = torch.aten.transpose.int %3351, %int1_1195, %int2_1196 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3408, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
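    // ...and to the eight value shards (%3378..%3390).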
    %int1_1197 = torch.constant.int 1
    %int2_1198 = torch.constant.int 2
    %3409 = torch.aten.transpose.int %3378, %int1_1197, %int2_1198 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3409, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1199 = torch.constant.int 1
    %int2_1200 = torch.constant.int 2
    %3410 = torch.aten.transpose.int %3380, %int1_1199, %int2_1200 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3410, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1201 = torch.constant.int 1
    %int2_1202 = torch.constant.int 2
    %3411 = torch.aten.transpose.int %3382, %int1_1201, %int2_1202 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3411, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1203 = torch.constant.int 1
    %int2_1204 = torch.constant.int 2
    %3412 = torch.aten.transpose.int %3384, %int1_1203, %int2_1204 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3412, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1205 = torch.constant.int 1
    %int2_1206 = torch.constant.int 2
    %3413 = torch.aten.transpose.int %3386, %int1_1205, %int2_1206 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3413, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1207 = torch.constant.int 1
    %int2_1208 = torch.constant.int 2
    %3414 = torch.aten.transpose.int %3388, %int1_1207, %int2_1208 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3414, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1209 = torch.constant.int 1
    %int2_1210 = torch.constant.int 2
    %3415 = torch.aten.transpose.int %3390, %int1_1209, %int2_1210 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3415, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_1211 = torch.constant.int 1
    %int2_1212 = torch.constant.int 2
    %3416 = torch.aten.transpose.int %3392, %int1_1211, %int2_1212 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %3416, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
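    // Eight per-device causal flash-attention calls: dropout_p = 0.0,
    // is_causal = true, no attention mask, default scale. The second result
    // ([4,4,?] f32) is the op's auxiliary logsumexp output.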
    %float0.000000e00 = torch.constant.float 0.000000e+00
    %true_1213 = torch.constant.bool true
    %none_1214 = torch.constant.none
    %none_1215 = torch.constant.none
    %3417:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3393, %3401, %3409, %float0.000000e00, %true_1213, %none_1214, %none_1215) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %3417#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_1216 = torch.constant.float 0.000000e+00
    %true_1217 = torch.constant.bool true
    %none_1218 = torch.constant.none
    %none_1219 = torch.constant.none
    %3418:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3394, %3402, %3410, %float0.000000e00_1216, %true_1217, %none_1218, %none_1219) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %3418#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_1220 = torch.constant.float 0.000000e+00
    %true_1221 = torch.constant.bool true
    %none_1222 = torch.constant.none
    %none_1223 = torch.constant.none
    %3419:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3395, %3403, %3411, %float0.000000e00_1220, %true_1221, %none_1222, %none_1223) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %3419#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_1224 = torch.constant.float 0.000000e+00
    %true_1225 = torch.constant.bool true
    %none_1226 = torch.constant.none
    %none_1227 = torch.constant.none
    %3420:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3396, %3404, %3412, %float0.000000e00_1224, %true_1225, %none_1226, %none_1227) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %3420#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_1228 = torch.constant.float 0.000000e+00
    %true_1229 = torch.constant.bool true
    %none_1230 = torch.constant.none
    %none_1231 = torch.constant.none
    %3421:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3397, %3405, %3413, %float0.000000e00_1228, %true_1229, %none_1230, %none_1231) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %3421#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_1232 = torch.constant.float 0.000000e+00
    %true_1233 = torch.constant.bool true
    %none_1234 = torch.constant.none
    %none_1235 = torch.constant.none
    %3422:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3398, %3406, %3414, %float0.000000e00_1232, %true_1233, %none_1234, %none_1235) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %3422#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_1236 = torch.constant.float 0.000000e+00
    %true_1237 = torch.constant.bool true
    %none_1238 = torch.constant.none
    %none_1239 = torch.constant.none
    %3423:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3399, %3407, %3415, %float0.000000e00_1236, %true_1237, %none_1238, %none_1239) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %3423#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_1240 = torch.constant.float 0.000000e+00
    %true_1241 = torch.constant.bool true
    %none_1242 = torch.constant.none
    %none_1243 = torch.constant.none
    %3424:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%3400, %3408, %3416, %float0.000000e00_1240, %true_1241, %none_1242, %none_1243) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %3424#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
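    // Transpose each attention output back to [4,?,4,128]
    // (batch/seq/heads/head_dim) before flattening the head dimensions.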
    %int1_1244 = torch.constant.int 1
    %int2_1245 = torch.constant.int 2
    %3425 = torch.aten.transpose.int %3417#0, %int1_1244, %int2_1245 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_1246 = torch.constant.int 1
    %int2_1247 = torch.constant.int 2
    %3426 = torch.aten.transpose.int %3418#0, %int1_1246, %int2_1247 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_1248 = torch.constant.int 1
    %int2_1249 = torch.constant.int 2
    %3427 = torch.aten.transpose.int %3419#0, %int1_1248, %int2_1249 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_1250 = torch.constant.int 1
    %int2_1251 = torch.constant.int 2
    %3428 = torch.aten.transpose.int %3420#0, %int1_1250, %int2_1251 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_1252 = torch.constant.int 1
    %int2_1253 = torch.constant.int 2
    %3429 = torch.aten.transpose.int %3421#0, %int1_1252, %int2_1253 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_1254 = torch.constant.int 1
    %int2_1255 = torch.constant.int 2
    %3430 = torch.aten.transpose.int %3422#0, %int1_1254, %int2_1255 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_1256 = torch.constant.int 1
    %int2_1257 = torch.constant.int 2
    %3431 = torch.aten.transpose.int %3423#0, %int1_1256, %int2_1257 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_1258 = torch.constant.int 1
    %int2_1259 = torch.constant.int 2
    %3432 = torch.aten.transpose.int %3424#0, %int1_1258, %int2_1259 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %3432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
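    // Collapse heads into features: view each [4,?,4,128] result as
    // [4,?,512] (4 heads * 128 head_dim = 512).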
    %int4_1260 = torch.constant.int 4
    %int512_1261 = torch.constant.int 512
    %3433 = torch.prim.ListConstruct %int4_1260, %2745, %int512_1261 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3434 = torch.aten.view %3425, %3433 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %3434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1262 = torch.constant.int 4
    %int512_1263 = torch.constant.int 512
    %3435 = torch.prim.ListConstruct %int4_1262, %2760, %int512_1263 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3436 = torch.aten.view %3426, %3435 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %3436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1264 = torch.constant.int 4
    %int512_1265 = torch.constant.int 512
    %3437 = torch.prim.ListConstruct %int4_1264, %2775, %int512_1265 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3438 = torch.aten.view %3427, %3437 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %3438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1266 = torch.constant.int 4
    %int512_1267 = torch.constant.int 512
    %3439 = torch.prim.ListConstruct %int4_1266, %2790, %int512_1267 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3440 = torch.aten.view %3428, %3439 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %3440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1268 = torch.constant.int 4
    %int512_1269 = torch.constant.int 512
    %3441 = torch.prim.ListConstruct %int4_1268, %2805, %int512_1269 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3442 = torch.aten.view %3429, %3441 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %3442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1270 = torch.constant.int 4
    %int512_1271 = torch.constant.int 512
    %3443 = torch.prim.ListConstruct %int4_1270, %2820, %int512_1271 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3444 = torch.aten.view %3430, %3443 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %3444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1272 = torch.constant.int 4
    %int512_1273 = torch.constant.int 512
    %3445 = torch.prim.ListConstruct %int4_1272, %2835, %int512_1273 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3446 = torch.aten.view %3431, %3445 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %3446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1274 = torch.constant.int 4
    %int512_1275 = torch.constant.int 512
    %3447 = torch.prim.ListConstruct %int4_1274, %2850, %int512_1275 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3448 = torch.aten.view %3432, %3447 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %3448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
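    // Transpose the eight [4096,512] weight shards %40..%47 to [512,4096];
    // these look like the attention output projection, row-parallel with a
    // 512-wide input slice per device.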
    %int1_1276 = torch.constant.int 1
    %int0_1277 = torch.constant.int 0
    %3449 = torch.prim.ListConstruct %int1_1276, %int0_1277 : (!torch.int, !torch.int) -> !torch.list<int>
    %3450 = torch.aten.permute %40, %3449 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_1278 = torch.constant.int 1
    %int0_1279 = torch.constant.int 0
    %3451 = torch.prim.ListConstruct %int1_1278, %int0_1279 : (!torch.int, !torch.int) -> !torch.list<int>
    %3452 = torch.aten.permute %41, %3451 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_1280 = torch.constant.int 1
    %int0_1281 = torch.constant.int 0
    %3453 = torch.prim.ListConstruct %int1_1280, %int0_1281 : (!torch.int, !torch.int) -> !torch.list<int>
    %3454 = torch.aten.permute %42, %3453 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_1282 = torch.constant.int 1
    %int0_1283 = torch.constant.int 0
    %3455 = torch.prim.ListConstruct %int1_1282, %int0_1283 : (!torch.int, !torch.int) -> !torch.list<int>
    %3456 = torch.aten.permute %43, %3455 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_1284 = torch.constant.int 1
    %int0_1285 = torch.constant.int 0
    %3457 = torch.prim.ListConstruct %int1_1284, %int0_1285 : (!torch.int, !torch.int) -> !torch.list<int>
    %3458 = torch.aten.permute %44, %3457 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_1286 = torch.constant.int 1
    %int0_1287 = torch.constant.int 0
    %3459 = torch.prim.ListConstruct %int1_1286, %int0_1287 : (!torch.int, !torch.int) -> !torch.list<int>
    %3460 = torch.aten.permute %45, %3459 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_1288 = torch.constant.int 1
    %int0_1289 = torch.constant.int 0
    %3461 = torch.prim.ListConstruct %int1_1288, %int0_1289 : (!torch.int, !torch.int) -> !torch.list<int>
    %3462 = torch.aten.permute %46, %3461 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_1290 = torch.constant.int 1
    %int0_1291 = torch.constant.int 0
    %3463 = torch.prim.ListConstruct %int1_1290, %int0_1291 : (!torch.int, !torch.int) -> !torch.list<int>
    %3464 = torch.aten.permute %47, %3463 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
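    // Per-device output projection: flatten [4,?,512] to [4*?,512], matmul
    // with the [512,4096] weight slice, then view back to [4,?,4096]. Each
    // device now holds one partial sum of the full projection.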
    %int4_1292 = torch.constant.int 4
    %3465 = torch.aten.mul.int %int4_1292, %2745 : !torch.int, !torch.int -> !torch.int
    %int512_1293 = torch.constant.int 512
    %3466 = torch.prim.ListConstruct %3465, %int512_1293 : (!torch.int, !torch.int) -> !torch.list<int>
    %3467 = torch.aten.view %3434, %3466 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %3467, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %3468 = torch.aten.mm %3467, %3450 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3468, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1294 = torch.constant.int 4
    %int4096_1295 = torch.constant.int 4096
    %3469 = torch.prim.ListConstruct %int4_1294, %2745, %int4096_1295 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3470 = torch.aten.view %3468, %3469 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_1296 = torch.constant.int 4
    %3471 = torch.aten.mul.int %int4_1296, %2760 : !torch.int, !torch.int -> !torch.int
    %int512_1297 = torch.constant.int 512
    %3472 = torch.prim.ListConstruct %3471, %int512_1297 : (!torch.int, !torch.int) -> !torch.list<int>
    %3473 = torch.aten.view %3436, %3472 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %3473, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %3474 = torch.aten.mm %3473, %3452 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3474, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1298 = torch.constant.int 4
    %int4096_1299 = torch.constant.int 4096
    %3475 = torch.prim.ListConstruct %int4_1298, %2760, %int4096_1299 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3476 = torch.aten.view %3474, %3475 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_1300 = torch.constant.int 4
    %3477 = torch.aten.mul.int %int4_1300, %2775 : !torch.int, !torch.int -> !torch.int
    %int512_1301 = torch.constant.int 512
    %3478 = torch.prim.ListConstruct %3477, %int512_1301 : (!torch.int, !torch.int) -> !torch.list<int>
    %3479 = torch.aten.view %3438, %3478 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %3479, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %3480 = torch.aten.mm %3479, %3454 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3480, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1302 = torch.constant.int 4
    %int4096_1303 = torch.constant.int 4096
    %3481 = torch.prim.ListConstruct %int4_1302, %2775, %int4096_1303 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3482 = torch.aten.view %3480, %3481 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_1304 = torch.constant.int 4
    %3483 = torch.aten.mul.int %int4_1304, %2790 : !torch.int, !torch.int -> !torch.int
    %int512_1305 = torch.constant.int 512
    %3484 = torch.prim.ListConstruct %3483, %int512_1305 : (!torch.int, !torch.int) -> !torch.list<int>
    %3485 = torch.aten.view %3440, %3484 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %3485, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %3486 = torch.aten.mm %3485, %3456 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3486, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1306 = torch.constant.int 4
    %int4096_1307 = torch.constant.int 4096
    %3487 = torch.prim.ListConstruct %int4_1306, %2790, %int4096_1307 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3488 = torch.aten.view %3486, %3487 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_1308 = torch.constant.int 4
    %3489 = torch.aten.mul.int %int4_1308, %2805 : !torch.int, !torch.int -> !torch.int
    %int512_1309 = torch.constant.int 512
    %3490 = torch.prim.ListConstruct %3489, %int512_1309 : (!torch.int, !torch.int) -> !torch.list<int>
    %3491 = torch.aten.view %3442, %3490 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %3491, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %3492 = torch.aten.mm %3491, %3458 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3492, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1310 = torch.constant.int 4
    %int4096_1311 = torch.constant.int 4096
    %3493 = torch.prim.ListConstruct %int4_1310, %2805, %int4096_1311 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3494 = torch.aten.view %3492, %3493 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_1312 = torch.constant.int 4
    %3495 = torch.aten.mul.int %int4_1312, %2820 : !torch.int, !torch.int -> !torch.int
    %int512_1313 = torch.constant.int 512
    %3496 = torch.prim.ListConstruct %3495, %int512_1313 : (!torch.int, !torch.int) -> !torch.list<int>
    %3497 = torch.aten.view %3444, %3496 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %3497, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %3498 = torch.aten.mm %3497, %3460 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3498, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1314 = torch.constant.int 4
    %int4096_1315 = torch.constant.int 4096
    %3499 = torch.prim.ListConstruct %int4_1314, %2820, %int4096_1315 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3500 = torch.aten.view %3498, %3499 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_1316 = torch.constant.int 4
    %3501 = torch.aten.mul.int %int4_1316, %2835 : !torch.int, !torch.int -> !torch.int
    %int512_1317 = torch.constant.int 512
    %3502 = torch.prim.ListConstruct %3501, %int512_1317 : (!torch.int, !torch.int) -> !torch.list<int>
    %3503 = torch.aten.view %3446, %3502 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %3503, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %3504 = torch.aten.mm %3503, %3462 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3504, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1318 = torch.constant.int 4
    %int4096_1319 = torch.constant.int 4096
    %3505 = torch.prim.ListConstruct %int4_1318, %2835, %int4096_1319 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3506 = torch.aten.view %3504, %3505 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_1320 = torch.constant.int 4
    %3507 = torch.aten.mul.int %int4_1320, %2850 : !torch.int, !torch.int -> !torch.int
    %int512_1321 = torch.constant.int 512
    %3508 = torch.prim.ListConstruct %3507, %int512_1321 : (!torch.int, !torch.int) -> !torch.list<int>
    %3509 = torch.aten.view %3448, %3508 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %3509, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %3510 = torch.aten.mm %3509, %3464 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3510, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1322 = torch.constant.int 4
    %int4096_1323 = torch.constant.int 4096
    %3511 = torch.prim.ListConstruct %int4_1322, %2850, %int4096_1323 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3512 = torch.aten.view %3510, %3511 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
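    // What follows is effectively an unrolled sum all-reduce: every device
    // receives the other seven partial projections and adds all eight.
    // First, transfer the partials from devices 1..7 to @__device_0.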
    %3513 = torch_c.to_builtin_tensor %3476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1324 = arith.constant 1 : index
    %dim_1325 = tensor.dim %3513, %c1_1324 : tensor<4x?x4096xf16>
    %3514 = flow.tensor.transfer %3513 : tensor<4x?x4096xf16>{%dim_1325} to #hal.device.promise<@__device_0>
    %3515 = torch_c.from_builtin_tensor %3514 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3516 = torch_c.to_builtin_tensor %3482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1326 = arith.constant 1 : index
    %dim_1327 = tensor.dim %3516, %c1_1326 : tensor<4x?x4096xf16>
    %3517 = flow.tensor.transfer %3516 : tensor<4x?x4096xf16>{%dim_1327} to #hal.device.promise<@__device_0>
    %3518 = torch_c.from_builtin_tensor %3517 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3519 = torch_c.to_builtin_tensor %3488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1328 = arith.constant 1 : index
    %dim_1329 = tensor.dim %3519, %c1_1328 : tensor<4x?x4096xf16>
    %3520 = flow.tensor.transfer %3519 : tensor<4x?x4096xf16>{%dim_1329} to #hal.device.promise<@__device_0>
    %3521 = torch_c.from_builtin_tensor %3520 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3522 = torch_c.to_builtin_tensor %3494 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1330 = arith.constant 1 : index
    %dim_1331 = tensor.dim %3522, %c1_1330 : tensor<4x?x4096xf16>
    %3523 = flow.tensor.transfer %3522 : tensor<4x?x4096xf16>{%dim_1331} to #hal.device.promise<@__device_0>
    %3524 = torch_c.from_builtin_tensor %3523 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3525 = torch_c.to_builtin_tensor %3500 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1332 = arith.constant 1 : index
    %dim_1333 = tensor.dim %3525, %c1_1332 : tensor<4x?x4096xf16>
    %3526 = flow.tensor.transfer %3525 : tensor<4x?x4096xf16>{%dim_1333} to #hal.device.promise<@__device_0>
    %3527 = torch_c.from_builtin_tensor %3526 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3528 = torch_c.to_builtin_tensor %3506 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1334 = arith.constant 1 : index
    %dim_1335 = tensor.dim %3528, %c1_1334 : tensor<4x?x4096xf16>
    %3529 = flow.tensor.transfer %3528 : tensor<4x?x4096xf16>{%dim_1335} to #hal.device.promise<@__device_0>
    %3530 = torch_c.from_builtin_tensor %3529 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3531 = torch_c.to_builtin_tensor %3512 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1336 = arith.constant 1 : index
    %dim_1337 = tensor.dim %3531, %c1_1336 : tensor<4x?x4096xf16>
    %3532 = flow.tensor.transfer %3531 : tensor<4x?x4096xf16>{%dim_1337} to #hal.device.promise<@__device_0>
    %3533 = torch_c.from_builtin_tensor %3532 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
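    // Accumulate the eight partials on @__device_0 (%3470 is its local one).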
    %int1_1338 = torch.constant.int 1
    %3534 = torch.aten.add.Tensor %3470, %3515, %int1_1338 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1339 = torch.constant.int 1
    %3535 = torch.aten.add.Tensor %3534, %3518, %int1_1339 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1340 = torch.constant.int 1
    %3536 = torch.aten.add.Tensor %3535, %3521, %int1_1340 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1341 = torch.constant.int 1
    %3537 = torch.aten.add.Tensor %3536, %3524, %int1_1341 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1342 = torch.constant.int 1
    %3538 = torch.aten.add.Tensor %3537, %3527, %int1_1342 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1343 = torch.constant.int 1
    %3539 = torch.aten.add.Tensor %3538, %3530, %int1_1343 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1344 = torch.constant.int 1
    %3540 = torch.aten.add.Tensor %3539, %3533, %int1_1344 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
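    // Repeat for @__device_1: transfer the seven remote partials and add
    // them to the local one (%3476).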
    %3541 = torch_c.to_builtin_tensor %3470 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1345 = arith.constant 1 : index
    %dim_1346 = tensor.dim %3541, %c1_1345 : tensor<4x?x4096xf16>
    %3542 = flow.tensor.transfer %3541 : tensor<4x?x4096xf16>{%dim_1346} to #hal.device.promise<@__device_1>
    %3543 = torch_c.from_builtin_tensor %3542 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3544 = torch_c.to_builtin_tensor %3482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1347 = arith.constant 1 : index
    %dim_1348 = tensor.dim %3544, %c1_1347 : tensor<4x?x4096xf16>
    %3545 = flow.tensor.transfer %3544 : tensor<4x?x4096xf16>{%dim_1348} to #hal.device.promise<@__device_1>
    %3546 = torch_c.from_builtin_tensor %3545 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3547 = torch_c.to_builtin_tensor %3488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1349 = arith.constant 1 : index
    %dim_1350 = tensor.dim %3547, %c1_1349 : tensor<4x?x4096xf16>
    %3548 = flow.tensor.transfer %3547 : tensor<4x?x4096xf16>{%dim_1350} to #hal.device.promise<@__device_1>
    %3549 = torch_c.from_builtin_tensor %3548 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3550 = torch_c.to_builtin_tensor %3494 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1351 = arith.constant 1 : index
    %dim_1352 = tensor.dim %3550, %c1_1351 : tensor<4x?x4096xf16>
    %3551 = flow.tensor.transfer %3550 : tensor<4x?x4096xf16>{%dim_1352} to #hal.device.promise<@__device_1>
    %3552 = torch_c.from_builtin_tensor %3551 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3553 = torch_c.to_builtin_tensor %3500 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1353 = arith.constant 1 : index
    %dim_1354 = tensor.dim %3553, %c1_1353 : tensor<4x?x4096xf16>
    %3554 = flow.tensor.transfer %3553 : tensor<4x?x4096xf16>{%dim_1354} to #hal.device.promise<@__device_1>
    %3555 = torch_c.from_builtin_tensor %3554 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3556 = torch_c.to_builtin_tensor %3506 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1355 = arith.constant 1 : index
    %dim_1356 = tensor.dim %3556, %c1_1355 : tensor<4x?x4096xf16>
    %3557 = flow.tensor.transfer %3556 : tensor<4x?x4096xf16>{%dim_1356} to #hal.device.promise<@__device_1>
    %3558 = torch_c.from_builtin_tensor %3557 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3559 = torch_c.to_builtin_tensor %3512 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1357 = arith.constant 1 : index
    %dim_1358 = tensor.dim %3559, %c1_1357 : tensor<4x?x4096xf16>
    %3560 = flow.tensor.transfer %3559 : tensor<4x?x4096xf16>{%dim_1358} to #hal.device.promise<@__device_1>
    %3561 = torch_c.from_builtin_tensor %3560 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1359 = torch.constant.int 1
    %3562 = torch.aten.add.Tensor %3543, %3476, %int1_1359 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1360 = torch.constant.int 1
    %3563 = torch.aten.add.Tensor %3562, %3546, %int1_1360 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1361 = torch.constant.int 1
    %3564 = torch.aten.add.Tensor %3563, %3549, %int1_1361 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1362 = torch.constant.int 1
    %3565 = torch.aten.add.Tensor %3564, %3552, %int1_1362 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1363 = torch.constant.int 1
    %3566 = torch.aten.add.Tensor %3565, %3555, %int1_1363 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1364 = torch.constant.int 1
    %3567 = torch.aten.add.Tensor %3566, %3558, %int1_1364 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1365 = torch.constant.int 1
    %3568 = torch.aten.add.Tensor %3567, %3561, %int1_1365 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
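    // Same pattern for @__device_2 (local partial %3482 used in place).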
    %3569 = torch_c.to_builtin_tensor %3470 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1366 = arith.constant 1 : index
    %dim_1367 = tensor.dim %3569, %c1_1366 : tensor<4x?x4096xf16>
    %3570 = flow.tensor.transfer %3569 : tensor<4x?x4096xf16>{%dim_1367} to #hal.device.promise<@__device_2>
    %3571 = torch_c.from_builtin_tensor %3570 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3572 = torch_c.to_builtin_tensor %3476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1368 = arith.constant 1 : index
    %dim_1369 = tensor.dim %3572, %c1_1368 : tensor<4x?x4096xf16>
    %3573 = flow.tensor.transfer %3572 : tensor<4x?x4096xf16>{%dim_1369} to #hal.device.promise<@__device_2>
    %3574 = torch_c.from_builtin_tensor %3573 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3575 = torch_c.to_builtin_tensor %3488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1370 = arith.constant 1 : index
    %dim_1371 = tensor.dim %3575, %c1_1370 : tensor<4x?x4096xf16>
    %3576 = flow.tensor.transfer %3575 : tensor<4x?x4096xf16>{%dim_1371} to #hal.device.promise<@__device_2>
    %3577 = torch_c.from_builtin_tensor %3576 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3578 = torch_c.to_builtin_tensor %3494 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1372 = arith.constant 1 : index
    %dim_1373 = tensor.dim %3578, %c1_1372 : tensor<4x?x4096xf16>
    %3579 = flow.tensor.transfer %3578 : tensor<4x?x4096xf16>{%dim_1373} to #hal.device.promise<@__device_2>
    %3580 = torch_c.from_builtin_tensor %3579 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3581 = torch_c.to_builtin_tensor %3500 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1374 = arith.constant 1 : index
    %dim_1375 = tensor.dim %3581, %c1_1374 : tensor<4x?x4096xf16>
    %3582 = flow.tensor.transfer %3581 : tensor<4x?x4096xf16>{%dim_1375} to #hal.device.promise<@__device_2>
    %3583 = torch_c.from_builtin_tensor %3582 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3584 = torch_c.to_builtin_tensor %3506 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1376 = arith.constant 1 : index
    %dim_1377 = tensor.dim %3584, %c1_1376 : tensor<4x?x4096xf16>
    %3585 = flow.tensor.transfer %3584 : tensor<4x?x4096xf16>{%dim_1377} to #hal.device.promise<@__device_2>
    %3586 = torch_c.from_builtin_tensor %3585 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3587 = torch_c.to_builtin_tensor %3512 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1378 = arith.constant 1 : index
    %dim_1379 = tensor.dim %3587, %c1_1378 : tensor<4x?x4096xf16>
    %3588 = flow.tensor.transfer %3587 : tensor<4x?x4096xf16>{%dim_1379} to #hal.device.promise<@__device_2>
    %3589 = torch_c.from_builtin_tensor %3588 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1380 = torch.constant.int 1
    %3590 = torch.aten.add.Tensor %3571, %3574, %int1_1380 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1381 = torch.constant.int 1
    %3591 = torch.aten.add.Tensor %3590, %3482, %int1_1381 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1382 = torch.constant.int 1
    %3592 = torch.aten.add.Tensor %3591, %3577, %int1_1382 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1383 = torch.constant.int 1
    %3593 = torch.aten.add.Tensor %3592, %3580, %int1_1383 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1384 = torch.constant.int 1
    %3594 = torch.aten.add.Tensor %3593, %3583, %int1_1384 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1385 = torch.constant.int 1
    %3595 = torch.aten.add.Tensor %3594, %3586, %int1_1385 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1386 = torch.constant.int 1
    %3596 = torch.aten.add.Tensor %3595, %3589, %int1_1386 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
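    // Same pattern for @__device_3 (local partial %3488).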
    %3597 = torch_c.to_builtin_tensor %3470 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1387 = arith.constant 1 : index
    %dim_1388 = tensor.dim %3597, %c1_1387 : tensor<4x?x4096xf16>
    %3598 = flow.tensor.transfer %3597 : tensor<4x?x4096xf16>{%dim_1388} to #hal.device.promise<@__device_3>
    %3599 = torch_c.from_builtin_tensor %3598 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3600 = torch_c.to_builtin_tensor %3476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1389 = arith.constant 1 : index
    %dim_1390 = tensor.dim %3600, %c1_1389 : tensor<4x?x4096xf16>
    %3601 = flow.tensor.transfer %3600 : tensor<4x?x4096xf16>{%dim_1390} to #hal.device.promise<@__device_3>
    %3602 = torch_c.from_builtin_tensor %3601 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3603 = torch_c.to_builtin_tensor %3482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1391 = arith.constant 1 : index
    %dim_1392 = tensor.dim %3603, %c1_1391 : tensor<4x?x4096xf16>
    %3604 = flow.tensor.transfer %3603 : tensor<4x?x4096xf16>{%dim_1392} to #hal.device.promise<@__device_3>
    %3605 = torch_c.from_builtin_tensor %3604 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3606 = torch_c.to_builtin_tensor %3494 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1393 = arith.constant 1 : index
    %dim_1394 = tensor.dim %3606, %c1_1393 : tensor<4x?x4096xf16>
    %3607 = flow.tensor.transfer %3606 : tensor<4x?x4096xf16>{%dim_1394} to #hal.device.promise<@__device_3>
    %3608 = torch_c.from_builtin_tensor %3607 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3609 = torch_c.to_builtin_tensor %3500 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1395 = arith.constant 1 : index
    %dim_1396 = tensor.dim %3609, %c1_1395 : tensor<4x?x4096xf16>
    %3610 = flow.tensor.transfer %3609 : tensor<4x?x4096xf16>{%dim_1396} to #hal.device.promise<@__device_3>
    %3611 = torch_c.from_builtin_tensor %3610 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3612 = torch_c.to_builtin_tensor %3506 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1397 = arith.constant 1 : index
    %dim_1398 = tensor.dim %3612, %c1_1397 : tensor<4x?x4096xf16>
    %3613 = flow.tensor.transfer %3612 : tensor<4x?x4096xf16>{%dim_1398} to #hal.device.promise<@__device_3>
    %3614 = torch_c.from_builtin_tensor %3613 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3615 = torch_c.to_builtin_tensor %3512 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1399 = arith.constant 1 : index
    %dim_1400 = tensor.dim %3615, %c1_1399 : tensor<4x?x4096xf16>
    %3616 = flow.tensor.transfer %3615 : tensor<4x?x4096xf16>{%dim_1400} to #hal.device.promise<@__device_3>
    %3617 = torch_c.from_builtin_tensor %3616 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1401 = torch.constant.int 1
    %3618 = torch.aten.add.Tensor %3599, %3602, %int1_1401 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1402 = torch.constant.int 1
    %3619 = torch.aten.add.Tensor %3618, %3605, %int1_1402 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1403 = torch.constant.int 1
    %3620 = torch.aten.add.Tensor %3619, %3488, %int1_1403 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1404 = torch.constant.int 1
    %3621 = torch.aten.add.Tensor %3620, %3608, %int1_1404 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1405 = torch.constant.int 1
    %3622 = torch.aten.add.Tensor %3621, %3611, %int1_1405 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1406 = torch.constant.int 1
    %3623 = torch.aten.add.Tensor %3622, %3614, %int1_1406 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1407 = torch.constant.int 1
    %3624 = torch.aten.add.Tensor %3623, %3617, %int1_1407 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
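    // Same pattern for @__device_4 (local partial %3494).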
    %3625 = torch_c.to_builtin_tensor %3470 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1408 = arith.constant 1 : index
    %dim_1409 = tensor.dim %3625, %c1_1408 : tensor<4x?x4096xf16>
    %3626 = flow.tensor.transfer %3625 : tensor<4x?x4096xf16>{%dim_1409} to #hal.device.promise<@__device_4>
    %3627 = torch_c.from_builtin_tensor %3626 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3628 = torch_c.to_builtin_tensor %3476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1410 = arith.constant 1 : index
    %dim_1411 = tensor.dim %3628, %c1_1410 : tensor<4x?x4096xf16>
    %3629 = flow.tensor.transfer %3628 : tensor<4x?x4096xf16>{%dim_1411} to #hal.device.promise<@__device_4>
    %3630 = torch_c.from_builtin_tensor %3629 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3631 = torch_c.to_builtin_tensor %3482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1412 = arith.constant 1 : index
    %dim_1413 = tensor.dim %3631, %c1_1412 : tensor<4x?x4096xf16>
    %3632 = flow.tensor.transfer %3631 : tensor<4x?x4096xf16>{%dim_1413} to #hal.device.promise<@__device_4>
    %3633 = torch_c.from_builtin_tensor %3632 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3634 = torch_c.to_builtin_tensor %3488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1414 = arith.constant 1 : index
    %dim_1415 = tensor.dim %3634, %c1_1414 : tensor<4x?x4096xf16>
    %3635 = flow.tensor.transfer %3634 : tensor<4x?x4096xf16>{%dim_1415} to #hal.device.promise<@__device_4>
    %3636 = torch_c.from_builtin_tensor %3635 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3637 = torch_c.to_builtin_tensor %3500 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1416 = arith.constant 1 : index
    %dim_1417 = tensor.dim %3637, %c1_1416 : tensor<4x?x4096xf16>
    %3638 = flow.tensor.transfer %3637 : tensor<4x?x4096xf16>{%dim_1417} to #hal.device.promise<@__device_4>
    %3639 = torch_c.from_builtin_tensor %3638 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3640 = torch_c.to_builtin_tensor %3506 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1418 = arith.constant 1 : index
    %dim_1419 = tensor.dim %3640, %c1_1418 : tensor<4x?x4096xf16>
    %3641 = flow.tensor.transfer %3640 : tensor<4x?x4096xf16>{%dim_1419} to #hal.device.promise<@__device_4>
    %3642 = torch_c.from_builtin_tensor %3641 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3643 = torch_c.to_builtin_tensor %3512 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1420 = arith.constant 1 : index
    %dim_1421 = tensor.dim %3643, %c1_1420 : tensor<4x?x4096xf16>
    %3644 = flow.tensor.transfer %3643 : tensor<4x?x4096xf16>{%dim_1421} to #hal.device.promise<@__device_4>
    %3645 = torch_c.from_builtin_tensor %3644 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1422 = torch.constant.int 1
    %3646 = torch.aten.add.Tensor %3627, %3630, %int1_1422 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1423 = torch.constant.int 1
    %3647 = torch.aten.add.Tensor %3646, %3633, %int1_1423 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1424 = torch.constant.int 1
    %3648 = torch.aten.add.Tensor %3647, %3636, %int1_1424 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1425 = torch.constant.int 1
    %3649 = torch.aten.add.Tensor %3648, %3494, %int1_1425 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1426 = torch.constant.int 1
    %3650 = torch.aten.add.Tensor %3649, %3639, %int1_1426 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1427 = torch.constant.int 1
    %3651 = torch.aten.add.Tensor %3650, %3642, %int1_1427 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1428 = torch.constant.int 1
    %3652 = torch.aten.add.Tensor %3651, %3645, %int1_1428 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
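    // Same gather-and-sum for @__device_5 (%3494 is transferred here; %3500 is the locally resident partial).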
    %3653 = torch_c.to_builtin_tensor %3470 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1429 = arith.constant 1 : index
    %dim_1430 = tensor.dim %3653, %c1_1429 : tensor<4x?x4096xf16>
    %3654 = flow.tensor.transfer %3653 : tensor<4x?x4096xf16>{%dim_1430} to #hal.device.promise<@__device_5>
    %3655 = torch_c.from_builtin_tensor %3654 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3656 = torch_c.to_builtin_tensor %3476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1431 = arith.constant 1 : index
    %dim_1432 = tensor.dim %3656, %c1_1431 : tensor<4x?x4096xf16>
    %3657 = flow.tensor.transfer %3656 : tensor<4x?x4096xf16>{%dim_1432} to #hal.device.promise<@__device_5>
    %3658 = torch_c.from_builtin_tensor %3657 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3659 = torch_c.to_builtin_tensor %3482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1433 = arith.constant 1 : index
    %dim_1434 = tensor.dim %3659, %c1_1433 : tensor<4x?x4096xf16>
    %3660 = flow.tensor.transfer %3659 : tensor<4x?x4096xf16>{%dim_1434} to #hal.device.promise<@__device_5>
    %3661 = torch_c.from_builtin_tensor %3660 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3662 = torch_c.to_builtin_tensor %3488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1435 = arith.constant 1 : index
    %dim_1436 = tensor.dim %3662, %c1_1435 : tensor<4x?x4096xf16>
    %3663 = flow.tensor.transfer %3662 : tensor<4x?x4096xf16>{%dim_1436} to #hal.device.promise<@__device_5>
    %3664 = torch_c.from_builtin_tensor %3663 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3665 = torch_c.to_builtin_tensor %3494 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1437 = arith.constant 1 : index
    %dim_1438 = tensor.dim %3665, %c1_1437 : tensor<4x?x4096xf16>
    %3666 = flow.tensor.transfer %3665 : tensor<4x?x4096xf16>{%dim_1438} to #hal.device.promise<@__device_5>
    %3667 = torch_c.from_builtin_tensor %3666 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3668 = torch_c.to_builtin_tensor %3506 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1439 = arith.constant 1 : index
    %dim_1440 = tensor.dim %3668, %c1_1439 : tensor<4x?x4096xf16>
    %3669 = flow.tensor.transfer %3668 : tensor<4x?x4096xf16>{%dim_1440} to #hal.device.promise<@__device_5>
    %3670 = torch_c.from_builtin_tensor %3669 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3671 = torch_c.to_builtin_tensor %3512 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1441 = arith.constant 1 : index
    %dim_1442 = tensor.dim %3671, %c1_1441 : tensor<4x?x4096xf16>
    %3672 = flow.tensor.transfer %3671 : tensor<4x?x4096xf16>{%dim_1442} to #hal.device.promise<@__device_5>
    %3673 = torch_c.from_builtin_tensor %3672 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1443 = torch.constant.int 1
    %3674 = torch.aten.add.Tensor %3655, %3658, %int1_1443 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1444 = torch.constant.int 1
    %3675 = torch.aten.add.Tensor %3674, %3661, %int1_1444 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1445 = torch.constant.int 1
    %3676 = torch.aten.add.Tensor %3675, %3664, %int1_1445 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1446 = torch.constant.int 1
    %3677 = torch.aten.add.Tensor %3676, %3667, %int1_1446 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1447 = torch.constant.int 1
    %3678 = torch.aten.add.Tensor %3677, %3500, %int1_1447 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1448 = torch.constant.int 1
    %3679 = torch.aten.add.Tensor %3678, %3670, %int1_1448 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1449 = torch.constant.int 1
    %3680 = torch.aten.add.Tensor %3679, %3673, %int1_1449 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
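    // Same gather-and-sum for @__device_6 (%3506 stays local).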
    %3681 = torch_c.to_builtin_tensor %3470 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1450 = arith.constant 1 : index
    %dim_1451 = tensor.dim %3681, %c1_1450 : tensor<4x?x4096xf16>
    %3682 = flow.tensor.transfer %3681 : tensor<4x?x4096xf16>{%dim_1451} to #hal.device.promise<@__device_6>
    %3683 = torch_c.from_builtin_tensor %3682 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3684 = torch_c.to_builtin_tensor %3476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1452 = arith.constant 1 : index
    %dim_1453 = tensor.dim %3684, %c1_1452 : tensor<4x?x4096xf16>
    %3685 = flow.tensor.transfer %3684 : tensor<4x?x4096xf16>{%dim_1453} to #hal.device.promise<@__device_6>
    %3686 = torch_c.from_builtin_tensor %3685 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3687 = torch_c.to_builtin_tensor %3482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1454 = arith.constant 1 : index
    %dim_1455 = tensor.dim %3687, %c1_1454 : tensor<4x?x4096xf16>
    %3688 = flow.tensor.transfer %3687 : tensor<4x?x4096xf16>{%dim_1455} to #hal.device.promise<@__device_6>
    %3689 = torch_c.from_builtin_tensor %3688 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3690 = torch_c.to_builtin_tensor %3488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1456 = arith.constant 1 : index
    %dim_1457 = tensor.dim %3690, %c1_1456 : tensor<4x?x4096xf16>
    %3691 = flow.tensor.transfer %3690 : tensor<4x?x4096xf16>{%dim_1457} to #hal.device.promise<@__device_6>
    %3692 = torch_c.from_builtin_tensor %3691 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3693 = torch_c.to_builtin_tensor %3494 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1458 = arith.constant 1 : index
    %dim_1459 = tensor.dim %3693, %c1_1458 : tensor<4x?x4096xf16>
    %3694 = flow.tensor.transfer %3693 : tensor<4x?x4096xf16>{%dim_1459} to #hal.device.promise<@__device_6>
    %3695 = torch_c.from_builtin_tensor %3694 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3696 = torch_c.to_builtin_tensor %3500 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1460 = arith.constant 1 : index
    %dim_1461 = tensor.dim %3696, %c1_1460 : tensor<4x?x4096xf16>
    %3697 = flow.tensor.transfer %3696 : tensor<4x?x4096xf16>{%dim_1461} to #hal.device.promise<@__device_6>
    %3698 = torch_c.from_builtin_tensor %3697 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3699 = torch_c.to_builtin_tensor %3512 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1462 = arith.constant 1 : index
    %dim_1463 = tensor.dim %3699, %c1_1462 : tensor<4x?x4096xf16>
    %3700 = flow.tensor.transfer %3699 : tensor<4x?x4096xf16>{%dim_1463} to #hal.device.promise<@__device_6>
    %3701 = torch_c.from_builtin_tensor %3700 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1464 = torch.constant.int 1
    %3702 = torch.aten.add.Tensor %3683, %3686, %int1_1464 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1465 = torch.constant.int 1
    %3703 = torch.aten.add.Tensor %3702, %3689, %int1_1465 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1466 = torch.constant.int 1
    %3704 = torch.aten.add.Tensor %3703, %3692, %int1_1466 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1467 = torch.constant.int 1
    %3705 = torch.aten.add.Tensor %3704, %3695, %int1_1467 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1468 = torch.constant.int 1
    %3706 = torch.aten.add.Tensor %3705, %3698, %int1_1468 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1469 = torch.constant.int 1
    %3707 = torch.aten.add.Tensor %3706, %3506, %int1_1469 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1470 = torch.constant.int 1
    %3708 = torch.aten.add.Tensor %3707, %3701, %int1_1470 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
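    // Same gather-and-sum for @__device_7 (%3512 stays local). Each device now holds its own replica of the full sum, i.e. an all-reduce realized as per-device broadcast plus accumulate.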
    %3709 = torch_c.to_builtin_tensor %3470 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1471 = arith.constant 1 : index
    %dim_1472 = tensor.dim %3709, %c1_1471 : tensor<4x?x4096xf16>
    %3710 = flow.tensor.transfer %3709 : tensor<4x?x4096xf16>{%dim_1472} to #hal.device.promise<@__device_7>
    %3711 = torch_c.from_builtin_tensor %3710 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3712 = torch_c.to_builtin_tensor %3476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1473 = arith.constant 1 : index
    %dim_1474 = tensor.dim %3712, %c1_1473 : tensor<4x?x4096xf16>
    %3713 = flow.tensor.transfer %3712 : tensor<4x?x4096xf16>{%dim_1474} to #hal.device.promise<@__device_7>
    %3714 = torch_c.from_builtin_tensor %3713 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3715 = torch_c.to_builtin_tensor %3482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1475 = arith.constant 1 : index
    %dim_1476 = tensor.dim %3715, %c1_1475 : tensor<4x?x4096xf16>
    %3716 = flow.tensor.transfer %3715 : tensor<4x?x4096xf16>{%dim_1476} to #hal.device.promise<@__device_7>
    %3717 = torch_c.from_builtin_tensor %3716 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3718 = torch_c.to_builtin_tensor %3488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1477 = arith.constant 1 : index
    %dim_1478 = tensor.dim %3718, %c1_1477 : tensor<4x?x4096xf16>
    %3719 = flow.tensor.transfer %3718 : tensor<4x?x4096xf16>{%dim_1478} to #hal.device.promise<@__device_7>
    %3720 = torch_c.from_builtin_tensor %3719 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3721 = torch_c.to_builtin_tensor %3494 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1479 = arith.constant 1 : index
    %dim_1480 = tensor.dim %3721, %c1_1479 : tensor<4x?x4096xf16>
    %3722 = flow.tensor.transfer %3721 : tensor<4x?x4096xf16>{%dim_1480} to #hal.device.promise<@__device_7>
    %3723 = torch_c.from_builtin_tensor %3722 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3724 = torch_c.to_builtin_tensor %3500 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1481 = arith.constant 1 : index
    %dim_1482 = tensor.dim %3724, %c1_1481 : tensor<4x?x4096xf16>
    %3725 = flow.tensor.transfer %3724 : tensor<4x?x4096xf16>{%dim_1482} to #hal.device.promise<@__device_7>
    %3726 = torch_c.from_builtin_tensor %3725 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %3727 = torch_c.to_builtin_tensor %3506 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1483 = arith.constant 1 : index
    %dim_1484 = tensor.dim %3727, %c1_1483 : tensor<4x?x4096xf16>
    %3728 = flow.tensor.transfer %3727 : tensor<4x?x4096xf16>{%dim_1484} to #hal.device.promise<@__device_7>
    %3729 = torch_c.from_builtin_tensor %3728 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1485 = torch.constant.int 1
    %3730 = torch.aten.add.Tensor %3711, %3714, %int1_1485 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1486 = torch.constant.int 1
    %3731 = torch.aten.add.Tensor %3730, %3717, %int1_1486 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1487 = torch.constant.int 1
    %3732 = torch.aten.add.Tensor %3731, %3720, %int1_1487 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1488 = torch.constant.int 1
    %3733 = torch.aten.add.Tensor %3732, %3723, %int1_1488 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1489 = torch.constant.int 1
    %3734 = torch.aten.add.Tensor %3733, %3726, %int1_1489 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1490 = torch.constant.int 1
    %3735 = torch.aten.add.Tensor %3734, %3729, %int1_1490 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1491 = torch.constant.int 1
    %3736 = torch.aten.add.Tensor %3735, %3512, %int1_1491 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
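    // Residual add on every device: %2386-%2393 (the incoming per-device hidden states) plus the corresponding all-reduced results %3540/%3568/%3596/%3624/%3652/%3680/%3708/%3736.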
    %int1_1492 = torch.constant.int 1
    %3737 = torch.aten.add.Tensor %2386, %3540, %int1_1492 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1493 = torch.constant.int 1
    %3738 = torch.aten.add.Tensor %2387, %3568, %int1_1493 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1494 = torch.constant.int 1
    %3739 = torch.aten.add.Tensor %2388, %3596, %int1_1494 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1495 = torch.constant.int 1
    %3740 = torch.aten.add.Tensor %2389, %3624, %int1_1495 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1496 = torch.constant.int 1
    %3741 = torch.aten.add.Tensor %2390, %3652, %int1_1496 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1497 = torch.constant.int 1
    %3742 = torch.aten.add.Tensor %2391, %3680, %int1_1497 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1498 = torch.constant.int 1
    %3743 = torch.aten.add.Tensor %2392, %3708, %int1_1498 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1499 = torch.constant.int 1
    %3744 = torch.aten.add.Tensor %2393, %3736, %int1_1499 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
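    // RMSNorm, computed independently on each device in f32:
    //   y = x / sqrt(mean(x^2, dim=-1) + eps) * weight,  eps ~= 1e-5
    // realized below as: f16->f32 cast (dtype 6), pow(2), mean(-1, keepdim), add.Scalar eps, rsqrt, two muls (normalize, then scale by the per-device norm weight %48-%55), and f32->f16 cast (dtype 5).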
    %int6_1500 = torch.constant.int 6
    %3745 = torch.prims.convert_element_type %3737, %int6_1500 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1501 = torch.constant.int 6
    %3746 = torch.prims.convert_element_type %3738, %int6_1501 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1502 = torch.constant.int 6
    %3747 = torch.prims.convert_element_type %3739, %int6_1502 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1503 = torch.constant.int 6
    %3748 = torch.prims.convert_element_type %3740, %int6_1503 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1504 = torch.constant.int 6
    %3749 = torch.prims.convert_element_type %3741, %int6_1504 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1505 = torch.constant.int 6
    %3750 = torch.prims.convert_element_type %3742, %int6_1505 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1506 = torch.constant.int 6
    %3751 = torch.prims.convert_element_type %3743, %int6_1506 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1507 = torch.constant.int 6
    %3752 = torch.prims.convert_element_type %3744, %int6_1507 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1508 = torch.constant.int 2
    %3753 = torch.aten.pow.Tensor_Scalar %3745, %int2_1508 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1509 = torch.constant.int 2
    %3754 = torch.aten.pow.Tensor_Scalar %3746, %int2_1509 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1510 = torch.constant.int 2
    %3755 = torch.aten.pow.Tensor_Scalar %3747, %int2_1510 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1511 = torch.constant.int 2
    %3756 = torch.aten.pow.Tensor_Scalar %3748, %int2_1511 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1512 = torch.constant.int 2
    %3757 = torch.aten.pow.Tensor_Scalar %3749, %int2_1512 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1513 = torch.constant.int 2
    %3758 = torch.aten.pow.Tensor_Scalar %3750, %int2_1513 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1514 = torch.constant.int 2
    %3759 = torch.aten.pow.Tensor_Scalar %3751, %int2_1514 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1515 = torch.constant.int 2
    %3760 = torch.aten.pow.Tensor_Scalar %3752, %int2_1515 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int-1_1516 = torch.constant.int -1
    %3761 = torch.prim.ListConstruct %int-1_1516 : (!torch.int) -> !torch.list<int>
    %true_1517 = torch.constant.bool true
    %none_1518 = torch.constant.none
    %3762 = torch.aten.mean.dim %3753, %3761, %true_1517, %none_1518 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1519 = torch.constant.int -1
    %3763 = torch.prim.ListConstruct %int-1_1519 : (!torch.int) -> !torch.list<int>
    %true_1520 = torch.constant.bool true
    %none_1521 = torch.constant.none
    %3764 = torch.aten.mean.dim %3754, %3763, %true_1520, %none_1521 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1522 = torch.constant.int -1
    %3765 = torch.prim.ListConstruct %int-1_1522 : (!torch.int) -> !torch.list<int>
    %true_1523 = torch.constant.bool true
    %none_1524 = torch.constant.none
    %3766 = torch.aten.mean.dim %3755, %3765, %true_1523, %none_1524 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1525 = torch.constant.int -1
    %3767 = torch.prim.ListConstruct %int-1_1525 : (!torch.int) -> !torch.list<int>
    %true_1526 = torch.constant.bool true
    %none_1527 = torch.constant.none
    %3768 = torch.aten.mean.dim %3756, %3767, %true_1526, %none_1527 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1528 = torch.constant.int -1
    %3769 = torch.prim.ListConstruct %int-1_1528 : (!torch.int) -> !torch.list<int>
    %true_1529 = torch.constant.bool true
    %none_1530 = torch.constant.none
    %3770 = torch.aten.mean.dim %3757, %3769, %true_1529, %none_1530 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1531 = torch.constant.int -1
    %3771 = torch.prim.ListConstruct %int-1_1531 : (!torch.int) -> !torch.list<int>
    %true_1532 = torch.constant.bool true
    %none_1533 = torch.constant.none
    %3772 = torch.aten.mean.dim %3758, %3771, %true_1532, %none_1533 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1534 = torch.constant.int -1
    %3773 = torch.prim.ListConstruct %int-1_1534 : (!torch.int) -> !torch.list<int>
    %true_1535 = torch.constant.bool true
    %none_1536 = torch.constant.none
    %3774 = torch.aten.mean.dim %3759, %3773, %true_1535, %none_1536 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1537 = torch.constant.int -1
    %3775 = torch.prim.ListConstruct %int-1_1537 : (!torch.int) -> !torch.list<int>
    %true_1538 = torch.constant.bool true
    %none_1539 = torch.constant.none
    %3776 = torch.aten.mean.dim %3760, %3775, %true_1538, %none_1539 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1540 = torch.constant.float 9.9999997473787516E-6
    %int1_1541 = torch.constant.int 1
    %3777 = torch.aten.add.Scalar %3762, %float9.999990e-06_1540, %int1_1541 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1542 = torch.constant.float 9.9999997473787516E-6
    %int1_1543 = torch.constant.int 1
    %3778 = torch.aten.add.Scalar %3764, %float9.999990e-06_1542, %int1_1543 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1544 = torch.constant.float 9.9999997473787516E-6
    %int1_1545 = torch.constant.int 1
    %3779 = torch.aten.add.Scalar %3766, %float9.999990e-06_1544, %int1_1545 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1546 = torch.constant.float 9.9999997473787516E-6
    %int1_1547 = torch.constant.int 1
    %3780 = torch.aten.add.Scalar %3768, %float9.999990e-06_1546, %int1_1547 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1548 = torch.constant.float 9.9999997473787516E-6
    %int1_1549 = torch.constant.int 1
    %3781 = torch.aten.add.Scalar %3770, %float9.999990e-06_1548, %int1_1549 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1550 = torch.constant.float 9.9999997473787516E-6
    %int1_1551 = torch.constant.int 1
    %3782 = torch.aten.add.Scalar %3772, %float9.999990e-06_1550, %int1_1551 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1552 = torch.constant.float 9.9999997473787516E-6
    %int1_1553 = torch.constant.int 1
    %3783 = torch.aten.add.Scalar %3774, %float9.999990e-06_1552, %int1_1553 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1554 = torch.constant.float 9.9999997473787516E-6
    %int1_1555 = torch.constant.int 1
    %3784 = torch.aten.add.Scalar %3776, %float9.999990e-06_1554, %int1_1555 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %3785 = torch.aten.rsqrt %3777 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %3786 = torch.aten.rsqrt %3778 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %3787 = torch.aten.rsqrt %3779 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %3788 = torch.aten.rsqrt %3780 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %3789 = torch.aten.rsqrt %3781 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %3790 = torch.aten.rsqrt %3782 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %3791 = torch.aten.rsqrt %3783 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %3792 = torch.aten.rsqrt %3784 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %3792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %3793 = torch.aten.mul.Tensor %3745, %3785 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3794 = torch.aten.mul.Tensor %3746, %3786 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3795 = torch.aten.mul.Tensor %3747, %3787 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3796 = torch.aten.mul.Tensor %3748, %3788 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3797 = torch.aten.mul.Tensor %3749, %3789 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3798 = torch.aten.mul.Tensor %3750, %3790 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3799 = torch.aten.mul.Tensor %3751, %3791 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3800 = torch.aten.mul.Tensor %3752, %3792 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3801 = torch.aten.mul.Tensor %48, %3793 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3802 = torch.aten.mul.Tensor %49, %3794 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3803 = torch.aten.mul.Tensor %50, %3795 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3804 = torch.aten.mul.Tensor %51, %3796 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3805 = torch.aten.mul.Tensor %52, %3797 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3806 = torch.aten.mul.Tensor %53, %3798 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3807 = torch.aten.mul.Tensor %54, %3799 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %3808 = torch.aten.mul.Tensor %55, %3800 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %3808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int5_1556 = torch.constant.int 5
    %3809 = torch.prims.convert_element_type %3801, %int5_1556 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1557 = torch.constant.int 5
    %3810 = torch.prims.convert_element_type %3802, %int5_1557 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1558 = torch.constant.int 5
    %3811 = torch.prims.convert_element_type %3803, %int5_1558 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1559 = torch.constant.int 5
    %3812 = torch.prims.convert_element_type %3804, %int5_1559 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1560 = torch.constant.int 5
    %3813 = torch.prims.convert_element_type %3805, %int5_1560 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1561 = torch.constant.int 5
    %3814 = torch.prims.convert_element_type %3806, %int5_1561 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1562 = torch.constant.int 5
    %3815 = torch.prims.convert_element_type %3807, %int5_1562 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1563 = torch.constant.int 5
    %3816 = torch.prims.convert_element_type %3808, %int5_1563 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
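    // FFN projection, tensor-parallel over the 8 devices (presumably the ffn_gate shards, given the SiLU that follows): transpose each [1792,4096] shard (1792 = 14336/8), flatten the activations from [4,?,4096] to [4*?,4096], matmul to [4*?,1792], and reshape back to [4,?,1792].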
    %int1_1564 = torch.constant.int 1
    %int0_1565 = torch.constant.int 0
    %3817 = torch.prim.ListConstruct %int1_1564, %int0_1565 : (!torch.int, !torch.int) -> !torch.list<int>
    %3818 = torch.aten.permute %56, %3817 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1566 = torch.constant.int 1
    %int0_1567 = torch.constant.int 0
    %3819 = torch.prim.ListConstruct %int1_1566, %int0_1567 : (!torch.int, !torch.int) -> !torch.list<int>
    %3820 = torch.aten.permute %57, %3819 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1568 = torch.constant.int 1
    %int0_1569 = torch.constant.int 0
    %3821 = torch.prim.ListConstruct %int1_1568, %int0_1569 : (!torch.int, !torch.int) -> !torch.list<int>
    %3822 = torch.aten.permute %58, %3821 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1570 = torch.constant.int 1
    %int0_1571 = torch.constant.int 0
    %3823 = torch.prim.ListConstruct %int1_1570, %int0_1571 : (!torch.int, !torch.int) -> !torch.list<int>
    %3824 = torch.aten.permute %59, %3823 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1572 = torch.constant.int 1
    %int0_1573 = torch.constant.int 0
    %3825 = torch.prim.ListConstruct %int1_1572, %int0_1573 : (!torch.int, !torch.int) -> !torch.list<int>
    %3826 = torch.aten.permute %60, %3825 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1574 = torch.constant.int 1
    %int0_1575 = torch.constant.int 0
    %3827 = torch.prim.ListConstruct %int1_1574, %int0_1575 : (!torch.int, !torch.int) -> !torch.list<int>
    %3828 = torch.aten.permute %61, %3827 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1576 = torch.constant.int 1
    %int0_1577 = torch.constant.int 0
    %3829 = torch.prim.ListConstruct %int1_1576, %int0_1577 : (!torch.int, !torch.int) -> !torch.list<int>
    %3830 = torch.aten.permute %62, %3829 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1578 = torch.constant.int 1
    %int0_1579 = torch.constant.int 0
    %3831 = torch.prim.ListConstruct %int1_1578, %int0_1579 : (!torch.int, !torch.int) -> !torch.list<int>
    %3832 = torch.aten.permute %63, %3831 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int4_1580 = torch.constant.int 4
    %3833 = torch.aten.mul.int %int4_1580, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1581 = torch.constant.int 4096
    %3834 = torch.prim.ListConstruct %3833, %int4096_1581 : (!torch.int, !torch.int) -> !torch.list<int>
    %3835 = torch.aten.view %3809, %3834 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3835, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3836 = torch.aten.mm %3835, %3818 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3836, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1582 = torch.constant.int 4
    %int1792 = torch.constant.int 1792
    %3837 = torch.prim.ListConstruct %int4_1582, %2482, %int1792 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3838 = torch.aten.view %3836, %3837 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1583 = torch.constant.int 4
    %3839 = torch.aten.mul.int %int4_1583, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1584 = torch.constant.int 4096
    %3840 = torch.prim.ListConstruct %3839, %int4096_1584 : (!torch.int, !torch.int) -> !torch.list<int>
    %3841 = torch.aten.view %3810, %3840 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3841, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3842 = torch.aten.mm %3841, %3820 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3842, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1585 = torch.constant.int 4
    %int1792_1586 = torch.constant.int 1792
    %3843 = torch.prim.ListConstruct %int4_1585, %2482, %int1792_1586 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3844 = torch.aten.view %3842, %3843 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1587 = torch.constant.int 4
    %3845 = torch.aten.mul.int %int4_1587, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1588 = torch.constant.int 4096
    %3846 = torch.prim.ListConstruct %3845, %int4096_1588 : (!torch.int, !torch.int) -> !torch.list<int>
    %3847 = torch.aten.view %3811, %3846 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3847, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3848 = torch.aten.mm %3847, %3822 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3848, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1589 = torch.constant.int 4
    %int1792_1590 = torch.constant.int 1792
    %3849 = torch.prim.ListConstruct %int4_1589, %2482, %int1792_1590 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3850 = torch.aten.view %3848, %3849 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1591 = torch.constant.int 4
    %3851 = torch.aten.mul.int %int4_1591, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1592 = torch.constant.int 4096
    %3852 = torch.prim.ListConstruct %3851, %int4096_1592 : (!torch.int, !torch.int) -> !torch.list<int>
    %3853 = torch.aten.view %3812, %3852 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3853, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3854 = torch.aten.mm %3853, %3824 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3854, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1593 = torch.constant.int 4
    %int1792_1594 = torch.constant.int 1792
    %3855 = torch.prim.ListConstruct %int4_1593, %2482, %int1792_1594 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3856 = torch.aten.view %3854, %3855 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1595 = torch.constant.int 4
    %3857 = torch.aten.mul.int %int4_1595, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1596 = torch.constant.int 4096
    %3858 = torch.prim.ListConstruct %3857, %int4096_1596 : (!torch.int, !torch.int) -> !torch.list<int>
    %3859 = torch.aten.view %3813, %3858 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3859, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3860 = torch.aten.mm %3859, %3826 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3860, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1597 = torch.constant.int 4
    %int1792_1598 = torch.constant.int 1792
    %3861 = torch.prim.ListConstruct %int4_1597, %2482, %int1792_1598 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3862 = torch.aten.view %3860, %3861 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1599 = torch.constant.int 4
    %3863 = torch.aten.mul.int %int4_1599, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1600 = torch.constant.int 4096
    %3864 = torch.prim.ListConstruct %3863, %int4096_1600 : (!torch.int, !torch.int) -> !torch.list<int>
    %3865 = torch.aten.view %3814, %3864 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3865, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3866 = torch.aten.mm %3865, %3828 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3866, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1601 = torch.constant.int 4
    %int1792_1602 = torch.constant.int 1792
    %3867 = torch.prim.ListConstruct %int4_1601, %2482, %int1792_1602 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3868 = torch.aten.view %3866, %3867 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1603 = torch.constant.int 4
    %3869 = torch.aten.mul.int %int4_1603, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1604 = torch.constant.int 4096
    %3870 = torch.prim.ListConstruct %3869, %int4096_1604 : (!torch.int, !torch.int) -> !torch.list<int>
    %3871 = torch.aten.view %3815, %3870 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3871, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3872 = torch.aten.mm %3871, %3830 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3872, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1605 = torch.constant.int 4
    %int1792_1606 = torch.constant.int 1792
    %3873 = torch.prim.ListConstruct %int4_1605, %2482, %int1792_1606 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3874 = torch.aten.view %3872, %3873 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1607 = torch.constant.int 4
    %3875 = torch.aten.mul.int %int4_1607, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1608 = torch.constant.int 4096
    %3876 = torch.prim.ListConstruct %3875, %int4096_1608 : (!torch.int, !torch.int) -> !torch.list<int>
    %3877 = torch.aten.view %3816, %3876 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3877, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3878 = torch.aten.mm %3877, %3832 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3878, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1609 = torch.constant.int 4
    %int1792_1610 = torch.constant.int 1792
    %3879 = torch.prim.ListConstruct %int4_1609, %2482, %int1792_1610 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3880 = torch.aten.view %3878, %3879 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
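    // SiLU activation applied to each device's gate-projection output.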
    %3881 = torch.aten.silu %3838 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3882 = torch.aten.silu %3844 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3883 = torch.aten.silu %3850 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3884 = torch.aten.silu %3856 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3885 = torch.aten.silu %3862 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3886 = torch.aten.silu %3868 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3887 = torch.aten.silu %3874 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3888 = torch.aten.silu %3880 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
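    // Transpose the per-shard up-projection weights (%64..%71) from [1792,4096] to [4096,1792] for the matmuls below.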
    %int1_1611 = torch.constant.int 1
    %int0_1612 = torch.constant.int 0
    %3889 = torch.prim.ListConstruct %int1_1611, %int0_1612 : (!torch.int, !torch.int) -> !torch.list<int>
    %3890 = torch.aten.permute %64, %3889 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1613 = torch.constant.int 1
    %int0_1614 = torch.constant.int 0
    %3891 = torch.prim.ListConstruct %int1_1613, %int0_1614 : (!torch.int, !torch.int) -> !torch.list<int>
    %3892 = torch.aten.permute %65, %3891 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1615 = torch.constant.int 1
    %int0_1616 = torch.constant.int 0
    %3893 = torch.prim.ListConstruct %int1_1615, %int0_1616 : (!torch.int, !torch.int) -> !torch.list<int>
    %3894 = torch.aten.permute %66, %3893 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1617 = torch.constant.int 1
    %int0_1618 = torch.constant.int 0
    %3895 = torch.prim.ListConstruct %int1_1617, %int0_1618 : (!torch.int, !torch.int) -> !torch.list<int>
    %3896 = torch.aten.permute %67, %3895 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1619 = torch.constant.int 1
    %int0_1620 = torch.constant.int 0
    %3897 = torch.prim.ListConstruct %int1_1619, %int0_1620 : (!torch.int, !torch.int) -> !torch.list<int>
    %3898 = torch.aten.permute %68, %3897 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1621 = torch.constant.int 1
    %int0_1622 = torch.constant.int 0
    %3899 = torch.prim.ListConstruct %int1_1621, %int0_1622 : (!torch.int, !torch.int) -> !torch.list<int>
    %3900 = torch.aten.permute %69, %3899 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1623 = torch.constant.int 1
    %int0_1624 = torch.constant.int 0
    %3901 = torch.prim.ListConstruct %int1_1623, %int0_1624 : (!torch.int, !torch.int) -> !torch.list<int>
    %3902 = torch.aten.permute %70, %3901 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_1625 = torch.constant.int 1
    %int0_1626 = torch.constant.int 0
    %3903 = torch.prim.ListConstruct %int1_1625, %int0_1626 : (!torch.int, !torch.int) -> !torch.list<int>
    %3904 = torch.aten.permute %71, %3903 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
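    // Up projections: flatten each shard's input to [?,4096], matmul with the transposed weight, and reshape back to [4,?,1792].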
    %int4_1627 = torch.constant.int 4
    %3905 = torch.aten.mul.int %int4_1627, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1628 = torch.constant.int 4096
    %3906 = torch.prim.ListConstruct %3905, %int4096_1628 : (!torch.int, !torch.int) -> !torch.list<int>
    %3907 = torch.aten.view %3809, %3906 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3907, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3908 = torch.aten.mm %3907, %3890 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3908, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1629 = torch.constant.int 4
    %int1792_1630 = torch.constant.int 1792
    %3909 = torch.prim.ListConstruct %int4_1629, %2482, %int1792_1630 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3910 = torch.aten.view %3908, %3909 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1631 = torch.constant.int 4
    %3911 = torch.aten.mul.int %int4_1631, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1632 = torch.constant.int 4096
    %3912 = torch.prim.ListConstruct %3911, %int4096_1632 : (!torch.int, !torch.int) -> !torch.list<int>
    %3913 = torch.aten.view %3810, %3912 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3913, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3914 = torch.aten.mm %3913, %3892 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3914, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1633 = torch.constant.int 4
    %int1792_1634 = torch.constant.int 1792
    %3915 = torch.prim.ListConstruct %int4_1633, %2482, %int1792_1634 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3916 = torch.aten.view %3914, %3915 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1635 = torch.constant.int 4
    %3917 = torch.aten.mul.int %int4_1635, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1636 = torch.constant.int 4096
    %3918 = torch.prim.ListConstruct %3917, %int4096_1636 : (!torch.int, !torch.int) -> !torch.list<int>
    %3919 = torch.aten.view %3811, %3918 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3919, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3920 = torch.aten.mm %3919, %3894 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3920, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1637 = torch.constant.int 4
    %int1792_1638 = torch.constant.int 1792
    %3921 = torch.prim.ListConstruct %int4_1637, %2482, %int1792_1638 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3922 = torch.aten.view %3920, %3921 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1639 = torch.constant.int 4
    %3923 = torch.aten.mul.int %int4_1639, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1640 = torch.constant.int 4096
    %3924 = torch.prim.ListConstruct %3923, %int4096_1640 : (!torch.int, !torch.int) -> !torch.list<int>
    %3925 = torch.aten.view %3812, %3924 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3925, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3926 = torch.aten.mm %3925, %3896 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3926, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1641 = torch.constant.int 4
    %int1792_1642 = torch.constant.int 1792
    %3927 = torch.prim.ListConstruct %int4_1641, %2482, %int1792_1642 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3928 = torch.aten.view %3926, %3927 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1643 = torch.constant.int 4
    %3929 = torch.aten.mul.int %int4_1643, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1644 = torch.constant.int 4096
    %3930 = torch.prim.ListConstruct %3929, %int4096_1644 : (!torch.int, !torch.int) -> !torch.list<int>
    %3931 = torch.aten.view %3813, %3930 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3931, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3932 = torch.aten.mm %3931, %3898 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3932, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1645 = torch.constant.int 4
    %int1792_1646 = torch.constant.int 1792
    %3933 = torch.prim.ListConstruct %int4_1645, %2482, %int1792_1646 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3934 = torch.aten.view %3932, %3933 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1647 = torch.constant.int 4
    %3935 = torch.aten.mul.int %int4_1647, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1648 = torch.constant.int 4096
    %3936 = torch.prim.ListConstruct %3935, %int4096_1648 : (!torch.int, !torch.int) -> !torch.list<int>
    %3937 = torch.aten.view %3814, %3936 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3937, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3938 = torch.aten.mm %3937, %3900 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3938, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1649 = torch.constant.int 4
    %int1792_1650 = torch.constant.int 1792
    %3939 = torch.prim.ListConstruct %int4_1649, %2482, %int1792_1650 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3940 = torch.aten.view %3938, %3939 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1651 = torch.constant.int 4
    %3941 = torch.aten.mul.int %int4_1651, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1652 = torch.constant.int 4096
    %3942 = torch.prim.ListConstruct %3941, %int4096_1652 : (!torch.int, !torch.int) -> !torch.list<int>
    %3943 = torch.aten.view %3815, %3942 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3943, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3944 = torch.aten.mm %3943, %3902 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3944, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1653 = torch.constant.int 4
    %int1792_1654 = torch.constant.int 1792
    %3945 = torch.prim.ListConstruct %int4_1653, %2482, %int1792_1654 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3946 = torch.aten.view %3944, %3945 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_1655 = torch.constant.int 4
    %3947 = torch.aten.mul.int %int4_1655, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1656 = torch.constant.int 4096
    %3948 = torch.prim.ListConstruct %3947, %int4096_1656 : (!torch.int, !torch.int) -> !torch.list<int>
    %3949 = torch.aten.view %3816, %3948 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3949, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %3950 = torch.aten.mm %3949, %3904 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3950, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_1657 = torch.constant.int 4
    %int1792_1658 = torch.constant.int 1792
    %3951 = torch.prim.ListConstruct %int4_1657, %2482, %int1792_1658 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3952 = torch.aten.view %3950, %3951 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
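    // Gated activation: multiply silu(gate) by the up projection on each shard.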
    %3953 = torch.aten.mul.Tensor %3881, %3910 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3954 = torch.aten.mul.Tensor %3882, %3916 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3955 = torch.aten.mul.Tensor %3883, %3922 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3956 = torch.aten.mul.Tensor %3884, %3928 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3957 = torch.aten.mul.Tensor %3885, %3934 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3958 = torch.aten.mul.Tensor %3886, %3940 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3959 = torch.aten.mul.Tensor %3887, %3946 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %3960 = torch.aten.mul.Tensor %3888, %3952 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %3960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
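    // Transpose the per-shard down-projection weights (%72..%79) from [4096,1792] to [1792,4096].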
    %int1_1659 = torch.constant.int 1
    %int0_1660 = torch.constant.int 0
    %3961 = torch.prim.ListConstruct %int1_1659, %int0_1660 : (!torch.int, !torch.int) -> !torch.list<int>
    %3962 = torch.aten.permute %72, %3961 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_1661 = torch.constant.int 1
    %int0_1662 = torch.constant.int 0
    %3963 = torch.prim.ListConstruct %int1_1661, %int0_1662 : (!torch.int, !torch.int) -> !torch.list<int>
    %3964 = torch.aten.permute %73, %3963 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_1663 = torch.constant.int 1
    %int0_1664 = torch.constant.int 0
    %3965 = torch.prim.ListConstruct %int1_1663, %int0_1664 : (!torch.int, !torch.int) -> !torch.list<int>
    %3966 = torch.aten.permute %74, %3965 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_1665 = torch.constant.int 1
    %int0_1666 = torch.constant.int 0
    %3967 = torch.prim.ListConstruct %int1_1665, %int0_1666 : (!torch.int, !torch.int) -> !torch.list<int>
    %3968 = torch.aten.permute %75, %3967 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_1667 = torch.constant.int 1
    %int0_1668 = torch.constant.int 0
    %3969 = torch.prim.ListConstruct %int1_1667, %int0_1668 : (!torch.int, !torch.int) -> !torch.list<int>
    %3970 = torch.aten.permute %76, %3969 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_1669 = torch.constant.int 1
    %int0_1670 = torch.constant.int 0
    %3971 = torch.prim.ListConstruct %int1_1669, %int0_1670 : (!torch.int, !torch.int) -> !torch.list<int>
    %3972 = torch.aten.permute %77, %3971 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_1671 = torch.constant.int 1
    %int0_1672 = torch.constant.int 0
    %3973 = torch.prim.ListConstruct %int1_1671, %int0_1672 : (!torch.int, !torch.int) -> !torch.list<int>
    %3974 = torch.aten.permute %78, %3973 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_1673 = torch.constant.int 1
    %int0_1674 = torch.constant.int 0
    %3975 = torch.prim.ListConstruct %int1_1673, %int0_1674 : (!torch.int, !torch.int) -> !torch.list<int>
    %3976 = torch.aten.permute %79, %3975 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
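    // Down projections: flatten each shard's gated activation to [?,1792], matmul back to model width 4096, and reshape to [4,?,4096].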
    %int1_1675 = torch.constant.int 1
    %3977 = torch.aten.size.int %3838, %int1_1675 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_1676 = torch.constant.int 4
    %3978 = torch.aten.mul.int %int4_1676, %3977 : !torch.int, !torch.int -> !torch.int
    %int1792_1677 = torch.constant.int 1792
    %3979 = torch.prim.ListConstruct %3978, %int1792_1677 : (!torch.int, !torch.int) -> !torch.list<int>
    %3980 = torch.aten.view %3953, %3979 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3980, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %3981 = torch.aten.mm %3980, %3962 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3981, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1678 = torch.constant.int 4
    %int4096_1679 = torch.constant.int 4096
    %3982 = torch.prim.ListConstruct %int4_1678, %3977, %int4096_1679 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3983 = torch.aten.view %3981, %3982 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1680 = torch.constant.int 1
    %3984 = torch.aten.size.int %3844, %int1_1680 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_1681 = torch.constant.int 4
    %3985 = torch.aten.mul.int %int4_1681, %3984 : !torch.int, !torch.int -> !torch.int
    %int1792_1682 = torch.constant.int 1792
    %3986 = torch.prim.ListConstruct %3985, %int1792_1682 : (!torch.int, !torch.int) -> !torch.list<int>
    %3987 = torch.aten.view %3954, %3986 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3987, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %3988 = torch.aten.mm %3987, %3964 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3988, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1683 = torch.constant.int 4
    %int4096_1684 = torch.constant.int 4096
    %3989 = torch.prim.ListConstruct %int4_1683, %3984, %int4096_1684 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3990 = torch.aten.view %3988, %3989 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1685 = torch.constant.int 1
    %3991 = torch.aten.size.int %3850, %int1_1685 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_1686 = torch.constant.int 4
    %3992 = torch.aten.mul.int %int4_1686, %3991 : !torch.int, !torch.int -> !torch.int
    %int1792_1687 = torch.constant.int 1792
    %3993 = torch.prim.ListConstruct %3992, %int1792_1687 : (!torch.int, !torch.int) -> !torch.list<int>
    %3994 = torch.aten.view %3955, %3993 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %3994, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %3995 = torch.aten.mm %3994, %3966 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %3995, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1688 = torch.constant.int 4
    %int4096_1689 = torch.constant.int 4096
    %3996 = torch.prim.ListConstruct %int4_1688, %3991, %int4096_1689 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %3997 = torch.aten.view %3995, %3996 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %3997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1690 = torch.constant.int 1
    %3998 = torch.aten.size.int %3856, %int1_1690 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_1691 = torch.constant.int 4
    %3999 = torch.aten.mul.int %int4_1691, %3998 : !torch.int, !torch.int -> !torch.int
    %int1792_1692 = torch.constant.int 1792
    %4000 = torch.prim.ListConstruct %3999, %int1792_1692 : (!torch.int, !torch.int) -> !torch.list<int>
    %4001 = torch.aten.view %3956, %4000 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %4001, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %4002 = torch.aten.mm %4001, %3968 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4002, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1693 = torch.constant.int 4
    %int4096_1694 = torch.constant.int 4096
    %4003 = torch.prim.ListConstruct %int4_1693, %3998, %int4096_1694 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4004 = torch.aten.view %4002, %4003 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1695 = torch.constant.int 1
    %4005 = torch.aten.size.int %3862, %int1_1695 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_1696 = torch.constant.int 4
    %4006 = torch.aten.mul.int %int4_1696, %4005 : !torch.int, !torch.int -> !torch.int
    %int1792_1697 = torch.constant.int 1792
    %4007 = torch.prim.ListConstruct %4006, %int1792_1697 : (!torch.int, !torch.int) -> !torch.list<int>
    %4008 = torch.aten.view %3957, %4007 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %4008, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %4009 = torch.aten.mm %4008, %3970 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4009, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1698 = torch.constant.int 4
    %int4096_1699 = torch.constant.int 4096
    %4010 = torch.prim.ListConstruct %int4_1698, %4005, %int4096_1699 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4011 = torch.aten.view %4009, %4010 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1700 = torch.constant.int 1
    %4012 = torch.aten.size.int %3868, %int1_1700 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_1701 = torch.constant.int 4
    %4013 = torch.aten.mul.int %int4_1701, %4012 : !torch.int, !torch.int -> !torch.int
    %int1792_1702 = torch.constant.int 1792
    %4014 = torch.prim.ListConstruct %4013, %int1792_1702 : (!torch.int, !torch.int) -> !torch.list<int>
    %4015 = torch.aten.view %3958, %4014 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %4015, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %4016 = torch.aten.mm %4015, %3972 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4016, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1703 = torch.constant.int 4
    %int4096_1704 = torch.constant.int 4096
    %4017 = torch.prim.ListConstruct %int4_1703, %4012, %int4096_1704 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4018 = torch.aten.view %4016, %4017 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1705 = torch.constant.int 1
    %4019 = torch.aten.size.int %3874, %int1_1705 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_1706 = torch.constant.int 4
    %4020 = torch.aten.mul.int %int4_1706, %4019 : !torch.int, !torch.int -> !torch.int
    %int1792_1707 = torch.constant.int 1792
    %4021 = torch.prim.ListConstruct %4020, %int1792_1707 : (!torch.int, !torch.int) -> !torch.list<int>
    %4022 = torch.aten.view %3959, %4021 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %4022, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %4023 = torch.aten.mm %4022, %3974 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4023, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1708 = torch.constant.int 4
    %int4096_1709 = torch.constant.int 4096
    %4024 = torch.prim.ListConstruct %int4_1708, %4019, %int4096_1709 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4025 = torch.aten.view %4023, %4024 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1710 = torch.constant.int 1
    %4026 = torch.aten.size.int %3880, %int1_1710 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_1711 = torch.constant.int 4
    %4027 = torch.aten.mul.int %int4_1711, %4026 : !torch.int, !torch.int -> !torch.int
    %int1792_1712 = torch.constant.int 1792
    %4028 = torch.prim.ListConstruct %4027, %int1792_1712 : (!torch.int, !torch.int) -> !torch.list<int>
    %4029 = torch.aten.view %3960, %4028 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %4029, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %4030 = torch.aten.mm %4029, %3976 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4030, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_1713 = torch.constant.int 4
    %int4096_1714 = torch.constant.int 4096
    %4031 = torch.prim.ListConstruct %int4_1713, %4026, %int4096_1714 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4032 = torch.aten.view %4030, %4031 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
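    // All-reduce-style reduction: transfer the other shards' partial FFN outputs to @__device_0 and sum all eight pairwise.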
    %4033 = torch_c.to_builtin_tensor %3990 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1715 = arith.constant 1 : index
    %dim_1716 = tensor.dim %4033, %c1_1715 : tensor<4x?x4096xf16>
    %4034 = flow.tensor.transfer %4033 : tensor<4x?x4096xf16>{%dim_1716} to #hal.device.promise<@__device_0>
    %4035 = torch_c.from_builtin_tensor %4034 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4036 = torch_c.to_builtin_tensor %3997 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1717 = arith.constant 1 : index
    %dim_1718 = tensor.dim %4036, %c1_1717 : tensor<4x?x4096xf16>
    %4037 = flow.tensor.transfer %4036 : tensor<4x?x4096xf16>{%dim_1718} to #hal.device.promise<@__device_0>
    %4038 = torch_c.from_builtin_tensor %4037 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4039 = torch_c.to_builtin_tensor %4004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1719 = arith.constant 1 : index
    %dim_1720 = tensor.dim %4039, %c1_1719 : tensor<4x?x4096xf16>
    %4040 = flow.tensor.transfer %4039 : tensor<4x?x4096xf16>{%dim_1720} to #hal.device.promise<@__device_0>
    %4041 = torch_c.from_builtin_tensor %4040 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4042 = torch_c.to_builtin_tensor %4011 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1721 = arith.constant 1 : index
    %dim_1722 = tensor.dim %4042, %c1_1721 : tensor<4x?x4096xf16>
    %4043 = flow.tensor.transfer %4042 : tensor<4x?x4096xf16>{%dim_1722} to #hal.device.promise<@__device_0>
    %4044 = torch_c.from_builtin_tensor %4043 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4045 = torch_c.to_builtin_tensor %4018 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1723 = arith.constant 1 : index
    %dim_1724 = tensor.dim %4045, %c1_1723 : tensor<4x?x4096xf16>
    %4046 = flow.tensor.transfer %4045 : tensor<4x?x4096xf16>{%dim_1724} to #hal.device.promise<@__device_0>
    %4047 = torch_c.from_builtin_tensor %4046 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4048 = torch_c.to_builtin_tensor %4025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1725 = arith.constant 1 : index
    %dim_1726 = tensor.dim %4048, %c1_1725 : tensor<4x?x4096xf16>
    %4049 = flow.tensor.transfer %4048 : tensor<4x?x4096xf16>{%dim_1726} to #hal.device.promise<@__device_0>
    %4050 = torch_c.from_builtin_tensor %4049 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4051 = torch_c.to_builtin_tensor %4032 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1727 = arith.constant 1 : index
    %dim_1728 = tensor.dim %4051, %c1_1727 : tensor<4x?x4096xf16>
    %4052 = flow.tensor.transfer %4051 : tensor<4x?x4096xf16>{%dim_1728} to #hal.device.promise<@__device_0>
    %4053 = torch_c.from_builtin_tensor %4052 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1729 = torch.constant.int 1
    %4054 = torch.aten.add.Tensor %3983, %4035, %int1_1729 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1730 = torch.constant.int 1
    %4055 = torch.aten.add.Tensor %4054, %4038, %int1_1730 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1731 = torch.constant.int 1
    %4056 = torch.aten.add.Tensor %4055, %4041, %int1_1731 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1732 = torch.constant.int 1
    %4057 = torch.aten.add.Tensor %4056, %4044, %int1_1732 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1733 = torch.constant.int 1
    %4058 = torch.aten.add.Tensor %4057, %4047, %int1_1733 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1734 = torch.constant.int 1
    %4059 = torch.aten.add.Tensor %4058, %4050, %int1_1734 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1735 = torch.constant.int 1
    %4060 = torch.aten.add.Tensor %4059, %4053, %int1_1735 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
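    // Repeat the gather-and-sum with @__device_1 as the destination (the local shard %3990 needs no transfer).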
    %4061 = torch_c.to_builtin_tensor %3983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1736 = arith.constant 1 : index
    %dim_1737 = tensor.dim %4061, %c1_1736 : tensor<4x?x4096xf16>
    %4062 = flow.tensor.transfer %4061 : tensor<4x?x4096xf16>{%dim_1737} to #hal.device.promise<@__device_1>
    %4063 = torch_c.from_builtin_tensor %4062 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4064 = torch_c.to_builtin_tensor %3997 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1738 = arith.constant 1 : index
    %dim_1739 = tensor.dim %4064, %c1_1738 : tensor<4x?x4096xf16>
    %4065 = flow.tensor.transfer %4064 : tensor<4x?x4096xf16>{%dim_1739} to #hal.device.promise<@__device_1>
    %4066 = torch_c.from_builtin_tensor %4065 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4067 = torch_c.to_builtin_tensor %4004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1740 = arith.constant 1 : index
    %dim_1741 = tensor.dim %4067, %c1_1740 : tensor<4x?x4096xf16>
    %4068 = flow.tensor.transfer %4067 : tensor<4x?x4096xf16>{%dim_1741} to #hal.device.promise<@__device_1>
    %4069 = torch_c.from_builtin_tensor %4068 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4070 = torch_c.to_builtin_tensor %4011 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1742 = arith.constant 1 : index
    %dim_1743 = tensor.dim %4070, %c1_1742 : tensor<4x?x4096xf16>
    %4071 = flow.tensor.transfer %4070 : tensor<4x?x4096xf16>{%dim_1743} to #hal.device.promise<@__device_1>
    %4072 = torch_c.from_builtin_tensor %4071 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4073 = torch_c.to_builtin_tensor %4018 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1744 = arith.constant 1 : index
    %dim_1745 = tensor.dim %4073, %c1_1744 : tensor<4x?x4096xf16>
    %4074 = flow.tensor.transfer %4073 : tensor<4x?x4096xf16>{%dim_1745} to #hal.device.promise<@__device_1>
    %4075 = torch_c.from_builtin_tensor %4074 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4076 = torch_c.to_builtin_tensor %4025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1746 = arith.constant 1 : index
    %dim_1747 = tensor.dim %4076, %c1_1746 : tensor<4x?x4096xf16>
    %4077 = flow.tensor.transfer %4076 : tensor<4x?x4096xf16>{%dim_1747} to #hal.device.promise<@__device_1>
    %4078 = torch_c.from_builtin_tensor %4077 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4079 = torch_c.to_builtin_tensor %4032 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1748 = arith.constant 1 : index
    %dim_1749 = tensor.dim %4079, %c1_1748 : tensor<4x?x4096xf16>
    %4080 = flow.tensor.transfer %4079 : tensor<4x?x4096xf16>{%dim_1749} to #hal.device.promise<@__device_1>
    %4081 = torch_c.from_builtin_tensor %4080 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1750 = torch.constant.int 1
    %4082 = torch.aten.add.Tensor %4063, %3990, %int1_1750 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1751 = torch.constant.int 1
    %4083 = torch.aten.add.Tensor %4082, %4066, %int1_1751 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1752 = torch.constant.int 1
    %4084 = torch.aten.add.Tensor %4083, %4069, %int1_1752 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1753 = torch.constant.int 1
    %4085 = torch.aten.add.Tensor %4084, %4072, %int1_1753 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1754 = torch.constant.int 1
    %4086 = torch.aten.add.Tensor %4085, %4075, %int1_1754 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1755 = torch.constant.int 1
    %4087 = torch.aten.add.Tensor %4086, %4078, %int1_1755 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1756 = torch.constant.int 1
    %4088 = torch.aten.add.Tensor %4087, %4081, %int1_1756 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
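    // Same reduction materialized on @__device_2 (local shard %3997).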
    %4089 = torch_c.to_builtin_tensor %3983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1757 = arith.constant 1 : index
    %dim_1758 = tensor.dim %4089, %c1_1757 : tensor<4x?x4096xf16>
    %4090 = flow.tensor.transfer %4089 : tensor<4x?x4096xf16>{%dim_1758} to #hal.device.promise<@__device_2>
    %4091 = torch_c.from_builtin_tensor %4090 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4092 = torch_c.to_builtin_tensor %3990 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1759 = arith.constant 1 : index
    %dim_1760 = tensor.dim %4092, %c1_1759 : tensor<4x?x4096xf16>
    %4093 = flow.tensor.transfer %4092 : tensor<4x?x4096xf16>{%dim_1760} to #hal.device.promise<@__device_2>
    %4094 = torch_c.from_builtin_tensor %4093 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4095 = torch_c.to_builtin_tensor %4004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1761 = arith.constant 1 : index
    %dim_1762 = tensor.dim %4095, %c1_1761 : tensor<4x?x4096xf16>
    %4096 = flow.tensor.transfer %4095 : tensor<4x?x4096xf16>{%dim_1762} to #hal.device.promise<@__device_2>
    %4097 = torch_c.from_builtin_tensor %4096 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4098 = torch_c.to_builtin_tensor %4011 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1763 = arith.constant 1 : index
    %dim_1764 = tensor.dim %4098, %c1_1763 : tensor<4x?x4096xf16>
    %4099 = flow.tensor.transfer %4098 : tensor<4x?x4096xf16>{%dim_1764} to #hal.device.promise<@__device_2>
    %4100 = torch_c.from_builtin_tensor %4099 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4101 = torch_c.to_builtin_tensor %4018 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1765 = arith.constant 1 : index
    %dim_1766 = tensor.dim %4101, %c1_1765 : tensor<4x?x4096xf16>
    %4102 = flow.tensor.transfer %4101 : tensor<4x?x4096xf16>{%dim_1766} to #hal.device.promise<@__device_2>
    %4103 = torch_c.from_builtin_tensor %4102 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4104 = torch_c.to_builtin_tensor %4025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1767 = arith.constant 1 : index
    %dim_1768 = tensor.dim %4104, %c1_1767 : tensor<4x?x4096xf16>
    %4105 = flow.tensor.transfer %4104 : tensor<4x?x4096xf16>{%dim_1768} to #hal.device.promise<@__device_2>
    %4106 = torch_c.from_builtin_tensor %4105 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4107 = torch_c.to_builtin_tensor %4032 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1769 = arith.constant 1 : index
    %dim_1770 = tensor.dim %4107, %c1_1769 : tensor<4x?x4096xf16>
    %4108 = flow.tensor.transfer %4107 : tensor<4x?x4096xf16>{%dim_1770} to #hal.device.promise<@__device_2>
    %4109 = torch_c.from_builtin_tensor %4108 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1771 = torch.constant.int 1
    %4110 = torch.aten.add.Tensor %4091, %4094, %int1_1771 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1772 = torch.constant.int 1
    %4111 = torch.aten.add.Tensor %4110, %3997, %int1_1772 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1773 = torch.constant.int 1
    %4112 = torch.aten.add.Tensor %4111, %4097, %int1_1773 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1774 = torch.constant.int 1
    %4113 = torch.aten.add.Tensor %4112, %4100, %int1_1774 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1775 = torch.constant.int 1
    %4114 = torch.aten.add.Tensor %4113, %4103, %int1_1775 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1776 = torch.constant.int 1
    %4115 = torch.aten.add.Tensor %4114, %4106, %int1_1776 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1777 = torch.constant.int 1
    %4116 = torch.aten.add.Tensor %4115, %4109, %int1_1777 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
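    // Same reduction materialized on @__device_3 (local shard %4004).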
    %4117 = torch_c.to_builtin_tensor %3983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1778 = arith.constant 1 : index
    %dim_1779 = tensor.dim %4117, %c1_1778 : tensor<4x?x4096xf16>
    %4118 = flow.tensor.transfer %4117 : tensor<4x?x4096xf16>{%dim_1779} to #hal.device.promise<@__device_3>
    %4119 = torch_c.from_builtin_tensor %4118 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4120 = torch_c.to_builtin_tensor %3990 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1780 = arith.constant 1 : index
    %dim_1781 = tensor.dim %4120, %c1_1780 : tensor<4x?x4096xf16>
    %4121 = flow.tensor.transfer %4120 : tensor<4x?x4096xf16>{%dim_1781} to #hal.device.promise<@__device_3>
    %4122 = torch_c.from_builtin_tensor %4121 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4123 = torch_c.to_builtin_tensor %3997 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1782 = arith.constant 1 : index
    %dim_1783 = tensor.dim %4123, %c1_1782 : tensor<4x?x4096xf16>
    %4124 = flow.tensor.transfer %4123 : tensor<4x?x4096xf16>{%dim_1783} to #hal.device.promise<@__device_3>
    %4125 = torch_c.from_builtin_tensor %4124 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4126 = torch_c.to_builtin_tensor %4011 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1784 = arith.constant 1 : index
    %dim_1785 = tensor.dim %4126, %c1_1784 : tensor<4x?x4096xf16>
    %4127 = flow.tensor.transfer %4126 : tensor<4x?x4096xf16>{%dim_1785} to #hal.device.promise<@__device_3>
    %4128 = torch_c.from_builtin_tensor %4127 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4129 = torch_c.to_builtin_tensor %4018 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1786 = arith.constant 1 : index
    %dim_1787 = tensor.dim %4129, %c1_1786 : tensor<4x?x4096xf16>
    %4130 = flow.tensor.transfer %4129 : tensor<4x?x4096xf16>{%dim_1787} to #hal.device.promise<@__device_3>
    %4131 = torch_c.from_builtin_tensor %4130 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4132 = torch_c.to_builtin_tensor %4025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1788 = arith.constant 1 : index
    %dim_1789 = tensor.dim %4132, %c1_1788 : tensor<4x?x4096xf16>
    %4133 = flow.tensor.transfer %4132 : tensor<4x?x4096xf16>{%dim_1789} to #hal.device.promise<@__device_3>
    %4134 = torch_c.from_builtin_tensor %4133 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4135 = torch_c.to_builtin_tensor %4032 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1790 = arith.constant 1 : index
    %dim_1791 = tensor.dim %4135, %c1_1790 : tensor<4x?x4096xf16>
    %4136 = flow.tensor.transfer %4135 : tensor<4x?x4096xf16>{%dim_1791} to #hal.device.promise<@__device_3>
    %4137 = torch_c.from_builtin_tensor %4136 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1792 = torch.constant.int 1
    %4138 = torch.aten.add.Tensor %4119, %4122, %int1_1792 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1793 = torch.constant.int 1
    %4139 = torch.aten.add.Tensor %4138, %4125, %int1_1793 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1794 = torch.constant.int 1
    %4140 = torch.aten.add.Tensor %4139, %4004, %int1_1794 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1795 = torch.constant.int 1
    %4141 = torch.aten.add.Tensor %4140, %4128, %int1_1795 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1796 = torch.constant.int 1
    %4142 = torch.aten.add.Tensor %4141, %4131, %int1_1796 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1797 = torch.constant.int 1
    %4143 = torch.aten.add.Tensor %4142, %4134, %int1_1797 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1798 = torch.constant.int 1
    %4144 = torch.aten.add.Tensor %4143, %4137, %int1_1798 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
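    // Same reduction materialized on @__device_4 (local shard %4011).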
    %4145 = torch_c.to_builtin_tensor %3983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1799 = arith.constant 1 : index
    %dim_1800 = tensor.dim %4145, %c1_1799 : tensor<4x?x4096xf16>
    %4146 = flow.tensor.transfer %4145 : tensor<4x?x4096xf16>{%dim_1800} to #hal.device.promise<@__device_4>
    %4147 = torch_c.from_builtin_tensor %4146 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4148 = torch_c.to_builtin_tensor %3990 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1801 = arith.constant 1 : index
    %dim_1802 = tensor.dim %4148, %c1_1801 : tensor<4x?x4096xf16>
    %4149 = flow.tensor.transfer %4148 : tensor<4x?x4096xf16>{%dim_1802} to #hal.device.promise<@__device_4>
    %4150 = torch_c.from_builtin_tensor %4149 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4151 = torch_c.to_builtin_tensor %3997 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1803 = arith.constant 1 : index
    %dim_1804 = tensor.dim %4151, %c1_1803 : tensor<4x?x4096xf16>
    %4152 = flow.tensor.transfer %4151 : tensor<4x?x4096xf16>{%dim_1804} to #hal.device.promise<@__device_4>
    %4153 = torch_c.from_builtin_tensor %4152 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4154 = torch_c.to_builtin_tensor %4004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1805 = arith.constant 1 : index
    %dim_1806 = tensor.dim %4154, %c1_1805 : tensor<4x?x4096xf16>
    %4155 = flow.tensor.transfer %4154 : tensor<4x?x4096xf16>{%dim_1806} to #hal.device.promise<@__device_4>
    %4156 = torch_c.from_builtin_tensor %4155 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4157 = torch_c.to_builtin_tensor %4018 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1807 = arith.constant 1 : index
    %dim_1808 = tensor.dim %4157, %c1_1807 : tensor<4x?x4096xf16>
    %4158 = flow.tensor.transfer %4157 : tensor<4x?x4096xf16>{%dim_1808} to #hal.device.promise<@__device_4>
    %4159 = torch_c.from_builtin_tensor %4158 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4160 = torch_c.to_builtin_tensor %4025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1809 = arith.constant 1 : index
    %dim_1810 = tensor.dim %4160, %c1_1809 : tensor<4x?x4096xf16>
    %4161 = flow.tensor.transfer %4160 : tensor<4x?x4096xf16>{%dim_1810} to #hal.device.promise<@__device_4>
    %4162 = torch_c.from_builtin_tensor %4161 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4163 = torch_c.to_builtin_tensor %4032 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1811 = arith.constant 1 : index
    %dim_1812 = tensor.dim %4163, %c1_1811 : tensor<4x?x4096xf16>
    %4164 = flow.tensor.transfer %4163 : tensor<4x?x4096xf16>{%dim_1812} to #hal.device.promise<@__device_4>
    %4165 = torch_c.from_builtin_tensor %4164 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1813 = torch.constant.int 1
    %4166 = torch.aten.add.Tensor %4147, %4150, %int1_1813 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1814 = torch.constant.int 1
    %4167 = torch.aten.add.Tensor %4166, %4153, %int1_1814 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1815 = torch.constant.int 1
    %4168 = torch.aten.add.Tensor %4167, %4156, %int1_1815 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1816 = torch.constant.int 1
    %4169 = torch.aten.add.Tensor %4168, %4011, %int1_1816 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1817 = torch.constant.int 1
    %4170 = torch.aten.add.Tensor %4169, %4159, %int1_1817 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1818 = torch.constant.int 1
    %4171 = torch.aten.add.Tensor %4170, %4162, %int1_1818 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1819 = torch.constant.int 1
    %4172 = torch.aten.add.Tensor %4171, %4165, %int1_1819 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
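    // Gather the eight per-device partial results onto @__device_5 and reduce
    // them with a chain of adds; together with the matching blocks for the
    // other devices this appears to expand an all-reduce into explicit
    // transfer + add sequences. %4018 already resides on @__device_5, so it is
    // the one operand below that is not routed through flow.tensor.transfer.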
    %4173 = torch_c.to_builtin_tensor %3983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1820 = arith.constant 1 : index
    %dim_1821 = tensor.dim %4173, %c1_1820 : tensor<4x?x4096xf16>
    %4174 = flow.tensor.transfer %4173 : tensor<4x?x4096xf16>{%dim_1821} to #hal.device.promise<@__device_5>
    %4175 = torch_c.from_builtin_tensor %4174 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4176 = torch_c.to_builtin_tensor %3990 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1822 = arith.constant 1 : index
    %dim_1823 = tensor.dim %4176, %c1_1822 : tensor<4x?x4096xf16>
    %4177 = flow.tensor.transfer %4176 : tensor<4x?x4096xf16>{%dim_1823} to #hal.device.promise<@__device_5>
    %4178 = torch_c.from_builtin_tensor %4177 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4179 = torch_c.to_builtin_tensor %3997 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1824 = arith.constant 1 : index
    %dim_1825 = tensor.dim %4179, %c1_1824 : tensor<4x?x4096xf16>
    %4180 = flow.tensor.transfer %4179 : tensor<4x?x4096xf16>{%dim_1825} to #hal.device.promise<@__device_5>
    %4181 = torch_c.from_builtin_tensor %4180 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4182 = torch_c.to_builtin_tensor %4004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1826 = arith.constant 1 : index
    %dim_1827 = tensor.dim %4182, %c1_1826 : tensor<4x?x4096xf16>
    %4183 = flow.tensor.transfer %4182 : tensor<4x?x4096xf16>{%dim_1827} to #hal.device.promise<@__device_5>
    %4184 = torch_c.from_builtin_tensor %4183 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4185 = torch_c.to_builtin_tensor %4011 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1828 = arith.constant 1 : index
    %dim_1829 = tensor.dim %4185, %c1_1828 : tensor<4x?x4096xf16>
    %4186 = flow.tensor.transfer %4185 : tensor<4x?x4096xf16>{%dim_1829} to #hal.device.promise<@__device_5>
    %4187 = torch_c.from_builtin_tensor %4186 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4188 = torch_c.to_builtin_tensor %4025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1830 = arith.constant 1 : index
    %dim_1831 = tensor.dim %4188, %c1_1830 : tensor<4x?x4096xf16>
    %4189 = flow.tensor.transfer %4188 : tensor<4x?x4096xf16>{%dim_1831} to #hal.device.promise<@__device_5>
    %4190 = torch_c.from_builtin_tensor %4189 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4191 = torch_c.to_builtin_tensor %4032 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1832 = arith.constant 1 : index
    %dim_1833 = tensor.dim %4191, %c1_1832 : tensor<4x?x4096xf16>
    %4192 = flow.tensor.transfer %4191 : tensor<4x?x4096xf16>{%dim_1833} to #hal.device.promise<@__device_5>
    %4193 = torch_c.from_builtin_tensor %4192 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1834 = torch.constant.int 1
    %4194 = torch.aten.add.Tensor %4175, %4178, %int1_1834 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1835 = torch.constant.int 1
    %4195 = torch.aten.add.Tensor %4194, %4181, %int1_1835 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1836 = torch.constant.int 1
    %4196 = torch.aten.add.Tensor %4195, %4184, %int1_1836 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1837 = torch.constant.int 1
    %4197 = torch.aten.add.Tensor %4196, %4187, %int1_1837 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1838 = torch.constant.int 1
    %4198 = torch.aten.add.Tensor %4197, %4018, %int1_1838 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1839 = torch.constant.int 1
    %4199 = torch.aten.add.Tensor %4198, %4190, %int1_1839 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1840 = torch.constant.int 1
    %4200 = torch.aten.add.Tensor %4199, %4193, %int1_1840 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
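    // Same replicate-and-sum pattern for @__device_6; the locally resident
    // partial here is %4025, which is added directly without a transfer.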
    %4201 = torch_c.to_builtin_tensor %3983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1841 = arith.constant 1 : index
    %dim_1842 = tensor.dim %4201, %c1_1841 : tensor<4x?x4096xf16>
    %4202 = flow.tensor.transfer %4201 : tensor<4x?x4096xf16>{%dim_1842} to #hal.device.promise<@__device_6>
    %4203 = torch_c.from_builtin_tensor %4202 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4204 = torch_c.to_builtin_tensor %3990 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1843 = arith.constant 1 : index
    %dim_1844 = tensor.dim %4204, %c1_1843 : tensor<4x?x4096xf16>
    %4205 = flow.tensor.transfer %4204 : tensor<4x?x4096xf16>{%dim_1844} to #hal.device.promise<@__device_6>
    %4206 = torch_c.from_builtin_tensor %4205 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4207 = torch_c.to_builtin_tensor %3997 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1845 = arith.constant 1 : index
    %dim_1846 = tensor.dim %4207, %c1_1845 : tensor<4x?x4096xf16>
    %4208 = flow.tensor.transfer %4207 : tensor<4x?x4096xf16>{%dim_1846} to #hal.device.promise<@__device_6>
    %4209 = torch_c.from_builtin_tensor %4208 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4210 = torch_c.to_builtin_tensor %4004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1847 = arith.constant 1 : index
    %dim_1848 = tensor.dim %4210, %c1_1847 : tensor<4x?x4096xf16>
    %4211 = flow.tensor.transfer %4210 : tensor<4x?x4096xf16>{%dim_1848} to #hal.device.promise<@__device_6>
    %4212 = torch_c.from_builtin_tensor %4211 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4213 = torch_c.to_builtin_tensor %4011 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1849 = arith.constant 1 : index
    %dim_1850 = tensor.dim %4213, %c1_1849 : tensor<4x?x4096xf16>
    %4214 = flow.tensor.transfer %4213 : tensor<4x?x4096xf16>{%dim_1850} to #hal.device.promise<@__device_6>
    %4215 = torch_c.from_builtin_tensor %4214 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4216 = torch_c.to_builtin_tensor %4018 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1851 = arith.constant 1 : index
    %dim_1852 = tensor.dim %4216, %c1_1851 : tensor<4x?x4096xf16>
    %4217 = flow.tensor.transfer %4216 : tensor<4x?x4096xf16>{%dim_1852} to #hal.device.promise<@__device_6>
    %4218 = torch_c.from_builtin_tensor %4217 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4219 = torch_c.to_builtin_tensor %4032 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1853 = arith.constant 1 : index
    %dim_1854 = tensor.dim %4219, %c1_1853 : tensor<4x?x4096xf16>
    %4220 = flow.tensor.transfer %4219 : tensor<4x?x4096xf16>{%dim_1854} to #hal.device.promise<@__device_6>
    %4221 = torch_c.from_builtin_tensor %4220 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1855 = torch.constant.int 1
    %4222 = torch.aten.add.Tensor %4203, %4206, %int1_1855 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1856 = torch.constant.int 1
    %4223 = torch.aten.add.Tensor %4222, %4209, %int1_1856 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1857 = torch.constant.int 1
    %4224 = torch.aten.add.Tensor %4223, %4212, %int1_1857 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1858 = torch.constant.int 1
    %4225 = torch.aten.add.Tensor %4224, %4215, %int1_1858 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1859 = torch.constant.int 1
    %4226 = torch.aten.add.Tensor %4225, %4218, %int1_1859 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1860 = torch.constant.int 1
    %4227 = torch.aten.add.Tensor %4226, %4025, %int1_1860 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1861 = torch.constant.int 1
    %4228 = torch.aten.add.Tensor %4227, %4221, %int1_1861 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
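    // Same replicate-and-sum pattern for @__device_7; the locally resident
    // partial here is %4032, which is added directly without a transfer.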
    %4229 = torch_c.to_builtin_tensor %3983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1862 = arith.constant 1 : index
    %dim_1863 = tensor.dim %4229, %c1_1862 : tensor<4x?x4096xf16>
    %4230 = flow.tensor.transfer %4229 : tensor<4x?x4096xf16>{%dim_1863} to #hal.device.promise<@__device_7>
    %4231 = torch_c.from_builtin_tensor %4230 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4232 = torch_c.to_builtin_tensor %3990 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1864 = arith.constant 1 : index
    %dim_1865 = tensor.dim %4232, %c1_1864 : tensor<4x?x4096xf16>
    %4233 = flow.tensor.transfer %4232 : tensor<4x?x4096xf16>{%dim_1865} to #hal.device.promise<@__device_7>
    %4234 = torch_c.from_builtin_tensor %4233 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4235 = torch_c.to_builtin_tensor %3997 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1866 = arith.constant 1 : index
    %dim_1867 = tensor.dim %4235, %c1_1866 : tensor<4x?x4096xf16>
    %4236 = flow.tensor.transfer %4235 : tensor<4x?x4096xf16>{%dim_1867} to #hal.device.promise<@__device_7>
    %4237 = torch_c.from_builtin_tensor %4236 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4238 = torch_c.to_builtin_tensor %4004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1868 = arith.constant 1 : index
    %dim_1869 = tensor.dim %4238, %c1_1868 : tensor<4x?x4096xf16>
    %4239 = flow.tensor.transfer %4238 : tensor<4x?x4096xf16>{%dim_1869} to #hal.device.promise<@__device_7>
    %4240 = torch_c.from_builtin_tensor %4239 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4241 = torch_c.to_builtin_tensor %4011 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1870 = arith.constant 1 : index
    %dim_1871 = tensor.dim %4241, %c1_1870 : tensor<4x?x4096xf16>
    %4242 = flow.tensor.transfer %4241 : tensor<4x?x4096xf16>{%dim_1871} to #hal.device.promise<@__device_7>
    %4243 = torch_c.from_builtin_tensor %4242 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4244 = torch_c.to_builtin_tensor %4018 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1872 = arith.constant 1 : index
    %dim_1873 = tensor.dim %4244, %c1_1872 : tensor<4x?x4096xf16>
    %4245 = flow.tensor.transfer %4244 : tensor<4x?x4096xf16>{%dim_1873} to #hal.device.promise<@__device_7>
    %4246 = torch_c.from_builtin_tensor %4245 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %4247 = torch_c.to_builtin_tensor %4025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_1874 = arith.constant 1 : index
    %dim_1875 = tensor.dim %4247, %c1_1874 : tensor<4x?x4096xf16>
    %4248 = flow.tensor.transfer %4247 : tensor<4x?x4096xf16>{%dim_1875} to #hal.device.promise<@__device_7>
    %4249 = torch_c.from_builtin_tensor %4248 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1876 = torch.constant.int 1
    %4250 = torch.aten.add.Tensor %4231, %4234, %int1_1876 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1877 = torch.constant.int 1
    %4251 = torch.aten.add.Tensor %4250, %4237, %int1_1877 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1878 = torch.constant.int 1
    %4252 = torch.aten.add.Tensor %4251, %4240, %int1_1878 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1879 = torch.constant.int 1
    %4253 = torch.aten.add.Tensor %4252, %4243, %int1_1879 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1880 = torch.constant.int 1
    %4254 = torch.aten.add.Tensor %4253, %4246, %int1_1880 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1881 = torch.constant.int 1
    %4255 = torch.aten.add.Tensor %4254, %4249, %int1_1881 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1882 = torch.constant.int 1
    %4256 = torch.aten.add.Tensor %4255, %4032, %int1_1882 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
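    // Per-device residual connection: add each all-reduced result
    // (%4060, %4088, ..., %4256) to the corresponding incoming hidden-state
    // replica (%3737..%3744).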
    %int1_1883 = torch.constant.int 1
    %4257 = torch.aten.add.Tensor %3737, %4060, %int1_1883 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1884 = torch.constant.int 1
    %4258 = torch.aten.add.Tensor %3738, %4088, %int1_1884 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1885 = torch.constant.int 1
    %4259 = torch.aten.add.Tensor %3739, %4116, %int1_1885 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1886 = torch.constant.int 1
    %4260 = torch.aten.add.Tensor %3740, %4144, %int1_1886 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1887 = torch.constant.int 1
    %4261 = torch.aten.add.Tensor %3741, %4172, %int1_1887 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1888 = torch.constant.int 1
    %4262 = torch.aten.add.Tensor %3742, %4200, %int1_1888 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1889 = torch.constant.int 1
    %4263 = torch.aten.add.Tensor %3743, %4228, %int1_1889 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4263, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_1890 = torch.constant.int 1
    %4264 = torch.aten.add.Tensor %3744, %4256, %int1_1890 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
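    // What follows looks like RMSNorm over the hidden dimension, computed
    // independently on each device replica:
    //   y = w * x / sqrt(mean(x^2, dim=-1) + eps),  eps ~= 1e-5
    // Step 1: upcast the f16 residual stream to f32 (torch dtype 6) so the
    // square/mean/rsqrt sequence below runs in full precision.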
    %int6_1891 = torch.constant.int 6
    %4265 = torch.prims.convert_element_type %4257, %int6_1891 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1892 = torch.constant.int 6
    %4266 = torch.prims.convert_element_type %4258, %int6_1892 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1893 = torch.constant.int 6
    %4267 = torch.prims.convert_element_type %4259, %int6_1893 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1894 = torch.constant.int 6
    %4268 = torch.prims.convert_element_type %4260, %int6_1894 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1895 = torch.constant.int 6
    %4269 = torch.prims.convert_element_type %4261, %int6_1895 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1896 = torch.constant.int 6
    %4270 = torch.prims.convert_element_type %4262, %int6_1896 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1897 = torch.constant.int 6
    %4271 = torch.prims.convert_element_type %4263, %int6_1897 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_1898 = torch.constant.int 6
    %4272 = torch.prims.convert_element_type %4264, %int6_1898 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
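    // Step 2: square each element (x^2).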
    %int2_1899 = torch.constant.int 2
    %4273 = torch.aten.pow.Tensor_Scalar %4265, %int2_1899 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1900 = torch.constant.int 2
    %4274 = torch.aten.pow.Tensor_Scalar %4266, %int2_1900 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1901 = torch.constant.int 2
    %4275 = torch.aten.pow.Tensor_Scalar %4267, %int2_1901 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1902 = torch.constant.int 2
    %4276 = torch.aten.pow.Tensor_Scalar %4268, %int2_1902 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1903 = torch.constant.int 2
    %4277 = torch.aten.pow.Tensor_Scalar %4269, %int2_1903 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1904 = torch.constant.int 2
    %4278 = torch.aten.pow.Tensor_Scalar %4270, %int2_1904 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1905 = torch.constant.int 2
    %4279 = torch.aten.pow.Tensor_Scalar %4271, %int2_1905 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_1906 = torch.constant.int 2
    %4280 = torch.aten.pow.Tensor_Scalar %4272, %int2_1906 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
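    // Step 3: mean of x^2 over the last (4096) dimension with keepdim=true,
    // yielding a [4,?,1] tensor per replica.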
    %int-1_1907 = torch.constant.int -1
    %4281 = torch.prim.ListConstruct %int-1_1907 : (!torch.int) -> !torch.list<int>
    %true_1908 = torch.constant.bool true
    %none_1909 = torch.constant.none
    %4282 = torch.aten.mean.dim %4273, %4281, %true_1908, %none_1909 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1910 = torch.constant.int -1
    %4283 = torch.prim.ListConstruct %int-1_1910 : (!torch.int) -> !torch.list<int>
    %true_1911 = torch.constant.bool true
    %none_1912 = torch.constant.none
    %4284 = torch.aten.mean.dim %4274, %4283, %true_1911, %none_1912 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1913 = torch.constant.int -1
    %4285 = torch.prim.ListConstruct %int-1_1913 : (!torch.int) -> !torch.list<int>
    %true_1914 = torch.constant.bool true
    %none_1915 = torch.constant.none
    %4286 = torch.aten.mean.dim %4275, %4285, %true_1914, %none_1915 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1916 = torch.constant.int -1
    %4287 = torch.prim.ListConstruct %int-1_1916 : (!torch.int) -> !torch.list<int>
    %true_1917 = torch.constant.bool true
    %none_1918 = torch.constant.none
    %4288 = torch.aten.mean.dim %4276, %4287, %true_1917, %none_1918 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1919 = torch.constant.int -1
    %4289 = torch.prim.ListConstruct %int-1_1919 : (!torch.int) -> !torch.list<int>
    %true_1920 = torch.constant.bool true
    %none_1921 = torch.constant.none
    %4290 = torch.aten.mean.dim %4277, %4289, %true_1920, %none_1921 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1922 = torch.constant.int -1
    %4291 = torch.prim.ListConstruct %int-1_1922 : (!torch.int) -> !torch.list<int>
    %true_1923 = torch.constant.bool true
    %none_1924 = torch.constant.none
    %4292 = torch.aten.mean.dim %4278, %4291, %true_1923, %none_1924 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1925 = torch.constant.int -1
    %4293 = torch.prim.ListConstruct %int-1_1925 : (!torch.int) -> !torch.list<int>
    %true_1926 = torch.constant.bool true
    %none_1927 = torch.constant.none
    %4294 = torch.aten.mean.dim %4279, %4293, %true_1926, %none_1927 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_1928 = torch.constant.int -1
    %4295 = torch.prim.ListConstruct %int-1_1928 : (!torch.int) -> !torch.list<int>
    %true_1929 = torch.constant.bool true
    %none_1930 = torch.constant.none
    %4296 = torch.aten.mean.dim %4280, %4295, %true_1929, %none_1930 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
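    // Step 4: add the epsilon (9.9999997473787516e-6, i.e. ~1e-5 in f32).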
    %float9.999990e-06_1931 = torch.constant.float 9.9999997473787516E-6
    %int1_1932 = torch.constant.int 1
    %4297 = torch.aten.add.Scalar %4282, %float9.999990e-06_1931, %int1_1932 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1933 = torch.constant.float 9.9999997473787516E-6
    %int1_1934 = torch.constant.int 1
    %4298 = torch.aten.add.Scalar %4284, %float9.999990e-06_1933, %int1_1934 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1935 = torch.constant.float 9.9999997473787516E-6
    %int1_1936 = torch.constant.int 1
    %4299 = torch.aten.add.Scalar %4286, %float9.999990e-06_1935, %int1_1936 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1937 = torch.constant.float 9.9999997473787516E-6
    %int1_1938 = torch.constant.int 1
    %4300 = torch.aten.add.Scalar %4288, %float9.999990e-06_1937, %int1_1938 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1939 = torch.constant.float 9.9999997473787516E-6
    %int1_1940 = torch.constant.int 1
    %4301 = torch.aten.add.Scalar %4290, %float9.999990e-06_1939, %int1_1940 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1941 = torch.constant.float 9.9999997473787516E-6
    %int1_1942 = torch.constant.int 1
    %4302 = torch.aten.add.Scalar %4292, %float9.999990e-06_1941, %int1_1942 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1943 = torch.constant.float 9.9999997473787516E-6
    %int1_1944 = torch.constant.int 1
    %4303 = torch.aten.add.Scalar %4294, %float9.999990e-06_1943, %int1_1944 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_1945 = torch.constant.float 9.9999997473787516E-6
    %int1_1946 = torch.constant.int 1
    %4304 = torch.aten.add.Scalar %4296, %float9.999990e-06_1945, %int1_1946 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
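    // Step 5: rsqrt of (mean(x^2) + eps).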
    %4305 = torch.aten.rsqrt %4297 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %4306 = torch.aten.rsqrt %4298 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %4307 = torch.aten.rsqrt %4299 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %4308 = torch.aten.rsqrt %4300 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %4309 = torch.aten.rsqrt %4301 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %4310 = torch.aten.rsqrt %4302 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %4311 = torch.aten.rsqrt %4303 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %4312 = torch.aten.rsqrt %4304 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %4312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
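    // Step 6: scale the f32 activations by the broadcast [4,?,1] rsqrt factor.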
    %4313 = torch.aten.mul.Tensor %4265, %4305 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4314 = torch.aten.mul.Tensor %4266, %4306 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4315 = torch.aten.mul.Tensor %4267, %4307 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4316 = torch.aten.mul.Tensor %4268, %4308 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4317 = torch.aten.mul.Tensor %4269, %4309 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4318 = torch.aten.mul.Tensor %4270, %4310 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4319 = torch.aten.mul.Tensor %4271, %4311 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4320 = torch.aten.mul.Tensor %4272, %4312 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
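    // Step 7: elementwise scale by the per-replica [4096]xf32 weights
    // (%80..%87) -- presumably the replicated norm weights for this block.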
    %4321 = torch.aten.mul.Tensor %80, %4313 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4322 = torch.aten.mul.Tensor %81, %4314 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4323 = torch.aten.mul.Tensor %82, %4315 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4324 = torch.aten.mul.Tensor %83, %4316 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4325 = torch.aten.mul.Tensor %84, %4317 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4326 = torch.aten.mul.Tensor %85, %4318 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4327 = torch.aten.mul.Tensor %86, %4319 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %4328 = torch.aten.mul.Tensor %87, %4320 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %4328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
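    // Step 8: downcast the normalized activations back to f16 (torch dtype 5).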
    %int5_1947 = torch.constant.int 5
    %4329 = torch.prims.convert_element_type %4321, %int5_1947 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1948 = torch.constant.int 5
    %4330 = torch.prims.convert_element_type %4322, %int5_1948 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1949 = torch.constant.int 5
    %4331 = torch.prims.convert_element_type %4323, %int5_1949 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1950 = torch.constant.int 5
    %4332 = torch.prims.convert_element_type %4324, %int5_1950 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1951 = torch.constant.int 5
    %4333 = torch.prims.convert_element_type %4325, %int5_1951 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1952 = torch.constant.int 5
    %4334 = torch.prims.convert_element_type %4326, %int5_1952 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1953 = torch.constant.int 5
    %4335 = torch.prims.convert_element_type %4327, %int5_1953 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_1954 = torch.constant.int 5
    %4336 = torch.prims.convert_element_type %4328, %int5_1954 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %4336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
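    // Transpose each [512,4096] weight shard (%88..%95) to [4096,512] for use
    // as the RHS of a matmul -- consistent with a row-sharded attention Q
    // projection, one shard per device.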
    %int1_1955 = torch.constant.int 1
    %int0_1956 = torch.constant.int 0
    %4337 = torch.prim.ListConstruct %int1_1955, %int0_1956 : (!torch.int, !torch.int) -> !torch.list<int>
    %4338 = torch.aten.permute %88, %4337 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_1957 = torch.constant.int 1
    %int0_1958 = torch.constant.int 0
    %4339 = torch.prim.ListConstruct %int1_1957, %int0_1958 : (!torch.int, !torch.int) -> !torch.list<int>
    %4340 = torch.aten.permute %89, %4339 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_1959 = torch.constant.int 1
    %int0_1960 = torch.constant.int 0
    %4341 = torch.prim.ListConstruct %int1_1959, %int0_1960 : (!torch.int, !torch.int) -> !torch.list<int>
    %4342 = torch.aten.permute %90, %4341 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_1961 = torch.constant.int 1
    %int0_1962 = torch.constant.int 0
    %4343 = torch.prim.ListConstruct %int1_1961, %int0_1962 : (!torch.int, !torch.int) -> !torch.list<int>
    %4344 = torch.aten.permute %91, %4343 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_1963 = torch.constant.int 1
    %int0_1964 = torch.constant.int 0
    %4345 = torch.prim.ListConstruct %int1_1963, %int0_1964 : (!torch.int, !torch.int) -> !torch.list<int>
    %4346 = torch.aten.permute %92, %4345 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_1965 = torch.constant.int 1
    %int0_1966 = torch.constant.int 0
    %4347 = torch.prim.ListConstruct %int1_1965, %int0_1966 : (!torch.int, !torch.int) -> !torch.list<int>
    %4348 = torch.aten.permute %93, %4347 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_1967 = torch.constant.int 1
    %int0_1968 = torch.constant.int 0
    %4349 = torch.prim.ListConstruct %int1_1967, %int0_1968 : (!torch.int, !torch.int) -> !torch.list<int>
    %4350 = torch.aten.permute %94, %4349 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_1969 = torch.constant.int 1
    %int0_1970 = torch.constant.int 0
    %4351 = torch.prim.ListConstruct %int1_1969, %int0_1970 : (!torch.int, !torch.int) -> !torch.list<int>
    %4352 = torch.aten.permute %95, %4351 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
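    // For each device: flatten [4,?,4096] to [4*?,4096], matmul against the
    // transposed shard, and reshape the [?,512] result back to [4,?,512].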
    %int4_1971 = torch.constant.int 4
    %4353 = torch.aten.mul.int %int4_1971, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1972 = torch.constant.int 4096
    %4354 = torch.prim.ListConstruct %4353, %int4096_1972 : (!torch.int, !torch.int) -> !torch.list<int>
    %4355 = torch.aten.view %4329, %4354 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4355, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4356 = torch.aten.mm %4355, %4338 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %4356, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_1973 = torch.constant.int 4
    %int512_1974 = torch.constant.int 512
    %4357 = torch.prim.ListConstruct %int4_1973, %2482, %int512_1974 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4358 = torch.aten.view %4356, %4357 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %4358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1975 = torch.constant.int 4
    %4359 = torch.aten.mul.int %int4_1975, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1976 = torch.constant.int 4096
    %4360 = torch.prim.ListConstruct %4359, %int4096_1976 : (!torch.int, !torch.int) -> !torch.list<int>
    %4361 = torch.aten.view %4330, %4360 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4361, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4362 = torch.aten.mm %4361, %4340 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %4362, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_1977 = torch.constant.int 4
    %int512_1978 = torch.constant.int 512
    %4363 = torch.prim.ListConstruct %int4_1977, %2482, %int512_1978 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4364 = torch.aten.view %4362, %4363 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %4364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1979 = torch.constant.int 4
    %4365 = torch.aten.mul.int %int4_1979, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1980 = torch.constant.int 4096
    %4366 = torch.prim.ListConstruct %4365, %int4096_1980 : (!torch.int, !torch.int) -> !torch.list<int>
    %4367 = torch.aten.view %4331, %4366 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4367, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4368 = torch.aten.mm %4367, %4342 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %4368, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_1981 = torch.constant.int 4
    %int512_1982 = torch.constant.int 512
    %4369 = torch.prim.ListConstruct %int4_1981, %2482, %int512_1982 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4370 = torch.aten.view %4368, %4369 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %4370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1983 = torch.constant.int 4
    %4371 = torch.aten.mul.int %int4_1983, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1984 = torch.constant.int 4096
    %4372 = torch.prim.ListConstruct %4371, %int4096_1984 : (!torch.int, !torch.int) -> !torch.list<int>
    %4373 = torch.aten.view %4332, %4372 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4373, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4374 = torch.aten.mm %4373, %4344 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %4374, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_1985 = torch.constant.int 4
    %int512_1986 = torch.constant.int 512
    %4375 = torch.prim.ListConstruct %int4_1985, %2482, %int512_1986 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4376 = torch.aten.view %4374, %4375 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %4376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1987 = torch.constant.int 4
    %4377 = torch.aten.mul.int %int4_1987, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1988 = torch.constant.int 4096
    %4378 = torch.prim.ListConstruct %4377, %int4096_1988 : (!torch.int, !torch.int) -> !torch.list<int>
    %4379 = torch.aten.view %4333, %4378 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4379, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4380 = torch.aten.mm %4379, %4346 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %4380, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_1989 = torch.constant.int 4
    %int512_1990 = torch.constant.int 512
    %4381 = torch.prim.ListConstruct %int4_1989, %2482, %int512_1990 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4382 = torch.aten.view %4380, %4381 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %4382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1991 = torch.constant.int 4
    %4383 = torch.aten.mul.int %int4_1991, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1992 = torch.constant.int 4096
    %4384 = torch.prim.ListConstruct %4383, %int4096_1992 : (!torch.int, !torch.int) -> !torch.list<int>
    %4385 = torch.aten.view %4334, %4384 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4385, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4386 = torch.aten.mm %4385, %4348 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %4386, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_1993 = torch.constant.int 4
    %int512_1994 = torch.constant.int 512
    %4387 = torch.prim.ListConstruct %int4_1993, %2482, %int512_1994 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4388 = torch.aten.view %4386, %4387 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %4388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1995 = torch.constant.int 4
    %4389 = torch.aten.mul.int %int4_1995, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_1996 = torch.constant.int 4096
    %4390 = torch.prim.ListConstruct %4389, %int4096_1996 : (!torch.int, !torch.int) -> !torch.list<int>
    %4391 = torch.aten.view %4335, %4390 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4391, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4392 = torch.aten.mm %4391, %4350 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %4392, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_1997 = torch.constant.int 4
    %int512_1998 = torch.constant.int 512
    %4393 = torch.prim.ListConstruct %int4_1997, %2482, %int512_1998 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4394 = torch.aten.view %4392, %4393 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %4394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_1999 = torch.constant.int 4
    %4395 = torch.aten.mul.int %int4_1999, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2000 = torch.constant.int 4096
    %4396 = torch.prim.ListConstruct %4395, %int4096_2000 : (!torch.int, !torch.int) -> !torch.list<int>
    %4397 = torch.aten.view %4336, %4396 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4397, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4398 = torch.aten.mm %4397, %4352 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %4398, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_2001 = torch.constant.int 4
    %int512_2002 = torch.constant.int 512
    %4399 = torch.prim.ListConstruct %int4_2001, %2482, %int512_2002 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4400 = torch.aten.view %4398, %4399 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %4400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
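    // Transpose each [128,4096] weight shard (%96..%103) to [4096,128] --
    // likely the K projection shards; the 128-wide output per device would
    // correspond to a single KV head per shard.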
    %int1_2003 = torch.constant.int 1
    %int0_2004 = torch.constant.int 0
    %4401 = torch.prim.ListConstruct %int1_2003, %int0_2004 : (!torch.int, !torch.int) -> !torch.list<int>
    %4402 = torch.aten.permute %96, %4401 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2005 = torch.constant.int 1
    %int0_2006 = torch.constant.int 0
    %4403 = torch.prim.ListConstruct %int1_2005, %int0_2006 : (!torch.int, !torch.int) -> !torch.list<int>
    %4404 = torch.aten.permute %97, %4403 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2007 = torch.constant.int 1
    %int0_2008 = torch.constant.int 0
    %4405 = torch.prim.ListConstruct %int1_2007, %int0_2008 : (!torch.int, !torch.int) -> !torch.list<int>
    %4406 = torch.aten.permute %98, %4405 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2009 = torch.constant.int 1
    %int0_2010 = torch.constant.int 0
    %4407 = torch.prim.ListConstruct %int1_2009, %int0_2010 : (!torch.int, !torch.int) -> !torch.list<int>
    %4408 = torch.aten.permute %99, %4407 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2011 = torch.constant.int 1
    %int0_2012 = torch.constant.int 0
    %4409 = torch.prim.ListConstruct %int1_2011, %int0_2012 : (!torch.int, !torch.int) -> !torch.list<int>
    %4410 = torch.aten.permute %100, %4409 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2013 = torch.constant.int 1
    %int0_2014 = torch.constant.int 0
    %4411 = torch.prim.ListConstruct %int1_2013, %int0_2014 : (!torch.int, !torch.int) -> !torch.list<int>
    %4412 = torch.aten.permute %101, %4411 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2015 = torch.constant.int 1
    %int0_2016 = torch.constant.int 0
    %4413 = torch.prim.ListConstruct %int1_2015, %int0_2016 : (!torch.int, !torch.int) -> !torch.list<int>
    %4414 = torch.aten.permute %102, %4413 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2017 = torch.constant.int 1
    %int0_2018 = torch.constant.int 0
    %4415 = torch.prim.ListConstruct %int1_2017, %int0_2018 : (!torch.int, !torch.int) -> !torch.list<int>
    %4416 = torch.aten.permute %103, %4415 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
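    // Per-device K projections: each device's [4,?,4096] hidden states are flattened to
    // [?,4096], multiplied by that device's [4096,128] transposed weight shard, and reshaped
    // back to [4,?,128] (batch 4, dynamic sequence length, 128 columns per shard).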
    %int4_2019 = torch.constant.int 4
    %4417 = torch.aten.mul.int %int4_2019, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2020 = torch.constant.int 4096
    %4418 = torch.prim.ListConstruct %4417, %int4096_2020 : (!torch.int, !torch.int) -> !torch.list<int>
    %4419 = torch.aten.view %4329, %4418 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4419, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4420 = torch.aten.mm %4419, %4402 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4420, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2021 = torch.constant.int 4
    %int128_2022 = torch.constant.int 128
    %4421 = torch.prim.ListConstruct %int4_2021, %2482, %int128_2022 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4422 = torch.aten.view %4420, %4421 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2023 = torch.constant.int 4
    %4423 = torch.aten.mul.int %int4_2023, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2024 = torch.constant.int 4096
    %4424 = torch.prim.ListConstruct %4423, %int4096_2024 : (!torch.int, !torch.int) -> !torch.list<int>
    %4425 = torch.aten.view %4330, %4424 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4425, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4426 = torch.aten.mm %4425, %4404 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4426, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2025 = torch.constant.int 4
    %int128_2026 = torch.constant.int 128
    %4427 = torch.prim.ListConstruct %int4_2025, %2482, %int128_2026 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4428 = torch.aten.view %4426, %4427 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2027 = torch.constant.int 4
    %4429 = torch.aten.mul.int %int4_2027, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2028 = torch.constant.int 4096
    %4430 = torch.prim.ListConstruct %4429, %int4096_2028 : (!torch.int, !torch.int) -> !torch.list<int>
    %4431 = torch.aten.view %4331, %4430 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4431, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4432 = torch.aten.mm %4431, %4406 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4432, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2029 = torch.constant.int 4
    %int128_2030 = torch.constant.int 128
    %4433 = torch.prim.ListConstruct %int4_2029, %2482, %int128_2030 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4434 = torch.aten.view %4432, %4433 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2031 = torch.constant.int 4
    %4435 = torch.aten.mul.int %int4_2031, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2032 = torch.constant.int 4096
    %4436 = torch.prim.ListConstruct %4435, %int4096_2032 : (!torch.int, !torch.int) -> !torch.list<int>
    %4437 = torch.aten.view %4332, %4436 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4437, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4438 = torch.aten.mm %4437, %4408 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4438, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2033 = torch.constant.int 4
    %int128_2034 = torch.constant.int 128
    %4439 = torch.prim.ListConstruct %int4_2033, %2482, %int128_2034 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4440 = torch.aten.view %4438, %4439 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2035 = torch.constant.int 4
    %4441 = torch.aten.mul.int %int4_2035, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2036 = torch.constant.int 4096
    %4442 = torch.prim.ListConstruct %4441, %int4096_2036 : (!torch.int, !torch.int) -> !torch.list<int>
    %4443 = torch.aten.view %4333, %4442 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4443, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4444 = torch.aten.mm %4443, %4410 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4444, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2037 = torch.constant.int 4
    %int128_2038 = torch.constant.int 128
    %4445 = torch.prim.ListConstruct %int4_2037, %2482, %int128_2038 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4446 = torch.aten.view %4444, %4445 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2039 = torch.constant.int 4
    %4447 = torch.aten.mul.int %int4_2039, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2040 = torch.constant.int 4096
    %4448 = torch.prim.ListConstruct %4447, %int4096_2040 : (!torch.int, !torch.int) -> !torch.list<int>
    %4449 = torch.aten.view %4334, %4448 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4449, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4450 = torch.aten.mm %4449, %4412 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4450, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2041 = torch.constant.int 4
    %int128_2042 = torch.constant.int 128
    %4451 = torch.prim.ListConstruct %int4_2041, %2482, %int128_2042 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4452 = torch.aten.view %4450, %4451 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2043 = torch.constant.int 4
    %4453 = torch.aten.mul.int %int4_2043, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2044 = torch.constant.int 4096
    %4454 = torch.prim.ListConstruct %4453, %int4096_2044 : (!torch.int, !torch.int) -> !torch.list<int>
    %4455 = torch.aten.view %4335, %4454 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4455, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4456 = torch.aten.mm %4455, %4414 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4456, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2045 = torch.constant.int 4
    %int128_2046 = torch.constant.int 128
    %4457 = torch.prim.ListConstruct %int4_2045, %2482, %int128_2046 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4458 = torch.aten.view %4456, %4457 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2047 = torch.constant.int 4
    %4459 = torch.aten.mul.int %int4_2047, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2048 = torch.constant.int 4096
    %4460 = torch.prim.ListConstruct %4459, %int4096_2048 : (!torch.int, !torch.int) -> !torch.list<int>
    %4461 = torch.aten.view %4336, %4460 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4461, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4462 = torch.aten.mm %4461, %4416 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4462, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2049 = torch.constant.int 4
    %int128_2050 = torch.constant.int 128
    %4463 = torch.prim.ListConstruct %int4_2049, %2482, %int128_2050 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4464 = torch.aten.view %4462, %4463 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
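    // Same transpose pattern for the next eight [128,4096] globals %104-%111 (presumably the
    // per-device attn_v weight shards, by analogy with the q/k blocks above).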
    %int1_2051 = torch.constant.int 1
    %int0_2052 = torch.constant.int 0
    %4465 = torch.prim.ListConstruct %int1_2051, %int0_2052 : (!torch.int, !torch.int) -> !torch.list<int>
    %4466 = torch.aten.permute %104, %4465 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2053 = torch.constant.int 1
    %int0_2054 = torch.constant.int 0
    %4467 = torch.prim.ListConstruct %int1_2053, %int0_2054 : (!torch.int, !torch.int) -> !torch.list<int>
    %4468 = torch.aten.permute %105, %4467 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2055 = torch.constant.int 1
    %int0_2056 = torch.constant.int 0
    %4469 = torch.prim.ListConstruct %int1_2055, %int0_2056 : (!torch.int, !torch.int) -> !torch.list<int>
    %4470 = torch.aten.permute %106, %4469 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2057 = torch.constant.int 1
    %int0_2058 = torch.constant.int 0
    %4471 = torch.prim.ListConstruct %int1_2057, %int0_2058 : (!torch.int, !torch.int) -> !torch.list<int>
    %4472 = torch.aten.permute %107, %4471 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2059 = torch.constant.int 1
    %int0_2060 = torch.constant.int 0
    %4473 = torch.prim.ListConstruct %int1_2059, %int0_2060 : (!torch.int, !torch.int) -> !torch.list<int>
    %4474 = torch.aten.permute %108, %4473 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2061 = torch.constant.int 1
    %int0_2062 = torch.constant.int 0
    %4475 = torch.prim.ListConstruct %int1_2061, %int0_2062 : (!torch.int, !torch.int) -> !torch.list<int>
    %4476 = torch.aten.permute %109, %4475 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2063 = torch.constant.int 1
    %int0_2064 = torch.constant.int 0
    %4477 = torch.prim.ListConstruct %int1_2063, %int0_2064 : (!torch.int, !torch.int) -> !torch.list<int>
    %4478 = torch.aten.permute %110, %4477 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_2065 = torch.constant.int 1
    %int0_2066 = torch.constant.int 0
    %4479 = torch.prim.ListConstruct %int1_2065, %int0_2066 : (!torch.int, !torch.int) -> !torch.list<int>
    %4480 = torch.aten.permute %111, %4479 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
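    // Per-device V projections, mirroring the K path: flatten to [?,4096], mm against the
    // [4096,128] shard, reshape to [4,?,128].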
    %int4_2067 = torch.constant.int 4
    %4481 = torch.aten.mul.int %int4_2067, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2068 = torch.constant.int 4096
    %4482 = torch.prim.ListConstruct %4481, %int4096_2068 : (!torch.int, !torch.int) -> !torch.list<int>
    %4483 = torch.aten.view %4329, %4482 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4483, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4484 = torch.aten.mm %4483, %4466 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4484, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2069 = torch.constant.int 4
    %int128_2070 = torch.constant.int 128
    %4485 = torch.prim.ListConstruct %int4_2069, %2482, %int128_2070 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4486 = torch.aten.view %4484, %4485 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2071 = torch.constant.int 4
    %4487 = torch.aten.mul.int %int4_2071, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2072 = torch.constant.int 4096
    %4488 = torch.prim.ListConstruct %4487, %int4096_2072 : (!torch.int, !torch.int) -> !torch.list<int>
    %4489 = torch.aten.view %4330, %4488 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4489, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4490 = torch.aten.mm %4489, %4468 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4490, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2073 = torch.constant.int 4
    %int128_2074 = torch.constant.int 128
    %4491 = torch.prim.ListConstruct %int4_2073, %2482, %int128_2074 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4492 = torch.aten.view %4490, %4491 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2075 = torch.constant.int 4
    %4493 = torch.aten.mul.int %int4_2075, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2076 = torch.constant.int 4096
    %4494 = torch.prim.ListConstruct %4493, %int4096_2076 : (!torch.int, !torch.int) -> !torch.list<int>
    %4495 = torch.aten.view %4331, %4494 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4495, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4496 = torch.aten.mm %4495, %4470 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4496, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2077 = torch.constant.int 4
    %int128_2078 = torch.constant.int 128
    %4497 = torch.prim.ListConstruct %int4_2077, %2482, %int128_2078 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4498 = torch.aten.view %4496, %4497 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2079 = torch.constant.int 4
    %4499 = torch.aten.mul.int %int4_2079, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2080 = torch.constant.int 4096
    %4500 = torch.prim.ListConstruct %4499, %int4096_2080 : (!torch.int, !torch.int) -> !torch.list<int>
    %4501 = torch.aten.view %4332, %4500 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4501, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4502 = torch.aten.mm %4501, %4472 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4502, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2081 = torch.constant.int 4
    %int128_2082 = torch.constant.int 128
    %4503 = torch.prim.ListConstruct %int4_2081, %2482, %int128_2082 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4504 = torch.aten.view %4502, %4503 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2083 = torch.constant.int 4
    %4505 = torch.aten.mul.int %int4_2083, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2084 = torch.constant.int 4096
    %4506 = torch.prim.ListConstruct %4505, %int4096_2084 : (!torch.int, !torch.int) -> !torch.list<int>
    %4507 = torch.aten.view %4333, %4506 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4507, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4508 = torch.aten.mm %4507, %4474 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4508, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2085 = torch.constant.int 4
    %int128_2086 = torch.constant.int 128
    %4509 = torch.prim.ListConstruct %int4_2085, %2482, %int128_2086 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4510 = torch.aten.view %4508, %4509 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2087 = torch.constant.int 4
    %4511 = torch.aten.mul.int %int4_2087, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2088 = torch.constant.int 4096
    %4512 = torch.prim.ListConstruct %4511, %int4096_2088 : (!torch.int, !torch.int) -> !torch.list<int>
    %4513 = torch.aten.view %4334, %4512 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4513, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4514 = torch.aten.mm %4513, %4476 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4514, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2089 = torch.constant.int 4
    %int128_2090 = torch.constant.int 128
    %4515 = torch.prim.ListConstruct %int4_2089, %2482, %int128_2090 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4516 = torch.aten.view %4514, %4515 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2091 = torch.constant.int 4
    %4517 = torch.aten.mul.int %int4_2091, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2092 = torch.constant.int 4096
    %4518 = torch.prim.ListConstruct %4517, %int4096_2092 : (!torch.int, !torch.int) -> !torch.list<int>
    %4519 = torch.aten.view %4335, %4518 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4519, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4520 = torch.aten.mm %4519, %4478 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4520, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2093 = torch.constant.int 4
    %int128_2094 = torch.constant.int 128
    %4521 = torch.prim.ListConstruct %int4_2093, %2482, %int128_2094 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4522 = torch.aten.view %4520, %4521 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_2095 = torch.constant.int 4
    %4523 = torch.aten.mul.int %int4_2095, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_2096 = torch.constant.int 4096
    %4524 = torch.prim.ListConstruct %4523, %int4096_2096 : (!torch.int, !torch.int) -> !torch.list<int>
    %4525 = torch.aten.view %4336, %4524 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %4525, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %4526 = torch.aten.mm %4525, %4480 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %4526, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_2097 = torch.constant.int 4
    %int128_2098 = torch.constant.int 128
    %4527 = torch.prim.ListConstruct %int4_2097, %2482, %int128_2098 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4528 = torch.aten.view %4526, %4527 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %4528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
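    // Reshape the eight Q projection results (%4358 ... %4400, each [4,?,512]) to
    // [4,?,4,128]: four query heads of head_dim 128 per shard, consistent with 32 query
    // heads split across 8 devices.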
    %int4_2099 = torch.constant.int 4
    %int4_2100 = torch.constant.int 4
    %int128_2101 = torch.constant.int 128
    %4529 = torch.prim.ListConstruct %int4_2099, %2482, %int4_2100, %int128_2101 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4530 = torch.aten.view %4358, %4529 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2102 = torch.constant.int 4
    %int4_2103 = torch.constant.int 4
    %int128_2104 = torch.constant.int 128
    %4531 = torch.prim.ListConstruct %int4_2102, %2482, %int4_2103, %int128_2104 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4532 = torch.aten.view %4364, %4531 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2105 = torch.constant.int 4
    %int4_2106 = torch.constant.int 4
    %int128_2107 = torch.constant.int 128
    %4533 = torch.prim.ListConstruct %int4_2105, %2482, %int4_2106, %int128_2107 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4534 = torch.aten.view %4370, %4533 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2108 = torch.constant.int 4
    %int4_2109 = torch.constant.int 4
    %int128_2110 = torch.constant.int 128
    %4535 = torch.prim.ListConstruct %int4_2108, %2482, %int4_2109, %int128_2110 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4536 = torch.aten.view %4376, %4535 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2111 = torch.constant.int 4
    %int4_2112 = torch.constant.int 4
    %int128_2113 = torch.constant.int 128
    %4537 = torch.prim.ListConstruct %int4_2111, %2482, %int4_2112, %int128_2113 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4538 = torch.aten.view %4382, %4537 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2114 = torch.constant.int 4
    %int4_2115 = torch.constant.int 4
    %int128_2116 = torch.constant.int 128
    %4539 = torch.prim.ListConstruct %int4_2114, %2482, %int4_2115, %int128_2116 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4540 = torch.aten.view %4388, %4539 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2117 = torch.constant.int 4
    %int4_2118 = torch.constant.int 4
    %int128_2119 = torch.constant.int 128
    %4541 = torch.prim.ListConstruct %int4_2117, %2482, %int4_2118, %int128_2119 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4542 = torch.aten.view %4394, %4541 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2120 = torch.constant.int 4
    %int4_2121 = torch.constant.int 4
    %int128_2122 = torch.constant.int 128
    %4543 = torch.prim.ListConstruct %int4_2120, %2482, %int4_2121, %int128_2122 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4544 = torch.aten.view %4400, %4543 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
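    // Reshape the K projection results ([4,?,128]) to [4,?,1,128]: a single KV head per
    // device, consistent with grouped-query attention sharded 8 ways.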
    %int4_2123 = torch.constant.int 4
    %int1_2124 = torch.constant.int 1
    %int128_2125 = torch.constant.int 128
    %4545 = torch.prim.ListConstruct %int4_2123, %2482, %int1_2124, %int128_2125 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4546 = torch.aten.view %4422, %4545 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2126 = torch.constant.int 4
    %int1_2127 = torch.constant.int 1
    %int128_2128 = torch.constant.int 128
    %4547 = torch.prim.ListConstruct %int4_2126, %2482, %int1_2127, %int128_2128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4548 = torch.aten.view %4428, %4547 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2129 = torch.constant.int 4
    %int1_2130 = torch.constant.int 1
    %int128_2131 = torch.constant.int 128
    %4549 = torch.prim.ListConstruct %int4_2129, %2482, %int1_2130, %int128_2131 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4550 = torch.aten.view %4434, %4549 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2132 = torch.constant.int 4
    %int1_2133 = torch.constant.int 1
    %int128_2134 = torch.constant.int 128
    %4551 = torch.prim.ListConstruct %int4_2132, %2482, %int1_2133, %int128_2134 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4552 = torch.aten.view %4440, %4551 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2135 = torch.constant.int 4
    %int1_2136 = torch.constant.int 1
    %int128_2137 = torch.constant.int 128
    %4553 = torch.prim.ListConstruct %int4_2135, %2482, %int1_2136, %int128_2137 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4554 = torch.aten.view %4446, %4553 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2138 = torch.constant.int 4
    %int1_2139 = torch.constant.int 1
    %int128_2140 = torch.constant.int 128
    %4555 = torch.prim.ListConstruct %int4_2138, %2482, %int1_2139, %int128_2140 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4556 = torch.aten.view %4452, %4555 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2141 = torch.constant.int 4
    %int1_2142 = torch.constant.int 1
    %int128_2143 = torch.constant.int 128
    %4557 = torch.prim.ListConstruct %int4_2141, %2482, %int1_2142, %int128_2143 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4558 = torch.aten.view %4458, %4557 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2144 = torch.constant.int 4
    %int1_2145 = torch.constant.int 1
    %int128_2146 = torch.constant.int 128
    %4559 = torch.prim.ListConstruct %int4_2144, %2482, %int1_2145, %int128_2146 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4560 = torch.aten.view %4464, %4559 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
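    // The same [4,?,1,128] reshape for the eight V projection results.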
    %int4_2147 = torch.constant.int 4
    %int1_2148 = torch.constant.int 1
    %int128_2149 = torch.constant.int 128
    %4561 = torch.prim.ListConstruct %int4_2147, %2482, %int1_2148, %int128_2149 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4562 = torch.aten.view %4486, %4561 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2150 = torch.constant.int 4
    %int1_2151 = torch.constant.int 1
    %int128_2152 = torch.constant.int 128
    %4563 = torch.prim.ListConstruct %int4_2150, %2482, %int1_2151, %int128_2152 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4564 = torch.aten.view %4492, %4563 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2153 = torch.constant.int 4
    %int1_2154 = torch.constant.int 1
    %int128_2155 = torch.constant.int 128
    %4565 = torch.prim.ListConstruct %int4_2153, %2482, %int1_2154, %int128_2155 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4566 = torch.aten.view %4498, %4565 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2156 = torch.constant.int 4
    %int1_2157 = torch.constant.int 1
    %int128_2158 = torch.constant.int 128
    %4567 = torch.prim.ListConstruct %int4_2156, %2482, %int1_2157, %int128_2158 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4568 = torch.aten.view %4504, %4567 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2159 = torch.constant.int 4
    %int1_2160 = torch.constant.int 1
    %int128_2161 = torch.constant.int 128
    %4569 = torch.prim.ListConstruct %int4_2159, %2482, %int1_2160, %int128_2161 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4570 = torch.aten.view %4510, %4569 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2162 = torch.constant.int 4
    %int1_2163 = torch.constant.int 1
    %int128_2164 = torch.constant.int 128
    %4571 = torch.prim.ListConstruct %int4_2162, %2482, %int1_2163, %int128_2164 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4572 = torch.aten.view %4516, %4571 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2165 = torch.constant.int 4
    %int1_2166 = torch.constant.int 1
    %int128_2167 = torch.constant.int 128
    %4573 = torch.prim.ListConstruct %int4_2165, %2482, %int1_2166, %int128_2167 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4574 = torch.aten.view %4522, %4573 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_2168 = torch.constant.int 4
    %int1_2169 = torch.constant.int 1
    %int128_2170 = torch.constant.int 128
    %4575 = torch.prim.ListConstruct %int4_2168, %2482, %int1_2169, %int128_2170 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4576 = torch.aten.view %4528, %4575 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
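    // Rotary embedding (RoPE) table construction: positions arange(131072) and inverse
    // frequencies 1 / 500000^(2i/128) for i in [0,64) are combined by an outer product into
    // a [131072,64] angle matrix; cos/sin of the angles form the complex rotation factors.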
    %int131072_2171 = torch.constant.int 131072
    %none_2172 = torch.constant.none
    %none_2173 = torch.constant.none
    %cpu_2174 = torch.constant.device "cpu"
    %false_2175 = torch.constant.bool false
    %4577 = torch.aten.arange %int131072_2171, %none_2172, %none_2173, %cpu_2174, %false_2175 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_2176 = torch.constant.int 0
    %int128_2177 = torch.constant.int 128
    %int2_2178 = torch.constant.int 2
    %none_2179 = torch.constant.none
    %none_2180 = torch.constant.none
    %cpu_2181 = torch.constant.device "cpu"
    %false_2182 = torch.constant.bool false
    %4578 = torch.aten.arange.start_step %int0_2176, %int128_2177, %int2_2178, %none_2179, %none_2180, %cpu_2181, %false_2182 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_2183 = torch.constant.int 0
    %int0_2184 = torch.constant.int 0
    %int64_2185 = torch.constant.int 64
    %int1_2186 = torch.constant.int 1
    %4579 = torch.aten.slice.Tensor %4578, %int0_2183, %int0_2184, %int64_2185, %int1_2186 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_2187 = torch.constant.int 6
    %4580 = torch.prims.convert_element_type %4579, %int6_2187 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_2188 = torch.constant.int 128
    %4581 = torch.aten.div.Scalar %4580, %int128_2188 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_2189 = torch.constant.float 5.000000e+05
    %4582 = torch.aten.pow.Scalar %float5.000000e05_2189, %4581 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %4583 = torch.aten.reciprocal %4582 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_2190 = torch.constant.float 1.000000e+00
    %4584 = torch.aten.mul.Scalar %4583, %float1.000000e00_2190 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_2191 = torch.constant.int 131072
    %int1_2192 = torch.constant.int 1
    %4585 = torch.prim.ListConstruct %int131072_2191, %int1_2192 : (!torch.int, !torch.int) -> !torch.list<int>
    %4586 = torch.aten.view %4577, %4585 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %4587 = torch.aten.mul.Tensor %4586, %4584 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %4588 = torch.aten.cos %4587 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %4589 = torch.aten.sin %4587 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %4590 = torch.aten.complex %4588, %4589 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
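    // Broadcast the complex rotation table to all eight devices via flow.tensor.transfer.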
    %4591 = torch_c.to_builtin_tensor %4590 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4592 = flow.tensor.transfer %4591 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %4593 = torch_c.from_builtin_tensor %4592 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4594 = torch_c.to_builtin_tensor %4590 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4595 = flow.tensor.transfer %4594 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %4596 = torch_c.from_builtin_tensor %4595 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4597 = torch_c.to_builtin_tensor %4590 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4598 = flow.tensor.transfer %4597 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %4599 = torch_c.from_builtin_tensor %4598 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4600 = torch_c.to_builtin_tensor %4590 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4601 = flow.tensor.transfer %4600 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %4602 = torch_c.from_builtin_tensor %4601 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4603 = torch_c.to_builtin_tensor %4590 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4604 = flow.tensor.transfer %4603 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %4605 = torch_c.from_builtin_tensor %4604 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4606 = torch_c.to_builtin_tensor %4590 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4607 = flow.tensor.transfer %4606 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %4608 = torch_c.from_builtin_tensor %4607 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4609 = torch_c.to_builtin_tensor %4590 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4610 = flow.tensor.transfer %4609 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %4611 = torch_c.from_builtin_tensor %4610 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4612 = torch_c.to_builtin_tensor %4590 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4613 = flow.tensor.transfer %4612 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %4614 = torch_c.from_builtin_tensor %4613 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
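    // Apply RoPE to each Q shard in turn: slice the table to the current sequence length,
    // expand it to [1,?,1,64], bitcast the [4,?,4,128] f16 activations to [4,?,4,64]
    // complex<f16>, rotate by complex multiply, then bitcast back to [4,?,4,128] f32 and
    // truncate to f16. The same block repeats once per device below.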
    %int1_2193 = torch.constant.int 1
    %4615 = torch.aten.size.int %4358, %int1_2193 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_2194 = torch.constant.int 0
    %4616 = torch.aten.add.int %int0_2194, %4615 : !torch.int, !torch.int -> !torch.int
    %int0_2195 = torch.constant.int 0
    %int0_2196 = torch.constant.int 0
    %int1_2197 = torch.constant.int 1
    %4617 = torch.aten.slice.Tensor %4593, %int0_2195, %int0_2196, %4616, %int1_2197 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4617, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2198 = torch.constant.int 1
    %int0_2199 = torch.constant.int 0
    %int9223372036854775807_2200 = torch.constant.int 9223372036854775807
    %int1_2201 = torch.constant.int 1
    %4618 = torch.aten.slice.Tensor %4617, %int1_2198, %int0_2199, %int9223372036854775807_2200, %int1_2201 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4618, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2202 = torch.constant.int 0
    %4619 = torch.aten.unsqueeze %4618, %int0_2202 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4619, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2203 = torch.constant.int 2
    %4620 = torch.aten.unsqueeze %4619, %int2_2203 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4620, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2204 = torch.constant.int 3
    %int0_2205 = torch.constant.int 0
    %int9223372036854775807_2206 = torch.constant.int 9223372036854775807
    %int1_2207 = torch.constant.int 1
    %4621 = torch.aten.slice.Tensor %4620, %int3_2204, %int0_2205, %int9223372036854775807_2206, %int1_2207 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4621, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4622 = torch_c.to_builtin_tensor %4530 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_2208 = arith.constant 1 : index
    %dim_2209 = tensor.dim %4622, %c1_2208 : tensor<4x?x4x128xf16>
    %4623 = flow.tensor.bitcast %4622 : tensor<4x?x4x128xf16>{%dim_2209} -> tensor<4x?x4x64xcomplex<f16>>{%dim_2209}
    %4624 = torch_c.from_builtin_tensor %4623 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %4624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %4625 = torch.aten.mul.Tensor %4624, %4621 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %4625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %4626 = torch_c.to_builtin_tensor %4625 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_2210 = arith.constant 1 : index
    %dim_2211 = tensor.dim %4626, %c1_2210 : tensor<4x?x4x64xcomplex<f32>>
    %4627 = flow.tensor.bitcast %4626 : tensor<4x?x4x64xcomplex<f32>>{%dim_2211} -> tensor<4x?x4x128xf32>{%dim_2211}
    %4628 = torch_c.from_builtin_tensor %4627 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %4628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_2212 = torch.constant.int 5
    %4629 = torch.prims.convert_element_type %4628, %int5_2212 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_2213 = torch.constant.int 1
    %4630 = torch.aten.size.int %4364, %int1_2213 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_2214 = torch.constant.int 0
    %4631 = torch.aten.add.int %int0_2214, %4630 : !torch.int, !torch.int -> !torch.int
    %int0_2215 = torch.constant.int 0
    %int0_2216 = torch.constant.int 0
    %int1_2217 = torch.constant.int 1
    %4632 = torch.aten.slice.Tensor %4596, %int0_2215, %int0_2216, %4631, %int1_2217 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4632, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2218 = torch.constant.int 1
    %int0_2219 = torch.constant.int 0
    %int9223372036854775807_2220 = torch.constant.int 9223372036854775807
    %int1_2221 = torch.constant.int 1
    %4633 = torch.aten.slice.Tensor %4632, %int1_2218, %int0_2219, %int9223372036854775807_2220, %int1_2221 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4633, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2222 = torch.constant.int 0
    %4634 = torch.aten.unsqueeze %4633, %int0_2222 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4634, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2223 = torch.constant.int 2
    %4635 = torch.aten.unsqueeze %4634, %int2_2223 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4635, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2224 = torch.constant.int 3
    %int0_2225 = torch.constant.int 0
    %int9223372036854775807_2226 = torch.constant.int 9223372036854775807
    %int1_2227 = torch.constant.int 1
    %4636 = torch.aten.slice.Tensor %4635, %int3_2224, %int0_2225, %int9223372036854775807_2226, %int1_2227 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4636, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4637 = torch_c.to_builtin_tensor %4532 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_2228 = arith.constant 1 : index
    %dim_2229 = tensor.dim %4637, %c1_2228 : tensor<4x?x4x128xf16>
    %4638 = flow.tensor.bitcast %4637 : tensor<4x?x4x128xf16>{%dim_2229} -> tensor<4x?x4x64xcomplex<f16>>{%dim_2229}
    %4639 = torch_c.from_builtin_tensor %4638 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %4639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %4640 = torch.aten.mul.Tensor %4639, %4636 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %4640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %4641 = torch_c.to_builtin_tensor %4640 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_2230 = arith.constant 1 : index
    %dim_2231 = tensor.dim %4641, %c1_2230 : tensor<4x?x4x64xcomplex<f32>>
    %4642 = flow.tensor.bitcast %4641 : tensor<4x?x4x64xcomplex<f32>>{%dim_2231} -> tensor<4x?x4x128xf32>{%dim_2231}
    %4643 = torch_c.from_builtin_tensor %4642 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %4643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_2232 = torch.constant.int 5
    %4644 = torch.prims.convert_element_type %4643, %int5_2232 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_2233 = torch.constant.int 1
    %4645 = torch.aten.size.int %4370, %int1_2233 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_2234 = torch.constant.int 0
    %4646 = torch.aten.add.int %int0_2234, %4645 : !torch.int, !torch.int -> !torch.int
    %int0_2235 = torch.constant.int 0
    %int0_2236 = torch.constant.int 0
    %int1_2237 = torch.constant.int 1
    %4647 = torch.aten.slice.Tensor %4599, %int0_2235, %int0_2236, %4646, %int1_2237 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4647, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2238 = torch.constant.int 1
    %int0_2239 = torch.constant.int 0
    %int9223372036854775807_2240 = torch.constant.int 9223372036854775807
    %int1_2241 = torch.constant.int 1
    %4648 = torch.aten.slice.Tensor %4647, %int1_2238, %int0_2239, %int9223372036854775807_2240, %int1_2241 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4648, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2242 = torch.constant.int 0
    %4649 = torch.aten.unsqueeze %4648, %int0_2242 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4649, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2243 = torch.constant.int 2
    %4650 = torch.aten.unsqueeze %4649, %int2_2243 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4650, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2244 = torch.constant.int 3
    %int0_2245 = torch.constant.int 0
    %int9223372036854775807_2246 = torch.constant.int 9223372036854775807
    %int1_2247 = torch.constant.int 1
    %4651 = torch.aten.slice.Tensor %4650, %int3_2244, %int0_2245, %int9223372036854775807_2246, %int1_2247 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4651, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4652 = torch_c.to_builtin_tensor %4534 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_2248 = arith.constant 1 : index
    %dim_2249 = tensor.dim %4652, %c1_2248 : tensor<4x?x4x128xf16>
    %4653 = flow.tensor.bitcast %4652 : tensor<4x?x4x128xf16>{%dim_2249} -> tensor<4x?x4x64xcomplex<f16>>{%dim_2249}
    %4654 = torch_c.from_builtin_tensor %4653 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %4654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %4655 = torch.aten.mul.Tensor %4654, %4651 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %4655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %4656 = torch_c.to_builtin_tensor %4655 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_2250 = arith.constant 1 : index
    %dim_2251 = tensor.dim %4656, %c1_2250 : tensor<4x?x4x64xcomplex<f32>>
    %4657 = flow.tensor.bitcast %4656 : tensor<4x?x4x64xcomplex<f32>>{%dim_2251} -> tensor<4x?x4x128xf32>{%dim_2251}
    %4658 = torch_c.from_builtin_tensor %4657 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %4658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_2252 = torch.constant.int 5
    %4659 = torch.prims.convert_element_type %4658, %int5_2252 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_2253 = torch.constant.int 1
    %4660 = torch.aten.size.int %4376, %int1_2253 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_2254 = torch.constant.int 0
    %4661 = torch.aten.add.int %int0_2254, %4660 : !torch.int, !torch.int -> !torch.int
    %int0_2255 = torch.constant.int 0
    %int0_2256 = torch.constant.int 0
    %int1_2257 = torch.constant.int 1
    %4662 = torch.aten.slice.Tensor %4602, %int0_2255, %int0_2256, %4661, %int1_2257 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4662, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2258 = torch.constant.int 1
    %int0_2259 = torch.constant.int 0
    %int9223372036854775807_2260 = torch.constant.int 9223372036854775807
    %int1_2261 = torch.constant.int 1
    %4663 = torch.aten.slice.Tensor %4662, %int1_2258, %int0_2259, %int9223372036854775807_2260, %int1_2261 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4663, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2262 = torch.constant.int 0
    %4664 = torch.aten.unsqueeze %4663, %int0_2262 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4664, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2263 = torch.constant.int 2
    %4665 = torch.aten.unsqueeze %4664, %int2_2263 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4665, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2264 = torch.constant.int 3
    %int0_2265 = torch.constant.int 0
    %int9223372036854775807_2266 = torch.constant.int 9223372036854775807
    %int1_2267 = torch.constant.int 1
    %4666 = torch.aten.slice.Tensor %4665, %int3_2264, %int0_2265, %int9223372036854775807_2266, %int1_2267 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4666, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4667 = torch_c.to_builtin_tensor %4536 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_2268 = arith.constant 1 : index
    %dim_2269 = tensor.dim %4667, %c1_2268 : tensor<4x?x4x128xf16>
    %4668 = flow.tensor.bitcast %4667 : tensor<4x?x4x128xf16>{%dim_2269} -> tensor<4x?x4x64xcomplex<f16>>{%dim_2269}
    %4669 = torch_c.from_builtin_tensor %4668 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %4669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %4670 = torch.aten.mul.Tensor %4669, %4666 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %4670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %4671 = torch_c.to_builtin_tensor %4670 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_2270 = arith.constant 1 : index
    %dim_2271 = tensor.dim %4671, %c1_2270 : tensor<4x?x4x64xcomplex<f32>>
    %4672 = flow.tensor.bitcast %4671 : tensor<4x?x4x64xcomplex<f32>>{%dim_2271} -> tensor<4x?x4x128xf32>{%dim_2271}
    %4673 = torch_c.from_builtin_tensor %4672 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %4673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_2272 = torch.constant.int 5
    %4674 = torch.prims.convert_element_type %4673, %int5_2272 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
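    // The same rotary multiply now repeats for the remaining [4,?,4,128] shards
    // (%4538, %4540, %4542, %4544; by shape, likely the per-device query heads),
    // each sliced against its own device-local copy of the frequency table.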
    %int1_2273 = torch.constant.int 1
    %4675 = torch.aten.size.int %4382, %int1_2273 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_2274 = torch.constant.int 0
    %4676 = torch.aten.add.int %int0_2274, %4675 : !torch.int, !torch.int -> !torch.int
    %int0_2275 = torch.constant.int 0
    %int0_2276 = torch.constant.int 0
    %int1_2277 = torch.constant.int 1
    %4677 = torch.aten.slice.Tensor %4605, %int0_2275, %int0_2276, %4676, %int1_2277 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4677, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2278 = torch.constant.int 1
    %int0_2279 = torch.constant.int 0
    %int9223372036854775807_2280 = torch.constant.int 9223372036854775807
    %int1_2281 = torch.constant.int 1
    %4678 = torch.aten.slice.Tensor %4677, %int1_2278, %int0_2279, %int9223372036854775807_2280, %int1_2281 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4678, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2282 = torch.constant.int 0
    %4679 = torch.aten.unsqueeze %4678, %int0_2282 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4679, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2283 = torch.constant.int 2
    %4680 = torch.aten.unsqueeze %4679, %int2_2283 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4680, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2284 = torch.constant.int 3
    %int0_2285 = torch.constant.int 0
    %int9223372036854775807_2286 = torch.constant.int 9223372036854775807
    %int1_2287 = torch.constant.int 1
    %4681 = torch.aten.slice.Tensor %4680, %int3_2284, %int0_2285, %int9223372036854775807_2286, %int1_2287 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4681, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4682 = torch_c.to_builtin_tensor %4538 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_2288 = arith.constant 1 : index
    %dim_2289 = tensor.dim %4682, %c1_2288 : tensor<4x?x4x128xf16>
    %4683 = flow.tensor.bitcast %4682 : tensor<4x?x4x128xf16>{%dim_2289} -> tensor<4x?x4x64xcomplex<f16>>{%dim_2289}
    %4684 = torch_c.from_builtin_tensor %4683 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %4684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %4685 = torch.aten.mul.Tensor %4684, %4681 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %4685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %4686 = torch_c.to_builtin_tensor %4685 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_2290 = arith.constant 1 : index
    %dim_2291 = tensor.dim %4686, %c1_2290 : tensor<4x?x4x64xcomplex<f32>>
    %4687 = flow.tensor.bitcast %4686 : tensor<4x?x4x64xcomplex<f32>>{%dim_2291} -> tensor<4x?x4x128xf32>{%dim_2291}
    %4688 = torch_c.from_builtin_tensor %4687 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %4688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_2292 = torch.constant.int 5
    %4689 = torch.prims.convert_element_type %4688, %int5_2292 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_2293 = torch.constant.int 1
    %4690 = torch.aten.size.int %4388, %int1_2293 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_2294 = torch.constant.int 0
    %4691 = torch.aten.add.int %int0_2294, %4690 : !torch.int, !torch.int -> !torch.int
    %int0_2295 = torch.constant.int 0
    %int0_2296 = torch.constant.int 0
    %int1_2297 = torch.constant.int 1
    %4692 = torch.aten.slice.Tensor %4608, %int0_2295, %int0_2296, %4691, %int1_2297 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4692, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2298 = torch.constant.int 1
    %int0_2299 = torch.constant.int 0
    %int9223372036854775807_2300 = torch.constant.int 9223372036854775807
    %int1_2301 = torch.constant.int 1
    %4693 = torch.aten.slice.Tensor %4692, %int1_2298, %int0_2299, %int9223372036854775807_2300, %int1_2301 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4693, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2302 = torch.constant.int 0
    %4694 = torch.aten.unsqueeze %4693, %int0_2302 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4694, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2303 = torch.constant.int 2
    %4695 = torch.aten.unsqueeze %4694, %int2_2303 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4695, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2304 = torch.constant.int 3
    %int0_2305 = torch.constant.int 0
    %int9223372036854775807_2306 = torch.constant.int 9223372036854775807
    %int1_2307 = torch.constant.int 1
    %4696 = torch.aten.slice.Tensor %4695, %int3_2304, %int0_2305, %int9223372036854775807_2306, %int1_2307 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4696, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4697 = torch_c.to_builtin_tensor %4540 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_2308 = arith.constant 1 : index
    %dim_2309 = tensor.dim %4697, %c1_2308 : tensor<4x?x4x128xf16>
    %4698 = flow.tensor.bitcast %4697 : tensor<4x?x4x128xf16>{%dim_2309} -> tensor<4x?x4x64xcomplex<f16>>{%dim_2309}
    %4699 = torch_c.from_builtin_tensor %4698 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %4699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %4700 = torch.aten.mul.Tensor %4699, %4696 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %4700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %4701 = torch_c.to_builtin_tensor %4700 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_2310 = arith.constant 1 : index
    %dim_2311 = tensor.dim %4701, %c1_2310 : tensor<4x?x4x64xcomplex<f32>>
    %4702 = flow.tensor.bitcast %4701 : tensor<4x?x4x64xcomplex<f32>>{%dim_2311} -> tensor<4x?x4x128xf32>{%dim_2311}
    %4703 = torch_c.from_builtin_tensor %4702 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %4703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_2312 = torch.constant.int 5
    %4704 = torch.prims.convert_element_type %4703, %int5_2312 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_2313 = torch.constant.int 1
    %4705 = torch.aten.size.int %4394, %int1_2313 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_2314 = torch.constant.int 0
    %4706 = torch.aten.add.int %int0_2314, %4705 : !torch.int, !torch.int -> !torch.int
    %int0_2315 = torch.constant.int 0
    %int0_2316 = torch.constant.int 0
    %int1_2317 = torch.constant.int 1
    %4707 = torch.aten.slice.Tensor %4611, %int0_2315, %int0_2316, %4706, %int1_2317 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4707, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2318 = torch.constant.int 1
    %int0_2319 = torch.constant.int 0
    %int9223372036854775807_2320 = torch.constant.int 9223372036854775807
    %int1_2321 = torch.constant.int 1
    %4708 = torch.aten.slice.Tensor %4707, %int1_2318, %int0_2319, %int9223372036854775807_2320, %int1_2321 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4708, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2322 = torch.constant.int 0
    %4709 = torch.aten.unsqueeze %4708, %int0_2322 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4709, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2323 = torch.constant.int 2
    %4710 = torch.aten.unsqueeze %4709, %int2_2323 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4710, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2324 = torch.constant.int 3
    %int0_2325 = torch.constant.int 0
    %int9223372036854775807_2326 = torch.constant.int 9223372036854775807
    %int1_2327 = torch.constant.int 1
    %4711 = torch.aten.slice.Tensor %4710, %int3_2324, %int0_2325, %int9223372036854775807_2326, %int1_2327 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4711, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4712 = torch_c.to_builtin_tensor %4542 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_2328 = arith.constant 1 : index
    %dim_2329 = tensor.dim %4712, %c1_2328 : tensor<4x?x4x128xf16>
    %4713 = flow.tensor.bitcast %4712 : tensor<4x?x4x128xf16>{%dim_2329} -> tensor<4x?x4x64xcomplex<f16>>{%dim_2329}
    %4714 = torch_c.from_builtin_tensor %4713 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %4714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %4715 = torch.aten.mul.Tensor %4714, %4711 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %4715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %4716 = torch_c.to_builtin_tensor %4715 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_2330 = arith.constant 1 : index
    %dim_2331 = tensor.dim %4716, %c1_2330 : tensor<4x?x4x64xcomplex<f32>>
    %4717 = flow.tensor.bitcast %4716 : tensor<4x?x4x64xcomplex<f32>>{%dim_2331} -> tensor<4x?x4x128xf32>{%dim_2331}
    %4718 = torch_c.from_builtin_tensor %4717 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %4718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_2332 = torch.constant.int 5
    %4719 = torch.prims.convert_element_type %4718, %int5_2332 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_2333 = torch.constant.int 1
    %4720 = torch.aten.size.int %4400, %int1_2333 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_2334 = torch.constant.int 0
    %4721 = torch.aten.add.int %int0_2334, %4720 : !torch.int, !torch.int -> !torch.int
    %int0_2335 = torch.constant.int 0
    %int0_2336 = torch.constant.int 0
    %int1_2337 = torch.constant.int 1
    %4722 = torch.aten.slice.Tensor %4614, %int0_2335, %int0_2336, %4721, %int1_2337 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4722, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2338 = torch.constant.int 1
    %int0_2339 = torch.constant.int 0
    %int9223372036854775807_2340 = torch.constant.int 9223372036854775807
    %int1_2341 = torch.constant.int 1
    %4723 = torch.aten.slice.Tensor %4722, %int1_2338, %int0_2339, %int9223372036854775807_2340, %int1_2341 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4723, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2342 = torch.constant.int 0
    %4724 = torch.aten.unsqueeze %4723, %int0_2342 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4724, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2343 = torch.constant.int 2
    %4725 = torch.aten.unsqueeze %4724, %int2_2343 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4725, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2344 = torch.constant.int 3
    %int0_2345 = torch.constant.int 0
    %int9223372036854775807_2346 = torch.constant.int 9223372036854775807
    %int1_2347 = torch.constant.int 1
    %4726 = torch.aten.slice.Tensor %4725, %int3_2344, %int0_2345, %int9223372036854775807_2346, %int1_2347 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4726, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4727 = torch_c.to_builtin_tensor %4544 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_2348 = arith.constant 1 : index
    %dim_2349 = tensor.dim %4727, %c1_2348 : tensor<4x?x4x128xf16>
    %4728 = flow.tensor.bitcast %4727 : tensor<4x?x4x128xf16>{%dim_2349} -> tensor<4x?x4x64xcomplex<f16>>{%dim_2349}
    %4729 = torch_c.from_builtin_tensor %4728 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %4729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %4730 = torch.aten.mul.Tensor %4729, %4726 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %4730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %4731 = torch_c.to_builtin_tensor %4730 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_2350 = arith.constant 1 : index
    %dim_2351 = tensor.dim %4731, %c1_2350 : tensor<4x?x4x64xcomplex<f32>>
    %4732 = flow.tensor.bitcast %4731 : tensor<4x?x4x64xcomplex<f32>>{%dim_2351} -> tensor<4x?x4x128xf32>{%dim_2351}
    %4733 = torch_c.from_builtin_tensor %4732 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %4733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_2352 = torch.constant.int 5
    %4734 = torch.prims.convert_element_type %4733, %int5_2352 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %4734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
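    // Rebuild the rotary frequency table on the host before fanning it out:
    //   positions p = 0..131071; for i = 0..63, inv_freq_i = 1 / 500000^(2i/128);
    //   angle[p, i] = p * inv_freq_i; table[p, i] = cos(angle) + j*sin(angle),
    // yielding a [131072, 64] complex<f32> tensor (500000 is the RoPE base).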
    %int131072_2353 = torch.constant.int 131072
    %none_2354 = torch.constant.none
    %none_2355 = torch.constant.none
    %cpu_2356 = torch.constant.device "cpu"
    %false_2357 = torch.constant.bool false
    %4735 = torch.aten.arange %int131072_2353, %none_2354, %none_2355, %cpu_2356, %false_2357 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_2358 = torch.constant.int 0
    %int128_2359 = torch.constant.int 128
    %int2_2360 = torch.constant.int 2
    %none_2361 = torch.constant.none
    %none_2362 = torch.constant.none
    %cpu_2363 = torch.constant.device "cpu"
    %false_2364 = torch.constant.bool false
    %4736 = torch.aten.arange.start_step %int0_2358, %int128_2359, %int2_2360, %none_2361, %none_2362, %cpu_2363, %false_2364 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_2365 = torch.constant.int 0
    %int0_2366 = torch.constant.int 0
    %int64_2367 = torch.constant.int 64
    %int1_2368 = torch.constant.int 1
    %4737 = torch.aten.slice.Tensor %4736, %int0_2365, %int0_2366, %int64_2367, %int1_2368 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_2369 = torch.constant.int 6
    %4738 = torch.prims.convert_element_type %4737, %int6_2369 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_2370 = torch.constant.int 128
    %4739 = torch.aten.div.Scalar %4738, %int128_2370 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_2371 = torch.constant.float 5.000000e+05
    %4740 = torch.aten.pow.Scalar %float5.000000e05_2371, %4739 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %4741 = torch.aten.reciprocal %4740 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_2372 = torch.constant.float 1.000000e+00
    %4742 = torch.aten.mul.Scalar %4741, %float1.000000e00_2372 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_2373 = torch.constant.int 131072
    %int1_2374 = torch.constant.int 1
    %4743 = torch.prim.ListConstruct %int131072_2373, %int1_2374 : (!torch.int, !torch.int) -> !torch.list<int>
    %4744 = torch.aten.view %4735, %4743 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %4745 = torch.aten.mul.Tensor %4744, %4742 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %4746 = torch.aten.cos %4745 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %4747 = torch.aten.sin %4745 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %4748 = torch.aten.complex %4746, %4747 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
    %4749 = torch_c.to_builtin_tensor %4748 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
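    // Fan the table out: one flow.tensor.transfer per target, so each of
    // @__device_0 .. @__device_7 holds its own copy (%4751 .. %4772).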
    %4750 = flow.tensor.transfer %4749 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %4751 = torch_c.from_builtin_tensor %4750 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4752 = torch_c.to_builtin_tensor %4748 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4753 = flow.tensor.transfer %4752 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %4754 = torch_c.from_builtin_tensor %4753 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4755 = torch_c.to_builtin_tensor %4748 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4756 = flow.tensor.transfer %4755 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %4757 = torch_c.from_builtin_tensor %4756 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4758 = torch_c.to_builtin_tensor %4748 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4759 = flow.tensor.transfer %4758 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %4760 = torch_c.from_builtin_tensor %4759 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4761 = torch_c.to_builtin_tensor %4748 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4762 = flow.tensor.transfer %4761 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %4763 = torch_c.from_builtin_tensor %4762 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4764 = torch_c.to_builtin_tensor %4748 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4765 = flow.tensor.transfer %4764 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %4766 = torch_c.from_builtin_tensor %4765 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4767 = torch_c.to_builtin_tensor %4748 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4768 = flow.tensor.transfer %4767 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %4769 = torch_c.from_builtin_tensor %4768 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %4770 = torch_c.to_builtin_tensor %4748 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %4771 = flow.tensor.transfer %4770 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %4772 = torch_c.from_builtin_tensor %4771 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
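    // Apply the same rotation to the [4,?,1,128] tensors (%4546 .. %4560; by
    // shape, likely the single key head per device): slice the device-local
    // table to the sequence length, expand to [1, s0*16, 1, 64], bitcast the
    // f16 halves to complex<f16>, multiply, bitcast back, and truncate to f16.
    // @__device_0: %4546 rotated against table copy %4751.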
    %int1_2375 = torch.constant.int 1
    %4773 = torch.aten.size.int %4422, %int1_2375 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_2376 = torch.constant.int 0
    %4774 = torch.aten.add.int %int0_2376, %4773 : !torch.int, !torch.int -> !torch.int
    %int0_2377 = torch.constant.int 0
    %int0_2378 = torch.constant.int 0
    %int1_2379 = torch.constant.int 1
    %4775 = torch.aten.slice.Tensor %4751, %int0_2377, %int0_2378, %4774, %int1_2379 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4775, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2380 = torch.constant.int 1
    %int0_2381 = torch.constant.int 0
    %int9223372036854775807_2382 = torch.constant.int 9223372036854775807
    %int1_2383 = torch.constant.int 1
    %4776 = torch.aten.slice.Tensor %4775, %int1_2380, %int0_2381, %int9223372036854775807_2382, %int1_2383 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4776, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2384 = torch.constant.int 0
    %4777 = torch.aten.unsqueeze %4776, %int0_2384 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4777, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2385 = torch.constant.int 2
    %4778 = torch.aten.unsqueeze %4777, %int2_2385 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4778, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2386 = torch.constant.int 3
    %int0_2387 = torch.constant.int 0
    %int9223372036854775807_2388 = torch.constant.int 9223372036854775807
    %int1_2389 = torch.constant.int 1
    %4779 = torch.aten.slice.Tensor %4778, %int3_2386, %int0_2387, %int9223372036854775807_2388, %int1_2389 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4779, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4780 = torch_c.to_builtin_tensor %4546 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_2390 = arith.constant 1 : index
    %dim_2391 = tensor.dim %4780, %c1_2390 : tensor<4x?x1x128xf16>
    %4781 = flow.tensor.bitcast %4780 : tensor<4x?x1x128xf16>{%dim_2391} -> tensor<4x?x1x64xcomplex<f16>>{%dim_2391}
    %4782 = torch_c.from_builtin_tensor %4781 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %4782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %4783 = torch.aten.mul.Tensor %4782, %4779 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %4784 = torch_c.to_builtin_tensor %4783 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_2392 = arith.constant 1 : index
    %dim_2393 = tensor.dim %4784, %c1_2392 : tensor<4x?x1x64xcomplex<f32>>
    %4785 = flow.tensor.bitcast %4784 : tensor<4x?x1x64xcomplex<f32>>{%dim_2393} -> tensor<4x?x1x128xf32>{%dim_2393}
    %4786 = torch_c.from_builtin_tensor %4785 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %4786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_2394 = torch.constant.int 5
    %4787 = torch.prims.convert_element_type %4786, %int5_2394 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
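    // @__device_1: %4548 rotated against table copy %4754.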
    %int1_2395 = torch.constant.int 1
    %4788 = torch.aten.size.int %4428, %int1_2395 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_2396 = torch.constant.int 0
    %4789 = torch.aten.add.int %int0_2396, %4788 : !torch.int, !torch.int -> !torch.int
    %int0_2397 = torch.constant.int 0
    %int0_2398 = torch.constant.int 0
    %int1_2399 = torch.constant.int 1
    %4790 = torch.aten.slice.Tensor %4754, %int0_2397, %int0_2398, %4789, %int1_2399 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4790, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2400 = torch.constant.int 1
    %int0_2401 = torch.constant.int 0
    %int9223372036854775807_2402 = torch.constant.int 9223372036854775807
    %int1_2403 = torch.constant.int 1
    %4791 = torch.aten.slice.Tensor %4790, %int1_2400, %int0_2401, %int9223372036854775807_2402, %int1_2403 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4791, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2404 = torch.constant.int 0
    %4792 = torch.aten.unsqueeze %4791, %int0_2404 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4792, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2405 = torch.constant.int 2
    %4793 = torch.aten.unsqueeze %4792, %int2_2405 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4793, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2406 = torch.constant.int 3
    %int0_2407 = torch.constant.int 0
    %int9223372036854775807_2408 = torch.constant.int 9223372036854775807
    %int1_2409 = torch.constant.int 1
    %4794 = torch.aten.slice.Tensor %4793, %int3_2406, %int0_2407, %int9223372036854775807_2408, %int1_2409 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4794, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4795 = torch_c.to_builtin_tensor %4548 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_2410 = arith.constant 1 : index
    %dim_2411 = tensor.dim %4795, %c1_2410 : tensor<4x?x1x128xf16>
    %4796 = flow.tensor.bitcast %4795 : tensor<4x?x1x128xf16>{%dim_2411} -> tensor<4x?x1x64xcomplex<f16>>{%dim_2411}
    %4797 = torch_c.from_builtin_tensor %4796 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %4797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %4798 = torch.aten.mul.Tensor %4797, %4794 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %4799 = torch_c.to_builtin_tensor %4798 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_2412 = arith.constant 1 : index
    %dim_2413 = tensor.dim %4799, %c1_2412 : tensor<4x?x1x64xcomplex<f32>>
    %4800 = flow.tensor.bitcast %4799 : tensor<4x?x1x64xcomplex<f32>>{%dim_2413} -> tensor<4x?x1x128xf32>{%dim_2413}
    %4801 = torch_c.from_builtin_tensor %4800 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %4801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_2414 = torch.constant.int 5
    %4802 = torch.prims.convert_element_type %4801, %int5_2414 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
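    // @__device_2: %4550 rotated against table copy %4757.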
    %int1_2415 = torch.constant.int 1
    %4803 = torch.aten.size.int %4434, %int1_2415 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_2416 = torch.constant.int 0
    %4804 = torch.aten.add.int %int0_2416, %4803 : !torch.int, !torch.int -> !torch.int
    %int0_2417 = torch.constant.int 0
    %int0_2418 = torch.constant.int 0
    %int1_2419 = torch.constant.int 1
    %4805 = torch.aten.slice.Tensor %4757, %int0_2417, %int0_2418, %4804, %int1_2419 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4805, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2420 = torch.constant.int 1
    %int0_2421 = torch.constant.int 0
    %int9223372036854775807_2422 = torch.constant.int 9223372036854775807
    %int1_2423 = torch.constant.int 1
    %4806 = torch.aten.slice.Tensor %4805, %int1_2420, %int0_2421, %int9223372036854775807_2422, %int1_2423 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4806, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2424 = torch.constant.int 0
    %4807 = torch.aten.unsqueeze %4806, %int0_2424 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4807, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2425 = torch.constant.int 2
    %4808 = torch.aten.unsqueeze %4807, %int2_2425 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4808, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2426 = torch.constant.int 3
    %int0_2427 = torch.constant.int 0
    %int9223372036854775807_2428 = torch.constant.int 9223372036854775807
    %int1_2429 = torch.constant.int 1
    %4809 = torch.aten.slice.Tensor %4808, %int3_2426, %int0_2427, %int9223372036854775807_2428, %int1_2429 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4809, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4810 = torch_c.to_builtin_tensor %4550 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_2430 = arith.constant 1 : index
    %dim_2431 = tensor.dim %4810, %c1_2430 : tensor<4x?x1x128xf16>
    %4811 = flow.tensor.bitcast %4810 : tensor<4x?x1x128xf16>{%dim_2431} -> tensor<4x?x1x64xcomplex<f16>>{%dim_2431}
    %4812 = torch_c.from_builtin_tensor %4811 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %4812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %4813 = torch.aten.mul.Tensor %4812, %4809 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %4814 = torch_c.to_builtin_tensor %4813 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_2432 = arith.constant 1 : index
    %dim_2433 = tensor.dim %4814, %c1_2432 : tensor<4x?x1x64xcomplex<f32>>
    %4815 = flow.tensor.bitcast %4814 : tensor<4x?x1x64xcomplex<f32>>{%dim_2433} -> tensor<4x?x1x128xf32>{%dim_2433}
    %4816 = torch_c.from_builtin_tensor %4815 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %4816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_2434 = torch.constant.int 5
    %4817 = torch.prims.convert_element_type %4816, %int5_2434 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
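    // @__device_3: %4552 rotated against table copy %4760.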
    %int1_2435 = torch.constant.int 1
    %4818 = torch.aten.size.int %4440, %int1_2435 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_2436 = torch.constant.int 0
    %4819 = torch.aten.add.int %int0_2436, %4818 : !torch.int, !torch.int -> !torch.int
    %int0_2437 = torch.constant.int 0
    %int0_2438 = torch.constant.int 0
    %int1_2439 = torch.constant.int 1
    %4820 = torch.aten.slice.Tensor %4760, %int0_2437, %int0_2438, %4819, %int1_2439 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4820, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2440 = torch.constant.int 1
    %int0_2441 = torch.constant.int 0
    %int9223372036854775807_2442 = torch.constant.int 9223372036854775807
    %int1_2443 = torch.constant.int 1
    %4821 = torch.aten.slice.Tensor %4820, %int1_2440, %int0_2441, %int9223372036854775807_2442, %int1_2443 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4821, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2444 = torch.constant.int 0
    %4822 = torch.aten.unsqueeze %4821, %int0_2444 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4822, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2445 = torch.constant.int 2
    %4823 = torch.aten.unsqueeze %4822, %int2_2445 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4823, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2446 = torch.constant.int 3
    %int0_2447 = torch.constant.int 0
    %int9223372036854775807_2448 = torch.constant.int 9223372036854775807
    %int1_2449 = torch.constant.int 1
    %4824 = torch.aten.slice.Tensor %4823, %int3_2446, %int0_2447, %int9223372036854775807_2448, %int1_2449 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4824, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4825 = torch_c.to_builtin_tensor %4552 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_2450 = arith.constant 1 : index
    %dim_2451 = tensor.dim %4825, %c1_2450 : tensor<4x?x1x128xf16>
    %4826 = flow.tensor.bitcast %4825 : tensor<4x?x1x128xf16>{%dim_2451} -> tensor<4x?x1x64xcomplex<f16>>{%dim_2451}
    %4827 = torch_c.from_builtin_tensor %4826 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %4827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %4828 = torch.aten.mul.Tensor %4827, %4824 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %4829 = torch_c.to_builtin_tensor %4828 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_2452 = arith.constant 1 : index
    %dim_2453 = tensor.dim %4829, %c1_2452 : tensor<4x?x1x64xcomplex<f32>>
    %4830 = flow.tensor.bitcast %4829 : tensor<4x?x1x64xcomplex<f32>>{%dim_2453} -> tensor<4x?x1x128xf32>{%dim_2453}
    %4831 = torch_c.from_builtin_tensor %4830 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %4831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_2454 = torch.constant.int 5
    %4832 = torch.prims.convert_element_type %4831, %int5_2454 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
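    // @__device_4: %4554 rotated against table copy %4763.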
    %int1_2455 = torch.constant.int 1
    %4833 = torch.aten.size.int %4446, %int1_2455 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_2456 = torch.constant.int 0
    %4834 = torch.aten.add.int %int0_2456, %4833 : !torch.int, !torch.int -> !torch.int
    %int0_2457 = torch.constant.int 0
    %int0_2458 = torch.constant.int 0
    %int1_2459 = torch.constant.int 1
    %4835 = torch.aten.slice.Tensor %4763, %int0_2457, %int0_2458, %4834, %int1_2459 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4835, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2460 = torch.constant.int 1
    %int0_2461 = torch.constant.int 0
    %int9223372036854775807_2462 = torch.constant.int 9223372036854775807
    %int1_2463 = torch.constant.int 1
    %4836 = torch.aten.slice.Tensor %4835, %int1_2460, %int0_2461, %int9223372036854775807_2462, %int1_2463 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4836, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2464 = torch.constant.int 0
    %4837 = torch.aten.unsqueeze %4836, %int0_2464 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4837, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2465 = torch.constant.int 2
    %4838 = torch.aten.unsqueeze %4837, %int2_2465 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4838, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2466 = torch.constant.int 3
    %int0_2467 = torch.constant.int 0
    %int9223372036854775807_2468 = torch.constant.int 9223372036854775807
    %int1_2469 = torch.constant.int 1
    %4839 = torch.aten.slice.Tensor %4838, %int3_2466, %int0_2467, %int9223372036854775807_2468, %int1_2469 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4839, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4840 = torch_c.to_builtin_tensor %4554 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_2470 = arith.constant 1 : index
    %dim_2471 = tensor.dim %4840, %c1_2470 : tensor<4x?x1x128xf16>
    %4841 = flow.tensor.bitcast %4840 : tensor<4x?x1x128xf16>{%dim_2471} -> tensor<4x?x1x64xcomplex<f16>>{%dim_2471}
    %4842 = torch_c.from_builtin_tensor %4841 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %4842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %4843 = torch.aten.mul.Tensor %4842, %4839 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %4844 = torch_c.to_builtin_tensor %4843 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_2472 = arith.constant 1 : index
    %dim_2473 = tensor.dim %4844, %c1_2472 : tensor<4x?x1x64xcomplex<f32>>
    %4845 = flow.tensor.bitcast %4844 : tensor<4x?x1x64xcomplex<f32>>{%dim_2473} -> tensor<4x?x1x128xf32>{%dim_2473}
    %4846 = torch_c.from_builtin_tensor %4845 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %4846, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_2474 = torch.constant.int 5
    %4847 = torch.prims.convert_element_type %4846, %int5_2474 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
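    // @__device_5: %4556 rotated against table copy %4766.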
    %int1_2475 = torch.constant.int 1
    %4848 = torch.aten.size.int %4452, %int1_2475 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_2476 = torch.constant.int 0
    %4849 = torch.aten.add.int %int0_2476, %4848 : !torch.int, !torch.int -> !torch.int
    %int0_2477 = torch.constant.int 0
    %int0_2478 = torch.constant.int 0
    %int1_2479 = torch.constant.int 1
    %4850 = torch.aten.slice.Tensor %4766, %int0_2477, %int0_2478, %4849, %int1_2479 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4850, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2480 = torch.constant.int 1
    %int0_2481 = torch.constant.int 0
    %int9223372036854775807_2482 = torch.constant.int 9223372036854775807
    %int1_2483 = torch.constant.int 1
    %4851 = torch.aten.slice.Tensor %4850, %int1_2480, %int0_2481, %int9223372036854775807_2482, %int1_2483 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4851, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2484 = torch.constant.int 0
    %4852 = torch.aten.unsqueeze %4851, %int0_2484 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4852, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2485 = torch.constant.int 2
    %4853 = torch.aten.unsqueeze %4852, %int2_2485 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4853, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2486 = torch.constant.int 3
    %int0_2487 = torch.constant.int 0
    %int9223372036854775807_2488 = torch.constant.int 9223372036854775807
    %int1_2489 = torch.constant.int 1
    %4854 = torch.aten.slice.Tensor %4853, %int3_2486, %int0_2487, %int9223372036854775807_2488, %int1_2489 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4854, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4855 = torch_c.to_builtin_tensor %4556 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_2490 = arith.constant 1 : index
    %dim_2491 = tensor.dim %4855, %c1_2490 : tensor<4x?x1x128xf16>
    %4856 = flow.tensor.bitcast %4855 : tensor<4x?x1x128xf16>{%dim_2491} -> tensor<4x?x1x64xcomplex<f16>>{%dim_2491}
    %4857 = torch_c.from_builtin_tensor %4856 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %4857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %4858 = torch.aten.mul.Tensor %4857, %4854 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %4859 = torch_c.to_builtin_tensor %4858 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_2492 = arith.constant 1 : index
    %dim_2493 = tensor.dim %4859, %c1_2492 : tensor<4x?x1x64xcomplex<f32>>
    %4860 = flow.tensor.bitcast %4859 : tensor<4x?x1x64xcomplex<f32>>{%dim_2493} -> tensor<4x?x1x128xf32>{%dim_2493}
    %4861 = torch_c.from_builtin_tensor %4860 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %4861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_2494 = torch.constant.int 5
    %4862 = torch.prims.convert_element_type %4861, %int5_2494 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
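    // @__device_6: %4558 rotated against table copy %4769.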
    %int1_2495 = torch.constant.int 1
    %4863 = torch.aten.size.int %4458, %int1_2495 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_2496 = torch.constant.int 0
    %4864 = torch.aten.add.int %int0_2496, %4863 : !torch.int, !torch.int -> !torch.int
    %int0_2497 = torch.constant.int 0
    %int0_2498 = torch.constant.int 0
    %int1_2499 = torch.constant.int 1
    %4865 = torch.aten.slice.Tensor %4769, %int0_2497, %int0_2498, %4864, %int1_2499 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4865, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2500 = torch.constant.int 1
    %int0_2501 = torch.constant.int 0
    %int9223372036854775807_2502 = torch.constant.int 9223372036854775807
    %int1_2503 = torch.constant.int 1
    %4866 = torch.aten.slice.Tensor %4865, %int1_2500, %int0_2501, %int9223372036854775807_2502, %int1_2503 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4866, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2504 = torch.constant.int 0
    %4867 = torch.aten.unsqueeze %4866, %int0_2504 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4867, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2505 = torch.constant.int 2
    %4868 = torch.aten.unsqueeze %4867, %int2_2505 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4868, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2506 = torch.constant.int 3
    %int0_2507 = torch.constant.int 0
    %int9223372036854775807_2508 = torch.constant.int 9223372036854775807
    %int1_2509 = torch.constant.int 1
    %4869 = torch.aten.slice.Tensor %4868, %int3_2506, %int0_2507, %int9223372036854775807_2508, %int1_2509 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4869, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4870 = torch_c.to_builtin_tensor %4558 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_2510 = arith.constant 1 : index
    %dim_2511 = tensor.dim %4870, %c1_2510 : tensor<4x?x1x128xf16>
    %4871 = flow.tensor.bitcast %4870 : tensor<4x?x1x128xf16>{%dim_2511} -> tensor<4x?x1x64xcomplex<f16>>{%dim_2511}
    %4872 = torch_c.from_builtin_tensor %4871 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %4872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %4873 = torch.aten.mul.Tensor %4872, %4869 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %4874 = torch_c.to_builtin_tensor %4873 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_2512 = arith.constant 1 : index
    %dim_2513 = tensor.dim %4874, %c1_2512 : tensor<4x?x1x64xcomplex<f32>>
    %4875 = flow.tensor.bitcast %4874 : tensor<4x?x1x64xcomplex<f32>>{%dim_2513} -> tensor<4x?x1x128xf32>{%dim_2513}
    %4876 = torch_c.from_builtin_tensor %4875 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %4876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_2514 = torch.constant.int 5
    %4877 = torch.prims.convert_element_type %4876, %int5_2514 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
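    // @__device_7: %4560 rotated against table copy %4772.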
    %int1_2515 = torch.constant.int 1
    %4878 = torch.aten.size.int %4464, %int1_2515 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_2516 = torch.constant.int 0
    %4879 = torch.aten.add.int %int0_2516, %4878 : !torch.int, !torch.int -> !torch.int
    %int0_2517 = torch.constant.int 0
    %int0_2518 = torch.constant.int 0
    %int1_2519 = torch.constant.int 1
    %4880 = torch.aten.slice.Tensor %4772, %int0_2517, %int0_2518, %4879, %int1_2519 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4880, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_2520 = torch.constant.int 1
    %int0_2521 = torch.constant.int 0
    %int9223372036854775807_2522 = torch.constant.int 9223372036854775807
    %int1_2523 = torch.constant.int 1
    %4881 = torch.aten.slice.Tensor %4880, %int1_2520, %int0_2521, %int9223372036854775807_2522, %int1_2523 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %4881, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_2524 = torch.constant.int 0
    %4882 = torch.aten.unsqueeze %4881, %int0_2524 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %4882, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_2525 = torch.constant.int 2
    %4883 = torch.aten.unsqueeze %4882, %int2_2525 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4883, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_2526 = torch.constant.int 3
    %int0_2527 = torch.constant.int 0
    %int9223372036854775807_2528 = torch.constant.int 9223372036854775807
    %int1_2529 = torch.constant.int 1
    %4884 = torch.aten.slice.Tensor %4883, %int3_2526, %int0_2527, %int9223372036854775807_2528, %int1_2529 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4884, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %4885 = torch_c.to_builtin_tensor %4560 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_2530 = arith.constant 1 : index
    %dim_2531 = tensor.dim %4885, %c1_2530 : tensor<4x?x1x128xf16>
    %4886 = flow.tensor.bitcast %4885 : tensor<4x?x1x128xf16>{%dim_2531} -> tensor<4x?x1x64xcomplex<f16>>{%dim_2531}
    %4887 = torch_c.from_builtin_tensor %4886 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %4887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %4888 = torch.aten.mul.Tensor %4887, %4884 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %4888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %4889 = torch_c.to_builtin_tensor %4888 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_2532 = arith.constant 1 : index
    %dim_2533 = tensor.dim %4889, %c1_2532 : tensor<4x?x1x64xcomplex<f32>>
    %4890 = flow.tensor.bitcast %4889 : tensor<4x?x1x64xcomplex<f32>>{%dim_2533} -> tensor<4x?x1x128xf32>{%dim_2533}
    %4891 = torch_c.from_builtin_tensor %4890 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %4891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_2534 = torch.constant.int 5
    %4892 = torch.prims.convert_element_type %4891, %int5_2534 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %4892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
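    // Index arithmetic on the eight per-device [4,?] si64 tensors
    // (%2364 .. %2385): multiply each entry by 64, presumably the stride of a
    // page in the KV cache (the layout is fixed where these values originate).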
    %int64_2535 = torch.constant.int 64
    %4893 = torch.aten.mul.Scalar %2364, %int64_2535 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4893, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_2536 = torch.constant.int 64
    %4894 = torch.aten.mul.Scalar %2367, %int64_2536 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4894, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_2537 = torch.constant.int 64
    %4895 = torch.aten.mul.Scalar %2370, %int64_2537 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4895, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_2538 = torch.constant.int 64
    %4896 = torch.aten.mul.Scalar %2373, %int64_2538 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4896, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_2539 = torch.constant.int 64
    %4897 = torch.aten.mul.Scalar %2376, %int64_2539 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4897, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_2540 = torch.constant.int 64
    %4898 = torch.aten.mul.Scalar %2379, %int64_2540 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4898, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_2541 = torch.constant.int 64
    %4899 = torch.aten.mul.Scalar %2382, %int64_2541 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4899, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_2542 = torch.constant.int 64
    %4900 = torch.aten.mul.Scalar %2385, %int64_2542 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4900, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
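    // Add slot offset 2 within each page (likely layer_index * 2 + 0 for this
    // block's K entries, given the (layer, K/V) slot layout).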
    %int2_2543 = torch.constant.int 2
    %int1_2544 = torch.constant.int 1
    %4901 = torch.aten.add.Scalar %4893, %int2_2543, %int1_2544 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4901, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int2_2545 = torch.constant.int 2
    %int1_2546 = torch.constant.int 1
    %4902 = torch.aten.add.Scalar %4894, %int2_2545, %int1_2546 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4902, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int2_2547 = torch.constant.int 2
    %int1_2548 = torch.constant.int 1
    %4903 = torch.aten.add.Scalar %4895, %int2_2547, %int1_2548 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4903, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int2_2549 = torch.constant.int 2
    %int1_2550 = torch.constant.int 1
    %4904 = torch.aten.add.Scalar %4896, %int2_2549, %int1_2550 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4904, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int2_2551 = torch.constant.int 2
    %int1_2552 = torch.constant.int 1
    %4905 = torch.aten.add.Scalar %4897, %int2_2551, %int1_2552 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4905, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int2_2553 = torch.constant.int 2
    %int1_2554 = torch.constant.int 1
    %4906 = torch.aten.add.Scalar %4898, %int2_2553, %int1_2554 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4906, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int2_2555 = torch.constant.int 2
    %int1_2556 = torch.constant.int 1
    %4907 = torch.aten.add.Scalar %4899, %int2_2555, %int1_2556 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4907, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int2_2557 = torch.constant.int 2
    %int1_2558 = torch.constant.int 1
    %4908 = torch.aten.add.Scalar %4900, %int2_2557, %int1_2558 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %4908, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
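    // View each rotated shard [4, s0*16, 1, 128] as [4, s0, 16, 1, 128]:
    // the sequence dimension splits into s0 pages of 16 positions.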
    %int4_2559 = torch.constant.int 4
    %int16_2560 = torch.constant.int 16
    %int1_2561 = torch.constant.int 1
    %int128_2562 = torch.constant.int 128
    %4909 = torch.prim.ListConstruct %int4_2559, %3095, %int16_2560, %int1_2561, %int128_2562 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4910 = torch.aten.view %4787, %4909 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4910, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2563 = torch.constant.int 4
    %int16_2564 = torch.constant.int 16
    %int1_2565 = torch.constant.int 1
    %int128_2566 = torch.constant.int 128
    %4911 = torch.prim.ListConstruct %int4_2563, %3095, %int16_2564, %int1_2565, %int128_2566 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4912 = torch.aten.view %4802, %4911 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4912, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2567 = torch.constant.int 4
    %int16_2568 = torch.constant.int 16
    %int1_2569 = torch.constant.int 1
    %int128_2570 = torch.constant.int 128
    %4913 = torch.prim.ListConstruct %int4_2567, %3095, %int16_2568, %int1_2569, %int128_2570 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4914 = torch.aten.view %4817, %4913 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4914, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2571 = torch.constant.int 4
    %int16_2572 = torch.constant.int 16
    %int1_2573 = torch.constant.int 1
    %int128_2574 = torch.constant.int 128
    %4915 = torch.prim.ListConstruct %int4_2571, %3095, %int16_2572, %int1_2573, %int128_2574 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4916 = torch.aten.view %4832, %4915 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4916, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2575 = torch.constant.int 4
    %int16_2576 = torch.constant.int 16
    %int1_2577 = torch.constant.int 1
    %int128_2578 = torch.constant.int 128
    %4917 = torch.prim.ListConstruct %int4_2575, %3095, %int16_2576, %int1_2577, %int128_2578 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4918 = torch.aten.view %4847, %4917 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4918, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2579 = torch.constant.int 4
    %int16_2580 = torch.constant.int 16
    %int1_2581 = torch.constant.int 1
    %int128_2582 = torch.constant.int 128
    %4919 = torch.prim.ListConstruct %int4_2579, %3095, %int16_2580, %int1_2581, %int128_2582 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4920 = torch.aten.view %4862, %4919 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4920, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2583 = torch.constant.int 4
    %int16_2584 = torch.constant.int 16
    %int1_2585 = torch.constant.int 1
    %int128_2586 = torch.constant.int 128
    %4921 = torch.prim.ListConstruct %int4_2583, %3095, %int16_2584, %int1_2585, %int128_2586 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4922 = torch.aten.view %4877, %4921 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4922, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2587 = torch.constant.int 4
    %int16_2588 = torch.constant.int 16
    %int1_2589 = torch.constant.int 1
    %int128_2590 = torch.constant.int 128
    %4923 = torch.prim.ListConstruct %int4_2587, %3095, %int16_2588, %int1_2589, %int128_2590 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4924 = torch.aten.view %4892, %4923 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4924, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
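    // Merge batch and page dims: [4, s0, 16, 1, 128] -> [4*s0, 16, 1, 128] per shard.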
    %int4_2591 = torch.constant.int 4
    %4925 = torch.aten.mul.int %int4_2591, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2592 = torch.constant.int 16
    %int1_2593 = torch.constant.int 1
    %int128_2594 = torch.constant.int 128
    %4926 = torch.prim.ListConstruct %4925, %int16_2592, %int1_2593, %int128_2594 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4927 = torch.aten.view %4910, %4926 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %4927, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2595 = torch.constant.int 4
    %4928 = torch.aten.mul.int %int4_2595, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2596 = torch.constant.int 16
    %int1_2597 = torch.constant.int 1
    %int128_2598 = torch.constant.int 128
    %4929 = torch.prim.ListConstruct %4928, %int16_2596, %int1_2597, %int128_2598 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4930 = torch.aten.view %4912, %4929 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %4930, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2599 = torch.constant.int 4
    %4931 = torch.aten.mul.int %int4_2599, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2600 = torch.constant.int 16
    %int1_2601 = torch.constant.int 1
    %int128_2602 = torch.constant.int 128
    %4932 = torch.prim.ListConstruct %4931, %int16_2600, %int1_2601, %int128_2602 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4933 = torch.aten.view %4914, %4932 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %4933, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2603 = torch.constant.int 4
    %4934 = torch.aten.mul.int %int4_2603, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2604 = torch.constant.int 16
    %int1_2605 = torch.constant.int 1
    %int128_2606 = torch.constant.int 128
    %4935 = torch.prim.ListConstruct %4934, %int16_2604, %int1_2605, %int128_2606 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4936 = torch.aten.view %4916, %4935 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %4936, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2607 = torch.constant.int 4
    %4937 = torch.aten.mul.int %int4_2607, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2608 = torch.constant.int 16
    %int1_2609 = torch.constant.int 1
    %int128_2610 = torch.constant.int 128
    %4938 = torch.prim.ListConstruct %4937, %int16_2608, %int1_2609, %int128_2610 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4939 = torch.aten.view %4918, %4938 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %4939, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2611 = torch.constant.int 4
    %4940 = torch.aten.mul.int %int4_2611, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2612 = torch.constant.int 16
    %int1_2613 = torch.constant.int 1
    %int128_2614 = torch.constant.int 128
    %4941 = torch.prim.ListConstruct %4940, %int16_2612, %int1_2613, %int128_2614 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4942 = torch.aten.view %4920, %4941 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %4942, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2615 = torch.constant.int 4
    %4943 = torch.aten.mul.int %int4_2615, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2616 = torch.constant.int 16
    %int1_2617 = torch.constant.int 1
    %int128_2618 = torch.constant.int 128
    %4944 = torch.prim.ListConstruct %4943, %int16_2616, %int1_2617, %int128_2618 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4945 = torch.aten.view %4922, %4944 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %4945, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2619 = torch.constant.int 4
    %4946 = torch.aten.mul.int %int4_2619, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2620 = torch.constant.int 16
    %int1_2621 = torch.constant.int 1
    %int128_2622 = torch.constant.int 128
    %4947 = torch.prim.ListConstruct %4946, %int16_2620, %int1_2621, %int128_2622 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4948 = torch.aten.view %4924, %4947 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %4948, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
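    // Flatten the matching slot indices [4, s0] -> [4*s0].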
    %int4_2623 = torch.constant.int 4
    %4949 = torch.aten.mul.int %int4_2623, %3095 : !torch.int, !torch.int -> !torch.int
    %4950 = torch.prim.ListConstruct %4949 : (!torch.int) -> !torch.list<int>
    %4951 = torch.aten.view %4901, %4950 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %4951, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2624 = torch.constant.int 4
    %4952 = torch.aten.mul.int %int4_2624, %3095 : !torch.int, !torch.int -> !torch.int
    %4953 = torch.prim.ListConstruct %4952 : (!torch.int) -> !torch.list<int>
    %4954 = torch.aten.view %4902, %4953 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %4954, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2625 = torch.constant.int 4
    %4955 = torch.aten.mul.int %int4_2625, %3095 : !torch.int, !torch.int -> !torch.int
    %4956 = torch.prim.ListConstruct %4955 : (!torch.int) -> !torch.list<int>
    %4957 = torch.aten.view %4903, %4956 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %4957, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2626 = torch.constant.int 4
    %4958 = torch.aten.mul.int %int4_2626, %3095 : !torch.int, !torch.int -> !torch.int
    %4959 = torch.prim.ListConstruct %4958 : (!torch.int) -> !torch.list<int>
    %4960 = torch.aten.view %4904, %4959 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %4960, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2627 = torch.constant.int 4
    %4961 = torch.aten.mul.int %int4_2627, %3095 : !torch.int, !torch.int -> !torch.int
    %4962 = torch.prim.ListConstruct %4961 : (!torch.int) -> !torch.list<int>
    %4963 = torch.aten.view %4905, %4962 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %4963, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2628 = torch.constant.int 4
    %4964 = torch.aten.mul.int %int4_2628, %3095 : !torch.int, !torch.int -> !torch.int
    %4965 = torch.prim.ListConstruct %4964 : (!torch.int) -> !torch.list<int>
    %4966 = torch.aten.view %4906, %4965 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %4966, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2629 = torch.constant.int 4
    %4967 = torch.aten.mul.int %int4_2629, %3095 : !torch.int, !torch.int -> !torch.int
    %4968 = torch.prim.ListConstruct %4967 : (!torch.int) -> !torch.list<int>
    %4969 = torch.aten.view %4907, %4968 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %4969, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2630 = torch.constant.int 4
    %4970 = torch.aten.mul.int %int4_2630, %3095 : !torch.int, !torch.int -> !torch.int
    %4971 = torch.prim.ListConstruct %4970 : (!torch.int) -> !torch.list<int>
    %4972 = torch.aten.view %4908, %4971 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %4972, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
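    // Page the second set of shards (%4562..%4576, not rotated; likely the V
    // projections) the same way: [4, s0*16, 1, 128] -> [4, s0, 16, 1, 128].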
    %int4_2631 = torch.constant.int 4
    %int16_2632 = torch.constant.int 16
    %int1_2633 = torch.constant.int 1
    %int128_2634 = torch.constant.int 128
    %4973 = torch.prim.ListConstruct %int4_2631, %3095, %int16_2632, %int1_2633, %int128_2634 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4974 = torch.aten.view %4562, %4973 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4974, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2635 = torch.constant.int 4
    %int16_2636 = torch.constant.int 16
    %int1_2637 = torch.constant.int 1
    %int128_2638 = torch.constant.int 128
    %4975 = torch.prim.ListConstruct %int4_2635, %3095, %int16_2636, %int1_2637, %int128_2638 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4976 = torch.aten.view %4564, %4975 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4976, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2639 = torch.constant.int 4
    %int16_2640 = torch.constant.int 16
    %int1_2641 = torch.constant.int 1
    %int128_2642 = torch.constant.int 128
    %4977 = torch.prim.ListConstruct %int4_2639, %3095, %int16_2640, %int1_2641, %int128_2642 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4978 = torch.aten.view %4566, %4977 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4978, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2643 = torch.constant.int 4
    %int16_2644 = torch.constant.int 16
    %int1_2645 = torch.constant.int 1
    %int128_2646 = torch.constant.int 128
    %4979 = torch.prim.ListConstruct %int4_2643, %3095, %int16_2644, %int1_2645, %int128_2646 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4980 = torch.aten.view %4568, %4979 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4980, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2647 = torch.constant.int 4
    %int16_2648 = torch.constant.int 16
    %int1_2649 = torch.constant.int 1
    %int128_2650 = torch.constant.int 128
    %4981 = torch.prim.ListConstruct %int4_2647, %3095, %int16_2648, %int1_2649, %int128_2650 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4982 = torch.aten.view %4570, %4981 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4982, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2651 = torch.constant.int 4
    %int16_2652 = torch.constant.int 16
    %int1_2653 = torch.constant.int 1
    %int128_2654 = torch.constant.int 128
    %4983 = torch.prim.ListConstruct %int4_2651, %3095, %int16_2652, %int1_2653, %int128_2654 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4984 = torch.aten.view %4572, %4983 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4984, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2655 = torch.constant.int 4
    %int16_2656 = torch.constant.int 16
    %int1_2657 = torch.constant.int 1
    %int128_2658 = torch.constant.int 128
    %4985 = torch.prim.ListConstruct %int4_2655, %3095, %int16_2656, %int1_2657, %int128_2658 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4986 = torch.aten.view %4574, %4985 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4986, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_2659 = torch.constant.int 4
    %int16_2660 = torch.constant.int 16
    %int1_2661 = torch.constant.int 1
    %int128_2662 = torch.constant.int 128
    %4987 = torch.prim.ListConstruct %int4_2659, %3095, %int16_2660, %int1_2661, %int128_2662 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4988 = torch.aten.view %4576, %4987 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %4988, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
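    // Merge batch and page dims for these shards as well: -> [4*s0, 16, 1, 128].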
    %int4_2663 = torch.constant.int 4
    %4989 = torch.aten.mul.int %int4_2663, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2664 = torch.constant.int 16
    %int1_2665 = torch.constant.int 1
    %int128_2666 = torch.constant.int 128
    %4990 = torch.prim.ListConstruct %4989, %int16_2664, %int1_2665, %int128_2666 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4991 = torch.aten.view %4974, %4990 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %4991, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2667 = torch.constant.int 4
    %4992 = torch.aten.mul.int %int4_2667, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2668 = torch.constant.int 16
    %int1_2669 = torch.constant.int 1
    %int128_2670 = torch.constant.int 128
    %4993 = torch.prim.ListConstruct %4992, %int16_2668, %int1_2669, %int128_2670 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4994 = torch.aten.view %4976, %4993 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %4994, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2671 = torch.constant.int 4
    %4995 = torch.aten.mul.int %int4_2671, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2672 = torch.constant.int 16
    %int1_2673 = torch.constant.int 1
    %int128_2674 = torch.constant.int 128
    %4996 = torch.prim.ListConstruct %4995, %int16_2672, %int1_2673, %int128_2674 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %4997 = torch.aten.view %4978, %4996 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %4997, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2675 = torch.constant.int 4
    %4998 = torch.aten.mul.int %int4_2675, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2676 = torch.constant.int 16
    %int1_2677 = torch.constant.int 1
    %int128_2678 = torch.constant.int 128
    %4999 = torch.prim.ListConstruct %4998, %int16_2676, %int1_2677, %int128_2678 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5000 = torch.aten.view %4980, %4999 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5000, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2679 = torch.constant.int 4
    %5001 = torch.aten.mul.int %int4_2679, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2680 = torch.constant.int 16
    %int1_2681 = torch.constant.int 1
    %int128_2682 = torch.constant.int 128
    %5002 = torch.prim.ListConstruct %5001, %int16_2680, %int1_2681, %int128_2682 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5003 = torch.aten.view %4982, %5002 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5003, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2683 = torch.constant.int 4
    %5004 = torch.aten.mul.int %int4_2683, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2684 = torch.constant.int 16
    %int1_2685 = torch.constant.int 1
    %int128_2686 = torch.constant.int 128
    %5005 = torch.prim.ListConstruct %5004, %int16_2684, %int1_2685, %int128_2686 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5006 = torch.aten.view %4984, %5005 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5006, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2687 = torch.constant.int 4
    %5007 = torch.aten.mul.int %int4_2687, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2688 = torch.constant.int 16
    %int1_2689 = torch.constant.int 1
    %int128_2690 = torch.constant.int 128
    %5008 = torch.prim.ListConstruct %5007, %int16_2688, %int1_2689, %int128_2690 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5009 = torch.aten.view %4986, %5008 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5009, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_2691 = torch.constant.int 4
    %5010 = torch.aten.mul.int %int4_2691, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_2692 = torch.constant.int 16
    %int1_2693 = torch.constant.int 1
    %int128_2694 = torch.constant.int 128
    %5011 = torch.prim.ListConstruct %5010, %int16_2692, %int1_2693, %int128_2694 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5012 = torch.aten.view %4988, %5011 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5012, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
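    // The second slot sits one past the first: add 1 (total per-page offset 3,
    // likely this block's V entries) to the page indices from above.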
    %int1_2695 = torch.constant.int 1
    %int1_2696 = torch.constant.int 1
    %5013 = torch.aten.add.Scalar %4901, %int1_2695, %int1_2696 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %5013, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_2697 = torch.constant.int 1
    %int1_2698 = torch.constant.int 1
    %5014 = torch.aten.add.Scalar %4902, %int1_2697, %int1_2698 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %5014, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_2699 = torch.constant.int 1
    %int1_2700 = torch.constant.int 1
    %5015 = torch.aten.add.Scalar %4903, %int1_2699, %int1_2700 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %5015, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_2701 = torch.constant.int 1
    %int1_2702 = torch.constant.int 1
    %5016 = torch.aten.add.Scalar %4904, %int1_2701, %int1_2702 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %5016, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_2703 = torch.constant.int 1
    %int1_2704 = torch.constant.int 1
    %5017 = torch.aten.add.Scalar %4905, %int1_2703, %int1_2704 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %5017, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_2705 = torch.constant.int 1
    %int1_2706 = torch.constant.int 1
    %5018 = torch.aten.add.Scalar %4906, %int1_2705, %int1_2706 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %5018, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_2707 = torch.constant.int 1
    %int1_2708 = torch.constant.int 1
    %5019 = torch.aten.add.Scalar %4907, %int1_2707, %int1_2708 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %5019, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_2709 = torch.constant.int 1
    %int1_2710 = torch.constant.int 1
    %5020 = torch.aten.add.Scalar %4908, %int1_2709, %int1_2710 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %5020, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
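    // Flatten the shifted slot indices [4, s0] -> [4*s0].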
    %int4_2711 = torch.constant.int 4
    %5021 = torch.aten.mul.int %int4_2711, %3095 : !torch.int, !torch.int -> !torch.int
    %5022 = torch.prim.ListConstruct %5021 : (!torch.int) -> !torch.list<int>
    %5023 = torch.aten.view %5013, %5022 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5023, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2712 = torch.constant.int 4
    %5024 = torch.aten.mul.int %int4_2712, %3095 : !torch.int, !torch.int -> !torch.int
    %5025 = torch.prim.ListConstruct %5024 : (!torch.int) -> !torch.list<int>
    %5026 = torch.aten.view %5014, %5025 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5026, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2713 = torch.constant.int 4
    %5027 = torch.aten.mul.int %int4_2713, %3095 : !torch.int, !torch.int -> !torch.int
    %5028 = torch.prim.ListConstruct %5027 : (!torch.int) -> !torch.list<int>
    %5029 = torch.aten.view %5015, %5028 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5029, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2714 = torch.constant.int 4
    %5030 = torch.aten.mul.int %int4_2714, %3095 : !torch.int, !torch.int -> !torch.int
    %5031 = torch.prim.ListConstruct %5030 : (!torch.int) -> !torch.list<int>
    %5032 = torch.aten.view %5016, %5031 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5032, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2715 = torch.constant.int 4
    %5033 = torch.aten.mul.int %int4_2715, %3095 : !torch.int, !torch.int -> !torch.int
    %5034 = torch.prim.ListConstruct %5033 : (!torch.int) -> !torch.list<int>
    %5035 = torch.aten.view %5017, %5034 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5035, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2716 = torch.constant.int 4
    %5036 = torch.aten.mul.int %int4_2716, %3095 : !torch.int, !torch.int -> !torch.int
    %5037 = torch.prim.ListConstruct %5036 : (!torch.int) -> !torch.list<int>
    %5038 = torch.aten.view %5018, %5037 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5038, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2717 = torch.constant.int 4
    %5039 = torch.aten.mul.int %int4_2717, %3095 : !torch.int, !torch.int -> !torch.int
    %5040 = torch.prim.ListConstruct %5039 : (!torch.int) -> !torch.list<int>
    %5041 = torch.aten.view %5019, %5040 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5041, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_2718 = torch.constant.int 4
    %5042 = torch.aten.mul.int %int4_2718, %3095 : !torch.int, !torch.int -> !torch.int
    %5043 = torch.prim.ListConstruct %5042 : (!torch.int) -> !torch.list<int>
    %5044 = torch.aten.view %5020, %5043 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5044, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
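    // Concatenate the two index lists per shard ([4*s0] each -> [8*s0]) so a
    // single scatter updates both the K and V slots.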
    %5045 = torch.prim.ListConstruct %4951, %5023 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_2719 = torch.constant.int 0
    %5046 = torch.aten.cat %5045, %int0_2719 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5046, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %5047 = torch.prim.ListConstruct %4954, %5026 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_2720 = torch.constant.int 0
    %5048 = torch.aten.cat %5047, %int0_2720 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5048, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %5049 = torch.prim.ListConstruct %4957, %5029 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_2721 = torch.constant.int 0
    %5050 = torch.aten.cat %5049, %int0_2721 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5050, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %5051 = torch.prim.ListConstruct %4960, %5032 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_2722 = torch.constant.int 0
    %5052 = torch.aten.cat %5051, %int0_2722 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5052, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %5053 = torch.prim.ListConstruct %4963, %5035 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_2723 = torch.constant.int 0
    %5054 = torch.aten.cat %5053, %int0_2723 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5054, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %5055 = torch.prim.ListConstruct %4966, %5038 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_2724 = torch.constant.int 0
    %5056 = torch.aten.cat %5055, %int0_2724 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5056, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %5057 = torch.prim.ListConstruct %4969, %5041 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_2725 = torch.constant.int 0
    %5058 = torch.aten.cat %5057, %int0_2725 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5058, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %5059 = torch.prim.ListConstruct %4972, %5044 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_2726 = torch.constant.int 0
    %5060 = torch.aten.cat %5059, %int0_2726 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %5060, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
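    // Concatenate the paged values in the same order ([4*s0,16,1,128] each ->
    // [8*s0,16,1,128]) to match the fused index lists.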
    %5061 = torch.prim.ListConstruct %4927, %4991 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_2727 = torch.constant.int 0
    %5062 = torch.aten.cat %5061, %int0_2727 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5062, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5063 = torch.prim.ListConstruct %4930, %4994 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_2728 = torch.constant.int 0
    %5064 = torch.aten.cat %5063, %int0_2728 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5064, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5065 = torch.prim.ListConstruct %4933, %4997 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_2729 = torch.constant.int 0
    %5066 = torch.aten.cat %5065, %int0_2729 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5066, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5067 = torch.prim.ListConstruct %4936, %5000 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_2730 = torch.constant.int 0
    %5068 = torch.aten.cat %5067, %int0_2730 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5068, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5069 = torch.prim.ListConstruct %4939, %5003 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_2731 = torch.constant.int 0
    %5070 = torch.aten.cat %5069, %int0_2731 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5070, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5071 = torch.prim.ListConstruct %4942, %5006 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_2732 = torch.constant.int 0
    %5072 = torch.aten.cat %5071, %int0_2732 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5072, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5073 = torch.prim.ListConstruct %4945, %5009 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_2733 = torch.constant.int 0
    %5074 = torch.aten.cat %5073, %int0_2733 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5074, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5075 = torch.prim.ListConstruct %4948, %5012 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_2734 = torch.constant.int 0
    %5076 = torch.aten.cat %5075, %int0_2734 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5076, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
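    // Per shard: view the cache buffer [?,131072] as [pages, 32, 2, 16, 1, 128],
    // flatten to [pages*64, 16, 1, 128] flat slots, scatter the fused K/V update
    // with index_put (accumulate = false), then restore the [?,131072] layout.
    // The same view / index_put / view-back pattern repeats for each shard's
    // buffer (%3269, %3275, %3281, %3287, %3293, %3299, %3305, ...).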
    %int32_2735 = torch.constant.int 32
    %int2_2736 = torch.constant.int 2
    %int16_2737 = torch.constant.int 16
    %int1_2738 = torch.constant.int 1
    %int128_2739 = torch.constant.int 128
    %5077 = torch.prim.ListConstruct %3023, %int32_2735, %int2_2736, %int16_2737, %int1_2738, %int128_2739 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5078 = torch.aten.view %3269, %5077 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5078, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_2740 = torch.constant.int 32
    %5079 = torch.aten.mul.int %3023, %int32_2740 : !torch.int, !torch.int -> !torch.int
    %int2_2741 = torch.constant.int 2
    %5080 = torch.aten.mul.int %5079, %int2_2741 : !torch.int, !torch.int -> !torch.int
    %int16_2742 = torch.constant.int 16
    %int1_2743 = torch.constant.int 1
    %int128_2744 = torch.constant.int 128
    %5081 = torch.prim.ListConstruct %5080, %int16_2742, %int1_2743, %int128_2744 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5082 = torch.aten.view %5078, %5081 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5082, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5083 = torch.prim.ListConstruct %5046 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_2745 = torch.constant.bool false
    %5084 = torch.aten.index_put %5082, %5083, %5062, %false_2745 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5084, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_2746 = torch.constant.int 32
    %int2_2747 = torch.constant.int 2
    %int16_2748 = torch.constant.int 16
    %int1_2749 = torch.constant.int 1
    %int128_2750 = torch.constant.int 128
    %5085 = torch.prim.ListConstruct %3023, %int32_2746, %int2_2747, %int16_2748, %int1_2749, %int128_2750 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5086 = torch.aten.view %5084, %5085 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5086, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_2751 = torch.constant.int 131072
    %5087 = torch.prim.ListConstruct %3023, %int131072_2751 : (!torch.int, !torch.int) -> !torch.list<int>
    %5088 = torch.aten.view %5086, %5087 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %5088, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_2752 = torch.constant.int 32
    %int2_2753 = torch.constant.int 2
    %int16_2754 = torch.constant.int 16
    %int1_2755 = torch.constant.int 1
    %int128_2756 = torch.constant.int 128
    %5089 = torch.prim.ListConstruct %3026, %int32_2752, %int2_2753, %int16_2754, %int1_2755, %int128_2756 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5090 = torch.aten.view %3275, %5089 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5090, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_2757 = torch.constant.int 32
    %5091 = torch.aten.mul.int %3026, %int32_2757 : !torch.int, !torch.int -> !torch.int
    %int2_2758 = torch.constant.int 2
    %5092 = torch.aten.mul.int %5091, %int2_2758 : !torch.int, !torch.int -> !torch.int
    %int16_2759 = torch.constant.int 16
    %int1_2760 = torch.constant.int 1
    %int128_2761 = torch.constant.int 128
    %5093 = torch.prim.ListConstruct %5092, %int16_2759, %int1_2760, %int128_2761 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5094 = torch.aten.view %5090, %5093 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5094, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5095 = torch.prim.ListConstruct %5048 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_2762 = torch.constant.bool false
    %5096 = torch.aten.index_put %5094, %5095, %5064, %false_2762 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5096, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_2763 = torch.constant.int 32
    %int2_2764 = torch.constant.int 2
    %int16_2765 = torch.constant.int 16
    %int1_2766 = torch.constant.int 1
    %int128_2767 = torch.constant.int 128
    %5097 = torch.prim.ListConstruct %3026, %int32_2763, %int2_2764, %int16_2765, %int1_2766, %int128_2767 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5098 = torch.aten.view %5096, %5097 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5098, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_2768 = torch.constant.int 131072
    %5099 = torch.prim.ListConstruct %3026, %int131072_2768 : (!torch.int, !torch.int) -> !torch.list<int>
    %5100 = torch.aten.view %5098, %5099 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %5100, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_2769 = torch.constant.int 32
    %int2_2770 = torch.constant.int 2
    %int16_2771 = torch.constant.int 16
    %int1_2772 = torch.constant.int 1
    %int128_2773 = torch.constant.int 128
    %5101 = torch.prim.ListConstruct %3029, %int32_2769, %int2_2770, %int16_2771, %int1_2772, %int128_2773 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5102 = torch.aten.view %3281, %5101 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5102, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_2774 = torch.constant.int 32
    %5103 = torch.aten.mul.int %3029, %int32_2774 : !torch.int, !torch.int -> !torch.int
    %int2_2775 = torch.constant.int 2
    %5104 = torch.aten.mul.int %5103, %int2_2775 : !torch.int, !torch.int -> !torch.int
    %int16_2776 = torch.constant.int 16
    %int1_2777 = torch.constant.int 1
    %int128_2778 = torch.constant.int 128
    %5105 = torch.prim.ListConstruct %5104, %int16_2776, %int1_2777, %int128_2778 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5106 = torch.aten.view %5102, %5105 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5106, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5107 = torch.prim.ListConstruct %5050 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_2779 = torch.constant.bool false
    %5108 = torch.aten.index_put %5106, %5107, %5066, %false_2779 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5108, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_2780 = torch.constant.int 32
    %int2_2781 = torch.constant.int 2
    %int16_2782 = torch.constant.int 16
    %int1_2783 = torch.constant.int 1
    %int128_2784 = torch.constant.int 128
    %5109 = torch.prim.ListConstruct %3029, %int32_2780, %int2_2781, %int16_2782, %int1_2783, %int128_2784 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5110 = torch.aten.view %5108, %5109 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5110, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_2785 = torch.constant.int 131072
    %5111 = torch.prim.ListConstruct %3029, %int131072_2785 : (!torch.int, !torch.int) -> !torch.list<int>
    %5112 = torch.aten.view %5110, %5111 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %5112, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_2786 = torch.constant.int 32
    %int2_2787 = torch.constant.int 2
    %int16_2788 = torch.constant.int 16
    %int1_2789 = torch.constant.int 1
    %int128_2790 = torch.constant.int 128
    %5113 = torch.prim.ListConstruct %3032, %int32_2786, %int2_2787, %int16_2788, %int1_2789, %int128_2790 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5114 = torch.aten.view %3287, %5113 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5114, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_2791 = torch.constant.int 32
    %5115 = torch.aten.mul.int %3032, %int32_2791 : !torch.int, !torch.int -> !torch.int
    %int2_2792 = torch.constant.int 2
    %5116 = torch.aten.mul.int %5115, %int2_2792 : !torch.int, !torch.int -> !torch.int
    %int16_2793 = torch.constant.int 16
    %int1_2794 = torch.constant.int 1
    %int128_2795 = torch.constant.int 128
    %5117 = torch.prim.ListConstruct %5116, %int16_2793, %int1_2794, %int128_2795 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5118 = torch.aten.view %5114, %5117 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5118, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5119 = torch.prim.ListConstruct %5052 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_2796 = torch.constant.bool false
    %5120 = torch.aten.index_put %5118, %5119, %5068, %false_2796 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5120, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_2797 = torch.constant.int 32
    %int2_2798 = torch.constant.int 2
    %int16_2799 = torch.constant.int 16
    %int1_2800 = torch.constant.int 1
    %int128_2801 = torch.constant.int 128
    %5121 = torch.prim.ListConstruct %3032, %int32_2797, %int2_2798, %int16_2799, %int1_2800, %int128_2801 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5122 = torch.aten.view %5120, %5121 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5122, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_2802 = torch.constant.int 131072
    %5123 = torch.prim.ListConstruct %3032, %int131072_2802 : (!torch.int, !torch.int) -> !torch.list<int>
    %5124 = torch.aten.view %5122, %5123 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %5124, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_2803 = torch.constant.int 32
    %int2_2804 = torch.constant.int 2
    %int16_2805 = torch.constant.int 16
    %int1_2806 = torch.constant.int 1
    %int128_2807 = torch.constant.int 128
    %5125 = torch.prim.ListConstruct %3035, %int32_2803, %int2_2804, %int16_2805, %int1_2806, %int128_2807 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5126 = torch.aten.view %3293, %5125 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5126, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_2808 = torch.constant.int 32
    %5127 = torch.aten.mul.int %3035, %int32_2808 : !torch.int, !torch.int -> !torch.int
    %int2_2809 = torch.constant.int 2
    %5128 = torch.aten.mul.int %5127, %int2_2809 : !torch.int, !torch.int -> !torch.int
    %int16_2810 = torch.constant.int 16
    %int1_2811 = torch.constant.int 1
    %int128_2812 = torch.constant.int 128
    %5129 = torch.prim.ListConstruct %5128, %int16_2810, %int1_2811, %int128_2812 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5130 = torch.aten.view %5126, %5129 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5130, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5131 = torch.prim.ListConstruct %5054 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_2813 = torch.constant.bool false
    %5132 = torch.aten.index_put %5130, %5131, %5070, %false_2813 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5132, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_2814 = torch.constant.int 32
    %int2_2815 = torch.constant.int 2
    %int16_2816 = torch.constant.int 16
    %int1_2817 = torch.constant.int 1
    %int128_2818 = torch.constant.int 128
    %5133 = torch.prim.ListConstruct %3035, %int32_2814, %int2_2815, %int16_2816, %int1_2817, %int128_2818 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5134 = torch.aten.view %5132, %5133 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5134, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_2819 = torch.constant.int 131072
    %5135 = torch.prim.ListConstruct %3035, %int131072_2819 : (!torch.int, !torch.int) -> !torch.list<int>
    %5136 = torch.aten.view %5134, %5135 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %5136, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_2820 = torch.constant.int 32
    %int2_2821 = torch.constant.int 2
    %int16_2822 = torch.constant.int 16
    %int1_2823 = torch.constant.int 1
    %int128_2824 = torch.constant.int 128
    %5137 = torch.prim.ListConstruct %3038, %int32_2820, %int2_2821, %int16_2822, %int1_2823, %int128_2824 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5138 = torch.aten.view %3299, %5137 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5138, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_2825 = torch.constant.int 32
    %5139 = torch.aten.mul.int %3038, %int32_2825 : !torch.int, !torch.int -> !torch.int
    %int2_2826 = torch.constant.int 2
    %5140 = torch.aten.mul.int %5139, %int2_2826 : !torch.int, !torch.int -> !torch.int
    %int16_2827 = torch.constant.int 16
    %int1_2828 = torch.constant.int 1
    %int128_2829 = torch.constant.int 128
    %5141 = torch.prim.ListConstruct %5140, %int16_2827, %int1_2828, %int128_2829 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5142 = torch.aten.view %5138, %5141 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5142, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5143 = torch.prim.ListConstruct %5056 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_2830 = torch.constant.bool false
    %5144 = torch.aten.index_put %5142, %5143, %5072, %false_2830 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5144, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_2831 = torch.constant.int 32
    %int2_2832 = torch.constant.int 2
    %int16_2833 = torch.constant.int 16
    %int1_2834 = torch.constant.int 1
    %int128_2835 = torch.constant.int 128
    %5145 = torch.prim.ListConstruct %3038, %int32_2831, %int2_2832, %int16_2833, %int1_2834, %int128_2835 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5146 = torch.aten.view %5144, %5145 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5146, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_2836 = torch.constant.int 131072
    %5147 = torch.prim.ListConstruct %3038, %int131072_2836 : (!torch.int, !torch.int) -> !torch.list<int>
    %5148 = torch.aten.view %5146, %5147 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %5148, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_2837 = torch.constant.int 32
    %int2_2838 = torch.constant.int 2
    %int16_2839 = torch.constant.int 16
    %int1_2840 = torch.constant.int 1
    %int128_2841 = torch.constant.int 128
    %5149 = torch.prim.ListConstruct %3041, %int32_2837, %int2_2838, %int16_2839, %int1_2840, %int128_2841 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5150 = torch.aten.view %3305, %5149 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5150, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_2842 = torch.constant.int 32
    %5151 = torch.aten.mul.int %3041, %int32_2842 : !torch.int, !torch.int -> !torch.int
    %int2_2843 = torch.constant.int 2
    %5152 = torch.aten.mul.int %5151, %int2_2843 : !torch.int, !torch.int -> !torch.int
    %int16_2844 = torch.constant.int 16
    %int1_2845 = torch.constant.int 1
    %int128_2846 = torch.constant.int 128
    %5153 = torch.prim.ListConstruct %5152, %int16_2844, %int1_2845, %int128_2846 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5154 = torch.aten.view %5150, %5153 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5154, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5155 = torch.prim.ListConstruct %5058 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_2847 = torch.constant.bool false
    %5156 = torch.aten.index_put %5154, %5155, %5074, %false_2847 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5156, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_2848 = torch.constant.int 32
    %int2_2849 = torch.constant.int 2
    %int16_2850 = torch.constant.int 16
    %int1_2851 = torch.constant.int 1
    %int128_2852 = torch.constant.int 128
    %5157 = torch.prim.ListConstruct %3041, %int32_2848, %int2_2849, %int16_2850, %int1_2851, %int128_2852 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5158 = torch.aten.view %5156, %5157 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5158, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_2853 = torch.constant.int 131072
    %5159 = torch.prim.ListConstruct %3041, %int131072_2853 : (!torch.int, !torch.int) -> !torch.list<int>
    %5160 = torch.aten.view %5158, %5159 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %5160, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_2854 = torch.constant.int 32
    %int2_2855 = torch.constant.int 2
    %int16_2856 = torch.constant.int 16
    %int1_2857 = torch.constant.int 1
    %int128_2858 = torch.constant.int 128
    %5161 = torch.prim.ListConstruct %3044, %int32_2854, %int2_2855, %int16_2856, %int1_2857, %int128_2858 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5162 = torch.aten.view %3311, %5161 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5162, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_2859 = torch.constant.int 32
    %5163 = torch.aten.mul.int %3044, %int32_2859 : !torch.int, !torch.int -> !torch.int
    %int2_2860 = torch.constant.int 2
    %5164 = torch.aten.mul.int %5163, %int2_2860 : !torch.int, !torch.int -> !torch.int
    %int16_2861 = torch.constant.int 16
    %int1_2862 = torch.constant.int 1
    %int128_2863 = torch.constant.int 128
    %5165 = torch.prim.ListConstruct %5164, %int16_2861, %int1_2862, %int128_2863 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5166 = torch.aten.view %5162, %5165 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5166, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %5167 = torch.prim.ListConstruct %5060 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_2864 = torch.constant.bool false
    %5168 = torch.aten.index_put %5166, %5167, %5076, %false_2864 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %5168, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_2865 = torch.constant.int 32
    %int2_2866 = torch.constant.int 2
    %int16_2867 = torch.constant.int 16
    %int1_2868 = torch.constant.int 1
    %int128_2869 = torch.constant.int 128
    %5169 = torch.prim.ListConstruct %3044, %int32_2865, %int2_2866, %int16_2867, %int1_2868, %int128_2869 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5170 = torch.aten.view %5168, %5169 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %5170, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_2870 = torch.constant.int 131072
    %5171 = torch.prim.ListConstruct %3044, %int131072_2870 : (!torch.int, !torch.int) -> !torch.list<int>
    %5172 = torch.aten.view %5170, %5171 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %5172, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
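    // Insert a broadcast dim at -2 into each per-shard tensor,
    // [4,?,1,128] -> [4,?,1,1,128]; these become the key operands of the
    // attention calls below.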
    %int-2_2871 = torch.constant.int -2
    %5173 = torch.aten.unsqueeze %4787, %int-2_2871 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2872 = torch.constant.int -2
    %5174 = torch.aten.unsqueeze %4802, %int-2_2872 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2873 = torch.constant.int -2
    %5175 = torch.aten.unsqueeze %4817, %int-2_2873 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2874 = torch.constant.int -2
    %5176 = torch.aten.unsqueeze %4832, %int-2_2874 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2875 = torch.constant.int -2
    %5177 = torch.aten.unsqueeze %4847, %int-2_2875 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2876 = torch.constant.int -2
    %5178 = torch.aten.unsqueeze %4862, %int-2_2876 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2877 = torch.constant.int -2
    %5179 = torch.aten.unsqueeze %4877, %int-2_2877 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2878 = torch.constant.int -2
    %5180 = torch.aten.unsqueeze %4892, %int-2_2878 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
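    // Grouped-query expansion: broadcast the shard's single KV head across its
    // 4 query heads, [4,?,1,1,128] -> [4,?,1,4,128], so each query head reads
    // the same key data.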
    %int4_2879 = torch.constant.int 4
    %int1_2880 = torch.constant.int 1
    %int4_2881 = torch.constant.int 4
    %int128_2882 = torch.constant.int 128
    %5181 = torch.prim.ListConstruct %int4_2879, %4773, %int1_2880, %int4_2881, %int128_2882 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2883 = torch.constant.bool false
    %5182 = torch.aten.expand %5173, %5181, %false_2883 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2884 = torch.constant.int 4
    %int1_2885 = torch.constant.int 1
    %int4_2886 = torch.constant.int 4
    %int128_2887 = torch.constant.int 128
    %5183 = torch.prim.ListConstruct %int4_2884, %4773, %int1_2885, %int4_2886, %int128_2887 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2888 = torch.constant.bool false
    %5184 = torch.aten.expand %5174, %5183, %false_2888 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2889 = torch.constant.int 4
    %int1_2890 = torch.constant.int 1
    %int4_2891 = torch.constant.int 4
    %int128_2892 = torch.constant.int 128
    %5185 = torch.prim.ListConstruct %int4_2889, %4773, %int1_2890, %int4_2891, %int128_2892 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2893 = torch.constant.bool false
    %5186 = torch.aten.expand %5175, %5185, %false_2893 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2894 = torch.constant.int 4
    %int1_2895 = torch.constant.int 1
    %int4_2896 = torch.constant.int 4
    %int128_2897 = torch.constant.int 128
    %5187 = torch.prim.ListConstruct %int4_2894, %4773, %int1_2895, %int4_2896, %int128_2897 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2898 = torch.constant.bool false
    %5188 = torch.aten.expand %5176, %5187, %false_2898 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2899 = torch.constant.int 4
    %int1_2900 = torch.constant.int 1
    %int4_2901 = torch.constant.int 4
    %int128_2902 = torch.constant.int 128
    %5189 = torch.prim.ListConstruct %int4_2899, %4773, %int1_2900, %int4_2901, %int128_2902 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2903 = torch.constant.bool false
    %5190 = torch.aten.expand %5177, %5189, %false_2903 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2904 = torch.constant.int 4
    %int1_2905 = torch.constant.int 1
    %int4_2906 = torch.constant.int 4
    %int128_2907 = torch.constant.int 128
    %5191 = torch.prim.ListConstruct %int4_2904, %4773, %int1_2905, %int4_2906, %int128_2907 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2908 = torch.constant.bool false
    %5192 = torch.aten.expand %5178, %5191, %false_2908 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2909 = torch.constant.int 4
    %int1_2910 = torch.constant.int 1
    %int4_2911 = torch.constant.int 4
    %int128_2912 = torch.constant.int 128
    %5193 = torch.prim.ListConstruct %int4_2909, %4773, %int1_2910, %int4_2911, %int128_2912 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2913 = torch.constant.bool false
    %5194 = torch.aten.expand %5179, %5193, %false_2913 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2914 = torch.constant.int 4
    %int1_2915 = torch.constant.int 1
    %int4_2916 = torch.constant.int 4
    %int128_2917 = torch.constant.int 128
    %5195 = torch.prim.ListConstruct %int4_2914, %4773, %int1_2915, %int4_2916, %int128_2917 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2918 = torch.constant.bool false
    %5196 = torch.aten.expand %5180, %5195, %false_2918 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
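    // Collapse the broadcast dim: [4,?,1,4,128] -> [4,?,4,128].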
    %int4_2919 = torch.constant.int 4
    %int4_2920 = torch.constant.int 4
    %int128_2921 = torch.constant.int 128
    %5197 = torch.prim.ListConstruct %int4_2919, %4773, %int4_2920, %int128_2921 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5198 = torch.aten.view %5182, %5197 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2922 = torch.constant.int 4
    %int4_2923 = torch.constant.int 4
    %int128_2924 = torch.constant.int 128
    %5199 = torch.prim.ListConstruct %int4_2922, %4773, %int4_2923, %int128_2924 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5200 = torch.aten.view %5184, %5199 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2925 = torch.constant.int 4
    %int4_2926 = torch.constant.int 4
    %int128_2927 = torch.constant.int 128
    %5201 = torch.prim.ListConstruct %int4_2925, %4773, %int4_2926, %int128_2927 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5202 = torch.aten.view %5186, %5201 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2928 = torch.constant.int 4
    %int4_2929 = torch.constant.int 4
    %int128_2930 = torch.constant.int 128
    %5203 = torch.prim.ListConstruct %int4_2928, %4773, %int4_2929, %int128_2930 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5204 = torch.aten.view %5188, %5203 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2931 = torch.constant.int 4
    %int4_2932 = torch.constant.int 4
    %int128_2933 = torch.constant.int 128
    %5205 = torch.prim.ListConstruct %int4_2931, %4773, %int4_2932, %int128_2933 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5206 = torch.aten.view %5190, %5205 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2934 = torch.constant.int 4
    %int4_2935 = torch.constant.int 4
    %int128_2936 = torch.constant.int 128
    %5207 = torch.prim.ListConstruct %int4_2934, %4773, %int4_2935, %int128_2936 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5208 = torch.aten.view %5192, %5207 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2937 = torch.constant.int 4
    %int4_2938 = torch.constant.int 4
    %int128_2939 = torch.constant.int 128
    %5209 = torch.prim.ListConstruct %int4_2937, %4773, %int4_2938, %int128_2939 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5210 = torch.aten.view %5194, %5209 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2940 = torch.constant.int 4
    %int4_2941 = torch.constant.int 4
    %int128_2942 = torch.constant.int 128
    %5211 = torch.prim.ListConstruct %int4_2940, %4773, %int4_2941, %int128_2942 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5212 = torch.aten.view %5196, %5211 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
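    // Repeat the unsqueeze/expand/collapse sequence for the value tensors
    // (%4562 ... %4576); %5221 re-reads the sequence length from %4486 to size
    // the expands.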
    %int-2_2943 = torch.constant.int -2
    %5213 = torch.aten.unsqueeze %4562, %int-2_2943 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2944 = torch.constant.int -2
    %5214 = torch.aten.unsqueeze %4564, %int-2_2944 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2945 = torch.constant.int -2
    %5215 = torch.aten.unsqueeze %4566, %int-2_2945 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2946 = torch.constant.int -2
    %5216 = torch.aten.unsqueeze %4568, %int-2_2946 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2947 = torch.constant.int -2
    %5217 = torch.aten.unsqueeze %4570, %int-2_2947 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2948 = torch.constant.int -2
    %5218 = torch.aten.unsqueeze %4572, %int-2_2948 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2949 = torch.constant.int -2
    %5219 = torch.aten.unsqueeze %4574, %int-2_2949 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_2950 = torch.constant.int -2
    %5220 = torch.aten.unsqueeze %4576, %int-2_2950 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %5220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_2951 = torch.constant.int 1
    %5221 = torch.aten.size.int %4486, %int1_2951 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_2952 = torch.constant.int 4
    %int1_2953 = torch.constant.int 1
    %int4_2954 = torch.constant.int 4
    %int128_2955 = torch.constant.int 128
    %5222 = torch.prim.ListConstruct %int4_2952, %5221, %int1_2953, %int4_2954, %int128_2955 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2956 = torch.constant.bool false
    %5223 = torch.aten.expand %5213, %5222, %false_2956 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2957 = torch.constant.int 4
    %int1_2958 = torch.constant.int 1
    %int4_2959 = torch.constant.int 4
    %int128_2960 = torch.constant.int 128
    %5224 = torch.prim.ListConstruct %int4_2957, %5221, %int1_2958, %int4_2959, %int128_2960 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2961 = torch.constant.bool false
    %5225 = torch.aten.expand %5214, %5224, %false_2961 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2962 = torch.constant.int 4
    %int1_2963 = torch.constant.int 1
    %int4_2964 = torch.constant.int 4
    %int128_2965 = torch.constant.int 128
    %5226 = torch.prim.ListConstruct %int4_2962, %5221, %int1_2963, %int4_2964, %int128_2965 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2966 = torch.constant.bool false
    %5227 = torch.aten.expand %5215, %5226, %false_2966 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2967 = torch.constant.int 4
    %int1_2968 = torch.constant.int 1
    %int4_2969 = torch.constant.int 4
    %int128_2970 = torch.constant.int 128
    %5228 = torch.prim.ListConstruct %int4_2967, %5221, %int1_2968, %int4_2969, %int128_2970 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2971 = torch.constant.bool false
    %5229 = torch.aten.expand %5216, %5228, %false_2971 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2972 = torch.constant.int 4
    %int1_2973 = torch.constant.int 1
    %int4_2974 = torch.constant.int 4
    %int128_2975 = torch.constant.int 128
    %5230 = torch.prim.ListConstruct %int4_2972, %5221, %int1_2973, %int4_2974, %int128_2975 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2976 = torch.constant.bool false
    %5231 = torch.aten.expand %5217, %5230, %false_2976 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2977 = torch.constant.int 4
    %int1_2978 = torch.constant.int 1
    %int4_2979 = torch.constant.int 4
    %int128_2980 = torch.constant.int 128
    %5232 = torch.prim.ListConstruct %int4_2977, %5221, %int1_2978, %int4_2979, %int128_2980 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2981 = torch.constant.bool false
    %5233 = torch.aten.expand %5218, %5232, %false_2981 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2982 = torch.constant.int 4
    %int1_2983 = torch.constant.int 1
    %int4_2984 = torch.constant.int 4
    %int128_2985 = torch.constant.int 128
    %5234 = torch.prim.ListConstruct %int4_2982, %5221, %int1_2983, %int4_2984, %int128_2985 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2986 = torch.constant.bool false
    %5235 = torch.aten.expand %5219, %5234, %false_2986 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2987 = torch.constant.int 4
    %int1_2988 = torch.constant.int 1
    %int4_2989 = torch.constant.int 4
    %int128_2990 = torch.constant.int 128
    %5236 = torch.prim.ListConstruct %int4_2987, %5221, %int1_2988, %int4_2989, %int128_2990 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_2991 = torch.constant.bool false
    %5237 = torch.aten.expand %5220, %5236, %false_2991 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %5237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_2992 = torch.constant.int 4
    %int4_2993 = torch.constant.int 4
    %int128_2994 = torch.constant.int 128
    %5238 = torch.prim.ListConstruct %int4_2992, %5221, %int4_2993, %int128_2994 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5239 = torch.aten.view %5223, %5238 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2995 = torch.constant.int 4
    %int4_2996 = torch.constant.int 4
    %int128_2997 = torch.constant.int 128
    %5240 = torch.prim.ListConstruct %int4_2995, %5221, %int4_2996, %int128_2997 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5241 = torch.aten.view %5225, %5240 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_2998 = torch.constant.int 4
    %int4_2999 = torch.constant.int 4
    %int128_3000 = torch.constant.int 128
    %5242 = torch.prim.ListConstruct %int4_2998, %5221, %int4_2999, %int128_3000 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5243 = torch.aten.view %5227, %5242 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_3001 = torch.constant.int 4
    %int4_3002 = torch.constant.int 4
    %int128_3003 = torch.constant.int 128
    %5244 = torch.prim.ListConstruct %int4_3001, %5221, %int4_3002, %int128_3003 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5245 = torch.aten.view %5229, %5244 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_3004 = torch.constant.int 4
    %int4_3005 = torch.constant.int 4
    %int128_3006 = torch.constant.int 128
    %5246 = torch.prim.ListConstruct %int4_3004, %5221, %int4_3005, %int128_3006 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5247 = torch.aten.view %5231, %5246 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_3007 = torch.constant.int 4
    %int4_3008 = torch.constant.int 4
    %int128_3009 = torch.constant.int 128
    %5248 = torch.prim.ListConstruct %int4_3007, %5221, %int4_3008, %int128_3009 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5249 = torch.aten.view %5233, %5248 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_3010 = torch.constant.int 4
    %int4_3011 = torch.constant.int 4
    %int128_3012 = torch.constant.int 128
    %5250 = torch.prim.ListConstruct %int4_3010, %5221, %int4_3011, %int128_3012 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5251 = torch.aten.view %5235, %5250 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_3013 = torch.constant.int 4
    %int4_3014 = torch.constant.int 4
    %int128_3015 = torch.constant.int 128
    %5252 = torch.prim.ListConstruct %int4_3013, %5221, %int4_3014, %int128_3015 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5253 = torch.aten.view %5237, %5252 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
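    // Transpose Q, K, and V into attention layout: dims 1 and 2 swapped,
    // [4, seq, heads, 128] -> [4, heads, seq, 128].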
    %int1_3016 = torch.constant.int 1
    %int2_3017 = torch.constant.int 2
    %5254 = torch.aten.transpose.int %4629, %int1_3016, %int2_3017 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5254, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3018 = torch.constant.int 1
    %int2_3019 = torch.constant.int 2
    %5255 = torch.aten.transpose.int %4644, %int1_3018, %int2_3019 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5255, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3020 = torch.constant.int 1
    %int2_3021 = torch.constant.int 2
    %5256 = torch.aten.transpose.int %4659, %int1_3020, %int2_3021 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5256, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3022 = torch.constant.int 1
    %int2_3023 = torch.constant.int 2
    %5257 = torch.aten.transpose.int %4674, %int1_3022, %int2_3023 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5257, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3024 = torch.constant.int 1
    %int2_3025 = torch.constant.int 2
    %5258 = torch.aten.transpose.int %4689, %int1_3024, %int2_3025 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5258, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3026 = torch.constant.int 1
    %int2_3027 = torch.constant.int 2
    %5259 = torch.aten.transpose.int %4704, %int1_3026, %int2_3027 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5259, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3028 = torch.constant.int 1
    %int2_3029 = torch.constant.int 2
    %5260 = torch.aten.transpose.int %4719, %int1_3028, %int2_3029 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5260, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3030 = torch.constant.int 1
    %int2_3031 = torch.constant.int 2
    %5261 = torch.aten.transpose.int %4734, %int1_3030, %int2_3031 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5261, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3032 = torch.constant.int 1
    %int2_3033 = torch.constant.int 2
    %5262 = torch.aten.transpose.int %5198, %int1_3032, %int2_3033 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5262, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3034 = torch.constant.int 1
    %int2_3035 = torch.constant.int 2
    %5263 = torch.aten.transpose.int %5200, %int1_3034, %int2_3035 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5263, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3036 = torch.constant.int 1
    %int2_3037 = torch.constant.int 2
    %5264 = torch.aten.transpose.int %5202, %int1_3036, %int2_3037 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5264, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3038 = torch.constant.int 1
    %int2_3039 = torch.constant.int 2
    %5265 = torch.aten.transpose.int %5204, %int1_3038, %int2_3039 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5265, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3040 = torch.constant.int 1
    %int2_3041 = torch.constant.int 2
    %5266 = torch.aten.transpose.int %5206, %int1_3040, %int2_3041 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5266, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3042 = torch.constant.int 1
    %int2_3043 = torch.constant.int 2
    %5267 = torch.aten.transpose.int %5208, %int1_3042, %int2_3043 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5267, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3044 = torch.constant.int 1
    %int2_3045 = torch.constant.int 2
    %5268 = torch.aten.transpose.int %5210, %int1_3044, %int2_3045 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5268, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3046 = torch.constant.int 1
    %int2_3047 = torch.constant.int 2
    %5269 = torch.aten.transpose.int %5212, %int1_3046, %int2_3047 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5269, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3048 = torch.constant.int 1
    %int2_3049 = torch.constant.int 2
    %5270 = torch.aten.transpose.int %5239, %int1_3048, %int2_3049 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5270, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3050 = torch.constant.int 1
    %int2_3051 = torch.constant.int 2
    %5271 = torch.aten.transpose.int %5241, %int1_3050, %int2_3051 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5271, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3052 = torch.constant.int 1
    %int2_3053 = torch.constant.int 2
    %5272 = torch.aten.transpose.int %5243, %int1_3052, %int2_3053 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5272, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3054 = torch.constant.int 1
    %int2_3055 = torch.constant.int 2
    %5273 = torch.aten.transpose.int %5245, %int1_3054, %int2_3055 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5273, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3056 = torch.constant.int 1
    %int2_3057 = torch.constant.int 2
    %5274 = torch.aten.transpose.int %5247, %int1_3056, %int2_3057 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5274, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3058 = torch.constant.int 1
    %int2_3059 = torch.constant.int 2
    %5275 = torch.aten.transpose.int %5249, %int1_3058, %int2_3059 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5275, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3060 = torch.constant.int 1
    %int2_3061 = torch.constant.int 2
    %5276 = torch.aten.transpose.int %5251, %int1_3060, %int2_3061 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5276, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_3062 = torch.constant.int 1
    %int2_3063 = torch.constant.int 2
    %5277 = torch.aten.transpose.int %5253, %int1_3062, %int2_3063 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %5277, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
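    // One fused attention call per shard. The operand order matches
    // _scaled_dot_product_flash_attention_for_cpu(query, key, value,
    // dropout_p = 0.0, is_causal = true, attn_mask = none, scale = none);
    // only result #0 (the attention output) is consumed below.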
    %float0.000000e00_3064 = torch.constant.float 0.000000e+00
    %true_3065 = torch.constant.bool true
    %none_3066 = torch.constant.none
    %none_3067 = torch.constant.none
    %5278:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5254, %5262, %5270, %float0.000000e00_3064, %true_3065, %none_3066, %none_3067) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %5278#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_3068 = torch.constant.float 0.000000e+00
    %true_3069 = torch.constant.bool true
    %none_3070 = torch.constant.none
    %none_3071 = torch.constant.none
    %5279:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5255, %5263, %5271, %float0.000000e00_3068, %true_3069, %none_3070, %none_3071) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %5279#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_3072 = torch.constant.float 0.000000e+00
    %true_3073 = torch.constant.bool true
    %none_3074 = torch.constant.none
    %none_3075 = torch.constant.none
    %5280:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5256, %5264, %5272, %float0.000000e00_3072, %true_3073, %none_3074, %none_3075) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %5280#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_3076 = torch.constant.float 0.000000e+00
    %true_3077 = torch.constant.bool true
    %none_3078 = torch.constant.none
    %none_3079 = torch.constant.none
    %5281:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5257, %5265, %5273, %float0.000000e00_3076, %true_3077, %none_3078, %none_3079) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %5281#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_3080 = torch.constant.float 0.000000e+00
    %true_3081 = torch.constant.bool true
    %none_3082 = torch.constant.none
    %none_3083 = torch.constant.none
    %5282:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5258, %5266, %5274, %float0.000000e00_3080, %true_3081, %none_3082, %none_3083) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %5282#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_3084 = torch.constant.float 0.000000e+00
    %true_3085 = torch.constant.bool true
    %none_3086 = torch.constant.none
    %none_3087 = torch.constant.none
    %5283:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5259, %5267, %5275, %float0.000000e00_3084, %true_3085, %none_3086, %none_3087) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %5283#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_3088 = torch.constant.float 0.000000e+00
    %true_3089 = torch.constant.bool true
    %none_3090 = torch.constant.none
    %none_3091 = torch.constant.none
    %5284:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5260, %5268, %5276, %float0.000000e00_3088, %true_3089, %none_3090, %none_3091) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %5284#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_3092 = torch.constant.float 0.000000e+00
    %true_3093 = torch.constant.bool true
    %none_3094 = torch.constant.none
    %none_3095 = torch.constant.none
    %5285:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%5261, %5269, %5277, %float0.000000e00_3092, %true_3093, %none_3094, %none_3095) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %5285#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
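    // Transpose the attention outputs back to [4, seq, heads, 128].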
    %int1_3096 = torch.constant.int 1
    %int2_3097 = torch.constant.int 2
    %5286 = torch.aten.transpose.int %5278#0, %int1_3096, %int2_3097 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_3098 = torch.constant.int 1
    %int2_3099 = torch.constant.int 2
    %5287 = torch.aten.transpose.int %5279#0, %int1_3098, %int2_3099 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_3100 = torch.constant.int 1
    %int2_3101 = torch.constant.int 2
    %5288 = torch.aten.transpose.int %5280#0, %int1_3100, %int2_3101 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_3102 = torch.constant.int 1
    %int2_3103 = torch.constant.int 2
    %5289 = torch.aten.transpose.int %5281#0, %int1_3102, %int2_3103 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_3104 = torch.constant.int 1
    %int2_3105 = torch.constant.int 2
    %5290 = torch.aten.transpose.int %5282#0, %int1_3104, %int2_3105 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_3106 = torch.constant.int 1
    %int2_3107 = torch.constant.int 2
    %5291 = torch.aten.transpose.int %5283#0, %int1_3106, %int2_3107 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_3108 = torch.constant.int 1
    %int2_3109 = torch.constant.int 2
    %5292 = torch.aten.transpose.int %5284#0, %int1_3108, %int2_3109 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_3110 = torch.constant.int 1
    %int2_3111 = torch.constant.int 2
    %5293 = torch.aten.transpose.int %5285#0, %int1_3110, %int2_3111 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %5293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
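    // Merge the head dims: [4,?,4,128] -> [4,?,512] (4 * 128 = 512).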
    %int4_3112 = torch.constant.int 4
    %int512_3113 = torch.constant.int 512
    %5294 = torch.prim.ListConstruct %int4_3112, %4615, %int512_3113 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5295 = torch.aten.view %5286, %5294 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %5295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3114 = torch.constant.int 4
    %int512_3115 = torch.constant.int 512
    %5296 = torch.prim.ListConstruct %int4_3114, %4630, %int512_3115 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5297 = torch.aten.view %5287, %5296 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %5297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3116 = torch.constant.int 4
    %int512_3117 = torch.constant.int 512
    %5298 = torch.prim.ListConstruct %int4_3116, %4645, %int512_3117 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5299 = torch.aten.view %5288, %5298 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %5299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3118 = torch.constant.int 4
    %int512_3119 = torch.constant.int 512
    %5300 = torch.prim.ListConstruct %int4_3118, %4660, %int512_3119 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5301 = torch.aten.view %5289, %5300 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %5301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3120 = torch.constant.int 4
    %int512_3121 = torch.constant.int 512
    %5302 = torch.prim.ListConstruct %int4_3120, %4675, %int512_3121 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5303 = torch.aten.view %5290, %5302 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %5303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3122 = torch.constant.int 4
    %int512_3123 = torch.constant.int 512
    %5304 = torch.prim.ListConstruct %int4_3122, %4690, %int512_3123 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5305 = torch.aten.view %5291, %5304 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %5305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3124 = torch.constant.int 4
    %int512_3125 = torch.constant.int 512
    %5306 = torch.prim.ListConstruct %int4_3124, %4705, %int512_3125 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5307 = torch.aten.view %5292, %5306 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %5307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3126 = torch.constant.int 4
    %int512_3127 = torch.constant.int 512
    %5308 = torch.prim.ListConstruct %int4_3126, %4720, %int512_3127 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5309 = torch.aten.view %5293, %5308 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %5309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
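    // Transpose each shard's [4096,512] projection weight (%112 ... %119) to
    // [512,4096] for the matmuls below; these look like the attn_output weight
    // shards.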
    %int1_3128 = torch.constant.int 1
    %int0_3129 = torch.constant.int 0
    %5310 = torch.prim.ListConstruct %int1_3128, %int0_3129 : (!torch.int, !torch.int) -> !torch.list<int>
    %5311 = torch.aten.permute %112, %5310 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_3130 = torch.constant.int 1
    %int0_3131 = torch.constant.int 0
    %5312 = torch.prim.ListConstruct %int1_3130, %int0_3131 : (!torch.int, !torch.int) -> !torch.list<int>
    %5313 = torch.aten.permute %113, %5312 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_3132 = torch.constant.int 1
    %int0_3133 = torch.constant.int 0
    %5314 = torch.prim.ListConstruct %int1_3132, %int0_3133 : (!torch.int, !torch.int) -> !torch.list<int>
    %5315 = torch.aten.permute %114, %5314 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_3134 = torch.constant.int 1
    %int0_3135 = torch.constant.int 0
    %5316 = torch.prim.ListConstruct %int1_3134, %int0_3135 : (!torch.int, !torch.int) -> !torch.list<int>
    %5317 = torch.aten.permute %115, %5316 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_3136 = torch.constant.int 1
    %int0_3137 = torch.constant.int 0
    %5318 = torch.prim.ListConstruct %int1_3136, %int0_3137 : (!torch.int, !torch.int) -> !torch.list<int>
    %5319 = torch.aten.permute %116, %5318 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_3138 = torch.constant.int 1
    %int0_3139 = torch.constant.int 0
    %5320 = torch.prim.ListConstruct %int1_3138, %int0_3139 : (!torch.int, !torch.int) -> !torch.list<int>
    %5321 = torch.aten.permute %117, %5320 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_3140 = torch.constant.int 1
    %int0_3141 = torch.constant.int 0
    %5322 = torch.prim.ListConstruct %int1_3140, %int0_3141 : (!torch.int, !torch.int) -> !torch.list<int>
    %5323 = torch.aten.permute %118, %5322 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_3142 = torch.constant.int 1
    %int0_3143 = torch.constant.int 0
    %5324 = torch.prim.ListConstruct %int1_3142, %int0_3143 : (!torch.int, !torch.int) -> !torch.list<int>
    %5325 = torch.aten.permute %119, %5324 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
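    // Per-shard output projection: flatten tokens ([4,?,512] -> [4*seq,512]),
    // matmul against the transposed weight, and restore [4,?,4096].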
    %int4_3144 = torch.constant.int 4
    %5326 = torch.aten.mul.int %int4_3144, %4615 : !torch.int, !torch.int -> !torch.int
    %int512_3145 = torch.constant.int 512
    %5327 = torch.prim.ListConstruct %5326, %int512_3145 : (!torch.int, !torch.int) -> !torch.list<int>
    %5328 = torch.aten.view %5295, %5327 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %5328, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %5329 = torch.aten.mm %5328, %5311 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5329, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3146 = torch.constant.int 4
    %int4096_3147 = torch.constant.int 4096
    %5330 = torch.prim.ListConstruct %int4_3146, %4615, %int4096_3147 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5331 = torch.aten.view %5329, %5330 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_3148 = torch.constant.int 4
    %5332 = torch.aten.mul.int %int4_3148, %4630 : !torch.int, !torch.int -> !torch.int
    %int512_3149 = torch.constant.int 512
    %5333 = torch.prim.ListConstruct %5332, %int512_3149 : (!torch.int, !torch.int) -> !torch.list<int>
    %5334 = torch.aten.view %5297, %5333 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %5334, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %5335 = torch.aten.mm %5334, %5313 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5335, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3150 = torch.constant.int 4
    %int4096_3151 = torch.constant.int 4096
    %5336 = torch.prim.ListConstruct %int4_3150, %4630, %int4096_3151 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5337 = torch.aten.view %5335, %5336 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_3152 = torch.constant.int 4
    %5338 = torch.aten.mul.int %int4_3152, %4645 : !torch.int, !torch.int -> !torch.int
    %int512_3153 = torch.constant.int 512
    %5339 = torch.prim.ListConstruct %5338, %int512_3153 : (!torch.int, !torch.int) -> !torch.list<int>
    %5340 = torch.aten.view %5299, %5339 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %5340, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %5341 = torch.aten.mm %5340, %5315 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5341, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3154 = torch.constant.int 4
    %int4096_3155 = torch.constant.int 4096
    %5342 = torch.prim.ListConstruct %int4_3154, %4645, %int4096_3155 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5343 = torch.aten.view %5341, %5342 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_3156 = torch.constant.int 4
    %5344 = torch.aten.mul.int %int4_3156, %4660 : !torch.int, !torch.int -> !torch.int
    %int512_3157 = torch.constant.int 512
    %5345 = torch.prim.ListConstruct %5344, %int512_3157 : (!torch.int, !torch.int) -> !torch.list<int>
    %5346 = torch.aten.view %5301, %5345 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %5346, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %5347 = torch.aten.mm %5346, %5317 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5347, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3158 = torch.constant.int 4
    %int4096_3159 = torch.constant.int 4096
    %5348 = torch.prim.ListConstruct %int4_3158, %4660, %int4096_3159 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5349 = torch.aten.view %5347, %5348 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_3160 = torch.constant.int 4
    %5350 = torch.aten.mul.int %int4_3160, %4675 : !torch.int, !torch.int -> !torch.int
    %int512_3161 = torch.constant.int 512
    %5351 = torch.prim.ListConstruct %5350, %int512_3161 : (!torch.int, !torch.int) -> !torch.list<int>
    %5352 = torch.aten.view %5303, %5351 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %5352, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %5353 = torch.aten.mm %5352, %5319 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5353, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3162 = torch.constant.int 4
    %int4096_3163 = torch.constant.int 4096
    %5354 = torch.prim.ListConstruct %int4_3162, %4675, %int4096_3163 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5355 = torch.aten.view %5353, %5354 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_3164 = torch.constant.int 4
    %5356 = torch.aten.mul.int %int4_3164, %4690 : !torch.int, !torch.int -> !torch.int
    %int512_3165 = torch.constant.int 512
    %5357 = torch.prim.ListConstruct %5356, %int512_3165 : (!torch.int, !torch.int) -> !torch.list<int>
    %5358 = torch.aten.view %5305, %5357 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %5358, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %5359 = torch.aten.mm %5358, %5321 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5359, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3166 = torch.constant.int 4
    %int4096_3167 = torch.constant.int 4096
    %5360 = torch.prim.ListConstruct %int4_3166, %4690, %int4096_3167 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5361 = torch.aten.view %5359, %5360 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_3168 = torch.constant.int 4
    %5362 = torch.aten.mul.int %int4_3168, %4705 : !torch.int, !torch.int -> !torch.int
    %int512_3169 = torch.constant.int 512
    %5363 = torch.prim.ListConstruct %5362, %int512_3169 : (!torch.int, !torch.int) -> !torch.list<int>
    %5364 = torch.aten.view %5307, %5363 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %5364, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %5365 = torch.aten.mm %5364, %5323 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5365, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3170 = torch.constant.int 4
    %int4096_3171 = torch.constant.int 4096
    %5366 = torch.prim.ListConstruct %int4_3170, %4705, %int4096_3171 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5367 = torch.aten.view %5365, %5366 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_3172 = torch.constant.int 4
    %5368 = torch.aten.mul.int %int4_3172, %4720 : !torch.int, !torch.int -> !torch.int
    %int512_3173 = torch.constant.int 512
    %5369 = torch.prim.ListConstruct %5368, %int512_3173 : (!torch.int, !torch.int) -> !torch.list<int>
    %5370 = torch.aten.view %5309, %5369 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %5370, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %5371 = torch.aten.mm %5370, %5325 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5371, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3174 = torch.constant.int 4
    %int4096_3175 = torch.constant.int 4096
    %5372 = torch.prim.ListConstruct %int4_3174, %4720, %int4096_3175 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5373 = torch.aten.view %5371, %5372 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
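    // Eight partial products now exist, one per device (%5331, %5337, %5343,
    // %5349, %5355, %5361, %5367, %5373); the full result is their elementwise
    // sum. The reduction is unrolled as an explicit all-reduce: for each
    // destination device, the seven non-resident partials are transferred in
    // and accumulated. First pass: everything converges on @__device_0.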
    %5374 = torch_c.to_builtin_tensor %5337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3176 = arith.constant 1 : index
    %dim_3177 = tensor.dim %5374, %c1_3176 : tensor<4x?x4096xf16>
    %5375 = flow.tensor.transfer %5374 : tensor<4x?x4096xf16>{%dim_3177} to #hal.device.promise<@__device_0>
    %5376 = torch_c.from_builtin_tensor %5375 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5377 = torch_c.to_builtin_tensor %5343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3178 = arith.constant 1 : index
    %dim_3179 = tensor.dim %5377, %c1_3178 : tensor<4x?x4096xf16>
    %5378 = flow.tensor.transfer %5377 : tensor<4x?x4096xf16>{%dim_3179} to #hal.device.promise<@__device_0>
    %5379 = torch_c.from_builtin_tensor %5378 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5380 = torch_c.to_builtin_tensor %5349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3180 = arith.constant 1 : index
    %dim_3181 = tensor.dim %5380, %c1_3180 : tensor<4x?x4096xf16>
    %5381 = flow.tensor.transfer %5380 : tensor<4x?x4096xf16>{%dim_3181} to #hal.device.promise<@__device_0>
    %5382 = torch_c.from_builtin_tensor %5381 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5383 = torch_c.to_builtin_tensor %5355 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3182 = arith.constant 1 : index
    %dim_3183 = tensor.dim %5383, %c1_3182 : tensor<4x?x4096xf16>
    %5384 = flow.tensor.transfer %5383 : tensor<4x?x4096xf16>{%dim_3183} to #hal.device.promise<@__device_0>
    %5385 = torch_c.from_builtin_tensor %5384 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5386 = torch_c.to_builtin_tensor %5361 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3184 = arith.constant 1 : index
    %dim_3185 = tensor.dim %5386, %c1_3184 : tensor<4x?x4096xf16>
    %5387 = flow.tensor.transfer %5386 : tensor<4x?x4096xf16>{%dim_3185} to #hal.device.promise<@__device_0>
    %5388 = torch_c.from_builtin_tensor %5387 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5389 = torch_c.to_builtin_tensor %5367 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3186 = arith.constant 1 : index
    %dim_3187 = tensor.dim %5389, %c1_3186 : tensor<4x?x4096xf16>
    %5390 = flow.tensor.transfer %5389 : tensor<4x?x4096xf16>{%dim_3187} to #hal.device.promise<@__device_0>
    %5391 = torch_c.from_builtin_tensor %5390 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5392 = torch_c.to_builtin_tensor %5373 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3188 = arith.constant 1 : index
    %dim_3189 = tensor.dim %5392, %c1_3188 : tensor<4x?x4096xf16>
    %5393 = flow.tensor.transfer %5392 : tensor<4x?x4096xf16>{%dim_3189} to #hal.device.promise<@__device_0>
    %5394 = torch_c.from_builtin_tensor %5393 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3190 = torch.constant.int 1
    %5395 = torch.aten.add.Tensor %5331, %5376, %int1_3190 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3191 = torch.constant.int 1
    %5396 = torch.aten.add.Tensor %5395, %5379, %int1_3191 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3192 = torch.constant.int 1
    %5397 = torch.aten.add.Tensor %5396, %5382, %int1_3192 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3193 = torch.constant.int 1
    %5398 = torch.aten.add.Tensor %5397, %5385, %int1_3193 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3194 = torch.constant.int 1
    %5399 = torch.aten.add.Tensor %5398, %5388, %int1_3194 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3195 = torch.constant.int 1
    %5400 = torch.aten.add.Tensor %5399, %5391, %int1_3195 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3196 = torch.constant.int 1
    %5401 = torch.aten.add.Tensor %5400, %5394, %int1_3196 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
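    // %5401: complete sum on @__device_0. The same copy-and-accumulate block
    // repeats for @__device_1; the partial already resident there (%5337)
    // joins the add chain without a transfer.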
    %5402 = torch_c.to_builtin_tensor %5331 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3197 = arith.constant 1 : index
    %dim_3198 = tensor.dim %5402, %c1_3197 : tensor<4x?x4096xf16>
    %5403 = flow.tensor.transfer %5402 : tensor<4x?x4096xf16>{%dim_3198} to #hal.device.promise<@__device_1>
    %5404 = torch_c.from_builtin_tensor %5403 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5405 = torch_c.to_builtin_tensor %5343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3199 = arith.constant 1 : index
    %dim_3200 = tensor.dim %5405, %c1_3199 : tensor<4x?x4096xf16>
    %5406 = flow.tensor.transfer %5405 : tensor<4x?x4096xf16>{%dim_3200} to #hal.device.promise<@__device_1>
    %5407 = torch_c.from_builtin_tensor %5406 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5408 = torch_c.to_builtin_tensor %5349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3201 = arith.constant 1 : index
    %dim_3202 = tensor.dim %5408, %c1_3201 : tensor<4x?x4096xf16>
    %5409 = flow.tensor.transfer %5408 : tensor<4x?x4096xf16>{%dim_3202} to #hal.device.promise<@__device_1>
    %5410 = torch_c.from_builtin_tensor %5409 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5411 = torch_c.to_builtin_tensor %5355 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3203 = arith.constant 1 : index
    %dim_3204 = tensor.dim %5411, %c1_3203 : tensor<4x?x4096xf16>
    %5412 = flow.tensor.transfer %5411 : tensor<4x?x4096xf16>{%dim_3204} to #hal.device.promise<@__device_1>
    %5413 = torch_c.from_builtin_tensor %5412 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5414 = torch_c.to_builtin_tensor %5361 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3205 = arith.constant 1 : index
    %dim_3206 = tensor.dim %5414, %c1_3205 : tensor<4x?x4096xf16>
    %5415 = flow.tensor.transfer %5414 : tensor<4x?x4096xf16>{%dim_3206} to #hal.device.promise<@__device_1>
    %5416 = torch_c.from_builtin_tensor %5415 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5417 = torch_c.to_builtin_tensor %5367 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3207 = arith.constant 1 : index
    %dim_3208 = tensor.dim %5417, %c1_3207 : tensor<4x?x4096xf16>
    %5418 = flow.tensor.transfer %5417 : tensor<4x?x4096xf16>{%dim_3208} to #hal.device.promise<@__device_1>
    %5419 = torch_c.from_builtin_tensor %5418 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5420 = torch_c.to_builtin_tensor %5373 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3209 = arith.constant 1 : index
    %dim_3210 = tensor.dim %5420, %c1_3209 : tensor<4x?x4096xf16>
    %5421 = flow.tensor.transfer %5420 : tensor<4x?x4096xf16>{%dim_3210} to #hal.device.promise<@__device_1>
    %5422 = torch_c.from_builtin_tensor %5421 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3211 = torch.constant.int 1
    %5423 = torch.aten.add.Tensor %5404, %5337, %int1_3211 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3212 = torch.constant.int 1
    %5424 = torch.aten.add.Tensor %5423, %5407, %int1_3212 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3213 = torch.constant.int 1
    %5425 = torch.aten.add.Tensor %5424, %5410, %int1_3213 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3214 = torch.constant.int 1
    %5426 = torch.aten.add.Tensor %5425, %5413, %int1_3214 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3215 = torch.constant.int 1
    %5427 = torch.aten.add.Tensor %5426, %5416, %int1_3215 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3216 = torch.constant.int 1
    %5428 = torch.aten.add.Tensor %5427, %5419, %int1_3216 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3217 = torch.constant.int 1
    %5429 = torch.aten.add.Tensor %5428, %5422, %int1_3217 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
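    // %5429: complete sum on @__device_1. Third pass, rooted at @__device_2.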
    %5430 = torch_c.to_builtin_tensor %5331 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3218 = arith.constant 1 : index
    %dim_3219 = tensor.dim %5430, %c1_3218 : tensor<4x?x4096xf16>
    %5431 = flow.tensor.transfer %5430 : tensor<4x?x4096xf16>{%dim_3219} to #hal.device.promise<@__device_2>
    %5432 = torch_c.from_builtin_tensor %5431 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5433 = torch_c.to_builtin_tensor %5337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3220 = arith.constant 1 : index
    %dim_3221 = tensor.dim %5433, %c1_3220 : tensor<4x?x4096xf16>
    %5434 = flow.tensor.transfer %5433 : tensor<4x?x4096xf16>{%dim_3221} to #hal.device.promise<@__device_2>
    %5435 = torch_c.from_builtin_tensor %5434 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5436 = torch_c.to_builtin_tensor %5349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3222 = arith.constant 1 : index
    %dim_3223 = tensor.dim %5436, %c1_3222 : tensor<4x?x4096xf16>
    %5437 = flow.tensor.transfer %5436 : tensor<4x?x4096xf16>{%dim_3223} to #hal.device.promise<@__device_2>
    %5438 = torch_c.from_builtin_tensor %5437 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5439 = torch_c.to_builtin_tensor %5355 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3224 = arith.constant 1 : index
    %dim_3225 = tensor.dim %5439, %c1_3224 : tensor<4x?x4096xf16>
    %5440 = flow.tensor.transfer %5439 : tensor<4x?x4096xf16>{%dim_3225} to #hal.device.promise<@__device_2>
    %5441 = torch_c.from_builtin_tensor %5440 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5442 = torch_c.to_builtin_tensor %5361 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3226 = arith.constant 1 : index
    %dim_3227 = tensor.dim %5442, %c1_3226 : tensor<4x?x4096xf16>
    %5443 = flow.tensor.transfer %5442 : tensor<4x?x4096xf16>{%dim_3227} to #hal.device.promise<@__device_2>
    %5444 = torch_c.from_builtin_tensor %5443 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5445 = torch_c.to_builtin_tensor %5367 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3228 = arith.constant 1 : index
    %dim_3229 = tensor.dim %5445, %c1_3228 : tensor<4x?x4096xf16>
    %5446 = flow.tensor.transfer %5445 : tensor<4x?x4096xf16>{%dim_3229} to #hal.device.promise<@__device_2>
    %5447 = torch_c.from_builtin_tensor %5446 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5448 = torch_c.to_builtin_tensor %5373 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3230 = arith.constant 1 : index
    %dim_3231 = tensor.dim %5448, %c1_3230 : tensor<4x?x4096xf16>
    %5449 = flow.tensor.transfer %5448 : tensor<4x?x4096xf16>{%dim_3231} to #hal.device.promise<@__device_2>
    %5450 = torch_c.from_builtin_tensor %5449 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3232 = torch.constant.int 1
    %5451 = torch.aten.add.Tensor %5432, %5435, %int1_3232 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3233 = torch.constant.int 1
    %5452 = torch.aten.add.Tensor %5451, %5343, %int1_3233 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3234 = torch.constant.int 1
    %5453 = torch.aten.add.Tensor %5452, %5438, %int1_3234 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3235 = torch.constant.int 1
    %5454 = torch.aten.add.Tensor %5453, %5441, %int1_3235 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3236 = torch.constant.int 1
    %5455 = torch.aten.add.Tensor %5454, %5444, %int1_3236 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3237 = torch.constant.int 1
    %5456 = torch.aten.add.Tensor %5455, %5447, %int1_3237 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3238 = torch.constant.int 1
    %5457 = torch.aten.add.Tensor %5456, %5450, %int1_3238 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
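    // %5457: complete sum on @__device_2. Fourth pass, rooted at @__device_3.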
    %5458 = torch_c.to_builtin_tensor %5331 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3239 = arith.constant 1 : index
    %dim_3240 = tensor.dim %5458, %c1_3239 : tensor<4x?x4096xf16>
    %5459 = flow.tensor.transfer %5458 : tensor<4x?x4096xf16>{%dim_3240} to #hal.device.promise<@__device_3>
    %5460 = torch_c.from_builtin_tensor %5459 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5461 = torch_c.to_builtin_tensor %5337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3241 = arith.constant 1 : index
    %dim_3242 = tensor.dim %5461, %c1_3241 : tensor<4x?x4096xf16>
    %5462 = flow.tensor.transfer %5461 : tensor<4x?x4096xf16>{%dim_3242} to #hal.device.promise<@__device_3>
    %5463 = torch_c.from_builtin_tensor %5462 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5464 = torch_c.to_builtin_tensor %5343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3243 = arith.constant 1 : index
    %dim_3244 = tensor.dim %5464, %c1_3243 : tensor<4x?x4096xf16>
    %5465 = flow.tensor.transfer %5464 : tensor<4x?x4096xf16>{%dim_3244} to #hal.device.promise<@__device_3>
    %5466 = torch_c.from_builtin_tensor %5465 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5467 = torch_c.to_builtin_tensor %5355 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3245 = arith.constant 1 : index
    %dim_3246 = tensor.dim %5467, %c1_3245 : tensor<4x?x4096xf16>
    %5468 = flow.tensor.transfer %5467 : tensor<4x?x4096xf16>{%dim_3246} to #hal.device.promise<@__device_3>
    %5469 = torch_c.from_builtin_tensor %5468 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5470 = torch_c.to_builtin_tensor %5361 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3247 = arith.constant 1 : index
    %dim_3248 = tensor.dim %5470, %c1_3247 : tensor<4x?x4096xf16>
    %5471 = flow.tensor.transfer %5470 : tensor<4x?x4096xf16>{%dim_3248} to #hal.device.promise<@__device_3>
    %5472 = torch_c.from_builtin_tensor %5471 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5473 = torch_c.to_builtin_tensor %5367 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3249 = arith.constant 1 : index
    %dim_3250 = tensor.dim %5473, %c1_3249 : tensor<4x?x4096xf16>
    %5474 = flow.tensor.transfer %5473 : tensor<4x?x4096xf16>{%dim_3250} to #hal.device.promise<@__device_3>
    %5475 = torch_c.from_builtin_tensor %5474 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5476 = torch_c.to_builtin_tensor %5373 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3251 = arith.constant 1 : index
    %dim_3252 = tensor.dim %5476, %c1_3251 : tensor<4x?x4096xf16>
    %5477 = flow.tensor.transfer %5476 : tensor<4x?x4096xf16>{%dim_3252} to #hal.device.promise<@__device_3>
    %5478 = torch_c.from_builtin_tensor %5477 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3253 = torch.constant.int 1
    %5479 = torch.aten.add.Tensor %5460, %5463, %int1_3253 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3254 = torch.constant.int 1
    %5480 = torch.aten.add.Tensor %5479, %5466, %int1_3254 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3255 = torch.constant.int 1
    %5481 = torch.aten.add.Tensor %5480, %5349, %int1_3255 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3256 = torch.constant.int 1
    %5482 = torch.aten.add.Tensor %5481, %5469, %int1_3256 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3257 = torch.constant.int 1
    %5483 = torch.aten.add.Tensor %5482, %5472, %int1_3257 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3258 = torch.constant.int 1
    %5484 = torch.aten.add.Tensor %5483, %5475, %int1_3258 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3259 = torch.constant.int 1
    %5485 = torch.aten.add.Tensor %5484, %5478, %int1_3259 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
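    // %5485: complete sum on @__device_3. Fifth pass, rooted at @__device_4.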
    %5486 = torch_c.to_builtin_tensor %5331 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3260 = arith.constant 1 : index
    %dim_3261 = tensor.dim %5486, %c1_3260 : tensor<4x?x4096xf16>
    %5487 = flow.tensor.transfer %5486 : tensor<4x?x4096xf16>{%dim_3261} to #hal.device.promise<@__device_4>
    %5488 = torch_c.from_builtin_tensor %5487 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5489 = torch_c.to_builtin_tensor %5337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3262 = arith.constant 1 : index
    %dim_3263 = tensor.dim %5489, %c1_3262 : tensor<4x?x4096xf16>
    %5490 = flow.tensor.transfer %5489 : tensor<4x?x4096xf16>{%dim_3263} to #hal.device.promise<@__device_4>
    %5491 = torch_c.from_builtin_tensor %5490 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5492 = torch_c.to_builtin_tensor %5343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3264 = arith.constant 1 : index
    %dim_3265 = tensor.dim %5492, %c1_3264 : tensor<4x?x4096xf16>
    %5493 = flow.tensor.transfer %5492 : tensor<4x?x4096xf16>{%dim_3265} to #hal.device.promise<@__device_4>
    %5494 = torch_c.from_builtin_tensor %5493 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5495 = torch_c.to_builtin_tensor %5349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3266 = arith.constant 1 : index
    %dim_3267 = tensor.dim %5495, %c1_3266 : tensor<4x?x4096xf16>
    %5496 = flow.tensor.transfer %5495 : tensor<4x?x4096xf16>{%dim_3267} to #hal.device.promise<@__device_4>
    %5497 = torch_c.from_builtin_tensor %5496 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5498 = torch_c.to_builtin_tensor %5361 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3268 = arith.constant 1 : index
    %dim_3269 = tensor.dim %5498, %c1_3268 : tensor<4x?x4096xf16>
    %5499 = flow.tensor.transfer %5498 : tensor<4x?x4096xf16>{%dim_3269} to #hal.device.promise<@__device_4>
    %5500 = torch_c.from_builtin_tensor %5499 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5501 = torch_c.to_builtin_tensor %5367 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3270 = arith.constant 1 : index
    %dim_3271 = tensor.dim %5501, %c1_3270 : tensor<4x?x4096xf16>
    %5502 = flow.tensor.transfer %5501 : tensor<4x?x4096xf16>{%dim_3271} to #hal.device.promise<@__device_4>
    %5503 = torch_c.from_builtin_tensor %5502 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5504 = torch_c.to_builtin_tensor %5373 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3272 = arith.constant 1 : index
    %dim_3273 = tensor.dim %5504, %c1_3272 : tensor<4x?x4096xf16>
    %5505 = flow.tensor.transfer %5504 : tensor<4x?x4096xf16>{%dim_3273} to #hal.device.promise<@__device_4>
    %5506 = torch_c.from_builtin_tensor %5505 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3274 = torch.constant.int 1
    %5507 = torch.aten.add.Tensor %5488, %5491, %int1_3274 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3275 = torch.constant.int 1
    %5508 = torch.aten.add.Tensor %5507, %5494, %int1_3275 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3276 = torch.constant.int 1
    %5509 = torch.aten.add.Tensor %5508, %5497, %int1_3276 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3277 = torch.constant.int 1
    %5510 = torch.aten.add.Tensor %5509, %5355, %int1_3277 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3278 = torch.constant.int 1
    %5511 = torch.aten.add.Tensor %5510, %5500, %int1_3278 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3279 = torch.constant.int 1
    %5512 = torch.aten.add.Tensor %5511, %5503, %int1_3279 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3280 = torch.constant.int 1
    %5513 = torch.aten.add.Tensor %5512, %5506, %int1_3280 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
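    // %5513: complete sum on @__device_4. Sixth pass, rooted at @__device_5.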
    %5514 = torch_c.to_builtin_tensor %5331 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3281 = arith.constant 1 : index
    %dim_3282 = tensor.dim %5514, %c1_3281 : tensor<4x?x4096xf16>
    %5515 = flow.tensor.transfer %5514 : tensor<4x?x4096xf16>{%dim_3282} to #hal.device.promise<@__device_5>
    %5516 = torch_c.from_builtin_tensor %5515 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5517 = torch_c.to_builtin_tensor %5337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3283 = arith.constant 1 : index
    %dim_3284 = tensor.dim %5517, %c1_3283 : tensor<4x?x4096xf16>
    %5518 = flow.tensor.transfer %5517 : tensor<4x?x4096xf16>{%dim_3284} to #hal.device.promise<@__device_5>
    %5519 = torch_c.from_builtin_tensor %5518 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5520 = torch_c.to_builtin_tensor %5343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3285 = arith.constant 1 : index
    %dim_3286 = tensor.dim %5520, %c1_3285 : tensor<4x?x4096xf16>
    %5521 = flow.tensor.transfer %5520 : tensor<4x?x4096xf16>{%dim_3286} to #hal.device.promise<@__device_5>
    %5522 = torch_c.from_builtin_tensor %5521 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5523 = torch_c.to_builtin_tensor %5349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3287 = arith.constant 1 : index
    %dim_3288 = tensor.dim %5523, %c1_3287 : tensor<4x?x4096xf16>
    %5524 = flow.tensor.transfer %5523 : tensor<4x?x4096xf16>{%dim_3288} to #hal.device.promise<@__device_5>
    %5525 = torch_c.from_builtin_tensor %5524 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5526 = torch_c.to_builtin_tensor %5355 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3289 = arith.constant 1 : index
    %dim_3290 = tensor.dim %5526, %c1_3289 : tensor<4x?x4096xf16>
    %5527 = flow.tensor.transfer %5526 : tensor<4x?x4096xf16>{%dim_3290} to #hal.device.promise<@__device_5>
    %5528 = torch_c.from_builtin_tensor %5527 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5529 = torch_c.to_builtin_tensor %5367 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3291 = arith.constant 1 : index
    %dim_3292 = tensor.dim %5529, %c1_3291 : tensor<4x?x4096xf16>
    %5530 = flow.tensor.transfer %5529 : tensor<4x?x4096xf16>{%dim_3292} to #hal.device.promise<@__device_5>
    %5531 = torch_c.from_builtin_tensor %5530 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5532 = torch_c.to_builtin_tensor %5373 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3293 = arith.constant 1 : index
    %dim_3294 = tensor.dim %5532, %c1_3293 : tensor<4x?x4096xf16>
    %5533 = flow.tensor.transfer %5532 : tensor<4x?x4096xf16>{%dim_3294} to #hal.device.promise<@__device_5>
    %5534 = torch_c.from_builtin_tensor %5533 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3295 = torch.constant.int 1
    %5535 = torch.aten.add.Tensor %5516, %5519, %int1_3295 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3296 = torch.constant.int 1
    %5536 = torch.aten.add.Tensor %5535, %5522, %int1_3296 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3297 = torch.constant.int 1
    %5537 = torch.aten.add.Tensor %5536, %5525, %int1_3297 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3298 = torch.constant.int 1
    %5538 = torch.aten.add.Tensor %5537, %5528, %int1_3298 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3299 = torch.constant.int 1
    %5539 = torch.aten.add.Tensor %5538, %5361, %int1_3299 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3300 = torch.constant.int 1
    %5540 = torch.aten.add.Tensor %5539, %5531, %int1_3300 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3301 = torch.constant.int 1
    %5541 = torch.aten.add.Tensor %5540, %5534, %int1_3301 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
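    // %5541: complete sum on @__device_5. Seventh pass, rooted at @__device_6.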
    %5542 = torch_c.to_builtin_tensor %5331 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3302 = arith.constant 1 : index
    %dim_3303 = tensor.dim %5542, %c1_3302 : tensor<4x?x4096xf16>
    %5543 = flow.tensor.transfer %5542 : tensor<4x?x4096xf16>{%dim_3303} to #hal.device.promise<@__device_6>
    %5544 = torch_c.from_builtin_tensor %5543 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5545 = torch_c.to_builtin_tensor %5337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3304 = arith.constant 1 : index
    %dim_3305 = tensor.dim %5545, %c1_3304 : tensor<4x?x4096xf16>
    %5546 = flow.tensor.transfer %5545 : tensor<4x?x4096xf16>{%dim_3305} to #hal.device.promise<@__device_6>
    %5547 = torch_c.from_builtin_tensor %5546 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5548 = torch_c.to_builtin_tensor %5343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3306 = arith.constant 1 : index
    %dim_3307 = tensor.dim %5548, %c1_3306 : tensor<4x?x4096xf16>
    %5549 = flow.tensor.transfer %5548 : tensor<4x?x4096xf16>{%dim_3307} to #hal.device.promise<@__device_6>
    %5550 = torch_c.from_builtin_tensor %5549 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5551 = torch_c.to_builtin_tensor %5349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3308 = arith.constant 1 : index
    %dim_3309 = tensor.dim %5551, %c1_3308 : tensor<4x?x4096xf16>
    %5552 = flow.tensor.transfer %5551 : tensor<4x?x4096xf16>{%dim_3309} to #hal.device.promise<@__device_6>
    %5553 = torch_c.from_builtin_tensor %5552 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5554 = torch_c.to_builtin_tensor %5355 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3310 = arith.constant 1 : index
    %dim_3311 = tensor.dim %5554, %c1_3310 : tensor<4x?x4096xf16>
    %5555 = flow.tensor.transfer %5554 : tensor<4x?x4096xf16>{%dim_3311} to #hal.device.promise<@__device_6>
    %5556 = torch_c.from_builtin_tensor %5555 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5557 = torch_c.to_builtin_tensor %5361 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3312 = arith.constant 1 : index
    %dim_3313 = tensor.dim %5557, %c1_3312 : tensor<4x?x4096xf16>
    %5558 = flow.tensor.transfer %5557 : tensor<4x?x4096xf16>{%dim_3313} to #hal.device.promise<@__device_6>
    %5559 = torch_c.from_builtin_tensor %5558 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5560 = torch_c.to_builtin_tensor %5373 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3314 = arith.constant 1 : index
    %dim_3315 = tensor.dim %5560, %c1_3314 : tensor<4x?x4096xf16>
    %5561 = flow.tensor.transfer %5560 : tensor<4x?x4096xf16>{%dim_3315} to #hal.device.promise<@__device_6>
    %5562 = torch_c.from_builtin_tensor %5561 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3316 = torch.constant.int 1
    %5563 = torch.aten.add.Tensor %5544, %5547, %int1_3316 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3317 = torch.constant.int 1
    %5564 = torch.aten.add.Tensor %5563, %5550, %int1_3317 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3318 = torch.constant.int 1
    %5565 = torch.aten.add.Tensor %5564, %5553, %int1_3318 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3319 = torch.constant.int 1
    %5566 = torch.aten.add.Tensor %5565, %5556, %int1_3319 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3320 = torch.constant.int 1
    %5567 = torch.aten.add.Tensor %5566, %5559, %int1_3320 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3321 = torch.constant.int 1
    %5568 = torch.aten.add.Tensor %5567, %5367, %int1_3321 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3322 = torch.constant.int 1
    %5569 = torch.aten.add.Tensor %5568, %5562, %int1_3322 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
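    // %5569: complete sum on @__device_6. Final pass, rooted at @__device_7.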
    %5570 = torch_c.to_builtin_tensor %5331 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3323 = arith.constant 1 : index
    %dim_3324 = tensor.dim %5570, %c1_3323 : tensor<4x?x4096xf16>
    %5571 = flow.tensor.transfer %5570 : tensor<4x?x4096xf16>{%dim_3324} to #hal.device.promise<@__device_7>
    %5572 = torch_c.from_builtin_tensor %5571 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5573 = torch_c.to_builtin_tensor %5337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3325 = arith.constant 1 : index
    %dim_3326 = tensor.dim %5573, %c1_3325 : tensor<4x?x4096xf16>
    %5574 = flow.tensor.transfer %5573 : tensor<4x?x4096xf16>{%dim_3326} to #hal.device.promise<@__device_7>
    %5575 = torch_c.from_builtin_tensor %5574 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5576 = torch_c.to_builtin_tensor %5343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3327 = arith.constant 1 : index
    %dim_3328 = tensor.dim %5576, %c1_3327 : tensor<4x?x4096xf16>
    %5577 = flow.tensor.transfer %5576 : tensor<4x?x4096xf16>{%dim_3328} to #hal.device.promise<@__device_7>
    %5578 = torch_c.from_builtin_tensor %5577 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5579 = torch_c.to_builtin_tensor %5349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3329 = arith.constant 1 : index
    %dim_3330 = tensor.dim %5579, %c1_3329 : tensor<4x?x4096xf16>
    %5580 = flow.tensor.transfer %5579 : tensor<4x?x4096xf16>{%dim_3330} to #hal.device.promise<@__device_7>
    %5581 = torch_c.from_builtin_tensor %5580 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5582 = torch_c.to_builtin_tensor %5355 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3331 = arith.constant 1 : index
    %dim_3332 = tensor.dim %5582, %c1_3331 : tensor<4x?x4096xf16>
    %5583 = flow.tensor.transfer %5582 : tensor<4x?x4096xf16>{%dim_3332} to #hal.device.promise<@__device_7>
    %5584 = torch_c.from_builtin_tensor %5583 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5585 = torch_c.to_builtin_tensor %5361 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3333 = arith.constant 1 : index
    %dim_3334 = tensor.dim %5585, %c1_3333 : tensor<4x?x4096xf16>
    %5586 = flow.tensor.transfer %5585 : tensor<4x?x4096xf16>{%dim_3334} to #hal.device.promise<@__device_7>
    %5587 = torch_c.from_builtin_tensor %5586 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5588 = torch_c.to_builtin_tensor %5367 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3335 = arith.constant 1 : index
    %dim_3336 = tensor.dim %5588, %c1_3335 : tensor<4x?x4096xf16>
    %5589 = flow.tensor.transfer %5588 : tensor<4x?x4096xf16>{%dim_3336} to #hal.device.promise<@__device_7>
    %5590 = torch_c.from_builtin_tensor %5589 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3337 = torch.constant.int 1
    %5591 = torch.aten.add.Tensor %5572, %5575, %int1_3337 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3338 = torch.constant.int 1
    %5592 = torch.aten.add.Tensor %5591, %5578, %int1_3338 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3339 = torch.constant.int 1
    %5593 = torch.aten.add.Tensor %5592, %5581, %int1_3339 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3340 = torch.constant.int 1
    %5594 = torch.aten.add.Tensor %5593, %5584, %int1_3340 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3341 = torch.constant.int 1
    %5595 = torch.aten.add.Tensor %5594, %5587, %int1_3341 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3342 = torch.constant.int 1
    %5596 = torch.aten.add.Tensor %5595, %5590, %int1_3342 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3343 = torch.constant.int 1
    %5597 = torch.aten.add.Tensor %5596, %5373, %int1_3343 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
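    // The all-reduce is complete: %5401, %5429, %5457, %5485, %5513, %5541,
    // %5569, and %5597 hold identical sums, one replica per device. Each is
    // now added (alpha = 1) to its device's copy of the residual stream
    // (%4257 ... %4264), yielding %5598 ... %5605.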
    %int1_3344 = torch.constant.int 1
    %5598 = torch.aten.add.Tensor %4257, %5401, %int1_3344 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3345 = torch.constant.int 1
    %5599 = torch.aten.add.Tensor %4258, %5429, %int1_3345 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3346 = torch.constant.int 1
    %5600 = torch.aten.add.Tensor %4259, %5457, %int1_3346 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3347 = torch.constant.int 1
    %5601 = torch.aten.add.Tensor %4260, %5485, %int1_3347 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3348 = torch.constant.int 1
    %5602 = torch.aten.add.Tensor %4261, %5513, %int1_3348 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3349 = torch.constant.int 1
    %5603 = torch.aten.add.Tensor %4262, %5541, %int1_3349 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3350 = torch.constant.int 1
    %5604 = torch.aten.add.Tensor %4263, %5569, %int1_3350 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3351 = torch.constant.int 1
    %5605 = torch.aten.add.Tensor %4264, %5597, %int1_3351 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
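    // %5598-%5605 fold the reduced values into the eight per-device residual
    // streams (%4257-%4264). The region that follows matches the standard RMSNorm
    // pattern, applied independently to each copy:
    //   y = w * x / sqrt(mean(x^2, dim=-1, keepdim) + eps),  eps ~= 1e-5
    // i.e. upcast f16 -> f32, square, mean, add-eps, rsqrt, two multiplies, downcast.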
    %int6_3352 = torch.constant.int 6
    %5606 = torch.prims.convert_element_type %5598, %int6_3352 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3353 = torch.constant.int 6
    %5607 = torch.prims.convert_element_type %5599, %int6_3353 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3354 = torch.constant.int 6
    %5608 = torch.prims.convert_element_type %5600, %int6_3354 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3355 = torch.constant.int 6
    %5609 = torch.prims.convert_element_type %5601, %int6_3355 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3356 = torch.constant.int 6
    %5610 = torch.prims.convert_element_type %5602, %int6_3356 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3357 = torch.constant.int 6
    %5611 = torch.prims.convert_element_type %5603, %int6_3357 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3358 = torch.constant.int 6
    %5612 = torch.prims.convert_element_type %5604, %int6_3358 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3359 = torch.constant.int 6
    %5613 = torch.prims.convert_element_type %5605, %int6_3359 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
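    // Dtype code 6 in the convert_element_type ops above is torch.float32 (code 5,
    // used for the downcast later in this block, is torch.float16): the norm
    // statistics are computed in f32 while the surrounding tensors stay f16.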
    %int2_3360 = torch.constant.int 2
    %5614 = torch.aten.pow.Tensor_Scalar %5606, %int2_3360 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3361 = torch.constant.int 2
    %5615 = torch.aten.pow.Tensor_Scalar %5607, %int2_3361 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3362 = torch.constant.int 2
    %5616 = torch.aten.pow.Tensor_Scalar %5608, %int2_3362 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3363 = torch.constant.int 2
    %5617 = torch.aten.pow.Tensor_Scalar %5609, %int2_3363 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3364 = torch.constant.int 2
    %5618 = torch.aten.pow.Tensor_Scalar %5610, %int2_3364 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3365 = torch.constant.int 2
    %5619 = torch.aten.pow.Tensor_Scalar %5611, %int2_3365 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3366 = torch.constant.int 2
    %5620 = torch.aten.pow.Tensor_Scalar %5612, %int2_3366 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3367 = torch.constant.int 2
    %5621 = torch.aten.pow.Tensor_Scalar %5613, %int2_3367 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int-1_3368 = torch.constant.int -1
    %5622 = torch.prim.ListConstruct %int-1_3368 : (!torch.int) -> !torch.list<int>
    %true_3369 = torch.constant.bool true
    %none_3370 = torch.constant.none
    %5623 = torch.aten.mean.dim %5614, %5622, %true_3369, %none_3370 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3371 = torch.constant.int -1
    %5624 = torch.prim.ListConstruct %int-1_3371 : (!torch.int) -> !torch.list<int>
    %true_3372 = torch.constant.bool true
    %none_3373 = torch.constant.none
    %5625 = torch.aten.mean.dim %5615, %5624, %true_3372, %none_3373 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3374 = torch.constant.int -1
    %5626 = torch.prim.ListConstruct %int-1_3374 : (!torch.int) -> !torch.list<int>
    %true_3375 = torch.constant.bool true
    %none_3376 = torch.constant.none
    %5627 = torch.aten.mean.dim %5616, %5626, %true_3375, %none_3376 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3377 = torch.constant.int -1
    %5628 = torch.prim.ListConstruct %int-1_3377 : (!torch.int) -> !torch.list<int>
    %true_3378 = torch.constant.bool true
    %none_3379 = torch.constant.none
    %5629 = torch.aten.mean.dim %5617, %5628, %true_3378, %none_3379 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3380 = torch.constant.int -1
    %5630 = torch.prim.ListConstruct %int-1_3380 : (!torch.int) -> !torch.list<int>
    %true_3381 = torch.constant.bool true
    %none_3382 = torch.constant.none
    %5631 = torch.aten.mean.dim %5618, %5630, %true_3381, %none_3382 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3383 = torch.constant.int -1
    %5632 = torch.prim.ListConstruct %int-1_3383 : (!torch.int) -> !torch.list<int>
    %true_3384 = torch.constant.bool true
    %none_3385 = torch.constant.none
    %5633 = torch.aten.mean.dim %5619, %5632, %true_3384, %none_3385 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3386 = torch.constant.int -1
    %5634 = torch.prim.ListConstruct %int-1_3386 : (!torch.int) -> !torch.list<int>
    %true_3387 = torch.constant.bool true
    %none_3388 = torch.constant.none
    %5635 = torch.aten.mean.dim %5620, %5634, %true_3387, %none_3388 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3389 = torch.constant.int -1
    %5636 = torch.prim.ListConstruct %int-1_3389 : (!torch.int) -> !torch.list<int>
    %true_3390 = torch.constant.bool true
    %none_3391 = torch.constant.none
    %5637 = torch.aten.mean.dim %5621, %5636, %true_3390, %none_3391 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
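    // %5614-%5621 square the f32 activations; %5623 ... %5637 reduce them with
    // mean over the last dimension (dim list [-1], keepdim = true), yielding the
    // per-position mean square of shape [4,?,1].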
    %float9.999990e-06_3392 = torch.constant.float 9.9999997473787516E-6
    %int1_3393 = torch.constant.int 1
    %5638 = torch.aten.add.Scalar %5623, %float9.999990e-06_3392, %int1_3393 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3394 = torch.constant.float 9.9999997473787516E-6
    %int1_3395 = torch.constant.int 1
    %5639 = torch.aten.add.Scalar %5625, %float9.999990e-06_3394, %int1_3395 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3396 = torch.constant.float 9.9999997473787516E-6
    %int1_3397 = torch.constant.int 1
    %5640 = torch.aten.add.Scalar %5627, %float9.999990e-06_3396, %int1_3397 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3398 = torch.constant.float 9.9999997473787516E-6
    %int1_3399 = torch.constant.int 1
    %5641 = torch.aten.add.Scalar %5629, %float9.999990e-06_3398, %int1_3399 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3400 = torch.constant.float 9.9999997473787516E-6
    %int1_3401 = torch.constant.int 1
    %5642 = torch.aten.add.Scalar %5631, %float9.999990e-06_3400, %int1_3401 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3402 = torch.constant.float 9.9999997473787516E-6
    %int1_3403 = torch.constant.int 1
    %5643 = torch.aten.add.Scalar %5633, %float9.999990e-06_3402, %int1_3403 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3404 = torch.constant.float 9.9999997473787516E-6
    %int1_3405 = torch.constant.int 1
    %5644 = torch.aten.add.Scalar %5635, %float9.999990e-06_3404, %int1_3405 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3406 = torch.constant.float 9.9999997473787516E-6
    %int1_3407 = torch.constant.int 1
    %5645 = torch.aten.add.Scalar %5637, %float9.999990e-06_3406, %int1_3407 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
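    // add.Scalar applies the RMSNorm epsilon to each mean square before the rsqrt
    // below; 9.9999997473787516E-6 is the f32 rounding of 1e-5.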
    %5646 = torch.aten.rsqrt %5638 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %5647 = torch.aten.rsqrt %5639 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %5648 = torch.aten.rsqrt %5640 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %5649 = torch.aten.rsqrt %5641 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %5650 = torch.aten.rsqrt %5642 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %5651 = torch.aten.rsqrt %5643 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %5652 = torch.aten.rsqrt %5644 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %5653 = torch.aten.rsqrt %5645 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %5653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
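    // rsqrt of (mean(x^2) + eps) gives the reciprocal RMS per position; the
    // multiplies below broadcast the [4,?,1] factors over the 4096-wide hidden dim.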
    %5654 = torch.aten.mul.Tensor %5606, %5646 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5655 = torch.aten.mul.Tensor %5607, %5647 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5656 = torch.aten.mul.Tensor %5608, %5648 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5657 = torch.aten.mul.Tensor %5609, %5649 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5658 = torch.aten.mul.Tensor %5610, %5650 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5659 = torch.aten.mul.Tensor %5611, %5651 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5660 = torch.aten.mul.Tensor %5612, %5652 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5661 = torch.aten.mul.Tensor %5613, %5653 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5662 = torch.aten.mul.Tensor %120, %5654 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5663 = torch.aten.mul.Tensor %121, %5655 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5664 = torch.aten.mul.Tensor %122, %5656 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5665 = torch.aten.mul.Tensor %123, %5657 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5666 = torch.aten.mul.Tensor %124, %5658 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5667 = torch.aten.mul.Tensor %125, %5659 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5668 = torch.aten.mul.Tensor %126, %5660 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %5669 = torch.aten.mul.Tensor %127, %5661 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %5669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
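    // %5662-%5669 scale the normalized f32 activations by the [4096]-element norm
    // weights %120-%127 -- presumably one replicated copy per device, matching the
    // per-device replication used for the other norm-weight globals in this module.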
    %int5_3408 = torch.constant.int 5
    %5670 = torch.prims.convert_element_type %5662, %int5_3408 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3409 = torch.constant.int 5
    %5671 = torch.prims.convert_element_type %5663, %int5_3409 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3410 = torch.constant.int 5
    %5672 = torch.prims.convert_element_type %5664, %int5_3410 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3411 = torch.constant.int 5
    %5673 = torch.prims.convert_element_type %5665, %int5_3411 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3412 = torch.constant.int 5
    %5674 = torch.prims.convert_element_type %5666, %int5_3412 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3413 = torch.constant.int 5
    %5675 = torch.prims.convert_element_type %5667, %int5_3413 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3414 = torch.constant.int 5
    %5676 = torch.prims.convert_element_type %5668, %int5_3414 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3415 = torch.constant.int 5
    %5677 = torch.prims.convert_element_type %5669, %int5_3415 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
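    // The normalized result is cast back to f16 (dtype code 5) before the FFN
    // matmuls, keeping the projections in half precision.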
    %int1_3416 = torch.constant.int 1
    %int0_3417 = torch.constant.int 0
    %5678 = torch.prim.ListConstruct %int1_3416, %int0_3417 : (!torch.int, !torch.int) -> !torch.list<int>
    %5679 = torch.aten.permute %128, %5678 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3418 = torch.constant.int 1
    %int0_3419 = torch.constant.int 0
    %5680 = torch.prim.ListConstruct %int1_3418, %int0_3419 : (!torch.int, !torch.int) -> !torch.list<int>
    %5681 = torch.aten.permute %129, %5680 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3420 = torch.constant.int 1
    %int0_3421 = torch.constant.int 0
    %5682 = torch.prim.ListConstruct %int1_3420, %int0_3421 : (!torch.int, !torch.int) -> !torch.list<int>
    %5683 = torch.aten.permute %130, %5682 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3422 = torch.constant.int 1
    %int0_3423 = torch.constant.int 0
    %5684 = torch.prim.ListConstruct %int1_3422, %int0_3423 : (!torch.int, !torch.int) -> !torch.list<int>
    %5685 = torch.aten.permute %131, %5684 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3424 = torch.constant.int 1
    %int0_3425 = torch.constant.int 0
    %5686 = torch.prim.ListConstruct %int1_3424, %int0_3425 : (!torch.int, !torch.int) -> !torch.list<int>
    %5687 = torch.aten.permute %132, %5686 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3426 = torch.constant.int 1
    %int0_3427 = torch.constant.int 0
    %5688 = torch.prim.ListConstruct %int1_3426, %int0_3427 : (!torch.int, !torch.int) -> !torch.list<int>
    %5689 = torch.aten.permute %133, %5688 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3428 = torch.constant.int 1
    %int0_3429 = torch.constant.int 0
    %5690 = torch.prim.ListConstruct %int1_3428, %int0_3429 : (!torch.int, !torch.int) -> !torch.list<int>
    %5691 = torch.aten.permute %134, %5690 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3430 = torch.constant.int 1
    %int0_3431 = torch.constant.int 0
    %5692 = torch.prim.ListConstruct %int1_3430, %int0_3431 : (!torch.int, !torch.int) -> !torch.list<int>
    %5693 = torch.aten.permute %135, %5692 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
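    // %5679-%5693 transpose eight [1792,4096] weight shards (%128-%135) to
    // [4096,1792] for the matmuls below. 1792 = 14336/8, consistent with an
    // eight-way column-parallel split of a 14336-wide FFN intermediate dimension;
    // given the silu applied afterwards, these are presumably the gate-projection
    // shards (an inference from the structure, not from parameter names).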
    %int4_3432 = torch.constant.int 4
    %5694 = torch.aten.mul.int %int4_3432, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3433 = torch.constant.int 4096
    %5695 = torch.prim.ListConstruct %5694, %int4096_3433 : (!torch.int, !torch.int) -> !torch.list<int>
    %5696 = torch.aten.view %5670, %5695 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5696, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5697 = torch.aten.mm %5696, %5679 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5697, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3434 = torch.constant.int 4
    %int1792_3435 = torch.constant.int 1792
    %5698 = torch.prim.ListConstruct %int4_3434, %2482, %int1792_3435 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5699 = torch.aten.view %5697, %5698 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3436 = torch.constant.int 4
    %5700 = torch.aten.mul.int %int4_3436, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3437 = torch.constant.int 4096
    %5701 = torch.prim.ListConstruct %5700, %int4096_3437 : (!torch.int, !torch.int) -> !torch.list<int>
    %5702 = torch.aten.view %5671, %5701 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5702, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5703 = torch.aten.mm %5702, %5681 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5703, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3438 = torch.constant.int 4
    %int1792_3439 = torch.constant.int 1792
    %5704 = torch.prim.ListConstruct %int4_3438, %2482, %int1792_3439 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5705 = torch.aten.view %5703, %5704 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3440 = torch.constant.int 4
    %5706 = torch.aten.mul.int %int4_3440, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3441 = torch.constant.int 4096
    %5707 = torch.prim.ListConstruct %5706, %int4096_3441 : (!torch.int, !torch.int) -> !torch.list<int>
    %5708 = torch.aten.view %5672, %5707 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5708, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5709 = torch.aten.mm %5708, %5683 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5709, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3442 = torch.constant.int 4
    %int1792_3443 = torch.constant.int 1792
    %5710 = torch.prim.ListConstruct %int4_3442, %2482, %int1792_3443 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5711 = torch.aten.view %5709, %5710 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3444 = torch.constant.int 4
    %5712 = torch.aten.mul.int %int4_3444, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3445 = torch.constant.int 4096
    %5713 = torch.prim.ListConstruct %5712, %int4096_3445 : (!torch.int, !torch.int) -> !torch.list<int>
    %5714 = torch.aten.view %5673, %5713 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5714, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5715 = torch.aten.mm %5714, %5685 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5715, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3446 = torch.constant.int 4
    %int1792_3447 = torch.constant.int 1792
    %5716 = torch.prim.ListConstruct %int4_3446, %2482, %int1792_3447 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5717 = torch.aten.view %5715, %5716 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3448 = torch.constant.int 4
    %5718 = torch.aten.mul.int %int4_3448, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3449 = torch.constant.int 4096
    %5719 = torch.prim.ListConstruct %5718, %int4096_3449 : (!torch.int, !torch.int) -> !torch.list<int>
    %5720 = torch.aten.view %5674, %5719 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5720, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5721 = torch.aten.mm %5720, %5687 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5721, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3450 = torch.constant.int 4
    %int1792_3451 = torch.constant.int 1792
    %5722 = torch.prim.ListConstruct %int4_3450, %2482, %int1792_3451 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5723 = torch.aten.view %5721, %5722 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3452 = torch.constant.int 4
    %5724 = torch.aten.mul.int %int4_3452, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3453 = torch.constant.int 4096
    %5725 = torch.prim.ListConstruct %5724, %int4096_3453 : (!torch.int, !torch.int) -> !torch.list<int>
    %5726 = torch.aten.view %5675, %5725 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5726, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5727 = torch.aten.mm %5726, %5689 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5727, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3454 = torch.constant.int 4
    %int1792_3455 = torch.constant.int 1792
    %5728 = torch.prim.ListConstruct %int4_3454, %2482, %int1792_3455 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5729 = torch.aten.view %5727, %5728 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3456 = torch.constant.int 4
    %5730 = torch.aten.mul.int %int4_3456, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3457 = torch.constant.int 4096
    %5731 = torch.prim.ListConstruct %5730, %int4096_3457 : (!torch.int, !torch.int) -> !torch.list<int>
    %5732 = torch.aten.view %5676, %5731 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5732, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5733 = torch.aten.mm %5732, %5691 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5733, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3458 = torch.constant.int 4
    %int1792_3459 = torch.constant.int 1792
    %5734 = torch.prim.ListConstruct %int4_3458, %2482, %int1792_3459 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5735 = torch.aten.view %5733, %5734 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3460 = torch.constant.int 4
    %5736 = torch.aten.mul.int %int4_3460, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3461 = torch.constant.int 4096
    %5737 = torch.prim.ListConstruct %5736, %int4096_3461 : (!torch.int, !torch.int) -> !torch.list<int>
    %5738 = torch.aten.view %5677, %5737 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5738, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5739 = torch.aten.mm %5738, %5693 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5739, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3462 = torch.constant.int 4
    %int1792_3463 = torch.constant.int 1792
    %5740 = torch.prim.ListConstruct %int4_3462, %2482, %int1792_3463 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5741 = torch.aten.view %5739, %5740 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
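    // Each projection uses the same idiom: flatten [4,?,4096] to [4*seq,4096]
    // (seq is the dynamic dim %2482, hence the s0 * 64 bound), run one mm against
    // the [4096,1792] shard, and restore [4,?,1792]; %5699-%5741 are the eight
    // per-device gate outputs.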
    %5742 = torch.aten.silu %5699 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5743 = torch.aten.silu %5705 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5744 = torch.aten.silu %5711 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5745 = torch.aten.silu %5717 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5746 = torch.aten.silu %5723 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5747 = torch.aten.silu %5729 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5748 = torch.aten.silu %5735 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5749 = torch.aten.silu %5741 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
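    // silu (x * sigmoid(x)) applied to each gate output -- the activation half of
    // what looks like a SwiGLU feed-forward block, completed by the elementwise
    // multiply with the up projection further down.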
    %int1_3464 = torch.constant.int 1
    %int0_3465 = torch.constant.int 0
    %5750 = torch.prim.ListConstruct %int1_3464, %int0_3465 : (!torch.int, !torch.int) -> !torch.list<int>
    %5751 = torch.aten.permute %136, %5750 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3466 = torch.constant.int 1
    %int0_3467 = torch.constant.int 0
    %5752 = torch.prim.ListConstruct %int1_3466, %int0_3467 : (!torch.int, !torch.int) -> !torch.list<int>
    %5753 = torch.aten.permute %137, %5752 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3468 = torch.constant.int 1
    %int0_3469 = torch.constant.int 0
    %5754 = torch.prim.ListConstruct %int1_3468, %int0_3469 : (!torch.int, !torch.int) -> !torch.list<int>
    %5755 = torch.aten.permute %138, %5754 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3470 = torch.constant.int 1
    %int0_3471 = torch.constant.int 0
    %5756 = torch.prim.ListConstruct %int1_3470, %int0_3471 : (!torch.int, !torch.int) -> !torch.list<int>
    %5757 = torch.aten.permute %139, %5756 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3472 = torch.constant.int 1
    %int0_3473 = torch.constant.int 0
    %5758 = torch.prim.ListConstruct %int1_3472, %int0_3473 : (!torch.int, !torch.int) -> !torch.list<int>
    %5759 = torch.aten.permute %140, %5758 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3474 = torch.constant.int 1
    %int0_3475 = torch.constant.int 0
    %5760 = torch.prim.ListConstruct %int1_3474, %int0_3475 : (!torch.int, !torch.int) -> !torch.list<int>
    %5761 = torch.aten.permute %141, %5760 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3476 = torch.constant.int 1
    %int0_3477 = torch.constant.int 0
    %5762 = torch.prim.ListConstruct %int1_3476, %int0_3477 : (!torch.int, !torch.int) -> !torch.list<int>
    %5763 = torch.aten.permute %142, %5762 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_3478 = torch.constant.int 1
    %int0_3479 = torch.constant.int 0
    %5764 = torch.prim.ListConstruct %int1_3478, %int0_3479 : (!torch.int, !torch.int) -> !torch.list<int>
    %5765 = torch.aten.permute %143, %5764 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
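    // %5751-%5765 transpose a second set of [1792,4096] shards (%136-%143); these
    // feed the parallel matmuls below and are presumably the up-projection shards
    // of the same FFN.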
    %int4_3480 = torch.constant.int 4
    %5766 = torch.aten.mul.int %int4_3480, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3481 = torch.constant.int 4096
    %5767 = torch.prim.ListConstruct %5766, %int4096_3481 : (!torch.int, !torch.int) -> !torch.list<int>
    %5768 = torch.aten.view %5670, %5767 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5768, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5769 = torch.aten.mm %5768, %5751 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5769, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3482 = torch.constant.int 4
    %int1792_3483 = torch.constant.int 1792
    %5770 = torch.prim.ListConstruct %int4_3482, %2482, %int1792_3483 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5771 = torch.aten.view %5769, %5770 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3484 = torch.constant.int 4
    %5772 = torch.aten.mul.int %int4_3484, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3485 = torch.constant.int 4096
    %5773 = torch.prim.ListConstruct %5772, %int4096_3485 : (!torch.int, !torch.int) -> !torch.list<int>
    %5774 = torch.aten.view %5671, %5773 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5774, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5775 = torch.aten.mm %5774, %5753 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5775, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3486 = torch.constant.int 4
    %int1792_3487 = torch.constant.int 1792
    %5776 = torch.prim.ListConstruct %int4_3486, %2482, %int1792_3487 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5777 = torch.aten.view %5775, %5776 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3488 = torch.constant.int 4
    %5778 = torch.aten.mul.int %int4_3488, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3489 = torch.constant.int 4096
    %5779 = torch.prim.ListConstruct %5778, %int4096_3489 : (!torch.int, !torch.int) -> !torch.list<int>
    %5780 = torch.aten.view %5672, %5779 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5780, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5781 = torch.aten.mm %5780, %5755 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5781, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3490 = torch.constant.int 4
    %int1792_3491 = torch.constant.int 1792
    %5782 = torch.prim.ListConstruct %int4_3490, %2482, %int1792_3491 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5783 = torch.aten.view %5781, %5782 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3492 = torch.constant.int 4
    %5784 = torch.aten.mul.int %int4_3492, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3493 = torch.constant.int 4096
    %5785 = torch.prim.ListConstruct %5784, %int4096_3493 : (!torch.int, !torch.int) -> !torch.list<int>
    %5786 = torch.aten.view %5673, %5785 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5786, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5787 = torch.aten.mm %5786, %5757 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5787, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3494 = torch.constant.int 4
    %int1792_3495 = torch.constant.int 1792
    %5788 = torch.prim.ListConstruct %int4_3494, %2482, %int1792_3495 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5789 = torch.aten.view %5787, %5788 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3496 = torch.constant.int 4
    %5790 = torch.aten.mul.int %int4_3496, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3497 = torch.constant.int 4096
    %5791 = torch.prim.ListConstruct %5790, %int4096_3497 : (!torch.int, !torch.int) -> !torch.list<int>
    %5792 = torch.aten.view %5674, %5791 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5792, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5793 = torch.aten.mm %5792, %5759 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5793, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3498 = torch.constant.int 4
    %int1792_3499 = torch.constant.int 1792
    %5794 = torch.prim.ListConstruct %int4_3498, %2482, %int1792_3499 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5795 = torch.aten.view %5793, %5794 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3500 = torch.constant.int 4
    %5796 = torch.aten.mul.int %int4_3500, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3501 = torch.constant.int 4096
    %5797 = torch.prim.ListConstruct %5796, %int4096_3501 : (!torch.int, !torch.int) -> !torch.list<int>
    %5798 = torch.aten.view %5675, %5797 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5798, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5799 = torch.aten.mm %5798, %5761 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5799, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3502 = torch.constant.int 4
    %int1792_3503 = torch.constant.int 1792
    %5800 = torch.prim.ListConstruct %int4_3502, %2482, %int1792_3503 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5801 = torch.aten.view %5799, %5800 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3504 = torch.constant.int 4
    %5802 = torch.aten.mul.int %int4_3504, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3505 = torch.constant.int 4096
    %5803 = torch.prim.ListConstruct %5802, %int4096_3505 : (!torch.int, !torch.int) -> !torch.list<int>
    %5804 = torch.aten.view %5676, %5803 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5804, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5805 = torch.aten.mm %5804, %5763 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5805, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3506 = torch.constant.int 4
    %int1792_3507 = torch.constant.int 1792
    %5806 = torch.prim.ListConstruct %int4_3506, %2482, %int1792_3507 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5807 = torch.aten.view %5805, %5806 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_3508 = torch.constant.int 4
    %5808 = torch.aten.mul.int %int4_3508, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3509 = torch.constant.int 4096
    %5809 = torch.prim.ListConstruct %5808, %int4096_3509 : (!torch.int, !torch.int) -> !torch.list<int>
    %5810 = torch.aten.view %5677, %5809 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5810, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %5811 = torch.aten.mm %5810, %5765 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5811, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_3510 = torch.constant.int 4
    %int1792_3511 = torch.constant.int 1792
    %5812 = torch.prim.ListConstruct %int4_3510, %2482, %int1792_3511 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5813 = torch.aten.view %5811, %5812 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
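    // %5771 ... %5813 are the per-device up-projection outputs, computed with the
    // same flatten / mm / reshape idiom as the gate projections above.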
    %5814 = torch.aten.mul.Tensor %5742, %5771 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5815 = torch.aten.mul.Tensor %5743, %5777 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5816 = torch.aten.mul.Tensor %5744, %5783 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5817 = torch.aten.mul.Tensor %5745, %5789 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5818 = torch.aten.mul.Tensor %5746, %5795 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5819 = torch.aten.mul.Tensor %5747, %5801 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5820 = torch.aten.mul.Tensor %5748, %5807 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %5821 = torch.aten.mul.Tensor %5749, %5813 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %5821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
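    // Elementwise product silu(gate) * up (%5814-%5821): the SwiGLU combination
    // that feeds the down projection below.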
    %int1_3512 = torch.constant.int 1
    %int0_3513 = torch.constant.int 0
    %5822 = torch.prim.ListConstruct %int1_3512, %int0_3513 : (!torch.int, !torch.int) -> !torch.list<int>
    %5823 = torch.aten.permute %144, %5822 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_3514 = torch.constant.int 1
    %int0_3515 = torch.constant.int 0
    %5824 = torch.prim.ListConstruct %int1_3514, %int0_3515 : (!torch.int, !torch.int) -> !torch.list<int>
    %5825 = torch.aten.permute %145, %5824 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_3516 = torch.constant.int 1
    %int0_3517 = torch.constant.int 0
    %5826 = torch.prim.ListConstruct %int1_3516, %int0_3517 : (!torch.int, !torch.int) -> !torch.list<int>
    %5827 = torch.aten.permute %146, %5826 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_3518 = torch.constant.int 1
    %int0_3519 = torch.constant.int 0
    %5828 = torch.prim.ListConstruct %int1_3518, %int0_3519 : (!torch.int, !torch.int) -> !torch.list<int>
    %5829 = torch.aten.permute %147, %5828 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_3520 = torch.constant.int 1
    %int0_3521 = torch.constant.int 0
    %5830 = torch.prim.ListConstruct %int1_3520, %int0_3521 : (!torch.int, !torch.int) -> !torch.list<int>
    %5831 = torch.aten.permute %148, %5830 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_3522 = torch.constant.int 1
    %int0_3523 = torch.constant.int 0
    %5832 = torch.prim.ListConstruct %int1_3522, %int0_3523 : (!torch.int, !torch.int) -> !torch.list<int>
    %5833 = torch.aten.permute %149, %5832 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_3524 = torch.constant.int 1
    %int0_3525 = torch.constant.int 0
    %5834 = torch.prim.ListConstruct %int1_3524, %int0_3525 : (!torch.int, !torch.int) -> !torch.list<int>
    %5835 = torch.aten.permute %150, %5834 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_3526 = torch.constant.int 1
    %int0_3527 = torch.constant.int 0
    %5836 = torch.prim.ListConstruct %int1_3526, %int0_3527 : (!torch.int, !torch.int) -> !torch.list<int>
    %5837 = torch.aten.permute %151, %5836 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
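    // %5823-%5837 transpose [4096,1792] weights (%144-%151) to [1792,4096],
    // presumably the down-projection shards. Each device contracts only its
    // 1792-wide slice, so the [4,?,4096] results produced below would be partial
    // sums that a later reduction (like the add chain at the top of this region)
    // combines across devices -- again an inference from the sharding pattern.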
    %int1_3528 = torch.constant.int 1
    %5838 = torch.aten.size.int %5699, %int1_3528 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_3529 = torch.constant.int 4
    %5839 = torch.aten.mul.int %int4_3529, %5838 : !torch.int, !torch.int -> !torch.int
    %int1792_3530 = torch.constant.int 1792
    %5840 = torch.prim.ListConstruct %5839, %int1792_3530 : (!torch.int, !torch.int) -> !torch.list<int>
    %5841 = torch.aten.view %5814, %5840 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5841, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %5842 = torch.aten.mm %5841, %5823 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5842, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3531 = torch.constant.int 4
    %int4096_3532 = torch.constant.int 4096
    %5843 = torch.prim.ListConstruct %int4_3531, %5838, %int4096_3532 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5844 = torch.aten.view %5842, %5843 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3533 = torch.constant.int 1
    %5845 = torch.aten.size.int %5705, %int1_3533 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_3534 = torch.constant.int 4
    %5846 = torch.aten.mul.int %int4_3534, %5845 : !torch.int, !torch.int -> !torch.int
    %int1792_3535 = torch.constant.int 1792
    %5847 = torch.prim.ListConstruct %5846, %int1792_3535 : (!torch.int, !torch.int) -> !torch.list<int>
    %5848 = torch.aten.view %5815, %5847 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5848, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %5849 = torch.aten.mm %5848, %5825 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5849, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3536 = torch.constant.int 4
    %int4096_3537 = torch.constant.int 4096
    %5850 = torch.prim.ListConstruct %int4_3536, %5845, %int4096_3537 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5851 = torch.aten.view %5849, %5850 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3538 = torch.constant.int 1
    %5852 = torch.aten.size.int %5711, %int1_3538 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_3539 = torch.constant.int 4
    %5853 = torch.aten.mul.int %int4_3539, %5852 : !torch.int, !torch.int -> !torch.int
    %int1792_3540 = torch.constant.int 1792
    %5854 = torch.prim.ListConstruct %5853, %int1792_3540 : (!torch.int, !torch.int) -> !torch.list<int>
    %5855 = torch.aten.view %5816, %5854 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5855, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %5856 = torch.aten.mm %5855, %5827 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5856, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3541 = torch.constant.int 4
    %int4096_3542 = torch.constant.int 4096
    %5857 = torch.prim.ListConstruct %int4_3541, %5852, %int4096_3542 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5858 = torch.aten.view %5856, %5857 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3543 = torch.constant.int 1
    %5859 = torch.aten.size.int %5717, %int1_3543 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_3544 = torch.constant.int 4
    %5860 = torch.aten.mul.int %int4_3544, %5859 : !torch.int, !torch.int -> !torch.int
    %int1792_3545 = torch.constant.int 1792
    %5861 = torch.prim.ListConstruct %5860, %int1792_3545 : (!torch.int, !torch.int) -> !torch.list<int>
    %5862 = torch.aten.view %5817, %5861 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5862, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %5863 = torch.aten.mm %5862, %5829 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5863, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3546 = torch.constant.int 4
    %int4096_3547 = torch.constant.int 4096
    %5864 = torch.prim.ListConstruct %int4_3546, %5859, %int4096_3547 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5865 = torch.aten.view %5863, %5864 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3548 = torch.constant.int 1
    %5866 = torch.aten.size.int %5723, %int1_3548 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_3549 = torch.constant.int 4
    %5867 = torch.aten.mul.int %int4_3549, %5866 : !torch.int, !torch.int -> !torch.int
    %int1792_3550 = torch.constant.int 1792
    %5868 = torch.prim.ListConstruct %5867, %int1792_3550 : (!torch.int, !torch.int) -> !torch.list<int>
    %5869 = torch.aten.view %5818, %5868 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5869, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %5870 = torch.aten.mm %5869, %5831 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5870, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3551 = torch.constant.int 4
    %int4096_3552 = torch.constant.int 4096
    %5871 = torch.prim.ListConstruct %int4_3551, %5866, %int4096_3552 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5872 = torch.aten.view %5870, %5871 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3553 = torch.constant.int 1
    %5873 = torch.aten.size.int %5729, %int1_3553 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_3554 = torch.constant.int 4
    %5874 = torch.aten.mul.int %int4_3554, %5873 : !torch.int, !torch.int -> !torch.int
    %int1792_3555 = torch.constant.int 1792
    %5875 = torch.prim.ListConstruct %5874, %int1792_3555 : (!torch.int, !torch.int) -> !torch.list<int>
    %5876 = torch.aten.view %5819, %5875 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5876, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %5877 = torch.aten.mm %5876, %5833 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5877, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3556 = torch.constant.int 4
    %int4096_3557 = torch.constant.int 4096
    %5878 = torch.prim.ListConstruct %int4_3556, %5873, %int4096_3557 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5879 = torch.aten.view %5877, %5878 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3558 = torch.constant.int 1
    %5880 = torch.aten.size.int %5735, %int1_3558 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_3559 = torch.constant.int 4
    %5881 = torch.aten.mul.int %int4_3559, %5880 : !torch.int, !torch.int -> !torch.int
    %int1792_3560 = torch.constant.int 1792
    %5882 = torch.prim.ListConstruct %5881, %int1792_3560 : (!torch.int, !torch.int) -> !torch.list<int>
    %5883 = torch.aten.view %5820, %5882 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5883, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %5884 = torch.aten.mm %5883, %5835 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5884, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3561 = torch.constant.int 4
    %int4096_3562 = torch.constant.int 4096
    %5885 = torch.prim.ListConstruct %int4_3561, %5880, %int4096_3562 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5886 = torch.aten.view %5884, %5885 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3563 = torch.constant.int 1
    %5887 = torch.aten.size.int %5741, %int1_3563 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_3564 = torch.constant.int 4
    %5888 = torch.aten.mul.int %int4_3564, %5887 : !torch.int, !torch.int -> !torch.int
    %int1792_3565 = torch.constant.int 1792
    %5889 = torch.prim.ListConstruct %5888, %int1792_3565 : (!torch.int, !torch.int) -> !torch.list<int>
    %5890 = torch.aten.view %5821, %5889 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %5890, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %5891 = torch.aten.mm %5890, %5837 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %5891, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_3566 = torch.constant.int 4
    %int4096_3567 = torch.constant.int 4096
    %5892 = torch.prim.ListConstruct %int4_3566, %5887, %int4096_3567 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %5893 = torch.aten.view %5891, %5892 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
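    // Reduction for @__device_0: the seven remote partials (%5851, %5858, %5865,
    // %5872, %5879, %5886, %5893) are brought in via flow.tensor.transfer
    // (%5844 is already resident there), then accumulated pairwise into %5921.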
    %5894 = torch_c.to_builtin_tensor %5851 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3568 = arith.constant 1 : index
    %dim_3569 = tensor.dim %5894, %c1_3568 : tensor<4x?x4096xf16>
    %5895 = flow.tensor.transfer %5894 : tensor<4x?x4096xf16>{%dim_3569} to #hal.device.promise<@__device_0>
    %5896 = torch_c.from_builtin_tensor %5895 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5897 = torch_c.to_builtin_tensor %5858 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3570 = arith.constant 1 : index
    %dim_3571 = tensor.dim %5897, %c1_3570 : tensor<4x?x4096xf16>
    %5898 = flow.tensor.transfer %5897 : tensor<4x?x4096xf16>{%dim_3571} to #hal.device.promise<@__device_0>
    %5899 = torch_c.from_builtin_tensor %5898 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5900 = torch_c.to_builtin_tensor %5865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3572 = arith.constant 1 : index
    %dim_3573 = tensor.dim %5900, %c1_3572 : tensor<4x?x4096xf16>
    %5901 = flow.tensor.transfer %5900 : tensor<4x?x4096xf16>{%dim_3573} to #hal.device.promise<@__device_0>
    %5902 = torch_c.from_builtin_tensor %5901 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5903 = torch_c.to_builtin_tensor %5872 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3574 = arith.constant 1 : index
    %dim_3575 = tensor.dim %5903, %c1_3574 : tensor<4x?x4096xf16>
    %5904 = flow.tensor.transfer %5903 : tensor<4x?x4096xf16>{%dim_3575} to #hal.device.promise<@__device_0>
    %5905 = torch_c.from_builtin_tensor %5904 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5906 = torch_c.to_builtin_tensor %5879 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3576 = arith.constant 1 : index
    %dim_3577 = tensor.dim %5906, %c1_3576 : tensor<4x?x4096xf16>
    %5907 = flow.tensor.transfer %5906 : tensor<4x?x4096xf16>{%dim_3577} to #hal.device.promise<@__device_0>
    %5908 = torch_c.from_builtin_tensor %5907 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5909 = torch_c.to_builtin_tensor %5886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3578 = arith.constant 1 : index
    %dim_3579 = tensor.dim %5909, %c1_3578 : tensor<4x?x4096xf16>
    %5910 = flow.tensor.transfer %5909 : tensor<4x?x4096xf16>{%dim_3579} to #hal.device.promise<@__device_0>
    %5911 = torch_c.from_builtin_tensor %5910 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5912 = torch_c.to_builtin_tensor %5893 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3580 = arith.constant 1 : index
    %dim_3581 = tensor.dim %5912, %c1_3580 : tensor<4x?x4096xf16>
    %5913 = flow.tensor.transfer %5912 : tensor<4x?x4096xf16>{%dim_3581} to #hal.device.promise<@__device_0>
    %5914 = torch_c.from_builtin_tensor %5913 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3582 = torch.constant.int 1
    %5915 = torch.aten.add.Tensor %5844, %5896, %int1_3582 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3583 = torch.constant.int 1
    %5916 = torch.aten.add.Tensor %5915, %5899, %int1_3583 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3584 = torch.constant.int 1
    %5917 = torch.aten.add.Tensor %5916, %5902, %int1_3584 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3585 = torch.constant.int 1
    %5918 = torch.aten.add.Tensor %5917, %5905, %int1_3585 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3586 = torch.constant.int 1
    %5919 = torch.aten.add.Tensor %5918, %5908, %int1_3586 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3587 = torch.constant.int 1
    %5920 = torch.aten.add.Tensor %5919, %5911, %int1_3587 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3588 = torch.constant.int 1
    %5921 = torch.aten.add.Tensor %5920, %5914, %int1_3588 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
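    // The same transfer-and-accumulate sequence is now replicated for each of the
    // remaining devices — effectively an all-reduce materialized as eight
    // independent gather+sum chains. First @__device_1 (local partial: %5851,
    // summed in at %5943).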
    %5922 = torch_c.to_builtin_tensor %5844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3589 = arith.constant 1 : index
    %dim_3590 = tensor.dim %5922, %c1_3589 : tensor<4x?x4096xf16>
    %5923 = flow.tensor.transfer %5922 : tensor<4x?x4096xf16>{%dim_3590} to #hal.device.promise<@__device_1>
    %5924 = torch_c.from_builtin_tensor %5923 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5925 = torch_c.to_builtin_tensor %5858 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3591 = arith.constant 1 : index
    %dim_3592 = tensor.dim %5925, %c1_3591 : tensor<4x?x4096xf16>
    %5926 = flow.tensor.transfer %5925 : tensor<4x?x4096xf16>{%dim_3592} to #hal.device.promise<@__device_1>
    %5927 = torch_c.from_builtin_tensor %5926 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5928 = torch_c.to_builtin_tensor %5865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3593 = arith.constant 1 : index
    %dim_3594 = tensor.dim %5928, %c1_3593 : tensor<4x?x4096xf16>
    %5929 = flow.tensor.transfer %5928 : tensor<4x?x4096xf16>{%dim_3594} to #hal.device.promise<@__device_1>
    %5930 = torch_c.from_builtin_tensor %5929 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5931 = torch_c.to_builtin_tensor %5872 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3595 = arith.constant 1 : index
    %dim_3596 = tensor.dim %5931, %c1_3595 : tensor<4x?x4096xf16>
    %5932 = flow.tensor.transfer %5931 : tensor<4x?x4096xf16>{%dim_3596} to #hal.device.promise<@__device_1>
    %5933 = torch_c.from_builtin_tensor %5932 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5934 = torch_c.to_builtin_tensor %5879 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3597 = arith.constant 1 : index
    %dim_3598 = tensor.dim %5934, %c1_3597 : tensor<4x?x4096xf16>
    %5935 = flow.tensor.transfer %5934 : tensor<4x?x4096xf16>{%dim_3598} to #hal.device.promise<@__device_1>
    %5936 = torch_c.from_builtin_tensor %5935 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5937 = torch_c.to_builtin_tensor %5886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3599 = arith.constant 1 : index
    %dim_3600 = tensor.dim %5937, %c1_3599 : tensor<4x?x4096xf16>
    %5938 = flow.tensor.transfer %5937 : tensor<4x?x4096xf16>{%dim_3600} to #hal.device.promise<@__device_1>
    %5939 = torch_c.from_builtin_tensor %5938 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5940 = torch_c.to_builtin_tensor %5893 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3601 = arith.constant 1 : index
    %dim_3602 = tensor.dim %5940, %c1_3601 : tensor<4x?x4096xf16>
    %5941 = flow.tensor.transfer %5940 : tensor<4x?x4096xf16>{%dim_3602} to #hal.device.promise<@__device_1>
    %5942 = torch_c.from_builtin_tensor %5941 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3603 = torch.constant.int 1
    %5943 = torch.aten.add.Tensor %5924, %5851, %int1_3603 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3604 = torch.constant.int 1
    %5944 = torch.aten.add.Tensor %5943, %5927, %int1_3604 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3605 = torch.constant.int 1
    %5945 = torch.aten.add.Tensor %5944, %5930, %int1_3605 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3606 = torch.constant.int 1
    %5946 = torch.aten.add.Tensor %5945, %5933, %int1_3606 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3607 = torch.constant.int 1
    %5947 = torch.aten.add.Tensor %5946, %5936, %int1_3607 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3608 = torch.constant.int 1
    %5948 = torch.aten.add.Tensor %5947, %5939, %int1_3608 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3609 = torch.constant.int 1
    %5949 = torch.aten.add.Tensor %5948, %5942, %int1_3609 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
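    // Reduction replica for @__device_2 (local partial: %5858, summed in at %5972).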
    %5950 = torch_c.to_builtin_tensor %5844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3610 = arith.constant 1 : index
    %dim_3611 = tensor.dim %5950, %c1_3610 : tensor<4x?x4096xf16>
    %5951 = flow.tensor.transfer %5950 : tensor<4x?x4096xf16>{%dim_3611} to #hal.device.promise<@__device_2>
    %5952 = torch_c.from_builtin_tensor %5951 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5953 = torch_c.to_builtin_tensor %5851 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3612 = arith.constant 1 : index
    %dim_3613 = tensor.dim %5953, %c1_3612 : tensor<4x?x4096xf16>
    %5954 = flow.tensor.transfer %5953 : tensor<4x?x4096xf16>{%dim_3613} to #hal.device.promise<@__device_2>
    %5955 = torch_c.from_builtin_tensor %5954 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5956 = torch_c.to_builtin_tensor %5865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3614 = arith.constant 1 : index
    %dim_3615 = tensor.dim %5956, %c1_3614 : tensor<4x?x4096xf16>
    %5957 = flow.tensor.transfer %5956 : tensor<4x?x4096xf16>{%dim_3615} to #hal.device.promise<@__device_2>
    %5958 = torch_c.from_builtin_tensor %5957 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5959 = torch_c.to_builtin_tensor %5872 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3616 = arith.constant 1 : index
    %dim_3617 = tensor.dim %5959, %c1_3616 : tensor<4x?x4096xf16>
    %5960 = flow.tensor.transfer %5959 : tensor<4x?x4096xf16>{%dim_3617} to #hal.device.promise<@__device_2>
    %5961 = torch_c.from_builtin_tensor %5960 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5962 = torch_c.to_builtin_tensor %5879 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3618 = arith.constant 1 : index
    %dim_3619 = tensor.dim %5962, %c1_3618 : tensor<4x?x4096xf16>
    %5963 = flow.tensor.transfer %5962 : tensor<4x?x4096xf16>{%dim_3619} to #hal.device.promise<@__device_2>
    %5964 = torch_c.from_builtin_tensor %5963 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5965 = torch_c.to_builtin_tensor %5886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3620 = arith.constant 1 : index
    %dim_3621 = tensor.dim %5965, %c1_3620 : tensor<4x?x4096xf16>
    %5966 = flow.tensor.transfer %5965 : tensor<4x?x4096xf16>{%dim_3621} to #hal.device.promise<@__device_2>
    %5967 = torch_c.from_builtin_tensor %5966 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5968 = torch_c.to_builtin_tensor %5893 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3622 = arith.constant 1 : index
    %dim_3623 = tensor.dim %5968, %c1_3622 : tensor<4x?x4096xf16>
    %5969 = flow.tensor.transfer %5968 : tensor<4x?x4096xf16>{%dim_3623} to #hal.device.promise<@__device_2>
    %5970 = torch_c.from_builtin_tensor %5969 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3624 = torch.constant.int 1
    %5971 = torch.aten.add.Tensor %5952, %5955, %int1_3624 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3625 = torch.constant.int 1
    %5972 = torch.aten.add.Tensor %5971, %5858, %int1_3625 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3626 = torch.constant.int 1
    %5973 = torch.aten.add.Tensor %5972, %5958, %int1_3626 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3627 = torch.constant.int 1
    %5974 = torch.aten.add.Tensor %5973, %5961, %int1_3627 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3628 = torch.constant.int 1
    %5975 = torch.aten.add.Tensor %5974, %5964, %int1_3628 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3629 = torch.constant.int 1
    %5976 = torch.aten.add.Tensor %5975, %5967, %int1_3629 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3630 = torch.constant.int 1
    %5977 = torch.aten.add.Tensor %5976, %5970, %int1_3630 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
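    // Reduction replica for @__device_3 (local partial: %5865, summed in at %6001).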
    %5978 = torch_c.to_builtin_tensor %5844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3631 = arith.constant 1 : index
    %dim_3632 = tensor.dim %5978, %c1_3631 : tensor<4x?x4096xf16>
    %5979 = flow.tensor.transfer %5978 : tensor<4x?x4096xf16>{%dim_3632} to #hal.device.promise<@__device_3>
    %5980 = torch_c.from_builtin_tensor %5979 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5981 = torch_c.to_builtin_tensor %5851 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3633 = arith.constant 1 : index
    %dim_3634 = tensor.dim %5981, %c1_3633 : tensor<4x?x4096xf16>
    %5982 = flow.tensor.transfer %5981 : tensor<4x?x4096xf16>{%dim_3634} to #hal.device.promise<@__device_3>
    %5983 = torch_c.from_builtin_tensor %5982 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5984 = torch_c.to_builtin_tensor %5858 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3635 = arith.constant 1 : index
    %dim_3636 = tensor.dim %5984, %c1_3635 : tensor<4x?x4096xf16>
    %5985 = flow.tensor.transfer %5984 : tensor<4x?x4096xf16>{%dim_3636} to #hal.device.promise<@__device_3>
    %5986 = torch_c.from_builtin_tensor %5985 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5987 = torch_c.to_builtin_tensor %5872 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3637 = arith.constant 1 : index
    %dim_3638 = tensor.dim %5987, %c1_3637 : tensor<4x?x4096xf16>
    %5988 = flow.tensor.transfer %5987 : tensor<4x?x4096xf16>{%dim_3638} to #hal.device.promise<@__device_3>
    %5989 = torch_c.from_builtin_tensor %5988 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5990 = torch_c.to_builtin_tensor %5879 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3639 = arith.constant 1 : index
    %dim_3640 = tensor.dim %5990, %c1_3639 : tensor<4x?x4096xf16>
    %5991 = flow.tensor.transfer %5990 : tensor<4x?x4096xf16>{%dim_3640} to #hal.device.promise<@__device_3>
    %5992 = torch_c.from_builtin_tensor %5991 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5993 = torch_c.to_builtin_tensor %5886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3641 = arith.constant 1 : index
    %dim_3642 = tensor.dim %5993, %c1_3641 : tensor<4x?x4096xf16>
    %5994 = flow.tensor.transfer %5993 : tensor<4x?x4096xf16>{%dim_3642} to #hal.device.promise<@__device_3>
    %5995 = torch_c.from_builtin_tensor %5994 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %5996 = torch_c.to_builtin_tensor %5893 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3643 = arith.constant 1 : index
    %dim_3644 = tensor.dim %5996, %c1_3643 : tensor<4x?x4096xf16>
    %5997 = flow.tensor.transfer %5996 : tensor<4x?x4096xf16>{%dim_3644} to #hal.device.promise<@__device_3>
    %5998 = torch_c.from_builtin_tensor %5997 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3645 = torch.constant.int 1
    %5999 = torch.aten.add.Tensor %5980, %5983, %int1_3645 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %5999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3646 = torch.constant.int 1
    %6000 = torch.aten.add.Tensor %5999, %5986, %int1_3646 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3647 = torch.constant.int 1
    %6001 = torch.aten.add.Tensor %6000, %5865, %int1_3647 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3648 = torch.constant.int 1
    %6002 = torch.aten.add.Tensor %6001, %5989, %int1_3648 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3649 = torch.constant.int 1
    %6003 = torch.aten.add.Tensor %6002, %5992, %int1_3649 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3650 = torch.constant.int 1
    %6004 = torch.aten.add.Tensor %6003, %5995, %int1_3650 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3651 = torch.constant.int 1
    %6005 = torch.aten.add.Tensor %6004, %5998, %int1_3651 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
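    // Reduction replica for @__device_4 (local partial: %5872, summed in at %6030).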
    %6006 = torch_c.to_builtin_tensor %5844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3652 = arith.constant 1 : index
    %dim_3653 = tensor.dim %6006, %c1_3652 : tensor<4x?x4096xf16>
    %6007 = flow.tensor.transfer %6006 : tensor<4x?x4096xf16>{%dim_3653} to #hal.device.promise<@__device_4>
    %6008 = torch_c.from_builtin_tensor %6007 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6009 = torch_c.to_builtin_tensor %5851 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3654 = arith.constant 1 : index
    %dim_3655 = tensor.dim %6009, %c1_3654 : tensor<4x?x4096xf16>
    %6010 = flow.tensor.transfer %6009 : tensor<4x?x4096xf16>{%dim_3655} to #hal.device.promise<@__device_4>
    %6011 = torch_c.from_builtin_tensor %6010 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6012 = torch_c.to_builtin_tensor %5858 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3656 = arith.constant 1 : index
    %dim_3657 = tensor.dim %6012, %c1_3656 : tensor<4x?x4096xf16>
    %6013 = flow.tensor.transfer %6012 : tensor<4x?x4096xf16>{%dim_3657} to #hal.device.promise<@__device_4>
    %6014 = torch_c.from_builtin_tensor %6013 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6015 = torch_c.to_builtin_tensor %5865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3658 = arith.constant 1 : index
    %dim_3659 = tensor.dim %6015, %c1_3658 : tensor<4x?x4096xf16>
    %6016 = flow.tensor.transfer %6015 : tensor<4x?x4096xf16>{%dim_3659} to #hal.device.promise<@__device_4>
    %6017 = torch_c.from_builtin_tensor %6016 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6018 = torch_c.to_builtin_tensor %5879 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3660 = arith.constant 1 : index
    %dim_3661 = tensor.dim %6018, %c1_3660 : tensor<4x?x4096xf16>
    %6019 = flow.tensor.transfer %6018 : tensor<4x?x4096xf16>{%dim_3661} to #hal.device.promise<@__device_4>
    %6020 = torch_c.from_builtin_tensor %6019 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6021 = torch_c.to_builtin_tensor %5886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3662 = arith.constant 1 : index
    %dim_3663 = tensor.dim %6021, %c1_3662 : tensor<4x?x4096xf16>
    %6022 = flow.tensor.transfer %6021 : tensor<4x?x4096xf16>{%dim_3663} to #hal.device.promise<@__device_4>
    %6023 = torch_c.from_builtin_tensor %6022 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6024 = torch_c.to_builtin_tensor %5893 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3664 = arith.constant 1 : index
    %dim_3665 = tensor.dim %6024, %c1_3664 : tensor<4x?x4096xf16>
    %6025 = flow.tensor.transfer %6024 : tensor<4x?x4096xf16>{%dim_3665} to #hal.device.promise<@__device_4>
    %6026 = torch_c.from_builtin_tensor %6025 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3666 = torch.constant.int 1
    %6027 = torch.aten.add.Tensor %6008, %6011, %int1_3666 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3667 = torch.constant.int 1
    %6028 = torch.aten.add.Tensor %6027, %6014, %int1_3667 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3668 = torch.constant.int 1
    %6029 = torch.aten.add.Tensor %6028, %6017, %int1_3668 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3669 = torch.constant.int 1
    %6030 = torch.aten.add.Tensor %6029, %5872, %int1_3669 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3670 = torch.constant.int 1
    %6031 = torch.aten.add.Tensor %6030, %6020, %int1_3670 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3671 = torch.constant.int 1
    %6032 = torch.aten.add.Tensor %6031, %6023, %int1_3671 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3672 = torch.constant.int 1
    %6033 = torch.aten.add.Tensor %6032, %6026, %int1_3672 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
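    // Reduction replica for @__device_5 (local partial: %5879, summed in at %6059).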
    %6034 = torch_c.to_builtin_tensor %5844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3673 = arith.constant 1 : index
    %dim_3674 = tensor.dim %6034, %c1_3673 : tensor<4x?x4096xf16>
    %6035 = flow.tensor.transfer %6034 : tensor<4x?x4096xf16>{%dim_3674} to #hal.device.promise<@__device_5>
    %6036 = torch_c.from_builtin_tensor %6035 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6037 = torch_c.to_builtin_tensor %5851 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3675 = arith.constant 1 : index
    %dim_3676 = tensor.dim %6037, %c1_3675 : tensor<4x?x4096xf16>
    %6038 = flow.tensor.transfer %6037 : tensor<4x?x4096xf16>{%dim_3676} to #hal.device.promise<@__device_5>
    %6039 = torch_c.from_builtin_tensor %6038 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6040 = torch_c.to_builtin_tensor %5858 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3677 = arith.constant 1 : index
    %dim_3678 = tensor.dim %6040, %c1_3677 : tensor<4x?x4096xf16>
    %6041 = flow.tensor.transfer %6040 : tensor<4x?x4096xf16>{%dim_3678} to #hal.device.promise<@__device_5>
    %6042 = torch_c.from_builtin_tensor %6041 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6043 = torch_c.to_builtin_tensor %5865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3679 = arith.constant 1 : index
    %dim_3680 = tensor.dim %6043, %c1_3679 : tensor<4x?x4096xf16>
    %6044 = flow.tensor.transfer %6043 : tensor<4x?x4096xf16>{%dim_3680} to #hal.device.promise<@__device_5>
    %6045 = torch_c.from_builtin_tensor %6044 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6046 = torch_c.to_builtin_tensor %5872 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3681 = arith.constant 1 : index
    %dim_3682 = tensor.dim %6046, %c1_3681 : tensor<4x?x4096xf16>
    %6047 = flow.tensor.transfer %6046 : tensor<4x?x4096xf16>{%dim_3682} to #hal.device.promise<@__device_5>
    %6048 = torch_c.from_builtin_tensor %6047 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6049 = torch_c.to_builtin_tensor %5886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3683 = arith.constant 1 : index
    %dim_3684 = tensor.dim %6049, %c1_3683 : tensor<4x?x4096xf16>
    %6050 = flow.tensor.transfer %6049 : tensor<4x?x4096xf16>{%dim_3684} to #hal.device.promise<@__device_5>
    %6051 = torch_c.from_builtin_tensor %6050 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6052 = torch_c.to_builtin_tensor %5893 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3685 = arith.constant 1 : index
    %dim_3686 = tensor.dim %6052, %c1_3685 : tensor<4x?x4096xf16>
    %6053 = flow.tensor.transfer %6052 : tensor<4x?x4096xf16>{%dim_3686} to #hal.device.promise<@__device_5>
    %6054 = torch_c.from_builtin_tensor %6053 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3687 = torch.constant.int 1
    %6055 = torch.aten.add.Tensor %6036, %6039, %int1_3687 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3688 = torch.constant.int 1
    %6056 = torch.aten.add.Tensor %6055, %6042, %int1_3688 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3689 = torch.constant.int 1
    %6057 = torch.aten.add.Tensor %6056, %6045, %int1_3689 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3690 = torch.constant.int 1
    %6058 = torch.aten.add.Tensor %6057, %6048, %int1_3690 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3691 = torch.constant.int 1
    %6059 = torch.aten.add.Tensor %6058, %5879, %int1_3691 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3692 = torch.constant.int 1
    %6060 = torch.aten.add.Tensor %6059, %6051, %int1_3692 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3693 = torch.constant.int 1
    %6061 = torch.aten.add.Tensor %6060, %6054, %int1_3693 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
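    // Reduction replica for @__device_6 (local partial: %5886, summed in at %6088).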
    %6062 = torch_c.to_builtin_tensor %5844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3694 = arith.constant 1 : index
    %dim_3695 = tensor.dim %6062, %c1_3694 : tensor<4x?x4096xf16>
    %6063 = flow.tensor.transfer %6062 : tensor<4x?x4096xf16>{%dim_3695} to #hal.device.promise<@__device_6>
    %6064 = torch_c.from_builtin_tensor %6063 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6065 = torch_c.to_builtin_tensor %5851 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3696 = arith.constant 1 : index
    %dim_3697 = tensor.dim %6065, %c1_3696 : tensor<4x?x4096xf16>
    %6066 = flow.tensor.transfer %6065 : tensor<4x?x4096xf16>{%dim_3697} to #hal.device.promise<@__device_6>
    %6067 = torch_c.from_builtin_tensor %6066 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6068 = torch_c.to_builtin_tensor %5858 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3698 = arith.constant 1 : index
    %dim_3699 = tensor.dim %6068, %c1_3698 : tensor<4x?x4096xf16>
    %6069 = flow.tensor.transfer %6068 : tensor<4x?x4096xf16>{%dim_3699} to #hal.device.promise<@__device_6>
    %6070 = torch_c.from_builtin_tensor %6069 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6071 = torch_c.to_builtin_tensor %5865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3700 = arith.constant 1 : index
    %dim_3701 = tensor.dim %6071, %c1_3700 : tensor<4x?x4096xf16>
    %6072 = flow.tensor.transfer %6071 : tensor<4x?x4096xf16>{%dim_3701} to #hal.device.promise<@__device_6>
    %6073 = torch_c.from_builtin_tensor %6072 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6074 = torch_c.to_builtin_tensor %5872 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3702 = arith.constant 1 : index
    %dim_3703 = tensor.dim %6074, %c1_3702 : tensor<4x?x4096xf16>
    %6075 = flow.tensor.transfer %6074 : tensor<4x?x4096xf16>{%dim_3703} to #hal.device.promise<@__device_6>
    %6076 = torch_c.from_builtin_tensor %6075 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6077 = torch_c.to_builtin_tensor %5879 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3704 = arith.constant 1 : index
    %dim_3705 = tensor.dim %6077, %c1_3704 : tensor<4x?x4096xf16>
    %6078 = flow.tensor.transfer %6077 : tensor<4x?x4096xf16>{%dim_3705} to #hal.device.promise<@__device_6>
    %6079 = torch_c.from_builtin_tensor %6078 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6080 = torch_c.to_builtin_tensor %5893 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3706 = arith.constant 1 : index
    %dim_3707 = tensor.dim %6080, %c1_3706 : tensor<4x?x4096xf16>
    %6081 = flow.tensor.transfer %6080 : tensor<4x?x4096xf16>{%dim_3707} to #hal.device.promise<@__device_6>
    %6082 = torch_c.from_builtin_tensor %6081 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3708 = torch.constant.int 1
    %6083 = torch.aten.add.Tensor %6064, %6067, %int1_3708 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3709 = torch.constant.int 1
    %6084 = torch.aten.add.Tensor %6083, %6070, %int1_3709 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3710 = torch.constant.int 1
    %6085 = torch.aten.add.Tensor %6084, %6073, %int1_3710 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3711 = torch.constant.int 1
    %6086 = torch.aten.add.Tensor %6085, %6076, %int1_3711 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3712 = torch.constant.int 1
    %6087 = torch.aten.add.Tensor %6086, %6079, %int1_3712 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3713 = torch.constant.int 1
    %6088 = torch.aten.add.Tensor %6087, %5886, %int1_3713 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3714 = torch.constant.int 1
    %6089 = torch.aten.add.Tensor %6088, %6082, %int1_3714 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
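    // Reduction replica for @__device_7 (local partial: %5893, summed in at %6117).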
    %6090 = torch_c.to_builtin_tensor %5844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3715 = arith.constant 1 : index
    %dim_3716 = tensor.dim %6090, %c1_3715 : tensor<4x?x4096xf16>
    %6091 = flow.tensor.transfer %6090 : tensor<4x?x4096xf16>{%dim_3716} to #hal.device.promise<@__device_7>
    %6092 = torch_c.from_builtin_tensor %6091 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6093 = torch_c.to_builtin_tensor %5851 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3717 = arith.constant 1 : index
    %dim_3718 = tensor.dim %6093, %c1_3717 : tensor<4x?x4096xf16>
    %6094 = flow.tensor.transfer %6093 : tensor<4x?x4096xf16>{%dim_3718} to #hal.device.promise<@__device_7>
    %6095 = torch_c.from_builtin_tensor %6094 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6096 = torch_c.to_builtin_tensor %5858 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3719 = arith.constant 1 : index
    %dim_3720 = tensor.dim %6096, %c1_3719 : tensor<4x?x4096xf16>
    %6097 = flow.tensor.transfer %6096 : tensor<4x?x4096xf16>{%dim_3720} to #hal.device.promise<@__device_7>
    %6098 = torch_c.from_builtin_tensor %6097 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6099 = torch_c.to_builtin_tensor %5865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3721 = arith.constant 1 : index
    %dim_3722 = tensor.dim %6099, %c1_3721 : tensor<4x?x4096xf16>
    %6100 = flow.tensor.transfer %6099 : tensor<4x?x4096xf16>{%dim_3722} to #hal.device.promise<@__device_7>
    %6101 = torch_c.from_builtin_tensor %6100 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6102 = torch_c.to_builtin_tensor %5872 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3723 = arith.constant 1 : index
    %dim_3724 = tensor.dim %6102, %c1_3723 : tensor<4x?x4096xf16>
    %6103 = flow.tensor.transfer %6102 : tensor<4x?x4096xf16>{%dim_3724} to #hal.device.promise<@__device_7>
    %6104 = torch_c.from_builtin_tensor %6103 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6105 = torch_c.to_builtin_tensor %5879 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3725 = arith.constant 1 : index
    %dim_3726 = tensor.dim %6105, %c1_3725 : tensor<4x?x4096xf16>
    %6106 = flow.tensor.transfer %6105 : tensor<4x?x4096xf16>{%dim_3726} to #hal.device.promise<@__device_7>
    %6107 = torch_c.from_builtin_tensor %6106 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %6108 = torch_c.to_builtin_tensor %5886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_3727 = arith.constant 1 : index
    %dim_3728 = tensor.dim %6108, %c1_3727 : tensor<4x?x4096xf16>
    %6109 = flow.tensor.transfer %6108 : tensor<4x?x4096xf16>{%dim_3728} to #hal.device.promise<@__device_7>
    %6110 = torch_c.from_builtin_tensor %6109 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3729 = torch.constant.int 1
    %6111 = torch.aten.add.Tensor %6092, %6095, %int1_3729 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3730 = torch.constant.int 1
    %6112 = torch.aten.add.Tensor %6111, %6098, %int1_3730 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3731 = torch.constant.int 1
    %6113 = torch.aten.add.Tensor %6112, %6101, %int1_3731 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3732 = torch.constant.int 1
    %6114 = torch.aten.add.Tensor %6113, %6104, %int1_3732 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3733 = torch.constant.int 1
    %6115 = torch.aten.add.Tensor %6114, %6107, %int1_3733 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3734 = torch.constant.int 1
    %6116 = torch.aten.add.Tensor %6115, %6110, %int1_3734 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3735 = torch.constant.int 1
    %6117 = torch.aten.add.Tensor %6116, %5893, %int1_3735 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
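    // Every device now holds the full reduced sum (%5921, %5949, %5977, %6005,
    // %6033, %6061, %6089, %6117). The eight adds below fold it into each device's
    // running activations (%5598 ... %5605) — the residual connection, judging by
    // the layer structure.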
    %int1_3736 = torch.constant.int 1
    %6118 = torch.aten.add.Tensor %5598, %5921, %int1_3736 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3737 = torch.constant.int 1
    %6119 = torch.aten.add.Tensor %5599, %5949, %int1_3737 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3738 = torch.constant.int 1
    %6120 = torch.aten.add.Tensor %5600, %5977, %int1_3738 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3739 = torch.constant.int 1
    %6121 = torch.aten.add.Tensor %5601, %6005, %int1_3739 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3740 = torch.constant.int 1
    %6122 = torch.aten.add.Tensor %5602, %6033, %int1_3740 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3741 = torch.constant.int 1
    %6123 = torch.aten.add.Tensor %5603, %6061, %int1_3741 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3742 = torch.constant.int 1
    %6124 = torch.aten.add.Tensor %5604, %6089, %int1_3742 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6124, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_3743 = torch.constant.int 1
    %6125 = torch.aten.add.Tensor %5605, %6117, %int1_3743 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
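    // The block below is an RMSNorm over the hidden dim, replicated on all
    // eight devices: y = w * (x / sqrt(mean(x^2, dim=-1, keepdim) + eps)).
    // First step: upcast the f16 activations to f32 (dtype code 6) so the
    // reduction runs in higher precision.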
    %int6_3744 = torch.constant.int 6
    %6126 = torch.prims.convert_element_type %6118, %int6_3744 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3745 = torch.constant.int 6
    %6127 = torch.prims.convert_element_type %6119, %int6_3745 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3746 = torch.constant.int 6
    %6128 = torch.prims.convert_element_type %6120, %int6_3746 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3747 = torch.constant.int 6
    %6129 = torch.prims.convert_element_type %6121, %int6_3747 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3748 = torch.constant.int 6
    %6130 = torch.prims.convert_element_type %6122, %int6_3748 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3749 = torch.constant.int 6
    %6131 = torch.prims.convert_element_type %6123, %int6_3749 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3750 = torch.constant.int 6
    %6132 = torch.prims.convert_element_type %6124, %int6_3750 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_3751 = torch.constant.int 6
    %6133 = torch.prims.convert_element_type %6125, %int6_3751 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
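    // Square each element (x^2) for the mean-of-squares reduction.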
    %int2_3752 = torch.constant.int 2
    %6134 = torch.aten.pow.Tensor_Scalar %6126, %int2_3752 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3753 = torch.constant.int 2
    %6135 = torch.aten.pow.Tensor_Scalar %6127, %int2_3753 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3754 = torch.constant.int 2
    %6136 = torch.aten.pow.Tensor_Scalar %6128, %int2_3754 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3755 = torch.constant.int 2
    %6137 = torch.aten.pow.Tensor_Scalar %6129, %int2_3755 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3756 = torch.constant.int 2
    %6138 = torch.aten.pow.Tensor_Scalar %6130, %int2_3756 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3757 = torch.constant.int 2
    %6139 = torch.aten.pow.Tensor_Scalar %6131, %int2_3757 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3758 = torch.constant.int 2
    %6140 = torch.aten.pow.Tensor_Scalar %6132, %int2_3758 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_3759 = torch.constant.int 2
    %6141 = torch.aten.pow.Tensor_Scalar %6133, %int2_3759 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
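    // Reduce: mean of the squares over the last (hidden) dim, keepdim=true,
    // giving one scalar per token: [4,?,4096] -> [4,?,1].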
    %int-1_3760 = torch.constant.int -1
    %6142 = torch.prim.ListConstruct %int-1_3760 : (!torch.int) -> !torch.list<int>
    %true_3761 = torch.constant.bool true
    %none_3762 = torch.constant.none
    %6143 = torch.aten.mean.dim %6134, %6142, %true_3761, %none_3762 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3763 = torch.constant.int -1
    %6144 = torch.prim.ListConstruct %int-1_3763 : (!torch.int) -> !torch.list<int>
    %true_3764 = torch.constant.bool true
    %none_3765 = torch.constant.none
    %6145 = torch.aten.mean.dim %6135, %6144, %true_3764, %none_3765 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3766 = torch.constant.int -1
    %6146 = torch.prim.ListConstruct %int-1_3766 : (!torch.int) -> !torch.list<int>
    %true_3767 = torch.constant.bool true
    %none_3768 = torch.constant.none
    %6147 = torch.aten.mean.dim %6136, %6146, %true_3767, %none_3768 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3769 = torch.constant.int -1
    %6148 = torch.prim.ListConstruct %int-1_3769 : (!torch.int) -> !torch.list<int>
    %true_3770 = torch.constant.bool true
    %none_3771 = torch.constant.none
    %6149 = torch.aten.mean.dim %6137, %6148, %true_3770, %none_3771 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3772 = torch.constant.int -1
    %6150 = torch.prim.ListConstruct %int-1_3772 : (!torch.int) -> !torch.list<int>
    %true_3773 = torch.constant.bool true
    %none_3774 = torch.constant.none
    %6151 = torch.aten.mean.dim %6138, %6150, %true_3773, %none_3774 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3775 = torch.constant.int -1
    %6152 = torch.prim.ListConstruct %int-1_3775 : (!torch.int) -> !torch.list<int>
    %true_3776 = torch.constant.bool true
    %none_3777 = torch.constant.none
    %6153 = torch.aten.mean.dim %6139, %6152, %true_3776, %none_3777 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3778 = torch.constant.int -1
    %6154 = torch.prim.ListConstruct %int-1_3778 : (!torch.int) -> !torch.list<int>
    %true_3779 = torch.constant.bool true
    %none_3780 = torch.constant.none
    %6155 = torch.aten.mean.dim %6140, %6154, %true_3779, %none_3780 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_3781 = torch.constant.int -1
    %6156 = torch.prim.ListConstruct %int-1_3781 : (!torch.int) -> !torch.list<int>
    %true_3782 = torch.constant.bool true
    %none_3783 = torch.constant.none
    %6157 = torch.aten.mean.dim %6141, %6156, %true_3782, %none_3783 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
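    // Add the epsilon (the f32 rounding of 1e-5) for numerical stability.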
    %float9.999990e-06_3784 = torch.constant.float 9.9999997473787516E-6
    %int1_3785 = torch.constant.int 1
    %6158 = torch.aten.add.Scalar %6143, %float9.999990e-06_3784, %int1_3785 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3786 = torch.constant.float 9.9999997473787516E-6
    %int1_3787 = torch.constant.int 1
    %6159 = torch.aten.add.Scalar %6145, %float9.999990e-06_3786, %int1_3787 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3788 = torch.constant.float 9.9999997473787516E-6
    %int1_3789 = torch.constant.int 1
    %6160 = torch.aten.add.Scalar %6147, %float9.999990e-06_3788, %int1_3789 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3790 = torch.constant.float 9.9999997473787516E-6
    %int1_3791 = torch.constant.int 1
    %6161 = torch.aten.add.Scalar %6149, %float9.999990e-06_3790, %int1_3791 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3792 = torch.constant.float 9.9999997473787516E-6
    %int1_3793 = torch.constant.int 1
    %6162 = torch.aten.add.Scalar %6151, %float9.999990e-06_3792, %int1_3793 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3794 = torch.constant.float 9.9999997473787516E-6
    %int1_3795 = torch.constant.int 1
    %6163 = torch.aten.add.Scalar %6153, %float9.999990e-06_3794, %int1_3795 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3796 = torch.constant.float 9.9999997473787516E-6
    %int1_3797 = torch.constant.int 1
    %6164 = torch.aten.add.Scalar %6155, %float9.999990e-06_3796, %int1_3797 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_3798 = torch.constant.float 9.9999997473787516E-6
    %int1_3799 = torch.constant.int 1
    %6165 = torch.aten.add.Scalar %6157, %float9.999990e-06_3798, %int1_3799 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
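    // rsqrt of the stabilized mean square: 1 / sqrt(mean(x^2) + eps).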
    %6166 = torch.aten.rsqrt %6158 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %6167 = torch.aten.rsqrt %6159 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %6168 = torch.aten.rsqrt %6160 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %6169 = torch.aten.rsqrt %6161 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %6170 = torch.aten.rsqrt %6162 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %6171 = torch.aten.rsqrt %6163 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %6172 = torch.aten.rsqrt %6164 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %6173 = torch.aten.rsqrt %6165 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %6173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
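    // Normalize: multiply each activation by its token's rsqrt factor
    // (broadcast [4,?,1] over [4,?,4096]).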
    %6174 = torch.aten.mul.Tensor %6126, %6166 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6175 = torch.aten.mul.Tensor %6127, %6167 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6176 = torch.aten.mul.Tensor %6128, %6168 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6177 = torch.aten.mul.Tensor %6129, %6169 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6178 = torch.aten.mul.Tensor %6130, %6170 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6179 = torch.aten.mul.Tensor %6131, %6171 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6180 = torch.aten.mul.Tensor %6132, %6172 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6181 = torch.aten.mul.Tensor %6133, %6173 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
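    // Apply the learned per-channel norm weight (%152-%159, one f32 copy per
    // device; presumably this block's attn_norm weights).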
    %6182 = torch.aten.mul.Tensor %152, %6174 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6183 = torch.aten.mul.Tensor %153, %6175 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6184 = torch.aten.mul.Tensor %154, %6176 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6185 = torch.aten.mul.Tensor %155, %6177 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6186 = torch.aten.mul.Tensor %156, %6178 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6187 = torch.aten.mul.Tensor %157, %6179 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6188 = torch.aten.mul.Tensor %158, %6180 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %6189 = torch.aten.mul.Tensor %159, %6181 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %6189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
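    // Downcast the normalized activations back to f16 (dtype code 5).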
    %int5_3800 = torch.constant.int 5
    %6190 = torch.prims.convert_element_type %6182, %int5_3800 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3801 = torch.constant.int 5
    %6191 = torch.prims.convert_element_type %6183, %int5_3801 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3802 = torch.constant.int 5
    %6192 = torch.prims.convert_element_type %6184, %int5_3802 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3803 = torch.constant.int 5
    %6193 = torch.prims.convert_element_type %6185, %int5_3803 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3804 = torch.constant.int 5
    %6194 = torch.prims.convert_element_type %6186, %int5_3804 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3805 = torch.constant.int 5
    %6195 = torch.prims.convert_element_type %6187, %int5_3805 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3806 = torch.constant.int 5
    %6196 = torch.prims.convert_element_type %6188, %int5_3806 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_3807 = torch.constant.int 5
    %6197 = torch.prims.convert_element_type %6189, %int5_3807 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %6197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
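    // Attention projections, tensor-parallel over 8 shards. First transpose
    // each [512,4096] Q weight shard to [4096,512] for the matmuls below
    // (512 = 4 query heads x head_dim 128 per shard).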
    %int1_3808 = torch.constant.int 1
    %int0_3809 = torch.constant.int 0
    %6198 = torch.prim.ListConstruct %int1_3808, %int0_3809 : (!torch.int, !torch.int) -> !torch.list<int>
    %6199 = torch.aten.permute %160, %6198 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_3810 = torch.constant.int 1
    %int0_3811 = torch.constant.int 0
    %6200 = torch.prim.ListConstruct %int1_3810, %int0_3811 : (!torch.int, !torch.int) -> !torch.list<int>
    %6201 = torch.aten.permute %161, %6200 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_3812 = torch.constant.int 1
    %int0_3813 = torch.constant.int 0
    %6202 = torch.prim.ListConstruct %int1_3812, %int0_3813 : (!torch.int, !torch.int) -> !torch.list<int>
    %6203 = torch.aten.permute %162, %6202 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_3814 = torch.constant.int 1
    %int0_3815 = torch.constant.int 0
    %6204 = torch.prim.ListConstruct %int1_3814, %int0_3815 : (!torch.int, !torch.int) -> !torch.list<int>
    %6205 = torch.aten.permute %163, %6204 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_3816 = torch.constant.int 1
    %int0_3817 = torch.constant.int 0
    %6206 = torch.prim.ListConstruct %int1_3816, %int0_3817 : (!torch.int, !torch.int) -> !torch.list<int>
    %6207 = torch.aten.permute %164, %6206 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_3818 = torch.constant.int 1
    %int0_3819 = torch.constant.int 0
    %6208 = torch.prim.ListConstruct %int1_3818, %int0_3819 : (!torch.int, !torch.int) -> !torch.list<int>
    %6209 = torch.aten.permute %165, %6208 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_3820 = torch.constant.int 1
    %int0_3821 = torch.constant.int 0
    %6210 = torch.prim.ListConstruct %int1_3820, %int0_3821 : (!torch.int, !torch.int) -> !torch.list<int>
    %6211 = torch.aten.permute %166, %6210 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_3822 = torch.constant.int 1
    %int0_3823 = torch.constant.int 0
    %6212 = torch.prim.ListConstruct %int1_3822, %int0_3823 : (!torch.int, !torch.int) -> !torch.list<int>
    %6213 = torch.aten.permute %167, %6212 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
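    // Per-shard Q projection: flatten [4,?,4096] to [4*?,4096], matmul with
    // the transposed weight shard, then view back to [4,?,512].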
    %int4_3824 = torch.constant.int 4
    %6214 = torch.aten.mul.int %int4_3824, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3825 = torch.constant.int 4096
    %6215 = torch.prim.ListConstruct %6214, %int4096_3825 : (!torch.int, !torch.int) -> !torch.list<int>
    %6216 = torch.aten.view %6190, %6215 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6216, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6217 = torch.aten.mm %6216, %6199 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %6217, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_3826 = torch.constant.int 4
    %int512_3827 = torch.constant.int 512
    %6218 = torch.prim.ListConstruct %int4_3826, %2482, %int512_3827 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6219 = torch.aten.view %6217, %6218 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %6219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3828 = torch.constant.int 4
    %6220 = torch.aten.mul.int %int4_3828, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3829 = torch.constant.int 4096
    %6221 = torch.prim.ListConstruct %6220, %int4096_3829 : (!torch.int, !torch.int) -> !torch.list<int>
    %6222 = torch.aten.view %6191, %6221 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6222, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6223 = torch.aten.mm %6222, %6201 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %6223, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_3830 = torch.constant.int 4
    %int512_3831 = torch.constant.int 512
    %6224 = torch.prim.ListConstruct %int4_3830, %2482, %int512_3831 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6225 = torch.aten.view %6223, %6224 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %6225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3832 = torch.constant.int 4
    %6226 = torch.aten.mul.int %int4_3832, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3833 = torch.constant.int 4096
    %6227 = torch.prim.ListConstruct %6226, %int4096_3833 : (!torch.int, !torch.int) -> !torch.list<int>
    %6228 = torch.aten.view %6192, %6227 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6228, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6229 = torch.aten.mm %6228, %6203 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %6229, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_3834 = torch.constant.int 4
    %int512_3835 = torch.constant.int 512
    %6230 = torch.prim.ListConstruct %int4_3834, %2482, %int512_3835 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6231 = torch.aten.view %6229, %6230 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %6231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3836 = torch.constant.int 4
    %6232 = torch.aten.mul.int %int4_3836, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3837 = torch.constant.int 4096
    %6233 = torch.prim.ListConstruct %6232, %int4096_3837 : (!torch.int, !torch.int) -> !torch.list<int>
    %6234 = torch.aten.view %6193, %6233 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6234, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6235 = torch.aten.mm %6234, %6205 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %6235, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_3838 = torch.constant.int 4
    %int512_3839 = torch.constant.int 512
    %6236 = torch.prim.ListConstruct %int4_3838, %2482, %int512_3839 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6237 = torch.aten.view %6235, %6236 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %6237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3840 = torch.constant.int 4
    %6238 = torch.aten.mul.int %int4_3840, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3841 = torch.constant.int 4096
    %6239 = torch.prim.ListConstruct %6238, %int4096_3841 : (!torch.int, !torch.int) -> !torch.list<int>
    %6240 = torch.aten.view %6194, %6239 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6240, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6241 = torch.aten.mm %6240, %6207 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %6241, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_3842 = torch.constant.int 4
    %int512_3843 = torch.constant.int 512
    %6242 = torch.prim.ListConstruct %int4_3842, %2482, %int512_3843 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6243 = torch.aten.view %6241, %6242 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %6243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3844 = torch.constant.int 4
    %6244 = torch.aten.mul.int %int4_3844, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3845 = torch.constant.int 4096
    %6245 = torch.prim.ListConstruct %6244, %int4096_3845 : (!torch.int, !torch.int) -> !torch.list<int>
    %6246 = torch.aten.view %6195, %6245 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6246, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6247 = torch.aten.mm %6246, %6209 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %6247, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_3846 = torch.constant.int 4
    %int512_3847 = torch.constant.int 512
    %6248 = torch.prim.ListConstruct %int4_3846, %2482, %int512_3847 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6249 = torch.aten.view %6247, %6248 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %6249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3848 = torch.constant.int 4
    %6250 = torch.aten.mul.int %int4_3848, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3849 = torch.constant.int 4096
    %6251 = torch.prim.ListConstruct %6250, %int4096_3849 : (!torch.int, !torch.int) -> !torch.list<int>
    %6252 = torch.aten.view %6196, %6251 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6252, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6253 = torch.aten.mm %6252, %6211 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %6253, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_3850 = torch.constant.int 4
    %int512_3851 = torch.constant.int 512
    %6254 = torch.prim.ListConstruct %int4_3850, %2482, %int512_3851 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6255 = torch.aten.view %6253, %6254 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %6255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_3852 = torch.constant.int 4
    %6256 = torch.aten.mul.int %int4_3852, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3853 = torch.constant.int 4096
    %6257 = torch.prim.ListConstruct %6256, %int4096_3853 : (!torch.int, !torch.int) -> !torch.list<int>
    %6258 = torch.aten.view %6197, %6257 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6258, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6259 = torch.aten.mm %6258, %6213 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %6259, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_3854 = torch.constant.int 4
    %int512_3855 = torch.constant.int 512
    %6260 = torch.prim.ListConstruct %int4_3854, %2482, %int512_3855 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6261 = torch.aten.view %6259, %6260 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %6261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
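    // Likely the K projection weights: transpose each [128,4096] shard to
    // [4096,128] (one KV head of dim 128 per shard, i.e. grouped-query
    // attention with 8 KV heads across 8 devices).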
    %int1_3856 = torch.constant.int 1
    %int0_3857 = torch.constant.int 0
    %6262 = torch.prim.ListConstruct %int1_3856, %int0_3857 : (!torch.int, !torch.int) -> !torch.list<int>
    %6263 = torch.aten.permute %168, %6262 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3858 = torch.constant.int 1
    %int0_3859 = torch.constant.int 0
    %6264 = torch.prim.ListConstruct %int1_3858, %int0_3859 : (!torch.int, !torch.int) -> !torch.list<int>
    %6265 = torch.aten.permute %169, %6264 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3860 = torch.constant.int 1
    %int0_3861 = torch.constant.int 0
    %6266 = torch.prim.ListConstruct %int1_3860, %int0_3861 : (!torch.int, !torch.int) -> !torch.list<int>
    %6267 = torch.aten.permute %170, %6266 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3862 = torch.constant.int 1
    %int0_3863 = torch.constant.int 0
    %6268 = torch.prim.ListConstruct %int1_3862, %int0_3863 : (!torch.int, !torch.int) -> !torch.list<int>
    %6269 = torch.aten.permute %171, %6268 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3864 = torch.constant.int 1
    %int0_3865 = torch.constant.int 0
    %6270 = torch.prim.ListConstruct %int1_3864, %int0_3865 : (!torch.int, !torch.int) -> !torch.list<int>
    %6271 = torch.aten.permute %172, %6270 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3866 = torch.constant.int 1
    %int0_3867 = torch.constant.int 0
    %6272 = torch.prim.ListConstruct %int1_3866, %int0_3867 : (!torch.int, !torch.int) -> !torch.list<int>
    %6273 = torch.aten.permute %173, %6272 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3868 = torch.constant.int 1
    %int0_3869 = torch.constant.int 0
    %6274 = torch.prim.ListConstruct %int1_3868, %int0_3869 : (!torch.int, !torch.int) -> !torch.list<int>
    %6275 = torch.aten.permute %174, %6274 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3870 = torch.constant.int 1
    %int0_3871 = torch.constant.int 0
    %6276 = torch.prim.ListConstruct %int1_3870, %int0_3871 : (!torch.int, !torch.int) -> !torch.list<int>
    %6277 = torch.aten.permute %175, %6276 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
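    // Per-shard K projection: same flatten / mm / view pattern, producing
    // [4,?,128] per shard.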
    %int4_3872 = torch.constant.int 4
    %6278 = torch.aten.mul.int %int4_3872, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3873 = torch.constant.int 4096
    %6279 = torch.prim.ListConstruct %6278, %int4096_3873 : (!torch.int, !torch.int) -> !torch.list<int>
    %6280 = torch.aten.view %6190, %6279 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6280, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6281 = torch.aten.mm %6280, %6263 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6281, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3874 = torch.constant.int 4
    %int128_3875 = torch.constant.int 128
    %6282 = torch.prim.ListConstruct %int4_3874, %2482, %int128_3875 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6283 = torch.aten.view %6281, %6282 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3876 = torch.constant.int 4
    %6284 = torch.aten.mul.int %int4_3876, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3877 = torch.constant.int 4096
    %6285 = torch.prim.ListConstruct %6284, %int4096_3877 : (!torch.int, !torch.int) -> !torch.list<int>
    %6286 = torch.aten.view %6191, %6285 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6286, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6287 = torch.aten.mm %6286, %6265 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6287, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3878 = torch.constant.int 4
    %int128_3879 = torch.constant.int 128
    %6288 = torch.prim.ListConstruct %int4_3878, %2482, %int128_3879 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6289 = torch.aten.view %6287, %6288 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3880 = torch.constant.int 4
    %6290 = torch.aten.mul.int %int4_3880, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3881 = torch.constant.int 4096
    %6291 = torch.prim.ListConstruct %6290, %int4096_3881 : (!torch.int, !torch.int) -> !torch.list<int>
    %6292 = torch.aten.view %6192, %6291 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6292, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6293 = torch.aten.mm %6292, %6267 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6293, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3882 = torch.constant.int 4
    %int128_3883 = torch.constant.int 128
    %6294 = torch.prim.ListConstruct %int4_3882, %2482, %int128_3883 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6295 = torch.aten.view %6293, %6294 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3884 = torch.constant.int 4
    %6296 = torch.aten.mul.int %int4_3884, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3885 = torch.constant.int 4096
    %6297 = torch.prim.ListConstruct %6296, %int4096_3885 : (!torch.int, !torch.int) -> !torch.list<int>
    %6298 = torch.aten.view %6193, %6297 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6298, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6299 = torch.aten.mm %6298, %6269 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6299, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3886 = torch.constant.int 4
    %int128_3887 = torch.constant.int 128
    %6300 = torch.prim.ListConstruct %int4_3886, %2482, %int128_3887 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6301 = torch.aten.view %6299, %6300 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3888 = torch.constant.int 4
    %6302 = torch.aten.mul.int %int4_3888, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3889 = torch.constant.int 4096
    %6303 = torch.prim.ListConstruct %6302, %int4096_3889 : (!torch.int, !torch.int) -> !torch.list<int>
    %6304 = torch.aten.view %6194, %6303 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6304, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6305 = torch.aten.mm %6304, %6271 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6305, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3890 = torch.constant.int 4
    %int128_3891 = torch.constant.int 128
    %6306 = torch.prim.ListConstruct %int4_3890, %2482, %int128_3891 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6307 = torch.aten.view %6305, %6306 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3892 = torch.constant.int 4
    %6308 = torch.aten.mul.int %int4_3892, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3893 = torch.constant.int 4096
    %6309 = torch.prim.ListConstruct %6308, %int4096_3893 : (!torch.int, !torch.int) -> !torch.list<int>
    %6310 = torch.aten.view %6195, %6309 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6310, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6311 = torch.aten.mm %6310, %6273 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6311, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3894 = torch.constant.int 4
    %int128_3895 = torch.constant.int 128
    %6312 = torch.prim.ListConstruct %int4_3894, %2482, %int128_3895 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6313 = torch.aten.view %6311, %6312 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3896 = torch.constant.int 4
    %6314 = torch.aten.mul.int %int4_3896, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3897 = torch.constant.int 4096
    %6315 = torch.prim.ListConstruct %6314, %int4096_3897 : (!torch.int, !torch.int) -> !torch.list<int>
    %6316 = torch.aten.view %6196, %6315 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6316, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6317 = torch.aten.mm %6316, %6275 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6317, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3898 = torch.constant.int 4
    %int128_3899 = torch.constant.int 128
    %6318 = torch.prim.ListConstruct %int4_3898, %2482, %int128_3899 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6319 = torch.aten.view %6317, %6318 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3900 = torch.constant.int 4
    %6320 = torch.aten.mul.int %int4_3900, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3901 = torch.constant.int 4096
    %6321 = torch.prim.ListConstruct %6320, %int4096_3901 : (!torch.int, !torch.int) -> !torch.list<int>
    %6322 = torch.aten.view %6197, %6321 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6322, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6323 = torch.aten.mm %6322, %6277 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6323, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3902 = torch.constant.int 4
    %int128_3903 = torch.constant.int 128
    %6324 = torch.prim.ListConstruct %int4_3902, %2482, %int128_3903 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6325 = torch.aten.view %6323, %6324 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
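    // Likely the V projection weights, transposed the same way as K.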
    %int1_3904 = torch.constant.int 1
    %int0_3905 = torch.constant.int 0
    %6326 = torch.prim.ListConstruct %int1_3904, %int0_3905 : (!torch.int, !torch.int) -> !torch.list<int>
    %6327 = torch.aten.permute %176, %6326 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3906 = torch.constant.int 1
    %int0_3907 = torch.constant.int 0
    %6328 = torch.prim.ListConstruct %int1_3906, %int0_3907 : (!torch.int, !torch.int) -> !torch.list<int>
    %6329 = torch.aten.permute %177, %6328 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3908 = torch.constant.int 1
    %int0_3909 = torch.constant.int 0
    %6330 = torch.prim.ListConstruct %int1_3908, %int0_3909 : (!torch.int, !torch.int) -> !torch.list<int>
    %6331 = torch.aten.permute %178, %6330 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3910 = torch.constant.int 1
    %int0_3911 = torch.constant.int 0
    %6332 = torch.prim.ListConstruct %int1_3910, %int0_3911 : (!torch.int, !torch.int) -> !torch.list<int>
    %6333 = torch.aten.permute %179, %6332 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3912 = torch.constant.int 1
    %int0_3913 = torch.constant.int 0
    %6334 = torch.prim.ListConstruct %int1_3912, %int0_3913 : (!torch.int, !torch.int) -> !torch.list<int>
    %6335 = torch.aten.permute %180, %6334 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3914 = torch.constant.int 1
    %int0_3915 = torch.constant.int 0
    %6336 = torch.prim.ListConstruct %int1_3914, %int0_3915 : (!torch.int, !torch.int) -> !torch.list<int>
    %6337 = torch.aten.permute %181, %6336 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3916 = torch.constant.int 1
    %int0_3917 = torch.constant.int 0
    %6338 = torch.prim.ListConstruct %int1_3916, %int0_3917 : (!torch.int, !torch.int) -> !torch.list<int>
    %6339 = torch.aten.permute %182, %6338 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_3918 = torch.constant.int 1
    %int0_3919 = torch.constant.int 0
    %6340 = torch.prim.ListConstruct %int1_3918, %int0_3919 : (!torch.int, !torch.int) -> !torch.list<int>
    %6341 = torch.aten.permute %183, %6340 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
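    // Per-shard V projection, again yielding [4,?,128] per shard.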
    %int4_3920 = torch.constant.int 4
    %6342 = torch.aten.mul.int %int4_3920, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3921 = torch.constant.int 4096
    %6343 = torch.prim.ListConstruct %6342, %int4096_3921 : (!torch.int, !torch.int) -> !torch.list<int>
    %6344 = torch.aten.view %6190, %6343 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6344, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6345 = torch.aten.mm %6344, %6327 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6345, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3922 = torch.constant.int 4
    %int128_3923 = torch.constant.int 128
    %6346 = torch.prim.ListConstruct %int4_3922, %2482, %int128_3923 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6347 = torch.aten.view %6345, %6346 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3924 = torch.constant.int 4
    %6348 = torch.aten.mul.int %int4_3924, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3925 = torch.constant.int 4096
    %6349 = torch.prim.ListConstruct %6348, %int4096_3925 : (!torch.int, !torch.int) -> !torch.list<int>
    %6350 = torch.aten.view %6191, %6349 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6350, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6351 = torch.aten.mm %6350, %6329 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6351, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3926 = torch.constant.int 4
    %int128_3927 = torch.constant.int 128
    %6352 = torch.prim.ListConstruct %int4_3926, %2482, %int128_3927 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6353 = torch.aten.view %6351, %6352 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3928 = torch.constant.int 4
    %6354 = torch.aten.mul.int %int4_3928, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3929 = torch.constant.int 4096
    %6355 = torch.prim.ListConstruct %6354, %int4096_3929 : (!torch.int, !torch.int) -> !torch.list<int>
    %6356 = torch.aten.view %6192, %6355 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6356, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6357 = torch.aten.mm %6356, %6331 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6357, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3930 = torch.constant.int 4
    %int128_3931 = torch.constant.int 128
    %6358 = torch.prim.ListConstruct %int4_3930, %2482, %int128_3931 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6359 = torch.aten.view %6357, %6358 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3932 = torch.constant.int 4
    %6360 = torch.aten.mul.int %int4_3932, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3933 = torch.constant.int 4096
    %6361 = torch.prim.ListConstruct %6360, %int4096_3933 : (!torch.int, !torch.int) -> !torch.list<int>
    %6362 = torch.aten.view %6193, %6361 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6362, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6363 = torch.aten.mm %6362, %6333 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6363, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3934 = torch.constant.int 4
    %int128_3935 = torch.constant.int 128
    %6364 = torch.prim.ListConstruct %int4_3934, %2482, %int128_3935 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6365 = torch.aten.view %6363, %6364 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3936 = torch.constant.int 4
    %6366 = torch.aten.mul.int %int4_3936, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3937 = torch.constant.int 4096
    %6367 = torch.prim.ListConstruct %6366, %int4096_3937 : (!torch.int, !torch.int) -> !torch.list<int>
    %6368 = torch.aten.view %6194, %6367 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6368, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6369 = torch.aten.mm %6368, %6335 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6369, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3938 = torch.constant.int 4
    %int128_3939 = torch.constant.int 128
    %6370 = torch.prim.ListConstruct %int4_3938, %2482, %int128_3939 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6371 = torch.aten.view %6369, %6370 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3940 = torch.constant.int 4
    %6372 = torch.aten.mul.int %int4_3940, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3941 = torch.constant.int 4096
    %6373 = torch.prim.ListConstruct %6372, %int4096_3941 : (!torch.int, !torch.int) -> !torch.list<int>
    %6374 = torch.aten.view %6195, %6373 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6374, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6375 = torch.aten.mm %6374, %6337 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6375, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3942 = torch.constant.int 4
    %int128_3943 = torch.constant.int 128
    %6376 = torch.prim.ListConstruct %int4_3942, %2482, %int128_3943 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6377 = torch.aten.view %6375, %6376 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3944 = torch.constant.int 4
    %6378 = torch.aten.mul.int %int4_3944, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3945 = torch.constant.int 4096
    %6379 = torch.prim.ListConstruct %6378, %int4096_3945 : (!torch.int, !torch.int) -> !torch.list<int>
    %6380 = torch.aten.view %6196, %6379 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6380, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6381 = torch.aten.mm %6380, %6339 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6381, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3946 = torch.constant.int 4
    %int128_3947 = torch.constant.int 128
    %6382 = torch.prim.ListConstruct %int4_3946, %2482, %int128_3947 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6383 = torch.aten.view %6381, %6382 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_3948 = torch.constant.int 4
    %6384 = torch.aten.mul.int %int4_3948, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_3949 = torch.constant.int 4096
    %6385 = torch.prim.ListConstruct %6384, %int4096_3949 : (!torch.int, !torch.int) -> !torch.list<int>
    %6386 = torch.aten.view %6197, %6385 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %6386, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %6387 = torch.aten.mm %6386, %6341 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %6387, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_3950 = torch.constant.int 4
    %int128_3951 = torch.constant.int 128
    %6388 = torch.prim.ListConstruct %int4_3950, %2482, %int128_3951 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6389 = torch.aten.view %6387, %6388 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %6389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
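    // The views below split each device's [4, ?, 512] projection output into
    // [4, ?, 4, 128]: four 128-dim attention heads per device, consistent with
    // 32 query heads sharded across the 8 devices (4 heads each).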
    %int4_3952 = torch.constant.int 4
    %int4_3953 = torch.constant.int 4
    %int128_3954 = torch.constant.int 128
    %6390 = torch.prim.ListConstruct %int4_3952, %2482, %int4_3953, %int128_3954 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6391 = torch.aten.view %6219, %6390 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_3955 = torch.constant.int 4
    %int4_3956 = torch.constant.int 4
    %int128_3957 = torch.constant.int 128
    %6392 = torch.prim.ListConstruct %int4_3955, %2482, %int4_3956, %int128_3957 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6393 = torch.aten.view %6225, %6392 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_3958 = torch.constant.int 4
    %int4_3959 = torch.constant.int 4
    %int128_3960 = torch.constant.int 128
    %6394 = torch.prim.ListConstruct %int4_3958, %2482, %int4_3959, %int128_3960 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6395 = torch.aten.view %6231, %6394 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_3961 = torch.constant.int 4
    %int4_3962 = torch.constant.int 4
    %int128_3963 = torch.constant.int 128
    %6396 = torch.prim.ListConstruct %int4_3961, %2482, %int4_3962, %int128_3963 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6397 = torch.aten.view %6237, %6396 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_3964 = torch.constant.int 4
    %int4_3965 = torch.constant.int 4
    %int128_3966 = torch.constant.int 128
    %6398 = torch.prim.ListConstruct %int4_3964, %2482, %int4_3965, %int128_3966 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6399 = torch.aten.view %6243, %6398 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_3967 = torch.constant.int 4
    %int4_3968 = torch.constant.int 4
    %int128_3969 = torch.constant.int 128
    %6400 = torch.prim.ListConstruct %int4_3967, %2482, %int4_3968, %int128_3969 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6401 = torch.aten.view %6249, %6400 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_3970 = torch.constant.int 4
    %int4_3971 = torch.constant.int 4
    %int128_3972 = torch.constant.int 128
    %6402 = torch.prim.ListConstruct %int4_3970, %2482, %int4_3971, %int128_3972 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6403 = torch.aten.view %6255, %6402 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_3973 = torch.constant.int 4
    %int4_3974 = torch.constant.int 4
    %int128_3975 = torch.constant.int 128
    %6404 = torch.prim.ListConstruct %int4_3973, %2482, %int4_3974, %int128_3975 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6405 = torch.aten.view %6261, %6404 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
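    // Sixteen [4, ?, 128] tensors are next viewed as [4, ?, 1, 128]: one
    // 128-dim head per device, consistent with grouped-query attention using
    // 8 KV heads sharded one per device (two groups of eight, presumably the
    // key and value projections respectively).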
    %int4_3976 = torch.constant.int 4
    %int1_3977 = torch.constant.int 1
    %int128_3978 = torch.constant.int 128
    %6406 = torch.prim.ListConstruct %int4_3976, %2482, %int1_3977, %int128_3978 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6407 = torch.aten.view %6283, %6406 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_3979 = torch.constant.int 4
    %int1_3980 = torch.constant.int 1
    %int128_3981 = torch.constant.int 128
    %6408 = torch.prim.ListConstruct %int4_3979, %2482, %int1_3980, %int128_3981 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6409 = torch.aten.view %6289, %6408 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_3982 = torch.constant.int 4
    %int1_3983 = torch.constant.int 1
    %int128_3984 = torch.constant.int 128
    %6410 = torch.prim.ListConstruct %int4_3982, %2482, %int1_3983, %int128_3984 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6411 = torch.aten.view %6295, %6410 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_3985 = torch.constant.int 4
    %int1_3986 = torch.constant.int 1
    %int128_3987 = torch.constant.int 128
    %6412 = torch.prim.ListConstruct %int4_3985, %2482, %int1_3986, %int128_3987 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6413 = torch.aten.view %6301, %6412 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_3988 = torch.constant.int 4
    %int1_3989 = torch.constant.int 1
    %int128_3990 = torch.constant.int 128
    %6414 = torch.prim.ListConstruct %int4_3988, %2482, %int1_3989, %int128_3990 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6415 = torch.aten.view %6307, %6414 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_3991 = torch.constant.int 4
    %int1_3992 = torch.constant.int 1
    %int128_3993 = torch.constant.int 128
    %6416 = torch.prim.ListConstruct %int4_3991, %2482, %int1_3992, %int128_3993 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6417 = torch.aten.view %6313, %6416 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_3994 = torch.constant.int 4
    %int1_3995 = torch.constant.int 1
    %int128_3996 = torch.constant.int 128
    %6418 = torch.prim.ListConstruct %int4_3994, %2482, %int1_3995, %int128_3996 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6419 = torch.aten.view %6319, %6418 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_3997 = torch.constant.int 4
    %int1_3998 = torch.constant.int 1
    %int128_3999 = torch.constant.int 128
    %6420 = torch.prim.ListConstruct %int4_3997, %2482, %int1_3998, %int128_3999 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6421 = torch.aten.view %6325, %6420 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_4000 = torch.constant.int 4
    %int1_4001 = torch.constant.int 1
    %int128_4002 = torch.constant.int 128
    %6422 = torch.prim.ListConstruct %int4_4000, %2482, %int1_4001, %int128_4002 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6423 = torch.aten.view %6347, %6422 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_4003 = torch.constant.int 4
    %int1_4004 = torch.constant.int 1
    %int128_4005 = torch.constant.int 128
    %6424 = torch.prim.ListConstruct %int4_4003, %2482, %int1_4004, %int128_4005 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6425 = torch.aten.view %6353, %6424 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_4006 = torch.constant.int 4
    %int1_4007 = torch.constant.int 1
    %int128_4008 = torch.constant.int 128
    %6426 = torch.prim.ListConstruct %int4_4006, %2482, %int1_4007, %int128_4008 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6427 = torch.aten.view %6359, %6426 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_4009 = torch.constant.int 4
    %int1_4010 = torch.constant.int 1
    %int128_4011 = torch.constant.int 128
    %6428 = torch.prim.ListConstruct %int4_4009, %2482, %int1_4010, %int128_4011 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6429 = torch.aten.view %6365, %6428 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_4012 = torch.constant.int 4
    %int1_4013 = torch.constant.int 1
    %int128_4014 = torch.constant.int 128
    %6430 = torch.prim.ListConstruct %int4_4012, %2482, %int1_4013, %int128_4014 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6431 = torch.aten.view %6371, %6430 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_4015 = torch.constant.int 4
    %int1_4016 = torch.constant.int 1
    %int128_4017 = torch.constant.int 128
    %6432 = torch.prim.ListConstruct %int4_4015, %2482, %int1_4016, %int128_4017 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6433 = torch.aten.view %6377, %6432 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_4018 = torch.constant.int 4
    %int1_4019 = torch.constant.int 1
    %int128_4020 = torch.constant.int 128
    %6434 = torch.prim.ListConstruct %int4_4018, %2482, %int1_4019, %int128_4020 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6435 = torch.aten.view %6383, %6434 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_4021 = torch.constant.int 4
    %int1_4022 = torch.constant.int 1
    %int128_4023 = torch.constant.int 128
    %6436 = torch.prim.ListConstruct %int4_4021, %2482, %int1_4022, %int128_4023 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6437 = torch.aten.view %6389, %6436 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
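    // Rotary position-embedding (RoPE) table. For positions p in [0, 131072)
    // and frequency index i in [0, 64):
    //   inv_freq[i] = 1 / 500000^(2i/128)
    //   table[p, i] = cos(p * inv_freq[i]) + j * sin(p * inv_freq[i])
    // yielding a [131072, 64] complex<f32> table (head_dim 128 -> 64 complex
    // lanes; the base theta of 500000 matches the Llama 3 parameterization).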
    %int131072_4024 = torch.constant.int 131072
    %none_4025 = torch.constant.none
    %none_4026 = torch.constant.none
    %cpu_4027 = torch.constant.device "cpu"
    %false_4028 = torch.constant.bool false
    %6438 = torch.aten.arange %int131072_4024, %none_4025, %none_4026, %cpu_4027, %false_4028 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_4029 = torch.constant.int 0
    %int128_4030 = torch.constant.int 128
    %int2_4031 = torch.constant.int 2
    %none_4032 = torch.constant.none
    %none_4033 = torch.constant.none
    %cpu_4034 = torch.constant.device "cpu"
    %false_4035 = torch.constant.bool false
    %6439 = torch.aten.arange.start_step %int0_4029, %int128_4030, %int2_4031, %none_4032, %none_4033, %cpu_4034, %false_4035 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_4036 = torch.constant.int 0
    %int0_4037 = torch.constant.int 0
    %int64_4038 = torch.constant.int 64
    %int1_4039 = torch.constant.int 1
    %6440 = torch.aten.slice.Tensor %6439, %int0_4036, %int0_4037, %int64_4038, %int1_4039 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_4040 = torch.constant.int 6
    %6441 = torch.prims.convert_element_type %6440, %int6_4040 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_4041 = torch.constant.int 128
    %6442 = torch.aten.div.Scalar %6441, %int128_4041 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_4042 = torch.constant.float 5.000000e+05
    %6443 = torch.aten.pow.Scalar %float5.000000e05_4042, %6442 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %6444 = torch.aten.reciprocal %6443 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_4043 = torch.constant.float 1.000000e+00
    %6445 = torch.aten.mul.Scalar %6444, %float1.000000e00_4043 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_4044 = torch.constant.int 131072
    %int1_4045 = torch.constant.int 1
    %6446 = torch.prim.ListConstruct %int131072_4044, %int1_4045 : (!torch.int, !torch.int) -> !torch.list<int>
    %6447 = torch.aten.view %6438, %6446 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %6448 = torch.aten.mul.Tensor %6447, %6445 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %6449 = torch.aten.cos %6448 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %6450 = torch.aten.sin %6448 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %6451 = torch.aten.complex %6449, %6450 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
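    // The table is computed once on the host device ("cpu") and replicated to
    // all eight devices via flow.tensor.transfer, round-tripping through
    // builtin tensors so the copies are visible to the flow dialect.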
    %6452 = torch_c.to_builtin_tensor %6451 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6453 = flow.tensor.transfer %6452 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %6454 = torch_c.from_builtin_tensor %6453 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6455 = torch_c.to_builtin_tensor %6451 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6456 = flow.tensor.transfer %6455 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %6457 = torch_c.from_builtin_tensor %6456 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6458 = torch_c.to_builtin_tensor %6451 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6459 = flow.tensor.transfer %6458 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %6460 = torch_c.from_builtin_tensor %6459 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6461 = torch_c.to_builtin_tensor %6451 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6462 = flow.tensor.transfer %6461 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %6463 = torch_c.from_builtin_tensor %6462 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6464 = torch_c.to_builtin_tensor %6451 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6465 = flow.tensor.transfer %6464 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %6466 = torch_c.from_builtin_tensor %6465 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6467 = torch_c.to_builtin_tensor %6451 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6468 = flow.tensor.transfer %6467 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %6469 = torch_c.from_builtin_tensor %6468 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6470 = torch_c.to_builtin_tensor %6451 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6471 = flow.tensor.transfer %6470 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %6472 = torch_c.from_builtin_tensor %6471 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6473 = torch_c.to_builtin_tensor %6451 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6474 = flow.tensor.transfer %6473 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %6475 = torch_c.from_builtin_tensor %6474 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
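    // Per-device RoPE application (device 0 here; devices 1..7 follow the same
    // pattern): slice the table to the current sequence length, reshape it to
    // [1, ?, 1, 64] for broadcasting, bitcast the f16 activations from
    // [4, ?, H, 128] to [4, ?, H, 64] complex<f16>, multiply by the complex
    // table (rotating each (even, odd) lane pair by p * inv_freq[i]), bitcast
    // back to [4, ?, H, 128] f32, and truncate to f16.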
    %int1_4046 = torch.constant.int 1
    %6476 = torch.aten.size.int %6219, %int1_4046 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_4047 = torch.constant.int 0
    %6477 = torch.aten.add.int %int0_4047, %6476 : !torch.int, !torch.int -> !torch.int
    %int0_4048 = torch.constant.int 0
    %int0_4049 = torch.constant.int 0
    %int1_4050 = torch.constant.int 1
    %6478 = torch.aten.slice.Tensor %6454, %int0_4048, %int0_4049, %6477, %int1_4050 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6478, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4051 = torch.constant.int 1
    %int0_4052 = torch.constant.int 0
    %int9223372036854775807_4053 = torch.constant.int 9223372036854775807
    %int1_4054 = torch.constant.int 1
    %6479 = torch.aten.slice.Tensor %6478, %int1_4051, %int0_4052, %int9223372036854775807_4053, %int1_4054 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6479, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4055 = torch.constant.int 0
    %6480 = torch.aten.unsqueeze %6479, %int0_4055 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6480, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4056 = torch.constant.int 2
    %6481 = torch.aten.unsqueeze %6480, %int2_4056 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6481, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4057 = torch.constant.int 3
    %int0_4058 = torch.constant.int 0
    %int9223372036854775807_4059 = torch.constant.int 9223372036854775807
    %int1_4060 = torch.constant.int 1
    %6482 = torch.aten.slice.Tensor %6481, %int3_4057, %int0_4058, %int9223372036854775807_4059, %int1_4060 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6482, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6483 = torch_c.to_builtin_tensor %6391 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_4061 = arith.constant 1 : index
    %dim_4062 = tensor.dim %6483, %c1_4061 : tensor<4x?x4x128xf16>
    %6484 = flow.tensor.bitcast %6483 : tensor<4x?x4x128xf16>{%dim_4062} -> tensor<4x?x4x64xcomplex<f16>>{%dim_4062}
    %6485 = torch_c.from_builtin_tensor %6484 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %6485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %6486 = torch.aten.mul.Tensor %6485, %6482 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %6486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %6487 = torch_c.to_builtin_tensor %6486 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_4063 = arith.constant 1 : index
    %dim_4064 = tensor.dim %6487, %c1_4063 : tensor<4x?x4x64xcomplex<f32>>
    %6488 = flow.tensor.bitcast %6487 : tensor<4x?x4x64xcomplex<f32>>{%dim_4064} -> tensor<4x?x4x128xf32>{%dim_4064}
    %6489 = torch_c.from_builtin_tensor %6488 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %6489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_4065 = torch.constant.int 5
    %6490 = torch.prims.convert_element_type %6489, %int5_4065 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
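    // Devices 1..7 repeat the identical slice/bitcast/multiply/convert
    // sequence against their own replica of the rotary table.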
    %int1_4066 = torch.constant.int 1
    %6491 = torch.aten.size.int %6225, %int1_4066 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_4067 = torch.constant.int 0
    %6492 = torch.aten.add.int %int0_4067, %6491 : !torch.int, !torch.int -> !torch.int
    %int0_4068 = torch.constant.int 0
    %int0_4069 = torch.constant.int 0
    %int1_4070 = torch.constant.int 1
    %6493 = torch.aten.slice.Tensor %6457, %int0_4068, %int0_4069, %6492, %int1_4070 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6493, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4071 = torch.constant.int 1
    %int0_4072 = torch.constant.int 0
    %int9223372036854775807_4073 = torch.constant.int 9223372036854775807
    %int1_4074 = torch.constant.int 1
    %6494 = torch.aten.slice.Tensor %6493, %int1_4071, %int0_4072, %int9223372036854775807_4073, %int1_4074 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6494, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4075 = torch.constant.int 0
    %6495 = torch.aten.unsqueeze %6494, %int0_4075 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6495, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4076 = torch.constant.int 2
    %6496 = torch.aten.unsqueeze %6495, %int2_4076 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6496, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4077 = torch.constant.int 3
    %int0_4078 = torch.constant.int 0
    %int9223372036854775807_4079 = torch.constant.int 9223372036854775807
    %int1_4080 = torch.constant.int 1
    %6497 = torch.aten.slice.Tensor %6496, %int3_4077, %int0_4078, %int9223372036854775807_4079, %int1_4080 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6497, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6498 = torch_c.to_builtin_tensor %6393 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_4081 = arith.constant 1 : index
    %dim_4082 = tensor.dim %6498, %c1_4081 : tensor<4x?x4x128xf16>
    %6499 = flow.tensor.bitcast %6498 : tensor<4x?x4x128xf16>{%dim_4082} -> tensor<4x?x4x64xcomplex<f16>>{%dim_4082}
    %6500 = torch_c.from_builtin_tensor %6499 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %6500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %6501 = torch.aten.mul.Tensor %6500, %6497 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %6501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %6502 = torch_c.to_builtin_tensor %6501 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_4083 = arith.constant 1 : index
    %dim_4084 = tensor.dim %6502, %c1_4083 : tensor<4x?x4x64xcomplex<f32>>
    %6503 = flow.tensor.bitcast %6502 : tensor<4x?x4x64xcomplex<f32>>{%dim_4084} -> tensor<4x?x4x128xf32>{%dim_4084}
    %6504 = torch_c.from_builtin_tensor %6503 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %6504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_4085 = torch.constant.int 5
    %6505 = torch.prims.convert_element_type %6504, %int5_4085 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4086 = torch.constant.int 1
    %6506 = torch.aten.size.int %6231, %int1_4086 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_4087 = torch.constant.int 0
    %6507 = torch.aten.add.int %int0_4087, %6506 : !torch.int, !torch.int -> !torch.int
    %int0_4088 = torch.constant.int 0
    %int0_4089 = torch.constant.int 0
    %int1_4090 = torch.constant.int 1
    %6508 = torch.aten.slice.Tensor %6460, %int0_4088, %int0_4089, %6507, %int1_4090 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6508, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4091 = torch.constant.int 1
    %int0_4092 = torch.constant.int 0
    %int9223372036854775807_4093 = torch.constant.int 9223372036854775807
    %int1_4094 = torch.constant.int 1
    %6509 = torch.aten.slice.Tensor %6508, %int1_4091, %int0_4092, %int9223372036854775807_4093, %int1_4094 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6509, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4095 = torch.constant.int 0
    %6510 = torch.aten.unsqueeze %6509, %int0_4095 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6510, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4096 = torch.constant.int 2
    %6511 = torch.aten.unsqueeze %6510, %int2_4096 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6511, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4097 = torch.constant.int 3
    %int0_4098 = torch.constant.int 0
    %int9223372036854775807_4099 = torch.constant.int 9223372036854775807
    %int1_4100 = torch.constant.int 1
    %6512 = torch.aten.slice.Tensor %6511, %int3_4097, %int0_4098, %int9223372036854775807_4099, %int1_4100 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6512, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6513 = torch_c.to_builtin_tensor %6395 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_4101 = arith.constant 1 : index
    %dim_4102 = tensor.dim %6513, %c1_4101 : tensor<4x?x4x128xf16>
    %6514 = flow.tensor.bitcast %6513 : tensor<4x?x4x128xf16>{%dim_4102} -> tensor<4x?x4x64xcomplex<f16>>{%dim_4102}
    %6515 = torch_c.from_builtin_tensor %6514 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %6515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %6516 = torch.aten.mul.Tensor %6515, %6512 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %6516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %6517 = torch_c.to_builtin_tensor %6516 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_4103 = arith.constant 1 : index
    %dim_4104 = tensor.dim %6517, %c1_4103 : tensor<4x?x4x64xcomplex<f32>>
    %6518 = flow.tensor.bitcast %6517 : tensor<4x?x4x64xcomplex<f32>>{%dim_4104} -> tensor<4x?x4x128xf32>{%dim_4104}
    %6519 = torch_c.from_builtin_tensor %6518 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %6519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_4105 = torch.constant.int 5
    %6520 = torch.prims.convert_element_type %6519, %int5_4105 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4106 = torch.constant.int 1
    %6521 = torch.aten.size.int %6237, %int1_4106 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_4107 = torch.constant.int 0
    %6522 = torch.aten.add.int %int0_4107, %6521 : !torch.int, !torch.int -> !torch.int
    %int0_4108 = torch.constant.int 0
    %int0_4109 = torch.constant.int 0
    %int1_4110 = torch.constant.int 1
    %6523 = torch.aten.slice.Tensor %6463, %int0_4108, %int0_4109, %6522, %int1_4110 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6523, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4111 = torch.constant.int 1
    %int0_4112 = torch.constant.int 0
    %int9223372036854775807_4113 = torch.constant.int 9223372036854775807
    %int1_4114 = torch.constant.int 1
    %6524 = torch.aten.slice.Tensor %6523, %int1_4111, %int0_4112, %int9223372036854775807_4113, %int1_4114 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6524, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4115 = torch.constant.int 0
    %6525 = torch.aten.unsqueeze %6524, %int0_4115 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6525, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4116 = torch.constant.int 2
    %6526 = torch.aten.unsqueeze %6525, %int2_4116 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6526, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4117 = torch.constant.int 3
    %int0_4118 = torch.constant.int 0
    %int9223372036854775807_4119 = torch.constant.int 9223372036854775807
    %int1_4120 = torch.constant.int 1
    %6527 = torch.aten.slice.Tensor %6526, %int3_4117, %int0_4118, %int9223372036854775807_4119, %int1_4120 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6527, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6528 = torch_c.to_builtin_tensor %6397 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_4121 = arith.constant 1 : index
    %dim_4122 = tensor.dim %6528, %c1_4121 : tensor<4x?x4x128xf16>
    %6529 = flow.tensor.bitcast %6528 : tensor<4x?x4x128xf16>{%dim_4122} -> tensor<4x?x4x64xcomplex<f16>>{%dim_4122}
    %6530 = torch_c.from_builtin_tensor %6529 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %6530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %6531 = torch.aten.mul.Tensor %6530, %6527 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %6531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %6532 = torch_c.to_builtin_tensor %6531 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_4123 = arith.constant 1 : index
    %dim_4124 = tensor.dim %6532, %c1_4123 : tensor<4x?x4x64xcomplex<f32>>
    %6533 = flow.tensor.bitcast %6532 : tensor<4x?x4x64xcomplex<f32>>{%dim_4124} -> tensor<4x?x4x128xf32>{%dim_4124}
    %6534 = torch_c.from_builtin_tensor %6533 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %6534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_4125 = torch.constant.int 5
    %6535 = torch.prims.convert_element_type %6534, %int5_4125 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4126 = torch.constant.int 1
    %6536 = torch.aten.size.int %6243, %int1_4126 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_4127 = torch.constant.int 0
    %6537 = torch.aten.add.int %int0_4127, %6536 : !torch.int, !torch.int -> !torch.int
    %int0_4128 = torch.constant.int 0
    %int0_4129 = torch.constant.int 0
    %int1_4130 = torch.constant.int 1
    %6538 = torch.aten.slice.Tensor %6466, %int0_4128, %int0_4129, %6537, %int1_4130 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6538, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4131 = torch.constant.int 1
    %int0_4132 = torch.constant.int 0
    %int9223372036854775807_4133 = torch.constant.int 9223372036854775807
    %int1_4134 = torch.constant.int 1
    %6539 = torch.aten.slice.Tensor %6538, %int1_4131, %int0_4132, %int9223372036854775807_4133, %int1_4134 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6539, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4135 = torch.constant.int 0
    %6540 = torch.aten.unsqueeze %6539, %int0_4135 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6540, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4136 = torch.constant.int 2
    %6541 = torch.aten.unsqueeze %6540, %int2_4136 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6541, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4137 = torch.constant.int 3
    %int0_4138 = torch.constant.int 0
    %int9223372036854775807_4139 = torch.constant.int 9223372036854775807
    %int1_4140 = torch.constant.int 1
    %6542 = torch.aten.slice.Tensor %6541, %int3_4137, %int0_4138, %int9223372036854775807_4139, %int1_4140 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6542, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6543 = torch_c.to_builtin_tensor %6399 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_4141 = arith.constant 1 : index
    %dim_4142 = tensor.dim %6543, %c1_4141 : tensor<4x?x4x128xf16>
    %6544 = flow.tensor.bitcast %6543 : tensor<4x?x4x128xf16>{%dim_4142} -> tensor<4x?x4x64xcomplex<f16>>{%dim_4142}
    %6545 = torch_c.from_builtin_tensor %6544 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %6545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %6546 = torch.aten.mul.Tensor %6545, %6542 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %6546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %6547 = torch_c.to_builtin_tensor %6546 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_4143 = arith.constant 1 : index
    %dim_4144 = tensor.dim %6547, %c1_4143 : tensor<4x?x4x64xcomplex<f32>>
    %6548 = flow.tensor.bitcast %6547 : tensor<4x?x4x64xcomplex<f32>>{%dim_4144} -> tensor<4x?x4x128xf32>{%dim_4144}
    %6549 = torch_c.from_builtin_tensor %6548 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %6549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_4145 = torch.constant.int 5
    %6550 = torch.prims.convert_element_type %6549, %int5_4145 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4146 = torch.constant.int 1
    %6551 = torch.aten.size.int %6249, %int1_4146 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_4147 = torch.constant.int 0
    %6552 = torch.aten.add.int %int0_4147, %6551 : !torch.int, !torch.int -> !torch.int
    %int0_4148 = torch.constant.int 0
    %int0_4149 = torch.constant.int 0
    %int1_4150 = torch.constant.int 1
    %6553 = torch.aten.slice.Tensor %6469, %int0_4148, %int0_4149, %6552, %int1_4150 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6553, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4151 = torch.constant.int 1
    %int0_4152 = torch.constant.int 0
    %int9223372036854775807_4153 = torch.constant.int 9223372036854775807
    %int1_4154 = torch.constant.int 1
    %6554 = torch.aten.slice.Tensor %6553, %int1_4151, %int0_4152, %int9223372036854775807_4153, %int1_4154 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6554, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4155 = torch.constant.int 0
    %6555 = torch.aten.unsqueeze %6554, %int0_4155 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6555, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4156 = torch.constant.int 2
    %6556 = torch.aten.unsqueeze %6555, %int2_4156 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6556, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4157 = torch.constant.int 3
    %int0_4158 = torch.constant.int 0
    %int9223372036854775807_4159 = torch.constant.int 9223372036854775807
    %int1_4160 = torch.constant.int 1
    %6557 = torch.aten.slice.Tensor %6556, %int3_4157, %int0_4158, %int9223372036854775807_4159, %int1_4160 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6557, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6558 = torch_c.to_builtin_tensor %6401 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_4161 = arith.constant 1 : index
    %dim_4162 = tensor.dim %6558, %c1_4161 : tensor<4x?x4x128xf16>
    %6559 = flow.tensor.bitcast %6558 : tensor<4x?x4x128xf16>{%dim_4162} -> tensor<4x?x4x64xcomplex<f16>>{%dim_4162}
    %6560 = torch_c.from_builtin_tensor %6559 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %6560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %6561 = torch.aten.mul.Tensor %6560, %6557 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %6561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %6562 = torch_c.to_builtin_tensor %6561 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_4163 = arith.constant 1 : index
    %dim_4164 = tensor.dim %6562, %c1_4163 : tensor<4x?x4x64xcomplex<f32>>
    %6563 = flow.tensor.bitcast %6562 : tensor<4x?x4x64xcomplex<f32>>{%dim_4164} -> tensor<4x?x4x128xf32>{%dim_4164}
    %6564 = torch_c.from_builtin_tensor %6563 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %6564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_4165 = torch.constant.int 5
    %6565 = torch.prims.convert_element_type %6564, %int5_4165 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4166 = torch.constant.int 1
    %6566 = torch.aten.size.int %6255, %int1_4166 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_4167 = torch.constant.int 0
    %6567 = torch.aten.add.int %int0_4167, %6566 : !torch.int, !torch.int -> !torch.int
    %int0_4168 = torch.constant.int 0
    %int0_4169 = torch.constant.int 0
    %int1_4170 = torch.constant.int 1
    %6568 = torch.aten.slice.Tensor %6472, %int0_4168, %int0_4169, %6567, %int1_4170 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6568, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4171 = torch.constant.int 1
    %int0_4172 = torch.constant.int 0
    %int9223372036854775807_4173 = torch.constant.int 9223372036854775807
    %int1_4174 = torch.constant.int 1
    %6569 = torch.aten.slice.Tensor %6568, %int1_4171, %int0_4172, %int9223372036854775807_4173, %int1_4174 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6569, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4175 = torch.constant.int 0
    %6570 = torch.aten.unsqueeze %6569, %int0_4175 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6570, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4176 = torch.constant.int 2
    %6571 = torch.aten.unsqueeze %6570, %int2_4176 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6571, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4177 = torch.constant.int 3
    %int0_4178 = torch.constant.int 0
    %int9223372036854775807_4179 = torch.constant.int 9223372036854775807
    %int1_4180 = torch.constant.int 1
    %6572 = torch.aten.slice.Tensor %6571, %int3_4177, %int0_4178, %int9223372036854775807_4179, %int1_4180 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6572, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6573 = torch_c.to_builtin_tensor %6403 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_4181 = arith.constant 1 : index
    %dim_4182 = tensor.dim %6573, %c1_4181 : tensor<4x?x4x128xf16>
    %6574 = flow.tensor.bitcast %6573 : tensor<4x?x4x128xf16>{%dim_4182} -> tensor<4x?x4x64xcomplex<f16>>{%dim_4182}
    %6575 = torch_c.from_builtin_tensor %6574 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %6575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %6576 = torch.aten.mul.Tensor %6575, %6572 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %6576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %6577 = torch_c.to_builtin_tensor %6576 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_4183 = arith.constant 1 : index
    %dim_4184 = tensor.dim %6577, %c1_4183 : tensor<4x?x4x64xcomplex<f32>>
    %6578 = flow.tensor.bitcast %6577 : tensor<4x?x4x64xcomplex<f32>>{%dim_4184} -> tensor<4x?x4x128xf32>{%dim_4184}
    %6579 = torch_c.from_builtin_tensor %6578 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %6579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_4185 = torch.constant.int 5
    %6580 = torch.prims.convert_element_type %6579, %int5_4185 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4186 = torch.constant.int 1
    %6581 = torch.aten.size.int %6261, %int1_4186 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_4187 = torch.constant.int 0
    %6582 = torch.aten.add.int %int0_4187, %6581 : !torch.int, !torch.int -> !torch.int
    %int0_4188 = torch.constant.int 0
    %int0_4189 = torch.constant.int 0
    %int1_4190 = torch.constant.int 1
    %6583 = torch.aten.slice.Tensor %6475, %int0_4188, %int0_4189, %6582, %int1_4190 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6583, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4191 = torch.constant.int 1
    %int0_4192 = torch.constant.int 0
    %int9223372036854775807_4193 = torch.constant.int 9223372036854775807
    %int1_4194 = torch.constant.int 1
    %6584 = torch.aten.slice.Tensor %6583, %int1_4191, %int0_4192, %int9223372036854775807_4193, %int1_4194 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6584, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4195 = torch.constant.int 0
    %6585 = torch.aten.unsqueeze %6584, %int0_4195 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6585, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4196 = torch.constant.int 2
    %6586 = torch.aten.unsqueeze %6585, %int2_4196 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6586, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4197 = torch.constant.int 3
    %int0_4198 = torch.constant.int 0
    %int9223372036854775807_4199 = torch.constant.int 9223372036854775807
    %int1_4200 = torch.constant.int 1
    %6587 = torch.aten.slice.Tensor %6586, %int3_4197, %int0_4198, %int9223372036854775807_4199, %int1_4200 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6587, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6588 = torch_c.to_builtin_tensor %6405 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_4201 = arith.constant 1 : index
    %dim_4202 = tensor.dim %6588, %c1_4201 : tensor<4x?x4x128xf16>
    %6589 = flow.tensor.bitcast %6588 : tensor<4x?x4x128xf16>{%dim_4202} -> tensor<4x?x4x64xcomplex<f16>>{%dim_4202}
    %6590 = torch_c.from_builtin_tensor %6589 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %6590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %6591 = torch.aten.mul.Tensor %6590, %6587 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %6591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %6592 = torch_c.to_builtin_tensor %6591 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_4203 = arith.constant 1 : index
    %dim_4204 = tensor.dim %6592, %c1_4203 : tensor<4x?x4x64xcomplex<f32>>
    %6593 = flow.tensor.bitcast %6592 : tensor<4x?x4x64xcomplex<f32>>{%dim_4204} -> tensor<4x?x4x128xf32>{%dim_4204}
    %6594 = torch_c.from_builtin_tensor %6593 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %6594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_4205 = torch.constant.int 5
    %6595 = torch.prims.convert_element_type %6594, %int5_4205 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %6595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
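    // A second rotary table is materialized with the same parameters (131072
    // positions, 64 frequencies, theta 500000) and again replicated to all
    // eight devices. It is numerically identical to %6451 above, so a CSE
    // pass would presumably fold the two computations.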
    %int131072_4206 = torch.constant.int 131072
    %none_4207 = torch.constant.none
    %none_4208 = torch.constant.none
    %cpu_4209 = torch.constant.device "cpu"
    %false_4210 = torch.constant.bool false
    %6596 = torch.aten.arange %int131072_4206, %none_4207, %none_4208, %cpu_4209, %false_4210 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_4211 = torch.constant.int 0
    %int128_4212 = torch.constant.int 128
    %int2_4213 = torch.constant.int 2
    %none_4214 = torch.constant.none
    %none_4215 = torch.constant.none
    %cpu_4216 = torch.constant.device "cpu"
    %false_4217 = torch.constant.bool false
    %6597 = torch.aten.arange.start_step %int0_4211, %int128_4212, %int2_4213, %none_4214, %none_4215, %cpu_4216, %false_4217 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_4218 = torch.constant.int 0
    %int0_4219 = torch.constant.int 0
    %int64_4220 = torch.constant.int 64
    %int1_4221 = torch.constant.int 1
    %6598 = torch.aten.slice.Tensor %6597, %int0_4218, %int0_4219, %int64_4220, %int1_4221 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_4222 = torch.constant.int 6
    %6599 = torch.prims.convert_element_type %6598, %int6_4222 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_4223 = torch.constant.int 128
    %6600 = torch.aten.div.Scalar %6599, %int128_4223 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_4224 = torch.constant.float 5.000000e+05
    %6601 = torch.aten.pow.Scalar %float5.000000e05_4224, %6600 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %6602 = torch.aten.reciprocal %6601 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_4225 = torch.constant.float 1.000000e+00
    %6603 = torch.aten.mul.Scalar %6602, %float1.000000e00_4225 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_4226 = torch.constant.int 131072
    %int1_4227 = torch.constant.int 1
    %6604 = torch.prim.ListConstruct %int131072_4226, %int1_4227 : (!torch.int, !torch.int) -> !torch.list<int>
    %6605 = torch.aten.view %6596, %6604 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %6606 = torch.aten.mul.Tensor %6605, %6603 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %6607 = torch.aten.cos %6606 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %6608 = torch.aten.sin %6606 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %6609 = torch.aten.complex %6607, %6608 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
    %6610 = torch_c.to_builtin_tensor %6609 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6611 = flow.tensor.transfer %6610 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %6612 = torch_c.from_builtin_tensor %6611 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6613 = torch_c.to_builtin_tensor %6609 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6614 = flow.tensor.transfer %6613 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %6615 = torch_c.from_builtin_tensor %6614 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6616 = torch_c.to_builtin_tensor %6609 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6617 = flow.tensor.transfer %6616 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %6618 = torch_c.from_builtin_tensor %6617 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6619 = torch_c.to_builtin_tensor %6609 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6620 = flow.tensor.transfer %6619 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %6621 = torch_c.from_builtin_tensor %6620 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6622 = torch_c.to_builtin_tensor %6609 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6623 = flow.tensor.transfer %6622 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %6624 = torch_c.from_builtin_tensor %6623 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6625 = torch_c.to_builtin_tensor %6609 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6626 = flow.tensor.transfer %6625 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %6627 = torch_c.from_builtin_tensor %6626 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6628 = torch_c.to_builtin_tensor %6609 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6629 = flow.tensor.transfer %6628 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %6630 = torch_c.from_builtin_tensor %6629 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %6631 = torch_c.to_builtin_tensor %6609 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %6632 = flow.tensor.transfer %6631 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %6633 = torch_c.from_builtin_tensor %6632 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
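    // The table %6609 is replicated across all eight devices with
    // flow.tensor.transfer; %6612, %6615, %6618, %6621, %6624, %6627,
    // %6630, and %6633 are the copies on @__device_0 ... @__device_7.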
    %int1_4228 = torch.constant.int 1
    %6634 = torch.aten.size.int %6283, %int1_4228 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_4229 = torch.constant.int 0
    %6635 = torch.aten.add.int %int0_4229, %6634 : !torch.int, !torch.int -> !torch.int
    %int0_4230 = torch.constant.int 0
    %int0_4231 = torch.constant.int 0
    %int1_4232 = torch.constant.int 1
    %6636 = torch.aten.slice.Tensor %6612, %int0_4230, %int0_4231, %6635, %int1_4232 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6636, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4233 = torch.constant.int 1
    %int0_4234 = torch.constant.int 0
    %int9223372036854775807_4235 = torch.constant.int 9223372036854775807
    %int1_4236 = torch.constant.int 1
    %6637 = torch.aten.slice.Tensor %6636, %int1_4233, %int0_4234, %int9223372036854775807_4235, %int1_4236 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6637, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4237 = torch.constant.int 0
    %6638 = torch.aten.unsqueeze %6637, %int0_4237 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6638, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4238 = torch.constant.int 2
    %6639 = torch.aten.unsqueeze %6638, %int2_4238 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6639, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4239 = torch.constant.int 3
    %int0_4240 = torch.constant.int 0
    %int9223372036854775807_4241 = torch.constant.int 9223372036854775807
    %int1_4242 = torch.constant.int 1
    %6640 = torch.aten.slice.Tensor %6639, %int3_4239, %int0_4240, %int9223372036854775807_4241, %int1_4242 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6640, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6641 = torch_c.to_builtin_tensor %6407 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_4243 = arith.constant 1 : index
    %dim_4244 = tensor.dim %6641, %c1_4243 : tensor<4x?x1x128xf16>
    %6642 = flow.tensor.bitcast %6641 : tensor<4x?x1x128xf16>{%dim_4244} -> tensor<4x?x1x64xcomplex<f16>>{%dim_4244}
    %6643 = torch_c.from_builtin_tensor %6642 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %6643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %6644 = torch.aten.mul.Tensor %6643, %6640 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %6645 = torch_c.to_builtin_tensor %6644 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_4245 = arith.constant 1 : index
    %dim_4246 = tensor.dim %6645, %c1_4245 : tensor<4x?x1x64xcomplex<f32>>
    %6646 = flow.tensor.bitcast %6645 : tensor<4x?x1x64xcomplex<f32>>{%dim_4246} -> tensor<4x?x1x128xf32>{%dim_4246}
    %6647 = torch_c.from_builtin_tensor %6646 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %6647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_4247 = torch.constant.int 5
    %6648 = torch.prims.convert_element_type %6647, %int5_4247 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
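    // Device 0 rotary application: the local table %6612 is sliced to the
    // runtime sequence length %6634 (dim 1 of %6283) and reshaped to
    // [1, ?, 1, 64]; the f16 input %6407 is bitcast to complex<f16>,
    // rotated by complex multiplication, bitcast back to [4, ?, 1, 128]
    // f32, and truncated to f16 as %6648. The identical sequence repeats
    // below for devices 1-7, producing %6663, %6678, %6693, %6708, %6723,
    // %6738, and %6753.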
    %int1_4248 = torch.constant.int 1
    %6649 = torch.aten.size.int %6289, %int1_4248 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_4249 = torch.constant.int 0
    %6650 = torch.aten.add.int %int0_4249, %6649 : !torch.int, !torch.int -> !torch.int
    %int0_4250 = torch.constant.int 0
    %int0_4251 = torch.constant.int 0
    %int1_4252 = torch.constant.int 1
    %6651 = torch.aten.slice.Tensor %6615, %int0_4250, %int0_4251, %6650, %int1_4252 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6651, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4253 = torch.constant.int 1
    %int0_4254 = torch.constant.int 0
    %int9223372036854775807_4255 = torch.constant.int 9223372036854775807
    %int1_4256 = torch.constant.int 1
    %6652 = torch.aten.slice.Tensor %6651, %int1_4253, %int0_4254, %int9223372036854775807_4255, %int1_4256 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6652, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4257 = torch.constant.int 0
    %6653 = torch.aten.unsqueeze %6652, %int0_4257 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6653, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4258 = torch.constant.int 2
    %6654 = torch.aten.unsqueeze %6653, %int2_4258 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6654, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4259 = torch.constant.int 3
    %int0_4260 = torch.constant.int 0
    %int9223372036854775807_4261 = torch.constant.int 9223372036854775807
    %int1_4262 = torch.constant.int 1
    %6655 = torch.aten.slice.Tensor %6654, %int3_4259, %int0_4260, %int9223372036854775807_4261, %int1_4262 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6655, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6656 = torch_c.to_builtin_tensor %6409 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_4263 = arith.constant 1 : index
    %dim_4264 = tensor.dim %6656, %c1_4263 : tensor<4x?x1x128xf16>
    %6657 = flow.tensor.bitcast %6656 : tensor<4x?x1x128xf16>{%dim_4264} -> tensor<4x?x1x64xcomplex<f16>>{%dim_4264}
    %6658 = torch_c.from_builtin_tensor %6657 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %6658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %6659 = torch.aten.mul.Tensor %6658, %6655 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %6660 = torch_c.to_builtin_tensor %6659 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_4265 = arith.constant 1 : index
    %dim_4266 = tensor.dim %6660, %c1_4265 : tensor<4x?x1x64xcomplex<f32>>
    %6661 = flow.tensor.bitcast %6660 : tensor<4x?x1x64xcomplex<f32>>{%dim_4266} -> tensor<4x?x1x128xf32>{%dim_4266}
    %6662 = torch_c.from_builtin_tensor %6661 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %6662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_4267 = torch.constant.int 5
    %6663 = torch.prims.convert_element_type %6662, %int5_4267 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_4268 = torch.constant.int 1
    %6664 = torch.aten.size.int %6295, %int1_4268 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_4269 = torch.constant.int 0
    %6665 = torch.aten.add.int %int0_4269, %6664 : !torch.int, !torch.int -> !torch.int
    %int0_4270 = torch.constant.int 0
    %int0_4271 = torch.constant.int 0
    %int1_4272 = torch.constant.int 1
    %6666 = torch.aten.slice.Tensor %6618, %int0_4270, %int0_4271, %6665, %int1_4272 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6666, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4273 = torch.constant.int 1
    %int0_4274 = torch.constant.int 0
    %int9223372036854775807_4275 = torch.constant.int 9223372036854775807
    %int1_4276 = torch.constant.int 1
    %6667 = torch.aten.slice.Tensor %6666, %int1_4273, %int0_4274, %int9223372036854775807_4275, %int1_4276 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6667, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4277 = torch.constant.int 0
    %6668 = torch.aten.unsqueeze %6667, %int0_4277 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6668, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4278 = torch.constant.int 2
    %6669 = torch.aten.unsqueeze %6668, %int2_4278 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6669, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4279 = torch.constant.int 3
    %int0_4280 = torch.constant.int 0
    %int9223372036854775807_4281 = torch.constant.int 9223372036854775807
    %int1_4282 = torch.constant.int 1
    %6670 = torch.aten.slice.Tensor %6669, %int3_4279, %int0_4280, %int9223372036854775807_4281, %int1_4282 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6670, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6671 = torch_c.to_builtin_tensor %6411 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_4283 = arith.constant 1 : index
    %dim_4284 = tensor.dim %6671, %c1_4283 : tensor<4x?x1x128xf16>
    %6672 = flow.tensor.bitcast %6671 : tensor<4x?x1x128xf16>{%dim_4284} -> tensor<4x?x1x64xcomplex<f16>>{%dim_4284}
    %6673 = torch_c.from_builtin_tensor %6672 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %6673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %6674 = torch.aten.mul.Tensor %6673, %6670 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %6675 = torch_c.to_builtin_tensor %6674 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_4285 = arith.constant 1 : index
    %dim_4286 = tensor.dim %6675, %c1_4285 : tensor<4x?x1x64xcomplex<f32>>
    %6676 = flow.tensor.bitcast %6675 : tensor<4x?x1x64xcomplex<f32>>{%dim_4286} -> tensor<4x?x1x128xf32>{%dim_4286}
    %6677 = torch_c.from_builtin_tensor %6676 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %6677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_4287 = torch.constant.int 5
    %6678 = torch.prims.convert_element_type %6677, %int5_4287 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_4288 = torch.constant.int 1
    %6679 = torch.aten.size.int %6301, %int1_4288 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_4289 = torch.constant.int 0
    %6680 = torch.aten.add.int %int0_4289, %6679 : !torch.int, !torch.int -> !torch.int
    %int0_4290 = torch.constant.int 0
    %int0_4291 = torch.constant.int 0
    %int1_4292 = torch.constant.int 1
    %6681 = torch.aten.slice.Tensor %6621, %int0_4290, %int0_4291, %6680, %int1_4292 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6681, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4293 = torch.constant.int 1
    %int0_4294 = torch.constant.int 0
    %int9223372036854775807_4295 = torch.constant.int 9223372036854775807
    %int1_4296 = torch.constant.int 1
    %6682 = torch.aten.slice.Tensor %6681, %int1_4293, %int0_4294, %int9223372036854775807_4295, %int1_4296 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6682, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4297 = torch.constant.int 0
    %6683 = torch.aten.unsqueeze %6682, %int0_4297 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6683, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4298 = torch.constant.int 2
    %6684 = torch.aten.unsqueeze %6683, %int2_4298 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6684, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4299 = torch.constant.int 3
    %int0_4300 = torch.constant.int 0
    %int9223372036854775807_4301 = torch.constant.int 9223372036854775807
    %int1_4302 = torch.constant.int 1
    %6685 = torch.aten.slice.Tensor %6684, %int3_4299, %int0_4300, %int9223372036854775807_4301, %int1_4302 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6685, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6686 = torch_c.to_builtin_tensor %6413 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_4303 = arith.constant 1 : index
    %dim_4304 = tensor.dim %6686, %c1_4303 : tensor<4x?x1x128xf16>
    %6687 = flow.tensor.bitcast %6686 : tensor<4x?x1x128xf16>{%dim_4304} -> tensor<4x?x1x64xcomplex<f16>>{%dim_4304}
    %6688 = torch_c.from_builtin_tensor %6687 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %6688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %6689 = torch.aten.mul.Tensor %6688, %6685 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %6690 = torch_c.to_builtin_tensor %6689 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_4305 = arith.constant 1 : index
    %dim_4306 = tensor.dim %6690, %c1_4305 : tensor<4x?x1x64xcomplex<f32>>
    %6691 = flow.tensor.bitcast %6690 : tensor<4x?x1x64xcomplex<f32>>{%dim_4306} -> tensor<4x?x1x128xf32>{%dim_4306}
    %6692 = torch_c.from_builtin_tensor %6691 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %6692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_4307 = torch.constant.int 5
    %6693 = torch.prims.convert_element_type %6692, %int5_4307 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_4308 = torch.constant.int 1
    %6694 = torch.aten.size.int %6307, %int1_4308 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_4309 = torch.constant.int 0
    %6695 = torch.aten.add.int %int0_4309, %6694 : !torch.int, !torch.int -> !torch.int
    %int0_4310 = torch.constant.int 0
    %int0_4311 = torch.constant.int 0
    %int1_4312 = torch.constant.int 1
    %6696 = torch.aten.slice.Tensor %6624, %int0_4310, %int0_4311, %6695, %int1_4312 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6696, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4313 = torch.constant.int 1
    %int0_4314 = torch.constant.int 0
    %int9223372036854775807_4315 = torch.constant.int 9223372036854775807
    %int1_4316 = torch.constant.int 1
    %6697 = torch.aten.slice.Tensor %6696, %int1_4313, %int0_4314, %int9223372036854775807_4315, %int1_4316 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6697, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4317 = torch.constant.int 0
    %6698 = torch.aten.unsqueeze %6697, %int0_4317 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6698, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4318 = torch.constant.int 2
    %6699 = torch.aten.unsqueeze %6698, %int2_4318 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6699, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4319 = torch.constant.int 3
    %int0_4320 = torch.constant.int 0
    %int9223372036854775807_4321 = torch.constant.int 9223372036854775807
    %int1_4322 = torch.constant.int 1
    %6700 = torch.aten.slice.Tensor %6699, %int3_4319, %int0_4320, %int9223372036854775807_4321, %int1_4322 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6700, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6701 = torch_c.to_builtin_tensor %6415 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_4323 = arith.constant 1 : index
    %dim_4324 = tensor.dim %6701, %c1_4323 : tensor<4x?x1x128xf16>
    %6702 = flow.tensor.bitcast %6701 : tensor<4x?x1x128xf16>{%dim_4324} -> tensor<4x?x1x64xcomplex<f16>>{%dim_4324}
    %6703 = torch_c.from_builtin_tensor %6702 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %6703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %6704 = torch.aten.mul.Tensor %6703, %6700 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %6705 = torch_c.to_builtin_tensor %6704 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_4325 = arith.constant 1 : index
    %dim_4326 = tensor.dim %6705, %c1_4325 : tensor<4x?x1x64xcomplex<f32>>
    %6706 = flow.tensor.bitcast %6705 : tensor<4x?x1x64xcomplex<f32>>{%dim_4326} -> tensor<4x?x1x128xf32>{%dim_4326}
    %6707 = torch_c.from_builtin_tensor %6706 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %6707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_4327 = torch.constant.int 5
    %6708 = torch.prims.convert_element_type %6707, %int5_4327 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_4328 = torch.constant.int 1
    %6709 = torch.aten.size.int %6313, %int1_4328 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_4329 = torch.constant.int 0
    %6710 = torch.aten.add.int %int0_4329, %6709 : !torch.int, !torch.int -> !torch.int
    %int0_4330 = torch.constant.int 0
    %int0_4331 = torch.constant.int 0
    %int1_4332 = torch.constant.int 1
    %6711 = torch.aten.slice.Tensor %6627, %int0_4330, %int0_4331, %6710, %int1_4332 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6711, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4333 = torch.constant.int 1
    %int0_4334 = torch.constant.int 0
    %int9223372036854775807_4335 = torch.constant.int 9223372036854775807
    %int1_4336 = torch.constant.int 1
    %6712 = torch.aten.slice.Tensor %6711, %int1_4333, %int0_4334, %int9223372036854775807_4335, %int1_4336 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6712, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4337 = torch.constant.int 0
    %6713 = torch.aten.unsqueeze %6712, %int0_4337 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6713, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4338 = torch.constant.int 2
    %6714 = torch.aten.unsqueeze %6713, %int2_4338 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6714, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4339 = torch.constant.int 3
    %int0_4340 = torch.constant.int 0
    %int9223372036854775807_4341 = torch.constant.int 9223372036854775807
    %int1_4342 = torch.constant.int 1
    %6715 = torch.aten.slice.Tensor %6714, %int3_4339, %int0_4340, %int9223372036854775807_4341, %int1_4342 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6715, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6716 = torch_c.to_builtin_tensor %6417 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_4343 = arith.constant 1 : index
    %dim_4344 = tensor.dim %6716, %c1_4343 : tensor<4x?x1x128xf16>
    %6717 = flow.tensor.bitcast %6716 : tensor<4x?x1x128xf16>{%dim_4344} -> tensor<4x?x1x64xcomplex<f16>>{%dim_4344}
    %6718 = torch_c.from_builtin_tensor %6717 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %6718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %6719 = torch.aten.mul.Tensor %6718, %6715 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %6720 = torch_c.to_builtin_tensor %6719 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_4345 = arith.constant 1 : index
    %dim_4346 = tensor.dim %6720, %c1_4345 : tensor<4x?x1x64xcomplex<f32>>
    %6721 = flow.tensor.bitcast %6720 : tensor<4x?x1x64xcomplex<f32>>{%dim_4346} -> tensor<4x?x1x128xf32>{%dim_4346}
    %6722 = torch_c.from_builtin_tensor %6721 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %6722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_4347 = torch.constant.int 5
    %6723 = torch.prims.convert_element_type %6722, %int5_4347 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_4348 = torch.constant.int 1
    %6724 = torch.aten.size.int %6319, %int1_4348 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_4349 = torch.constant.int 0
    %6725 = torch.aten.add.int %int0_4349, %6724 : !torch.int, !torch.int -> !torch.int
    %int0_4350 = torch.constant.int 0
    %int0_4351 = torch.constant.int 0
    %int1_4352 = torch.constant.int 1
    %6726 = torch.aten.slice.Tensor %6630, %int0_4350, %int0_4351, %6725, %int1_4352 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6726, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4353 = torch.constant.int 1
    %int0_4354 = torch.constant.int 0
    %int9223372036854775807_4355 = torch.constant.int 9223372036854775807
    %int1_4356 = torch.constant.int 1
    %6727 = torch.aten.slice.Tensor %6726, %int1_4353, %int0_4354, %int9223372036854775807_4355, %int1_4356 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6727, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4357 = torch.constant.int 0
    %6728 = torch.aten.unsqueeze %6727, %int0_4357 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6728, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4358 = torch.constant.int 2
    %6729 = torch.aten.unsqueeze %6728, %int2_4358 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6729, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4359 = torch.constant.int 3
    %int0_4360 = torch.constant.int 0
    %int9223372036854775807_4361 = torch.constant.int 9223372036854775807
    %int1_4362 = torch.constant.int 1
    %6730 = torch.aten.slice.Tensor %6729, %int3_4359, %int0_4360, %int9223372036854775807_4361, %int1_4362 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6730, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6731 = torch_c.to_builtin_tensor %6419 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_4363 = arith.constant 1 : index
    %dim_4364 = tensor.dim %6731, %c1_4363 : tensor<4x?x1x128xf16>
    %6732 = flow.tensor.bitcast %6731 : tensor<4x?x1x128xf16>{%dim_4364} -> tensor<4x?x1x64xcomplex<f16>>{%dim_4364}
    %6733 = torch_c.from_builtin_tensor %6732 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %6733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %6734 = torch.aten.mul.Tensor %6733, %6730 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %6735 = torch_c.to_builtin_tensor %6734 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_4365 = arith.constant 1 : index
    %dim_4366 = tensor.dim %6735, %c1_4365 : tensor<4x?x1x64xcomplex<f32>>
    %6736 = flow.tensor.bitcast %6735 : tensor<4x?x1x64xcomplex<f32>>{%dim_4366} -> tensor<4x?x1x128xf32>{%dim_4366}
    %6737 = torch_c.from_builtin_tensor %6736 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %6737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_4367 = torch.constant.int 5
    %6738 = torch.prims.convert_element_type %6737, %int5_4367 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_4368 = torch.constant.int 1
    %6739 = torch.aten.size.int %6325, %int1_4368 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_4369 = torch.constant.int 0
    %6740 = torch.aten.add.int %int0_4369, %6739 : !torch.int, !torch.int -> !torch.int
    %int0_4370 = torch.constant.int 0
    %int0_4371 = torch.constant.int 0
    %int1_4372 = torch.constant.int 1
    %6741 = torch.aten.slice.Tensor %6633, %int0_4370, %int0_4371, %6740, %int1_4372 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6741, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_4373 = torch.constant.int 1
    %int0_4374 = torch.constant.int 0
    %int9223372036854775807_4375 = torch.constant.int 9223372036854775807
    %int1_4376 = torch.constant.int 1
    %6742 = torch.aten.slice.Tensor %6741, %int1_4373, %int0_4374, %int9223372036854775807_4375, %int1_4376 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %6742, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_4377 = torch.constant.int 0
    %6743 = torch.aten.unsqueeze %6742, %int0_4377 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %6743, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_4378 = torch.constant.int 2
    %6744 = torch.aten.unsqueeze %6743, %int2_4378 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6744, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_4379 = torch.constant.int 3
    %int0_4380 = torch.constant.int 0
    %int9223372036854775807_4381 = torch.constant.int 9223372036854775807
    %int1_4382 = torch.constant.int 1
    %6745 = torch.aten.slice.Tensor %6744, %int3_4379, %int0_4380, %int9223372036854775807_4381, %int1_4382 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6745, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %6746 = torch_c.to_builtin_tensor %6421 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_4383 = arith.constant 1 : index
    %dim_4384 = tensor.dim %6746, %c1_4383 : tensor<4x?x1x128xf16>
    %6747 = flow.tensor.bitcast %6746 : tensor<4x?x1x128xf16>{%dim_4384} -> tensor<4x?x1x64xcomplex<f16>>{%dim_4384}
    %6748 = torch_c.from_builtin_tensor %6747 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %6748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %6749 = torch.aten.mul.Tensor %6748, %6745 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %6749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %6750 = torch_c.to_builtin_tensor %6749 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_4385 = arith.constant 1 : index
    %dim_4386 = tensor.dim %6750, %c1_4385 : tensor<4x?x1x64xcomplex<f32>>
    %6751 = flow.tensor.bitcast %6750 : tensor<4x?x1x64xcomplex<f32>>{%dim_4386} -> tensor<4x?x1x128xf32>{%dim_4386}
    %6752 = torch_c.from_builtin_tensor %6751 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %6752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_4387 = torch.constant.int 5
    %6753 = torch.prims.convert_element_type %6752, %int5_4387 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %6753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
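    // Rotary embedding is now applied on every device (%6648 ... %6753).
    // The remainder of this block appears to stage a paged-cache update:
    // slot indices are computed, then the tensors are reshaped to match
    // the cache layout.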
    %int64_4388 = torch.constant.int 64
    %6754 = torch.aten.mul.Scalar %2364, %int64_4388 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6754, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_4389 = torch.constant.int 64
    %6755 = torch.aten.mul.Scalar %2367, %int64_4389 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6755, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_4390 = torch.constant.int 64
    %6756 = torch.aten.mul.Scalar %2370, %int64_4390 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6756, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_4391 = torch.constant.int 64
    %6757 = torch.aten.mul.Scalar %2373, %int64_4391 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6757, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_4392 = torch.constant.int 64
    %6758 = torch.aten.mul.Scalar %2376, %int64_4392 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6758, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_4393 = torch.constant.int 64
    %6759 = torch.aten.mul.Scalar %2379, %int64_4393 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6759, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_4394 = torch.constant.int 64
    %6760 = torch.aten.mul.Scalar %2382, %int64_4394 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6760, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_4395 = torch.constant.int 64
    %6761 = torch.aten.mul.Scalar %2385, %int64_4395 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6761, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_4396 = torch.constant.int 4
    %int1_4397 = torch.constant.int 1
    %6762 = torch.aten.add.Scalar %6754, %int4_4396, %int1_4397 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6762, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_4398 = torch.constant.int 4
    %int1_4399 = torch.constant.int 1
    %6763 = torch.aten.add.Scalar %6755, %int4_4398, %int1_4399 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6763, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_4400 = torch.constant.int 4
    %int1_4401 = torch.constant.int 1
    %6764 = torch.aten.add.Scalar %6756, %int4_4400, %int1_4401 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6764, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_4402 = torch.constant.int 4
    %int1_4403 = torch.constant.int 1
    %6765 = torch.aten.add.Scalar %6757, %int4_4402, %int1_4403 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6765, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_4404 = torch.constant.int 4
    %int1_4405 = torch.constant.int 1
    %6766 = torch.aten.add.Scalar %6758, %int4_4404, %int1_4405 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6766, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_4406 = torch.constant.int 4
    %int1_4407 = torch.constant.int 1
    %6767 = torch.aten.add.Scalar %6759, %int4_4406, %int1_4407 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6767, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_4408 = torch.constant.int 4
    %int1_4409 = torch.constant.int 1
    %6768 = torch.aten.add.Scalar %6760, %int4_4408, %int1_4409 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6768, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_4410 = torch.constant.int 4
    %int1_4411 = torch.constant.int 1
    %6769 = torch.aten.add.Scalar %6761, %int4_4410, %int1_4411 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6769, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
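    // Slot-index computation: the per-device index tensors %2364, %2367,
    // %2370, %2373, %2376, %2379, %2382, and %2385 ([4, ?] si64) are
    // scaled by 64 and offset by 4 (%6762 ... %6769), presumably mapping
    // logical page numbers to physical slots within 64-entry pages.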
    %int4_4412 = torch.constant.int 4
    %int16_4413 = torch.constant.int 16
    %int1_4414 = torch.constant.int 1
    %int128_4415 = torch.constant.int 128
    %6770 = torch.prim.ListConstruct %int4_4412, %3095, %int16_4413, %int1_4414, %int128_4415 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6771 = torch.aten.view %6648, %6770 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6771, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4416 = torch.constant.int 4
    %int16_4417 = torch.constant.int 16
    %int1_4418 = torch.constant.int 1
    %int128_4419 = torch.constant.int 128
    %6772 = torch.prim.ListConstruct %int4_4416, %3095, %int16_4417, %int1_4418, %int128_4419 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6773 = torch.aten.view %6663, %6772 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6773, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4420 = torch.constant.int 4
    %int16_4421 = torch.constant.int 16
    %int1_4422 = torch.constant.int 1
    %int128_4423 = torch.constant.int 128
    %6774 = torch.prim.ListConstruct %int4_4420, %3095, %int16_4421, %int1_4422, %int128_4423 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6775 = torch.aten.view %6678, %6774 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6775, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4424 = torch.constant.int 4
    %int16_4425 = torch.constant.int 16
    %int1_4426 = torch.constant.int 1
    %int128_4427 = torch.constant.int 128
    %6776 = torch.prim.ListConstruct %int4_4424, %3095, %int16_4425, %int1_4426, %int128_4427 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6777 = torch.aten.view %6693, %6776 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6777, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4428 = torch.constant.int 4
    %int16_4429 = torch.constant.int 16
    %int1_4430 = torch.constant.int 1
    %int128_4431 = torch.constant.int 128
    %6778 = torch.prim.ListConstruct %int4_4428, %3095, %int16_4429, %int1_4430, %int128_4431 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6779 = torch.aten.view %6708, %6778 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6779, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4432 = torch.constant.int 4
    %int16_4433 = torch.constant.int 16
    %int1_4434 = torch.constant.int 1
    %int128_4435 = torch.constant.int 128
    %6780 = torch.prim.ListConstruct %int4_4432, %3095, %int16_4433, %int1_4434, %int128_4435 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6781 = torch.aten.view %6723, %6780 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6781, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4436 = torch.constant.int 4
    %int16_4437 = torch.constant.int 16
    %int1_4438 = torch.constant.int 1
    %int128_4439 = torch.constant.int 128
    %6782 = torch.prim.ListConstruct %int4_4436, %3095, %int16_4437, %int1_4438, %int128_4439 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6783 = torch.aten.view %6738, %6782 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6783, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4440 = torch.constant.int 4
    %int16_4441 = torch.constant.int 16
    %int1_4442 = torch.constant.int 1
    %int128_4443 = torch.constant.int 128
    %6784 = torch.prim.ListConstruct %int4_4440, %3095, %int16_4441, %int1_4442, %int128_4443 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6785 = torch.aten.view %6753, %6784 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6785, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
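    // The rotated tensors are viewed as [4, ?, 16, 1, 128]
    // (%6771 ... %6785), unflattening the sequence dimension (s0 * 16)
    // into s0 pages of 16 positions.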
    %int4_4444 = torch.constant.int 4
    %6786 = torch.aten.mul.int %int4_4444, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4445 = torch.constant.int 16
    %int1_4446 = torch.constant.int 1
    %int128_4447 = torch.constant.int 128
    %6787 = torch.prim.ListConstruct %6786, %int16_4445, %int1_4446, %int128_4447 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6788 = torch.aten.view %6771, %6787 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6788, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4448 = torch.constant.int 4
    %6789 = torch.aten.mul.int %int4_4448, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4449 = torch.constant.int 16
    %int1_4450 = torch.constant.int 1
    %int128_4451 = torch.constant.int 128
    %6790 = torch.prim.ListConstruct %6789, %int16_4449, %int1_4450, %int128_4451 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6791 = torch.aten.view %6773, %6790 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6791, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4452 = torch.constant.int 4
    %6792 = torch.aten.mul.int %int4_4452, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4453 = torch.constant.int 16
    %int1_4454 = torch.constant.int 1
    %int128_4455 = torch.constant.int 128
    %6793 = torch.prim.ListConstruct %6792, %int16_4453, %int1_4454, %int128_4455 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6794 = torch.aten.view %6775, %6793 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6794, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4456 = torch.constant.int 4
    %6795 = torch.aten.mul.int %int4_4456, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4457 = torch.constant.int 16
    %int1_4458 = torch.constant.int 1
    %int128_4459 = torch.constant.int 128
    %6796 = torch.prim.ListConstruct %6795, %int16_4457, %int1_4458, %int128_4459 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6797 = torch.aten.view %6777, %6796 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6797, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4460 = torch.constant.int 4
    %6798 = torch.aten.mul.int %int4_4460, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4461 = torch.constant.int 16
    %int1_4462 = torch.constant.int 1
    %int128_4463 = torch.constant.int 128
    %6799 = torch.prim.ListConstruct %6798, %int16_4461, %int1_4462, %int128_4463 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6800 = torch.aten.view %6779, %6799 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6800, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4464 = torch.constant.int 4
    %6801 = torch.aten.mul.int %int4_4464, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4465 = torch.constant.int 16
    %int1_4466 = torch.constant.int 1
    %int128_4467 = torch.constant.int 128
    %6802 = torch.prim.ListConstruct %6801, %int16_4465, %int1_4466, %int128_4467 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6803 = torch.aten.view %6781, %6802 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6803, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4468 = torch.constant.int 4
    %6804 = torch.aten.mul.int %int4_4468, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4469 = torch.constant.int 16
    %int1_4470 = torch.constant.int 1
    %int128_4471 = torch.constant.int 128
    %6805 = torch.prim.ListConstruct %6804, %int16_4469, %int1_4470, %int128_4471 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6806 = torch.aten.view %6783, %6805 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6806, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4472 = torch.constant.int 4
    %6807 = torch.aten.mul.int %int4_4472, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4473 = torch.constant.int 16
    %int1_4474 = torch.constant.int 1
    %int128_4475 = torch.constant.int 128
    %6808 = torch.prim.ListConstruct %6807, %int16_4473, %int1_4474, %int128_4475 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6809 = torch.aten.view %6785, %6808 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6809, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
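    // Batch and page dimensions are collapsed: %6788 ... %6809 are
    // [?, 16, 1, 128] views whose leading extent is 4 * s0
    // (batch times pages).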
    %int4_4476 = torch.constant.int 4
    %6810 = torch.aten.mul.int %int4_4476, %3095 : !torch.int, !torch.int -> !torch.int
    %6811 = torch.prim.ListConstruct %6810 : (!torch.int) -> !torch.list<int>
    %6812 = torch.aten.view %6762, %6811 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6812, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4477 = torch.constant.int 4
    %6813 = torch.aten.mul.int %int4_4477, %3095 : !torch.int, !torch.int -> !torch.int
    %6814 = torch.prim.ListConstruct %6813 : (!torch.int) -> !torch.list<int>
    %6815 = torch.aten.view %6763, %6814 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6815, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4478 = torch.constant.int 4
    %6816 = torch.aten.mul.int %int4_4478, %3095 : !torch.int, !torch.int -> !torch.int
    %6817 = torch.prim.ListConstruct %6816 : (!torch.int) -> !torch.list<int>
    %6818 = torch.aten.view %6764, %6817 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6818, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4479 = torch.constant.int 4
    %6819 = torch.aten.mul.int %int4_4479, %3095 : !torch.int, !torch.int -> !torch.int
    %6820 = torch.prim.ListConstruct %6819 : (!torch.int) -> !torch.list<int>
    %6821 = torch.aten.view %6765, %6820 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6821, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4480 = torch.constant.int 4
    %6822 = torch.aten.mul.int %int4_4480, %3095 : !torch.int, !torch.int -> !torch.int
    %6823 = torch.prim.ListConstruct %6822 : (!torch.int) -> !torch.list<int>
    %6824 = torch.aten.view %6766, %6823 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6824, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4481 = torch.constant.int 4
    %6825 = torch.aten.mul.int %int4_4481, %3095 : !torch.int, !torch.int -> !torch.int
    %6826 = torch.prim.ListConstruct %6825 : (!torch.int) -> !torch.list<int>
    %6827 = torch.aten.view %6767, %6826 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6827, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4482 = torch.constant.int 4
    %6828 = torch.aten.mul.int %int4_4482, %3095 : !torch.int, !torch.int -> !torch.int
    %6829 = torch.prim.ListConstruct %6828 : (!torch.int) -> !torch.list<int>
    %6830 = torch.aten.view %6768, %6829 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6830, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4483 = torch.constant.int 4
    %6831 = torch.aten.mul.int %int4_4483, %3095 : !torch.int, !torch.int -> !torch.int
    %6832 = torch.prim.ListConstruct %6831 : (!torch.int) -> !torch.list<int>
    %6833 = torch.aten.view %6769, %6832 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6833, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
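    // The slot indices are flattened to match: %6812 ... %6833 are
    // one-dimensional [?] si64 tensors of length 4 * s0.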
    %int4_4484 = torch.constant.int 4
    %int16_4485 = torch.constant.int 16
    %int1_4486 = torch.constant.int 1
    %int128_4487 = torch.constant.int 128
    %6834 = torch.prim.ListConstruct %int4_4484, %3095, %int16_4485, %int1_4486, %int128_4487 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6835 = torch.aten.view %6423, %6834 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6835, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4488 = torch.constant.int 4
    %int16_4489 = torch.constant.int 16
    %int1_4490 = torch.constant.int 1
    %int128_4491 = torch.constant.int 128
    %6836 = torch.prim.ListConstruct %int4_4488, %3095, %int16_4489, %int1_4490, %int128_4491 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6837 = torch.aten.view %6425, %6836 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6837, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4492 = torch.constant.int 4
    %int16_4493 = torch.constant.int 16
    %int1_4494 = torch.constant.int 1
    %int128_4495 = torch.constant.int 128
    %6838 = torch.prim.ListConstruct %int4_4492, %3095, %int16_4493, %int1_4494, %int128_4495 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6839 = torch.aten.view %6427, %6838 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6839, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4496 = torch.constant.int 4
    %int16_4497 = torch.constant.int 16
    %int1_4498 = torch.constant.int 1
    %int128_4499 = torch.constant.int 128
    %6840 = torch.prim.ListConstruct %int4_4496, %3095, %int16_4497, %int1_4498, %int128_4499 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6841 = torch.aten.view %6429, %6840 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6841, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4500 = torch.constant.int 4
    %int16_4501 = torch.constant.int 16
    %int1_4502 = torch.constant.int 1
    %int128_4503 = torch.constant.int 128
    %6842 = torch.prim.ListConstruct %int4_4500, %3095, %int16_4501, %int1_4502, %int128_4503 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6843 = torch.aten.view %6431, %6842 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6843, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4504 = torch.constant.int 4
    %int16_4505 = torch.constant.int 16
    %int1_4506 = torch.constant.int 1
    %int128_4507 = torch.constant.int 128
    %6844 = torch.prim.ListConstruct %int4_4504, %3095, %int16_4505, %int1_4506, %int128_4507 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6845 = torch.aten.view %6433, %6844 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6845, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4508 = torch.constant.int 4
    %int16_4509 = torch.constant.int 16
    %int1_4510 = torch.constant.int 1
    %int128_4511 = torch.constant.int 128
    %6846 = torch.prim.ListConstruct %int4_4508, %3095, %int16_4509, %int1_4510, %int128_4511 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6847 = torch.aten.view %6435, %6846 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6847, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_4512 = torch.constant.int 4
    %int16_4513 = torch.constant.int 16
    %int1_4514 = torch.constant.int 1
    %int128_4515 = torch.constant.int 128
    %6848 = torch.prim.ListConstruct %int4_4512, %3095, %int16_4513, %int1_4514, %int128_4515 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6849 = torch.aten.view %6437, %6848 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %6849, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
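    // The views above appear to split each shard's [4, seq, 1, 128] slice
    // (seq = s0 * 16) into s0 cache pages of 16 tokens: [4, s0, 16, 1, 128].
    // Next, the batch and page dims are merged so each row can be scattered
    // into the paged cache independently.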
    %int4_4516 = torch.constant.int 4
    %6850 = torch.aten.mul.int %int4_4516, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4517 = torch.constant.int 16
    %int1_4518 = torch.constant.int 1
    %int128_4519 = torch.constant.int 128
    %6851 = torch.prim.ListConstruct %6850, %int16_4517, %int1_4518, %int128_4519 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6852 = torch.aten.view %6835, %6851 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6852, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4520 = torch.constant.int 4
    %6853 = torch.aten.mul.int %int4_4520, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4521 = torch.constant.int 16
    %int1_4522 = torch.constant.int 1
    %int128_4523 = torch.constant.int 128
    %6854 = torch.prim.ListConstruct %6853, %int16_4521, %int1_4522, %int128_4523 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6855 = torch.aten.view %6837, %6854 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6855, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4524 = torch.constant.int 4
    %6856 = torch.aten.mul.int %int4_4524, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4525 = torch.constant.int 16
    %int1_4526 = torch.constant.int 1
    %int128_4527 = torch.constant.int 128
    %6857 = torch.prim.ListConstruct %6856, %int16_4525, %int1_4526, %int128_4527 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6858 = torch.aten.view %6839, %6857 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6858, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4528 = torch.constant.int 4
    %6859 = torch.aten.mul.int %int4_4528, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4529 = torch.constant.int 16
    %int1_4530 = torch.constant.int 1
    %int128_4531 = torch.constant.int 128
    %6860 = torch.prim.ListConstruct %6859, %int16_4529, %int1_4530, %int128_4531 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6861 = torch.aten.view %6841, %6860 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6861, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4532 = torch.constant.int 4
    %6862 = torch.aten.mul.int %int4_4532, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4533 = torch.constant.int 16
    %int1_4534 = torch.constant.int 1
    %int128_4535 = torch.constant.int 128
    %6863 = torch.prim.ListConstruct %6862, %int16_4533, %int1_4534, %int128_4535 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6864 = torch.aten.view %6843, %6863 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6864, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4536 = torch.constant.int 4
    %6865 = torch.aten.mul.int %int4_4536, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4537 = torch.constant.int 16
    %int1_4538 = torch.constant.int 1
    %int128_4539 = torch.constant.int 128
    %6866 = torch.prim.ListConstruct %6865, %int16_4537, %int1_4538, %int128_4539 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6867 = torch.aten.view %6845, %6866 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6867, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4540 = torch.constant.int 4
    %6868 = torch.aten.mul.int %int4_4540, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4541 = torch.constant.int 16
    %int1_4542 = torch.constant.int 1
    %int128_4543 = torch.constant.int 128
    %6869 = torch.prim.ListConstruct %6868, %int16_4541, %int1_4542, %int128_4543 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6870 = torch.aten.view %6847, %6869 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6870, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_4544 = torch.constant.int 4
    %6871 = torch.aten.mul.int %int4_4544, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_4545 = torch.constant.int 16
    %int1_4546 = torch.constant.int 1
    %int128_4547 = torch.constant.int 128
    %6872 = torch.prim.ListConstruct %6871, %int16_4545, %int1_4546, %int128_4547 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6873 = torch.aten.view %6849, %6872 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6873, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
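    // The eight adds below shift every page-slot index by +1. In the flattened
    // (page x layer x K/V) index space used further down, this presumably
    // selects the plane adjacent to the K entry, i.e. the V half of the cache.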
    %int1_4548 = torch.constant.int 1
    %int1_4549 = torch.constant.int 1
    %6874 = torch.aten.add.Scalar %6762, %int1_4548, %int1_4549 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6874, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_4550 = torch.constant.int 1
    %int1_4551 = torch.constant.int 1
    %6875 = torch.aten.add.Scalar %6763, %int1_4550, %int1_4551 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6875, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_4552 = torch.constant.int 1
    %int1_4553 = torch.constant.int 1
    %6876 = torch.aten.add.Scalar %6764, %int1_4552, %int1_4553 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6876, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_4554 = torch.constant.int 1
    %int1_4555 = torch.constant.int 1
    %6877 = torch.aten.add.Scalar %6765, %int1_4554, %int1_4555 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6877, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_4556 = torch.constant.int 1
    %int1_4557 = torch.constant.int 1
    %6878 = torch.aten.add.Scalar %6766, %int1_4556, %int1_4557 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6878, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_4558 = torch.constant.int 1
    %int1_4559 = torch.constant.int 1
    %6879 = torch.aten.add.Scalar %6767, %int1_4558, %int1_4559 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6879, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_4560 = torch.constant.int 1
    %int1_4561 = torch.constant.int 1
    %6880 = torch.aten.add.Scalar %6768, %int1_4560, %int1_4561 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6880, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_4562 = torch.constant.int 1
    %int1_4563 = torch.constant.int 1
    %6881 = torch.aten.add.Scalar %6769, %int1_4562, %int1_4563 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %6881, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
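    // Flatten each [4, s0] index tensor to rank-1 [s0 * 4], matching the
    // flattened leading dimension of the update tensors built above.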
    %int4_4564 = torch.constant.int 4
    %6882 = torch.aten.mul.int %int4_4564, %3095 : !torch.int, !torch.int -> !torch.int
    %6883 = torch.prim.ListConstruct %6882 : (!torch.int) -> !torch.list<int>
    %6884 = torch.aten.view %6874, %6883 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6884, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4565 = torch.constant.int 4
    %6885 = torch.aten.mul.int %int4_4565, %3095 : !torch.int, !torch.int -> !torch.int
    %6886 = torch.prim.ListConstruct %6885 : (!torch.int) -> !torch.list<int>
    %6887 = torch.aten.view %6875, %6886 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6887, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4566 = torch.constant.int 4
    %6888 = torch.aten.mul.int %int4_4566, %3095 : !torch.int, !torch.int -> !torch.int
    %6889 = torch.prim.ListConstruct %6888 : (!torch.int) -> !torch.list<int>
    %6890 = torch.aten.view %6876, %6889 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6890, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4567 = torch.constant.int 4
    %6891 = torch.aten.mul.int %int4_4567, %3095 : !torch.int, !torch.int -> !torch.int
    %6892 = torch.prim.ListConstruct %6891 : (!torch.int) -> !torch.list<int>
    %6893 = torch.aten.view %6877, %6892 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6893, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4568 = torch.constant.int 4
    %6894 = torch.aten.mul.int %int4_4568, %3095 : !torch.int, !torch.int -> !torch.int
    %6895 = torch.prim.ListConstruct %6894 : (!torch.int) -> !torch.list<int>
    %6896 = torch.aten.view %6878, %6895 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6896, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4569 = torch.constant.int 4
    %6897 = torch.aten.mul.int %int4_4569, %3095 : !torch.int, !torch.int -> !torch.int
    %6898 = torch.prim.ListConstruct %6897 : (!torch.int) -> !torch.list<int>
    %6899 = torch.aten.view %6879, %6898 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6899, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4570 = torch.constant.int 4
    %6900 = torch.aten.mul.int %int4_4570, %3095 : !torch.int, !torch.int -> !torch.int
    %6901 = torch.prim.ListConstruct %6900 : (!torch.int) -> !torch.list<int>
    %6902 = torch.aten.view %6880, %6901 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6902, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_4571 = torch.constant.int 4
    %6903 = torch.aten.mul.int %int4_4571, %3095 : !torch.int, !torch.int -> !torch.int
    %6904 = torch.prim.ListConstruct %6903 : (!torch.int) -> !torch.list<int>
    %6905 = torch.aten.view %6881, %6904 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6905, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
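    // Concatenate the base and +1-shifted index vectors into a single
    // [s0 * 8] lookup per shard, so one scatter can write both planes.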
    %6906 = torch.prim.ListConstruct %6812, %6884 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_4572 = torch.constant.int 0
    %6907 = torch.aten.cat %6906, %int0_4572 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6907, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %6908 = torch.prim.ListConstruct %6815, %6887 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_4573 = torch.constant.int 0
    %6909 = torch.aten.cat %6908, %int0_4573 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6909, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %6910 = torch.prim.ListConstruct %6818, %6890 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_4574 = torch.constant.int 0
    %6911 = torch.aten.cat %6910, %int0_4574 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6911, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %6912 = torch.prim.ListConstruct %6821, %6893 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_4575 = torch.constant.int 0
    %6913 = torch.aten.cat %6912, %int0_4575 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6913, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %6914 = torch.prim.ListConstruct %6824, %6896 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_4576 = torch.constant.int 0
    %6915 = torch.aten.cat %6914, %int0_4576 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6915, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %6916 = torch.prim.ListConstruct %6827, %6899 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_4577 = torch.constant.int 0
    %6917 = torch.aten.cat %6916, %int0_4577 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6917, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %6918 = torch.prim.ListConstruct %6830, %6902 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_4578 = torch.constant.int 0
    %6919 = torch.aten.cat %6918, %int0_4578 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6919, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %6920 = torch.prim.ListConstruct %6833, %6905 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_4579 = torch.constant.int 0
    %6921 = torch.aten.cat %6920, %int0_4579 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %6921, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
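    // Concatenate the corresponding value tensors to [s0 * 8, 16, 1, 128],
    // row-aligned with the [s0 * 8] indices above.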
    %6922 = torch.prim.ListConstruct %6788, %6852 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_4580 = torch.constant.int 0
    %6923 = torch.aten.cat %6922, %int0_4580 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6923, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %6924 = torch.prim.ListConstruct %6791, %6855 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_4581 = torch.constant.int 0
    %6925 = torch.aten.cat %6924, %int0_4581 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6925, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %6926 = torch.prim.ListConstruct %6794, %6858 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_4582 = torch.constant.int 0
    %6927 = torch.aten.cat %6926, %int0_4582 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6927, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %6928 = torch.prim.ListConstruct %6797, %6861 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_4583 = torch.constant.int 0
    %6929 = torch.aten.cat %6928, %int0_4583 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6929, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %6930 = torch.prim.ListConstruct %6800, %6864 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_4584 = torch.constant.int 0
    %6931 = torch.aten.cat %6930, %int0_4584 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6931, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %6932 = torch.prim.ListConstruct %6803, %6867 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_4585 = torch.constant.int 0
    %6933 = torch.aten.cat %6932, %int0_4585 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6933, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %6934 = torch.prim.ListConstruct %6806, %6870 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_4586 = torch.constant.int 0
    %6935 = torch.aten.cat %6934, %int0_4586 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6935, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %6936 = torch.prim.ListConstruct %6809, %6873 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_4587 = torch.constant.int 0
    %6937 = torch.aten.cat %6936, %int0_4587 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6937, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
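    // Cache update, repeated for each of the eight device shards: the flat
    // [pages, 131072] buffer is viewed as [pages, 32, 2, 16, 1, 128] (likely
    // 32 layers x {K, V} x 16-token pages x 1 head shard x 128 head dims;
    // 32 * 2 * 16 * 128 = 131072), flattened to [pages * 64, 16, 1, 128],
    // updated via a non-accumulating index_put, and viewed back to
    // [pages, 131072].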
    %int32_4588 = torch.constant.int 32
    %int2_4589 = torch.constant.int 2
    %int16_4590 = torch.constant.int 16
    %int1_4591 = torch.constant.int 1
    %int128_4592 = torch.constant.int 128
    %6938 = torch.prim.ListConstruct %3023, %int32_4588, %int2_4589, %int16_4590, %int1_4591, %int128_4592 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6939 = torch.aten.view %5088, %6938 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %6939, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_4593 = torch.constant.int 32
    %6940 = torch.aten.mul.int %3023, %int32_4593 : !torch.int, !torch.int -> !torch.int
    %int2_4594 = torch.constant.int 2
    %6941 = torch.aten.mul.int %6940, %int2_4594 : !torch.int, !torch.int -> !torch.int
    %int16_4595 = torch.constant.int 16
    %int1_4596 = torch.constant.int 1
    %int128_4597 = torch.constant.int 128
    %6942 = torch.prim.ListConstruct %6941, %int16_4595, %int1_4596, %int128_4597 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6943 = torch.aten.view %6939, %6942 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6943, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %6944 = torch.prim.ListConstruct %6907 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_4598 = torch.constant.bool false
    %6945 = torch.aten.index_put %6943, %6944, %6923, %false_4598 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6945, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_4599 = torch.constant.int 32
    %int2_4600 = torch.constant.int 2
    %int16_4601 = torch.constant.int 16
    %int1_4602 = torch.constant.int 1
    %int128_4603 = torch.constant.int 128
    %6946 = torch.prim.ListConstruct %3023, %int32_4599, %int2_4600, %int16_4601, %int1_4602, %int128_4603 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6947 = torch.aten.view %6945, %6946 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %6947, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_4604 = torch.constant.int 131072
    %6948 = torch.prim.ListConstruct %3023, %int131072_4604 : (!torch.int, !torch.int) -> !torch.list<int>
    %6949 = torch.aten.view %6947, %6948 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %6949, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_4605 = torch.constant.int 32
    %int2_4606 = torch.constant.int 2
    %int16_4607 = torch.constant.int 16
    %int1_4608 = torch.constant.int 1
    %int128_4609 = torch.constant.int 128
    %6950 = torch.prim.ListConstruct %3026, %int32_4605, %int2_4606, %int16_4607, %int1_4608, %int128_4609 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6951 = torch.aten.view %5100, %6950 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %6951, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_4610 = torch.constant.int 32
    %6952 = torch.aten.mul.int %3026, %int32_4610 : !torch.int, !torch.int -> !torch.int
    %int2_4611 = torch.constant.int 2
    %6953 = torch.aten.mul.int %6952, %int2_4611 : !torch.int, !torch.int -> !torch.int
    %int16_4612 = torch.constant.int 16
    %int1_4613 = torch.constant.int 1
    %int128_4614 = torch.constant.int 128
    %6954 = torch.prim.ListConstruct %6953, %int16_4612, %int1_4613, %int128_4614 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6955 = torch.aten.view %6951, %6954 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6955, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %6956 = torch.prim.ListConstruct %6909 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_4615 = torch.constant.bool false
    %6957 = torch.aten.index_put %6955, %6956, %6925, %false_4615 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6957, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_4616 = torch.constant.int 32
    %int2_4617 = torch.constant.int 2
    %int16_4618 = torch.constant.int 16
    %int1_4619 = torch.constant.int 1
    %int128_4620 = torch.constant.int 128
    %6958 = torch.prim.ListConstruct %3026, %int32_4616, %int2_4617, %int16_4618, %int1_4619, %int128_4620 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6959 = torch.aten.view %6957, %6958 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %6959, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_4621 = torch.constant.int 131072
    %6960 = torch.prim.ListConstruct %3026, %int131072_4621 : (!torch.int, !torch.int) -> !torch.list<int>
    %6961 = torch.aten.view %6959, %6960 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %6961, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_4622 = torch.constant.int 32
    %int2_4623 = torch.constant.int 2
    %int16_4624 = torch.constant.int 16
    %int1_4625 = torch.constant.int 1
    %int128_4626 = torch.constant.int 128
    %6962 = torch.prim.ListConstruct %3029, %int32_4622, %int2_4623, %int16_4624, %int1_4625, %int128_4626 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6963 = torch.aten.view %5112, %6962 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %6963, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_4627 = torch.constant.int 32
    %6964 = torch.aten.mul.int %3029, %int32_4627 : !torch.int, !torch.int -> !torch.int
    %int2_4628 = torch.constant.int 2
    %6965 = torch.aten.mul.int %6964, %int2_4628 : !torch.int, !torch.int -> !torch.int
    %int16_4629 = torch.constant.int 16
    %int1_4630 = torch.constant.int 1
    %int128_4631 = torch.constant.int 128
    %6966 = torch.prim.ListConstruct %6965, %int16_4629, %int1_4630, %int128_4631 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6967 = torch.aten.view %6963, %6966 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6967, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %6968 = torch.prim.ListConstruct %6911 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_4632 = torch.constant.bool false
    %6969 = torch.aten.index_put %6967, %6968, %6927, %false_4632 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6969, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_4633 = torch.constant.int 32
    %int2_4634 = torch.constant.int 2
    %int16_4635 = torch.constant.int 16
    %int1_4636 = torch.constant.int 1
    %int128_4637 = torch.constant.int 128
    %6970 = torch.prim.ListConstruct %3029, %int32_4633, %int2_4634, %int16_4635, %int1_4636, %int128_4637 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6971 = torch.aten.view %6969, %6970 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %6971, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_4638 = torch.constant.int 131072
    %6972 = torch.prim.ListConstruct %3029, %int131072_4638 : (!torch.int, !torch.int) -> !torch.list<int>
    %6973 = torch.aten.view %6971, %6972 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %6973, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_4639 = torch.constant.int 32
    %int2_4640 = torch.constant.int 2
    %int16_4641 = torch.constant.int 16
    %int1_4642 = torch.constant.int 1
    %int128_4643 = torch.constant.int 128
    %6974 = torch.prim.ListConstruct %3032, %int32_4639, %int2_4640, %int16_4641, %int1_4642, %int128_4643 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6975 = torch.aten.view %5124, %6974 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %6975, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_4644 = torch.constant.int 32
    %6976 = torch.aten.mul.int %3032, %int32_4644 : !torch.int, !torch.int -> !torch.int
    %int2_4645 = torch.constant.int 2
    %6977 = torch.aten.mul.int %6976, %int2_4645 : !torch.int, !torch.int -> !torch.int
    %int16_4646 = torch.constant.int 16
    %int1_4647 = torch.constant.int 1
    %int128_4648 = torch.constant.int 128
    %6978 = torch.prim.ListConstruct %6977, %int16_4646, %int1_4647, %int128_4648 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6979 = torch.aten.view %6975, %6978 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6979, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %6980 = torch.prim.ListConstruct %6913 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_4649 = torch.constant.bool false
    %6981 = torch.aten.index_put %6979, %6980, %6929, %false_4649 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6981, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_4650 = torch.constant.int 32
    %int2_4651 = torch.constant.int 2
    %int16_4652 = torch.constant.int 16
    %int1_4653 = torch.constant.int 1
    %int128_4654 = torch.constant.int 128
    %6982 = torch.prim.ListConstruct %3032, %int32_4650, %int2_4651, %int16_4652, %int1_4653, %int128_4654 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6983 = torch.aten.view %6981, %6982 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %6983, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_4655 = torch.constant.int 131072
    %6984 = torch.prim.ListConstruct %3032, %int131072_4655 : (!torch.int, !torch.int) -> !torch.list<int>
    %6985 = torch.aten.view %6983, %6984 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %6985, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_4656 = torch.constant.int 32
    %int2_4657 = torch.constant.int 2
    %int16_4658 = torch.constant.int 16
    %int1_4659 = torch.constant.int 1
    %int128_4660 = torch.constant.int 128
    %6986 = torch.prim.ListConstruct %3035, %int32_4656, %int2_4657, %int16_4658, %int1_4659, %int128_4660 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6987 = torch.aten.view %5136, %6986 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %6987, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_4661 = torch.constant.int 32
    %6988 = torch.aten.mul.int %3035, %int32_4661 : !torch.int, !torch.int -> !torch.int
    %int2_4662 = torch.constant.int 2
    %6989 = torch.aten.mul.int %6988, %int2_4662 : !torch.int, !torch.int -> !torch.int
    %int16_4663 = torch.constant.int 16
    %int1_4664 = torch.constant.int 1
    %int128_4665 = torch.constant.int 128
    %6990 = torch.prim.ListConstruct %6989, %int16_4663, %int1_4664, %int128_4665 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6991 = torch.aten.view %6987, %6990 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6991, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %6992 = torch.prim.ListConstruct %6915 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_4666 = torch.constant.bool false
    %6993 = torch.aten.index_put %6991, %6992, %6931, %false_4666 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %6993, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_4667 = torch.constant.int 32
    %int2_4668 = torch.constant.int 2
    %int16_4669 = torch.constant.int 16
    %int1_4670 = torch.constant.int 1
    %int128_4671 = torch.constant.int 128
    %6994 = torch.prim.ListConstruct %3035, %int32_4667, %int2_4668, %int16_4669, %int1_4670, %int128_4671 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6995 = torch.aten.view %6993, %6994 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %6995, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_4672 = torch.constant.int 131072
    %6996 = torch.prim.ListConstruct %3035, %int131072_4672 : (!torch.int, !torch.int) -> !torch.list<int>
    %6997 = torch.aten.view %6995, %6996 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %6997, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_4673 = torch.constant.int 32
    %int2_4674 = torch.constant.int 2
    %int16_4675 = torch.constant.int 16
    %int1_4676 = torch.constant.int 1
    %int128_4677 = torch.constant.int 128
    %6998 = torch.prim.ListConstruct %3038, %int32_4673, %int2_4674, %int16_4675, %int1_4676, %int128_4677 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %6999 = torch.aten.view %5148, %6998 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %6999, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_4678 = torch.constant.int 32
    %7000 = torch.aten.mul.int %3038, %int32_4678 : !torch.int, !torch.int -> !torch.int
    %int2_4679 = torch.constant.int 2
    %7001 = torch.aten.mul.int %7000, %int2_4679 : !torch.int, !torch.int -> !torch.int
    %int16_4680 = torch.constant.int 16
    %int1_4681 = torch.constant.int 1
    %int128_4682 = torch.constant.int 128
    %7002 = torch.prim.ListConstruct %7001, %int16_4680, %int1_4681, %int128_4682 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7003 = torch.aten.view %6999, %7002 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %7003, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %7004 = torch.prim.ListConstruct %6917 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_4683 = torch.constant.bool false
    %7005 = torch.aten.index_put %7003, %7004, %6933, %false_4683 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %7005, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_4684 = torch.constant.int 32
    %int2_4685 = torch.constant.int 2
    %int16_4686 = torch.constant.int 16
    %int1_4687 = torch.constant.int 1
    %int128_4688 = torch.constant.int 128
    %7006 = torch.prim.ListConstruct %3038, %int32_4684, %int2_4685, %int16_4686, %int1_4687, %int128_4688 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7007 = torch.aten.view %7005, %7006 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %7007, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_4689 = torch.constant.int 131072
    %7008 = torch.prim.ListConstruct %3038, %int131072_4689 : (!torch.int, !torch.int) -> !torch.list<int>
    %7009 = torch.aten.view %7007, %7008 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %7009, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_4690 = torch.constant.int 32
    %int2_4691 = torch.constant.int 2
    %int16_4692 = torch.constant.int 16
    %int1_4693 = torch.constant.int 1
    %int128_4694 = torch.constant.int 128
    %7010 = torch.prim.ListConstruct %3041, %int32_4690, %int2_4691, %int16_4692, %int1_4693, %int128_4694 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7011 = torch.aten.view %5160, %7010 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %7011, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_4695 = torch.constant.int 32
    %7012 = torch.aten.mul.int %3041, %int32_4695 : !torch.int, !torch.int -> !torch.int
    %int2_4696 = torch.constant.int 2
    %7013 = torch.aten.mul.int %7012, %int2_4696 : !torch.int, !torch.int -> !torch.int
    %int16_4697 = torch.constant.int 16
    %int1_4698 = torch.constant.int 1
    %int128_4699 = torch.constant.int 128
    %7014 = torch.prim.ListConstruct %7013, %int16_4697, %int1_4698, %int128_4699 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7015 = torch.aten.view %7011, %7014 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %7015, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %7016 = torch.prim.ListConstruct %6919 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_4700 = torch.constant.bool false
    %7017 = torch.aten.index_put %7015, %7016, %6935, %false_4700 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %7017, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_4701 = torch.constant.int 32
    %int2_4702 = torch.constant.int 2
    %int16_4703 = torch.constant.int 16
    %int1_4704 = torch.constant.int 1
    %int128_4705 = torch.constant.int 128
    %7018 = torch.prim.ListConstruct %3041, %int32_4701, %int2_4702, %int16_4703, %int1_4704, %int128_4705 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7019 = torch.aten.view %7017, %7018 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %7019, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_4706 = torch.constant.int 131072
    %7020 = torch.prim.ListConstruct %3041, %int131072_4706 : (!torch.int, !torch.int) -> !torch.list<int>
    %7021 = torch.aten.view %7019, %7020 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %7021, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_4707 = torch.constant.int 32
    %int2_4708 = torch.constant.int 2
    %int16_4709 = torch.constant.int 16
    %int1_4710 = torch.constant.int 1
    %int128_4711 = torch.constant.int 128
    %7022 = torch.prim.ListConstruct %3044, %int32_4707, %int2_4708, %int16_4709, %int1_4710, %int128_4711 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7023 = torch.aten.view %5172, %7022 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %7023, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_4712 = torch.constant.int 32
    %7024 = torch.aten.mul.int %3044, %int32_4712 : !torch.int, !torch.int -> !torch.int
    %int2_4713 = torch.constant.int 2
    %7025 = torch.aten.mul.int %7024, %int2_4713 : !torch.int, !torch.int -> !torch.int
    %int16_4714 = torch.constant.int 16
    %int1_4715 = torch.constant.int 1
    %int128_4716 = torch.constant.int 128
    %7026 = torch.prim.ListConstruct %7025, %int16_4714, %int1_4715, %int128_4716 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7027 = torch.aten.view %7023, %7026 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %7027, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %7028 = torch.prim.ListConstruct %6921 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_4717 = torch.constant.bool false
    %7029 = torch.aten.index_put %7027, %7028, %6937, %false_4717 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %7029, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_4718 = torch.constant.int 32
    %int2_4719 = torch.constant.int 2
    %int16_4720 = torch.constant.int 16
    %int1_4721 = torch.constant.int 1
    %int128_4722 = torch.constant.int 128
    %7030 = torch.prim.ListConstruct %3044, %int32_4718, %int2_4719, %int16_4720, %int1_4721, %int128_4722 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7031 = torch.aten.view %7029, %7030 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %7031, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_4723 = torch.constant.int 131072
    %7032 = torch.prim.ListConstruct %3044, %int131072_4723 : (!torch.int, !torch.int) -> !torch.list<int>
    %7033 = torch.aten.view %7031, %7032 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %7033, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
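    // Cache writes complete. The ops below appear to prepare the current-step
    // keys for attention: each per-shard [4, seq, 1, 128] tensor gains a
    // broadcast dim at -2, giving [4, seq, 1, 1, 128].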
    %int-2_4724 = torch.constant.int -2
    %7034 = torch.aten.unsqueeze %6648, %int-2_4724 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4725 = torch.constant.int -2
    %7035 = torch.aten.unsqueeze %6663, %int-2_4725 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4726 = torch.constant.int -2
    %7036 = torch.aten.unsqueeze %6678, %int-2_4726 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4727 = torch.constant.int -2
    %7037 = torch.aten.unsqueeze %6693, %int-2_4727 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4728 = torch.constant.int -2
    %7038 = torch.aten.unsqueeze %6708, %int-2_4728 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4729 = torch.constant.int -2
    %7039 = torch.aten.unsqueeze %6723, %int-2_4729 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4730 = torch.constant.int -2
    %7040 = torch.aten.unsqueeze %6738, %int-2_4730 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4731 = torch.constant.int -2
    %7041 = torch.aten.unsqueeze %6753, %int-2_4731 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
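    // Expand the singleton dim 4x without copying: [4, seq, 1, 1, 128] ->
    // [4, seq, 1, 4, 128]. This looks like grouped-query attention, with one
    // KV head per shard serving four query heads.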
    %int4_4732 = torch.constant.int 4
    %int1_4733 = torch.constant.int 1
    %int4_4734 = torch.constant.int 4
    %int128_4735 = torch.constant.int 128
    %7042 = torch.prim.ListConstruct %int4_4732, %6634, %int1_4733, %int4_4734, %int128_4735 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4736 = torch.constant.bool false
    %7043 = torch.aten.expand %7034, %7042, %false_4736 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4737 = torch.constant.int 4
    %int1_4738 = torch.constant.int 1
    %int4_4739 = torch.constant.int 4
    %int128_4740 = torch.constant.int 128
    %7044 = torch.prim.ListConstruct %int4_4737, %6634, %int1_4738, %int4_4739, %int128_4740 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4741 = torch.constant.bool false
    %7045 = torch.aten.expand %7035, %7044, %false_4741 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4742 = torch.constant.int 4
    %int1_4743 = torch.constant.int 1
    %int4_4744 = torch.constant.int 4
    %int128_4745 = torch.constant.int 128
    %7046 = torch.prim.ListConstruct %int4_4742, %6634, %int1_4743, %int4_4744, %int128_4745 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4746 = torch.constant.bool false
    %7047 = torch.aten.expand %7036, %7046, %false_4746 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4747 = torch.constant.int 4
    %int1_4748 = torch.constant.int 1
    %int4_4749 = torch.constant.int 4
    %int128_4750 = torch.constant.int 128
    %7048 = torch.prim.ListConstruct %int4_4747, %6634, %int1_4748, %int4_4749, %int128_4750 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4751 = torch.constant.bool false
    %7049 = torch.aten.expand %7037, %7048, %false_4751 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4752 = torch.constant.int 4
    %int1_4753 = torch.constant.int 1
    %int4_4754 = torch.constant.int 4
    %int128_4755 = torch.constant.int 128
    %7050 = torch.prim.ListConstruct %int4_4752, %6634, %int1_4753, %int4_4754, %int128_4755 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4756 = torch.constant.bool false
    %7051 = torch.aten.expand %7038, %7050, %false_4756 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4757 = torch.constant.int 4
    %int1_4758 = torch.constant.int 1
    %int4_4759 = torch.constant.int 4
    %int128_4760 = torch.constant.int 128
    %7052 = torch.prim.ListConstruct %int4_4757, %6634, %int1_4758, %int4_4759, %int128_4760 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4761 = torch.constant.bool false
    %7053 = torch.aten.expand %7039, %7052, %false_4761 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4762 = torch.constant.int 4
    %int1_4763 = torch.constant.int 1
    %int4_4764 = torch.constant.int 4
    %int128_4765 = torch.constant.int 128
    %7054 = torch.prim.ListConstruct %int4_4762, %6634, %int1_4763, %int4_4764, %int128_4765 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4766 = torch.constant.bool false
    %7055 = torch.aten.expand %7040, %7054, %false_4766 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4767 = torch.constant.int 4
    %int1_4768 = torch.constant.int 1
    %int4_4769 = torch.constant.int 4
    %int128_4770 = torch.constant.int 128
    %7056 = torch.prim.ListConstruct %int4_4767, %6634, %int1_4768, %int4_4769, %int128_4770 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4771 = torch.constant.bool false
    %7057 = torch.aten.expand %7041, %7056, %false_4771 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
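    // Collapse the broadcast dims into a plain head axis:
    // [4, seq, 1, 4, 128] -> [4, seq, 4, 128].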
    %int4_4772 = torch.constant.int 4
    %int4_4773 = torch.constant.int 4
    %int128_4774 = torch.constant.int 128
    %7058 = torch.prim.ListConstruct %int4_4772, %6634, %int4_4773, %int128_4774 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7059 = torch.aten.view %7043, %7058 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4775 = torch.constant.int 4
    %int4_4776 = torch.constant.int 4
    %int128_4777 = torch.constant.int 128
    %7060 = torch.prim.ListConstruct %int4_4775, %6634, %int4_4776, %int128_4777 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7061 = torch.aten.view %7045, %7060 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4778 = torch.constant.int 4
    %int4_4779 = torch.constant.int 4
    %int128_4780 = torch.constant.int 128
    %7062 = torch.prim.ListConstruct %int4_4778, %6634, %int4_4779, %int128_4780 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7063 = torch.aten.view %7047, %7062 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4781 = torch.constant.int 4
    %int4_4782 = torch.constant.int 4
    %int128_4783 = torch.constant.int 128
    %7064 = torch.prim.ListConstruct %int4_4781, %6634, %int4_4782, %int128_4783 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7065 = torch.aten.view %7049, %7064 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4784 = torch.constant.int 4
    %int4_4785 = torch.constant.int 4
    %int128_4786 = torch.constant.int 128
    %7066 = torch.prim.ListConstruct %int4_4784, %6634, %int4_4785, %int128_4786 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7067 = torch.aten.view %7051, %7066 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4787 = torch.constant.int 4
    %int4_4788 = torch.constant.int 4
    %int128_4789 = torch.constant.int 128
    %7068 = torch.prim.ListConstruct %int4_4787, %6634, %int4_4788, %int128_4789 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7069 = torch.aten.view %7053, %7068 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4790 = torch.constant.int 4
    %int4_4791 = torch.constant.int 4
    %int128_4792 = torch.constant.int 128
    %7070 = torch.prim.ListConstruct %int4_4790, %6634, %int4_4791, %int128_4792 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7071 = torch.aten.view %7055, %7070 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4793 = torch.constant.int 4
    %int4_4794 = torch.constant.int 4
    %int128_4795 = torch.constant.int 128
    %7072 = torch.prim.ListConstruct %int4_4793, %6634, %int4_4794, %int128_4795 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7073 = torch.aten.view %7057, %7072 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
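    // Same unsqueeze for the second group of per-shard tensors
    // (%6423 ... %6437), presumably the value heads.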
    %int-2_4796 = torch.constant.int -2
    %7074 = torch.aten.unsqueeze %6423, %int-2_4796 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4797 = torch.constant.int -2
    %7075 = torch.aten.unsqueeze %6425, %int-2_4797 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4798 = torch.constant.int -2
    %7076 = torch.aten.unsqueeze %6427, %int-2_4798 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4799 = torch.constant.int -2
    %7077 = torch.aten.unsqueeze %6429, %int-2_4799 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4800 = torch.constant.int -2
    %7078 = torch.aten.unsqueeze %6431, %int-2_4800 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4801 = torch.constant.int -2
    %7079 = torch.aten.unsqueeze %6433, %int-2_4801 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4802 = torch.constant.int -2
    %7080 = torch.aten.unsqueeze %6435, %int-2_4802 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_4803 = torch.constant.int -2
    %7081 = torch.aten.unsqueeze %6437, %int-2_4803 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %7081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
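    // Read the dynamic sequence length from %6347 (dim 1), then expand these
    // heads 4x exactly as above.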
    %int1_4804 = torch.constant.int 1
    %7082 = torch.aten.size.int %6347, %int1_4804 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_4805 = torch.constant.int 4
    %int1_4806 = torch.constant.int 1
    %int4_4807 = torch.constant.int 4
    %int128_4808 = torch.constant.int 128
    %7083 = torch.prim.ListConstruct %int4_4805, %7082, %int1_4806, %int4_4807, %int128_4808 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4809 = torch.constant.bool false
    %7084 = torch.aten.expand %7074, %7083, %false_4809 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4810 = torch.constant.int 4
    %int1_4811 = torch.constant.int 1
    %int4_4812 = torch.constant.int 4
    %int128_4813 = torch.constant.int 128
    %7085 = torch.prim.ListConstruct %int4_4810, %7082, %int1_4811, %int4_4812, %int128_4813 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4814 = torch.constant.bool false
    %7086 = torch.aten.expand %7075, %7085, %false_4814 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4815 = torch.constant.int 4
    %int1_4816 = torch.constant.int 1
    %int4_4817 = torch.constant.int 4
    %int128_4818 = torch.constant.int 128
    %7087 = torch.prim.ListConstruct %int4_4815, %7082, %int1_4816, %int4_4817, %int128_4818 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4819 = torch.constant.bool false
    %7088 = torch.aten.expand %7076, %7087, %false_4819 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4820 = torch.constant.int 4
    %int1_4821 = torch.constant.int 1
    %int4_4822 = torch.constant.int 4
    %int128_4823 = torch.constant.int 128
    %7089 = torch.prim.ListConstruct %int4_4820, %7082, %int1_4821, %int4_4822, %int128_4823 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4824 = torch.constant.bool false
    %7090 = torch.aten.expand %7077, %7089, %false_4824 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4825 = torch.constant.int 4
    %int1_4826 = torch.constant.int 1
    %int4_4827 = torch.constant.int 4
    %int128_4828 = torch.constant.int 128
    %7091 = torch.prim.ListConstruct %int4_4825, %7082, %int1_4826, %int4_4827, %int128_4828 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4829 = torch.constant.bool false
    %7092 = torch.aten.expand %7078, %7091, %false_4829 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4830 = torch.constant.int 4
    %int1_4831 = torch.constant.int 1
    %int4_4832 = torch.constant.int 4
    %int128_4833 = torch.constant.int 128
    %7093 = torch.prim.ListConstruct %int4_4830, %7082, %int1_4831, %int4_4832, %int128_4833 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4834 = torch.constant.bool false
    %7094 = torch.aten.expand %7079, %7093, %false_4834 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4835 = torch.constant.int 4
    %int1_4836 = torch.constant.int 1
    %int4_4837 = torch.constant.int 4
    %int128_4838 = torch.constant.int 128
    %7095 = torch.prim.ListConstruct %int4_4835, %7082, %int1_4836, %int4_4837, %int128_4838 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4839 = torch.constant.bool false
    %7096 = torch.aten.expand %7080, %7095, %false_4839 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_4840 = torch.constant.int 4
    %int1_4841 = torch.constant.int 1
    %int4_4842 = torch.constant.int 4
    %int128_4843 = torch.constant.int 128
    %7097 = torch.prim.ListConstruct %int4_4840, %7082, %int1_4841, %int4_4842, %int128_4843 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_4844 = torch.constant.bool false
    %7098 = torch.aten.expand %7081, %7097, %false_4844 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %7098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
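    // Collapse the singleton KV-head axis into the repeat axis:
    // [4,?,1,4,128] -> [4,?,4,128], matching the per-shard query layout.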
    %int4_4845 = torch.constant.int 4
    %int4_4846 = torch.constant.int 4
    %int128_4847 = torch.constant.int 128
    %7099 = torch.prim.ListConstruct %int4_4845, %7082, %int4_4846, %int128_4847 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7100 = torch.aten.view %7084, %7099 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4848 = torch.constant.int 4
    %int4_4849 = torch.constant.int 4
    %int128_4850 = torch.constant.int 128
    %7101 = torch.prim.ListConstruct %int4_4848, %7082, %int4_4849, %int128_4850 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7102 = torch.aten.view %7086, %7101 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4851 = torch.constant.int 4
    %int4_4852 = torch.constant.int 4
    %int128_4853 = torch.constant.int 128
    %7103 = torch.prim.ListConstruct %int4_4851, %7082, %int4_4852, %int128_4853 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7104 = torch.aten.view %7088, %7103 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4854 = torch.constant.int 4
    %int4_4855 = torch.constant.int 4
    %int128_4856 = torch.constant.int 128
    %7105 = torch.prim.ListConstruct %int4_4854, %7082, %int4_4855, %int128_4856 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7106 = torch.aten.view %7090, %7105 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4857 = torch.constant.int 4
    %int4_4858 = torch.constant.int 4
    %int128_4859 = torch.constant.int 128
    %7107 = torch.prim.ListConstruct %int4_4857, %7082, %int4_4858, %int128_4859 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7108 = torch.aten.view %7092, %7107 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4860 = torch.constant.int 4
    %int4_4861 = torch.constant.int 4
    %int128_4862 = torch.constant.int 128
    %7109 = torch.prim.ListConstruct %int4_4860, %7082, %int4_4861, %int128_4862 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7110 = torch.aten.view %7094, %7109 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4863 = torch.constant.int 4
    %int4_4864 = torch.constant.int 4
    %int128_4865 = torch.constant.int 128
    %7111 = torch.prim.ListConstruct %int4_4863, %7082, %int4_4864, %int128_4865 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7112 = torch.aten.view %7096, %7111 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_4866 = torch.constant.int 4
    %int4_4867 = torch.constant.int 4
    %int128_4868 = torch.constant.int 128
    %7113 = torch.prim.ListConstruct %int4_4866, %7082, %int4_4867, %int128_4868 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7114 = torch.aten.view %7098, %7113 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
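    // Transpose Q (%6490..%6595), K (%7059..%7073), and V (%7100..%7114) on every
    // shard from [batch, seq, heads, head_dim] to [batch, heads, seq, head_dim]
    // for the attention calls below.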
    %int1_4869 = torch.constant.int 1
    %int2_4870 = torch.constant.int 2
    %7115 = torch.aten.transpose.int %6490, %int1_4869, %int2_4870 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7115, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4871 = torch.constant.int 1
    %int2_4872 = torch.constant.int 2
    %7116 = torch.aten.transpose.int %6505, %int1_4871, %int2_4872 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7116, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4873 = torch.constant.int 1
    %int2_4874 = torch.constant.int 2
    %7117 = torch.aten.transpose.int %6520, %int1_4873, %int2_4874 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7117, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4875 = torch.constant.int 1
    %int2_4876 = torch.constant.int 2
    %7118 = torch.aten.transpose.int %6535, %int1_4875, %int2_4876 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7118, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4877 = torch.constant.int 1
    %int2_4878 = torch.constant.int 2
    %7119 = torch.aten.transpose.int %6550, %int1_4877, %int2_4878 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7119, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4879 = torch.constant.int 1
    %int2_4880 = torch.constant.int 2
    %7120 = torch.aten.transpose.int %6565, %int1_4879, %int2_4880 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7120, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4881 = torch.constant.int 1
    %int2_4882 = torch.constant.int 2
    %7121 = torch.aten.transpose.int %6580, %int1_4881, %int2_4882 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7121, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4883 = torch.constant.int 1
    %int2_4884 = torch.constant.int 2
    %7122 = torch.aten.transpose.int %6595, %int1_4883, %int2_4884 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7122, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4885 = torch.constant.int 1
    %int2_4886 = torch.constant.int 2
    %7123 = torch.aten.transpose.int %7059, %int1_4885, %int2_4886 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7123, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4887 = torch.constant.int 1
    %int2_4888 = torch.constant.int 2
    %7124 = torch.aten.transpose.int %7061, %int1_4887, %int2_4888 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7124, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4889 = torch.constant.int 1
    %int2_4890 = torch.constant.int 2
    %7125 = torch.aten.transpose.int %7063, %int1_4889, %int2_4890 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7125, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4891 = torch.constant.int 1
    %int2_4892 = torch.constant.int 2
    %7126 = torch.aten.transpose.int %7065, %int1_4891, %int2_4892 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7126, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4893 = torch.constant.int 1
    %int2_4894 = torch.constant.int 2
    %7127 = torch.aten.transpose.int %7067, %int1_4893, %int2_4894 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7127, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4895 = torch.constant.int 1
    %int2_4896 = torch.constant.int 2
    %7128 = torch.aten.transpose.int %7069, %int1_4895, %int2_4896 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7128, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4897 = torch.constant.int 1
    %int2_4898 = torch.constant.int 2
    %7129 = torch.aten.transpose.int %7071, %int1_4897, %int2_4898 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7129, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4899 = torch.constant.int 1
    %int2_4900 = torch.constant.int 2
    %7130 = torch.aten.transpose.int %7073, %int1_4899, %int2_4900 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7130, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4901 = torch.constant.int 1
    %int2_4902 = torch.constant.int 2
    %7131 = torch.aten.transpose.int %7100, %int1_4901, %int2_4902 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7131, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4903 = torch.constant.int 1
    %int2_4904 = torch.constant.int 2
    %7132 = torch.aten.transpose.int %7102, %int1_4903, %int2_4904 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7132, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4905 = torch.constant.int 1
    %int2_4906 = torch.constant.int 2
    %7133 = torch.aten.transpose.int %7104, %int1_4905, %int2_4906 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7133, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4907 = torch.constant.int 1
    %int2_4908 = torch.constant.int 2
    %7134 = torch.aten.transpose.int %7106, %int1_4907, %int2_4908 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7134, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4909 = torch.constant.int 1
    %int2_4910 = torch.constant.int 2
    %7135 = torch.aten.transpose.int %7108, %int1_4909, %int2_4910 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7135, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4911 = torch.constant.int 1
    %int2_4912 = torch.constant.int 2
    %7136 = torch.aten.transpose.int %7110, %int1_4911, %int2_4912 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7136, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4913 = torch.constant.int 1
    %int2_4914 = torch.constant.int 2
    %7137 = torch.aten.transpose.int %7112, %int1_4913, %int2_4914 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7137, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_4915 = torch.constant.int 1
    %int2_4916 = torch.constant.int 2
    %7138 = torch.aten.transpose.int %7114, %int1_4915, %int2_4916 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %7138, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
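    // Eight causal flash-attention calls, one per device shard: dropout 0.0,
    // is_causal = true, no explicit mask or scale. The second result (the
    // [4,4,?] f32 tensor, presumably the logsumexp) is unused here.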
    %float0.000000e00_4917 = torch.constant.float 0.000000e+00
    %true_4918 = torch.constant.bool true
    %none_4919 = torch.constant.none
    %none_4920 = torch.constant.none
    %7139:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%7115, %7123, %7131, %float0.000000e00_4917, %true_4918, %none_4919, %none_4920) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %7139#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_4921 = torch.constant.float 0.000000e+00
    %true_4922 = torch.constant.bool true
    %none_4923 = torch.constant.none
    %none_4924 = torch.constant.none
    %7140:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%7116, %7124, %7132, %float0.000000e00_4921, %true_4922, %none_4923, %none_4924) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %7140#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_4925 = torch.constant.float 0.000000e+00
    %true_4926 = torch.constant.bool true
    %none_4927 = torch.constant.none
    %none_4928 = torch.constant.none
    %7141:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%7117, %7125, %7133, %float0.000000e00_4925, %true_4926, %none_4927, %none_4928) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %7141#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_4929 = torch.constant.float 0.000000e+00
    %true_4930 = torch.constant.bool true
    %none_4931 = torch.constant.none
    %none_4932 = torch.constant.none
    %7142:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%7118, %7126, %7134, %float0.000000e00_4929, %true_4930, %none_4931, %none_4932) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %7142#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_4933 = torch.constant.float 0.000000e+00
    %true_4934 = torch.constant.bool true
    %none_4935 = torch.constant.none
    %none_4936 = torch.constant.none
    %7143:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%7119, %7127, %7135, %float0.000000e00_4933, %true_4934, %none_4935, %none_4936) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %7143#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_4937 = torch.constant.float 0.000000e+00
    %true_4938 = torch.constant.bool true
    %none_4939 = torch.constant.none
    %none_4940 = torch.constant.none
    %7144:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%7120, %7128, %7136, %float0.000000e00_4937, %true_4938, %none_4939, %none_4940) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %7144#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_4941 = torch.constant.float 0.000000e+00
    %true_4942 = torch.constant.bool true
    %none_4943 = torch.constant.none
    %none_4944 = torch.constant.none
    %7145:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%7121, %7129, %7137, %float0.000000e00_4941, %true_4942, %none_4943, %none_4944) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %7145#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_4945 = torch.constant.float 0.000000e+00
    %true_4946 = torch.constant.bool true
    %none_4947 = torch.constant.none
    %none_4948 = torch.constant.none
    %7146:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%7122, %7130, %7138, %float0.000000e00_4945, %true_4946, %none_4947, %none_4948) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %7146#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
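    // Transpose each attention result back to [batch, seq, heads, head_dim].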
    %int1_4949 = torch.constant.int 1
    %int2_4950 = torch.constant.int 2
    %7147 = torch.aten.transpose.int %7139#0, %int1_4949, %int2_4950 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4951 = torch.constant.int 1
    %int2_4952 = torch.constant.int 2
    %7148 = torch.aten.transpose.int %7140#0, %int1_4951, %int2_4952 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4953 = torch.constant.int 1
    %int2_4954 = torch.constant.int 2
    %7149 = torch.aten.transpose.int %7141#0, %int1_4953, %int2_4954 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4955 = torch.constant.int 1
    %int2_4956 = torch.constant.int 2
    %7150 = torch.aten.transpose.int %7142#0, %int1_4955, %int2_4956 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4957 = torch.constant.int 1
    %int2_4958 = torch.constant.int 2
    %7151 = torch.aten.transpose.int %7143#0, %int1_4957, %int2_4958 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4959 = torch.constant.int 1
    %int2_4960 = torch.constant.int 2
    %7152 = torch.aten.transpose.int %7144#0, %int1_4959, %int2_4960 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4961 = torch.constant.int 1
    %int2_4962 = torch.constant.int 2
    %7153 = torch.aten.transpose.int %7145#0, %int1_4961, %int2_4962 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_4963 = torch.constant.int 1
    %int2_4964 = torch.constant.int 2
    %7154 = torch.aten.transpose.int %7146#0, %int1_4963, %int2_4964 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %7154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
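    // Merge the per-shard heads: [4,?,4,128] -> [4,?,512] (4 heads x 128 = 512).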
    %int4_4965 = torch.constant.int 4
    %int512_4966 = torch.constant.int 512
    %7155 = torch.prim.ListConstruct %int4_4965, %6476, %int512_4966 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7156 = torch.aten.view %7147, %7155 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %7156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_4967 = torch.constant.int 4
    %int512_4968 = torch.constant.int 512
    %7157 = torch.prim.ListConstruct %int4_4967, %6491, %int512_4968 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7158 = torch.aten.view %7148, %7157 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %7158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_4969 = torch.constant.int 4
    %int512_4970 = torch.constant.int 512
    %7159 = torch.prim.ListConstruct %int4_4969, %6506, %int512_4970 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7160 = torch.aten.view %7149, %7159 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %7160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_4971 = torch.constant.int 4
    %int512_4972 = torch.constant.int 512
    %7161 = torch.prim.ListConstruct %int4_4971, %6521, %int512_4972 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7162 = torch.aten.view %7150, %7161 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %7162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_4973 = torch.constant.int 4
    %int512_4974 = torch.constant.int 512
    %7163 = torch.prim.ListConstruct %int4_4973, %6536, %int512_4974 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7164 = torch.aten.view %7151, %7163 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %7164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_4975 = torch.constant.int 4
    %int512_4976 = torch.constant.int 512
    %7165 = torch.prim.ListConstruct %int4_4975, %6551, %int512_4976 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7166 = torch.aten.view %7152, %7165 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %7166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_4977 = torch.constant.int 4
    %int512_4978 = torch.constant.int 512
    %7167 = torch.prim.ListConstruct %int4_4977, %6566, %int512_4978 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7168 = torch.aten.view %7153, %7167 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %7168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_4979 = torch.constant.int 4
    %int512_4980 = torch.constant.int 512
    %7169 = torch.prim.ListConstruct %int4_4979, %6581, %int512_4980 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7170 = torch.aten.view %7154, %7169 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %7170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
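    // Transpose the eight output-projection weight shards %184..%191 from
    // [4096,512] to [512,4096] so each can serve as the right operand of mm.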
    %int1_4981 = torch.constant.int 1
    %int0_4982 = torch.constant.int 0
    %7171 = torch.prim.ListConstruct %int1_4981, %int0_4982 : (!torch.int, !torch.int) -> !torch.list<int>
    %7172 = torch.aten.permute %184, %7171 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_4983 = torch.constant.int 1
    %int0_4984 = torch.constant.int 0
    %7173 = torch.prim.ListConstruct %int1_4983, %int0_4984 : (!torch.int, !torch.int) -> !torch.list<int>
    %7174 = torch.aten.permute %185, %7173 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_4985 = torch.constant.int 1
    %int0_4986 = torch.constant.int 0
    %7175 = torch.prim.ListConstruct %int1_4985, %int0_4986 : (!torch.int, !torch.int) -> !torch.list<int>
    %7176 = torch.aten.permute %186, %7175 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_4987 = torch.constant.int 1
    %int0_4988 = torch.constant.int 0
    %7177 = torch.prim.ListConstruct %int1_4987, %int0_4988 : (!torch.int, !torch.int) -> !torch.list<int>
    %7178 = torch.aten.permute %187, %7177 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_4989 = torch.constant.int 1
    %int0_4990 = torch.constant.int 0
    %7179 = torch.prim.ListConstruct %int1_4989, %int0_4990 : (!torch.int, !torch.int) -> !torch.list<int>
    %7180 = torch.aten.permute %188, %7179 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_4991 = torch.constant.int 1
    %int0_4992 = torch.constant.int 0
    %7181 = torch.prim.ListConstruct %int1_4991, %int0_4992 : (!torch.int, !torch.int) -> !torch.list<int>
    %7182 = torch.aten.permute %189, %7181 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_4993 = torch.constant.int 1
    %int0_4994 = torch.constant.int 0
    %7183 = torch.prim.ListConstruct %int1_4993, %int0_4994 : (!torch.int, !torch.int) -> !torch.list<int>
    %7184 = torch.aten.permute %190, %7183 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_4995 = torch.constant.int 1
    %int0_4996 = torch.constant.int 0
    %7185 = torch.prim.ListConstruct %int1_4995, %int0_4996 : (!torch.int, !torch.int) -> !torch.list<int>
    %7186 = torch.aten.permute %191, %7185 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
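    // Per-shard output projection: flatten to [batch*seq, 512], multiply by the
    // [512,4096] weight shard, and reshape to [4,?,4096]. Each device now holds
    // a partial sum of the full projection, row-parallel style.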
    %int4_4997 = torch.constant.int 4
    %7187 = torch.aten.mul.int %int4_4997, %6476 : !torch.int, !torch.int -> !torch.int
    %int512_4998 = torch.constant.int 512
    %7188 = torch.prim.ListConstruct %7187, %int512_4998 : (!torch.int, !torch.int) -> !torch.list<int>
    %7189 = torch.aten.view %7156, %7188 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %7189, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %7190 = torch.aten.mm %7189, %7172 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7190, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_4999 = torch.constant.int 4
    %int4096_5000 = torch.constant.int 4096
    %7191 = torch.prim.ListConstruct %int4_4999, %6476, %int4096_5000 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7192 = torch.aten.view %7190, %7191 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_5001 = torch.constant.int 4
    %7193 = torch.aten.mul.int %int4_5001, %6491 : !torch.int, !torch.int -> !torch.int
    %int512_5002 = torch.constant.int 512
    %7194 = torch.prim.ListConstruct %7193, %int512_5002 : (!torch.int, !torch.int) -> !torch.list<int>
    %7195 = torch.aten.view %7158, %7194 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %7195, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %7196 = torch.aten.mm %7195, %7174 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7196, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5003 = torch.constant.int 4
    %int4096_5004 = torch.constant.int 4096
    %7197 = torch.prim.ListConstruct %int4_5003, %6491, %int4096_5004 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7198 = torch.aten.view %7196, %7197 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_5005 = torch.constant.int 4
    %7199 = torch.aten.mul.int %int4_5005, %6506 : !torch.int, !torch.int -> !torch.int
    %int512_5006 = torch.constant.int 512
    %7200 = torch.prim.ListConstruct %7199, %int512_5006 : (!torch.int, !torch.int) -> !torch.list<int>
    %7201 = torch.aten.view %7160, %7200 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %7201, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %7202 = torch.aten.mm %7201, %7176 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7202, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5007 = torch.constant.int 4
    %int4096_5008 = torch.constant.int 4096
    %7203 = torch.prim.ListConstruct %int4_5007, %6506, %int4096_5008 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7204 = torch.aten.view %7202, %7203 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_5009 = torch.constant.int 4
    %7205 = torch.aten.mul.int %int4_5009, %6521 : !torch.int, !torch.int -> !torch.int
    %int512_5010 = torch.constant.int 512
    %7206 = torch.prim.ListConstruct %7205, %int512_5010 : (!torch.int, !torch.int) -> !torch.list<int>
    %7207 = torch.aten.view %7162, %7206 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %7207, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %7208 = torch.aten.mm %7207, %7178 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7208, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5011 = torch.constant.int 4
    %int4096_5012 = torch.constant.int 4096
    %7209 = torch.prim.ListConstruct %int4_5011, %6521, %int4096_5012 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7210 = torch.aten.view %7208, %7209 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_5013 = torch.constant.int 4
    %7211 = torch.aten.mul.int %int4_5013, %6536 : !torch.int, !torch.int -> !torch.int
    %int512_5014 = torch.constant.int 512
    %7212 = torch.prim.ListConstruct %7211, %int512_5014 : (!torch.int, !torch.int) -> !torch.list<int>
    %7213 = torch.aten.view %7164, %7212 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %7213, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %7214 = torch.aten.mm %7213, %7180 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7214, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5015 = torch.constant.int 4
    %int4096_5016 = torch.constant.int 4096
    %7215 = torch.prim.ListConstruct %int4_5015, %6536, %int4096_5016 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7216 = torch.aten.view %7214, %7215 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_5017 = torch.constant.int 4
    %7217 = torch.aten.mul.int %int4_5017, %6551 : !torch.int, !torch.int -> !torch.int
    %int512_5018 = torch.constant.int 512
    %7218 = torch.prim.ListConstruct %7217, %int512_5018 : (!torch.int, !torch.int) -> !torch.list<int>
    %7219 = torch.aten.view %7166, %7218 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %7219, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %7220 = torch.aten.mm %7219, %7182 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7220, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5019 = torch.constant.int 4
    %int4096_5020 = torch.constant.int 4096
    %7221 = torch.prim.ListConstruct %int4_5019, %6551, %int4096_5020 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7222 = torch.aten.view %7220, %7221 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_5021 = torch.constant.int 4
    %7223 = torch.aten.mul.int %int4_5021, %6566 : !torch.int, !torch.int -> !torch.int
    %int512_5022 = torch.constant.int 512
    %7224 = torch.prim.ListConstruct %7223, %int512_5022 : (!torch.int, !torch.int) -> !torch.list<int>
    %7225 = torch.aten.view %7168, %7224 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %7225, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %7226 = torch.aten.mm %7225, %7184 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7226, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5023 = torch.constant.int 4
    %int4096_5024 = torch.constant.int 4096
    %7227 = torch.prim.ListConstruct %int4_5023, %6566, %int4096_5024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7228 = torch.aten.view %7226, %7227 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_5025 = torch.constant.int 4
    %7229 = torch.aten.mul.int %int4_5025, %6581 : !torch.int, !torch.int -> !torch.int
    %int512_5026 = torch.constant.int 512
    %7230 = torch.prim.ListConstruct %7229, %int512_5026 : (!torch.int, !torch.int) -> !torch.list<int>
    %7231 = torch.aten.view %7170, %7230 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %7231, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %7232 = torch.aten.mm %7231, %7186 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7232, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5027 = torch.constant.int 4
    %int4096_5028 = torch.constant.int 4096
    %7233 = torch.prim.ListConstruct %int4_5027, %6581, %int4096_5028 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7234 = torch.aten.view %7232, %7233 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
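    // Sum-reduce the eight partials. The all-reduce is spelled out per device:
    // each device pulls the seven remote partials with flow.tensor.transfer and
    // accumulates them locally. First, gather onto @__device_0.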
    %7235 = torch_c.to_builtin_tensor %7198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5029 = arith.constant 1 : index
    %dim_5030 = tensor.dim %7235, %c1_5029 : tensor<4x?x4096xf16>
    %7236 = flow.tensor.transfer %7235 : tensor<4x?x4096xf16>{%dim_5030} to #hal.device.promise<@__device_0>
    %7237 = torch_c.from_builtin_tensor %7236 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7238 = torch_c.to_builtin_tensor %7204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5031 = arith.constant 1 : index
    %dim_5032 = tensor.dim %7238, %c1_5031 : tensor<4x?x4096xf16>
    %7239 = flow.tensor.transfer %7238 : tensor<4x?x4096xf16>{%dim_5032} to #hal.device.promise<@__device_0>
    %7240 = torch_c.from_builtin_tensor %7239 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7241 = torch_c.to_builtin_tensor %7210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5033 = arith.constant 1 : index
    %dim_5034 = tensor.dim %7241, %c1_5033 : tensor<4x?x4096xf16>
    %7242 = flow.tensor.transfer %7241 : tensor<4x?x4096xf16>{%dim_5034} to #hal.device.promise<@__device_0>
    %7243 = torch_c.from_builtin_tensor %7242 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7244 = torch_c.to_builtin_tensor %7216 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5035 = arith.constant 1 : index
    %dim_5036 = tensor.dim %7244, %c1_5035 : tensor<4x?x4096xf16>
    %7245 = flow.tensor.transfer %7244 : tensor<4x?x4096xf16>{%dim_5036} to #hal.device.promise<@__device_0>
    %7246 = torch_c.from_builtin_tensor %7245 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7247 = torch_c.to_builtin_tensor %7222 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5037 = arith.constant 1 : index
    %dim_5038 = tensor.dim %7247, %c1_5037 : tensor<4x?x4096xf16>
    %7248 = flow.tensor.transfer %7247 : tensor<4x?x4096xf16>{%dim_5038} to #hal.device.promise<@__device_0>
    %7249 = torch_c.from_builtin_tensor %7248 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7250 = torch_c.to_builtin_tensor %7228 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5039 = arith.constant 1 : index
    %dim_5040 = tensor.dim %7250, %c1_5039 : tensor<4x?x4096xf16>
    %7251 = flow.tensor.transfer %7250 : tensor<4x?x4096xf16>{%dim_5040} to #hal.device.promise<@__device_0>
    %7252 = torch_c.from_builtin_tensor %7251 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7253 = torch_c.to_builtin_tensor %7234 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5041 = arith.constant 1 : index
    %dim_5042 = tensor.dim %7253, %c1_5041 : tensor<4x?x4096xf16>
    %7254 = flow.tensor.transfer %7253 : tensor<4x?x4096xf16>{%dim_5042} to #hal.device.promise<@__device_0>
    %7255 = torch_c.from_builtin_tensor %7254 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
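    // Accumulate on @__device_0: local partial %7192 plus the seven transfers.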
    %int1_5043 = torch.constant.int 1
    %7256 = torch.aten.add.Tensor %7192, %7237, %int1_5043 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5044 = torch.constant.int 1
    %7257 = torch.aten.add.Tensor %7256, %7240, %int1_5044 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5045 = torch.constant.int 1
    %7258 = torch.aten.add.Tensor %7257, %7243, %int1_5045 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5046 = torch.constant.int 1
    %7259 = torch.aten.add.Tensor %7258, %7246, %int1_5046 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5047 = torch.constant.int 1
    %7260 = torch.aten.add.Tensor %7259, %7249, %int1_5047 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5048 = torch.constant.int 1
    %7261 = torch.aten.add.Tensor %7260, %7252, %int1_5048 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5049 = torch.constant.int 1
    %7262 = torch.aten.add.Tensor %7261, %7255, %int1_5049 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
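    // Same gather-and-accumulate for @__device_1 (its local partial is %7198).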
    %7263 = torch_c.to_builtin_tensor %7192 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5050 = arith.constant 1 : index
    %dim_5051 = tensor.dim %7263, %c1_5050 : tensor<4x?x4096xf16>
    %7264 = flow.tensor.transfer %7263 : tensor<4x?x4096xf16>{%dim_5051} to #hal.device.promise<@__device_1>
    %7265 = torch_c.from_builtin_tensor %7264 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7266 = torch_c.to_builtin_tensor %7204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5052 = arith.constant 1 : index
    %dim_5053 = tensor.dim %7266, %c1_5052 : tensor<4x?x4096xf16>
    %7267 = flow.tensor.transfer %7266 : tensor<4x?x4096xf16>{%dim_5053} to #hal.device.promise<@__device_1>
    %7268 = torch_c.from_builtin_tensor %7267 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7269 = torch_c.to_builtin_tensor %7210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5054 = arith.constant 1 : index
    %dim_5055 = tensor.dim %7269, %c1_5054 : tensor<4x?x4096xf16>
    %7270 = flow.tensor.transfer %7269 : tensor<4x?x4096xf16>{%dim_5055} to #hal.device.promise<@__device_1>
    %7271 = torch_c.from_builtin_tensor %7270 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7272 = torch_c.to_builtin_tensor %7216 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5056 = arith.constant 1 : index
    %dim_5057 = tensor.dim %7272, %c1_5056 : tensor<4x?x4096xf16>
    %7273 = flow.tensor.transfer %7272 : tensor<4x?x4096xf16>{%dim_5057} to #hal.device.promise<@__device_1>
    %7274 = torch_c.from_builtin_tensor %7273 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7275 = torch_c.to_builtin_tensor %7222 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5058 = arith.constant 1 : index
    %dim_5059 = tensor.dim %7275, %c1_5058 : tensor<4x?x4096xf16>
    %7276 = flow.tensor.transfer %7275 : tensor<4x?x4096xf16>{%dim_5059} to #hal.device.promise<@__device_1>
    %7277 = torch_c.from_builtin_tensor %7276 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7278 = torch_c.to_builtin_tensor %7228 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5060 = arith.constant 1 : index
    %dim_5061 = tensor.dim %7278, %c1_5060 : tensor<4x?x4096xf16>
    %7279 = flow.tensor.transfer %7278 : tensor<4x?x4096xf16>{%dim_5061} to #hal.device.promise<@__device_1>
    %7280 = torch_c.from_builtin_tensor %7279 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7281 = torch_c.to_builtin_tensor %7234 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5062 = arith.constant 1 : index
    %dim_5063 = tensor.dim %7281, %c1_5062 : tensor<4x?x4096xf16>
    %7282 = flow.tensor.transfer %7281 : tensor<4x?x4096xf16>{%dim_5063} to #hal.device.promise<@__device_1>
    %7283 = torch_c.from_builtin_tensor %7282 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5064 = torch.constant.int 1
    %7284 = torch.aten.add.Tensor %7265, %7198, %int1_5064 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5065 = torch.constant.int 1
    %7285 = torch.aten.add.Tensor %7284, %7268, %int1_5065 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5066 = torch.constant.int 1
    %7286 = torch.aten.add.Tensor %7285, %7271, %int1_5066 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5067 = torch.constant.int 1
    %7287 = torch.aten.add.Tensor %7286, %7274, %int1_5067 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5068 = torch.constant.int 1
    %7288 = torch.aten.add.Tensor %7287, %7277, %int1_5068 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5069 = torch.constant.int 1
    %7289 = torch.aten.add.Tensor %7288, %7280, %int1_5069 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5070 = torch.constant.int 1
    %7290 = torch.aten.add.Tensor %7289, %7283, %int1_5070 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
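    // Same for @__device_2 (local partial %7204).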
    %7291 = torch_c.to_builtin_tensor %7192 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5071 = arith.constant 1 : index
    %dim_5072 = tensor.dim %7291, %c1_5071 : tensor<4x?x4096xf16>
    %7292 = flow.tensor.transfer %7291 : tensor<4x?x4096xf16>{%dim_5072} to #hal.device.promise<@__device_2>
    %7293 = torch_c.from_builtin_tensor %7292 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7294 = torch_c.to_builtin_tensor %7198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5073 = arith.constant 1 : index
    %dim_5074 = tensor.dim %7294, %c1_5073 : tensor<4x?x4096xf16>
    %7295 = flow.tensor.transfer %7294 : tensor<4x?x4096xf16>{%dim_5074} to #hal.device.promise<@__device_2>
    %7296 = torch_c.from_builtin_tensor %7295 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7297 = torch_c.to_builtin_tensor %7210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5075 = arith.constant 1 : index
    %dim_5076 = tensor.dim %7297, %c1_5075 : tensor<4x?x4096xf16>
    %7298 = flow.tensor.transfer %7297 : tensor<4x?x4096xf16>{%dim_5076} to #hal.device.promise<@__device_2>
    %7299 = torch_c.from_builtin_tensor %7298 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7300 = torch_c.to_builtin_tensor %7216 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5077 = arith.constant 1 : index
    %dim_5078 = tensor.dim %7300, %c1_5077 : tensor<4x?x4096xf16>
    %7301 = flow.tensor.transfer %7300 : tensor<4x?x4096xf16>{%dim_5078} to #hal.device.promise<@__device_2>
    %7302 = torch_c.from_builtin_tensor %7301 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7303 = torch_c.to_builtin_tensor %7222 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5079 = arith.constant 1 : index
    %dim_5080 = tensor.dim %7303, %c1_5079 : tensor<4x?x4096xf16>
    %7304 = flow.tensor.transfer %7303 : tensor<4x?x4096xf16>{%dim_5080} to #hal.device.promise<@__device_2>
    %7305 = torch_c.from_builtin_tensor %7304 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7306 = torch_c.to_builtin_tensor %7228 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5081 = arith.constant 1 : index
    %dim_5082 = tensor.dim %7306, %c1_5081 : tensor<4x?x4096xf16>
    %7307 = flow.tensor.transfer %7306 : tensor<4x?x4096xf16>{%dim_5082} to #hal.device.promise<@__device_2>
    %7308 = torch_c.from_builtin_tensor %7307 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7309 = torch_c.to_builtin_tensor %7234 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5083 = arith.constant 1 : index
    %dim_5084 = tensor.dim %7309, %c1_5083 : tensor<4x?x4096xf16>
    %7310 = flow.tensor.transfer %7309 : tensor<4x?x4096xf16>{%dim_5084} to #hal.device.promise<@__device_2>
    %7311 = torch_c.from_builtin_tensor %7310 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5085 = torch.constant.int 1
    %7312 = torch.aten.add.Tensor %7293, %7296, %int1_5085 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5086 = torch.constant.int 1
    %7313 = torch.aten.add.Tensor %7312, %7204, %int1_5086 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5087 = torch.constant.int 1
    %7314 = torch.aten.add.Tensor %7313, %7299, %int1_5087 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5088 = torch.constant.int 1
    %7315 = torch.aten.add.Tensor %7314, %7302, %int1_5088 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5089 = torch.constant.int 1
    %7316 = torch.aten.add.Tensor %7315, %7305, %int1_5089 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5090 = torch.constant.int 1
    %7317 = torch.aten.add.Tensor %7316, %7308, %int1_5090 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5091 = torch.constant.int 1
    %7318 = torch.aten.add.Tensor %7317, %7311, %int1_5091 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
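    // Same gather-and-sum leg for @__device_3; the locally resident operand is
    // %7210, which skips the transfer.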
    %7319 = torch_c.to_builtin_tensor %7192 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5092 = arith.constant 1 : index
    %dim_5093 = tensor.dim %7319, %c1_5092 : tensor<4x?x4096xf16>
    %7320 = flow.tensor.transfer %7319 : tensor<4x?x4096xf16>{%dim_5093} to #hal.device.promise<@__device_3>
    %7321 = torch_c.from_builtin_tensor %7320 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7322 = torch_c.to_builtin_tensor %7198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5094 = arith.constant 1 : index
    %dim_5095 = tensor.dim %7322, %c1_5094 : tensor<4x?x4096xf16>
    %7323 = flow.tensor.transfer %7322 : tensor<4x?x4096xf16>{%dim_5095} to #hal.device.promise<@__device_3>
    %7324 = torch_c.from_builtin_tensor %7323 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7325 = torch_c.to_builtin_tensor %7204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5096 = arith.constant 1 : index
    %dim_5097 = tensor.dim %7325, %c1_5096 : tensor<4x?x4096xf16>
    %7326 = flow.tensor.transfer %7325 : tensor<4x?x4096xf16>{%dim_5097} to #hal.device.promise<@__device_3>
    %7327 = torch_c.from_builtin_tensor %7326 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7328 = torch_c.to_builtin_tensor %7216 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5098 = arith.constant 1 : index
    %dim_5099 = tensor.dim %7328, %c1_5098 : tensor<4x?x4096xf16>
    %7329 = flow.tensor.transfer %7328 : tensor<4x?x4096xf16>{%dim_5099} to #hal.device.promise<@__device_3>
    %7330 = torch_c.from_builtin_tensor %7329 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7331 = torch_c.to_builtin_tensor %7222 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5100 = arith.constant 1 : index
    %dim_5101 = tensor.dim %7331, %c1_5100 : tensor<4x?x4096xf16>
    %7332 = flow.tensor.transfer %7331 : tensor<4x?x4096xf16>{%dim_5101} to #hal.device.promise<@__device_3>
    %7333 = torch_c.from_builtin_tensor %7332 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7334 = torch_c.to_builtin_tensor %7228 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5102 = arith.constant 1 : index
    %dim_5103 = tensor.dim %7334, %c1_5102 : tensor<4x?x4096xf16>
    %7335 = flow.tensor.transfer %7334 : tensor<4x?x4096xf16>{%dim_5103} to #hal.device.promise<@__device_3>
    %7336 = torch_c.from_builtin_tensor %7335 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7337 = torch_c.to_builtin_tensor %7234 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5104 = arith.constant 1 : index
    %dim_5105 = tensor.dim %7337, %c1_5104 : tensor<4x?x4096xf16>
    %7338 = flow.tensor.transfer %7337 : tensor<4x?x4096xf16>{%dim_5105} to #hal.device.promise<@__device_3>
    %7339 = torch_c.from_builtin_tensor %7338 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5106 = torch.constant.int 1
    %7340 = torch.aten.add.Tensor %7321, %7324, %int1_5106 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5107 = torch.constant.int 1
    %7341 = torch.aten.add.Tensor %7340, %7327, %int1_5107 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5108 = torch.constant.int 1
    %7342 = torch.aten.add.Tensor %7341, %7210, %int1_5108 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5109 = torch.constant.int 1
    %7343 = torch.aten.add.Tensor %7342, %7330, %int1_5109 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5110 = torch.constant.int 1
    %7344 = torch.aten.add.Tensor %7343, %7333, %int1_5110 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5111 = torch.constant.int 1
    %7345 = torch.aten.add.Tensor %7344, %7336, %int1_5111 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5112 = torch.constant.int 1
    %7346 = torch.aten.add.Tensor %7345, %7339, %int1_5112 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
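    // Gather-and-sum leg for @__device_4; local operand %7216 skips the transfer.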
    %7347 = torch_c.to_builtin_tensor %7192 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5113 = arith.constant 1 : index
    %dim_5114 = tensor.dim %7347, %c1_5113 : tensor<4x?x4096xf16>
    %7348 = flow.tensor.transfer %7347 : tensor<4x?x4096xf16>{%dim_5114} to #hal.device.promise<@__device_4>
    %7349 = torch_c.from_builtin_tensor %7348 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7350 = torch_c.to_builtin_tensor %7198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5115 = arith.constant 1 : index
    %dim_5116 = tensor.dim %7350, %c1_5115 : tensor<4x?x4096xf16>
    %7351 = flow.tensor.transfer %7350 : tensor<4x?x4096xf16>{%dim_5116} to #hal.device.promise<@__device_4>
    %7352 = torch_c.from_builtin_tensor %7351 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7353 = torch_c.to_builtin_tensor %7204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5117 = arith.constant 1 : index
    %dim_5118 = tensor.dim %7353, %c1_5117 : tensor<4x?x4096xf16>
    %7354 = flow.tensor.transfer %7353 : tensor<4x?x4096xf16>{%dim_5118} to #hal.device.promise<@__device_4>
    %7355 = torch_c.from_builtin_tensor %7354 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7356 = torch_c.to_builtin_tensor %7210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5119 = arith.constant 1 : index
    %dim_5120 = tensor.dim %7356, %c1_5119 : tensor<4x?x4096xf16>
    %7357 = flow.tensor.transfer %7356 : tensor<4x?x4096xf16>{%dim_5120} to #hal.device.promise<@__device_4>
    %7358 = torch_c.from_builtin_tensor %7357 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7359 = torch_c.to_builtin_tensor %7222 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5121 = arith.constant 1 : index
    %dim_5122 = tensor.dim %7359, %c1_5121 : tensor<4x?x4096xf16>
    %7360 = flow.tensor.transfer %7359 : tensor<4x?x4096xf16>{%dim_5122} to #hal.device.promise<@__device_4>
    %7361 = torch_c.from_builtin_tensor %7360 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7362 = torch_c.to_builtin_tensor %7228 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5123 = arith.constant 1 : index
    %dim_5124 = tensor.dim %7362, %c1_5123 : tensor<4x?x4096xf16>
    %7363 = flow.tensor.transfer %7362 : tensor<4x?x4096xf16>{%dim_5124} to #hal.device.promise<@__device_4>
    %7364 = torch_c.from_builtin_tensor %7363 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7365 = torch_c.to_builtin_tensor %7234 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5125 = arith.constant 1 : index
    %dim_5126 = tensor.dim %7365, %c1_5125 : tensor<4x?x4096xf16>
    %7366 = flow.tensor.transfer %7365 : tensor<4x?x4096xf16>{%dim_5126} to #hal.device.promise<@__device_4>
    %7367 = torch_c.from_builtin_tensor %7366 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5127 = torch.constant.int 1
    %7368 = torch.aten.add.Tensor %7349, %7352, %int1_5127 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5128 = torch.constant.int 1
    %7369 = torch.aten.add.Tensor %7368, %7355, %int1_5128 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5129 = torch.constant.int 1
    %7370 = torch.aten.add.Tensor %7369, %7358, %int1_5129 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5130 = torch.constant.int 1
    %7371 = torch.aten.add.Tensor %7370, %7216, %int1_5130 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5131 = torch.constant.int 1
    %7372 = torch.aten.add.Tensor %7371, %7361, %int1_5131 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5132 = torch.constant.int 1
    %7373 = torch.aten.add.Tensor %7372, %7364, %int1_5132 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5133 = torch.constant.int 1
    %7374 = torch.aten.add.Tensor %7373, %7367, %int1_5133 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
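    // Gather-and-sum leg for @__device_5; local operand %7222 skips the transfer.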
    %7375 = torch_c.to_builtin_tensor %7192 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5134 = arith.constant 1 : index
    %dim_5135 = tensor.dim %7375, %c1_5134 : tensor<4x?x4096xf16>
    %7376 = flow.tensor.transfer %7375 : tensor<4x?x4096xf16>{%dim_5135} to #hal.device.promise<@__device_5>
    %7377 = torch_c.from_builtin_tensor %7376 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7378 = torch_c.to_builtin_tensor %7198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5136 = arith.constant 1 : index
    %dim_5137 = tensor.dim %7378, %c1_5136 : tensor<4x?x4096xf16>
    %7379 = flow.tensor.transfer %7378 : tensor<4x?x4096xf16>{%dim_5137} to #hal.device.promise<@__device_5>
    %7380 = torch_c.from_builtin_tensor %7379 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7381 = torch_c.to_builtin_tensor %7204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5138 = arith.constant 1 : index
    %dim_5139 = tensor.dim %7381, %c1_5138 : tensor<4x?x4096xf16>
    %7382 = flow.tensor.transfer %7381 : tensor<4x?x4096xf16>{%dim_5139} to #hal.device.promise<@__device_5>
    %7383 = torch_c.from_builtin_tensor %7382 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7384 = torch_c.to_builtin_tensor %7210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5140 = arith.constant 1 : index
    %dim_5141 = tensor.dim %7384, %c1_5140 : tensor<4x?x4096xf16>
    %7385 = flow.tensor.transfer %7384 : tensor<4x?x4096xf16>{%dim_5141} to #hal.device.promise<@__device_5>
    %7386 = torch_c.from_builtin_tensor %7385 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7387 = torch_c.to_builtin_tensor %7216 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5142 = arith.constant 1 : index
    %dim_5143 = tensor.dim %7387, %c1_5142 : tensor<4x?x4096xf16>
    %7388 = flow.tensor.transfer %7387 : tensor<4x?x4096xf16>{%dim_5143} to #hal.device.promise<@__device_5>
    %7389 = torch_c.from_builtin_tensor %7388 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7390 = torch_c.to_builtin_tensor %7228 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5144 = arith.constant 1 : index
    %dim_5145 = tensor.dim %7390, %c1_5144 : tensor<4x?x4096xf16>
    %7391 = flow.tensor.transfer %7390 : tensor<4x?x4096xf16>{%dim_5145} to #hal.device.promise<@__device_5>
    %7392 = torch_c.from_builtin_tensor %7391 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7393 = torch_c.to_builtin_tensor %7234 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5146 = arith.constant 1 : index
    %dim_5147 = tensor.dim %7393, %c1_5146 : tensor<4x?x4096xf16>
    %7394 = flow.tensor.transfer %7393 : tensor<4x?x4096xf16>{%dim_5147} to #hal.device.promise<@__device_5>
    %7395 = torch_c.from_builtin_tensor %7394 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5148 = torch.constant.int 1
    %7396 = torch.aten.add.Tensor %7377, %7380, %int1_5148 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5149 = torch.constant.int 1
    %7397 = torch.aten.add.Tensor %7396, %7383, %int1_5149 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5150 = torch.constant.int 1
    %7398 = torch.aten.add.Tensor %7397, %7386, %int1_5150 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5151 = torch.constant.int 1
    %7399 = torch.aten.add.Tensor %7398, %7389, %int1_5151 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5152 = torch.constant.int 1
    %7400 = torch.aten.add.Tensor %7399, %7222, %int1_5152 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5153 = torch.constant.int 1
    %7401 = torch.aten.add.Tensor %7400, %7392, %int1_5153 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5154 = torch.constant.int 1
    %7402 = torch.aten.add.Tensor %7401, %7395, %int1_5154 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
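    // Gather-and-sum leg for @__device_6; local operand %7228 skips the transfer.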
    %7403 = torch_c.to_builtin_tensor %7192 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5155 = arith.constant 1 : index
    %dim_5156 = tensor.dim %7403, %c1_5155 : tensor<4x?x4096xf16>
    %7404 = flow.tensor.transfer %7403 : tensor<4x?x4096xf16>{%dim_5156} to #hal.device.promise<@__device_6>
    %7405 = torch_c.from_builtin_tensor %7404 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7406 = torch_c.to_builtin_tensor %7198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5157 = arith.constant 1 : index
    %dim_5158 = tensor.dim %7406, %c1_5157 : tensor<4x?x4096xf16>
    %7407 = flow.tensor.transfer %7406 : tensor<4x?x4096xf16>{%dim_5158} to #hal.device.promise<@__device_6>
    %7408 = torch_c.from_builtin_tensor %7407 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7409 = torch_c.to_builtin_tensor %7204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5159 = arith.constant 1 : index
    %dim_5160 = tensor.dim %7409, %c1_5159 : tensor<4x?x4096xf16>
    %7410 = flow.tensor.transfer %7409 : tensor<4x?x4096xf16>{%dim_5160} to #hal.device.promise<@__device_6>
    %7411 = torch_c.from_builtin_tensor %7410 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7412 = torch_c.to_builtin_tensor %7210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5161 = arith.constant 1 : index
    %dim_5162 = tensor.dim %7412, %c1_5161 : tensor<4x?x4096xf16>
    %7413 = flow.tensor.transfer %7412 : tensor<4x?x4096xf16>{%dim_5162} to #hal.device.promise<@__device_6>
    %7414 = torch_c.from_builtin_tensor %7413 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7415 = torch_c.to_builtin_tensor %7216 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5163 = arith.constant 1 : index
    %dim_5164 = tensor.dim %7415, %c1_5163 : tensor<4x?x4096xf16>
    %7416 = flow.tensor.transfer %7415 : tensor<4x?x4096xf16>{%dim_5164} to #hal.device.promise<@__device_6>
    %7417 = torch_c.from_builtin_tensor %7416 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7418 = torch_c.to_builtin_tensor %7222 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5165 = arith.constant 1 : index
    %dim_5166 = tensor.dim %7418, %c1_5165 : tensor<4x?x4096xf16>
    %7419 = flow.tensor.transfer %7418 : tensor<4x?x4096xf16>{%dim_5166} to #hal.device.promise<@__device_6>
    %7420 = torch_c.from_builtin_tensor %7419 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7421 = torch_c.to_builtin_tensor %7234 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5167 = arith.constant 1 : index
    %dim_5168 = tensor.dim %7421, %c1_5167 : tensor<4x?x4096xf16>
    %7422 = flow.tensor.transfer %7421 : tensor<4x?x4096xf16>{%dim_5168} to #hal.device.promise<@__device_6>
    %7423 = torch_c.from_builtin_tensor %7422 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5169 = torch.constant.int 1
    %7424 = torch.aten.add.Tensor %7405, %7408, %int1_5169 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5170 = torch.constant.int 1
    %7425 = torch.aten.add.Tensor %7424, %7411, %int1_5170 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5171 = torch.constant.int 1
    %7426 = torch.aten.add.Tensor %7425, %7414, %int1_5171 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5172 = torch.constant.int 1
    %7427 = torch.aten.add.Tensor %7426, %7417, %int1_5172 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5173 = torch.constant.int 1
    %7428 = torch.aten.add.Tensor %7427, %7420, %int1_5173 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5174 = torch.constant.int 1
    %7429 = torch.aten.add.Tensor %7428, %7228, %int1_5174 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5175 = torch.constant.int 1
    %7430 = torch.aten.add.Tensor %7429, %7423, %int1_5175 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
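    // Final gather-and-sum leg, for @__device_7; local operand %7234 skips the
    // transfer. %7262, %7290, %7318, %7346, %7374, %7402, %7430, %7458 now hold
    // the same reduced tensor, one copy per device.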
    %7431 = torch_c.to_builtin_tensor %7192 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5176 = arith.constant 1 : index
    %dim_5177 = tensor.dim %7431, %c1_5176 : tensor<4x?x4096xf16>
    %7432 = flow.tensor.transfer %7431 : tensor<4x?x4096xf16>{%dim_5177} to #hal.device.promise<@__device_7>
    %7433 = torch_c.from_builtin_tensor %7432 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7434 = torch_c.to_builtin_tensor %7198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5178 = arith.constant 1 : index
    %dim_5179 = tensor.dim %7434, %c1_5178 : tensor<4x?x4096xf16>
    %7435 = flow.tensor.transfer %7434 : tensor<4x?x4096xf16>{%dim_5179} to #hal.device.promise<@__device_7>
    %7436 = torch_c.from_builtin_tensor %7435 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7437 = torch_c.to_builtin_tensor %7204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5180 = arith.constant 1 : index
    %dim_5181 = tensor.dim %7437, %c1_5180 : tensor<4x?x4096xf16>
    %7438 = flow.tensor.transfer %7437 : tensor<4x?x4096xf16>{%dim_5181} to #hal.device.promise<@__device_7>
    %7439 = torch_c.from_builtin_tensor %7438 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7440 = torch_c.to_builtin_tensor %7210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5182 = arith.constant 1 : index
    %dim_5183 = tensor.dim %7440, %c1_5182 : tensor<4x?x4096xf16>
    %7441 = flow.tensor.transfer %7440 : tensor<4x?x4096xf16>{%dim_5183} to #hal.device.promise<@__device_7>
    %7442 = torch_c.from_builtin_tensor %7441 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7443 = torch_c.to_builtin_tensor %7216 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5184 = arith.constant 1 : index
    %dim_5185 = tensor.dim %7443, %c1_5184 : tensor<4x?x4096xf16>
    %7444 = flow.tensor.transfer %7443 : tensor<4x?x4096xf16>{%dim_5185} to #hal.device.promise<@__device_7>
    %7445 = torch_c.from_builtin_tensor %7444 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7446 = torch_c.to_builtin_tensor %7222 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5186 = arith.constant 1 : index
    %dim_5187 = tensor.dim %7446, %c1_5186 : tensor<4x?x4096xf16>
    %7447 = flow.tensor.transfer %7446 : tensor<4x?x4096xf16>{%dim_5187} to #hal.device.promise<@__device_7>
    %7448 = torch_c.from_builtin_tensor %7447 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7449 = torch_c.to_builtin_tensor %7228 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5188 = arith.constant 1 : index
    %dim_5189 = tensor.dim %7449, %c1_5188 : tensor<4x?x4096xf16>
    %7450 = flow.tensor.transfer %7449 : tensor<4x?x4096xf16>{%dim_5189} to #hal.device.promise<@__device_7>
    %7451 = torch_c.from_builtin_tensor %7450 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5190 = torch.constant.int 1
    %7452 = torch.aten.add.Tensor %7433, %7436, %int1_5190 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5191 = torch.constant.int 1
    %7453 = torch.aten.add.Tensor %7452, %7439, %int1_5191 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5192 = torch.constant.int 1
    %7454 = torch.aten.add.Tensor %7453, %7442, %int1_5192 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5193 = torch.constant.int 1
    %7455 = torch.aten.add.Tensor %7454, %7445, %int1_5193 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5194 = torch.constant.int 1
    %7456 = torch.aten.add.Tensor %7455, %7448, %int1_5194 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5195 = torch.constant.int 1
    %7457 = torch.aten.add.Tensor %7456, %7451, %int1_5195 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5196 = torch.constant.int 1
    %7458 = torch.aten.add.Tensor %7457, %7234, %int1_5196 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
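    // Residual connection: add the reduced output to each device's copy of the
    // running hidden state (%6118..%6125, likely the per-device residual stream),
    // giving %7459..%7466.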
    %int1_5197 = torch.constant.int 1
    %7459 = torch.aten.add.Tensor %6118, %7262, %int1_5197 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5198 = torch.constant.int 1
    %7460 = torch.aten.add.Tensor %6119, %7290, %int1_5198 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5199 = torch.constant.int 1
    %7461 = torch.aten.add.Tensor %6120, %7318, %int1_5199 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5200 = torch.constant.int 1
    %7462 = torch.aten.add.Tensor %6121, %7346, %int1_5200 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5201 = torch.constant.int 1
    %7463 = torch.aten.add.Tensor %6122, %7374, %int1_5201 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5202 = torch.constant.int 1
    %7464 = torch.aten.add.Tensor %6123, %7402, %int1_5202 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5203 = torch.constant.int 1
    %7465 = torch.aten.add.Tensor %6124, %7430, %int1_5203 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5204 = torch.constant.int 1
    %7466 = torch.aten.add.Tensor %6125, %7458, %int1_5204 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
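    // RMSNorm, computed redundantly on all eight devices:
    //   y = w * x * rsqrt(mean(x^2, dim=-1) + eps)
    // Step 1: upcast x to f32 (torch dtype code 6) so the reduction runs in
    // full precision.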
    %int6_5205 = torch.constant.int 6
    %7467 = torch.prims.convert_element_type %7459, %int6_5205 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5206 = torch.constant.int 6
    %7468 = torch.prims.convert_element_type %7460, %int6_5206 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5207 = torch.constant.int 6
    %7469 = torch.prims.convert_element_type %7461, %int6_5207 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5208 = torch.constant.int 6
    %7470 = torch.prims.convert_element_type %7462, %int6_5208 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5209 = torch.constant.int 6
    %7471 = torch.prims.convert_element_type %7463, %int6_5209 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5210 = torch.constant.int 6
    %7472 = torch.prims.convert_element_type %7464, %int6_5210 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5211 = torch.constant.int 6
    %7473 = torch.prims.convert_element_type %7465, %int6_5211 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5212 = torch.constant.int 6
    %7474 = torch.prims.convert_element_type %7466, %int6_5212 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
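    // Step 2: square elementwise (x^2).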
    %int2_5213 = torch.constant.int 2
    %7475 = torch.aten.pow.Tensor_Scalar %7467, %int2_5213 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5214 = torch.constant.int 2
    %7476 = torch.aten.pow.Tensor_Scalar %7468, %int2_5214 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5215 = torch.constant.int 2
    %7477 = torch.aten.pow.Tensor_Scalar %7469, %int2_5215 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5216 = torch.constant.int 2
    %7478 = torch.aten.pow.Tensor_Scalar %7470, %int2_5216 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5217 = torch.constant.int 2
    %7479 = torch.aten.pow.Tensor_Scalar %7471, %int2_5217 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5218 = torch.constant.int 2
    %7480 = torch.aten.pow.Tensor_Scalar %7472, %int2_5218 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5219 = torch.constant.int 2
    %7481 = torch.aten.pow.Tensor_Scalar %7473, %int2_5219 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5220 = torch.constant.int 2
    %7482 = torch.aten.pow.Tensor_Scalar %7474, %int2_5220 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
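    // Step 3: mean of the squares over the hidden dim (dim -1, keepdim=true),
    // reducing [4,?,4096] to [4,?,1].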
    %int-1_5221 = torch.constant.int -1
    %7483 = torch.prim.ListConstruct %int-1_5221 : (!torch.int) -> !torch.list<int>
    %true_5222 = torch.constant.bool true
    %none_5223 = torch.constant.none
    %7484 = torch.aten.mean.dim %7475, %7483, %true_5222, %none_5223 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5224 = torch.constant.int -1
    %7485 = torch.prim.ListConstruct %int-1_5224 : (!torch.int) -> !torch.list<int>
    %true_5225 = torch.constant.bool true
    %none_5226 = torch.constant.none
    %7486 = torch.aten.mean.dim %7476, %7485, %true_5225, %none_5226 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5227 = torch.constant.int -1
    %7487 = torch.prim.ListConstruct %int-1_5227 : (!torch.int) -> !torch.list<int>
    %true_5228 = torch.constant.bool true
    %none_5229 = torch.constant.none
    %7488 = torch.aten.mean.dim %7477, %7487, %true_5228, %none_5229 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5230 = torch.constant.int -1
    %7489 = torch.prim.ListConstruct %int-1_5230 : (!torch.int) -> !torch.list<int>
    %true_5231 = torch.constant.bool true
    %none_5232 = torch.constant.none
    %7490 = torch.aten.mean.dim %7478, %7489, %true_5231, %none_5232 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5233 = torch.constant.int -1
    %7491 = torch.prim.ListConstruct %int-1_5233 : (!torch.int) -> !torch.list<int>
    %true_5234 = torch.constant.bool true
    %none_5235 = torch.constant.none
    %7492 = torch.aten.mean.dim %7479, %7491, %true_5234, %none_5235 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5236 = torch.constant.int -1
    %7493 = torch.prim.ListConstruct %int-1_5236 : (!torch.int) -> !torch.list<int>
    %true_5237 = torch.constant.bool true
    %none_5238 = torch.constant.none
    %7494 = torch.aten.mean.dim %7480, %7493, %true_5237, %none_5238 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5239 = torch.constant.int -1
    %7495 = torch.prim.ListConstruct %int-1_5239 : (!torch.int) -> !torch.list<int>
    %true_5240 = torch.constant.bool true
    %none_5241 = torch.constant.none
    %7496 = torch.aten.mean.dim %7481, %7495, %true_5240, %none_5241 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5242 = torch.constant.int -1
    %7497 = torch.prim.ListConstruct %int-1_5242 : (!torch.int) -> !torch.list<int>
    %true_5243 = torch.constant.bool true
    %none_5244 = torch.constant.none
    %7498 = torch.aten.mean.dim %7482, %7497, %true_5243, %none_5244 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
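    // Step 4: add eps; 9.9999997473787516E-6 is 1.0e-5 rounded to f32.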
    %float9.999990e-06_5245 = torch.constant.float 9.9999997473787516E-6
    %int1_5246 = torch.constant.int 1
    %7499 = torch.aten.add.Scalar %7484, %float9.999990e-06_5245, %int1_5246 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5247 = torch.constant.float 9.9999997473787516E-6
    %int1_5248 = torch.constant.int 1
    %7500 = torch.aten.add.Scalar %7486, %float9.999990e-06_5247, %int1_5248 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5249 = torch.constant.float 9.9999997473787516E-6
    %int1_5250 = torch.constant.int 1
    %7501 = torch.aten.add.Scalar %7488, %float9.999990e-06_5249, %int1_5250 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5251 = torch.constant.float 9.9999997473787516E-6
    %int1_5252 = torch.constant.int 1
    %7502 = torch.aten.add.Scalar %7490, %float9.999990e-06_5251, %int1_5252 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5253 = torch.constant.float 9.9999997473787516E-6
    %int1_5254 = torch.constant.int 1
    %7503 = torch.aten.add.Scalar %7492, %float9.999990e-06_5253, %int1_5254 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5255 = torch.constant.float 9.9999997473787516E-6
    %int1_5256 = torch.constant.int 1
    %7504 = torch.aten.add.Scalar %7494, %float9.999990e-06_5255, %int1_5256 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5257 = torch.constant.float 9.9999997473787516E-6
    %int1_5258 = torch.constant.int 1
    %7505 = torch.aten.add.Scalar %7496, %float9.999990e-06_5257, %int1_5258 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5259 = torch.constant.float 9.9999997473787516E-6
    %int1_5260 = torch.constant.int 1
    %7506 = torch.aten.add.Scalar %7498, %float9.999990e-06_5259, %int1_5260 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
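    // Step 5: rsqrt of the mean-square term.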
    %7507 = torch.aten.rsqrt %7499 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %7508 = torch.aten.rsqrt %7500 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %7509 = torch.aten.rsqrt %7501 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %7510 = torch.aten.rsqrt %7502 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %7511 = torch.aten.rsqrt %7503 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %7512 = torch.aten.rsqrt %7504 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %7513 = torch.aten.rsqrt %7505 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %7514 = torch.aten.rsqrt %7506 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %7514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
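    // Step 6: normalize, x * rsqrt(mean(x^2) + eps), broadcasting [4,?,1] over
    // the hidden dim.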
    %7515 = torch.aten.mul.Tensor %7467, %7507 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7516 = torch.aten.mul.Tensor %7468, %7508 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7517 = torch.aten.mul.Tensor %7469, %7509 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7518 = torch.aten.mul.Tensor %7470, %7510 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7519 = torch.aten.mul.Tensor %7471, %7511 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7520 = torch.aten.mul.Tensor %7472, %7512 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7521 = torch.aten.mul.Tensor %7473, %7513 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7522 = torch.aten.mul.Tensor %7474, %7514 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
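    // Step 7: scale by the norm weight (%192..%199, one [4096]xf32 replica per
    // device; likely this block's ffn_norm weight).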
    %7523 = torch.aten.mul.Tensor %192, %7515 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7524 = torch.aten.mul.Tensor %193, %7516 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7525 = torch.aten.mul.Tensor %194, %7517 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7526 = torch.aten.mul.Tensor %195, %7518 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7527 = torch.aten.mul.Tensor %196, %7519 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7528 = torch.aten.mul.Tensor %197, %7520 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7529 = torch.aten.mul.Tensor %198, %7521 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %7530 = torch.aten.mul.Tensor %199, %7522 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
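    // Step 8: downcast the normalized activations back to f16 (torch dtype
    // code 5).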
    %int5_5261 = torch.constant.int 5
    %7531 = torch.prims.convert_element_type %7523, %int5_5261 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5262 = torch.constant.int 5
    %7532 = torch.prims.convert_element_type %7524, %int5_5262 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5263 = torch.constant.int 5
    %7533 = torch.prims.convert_element_type %7525, %int5_5263 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5264 = torch.constant.int 5
    %7534 = torch.prims.convert_element_type %7526, %int5_5264 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5265 = torch.constant.int 5
    %7535 = torch.prims.convert_element_type %7527, %int5_5265 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5266 = torch.constant.int 5
    %7536 = torch.prims.convert_element_type %7528, %int5_5266 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5267 = torch.constant.int 5
    %7537 = torch.prims.convert_element_type %7529, %int5_5267 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5268 = torch.constant.int 5
    %7538 = torch.prims.convert_element_type %7530, %int5_5268 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
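    // Tensor-parallel FFN projection: transpose each [1792,4096] weight shard
    // (%200..%207) to [4096,1792] ahead of the matmul; 1792 output columns per
    // device is consistent with a 14336-wide FFN split eight ways.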
    %int1_5269 = torch.constant.int 1
    %int0_5270 = torch.constant.int 0
    %7539 = torch.prim.ListConstruct %int1_5269, %int0_5270 : (!torch.int, !torch.int) -> !torch.list<int>
    %7540 = torch.aten.permute %200, %7539 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5271 = torch.constant.int 1
    %int0_5272 = torch.constant.int 0
    %7541 = torch.prim.ListConstruct %int1_5271, %int0_5272 : (!torch.int, !torch.int) -> !torch.list<int>
    %7542 = torch.aten.permute %201, %7541 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5273 = torch.constant.int 1
    %int0_5274 = torch.constant.int 0
    %7543 = torch.prim.ListConstruct %int1_5273, %int0_5274 : (!torch.int, !torch.int) -> !torch.list<int>
    %7544 = torch.aten.permute %202, %7543 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5275 = torch.constant.int 1
    %int0_5276 = torch.constant.int 0
    %7545 = torch.prim.ListConstruct %int1_5275, %int0_5276 : (!torch.int, !torch.int) -> !torch.list<int>
    %7546 = torch.aten.permute %203, %7545 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5277 = torch.constant.int 1
    %int0_5278 = torch.constant.int 0
    %7547 = torch.prim.ListConstruct %int1_5277, %int0_5278 : (!torch.int, !torch.int) -> !torch.list<int>
    %7548 = torch.aten.permute %204, %7547 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5279 = torch.constant.int 1
    %int0_5280 = torch.constant.int 0
    %7549 = torch.prim.ListConstruct %int1_5279, %int0_5280 : (!torch.int, !torch.int) -> !torch.list<int>
    %7550 = torch.aten.permute %205, %7549 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5281 = torch.constant.int 1
    %int0_5282 = torch.constant.int 0
    %7551 = torch.prim.ListConstruct %int1_5281, %int0_5282 : (!torch.int, !torch.int) -> !torch.list<int>
    %7552 = torch.aten.permute %206, %7551 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5283 = torch.constant.int 1
    %int0_5284 = torch.constant.int 0
    %7553 = torch.prim.ListConstruct %int1_5283, %int0_5284 : (!torch.int, !torch.int) -> !torch.list<int>
    %7554 = torch.aten.permute %207, %7553 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
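    // Gate projection, one copy per device: flatten [4,?,4096] -> [4*s0*16,4096],
    // mm with the transposed shard, then view the result back to [4,?,1792].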
    %int4_5285 = torch.constant.int 4
    %7555 = torch.aten.mul.int %int4_5285, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5286 = torch.constant.int 4096
    %7556 = torch.prim.ListConstruct %7555, %int4096_5286 : (!torch.int, !torch.int) -> !torch.list<int>
    %7557 = torch.aten.view %7531, %7556 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7557, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7558 = torch.aten.mm %7557, %7540 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7558, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5287 = torch.constant.int 4
    %int1792_5288 = torch.constant.int 1792
    %7559 = torch.prim.ListConstruct %int4_5287, %2482, %int1792_5288 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7560 = torch.aten.view %7558, %7559 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5289 = torch.constant.int 4
    %7561 = torch.aten.mul.int %int4_5289, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5290 = torch.constant.int 4096
    %7562 = torch.prim.ListConstruct %7561, %int4096_5290 : (!torch.int, !torch.int) -> !torch.list<int>
    %7563 = torch.aten.view %7532, %7562 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7563, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7564 = torch.aten.mm %7563, %7542 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7564, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5291 = torch.constant.int 4
    %int1792_5292 = torch.constant.int 1792
    %7565 = torch.prim.ListConstruct %int4_5291, %2482, %int1792_5292 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7566 = torch.aten.view %7564, %7565 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5293 = torch.constant.int 4
    %7567 = torch.aten.mul.int %int4_5293, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5294 = torch.constant.int 4096
    %7568 = torch.prim.ListConstruct %7567, %int4096_5294 : (!torch.int, !torch.int) -> !torch.list<int>
    %7569 = torch.aten.view %7533, %7568 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7569, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7570 = torch.aten.mm %7569, %7544 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7570, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5295 = torch.constant.int 4
    %int1792_5296 = torch.constant.int 1792
    %7571 = torch.prim.ListConstruct %int4_5295, %2482, %int1792_5296 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7572 = torch.aten.view %7570, %7571 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5297 = torch.constant.int 4
    %7573 = torch.aten.mul.int %int4_5297, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5298 = torch.constant.int 4096
    %7574 = torch.prim.ListConstruct %7573, %int4096_5298 : (!torch.int, !torch.int) -> !torch.list<int>
    %7575 = torch.aten.view %7534, %7574 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7575, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7576 = torch.aten.mm %7575, %7546 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7576, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5299 = torch.constant.int 4
    %int1792_5300 = torch.constant.int 1792
    %7577 = torch.prim.ListConstruct %int4_5299, %2482, %int1792_5300 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7578 = torch.aten.view %7576, %7577 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5301 = torch.constant.int 4
    %7579 = torch.aten.mul.int %int4_5301, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5302 = torch.constant.int 4096
    %7580 = torch.prim.ListConstruct %7579, %int4096_5302 : (!torch.int, !torch.int) -> !torch.list<int>
    %7581 = torch.aten.view %7535, %7580 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7581, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7582 = torch.aten.mm %7581, %7548 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7582, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5303 = torch.constant.int 4
    %int1792_5304 = torch.constant.int 1792
    %7583 = torch.prim.ListConstruct %int4_5303, %2482, %int1792_5304 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7584 = torch.aten.view %7582, %7583 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5305 = torch.constant.int 4
    %7585 = torch.aten.mul.int %int4_5305, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5306 = torch.constant.int 4096
    %7586 = torch.prim.ListConstruct %7585, %int4096_5306 : (!torch.int, !torch.int) -> !torch.list<int>
    %7587 = torch.aten.view %7536, %7586 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7587, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7588 = torch.aten.mm %7587, %7550 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7588, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5307 = torch.constant.int 4
    %int1792_5308 = torch.constant.int 1792
    %7589 = torch.prim.ListConstruct %int4_5307, %2482, %int1792_5308 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7590 = torch.aten.view %7588, %7589 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5309 = torch.constant.int 4
    %7591 = torch.aten.mul.int %int4_5309, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5310 = torch.constant.int 4096
    %7592 = torch.prim.ListConstruct %7591, %int4096_5310 : (!torch.int, !torch.int) -> !torch.list<int>
    %7593 = torch.aten.view %7537, %7592 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7593, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7594 = torch.aten.mm %7593, %7552 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7594, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5311 = torch.constant.int 4
    %int1792_5312 = torch.constant.int 1792
    %7595 = torch.prim.ListConstruct %int4_5311, %2482, %int1792_5312 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7596 = torch.aten.view %7594, %7595 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5313 = torch.constant.int 4
    %7597 = torch.aten.mul.int %int4_5313, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5314 = torch.constant.int 4096
    %7598 = torch.prim.ListConstruct %7597, %int4096_5314 : (!torch.int, !torch.int) -> !torch.list<int>
    %7599 = torch.aten.view %7538, %7598 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7599, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7600 = torch.aten.mm %7599, %7554 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7600, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5315 = torch.constant.int 4
    %int1792_5316 = torch.constant.int 1792
    %7601 = torch.prim.ListConstruct %int4_5315, %2482, %int1792_5316 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7602 = torch.aten.view %7600, %7601 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
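    // SiLU activation on each device's gate-projection output.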
    %7603 = torch.aten.silu %7560 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7604 = torch.aten.silu %7566 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7605 = torch.aten.silu %7572 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7606 = torch.aten.silu %7578 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7607 = torch.aten.silu %7584 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7608 = torch.aten.silu %7590 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7609 = torch.aten.silu %7596 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7610 = torch.aten.silu %7602 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
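    // Transpose the second (up-projection) weight shards, [1792,4096] -> [4096,1792],
    // mirroring the gate-weight permutes above.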
    %int1_5317 = torch.constant.int 1
    %int0_5318 = torch.constant.int 0
    %7611 = torch.prim.ListConstruct %int1_5317, %int0_5318 : (!torch.int, !torch.int) -> !torch.list<int>
    %7612 = torch.aten.permute %208, %7611 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5319 = torch.constant.int 1
    %int0_5320 = torch.constant.int 0
    %7613 = torch.prim.ListConstruct %int1_5319, %int0_5320 : (!torch.int, !torch.int) -> !torch.list<int>
    %7614 = torch.aten.permute %209, %7613 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5321 = torch.constant.int 1
    %int0_5322 = torch.constant.int 0
    %7615 = torch.prim.ListConstruct %int1_5321, %int0_5322 : (!torch.int, !torch.int) -> !torch.list<int>
    %7616 = torch.aten.permute %210, %7615 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5323 = torch.constant.int 1
    %int0_5324 = torch.constant.int 0
    %7617 = torch.prim.ListConstruct %int1_5323, %int0_5324 : (!torch.int, !torch.int) -> !torch.list<int>
    %7618 = torch.aten.permute %211, %7617 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5325 = torch.constant.int 1
    %int0_5326 = torch.constant.int 0
    %7619 = torch.prim.ListConstruct %int1_5325, %int0_5326 : (!torch.int, !torch.int) -> !torch.list<int>
    %7620 = torch.aten.permute %212, %7619 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5327 = torch.constant.int 1
    %int0_5328 = torch.constant.int 0
    %7621 = torch.prim.ListConstruct %int1_5327, %int0_5328 : (!torch.int, !torch.int) -> !torch.list<int>
    %7622 = torch.aten.permute %213, %7621 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5329 = torch.constant.int 1
    %int0_5330 = torch.constant.int 0
    %7623 = torch.prim.ListConstruct %int1_5329, %int0_5330 : (!torch.int, !torch.int) -> !torch.list<int>
    %7624 = torch.aten.permute %214, %7623 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_5331 = torch.constant.int 1
    %int0_5332 = torch.constant.int 0
    %7625 = torch.prim.ListConstruct %int1_5331, %int0_5332 : (!torch.int, !torch.int) -> !torch.list<int>
    %7626 = torch.aten.permute %215, %7625 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
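    // Up projection per device: the same flatten / mm / view pattern as the gate path,
    // reusing the f16 inputs %7531..%7538.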
    %int4_5333 = torch.constant.int 4
    %7627 = torch.aten.mul.int %int4_5333, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5334 = torch.constant.int 4096
    %7628 = torch.prim.ListConstruct %7627, %int4096_5334 : (!torch.int, !torch.int) -> !torch.list<int>
    %7629 = torch.aten.view %7531, %7628 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7629, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7630 = torch.aten.mm %7629, %7612 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7630, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5335 = torch.constant.int 4
    %int1792_5336 = torch.constant.int 1792
    %7631 = torch.prim.ListConstruct %int4_5335, %2482, %int1792_5336 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7632 = torch.aten.view %7630, %7631 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5337 = torch.constant.int 4
    %7633 = torch.aten.mul.int %int4_5337, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5338 = torch.constant.int 4096
    %7634 = torch.prim.ListConstruct %7633, %int4096_5338 : (!torch.int, !torch.int) -> !torch.list<int>
    %7635 = torch.aten.view %7532, %7634 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7635, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7636 = torch.aten.mm %7635, %7614 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7636, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5339 = torch.constant.int 4
    %int1792_5340 = torch.constant.int 1792
    %7637 = torch.prim.ListConstruct %int4_5339, %2482, %int1792_5340 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7638 = torch.aten.view %7636, %7637 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5341 = torch.constant.int 4
    %7639 = torch.aten.mul.int %int4_5341, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5342 = torch.constant.int 4096
    %7640 = torch.prim.ListConstruct %7639, %int4096_5342 : (!torch.int, !torch.int) -> !torch.list<int>
    %7641 = torch.aten.view %7533, %7640 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7641, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7642 = torch.aten.mm %7641, %7616 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7642, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5343 = torch.constant.int 4
    %int1792_5344 = torch.constant.int 1792
    %7643 = torch.prim.ListConstruct %int4_5343, %2482, %int1792_5344 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7644 = torch.aten.view %7642, %7643 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5345 = torch.constant.int 4
    %7645 = torch.aten.mul.int %int4_5345, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5346 = torch.constant.int 4096
    %7646 = torch.prim.ListConstruct %7645, %int4096_5346 : (!torch.int, !torch.int) -> !torch.list<int>
    %7647 = torch.aten.view %7534, %7646 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7647, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7648 = torch.aten.mm %7647, %7618 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7648, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5347 = torch.constant.int 4
    %int1792_5348 = torch.constant.int 1792
    %7649 = torch.prim.ListConstruct %int4_5347, %2482, %int1792_5348 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7650 = torch.aten.view %7648, %7649 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5349 = torch.constant.int 4
    %7651 = torch.aten.mul.int %int4_5349, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5350 = torch.constant.int 4096
    %7652 = torch.prim.ListConstruct %7651, %int4096_5350 : (!torch.int, !torch.int) -> !torch.list<int>
    %7653 = torch.aten.view %7535, %7652 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7653, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7654 = torch.aten.mm %7653, %7620 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7654, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5351 = torch.constant.int 4
    %int1792_5352 = torch.constant.int 1792
    %7655 = torch.prim.ListConstruct %int4_5351, %2482, %int1792_5352 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7656 = torch.aten.view %7654, %7655 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5353 = torch.constant.int 4
    %7657 = torch.aten.mul.int %int4_5353, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5354 = torch.constant.int 4096
    %7658 = torch.prim.ListConstruct %7657, %int4096_5354 : (!torch.int, !torch.int) -> !torch.list<int>
    %7659 = torch.aten.view %7536, %7658 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7659, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7660 = torch.aten.mm %7659, %7622 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7660, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5355 = torch.constant.int 4
    %int1792_5356 = torch.constant.int 1792
    %7661 = torch.prim.ListConstruct %int4_5355, %2482, %int1792_5356 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7662 = torch.aten.view %7660, %7661 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5357 = torch.constant.int 4
    %7663 = torch.aten.mul.int %int4_5357, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5358 = torch.constant.int 4096
    %7664 = torch.prim.ListConstruct %7663, %int4096_5358 : (!torch.int, !torch.int) -> !torch.list<int>
    %7665 = torch.aten.view %7537, %7664 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7665, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7666 = torch.aten.mm %7665, %7624 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7666, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5359 = torch.constant.int 4
    %int1792_5360 = torch.constant.int 1792
    %7667 = torch.prim.ListConstruct %int4_5359, %2482, %int1792_5360 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7668 = torch.aten.view %7666, %7667 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_5361 = torch.constant.int 4
    %7669 = torch.aten.mul.int %int4_5361, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5362 = torch.constant.int 4096
    %7670 = torch.prim.ListConstruct %7669, %int4096_5362 : (!torch.int, !torch.int) -> !torch.list<int>
    %7671 = torch.aten.view %7538, %7670 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7671, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %7672 = torch.aten.mm %7671, %7626 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7672, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_5363 = torch.constant.int 4
    %int1792_5364 = torch.constant.int 1792
    %7673 = torch.prim.ListConstruct %int4_5363, %2482, %int1792_5364 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7674 = torch.aten.view %7672, %7673 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
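    // Gated combine per device: silu(gate) * up, elementwise in f16 -- the standard
    // SwiGLU-style FFN nonlinearity.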
    %7675 = torch.aten.mul.Tensor %7603, %7632 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7676 = torch.aten.mul.Tensor %7604, %7638 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7677 = torch.aten.mul.Tensor %7605, %7644 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7678 = torch.aten.mul.Tensor %7606, %7650 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7679 = torch.aten.mul.Tensor %7607, %7656 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7680 = torch.aten.mul.Tensor %7608, %7662 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7681 = torch.aten.mul.Tensor %7609, %7668 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %7682 = torch.aten.mul.Tensor %7610, %7674 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %7682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
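    // Transpose the down-projection shards from [4096,1792] to [1792,4096]; note the
    // opposite orientation, since this projection maps 1792 back up to 4096.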
    %int1_5365 = torch.constant.int 1
    %int0_5366 = torch.constant.int 0
    %7683 = torch.prim.ListConstruct %int1_5365, %int0_5366 : (!torch.int, !torch.int) -> !torch.list<int>
    %7684 = torch.aten.permute %216, %7683 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_5367 = torch.constant.int 1
    %int0_5368 = torch.constant.int 0
    %7685 = torch.prim.ListConstruct %int1_5367, %int0_5368 : (!torch.int, !torch.int) -> !torch.list<int>
    %7686 = torch.aten.permute %217, %7685 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_5369 = torch.constant.int 1
    %int0_5370 = torch.constant.int 0
    %7687 = torch.prim.ListConstruct %int1_5369, %int0_5370 : (!torch.int, !torch.int) -> !torch.list<int>
    %7688 = torch.aten.permute %218, %7687 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_5371 = torch.constant.int 1
    %int0_5372 = torch.constant.int 0
    %7689 = torch.prim.ListConstruct %int1_5371, %int0_5372 : (!torch.int, !torch.int) -> !torch.list<int>
    %7690 = torch.aten.permute %219, %7689 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_5373 = torch.constant.int 1
    %int0_5374 = torch.constant.int 0
    %7691 = torch.prim.ListConstruct %int1_5373, %int0_5374 : (!torch.int, !torch.int) -> !torch.list<int>
    %7692 = torch.aten.permute %220, %7691 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_5375 = torch.constant.int 1
    %int0_5376 = torch.constant.int 0
    %7693 = torch.prim.ListConstruct %int1_5375, %int0_5376 : (!torch.int, !torch.int) -> !torch.list<int>
    %7694 = torch.aten.permute %221, %7693 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_5377 = torch.constant.int 1
    %int0_5378 = torch.constant.int 0
    %7695 = torch.prim.ListConstruct %int1_5377, %int0_5378 : (!torch.int, !torch.int) -> !torch.list<int>
    %7696 = torch.aten.permute %222, %7695 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_5379 = torch.constant.int 1
    %int0_5380 = torch.constant.int 0
    %7697 = torch.prim.ListConstruct %int1_5379, %int0_5380 : (!torch.int, !torch.int) -> !torch.list<int>
    %7698 = torch.aten.permute %223, %7697 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
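    // Down projection per device: flatten [4,?,1792] -> [4*s,1792], mm to [4*s,4096],
    // view back to [4,?,4096]. Because the contraction dimension is sharded, each
    // device's result is only a partial sum of the full FFN output.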
    %int1_5381 = torch.constant.int 1
    %7699 = torch.aten.size.int %7560, %int1_5381 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_5382 = torch.constant.int 4
    %7700 = torch.aten.mul.int %int4_5382, %7699 : !torch.int, !torch.int -> !torch.int
    %int1792_5383 = torch.constant.int 1792
    %7701 = torch.prim.ListConstruct %7700, %int1792_5383 : (!torch.int, !torch.int) -> !torch.list<int>
    %7702 = torch.aten.view %7675, %7701 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7702, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %7703 = torch.aten.mm %7702, %7684 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7703, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5384 = torch.constant.int 4
    %int4096_5385 = torch.constant.int 4096
    %7704 = torch.prim.ListConstruct %int4_5384, %7699, %int4096_5385 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7705 = torch.aten.view %7703, %7704 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5386 = torch.constant.int 1
    %7706 = torch.aten.size.int %7566, %int1_5386 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_5387 = torch.constant.int 4
    %7707 = torch.aten.mul.int %int4_5387, %7706 : !torch.int, !torch.int -> !torch.int
    %int1792_5388 = torch.constant.int 1792
    %7708 = torch.prim.ListConstruct %7707, %int1792_5388 : (!torch.int, !torch.int) -> !torch.list<int>
    %7709 = torch.aten.view %7676, %7708 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7709, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %7710 = torch.aten.mm %7709, %7686 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7710, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5389 = torch.constant.int 4
    %int4096_5390 = torch.constant.int 4096
    %7711 = torch.prim.ListConstruct %int4_5389, %7706, %int4096_5390 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7712 = torch.aten.view %7710, %7711 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5391 = torch.constant.int 1
    %7713 = torch.aten.size.int %7572, %int1_5391 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_5392 = torch.constant.int 4
    %7714 = torch.aten.mul.int %int4_5392, %7713 : !torch.int, !torch.int -> !torch.int
    %int1792_5393 = torch.constant.int 1792
    %7715 = torch.prim.ListConstruct %7714, %int1792_5393 : (!torch.int, !torch.int) -> !torch.list<int>
    %7716 = torch.aten.view %7677, %7715 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7716, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %7717 = torch.aten.mm %7716, %7688 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7717, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5394 = torch.constant.int 4
    %int4096_5395 = torch.constant.int 4096
    %7718 = torch.prim.ListConstruct %int4_5394, %7713, %int4096_5395 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7719 = torch.aten.view %7717, %7718 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5396 = torch.constant.int 1
    %7720 = torch.aten.size.int %7578, %int1_5396 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_5397 = torch.constant.int 4
    %7721 = torch.aten.mul.int %int4_5397, %7720 : !torch.int, !torch.int -> !torch.int
    %int1792_5398 = torch.constant.int 1792
    %7722 = torch.prim.ListConstruct %7721, %int1792_5398 : (!torch.int, !torch.int) -> !torch.list<int>
    %7723 = torch.aten.view %7678, %7722 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7723, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %7724 = torch.aten.mm %7723, %7690 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7724, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5399 = torch.constant.int 4
    %int4096_5400 = torch.constant.int 4096
    %7725 = torch.prim.ListConstruct %int4_5399, %7720, %int4096_5400 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7726 = torch.aten.view %7724, %7725 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5401 = torch.constant.int 1
    %7727 = torch.aten.size.int %7584, %int1_5401 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_5402 = torch.constant.int 4
    %7728 = torch.aten.mul.int %int4_5402, %7727 : !torch.int, !torch.int -> !torch.int
    %int1792_5403 = torch.constant.int 1792
    %7729 = torch.prim.ListConstruct %7728, %int1792_5403 : (!torch.int, !torch.int) -> !torch.list<int>
    %7730 = torch.aten.view %7679, %7729 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7730, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %7731 = torch.aten.mm %7730, %7692 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7731, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5404 = torch.constant.int 4
    %int4096_5405 = torch.constant.int 4096
    %7732 = torch.prim.ListConstruct %int4_5404, %7727, %int4096_5405 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7733 = torch.aten.view %7731, %7732 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5406 = torch.constant.int 1
    %7734 = torch.aten.size.int %7590, %int1_5406 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_5407 = torch.constant.int 4
    %7735 = torch.aten.mul.int %int4_5407, %7734 : !torch.int, !torch.int -> !torch.int
    %int1792_5408 = torch.constant.int 1792
    %7736 = torch.prim.ListConstruct %7735, %int1792_5408 : (!torch.int, !torch.int) -> !torch.list<int>
    %7737 = torch.aten.view %7680, %7736 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7737, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %7738 = torch.aten.mm %7737, %7694 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7738, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5409 = torch.constant.int 4
    %int4096_5410 = torch.constant.int 4096
    %7739 = torch.prim.ListConstruct %int4_5409, %7734, %int4096_5410 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7740 = torch.aten.view %7738, %7739 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5411 = torch.constant.int 1
    %7741 = torch.aten.size.int %7596, %int1_5411 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_5412 = torch.constant.int 4
    %7742 = torch.aten.mul.int %int4_5412, %7741 : !torch.int, !torch.int -> !torch.int
    %int1792_5413 = torch.constant.int 1792
    %7743 = torch.prim.ListConstruct %7742, %int1792_5413 : (!torch.int, !torch.int) -> !torch.list<int>
    %7744 = torch.aten.view %7681, %7743 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7744, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %7745 = torch.aten.mm %7744, %7696 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7745, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5414 = torch.constant.int 4
    %int4096_5415 = torch.constant.int 4096
    %7746 = torch.prim.ListConstruct %int4_5414, %7741, %int4096_5415 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7747 = torch.aten.view %7745, %7746 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5416 = torch.constant.int 1
    %7748 = torch.aten.size.int %7602, %int1_5416 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_5417 = torch.constant.int 4
    %7749 = torch.aten.mul.int %int4_5417, %7748 : !torch.int, !torch.int -> !torch.int
    %int1792_5418 = torch.constant.int 1792
    %7750 = torch.prim.ListConstruct %7749, %int1792_5418 : (!torch.int, !torch.int) -> !torch.list<int>
    %7751 = torch.aten.view %7682, %7750 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %7751, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %7752 = torch.aten.mm %7751, %7698 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %7752, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_5419 = torch.constant.int 4
    %int4096_5420 = torch.constant.int 4096
    %7753 = torch.prim.ListConstruct %int4_5419, %7748, %int4096_5420 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %7754 = torch.aten.view %7752, %7753 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
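    // All-reduce, unrolled per device: the seven remote partials (%7712..%7754) are
    // transferred to @__device_0 and accumulated into the local partial %7705 with a
    // chain of add.Tensor ops.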
    %7755 = torch_c.to_builtin_tensor %7712 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5421 = arith.constant 1 : index
    %dim_5422 = tensor.dim %7755, %c1_5421 : tensor<4x?x4096xf16>
    %7756 = flow.tensor.transfer %7755 : tensor<4x?x4096xf16>{%dim_5422} to #hal.device.promise<@__device_0>
    %7757 = torch_c.from_builtin_tensor %7756 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7758 = torch_c.to_builtin_tensor %7719 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5423 = arith.constant 1 : index
    %dim_5424 = tensor.dim %7758, %c1_5423 : tensor<4x?x4096xf16>
    %7759 = flow.tensor.transfer %7758 : tensor<4x?x4096xf16>{%dim_5424} to #hal.device.promise<@__device_0>
    %7760 = torch_c.from_builtin_tensor %7759 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7761 = torch_c.to_builtin_tensor %7726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5425 = arith.constant 1 : index
    %dim_5426 = tensor.dim %7761, %c1_5425 : tensor<4x?x4096xf16>
    %7762 = flow.tensor.transfer %7761 : tensor<4x?x4096xf16>{%dim_5426} to #hal.device.promise<@__device_0>
    %7763 = torch_c.from_builtin_tensor %7762 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7764 = torch_c.to_builtin_tensor %7733 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5427 = arith.constant 1 : index
    %dim_5428 = tensor.dim %7764, %c1_5427 : tensor<4x?x4096xf16>
    %7765 = flow.tensor.transfer %7764 : tensor<4x?x4096xf16>{%dim_5428} to #hal.device.promise<@__device_0>
    %7766 = torch_c.from_builtin_tensor %7765 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7767 = torch_c.to_builtin_tensor %7740 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5429 = arith.constant 1 : index
    %dim_5430 = tensor.dim %7767, %c1_5429 : tensor<4x?x4096xf16>
    %7768 = flow.tensor.transfer %7767 : tensor<4x?x4096xf16>{%dim_5430} to #hal.device.promise<@__device_0>
    %7769 = torch_c.from_builtin_tensor %7768 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7770 = torch_c.to_builtin_tensor %7747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5431 = arith.constant 1 : index
    %dim_5432 = tensor.dim %7770, %c1_5431 : tensor<4x?x4096xf16>
    %7771 = flow.tensor.transfer %7770 : tensor<4x?x4096xf16>{%dim_5432} to #hal.device.promise<@__device_0>
    %7772 = torch_c.from_builtin_tensor %7771 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7773 = torch_c.to_builtin_tensor %7754 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5433 = arith.constant 1 : index
    %dim_5434 = tensor.dim %7773, %c1_5433 : tensor<4x?x4096xf16>
    %7774 = flow.tensor.transfer %7773 : tensor<4x?x4096xf16>{%dim_5434} to #hal.device.promise<@__device_0>
    %7775 = torch_c.from_builtin_tensor %7774 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5435 = torch.constant.int 1
    %7776 = torch.aten.add.Tensor %7705, %7757, %int1_5435 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5436 = torch.constant.int 1
    %7777 = torch.aten.add.Tensor %7776, %7760, %int1_5436 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5437 = torch.constant.int 1
    %7778 = torch.aten.add.Tensor %7777, %7763, %int1_5437 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5438 = torch.constant.int 1
    %7779 = torch.aten.add.Tensor %7778, %7766, %int1_5438 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5439 = torch.constant.int 1
    %7780 = torch.aten.add.Tensor %7779, %7769, %int1_5439 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5440 = torch.constant.int 1
    %7781 = torch.aten.add.Tensor %7780, %7772, %int1_5440 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5441 = torch.constant.int 1
    %7782 = torch.aten.add.Tensor %7781, %7775, %int1_5441 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
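    // Same reduction for @__device_1: copy in the seven remote partials and sum them
    // with the local %7712 (added directly, without a transfer).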
    %7783 = torch_c.to_builtin_tensor %7705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5442 = arith.constant 1 : index
    %dim_5443 = tensor.dim %7783, %c1_5442 : tensor<4x?x4096xf16>
    %7784 = flow.tensor.transfer %7783 : tensor<4x?x4096xf16>{%dim_5443} to #hal.device.promise<@__device_1>
    %7785 = torch_c.from_builtin_tensor %7784 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7786 = torch_c.to_builtin_tensor %7719 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5444 = arith.constant 1 : index
    %dim_5445 = tensor.dim %7786, %c1_5444 : tensor<4x?x4096xf16>
    %7787 = flow.tensor.transfer %7786 : tensor<4x?x4096xf16>{%dim_5445} to #hal.device.promise<@__device_1>
    %7788 = torch_c.from_builtin_tensor %7787 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7789 = torch_c.to_builtin_tensor %7726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5446 = arith.constant 1 : index
    %dim_5447 = tensor.dim %7789, %c1_5446 : tensor<4x?x4096xf16>
    %7790 = flow.tensor.transfer %7789 : tensor<4x?x4096xf16>{%dim_5447} to #hal.device.promise<@__device_1>
    %7791 = torch_c.from_builtin_tensor %7790 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7792 = torch_c.to_builtin_tensor %7733 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5448 = arith.constant 1 : index
    %dim_5449 = tensor.dim %7792, %c1_5448 : tensor<4x?x4096xf16>
    %7793 = flow.tensor.transfer %7792 : tensor<4x?x4096xf16>{%dim_5449} to #hal.device.promise<@__device_1>
    %7794 = torch_c.from_builtin_tensor %7793 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7795 = torch_c.to_builtin_tensor %7740 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5450 = arith.constant 1 : index
    %dim_5451 = tensor.dim %7795, %c1_5450 : tensor<4x?x4096xf16>
    %7796 = flow.tensor.transfer %7795 : tensor<4x?x4096xf16>{%dim_5451} to #hal.device.promise<@__device_1>
    %7797 = torch_c.from_builtin_tensor %7796 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7798 = torch_c.to_builtin_tensor %7747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5452 = arith.constant 1 : index
    %dim_5453 = tensor.dim %7798, %c1_5452 : tensor<4x?x4096xf16>
    %7799 = flow.tensor.transfer %7798 : tensor<4x?x4096xf16>{%dim_5453} to #hal.device.promise<@__device_1>
    %7800 = torch_c.from_builtin_tensor %7799 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7801 = torch_c.to_builtin_tensor %7754 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5454 = arith.constant 1 : index
    %dim_5455 = tensor.dim %7801, %c1_5454 : tensor<4x?x4096xf16>
    %7802 = flow.tensor.transfer %7801 : tensor<4x?x4096xf16>{%dim_5455} to #hal.device.promise<@__device_1>
    %7803 = torch_c.from_builtin_tensor %7802 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5456 = torch.constant.int 1
    %7804 = torch.aten.add.Tensor %7785, %7712, %int1_5456 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5457 = torch.constant.int 1
    %7805 = torch.aten.add.Tensor %7804, %7788, %int1_5457 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5458 = torch.constant.int 1
    %7806 = torch.aten.add.Tensor %7805, %7791, %int1_5458 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5459 = torch.constant.int 1
    %7807 = torch.aten.add.Tensor %7806, %7794, %int1_5459 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5460 = torch.constant.int 1
    %7808 = torch.aten.add.Tensor %7807, %7797, %int1_5460 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5461 = torch.constant.int 1
    %7809 = torch.aten.add.Tensor %7808, %7800, %int1_5461 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5462 = torch.constant.int 1
    %7810 = torch.aten.add.Tensor %7809, %7803, %int1_5462 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
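    // Reduction replicated for @__device_2, with its local partial %7719 added
    // directly; each device ends up holding one fully reduced copy of the FFN output.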
    %7811 = torch_c.to_builtin_tensor %7705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5463 = arith.constant 1 : index
    %dim_5464 = tensor.dim %7811, %c1_5463 : tensor<4x?x4096xf16>
    %7812 = flow.tensor.transfer %7811 : tensor<4x?x4096xf16>{%dim_5464} to #hal.device.promise<@__device_2>
    %7813 = torch_c.from_builtin_tensor %7812 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7814 = torch_c.to_builtin_tensor %7712 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5465 = arith.constant 1 : index
    %dim_5466 = tensor.dim %7814, %c1_5465 : tensor<4x?x4096xf16>
    %7815 = flow.tensor.transfer %7814 : tensor<4x?x4096xf16>{%dim_5466} to #hal.device.promise<@__device_2>
    %7816 = torch_c.from_builtin_tensor %7815 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7817 = torch_c.to_builtin_tensor %7726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5467 = arith.constant 1 : index
    %dim_5468 = tensor.dim %7817, %c1_5467 : tensor<4x?x4096xf16>
    %7818 = flow.tensor.transfer %7817 : tensor<4x?x4096xf16>{%dim_5468} to #hal.device.promise<@__device_2>
    %7819 = torch_c.from_builtin_tensor %7818 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7820 = torch_c.to_builtin_tensor %7733 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5469 = arith.constant 1 : index
    %dim_5470 = tensor.dim %7820, %c1_5469 : tensor<4x?x4096xf16>
    %7821 = flow.tensor.transfer %7820 : tensor<4x?x4096xf16>{%dim_5470} to #hal.device.promise<@__device_2>
    %7822 = torch_c.from_builtin_tensor %7821 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7823 = torch_c.to_builtin_tensor %7740 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5471 = arith.constant 1 : index
    %dim_5472 = tensor.dim %7823, %c1_5471 : tensor<4x?x4096xf16>
    %7824 = flow.tensor.transfer %7823 : tensor<4x?x4096xf16>{%dim_5472} to #hal.device.promise<@__device_2>
    %7825 = torch_c.from_builtin_tensor %7824 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7826 = torch_c.to_builtin_tensor %7747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5473 = arith.constant 1 : index
    %dim_5474 = tensor.dim %7826, %c1_5473 : tensor<4x?x4096xf16>
    %7827 = flow.tensor.transfer %7826 : tensor<4x?x4096xf16>{%dim_5474} to #hal.device.promise<@__device_2>
    %7828 = torch_c.from_builtin_tensor %7827 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7829 = torch_c.to_builtin_tensor %7754 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5475 = arith.constant 1 : index
    %dim_5476 = tensor.dim %7829, %c1_5475 : tensor<4x?x4096xf16>
    %7830 = flow.tensor.transfer %7829 : tensor<4x?x4096xf16>{%dim_5476} to #hal.device.promise<@__device_2>
    %7831 = torch_c.from_builtin_tensor %7830 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5477 = torch.constant.int 1
    %7832 = torch.aten.add.Tensor %7813, %7816, %int1_5477 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5478 = torch.constant.int 1
    %7833 = torch.aten.add.Tensor %7832, %7719, %int1_5478 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5479 = torch.constant.int 1
    %7834 = torch.aten.add.Tensor %7833, %7819, %int1_5479 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5480 = torch.constant.int 1
    %7835 = torch.aten.add.Tensor %7834, %7822, %int1_5480 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5481 = torch.constant.int 1
    %7836 = torch.aten.add.Tensor %7835, %7825, %int1_5481 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5482 = torch.constant.int 1
    %7837 = torch.aten.add.Tensor %7836, %7828, %int1_5482 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5483 = torch.constant.int 1
    %7838 = torch.aten.add.Tensor %7837, %7831, %int1_5483 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
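    // Same gather-and-sum onto @__device_3; %7726 is already local there and is
    // added directly without a transfer.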
    %7839 = torch_c.to_builtin_tensor %7705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5484 = arith.constant 1 : index
    %dim_5485 = tensor.dim %7839, %c1_5484 : tensor<4x?x4096xf16>
    %7840 = flow.tensor.transfer %7839 : tensor<4x?x4096xf16>{%dim_5485} to #hal.device.promise<@__device_3>
    %7841 = torch_c.from_builtin_tensor %7840 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7842 = torch_c.to_builtin_tensor %7712 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5486 = arith.constant 1 : index
    %dim_5487 = tensor.dim %7842, %c1_5486 : tensor<4x?x4096xf16>
    %7843 = flow.tensor.transfer %7842 : tensor<4x?x4096xf16>{%dim_5487} to #hal.device.promise<@__device_3>
    %7844 = torch_c.from_builtin_tensor %7843 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7845 = torch_c.to_builtin_tensor %7719 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5488 = arith.constant 1 : index
    %dim_5489 = tensor.dim %7845, %c1_5488 : tensor<4x?x4096xf16>
    %7846 = flow.tensor.transfer %7845 : tensor<4x?x4096xf16>{%dim_5489} to #hal.device.promise<@__device_3>
    %7847 = torch_c.from_builtin_tensor %7846 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7848 = torch_c.to_builtin_tensor %7733 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5490 = arith.constant 1 : index
    %dim_5491 = tensor.dim %7848, %c1_5490 : tensor<4x?x4096xf16>
    %7849 = flow.tensor.transfer %7848 : tensor<4x?x4096xf16>{%dim_5491} to #hal.device.promise<@__device_3>
    %7850 = torch_c.from_builtin_tensor %7849 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7851 = torch_c.to_builtin_tensor %7740 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5492 = arith.constant 1 : index
    %dim_5493 = tensor.dim %7851, %c1_5492 : tensor<4x?x4096xf16>
    %7852 = flow.tensor.transfer %7851 : tensor<4x?x4096xf16>{%dim_5493} to #hal.device.promise<@__device_3>
    %7853 = torch_c.from_builtin_tensor %7852 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7854 = torch_c.to_builtin_tensor %7747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5494 = arith.constant 1 : index
    %dim_5495 = tensor.dim %7854, %c1_5494 : tensor<4x?x4096xf16>
    %7855 = flow.tensor.transfer %7854 : tensor<4x?x4096xf16>{%dim_5495} to #hal.device.promise<@__device_3>
    %7856 = torch_c.from_builtin_tensor %7855 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7857 = torch_c.to_builtin_tensor %7754 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5496 = arith.constant 1 : index
    %dim_5497 = tensor.dim %7857, %c1_5496 : tensor<4x?x4096xf16>
    %7858 = flow.tensor.transfer %7857 : tensor<4x?x4096xf16>{%dim_5497} to #hal.device.promise<@__device_3>
    %7859 = torch_c.from_builtin_tensor %7858 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5498 = torch.constant.int 1
    %7860 = torch.aten.add.Tensor %7841, %7844, %int1_5498 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5499 = torch.constant.int 1
    %7861 = torch.aten.add.Tensor %7860, %7847, %int1_5499 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5500 = torch.constant.int 1
    %7862 = torch.aten.add.Tensor %7861, %7726, %int1_5500 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5501 = torch.constant.int 1
    %7863 = torch.aten.add.Tensor %7862, %7850, %int1_5501 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5502 = torch.constant.int 1
    %7864 = torch.aten.add.Tensor %7863, %7853, %int1_5502 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5503 = torch.constant.int 1
    %7865 = torch.aten.add.Tensor %7864, %7856, %int1_5503 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5504 = torch.constant.int 1
    %7866 = torch.aten.add.Tensor %7865, %7859, %int1_5504 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
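    // Gather-and-sum onto @__device_4 (local partial: %7733).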
    %7867 = torch_c.to_builtin_tensor %7705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5505 = arith.constant 1 : index
    %dim_5506 = tensor.dim %7867, %c1_5505 : tensor<4x?x4096xf16>
    %7868 = flow.tensor.transfer %7867 : tensor<4x?x4096xf16>{%dim_5506} to #hal.device.promise<@__device_4>
    %7869 = torch_c.from_builtin_tensor %7868 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7870 = torch_c.to_builtin_tensor %7712 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5507 = arith.constant 1 : index
    %dim_5508 = tensor.dim %7870, %c1_5507 : tensor<4x?x4096xf16>
    %7871 = flow.tensor.transfer %7870 : tensor<4x?x4096xf16>{%dim_5508} to #hal.device.promise<@__device_4>
    %7872 = torch_c.from_builtin_tensor %7871 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7873 = torch_c.to_builtin_tensor %7719 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5509 = arith.constant 1 : index
    %dim_5510 = tensor.dim %7873, %c1_5509 : tensor<4x?x4096xf16>
    %7874 = flow.tensor.transfer %7873 : tensor<4x?x4096xf16>{%dim_5510} to #hal.device.promise<@__device_4>
    %7875 = torch_c.from_builtin_tensor %7874 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7876 = torch_c.to_builtin_tensor %7726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5511 = arith.constant 1 : index
    %dim_5512 = tensor.dim %7876, %c1_5511 : tensor<4x?x4096xf16>
    %7877 = flow.tensor.transfer %7876 : tensor<4x?x4096xf16>{%dim_5512} to #hal.device.promise<@__device_4>
    %7878 = torch_c.from_builtin_tensor %7877 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7879 = torch_c.to_builtin_tensor %7740 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5513 = arith.constant 1 : index
    %dim_5514 = tensor.dim %7879, %c1_5513 : tensor<4x?x4096xf16>
    %7880 = flow.tensor.transfer %7879 : tensor<4x?x4096xf16>{%dim_5514} to #hal.device.promise<@__device_4>
    %7881 = torch_c.from_builtin_tensor %7880 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7882 = torch_c.to_builtin_tensor %7747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5515 = arith.constant 1 : index
    %dim_5516 = tensor.dim %7882, %c1_5515 : tensor<4x?x4096xf16>
    %7883 = flow.tensor.transfer %7882 : tensor<4x?x4096xf16>{%dim_5516} to #hal.device.promise<@__device_4>
    %7884 = torch_c.from_builtin_tensor %7883 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7885 = torch_c.to_builtin_tensor %7754 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5517 = arith.constant 1 : index
    %dim_5518 = tensor.dim %7885, %c1_5517 : tensor<4x?x4096xf16>
    %7886 = flow.tensor.transfer %7885 : tensor<4x?x4096xf16>{%dim_5518} to #hal.device.promise<@__device_4>
    %7887 = torch_c.from_builtin_tensor %7886 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5519 = torch.constant.int 1
    %7888 = torch.aten.add.Tensor %7869, %7872, %int1_5519 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5520 = torch.constant.int 1
    %7889 = torch.aten.add.Tensor %7888, %7875, %int1_5520 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5521 = torch.constant.int 1
    %7890 = torch.aten.add.Tensor %7889, %7878, %int1_5521 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5522 = torch.constant.int 1
    %7891 = torch.aten.add.Tensor %7890, %7733, %int1_5522 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5523 = torch.constant.int 1
    %7892 = torch.aten.add.Tensor %7891, %7881, %int1_5523 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5524 = torch.constant.int 1
    %7893 = torch.aten.add.Tensor %7892, %7884, %int1_5524 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5525 = torch.constant.int 1
    %7894 = torch.aten.add.Tensor %7893, %7887, %int1_5525 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
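    // Gather-and-sum onto @__device_5 (local partial: %7740).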
    %7895 = torch_c.to_builtin_tensor %7705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5526 = arith.constant 1 : index
    %dim_5527 = tensor.dim %7895, %c1_5526 : tensor<4x?x4096xf16>
    %7896 = flow.tensor.transfer %7895 : tensor<4x?x4096xf16>{%dim_5527} to #hal.device.promise<@__device_5>
    %7897 = torch_c.from_builtin_tensor %7896 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7898 = torch_c.to_builtin_tensor %7712 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5528 = arith.constant 1 : index
    %dim_5529 = tensor.dim %7898, %c1_5528 : tensor<4x?x4096xf16>
    %7899 = flow.tensor.transfer %7898 : tensor<4x?x4096xf16>{%dim_5529} to #hal.device.promise<@__device_5>
    %7900 = torch_c.from_builtin_tensor %7899 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7901 = torch_c.to_builtin_tensor %7719 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5530 = arith.constant 1 : index
    %dim_5531 = tensor.dim %7901, %c1_5530 : tensor<4x?x4096xf16>
    %7902 = flow.tensor.transfer %7901 : tensor<4x?x4096xf16>{%dim_5531} to #hal.device.promise<@__device_5>
    %7903 = torch_c.from_builtin_tensor %7902 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7904 = torch_c.to_builtin_tensor %7726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5532 = arith.constant 1 : index
    %dim_5533 = tensor.dim %7904, %c1_5532 : tensor<4x?x4096xf16>
    %7905 = flow.tensor.transfer %7904 : tensor<4x?x4096xf16>{%dim_5533} to #hal.device.promise<@__device_5>
    %7906 = torch_c.from_builtin_tensor %7905 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7907 = torch_c.to_builtin_tensor %7733 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5534 = arith.constant 1 : index
    %dim_5535 = tensor.dim %7907, %c1_5534 : tensor<4x?x4096xf16>
    %7908 = flow.tensor.transfer %7907 : tensor<4x?x4096xf16>{%dim_5535} to #hal.device.promise<@__device_5>
    %7909 = torch_c.from_builtin_tensor %7908 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7910 = torch_c.to_builtin_tensor %7747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5536 = arith.constant 1 : index
    %dim_5537 = tensor.dim %7910, %c1_5536 : tensor<4x?x4096xf16>
    %7911 = flow.tensor.transfer %7910 : tensor<4x?x4096xf16>{%dim_5537} to #hal.device.promise<@__device_5>
    %7912 = torch_c.from_builtin_tensor %7911 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7913 = torch_c.to_builtin_tensor %7754 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5538 = arith.constant 1 : index
    %dim_5539 = tensor.dim %7913, %c1_5538 : tensor<4x?x4096xf16>
    %7914 = flow.tensor.transfer %7913 : tensor<4x?x4096xf16>{%dim_5539} to #hal.device.promise<@__device_5>
    %7915 = torch_c.from_builtin_tensor %7914 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5540 = torch.constant.int 1
    %7916 = torch.aten.add.Tensor %7897, %7900, %int1_5540 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5541 = torch.constant.int 1
    %7917 = torch.aten.add.Tensor %7916, %7903, %int1_5541 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5542 = torch.constant.int 1
    %7918 = torch.aten.add.Tensor %7917, %7906, %int1_5542 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5543 = torch.constant.int 1
    %7919 = torch.aten.add.Tensor %7918, %7909, %int1_5543 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5544 = torch.constant.int 1
    %7920 = torch.aten.add.Tensor %7919, %7740, %int1_5544 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5545 = torch.constant.int 1
    %7921 = torch.aten.add.Tensor %7920, %7912, %int1_5545 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5546 = torch.constant.int 1
    %7922 = torch.aten.add.Tensor %7921, %7915, %int1_5546 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
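    // Gather-and-sum onto @__device_6 (local partial: %7747).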
    %7923 = torch_c.to_builtin_tensor %7705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5547 = arith.constant 1 : index
    %dim_5548 = tensor.dim %7923, %c1_5547 : tensor<4x?x4096xf16>
    %7924 = flow.tensor.transfer %7923 : tensor<4x?x4096xf16>{%dim_5548} to #hal.device.promise<@__device_6>
    %7925 = torch_c.from_builtin_tensor %7924 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7926 = torch_c.to_builtin_tensor %7712 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5549 = arith.constant 1 : index
    %dim_5550 = tensor.dim %7926, %c1_5549 : tensor<4x?x4096xf16>
    %7927 = flow.tensor.transfer %7926 : tensor<4x?x4096xf16>{%dim_5550} to #hal.device.promise<@__device_6>
    %7928 = torch_c.from_builtin_tensor %7927 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7929 = torch_c.to_builtin_tensor %7719 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5551 = arith.constant 1 : index
    %dim_5552 = tensor.dim %7929, %c1_5551 : tensor<4x?x4096xf16>
    %7930 = flow.tensor.transfer %7929 : tensor<4x?x4096xf16>{%dim_5552} to #hal.device.promise<@__device_6>
    %7931 = torch_c.from_builtin_tensor %7930 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7932 = torch_c.to_builtin_tensor %7726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5553 = arith.constant 1 : index
    %dim_5554 = tensor.dim %7932, %c1_5553 : tensor<4x?x4096xf16>
    %7933 = flow.tensor.transfer %7932 : tensor<4x?x4096xf16>{%dim_5554} to #hal.device.promise<@__device_6>
    %7934 = torch_c.from_builtin_tensor %7933 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7935 = torch_c.to_builtin_tensor %7733 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5555 = arith.constant 1 : index
    %dim_5556 = tensor.dim %7935, %c1_5555 : tensor<4x?x4096xf16>
    %7936 = flow.tensor.transfer %7935 : tensor<4x?x4096xf16>{%dim_5556} to #hal.device.promise<@__device_6>
    %7937 = torch_c.from_builtin_tensor %7936 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7938 = torch_c.to_builtin_tensor %7740 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5557 = arith.constant 1 : index
    %dim_5558 = tensor.dim %7938, %c1_5557 : tensor<4x?x4096xf16>
    %7939 = flow.tensor.transfer %7938 : tensor<4x?x4096xf16>{%dim_5558} to #hal.device.promise<@__device_6>
    %7940 = torch_c.from_builtin_tensor %7939 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7941 = torch_c.to_builtin_tensor %7754 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5559 = arith.constant 1 : index
    %dim_5560 = tensor.dim %7941, %c1_5559 : tensor<4x?x4096xf16>
    %7942 = flow.tensor.transfer %7941 : tensor<4x?x4096xf16>{%dim_5560} to #hal.device.promise<@__device_6>
    %7943 = torch_c.from_builtin_tensor %7942 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5561 = torch.constant.int 1
    %7944 = torch.aten.add.Tensor %7925, %7928, %int1_5561 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5562 = torch.constant.int 1
    %7945 = torch.aten.add.Tensor %7944, %7931, %int1_5562 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5563 = torch.constant.int 1
    %7946 = torch.aten.add.Tensor %7945, %7934, %int1_5563 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5564 = torch.constant.int 1
    %7947 = torch.aten.add.Tensor %7946, %7937, %int1_5564 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5565 = torch.constant.int 1
    %7948 = torch.aten.add.Tensor %7947, %7940, %int1_5565 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5566 = torch.constant.int 1
    %7949 = torch.aten.add.Tensor %7948, %7747, %int1_5566 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5567 = torch.constant.int 1
    %7950 = torch.aten.add.Tensor %7949, %7943, %int1_5567 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
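    // Gather-and-sum onto @__device_7 (local partial: %7754).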
    %7951 = torch_c.to_builtin_tensor %7705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5568 = arith.constant 1 : index
    %dim_5569 = tensor.dim %7951, %c1_5568 : tensor<4x?x4096xf16>
    %7952 = flow.tensor.transfer %7951 : tensor<4x?x4096xf16>{%dim_5569} to #hal.device.promise<@__device_7>
    %7953 = torch_c.from_builtin_tensor %7952 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7954 = torch_c.to_builtin_tensor %7712 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5570 = arith.constant 1 : index
    %dim_5571 = tensor.dim %7954, %c1_5570 : tensor<4x?x4096xf16>
    %7955 = flow.tensor.transfer %7954 : tensor<4x?x4096xf16>{%dim_5571} to #hal.device.promise<@__device_7>
    %7956 = torch_c.from_builtin_tensor %7955 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7957 = torch_c.to_builtin_tensor %7719 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5572 = arith.constant 1 : index
    %dim_5573 = tensor.dim %7957, %c1_5572 : tensor<4x?x4096xf16>
    %7958 = flow.tensor.transfer %7957 : tensor<4x?x4096xf16>{%dim_5573} to #hal.device.promise<@__device_7>
    %7959 = torch_c.from_builtin_tensor %7958 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7960 = torch_c.to_builtin_tensor %7726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5574 = arith.constant 1 : index
    %dim_5575 = tensor.dim %7960, %c1_5574 : tensor<4x?x4096xf16>
    %7961 = flow.tensor.transfer %7960 : tensor<4x?x4096xf16>{%dim_5575} to #hal.device.promise<@__device_7>
    %7962 = torch_c.from_builtin_tensor %7961 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7963 = torch_c.to_builtin_tensor %7733 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5576 = arith.constant 1 : index
    %dim_5577 = tensor.dim %7963, %c1_5576 : tensor<4x?x4096xf16>
    %7964 = flow.tensor.transfer %7963 : tensor<4x?x4096xf16>{%dim_5577} to #hal.device.promise<@__device_7>
    %7965 = torch_c.from_builtin_tensor %7964 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7966 = torch_c.to_builtin_tensor %7740 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5578 = arith.constant 1 : index
    %dim_5579 = tensor.dim %7966, %c1_5578 : tensor<4x?x4096xf16>
    %7967 = flow.tensor.transfer %7966 : tensor<4x?x4096xf16>{%dim_5579} to #hal.device.promise<@__device_7>
    %7968 = torch_c.from_builtin_tensor %7967 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %7969 = torch_c.to_builtin_tensor %7747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_5580 = arith.constant 1 : index
    %dim_5581 = tensor.dim %7969, %c1_5580 : tensor<4x?x4096xf16>
    %7970 = flow.tensor.transfer %7969 : tensor<4x?x4096xf16>{%dim_5581} to #hal.device.promise<@__device_7>
    %7971 = torch_c.from_builtin_tensor %7970 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5582 = torch.constant.int 1
    %7972 = torch.aten.add.Tensor %7953, %7956, %int1_5582 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5583 = torch.constant.int 1
    %7973 = torch.aten.add.Tensor %7972, %7959, %int1_5583 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5584 = torch.constant.int 1
    %7974 = torch.aten.add.Tensor %7973, %7962, %int1_5584 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5585 = torch.constant.int 1
    %7975 = torch.aten.add.Tensor %7974, %7965, %int1_5585 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5586 = torch.constant.int 1
    %7976 = torch.aten.add.Tensor %7975, %7968, %int1_5586 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5587 = torch.constant.int 1
    %7977 = torch.aten.add.Tensor %7976, %7971, %int1_5587 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5588 = torch.constant.int 1
    %7978 = torch.aten.add.Tensor %7977, %7754, %int1_5588 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
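    // Per-device residual add: each reduced result (%7782, %7810, %7838, %7866,
    // %7894, %7922, %7950, %7978) is added to the corresponding earlier
    // activation %7459..%7466 -- presumably this layer's skip connection.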
    %int1_5589 = torch.constant.int 1
    %7979 = torch.aten.add.Tensor %7459, %7782, %int1_5589 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5590 = torch.constant.int 1
    %7980 = torch.aten.add.Tensor %7460, %7810, %int1_5590 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5591 = torch.constant.int 1
    %7981 = torch.aten.add.Tensor %7461, %7838, %int1_5591 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5592 = torch.constant.int 1
    %7982 = torch.aten.add.Tensor %7462, %7866, %int1_5592 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5593 = torch.constant.int 1
    %7983 = torch.aten.add.Tensor %7463, %7894, %int1_5593 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5594 = torch.constant.int 1
    %7984 = torch.aten.add.Tensor %7464, %7922, %int1_5594 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5595 = torch.constant.int 1
    %7985 = torch.aten.add.Tensor %7465, %7950, %int1_5595 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7985, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_5596 = torch.constant.int 1
    %7986 = torch.aten.add.Tensor %7466, %7978, %int1_5596 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %7986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
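    // RMSNorm-style normalization of the eight residual streams, starting with
    // an upcast to f32 (torch dtype code 6).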
    %int6_5597 = torch.constant.int 6
    %7987 = torch.prims.convert_element_type %7979, %int6_5597 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5598 = torch.constant.int 6
    %7988 = torch.prims.convert_element_type %7980, %int6_5598 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5599 = torch.constant.int 6
    %7989 = torch.prims.convert_element_type %7981, %int6_5599 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5600 = torch.constant.int 6
    %7990 = torch.prims.convert_element_type %7982, %int6_5600 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5601 = torch.constant.int 6
    %7991 = torch.prims.convert_element_type %7983, %int6_5601 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5602 = torch.constant.int 6
    %7992 = torch.prims.convert_element_type %7984, %int6_5602 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5603 = torch.constant.int 6
    %7993 = torch.prims.convert_element_type %7985, %int6_5603 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_5604 = torch.constant.int 6
    %7994 = torch.prims.convert_element_type %7986, %int6_5604 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
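    // Square every element.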
    %int2_5605 = torch.constant.int 2
    %7995 = torch.aten.pow.Tensor_Scalar %7987, %int2_5605 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5606 = torch.constant.int 2
    %7996 = torch.aten.pow.Tensor_Scalar %7988, %int2_5606 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5607 = torch.constant.int 2
    %7997 = torch.aten.pow.Tensor_Scalar %7989, %int2_5607 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5608 = torch.constant.int 2
    %7998 = torch.aten.pow.Tensor_Scalar %7990, %int2_5608 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5609 = torch.constant.int 2
    %7999 = torch.aten.pow.Tensor_Scalar %7991, %int2_5609 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %7999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5610 = torch.constant.int 2
    %8000 = torch.aten.pow.Tensor_Scalar %7992, %int2_5610 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5611 = torch.constant.int 2
    %8001 = torch.aten.pow.Tensor_Scalar %7993, %int2_5611 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_5612 = torch.constant.int 2
    %8002 = torch.aten.pow.Tensor_Scalar %7994, %int2_5612 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
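    // Mean of the squares over the hidden dimension (dim -1, keepdim = true).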
    %int-1_5613 = torch.constant.int -1
    %8003 = torch.prim.ListConstruct %int-1_5613 : (!torch.int) -> !torch.list<int>
    %true_5614 = torch.constant.bool true
    %none_5615 = torch.constant.none
    %8004 = torch.aten.mean.dim %7995, %8003, %true_5614, %none_5615 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5616 = torch.constant.int -1
    %8005 = torch.prim.ListConstruct %int-1_5616 : (!torch.int) -> !torch.list<int>
    %true_5617 = torch.constant.bool true
    %none_5618 = torch.constant.none
    %8006 = torch.aten.mean.dim %7996, %8005, %true_5617, %none_5618 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5619 = torch.constant.int -1
    %8007 = torch.prim.ListConstruct %int-1_5619 : (!torch.int) -> !torch.list<int>
    %true_5620 = torch.constant.bool true
    %none_5621 = torch.constant.none
    %8008 = torch.aten.mean.dim %7997, %8007, %true_5620, %none_5621 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5622 = torch.constant.int -1
    %8009 = torch.prim.ListConstruct %int-1_5622 : (!torch.int) -> !torch.list<int>
    %true_5623 = torch.constant.bool true
    %none_5624 = torch.constant.none
    %8010 = torch.aten.mean.dim %7998, %8009, %true_5623, %none_5624 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5625 = torch.constant.int -1
    %8011 = torch.prim.ListConstruct %int-1_5625 : (!torch.int) -> !torch.list<int>
    %true_5626 = torch.constant.bool true
    %none_5627 = torch.constant.none
    %8012 = torch.aten.mean.dim %7999, %8011, %true_5626, %none_5627 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5628 = torch.constant.int -1
    %8013 = torch.prim.ListConstruct %int-1_5628 : (!torch.int) -> !torch.list<int>
    %true_5629 = torch.constant.bool true
    %none_5630 = torch.constant.none
    %8014 = torch.aten.mean.dim %8000, %8013, %true_5629, %none_5630 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5631 = torch.constant.int -1
    %8015 = torch.prim.ListConstruct %int-1_5631 : (!torch.int) -> !torch.list<int>
    %true_5632 = torch.constant.bool true
    %none_5633 = torch.constant.none
    %8016 = torch.aten.mean.dim %8001, %8015, %true_5632, %none_5633 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_5634 = torch.constant.int -1
    %8017 = torch.prim.ListConstruct %int-1_5634 : (!torch.int) -> !torch.list<int>
    %true_5635 = torch.constant.bool true
    %none_5636 = torch.constant.none
    %8018 = torch.aten.mean.dim %8002, %8017, %true_5635, %none_5636 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
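    // Add the epsilon (~1e-5) to each mean square.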
    %float9.999990e-06_5637 = torch.constant.float 9.9999997473787516E-6
    %int1_5638 = torch.constant.int 1
    %8019 = torch.aten.add.Scalar %8004, %float9.999990e-06_5637, %int1_5638 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5639 = torch.constant.float 9.9999997473787516E-6
    %int1_5640 = torch.constant.int 1
    %8020 = torch.aten.add.Scalar %8006, %float9.999990e-06_5639, %int1_5640 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5641 = torch.constant.float 9.9999997473787516E-6
    %int1_5642 = torch.constant.int 1
    %8021 = torch.aten.add.Scalar %8008, %float9.999990e-06_5641, %int1_5642 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5643 = torch.constant.float 9.9999997473787516E-6
    %int1_5644 = torch.constant.int 1
    %8022 = torch.aten.add.Scalar %8010, %float9.999990e-06_5643, %int1_5644 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5645 = torch.constant.float 9.9999997473787516E-6
    %int1_5646 = torch.constant.int 1
    %8023 = torch.aten.add.Scalar %8012, %float9.999990e-06_5645, %int1_5646 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5647 = torch.constant.float 9.9999997473787516E-6
    %int1_5648 = torch.constant.int 1
    %8024 = torch.aten.add.Scalar %8014, %float9.999990e-06_5647, %int1_5648 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5649 = torch.constant.float 9.9999997473787516E-6
    %int1_5650 = torch.constant.int 1
    %8025 = torch.aten.add.Scalar %8016, %float9.999990e-06_5649, %int1_5650 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_5651 = torch.constant.float 9.9999997473787516E-6
    %int1_5652 = torch.constant.int 1
    %8026 = torch.aten.add.Scalar %8018, %float9.999990e-06_5651, %int1_5652 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
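    // Reciprocal square root of the regularized mean square.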
    %8027 = torch.aten.rsqrt %8019 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %8028 = torch.aten.rsqrt %8020 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %8029 = torch.aten.rsqrt %8021 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %8030 = torch.aten.rsqrt %8022 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %8031 = torch.aten.rsqrt %8023 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %8032 = torch.aten.rsqrt %8024 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %8033 = torch.aten.rsqrt %8025 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %8034 = torch.aten.rsqrt %8026 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %8034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
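    // Normalize: multiply each f32 activation by its row's rsqrt factor.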
    %8035 = torch.aten.mul.Tensor %7987, %8027 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8036 = torch.aten.mul.Tensor %7988, %8028 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8037 = torch.aten.mul.Tensor %7989, %8029 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8038 = torch.aten.mul.Tensor %7990, %8030 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8039 = torch.aten.mul.Tensor %7991, %8031 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8040 = torch.aten.mul.Tensor %7992, %8032 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8041 = torch.aten.mul.Tensor %7993, %8033 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8042 = torch.aten.mul.Tensor %7994, %8034 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
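    // Scale by the learned norm weights %224..%231 (one f32 tensor<4096> per
    // device).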
    %8043 = torch.aten.mul.Tensor %224, %8035 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8044 = torch.aten.mul.Tensor %225, %8036 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8045 = torch.aten.mul.Tensor %226, %8037 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8046 = torch.aten.mul.Tensor %227, %8038 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8047 = torch.aten.mul.Tensor %228, %8039 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8048 = torch.aten.mul.Tensor %229, %8040 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8049 = torch.aten.mul.Tensor %230, %8041 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %8050 = torch.aten.mul.Tensor %231, %8042 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %8050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
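    // Downcast the normalized activations back to f16 (torch dtype code 5).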
    %int5_5653 = torch.constant.int 5
    %8051 = torch.prims.convert_element_type %8043, %int5_5653 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %8051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5654 = torch.constant.int 5
    %8052 = torch.prims.convert_element_type %8044, %int5_5654 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %8052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5655 = torch.constant.int 5
    %8053 = torch.prims.convert_element_type %8045, %int5_5655 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %8053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5656 = torch.constant.int 5
    %8054 = torch.prims.convert_element_type %8046, %int5_5656 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %8054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5657 = torch.constant.int 5
    %8055 = torch.prims.convert_element_type %8047, %int5_5657 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %8055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5658 = torch.constant.int 5
    %8056 = torch.prims.convert_element_type %8048, %int5_5658 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %8056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5659 = torch.constant.int 5
    %8057 = torch.prims.convert_element_type %8049, %int5_5659 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %8057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_5660 = torch.constant.int 5
    %8058 = torch.prims.convert_element_type %8050, %int5_5660 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %8058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
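    // Next projection: transpose each per-device 512x4096 f16 weight shard
    // (%232..%239) to 4096x512 for the sharded matmuls below (8 shards x 512 =
    // 4096 output features in total).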
    %int1_5661 = torch.constant.int 1
    %int0_5662 = torch.constant.int 0
    %8059 = torch.prim.ListConstruct %int1_5661, %int0_5662 : (!torch.int, !torch.int) -> !torch.list<int>
    %8060 = torch.aten.permute %232, %8059 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_5663 = torch.constant.int 1
    %int0_5664 = torch.constant.int 0
    %8061 = torch.prim.ListConstruct %int1_5663, %int0_5664 : (!torch.int, !torch.int) -> !torch.list<int>
    %8062 = torch.aten.permute %233, %8061 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_5665 = torch.constant.int 1
    %int0_5666 = torch.constant.int 0
    %8063 = torch.prim.ListConstruct %int1_5665, %int0_5666 : (!torch.int, !torch.int) -> !torch.list<int>
    %8064 = torch.aten.permute %234, %8063 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_5667 = torch.constant.int 1
    %int0_5668 = torch.constant.int 0
    %8065 = torch.prim.ListConstruct %int1_5667, %int0_5668 : (!torch.int, !torch.int) -> !torch.list<int>
    %8066 = torch.aten.permute %235, %8065 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_5669 = torch.constant.int 1
    %int0_5670 = torch.constant.int 0
    %8067 = torch.prim.ListConstruct %int1_5669, %int0_5670 : (!torch.int, !torch.int) -> !torch.list<int>
    %8068 = torch.aten.permute %236, %8067 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_5671 = torch.constant.int 1
    %int0_5672 = torch.constant.int 0
    %8069 = torch.prim.ListConstruct %int1_5671, %int0_5672 : (!torch.int, !torch.int) -> !torch.list<int>
    %8070 = torch.aten.permute %237, %8069 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_5673 = torch.constant.int 1
    %int0_5674 = torch.constant.int 0
    %8071 = torch.prim.ListConstruct %int1_5673, %int0_5674 : (!torch.int, !torch.int) -> !torch.list<int>
    %8072 = torch.aten.permute %238, %8071 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_5675 = torch.constant.int 1
    %int0_5676 = torch.constant.int 0
    %8073 = torch.prim.ListConstruct %int1_5675, %int0_5676 : (!torch.int, !torch.int) -> !torch.list<int>
    %8074 = torch.aten.permute %239, %8073 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int4_5677 = torch.constant.int 4
    %8075 = torch.aten.mul.int %int4_5677, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5678 = torch.constant.int 4096
    %8076 = torch.prim.ListConstruct %8075, %int4096_5678 : (!torch.int, !torch.int) -> !torch.list<int>
    %8077 = torch.aten.view %8051, %8076 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8077, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8078 = torch.aten.mm %8077, %8060 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %8078, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_5679 = torch.constant.int 4
    %int512_5680 = torch.constant.int 512
    %8079 = torch.prim.ListConstruct %int4_5679, %2482, %int512_5680 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8080 = torch.aten.view %8078, %8079 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %8080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_5681 = torch.constant.int 4
    %8081 = torch.aten.mul.int %int4_5681, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5682 = torch.constant.int 4096
    %8082 = torch.prim.ListConstruct %8081, %int4096_5682 : (!torch.int, !torch.int) -> !torch.list<int>
    %8083 = torch.aten.view %8052, %8082 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8083, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8084 = torch.aten.mm %8083, %8062 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %8084, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_5683 = torch.constant.int 4
    %int512_5684 = torch.constant.int 512
    %8085 = torch.prim.ListConstruct %int4_5683, %2482, %int512_5684 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8086 = torch.aten.view %8084, %8085 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %8086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_5685 = torch.constant.int 4
    %8087 = torch.aten.mul.int %int4_5685, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5686 = torch.constant.int 4096
    %8088 = torch.prim.ListConstruct %8087, %int4096_5686 : (!torch.int, !torch.int) -> !torch.list<int>
    %8089 = torch.aten.view %8053, %8088 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8089, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8090 = torch.aten.mm %8089, %8064 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %8090, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_5687 = torch.constant.int 4
    %int512_5688 = torch.constant.int 512
    %8091 = torch.prim.ListConstruct %int4_5687, %2482, %int512_5688 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8092 = torch.aten.view %8090, %8091 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %8092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_5689 = torch.constant.int 4
    %8093 = torch.aten.mul.int %int4_5689, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5690 = torch.constant.int 4096
    %8094 = torch.prim.ListConstruct %8093, %int4096_5690 : (!torch.int, !torch.int) -> !torch.list<int>
    %8095 = torch.aten.view %8054, %8094 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8095, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8096 = torch.aten.mm %8095, %8066 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %8096, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_5691 = torch.constant.int 4
    %int512_5692 = torch.constant.int 512
    %8097 = torch.prim.ListConstruct %int4_5691, %2482, %int512_5692 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8098 = torch.aten.view %8096, %8097 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %8098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_5693 = torch.constant.int 4
    %8099 = torch.aten.mul.int %int4_5693, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5694 = torch.constant.int 4096
    %8100 = torch.prim.ListConstruct %8099, %int4096_5694 : (!torch.int, !torch.int) -> !torch.list<int>
    %8101 = torch.aten.view %8055, %8100 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8101, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8102 = torch.aten.mm %8101, %8068 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %8102, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_5695 = torch.constant.int 4
    %int512_5696 = torch.constant.int 512
    %8103 = torch.prim.ListConstruct %int4_5695, %2482, %int512_5696 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8104 = torch.aten.view %8102, %8103 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %8104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_5697 = torch.constant.int 4
    %8105 = torch.aten.mul.int %int4_5697, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5698 = torch.constant.int 4096
    %8106 = torch.prim.ListConstruct %8105, %int4096_5698 : (!torch.int, !torch.int) -> !torch.list<int>
    %8107 = torch.aten.view %8056, %8106 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8107, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8108 = torch.aten.mm %8107, %8070 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %8108, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_5699 = torch.constant.int 4
    %int512_5700 = torch.constant.int 512
    %8109 = torch.prim.ListConstruct %int4_5699, %2482, %int512_5700 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8110 = torch.aten.view %8108, %8109 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %8110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_5701 = torch.constant.int 4
    %8111 = torch.aten.mul.int %int4_5701, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5702 = torch.constant.int 4096
    %8112 = torch.prim.ListConstruct %8111, %int4096_5702 : (!torch.int, !torch.int) -> !torch.list<int>
    %8113 = torch.aten.view %8057, %8112 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8113, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8114 = torch.aten.mm %8113, %8072 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %8114, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_5703 = torch.constant.int 4
    %int512_5704 = torch.constant.int 512
    %8115 = torch.prim.ListConstruct %int4_5703, %2482, %int512_5704 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8116 = torch.aten.view %8114, %8115 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %8116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_5705 = torch.constant.int 4
    %8117 = torch.aten.mul.int %int4_5705, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5706 = torch.constant.int 4096
    %8118 = torch.prim.ListConstruct %8117, %int4096_5706 : (!torch.int, !torch.int) -> !torch.list<int>
    %8119 = torch.aten.view %8058, %8118 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8119, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8120 = torch.aten.mm %8119, %8074 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %8120, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_5707 = torch.constant.int 4
    %int512_5708 = torch.constant.int 512
    %8121 = torch.prim.ListConstruct %int4_5707, %2482, %int512_5708 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8122 = torch.aten.view %8120, %8121 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %8122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
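    // Per-shard K projection: the same permute/flatten/mm/reshape pattern,
    // but with [128, 4096] weight shards transposed to [4096, 128], yielding
    // [4, ?, 128] per device — consistent with a single 128-dim KV head per
    // shard (a grouped-query-attention layout; inferred from the shapes, not
    // stated in the IR).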
    %int1_5709 = torch.constant.int 1
    %int0_5710 = torch.constant.int 0
    %8123 = torch.prim.ListConstruct %int1_5709, %int0_5710 : (!torch.int, !torch.int) -> !torch.list<int>
    %8124 = torch.aten.permute %240, %8123 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5711 = torch.constant.int 1
    %int0_5712 = torch.constant.int 0
    %8125 = torch.prim.ListConstruct %int1_5711, %int0_5712 : (!torch.int, !torch.int) -> !torch.list<int>
    %8126 = torch.aten.permute %241, %8125 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5713 = torch.constant.int 1
    %int0_5714 = torch.constant.int 0
    %8127 = torch.prim.ListConstruct %int1_5713, %int0_5714 : (!torch.int, !torch.int) -> !torch.list<int>
    %8128 = torch.aten.permute %242, %8127 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5715 = torch.constant.int 1
    %int0_5716 = torch.constant.int 0
    %8129 = torch.prim.ListConstruct %int1_5715, %int0_5716 : (!torch.int, !torch.int) -> !torch.list<int>
    %8130 = torch.aten.permute %243, %8129 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5717 = torch.constant.int 1
    %int0_5718 = torch.constant.int 0
    %8131 = torch.prim.ListConstruct %int1_5717, %int0_5718 : (!torch.int, !torch.int) -> !torch.list<int>
    %8132 = torch.aten.permute %244, %8131 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5719 = torch.constant.int 1
    %int0_5720 = torch.constant.int 0
    %8133 = torch.prim.ListConstruct %int1_5719, %int0_5720 : (!torch.int, !torch.int) -> !torch.list<int>
    %8134 = torch.aten.permute %245, %8133 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5721 = torch.constant.int 1
    %int0_5722 = torch.constant.int 0
    %8135 = torch.prim.ListConstruct %int1_5721, %int0_5722 : (!torch.int, !torch.int) -> !torch.list<int>
    %8136 = torch.aten.permute %246, %8135 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5723 = torch.constant.int 1
    %int0_5724 = torch.constant.int 0
    %8137 = torch.prim.ListConstruct %int1_5723, %int0_5724 : (!torch.int, !torch.int) -> !torch.list<int>
    %8138 = torch.aten.permute %247, %8137 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_5725 = torch.constant.int 4
    %8139 = torch.aten.mul.int %int4_5725, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5726 = torch.constant.int 4096
    %8140 = torch.prim.ListConstruct %8139, %int4096_5726 : (!torch.int, !torch.int) -> !torch.list<int>
    %8141 = torch.aten.view %8051, %8140 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8141, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8142 = torch.aten.mm %8141, %8124 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8142, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5727 = torch.constant.int 4
    %int128_5728 = torch.constant.int 128
    %8143 = torch.prim.ListConstruct %int4_5727, %2482, %int128_5728 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8144 = torch.aten.view %8142, %8143 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5729 = torch.constant.int 4
    %8145 = torch.aten.mul.int %int4_5729, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5730 = torch.constant.int 4096
    %8146 = torch.prim.ListConstruct %8145, %int4096_5730 : (!torch.int, !torch.int) -> !torch.list<int>
    %8147 = torch.aten.view %8052, %8146 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8147, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8148 = torch.aten.mm %8147, %8126 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8148, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5731 = torch.constant.int 4
    %int128_5732 = torch.constant.int 128
    %8149 = torch.prim.ListConstruct %int4_5731, %2482, %int128_5732 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8150 = torch.aten.view %8148, %8149 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5733 = torch.constant.int 4
    %8151 = torch.aten.mul.int %int4_5733, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5734 = torch.constant.int 4096
    %8152 = torch.prim.ListConstruct %8151, %int4096_5734 : (!torch.int, !torch.int) -> !torch.list<int>
    %8153 = torch.aten.view %8053, %8152 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8153, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8154 = torch.aten.mm %8153, %8128 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8154, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5735 = torch.constant.int 4
    %int128_5736 = torch.constant.int 128
    %8155 = torch.prim.ListConstruct %int4_5735, %2482, %int128_5736 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8156 = torch.aten.view %8154, %8155 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5737 = torch.constant.int 4
    %8157 = torch.aten.mul.int %int4_5737, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5738 = torch.constant.int 4096
    %8158 = torch.prim.ListConstruct %8157, %int4096_5738 : (!torch.int, !torch.int) -> !torch.list<int>
    %8159 = torch.aten.view %8054, %8158 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8159, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8160 = torch.aten.mm %8159, %8130 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8160, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5739 = torch.constant.int 4
    %int128_5740 = torch.constant.int 128
    %8161 = torch.prim.ListConstruct %int4_5739, %2482, %int128_5740 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8162 = torch.aten.view %8160, %8161 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5741 = torch.constant.int 4
    %8163 = torch.aten.mul.int %int4_5741, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5742 = torch.constant.int 4096
    %8164 = torch.prim.ListConstruct %8163, %int4096_5742 : (!torch.int, !torch.int) -> !torch.list<int>
    %8165 = torch.aten.view %8055, %8164 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8165, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8166 = torch.aten.mm %8165, %8132 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8166, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5743 = torch.constant.int 4
    %int128_5744 = torch.constant.int 128
    %8167 = torch.prim.ListConstruct %int4_5743, %2482, %int128_5744 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8168 = torch.aten.view %8166, %8167 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5745 = torch.constant.int 4
    %8169 = torch.aten.mul.int %int4_5745, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5746 = torch.constant.int 4096
    %8170 = torch.prim.ListConstruct %8169, %int4096_5746 : (!torch.int, !torch.int) -> !torch.list<int>
    %8171 = torch.aten.view %8056, %8170 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8171, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8172 = torch.aten.mm %8171, %8134 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8172, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5747 = torch.constant.int 4
    %int128_5748 = torch.constant.int 128
    %8173 = torch.prim.ListConstruct %int4_5747, %2482, %int128_5748 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8174 = torch.aten.view %8172, %8173 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5749 = torch.constant.int 4
    %8175 = torch.aten.mul.int %int4_5749, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5750 = torch.constant.int 4096
    %8176 = torch.prim.ListConstruct %8175, %int4096_5750 : (!torch.int, !torch.int) -> !torch.list<int>
    %8177 = torch.aten.view %8057, %8176 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8177, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8178 = torch.aten.mm %8177, %8136 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8178, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5751 = torch.constant.int 4
    %int128_5752 = torch.constant.int 128
    %8179 = torch.prim.ListConstruct %int4_5751, %2482, %int128_5752 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8180 = torch.aten.view %8178, %8179 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5753 = torch.constant.int 4
    %8181 = torch.aten.mul.int %int4_5753, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5754 = torch.constant.int 4096
    %8182 = torch.prim.ListConstruct %8181, %int4096_5754 : (!torch.int, !torch.int) -> !torch.list<int>
    %8183 = torch.aten.view %8058, %8182 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8183, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8184 = torch.aten.mm %8183, %8138 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8184, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5755 = torch.constant.int 4
    %int128_5756 = torch.constant.int 128
    %8185 = torch.prim.ListConstruct %int4_5755, %2482, %int128_5756 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8186 = torch.aten.view %8184, %8185 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
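    // Per-shard V projection: identical structure to the K projection above,
    // again producing one [4, ?, 128] value tensor per device shard.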
    %int1_5757 = torch.constant.int 1
    %int0_5758 = torch.constant.int 0
    %8187 = torch.prim.ListConstruct %int1_5757, %int0_5758 : (!torch.int, !torch.int) -> !torch.list<int>
    %8188 = torch.aten.permute %248, %8187 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5759 = torch.constant.int 1
    %int0_5760 = torch.constant.int 0
    %8189 = torch.prim.ListConstruct %int1_5759, %int0_5760 : (!torch.int, !torch.int) -> !torch.list<int>
    %8190 = torch.aten.permute %249, %8189 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5761 = torch.constant.int 1
    %int0_5762 = torch.constant.int 0
    %8191 = torch.prim.ListConstruct %int1_5761, %int0_5762 : (!torch.int, !torch.int) -> !torch.list<int>
    %8192 = torch.aten.permute %250, %8191 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5763 = torch.constant.int 1
    %int0_5764 = torch.constant.int 0
    %8193 = torch.prim.ListConstruct %int1_5763, %int0_5764 : (!torch.int, !torch.int) -> !torch.list<int>
    %8194 = torch.aten.permute %251, %8193 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5765 = torch.constant.int 1
    %int0_5766 = torch.constant.int 0
    %8195 = torch.prim.ListConstruct %int1_5765, %int0_5766 : (!torch.int, !torch.int) -> !torch.list<int>
    %8196 = torch.aten.permute %252, %8195 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5767 = torch.constant.int 1
    %int0_5768 = torch.constant.int 0
    %8197 = torch.prim.ListConstruct %int1_5767, %int0_5768 : (!torch.int, !torch.int) -> !torch.list<int>
    %8198 = torch.aten.permute %253, %8197 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5769 = torch.constant.int 1
    %int0_5770 = torch.constant.int 0
    %8199 = torch.prim.ListConstruct %int1_5769, %int0_5770 : (!torch.int, !torch.int) -> !torch.list<int>
    %8200 = torch.aten.permute %254, %8199 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_5771 = torch.constant.int 1
    %int0_5772 = torch.constant.int 0
    %8201 = torch.prim.ListConstruct %int1_5771, %int0_5772 : (!torch.int, !torch.int) -> !torch.list<int>
    %8202 = torch.aten.permute %255, %8201 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_5773 = torch.constant.int 4
    %8203 = torch.aten.mul.int %int4_5773, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5774 = torch.constant.int 4096
    %8204 = torch.prim.ListConstruct %8203, %int4096_5774 : (!torch.int, !torch.int) -> !torch.list<int>
    %8205 = torch.aten.view %8051, %8204 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8205, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8206 = torch.aten.mm %8205, %8188 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8206, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5775 = torch.constant.int 4
    %int128_5776 = torch.constant.int 128
    %8207 = torch.prim.ListConstruct %int4_5775, %2482, %int128_5776 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8208 = torch.aten.view %8206, %8207 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5777 = torch.constant.int 4
    %8209 = torch.aten.mul.int %int4_5777, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5778 = torch.constant.int 4096
    %8210 = torch.prim.ListConstruct %8209, %int4096_5778 : (!torch.int, !torch.int) -> !torch.list<int>
    %8211 = torch.aten.view %8052, %8210 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8211, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8212 = torch.aten.mm %8211, %8190 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8212, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5779 = torch.constant.int 4
    %int128_5780 = torch.constant.int 128
    %8213 = torch.prim.ListConstruct %int4_5779, %2482, %int128_5780 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8214 = torch.aten.view %8212, %8213 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5781 = torch.constant.int 4
    %8215 = torch.aten.mul.int %int4_5781, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5782 = torch.constant.int 4096
    %8216 = torch.prim.ListConstruct %8215, %int4096_5782 : (!torch.int, !torch.int) -> !torch.list<int>
    %8217 = torch.aten.view %8053, %8216 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8217, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8218 = torch.aten.mm %8217, %8192 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8218, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5783 = torch.constant.int 4
    %int128_5784 = torch.constant.int 128
    %8219 = torch.prim.ListConstruct %int4_5783, %2482, %int128_5784 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8220 = torch.aten.view %8218, %8219 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5785 = torch.constant.int 4
    %8221 = torch.aten.mul.int %int4_5785, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5786 = torch.constant.int 4096
    %8222 = torch.prim.ListConstruct %8221, %int4096_5786 : (!torch.int, !torch.int) -> !torch.list<int>
    %8223 = torch.aten.view %8054, %8222 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8223, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8224 = torch.aten.mm %8223, %8194 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8224, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5787 = torch.constant.int 4
    %int128_5788 = torch.constant.int 128
    %8225 = torch.prim.ListConstruct %int4_5787, %2482, %int128_5788 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8226 = torch.aten.view %8224, %8225 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5789 = torch.constant.int 4
    %8227 = torch.aten.mul.int %int4_5789, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5790 = torch.constant.int 4096
    %8228 = torch.prim.ListConstruct %8227, %int4096_5790 : (!torch.int, !torch.int) -> !torch.list<int>
    %8229 = torch.aten.view %8055, %8228 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8229, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8230 = torch.aten.mm %8229, %8196 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8230, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5791 = torch.constant.int 4
    %int128_5792 = torch.constant.int 128
    %8231 = torch.prim.ListConstruct %int4_5791, %2482, %int128_5792 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8232 = torch.aten.view %8230, %8231 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5793 = torch.constant.int 4
    %8233 = torch.aten.mul.int %int4_5793, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5794 = torch.constant.int 4096
    %8234 = torch.prim.ListConstruct %8233, %int4096_5794 : (!torch.int, !torch.int) -> !torch.list<int>
    %8235 = torch.aten.view %8056, %8234 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8235, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8236 = torch.aten.mm %8235, %8198 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8236, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5795 = torch.constant.int 4
    %int128_5796 = torch.constant.int 128
    %8237 = torch.prim.ListConstruct %int4_5795, %2482, %int128_5796 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8238 = torch.aten.view %8236, %8237 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5797 = torch.constant.int 4
    %8239 = torch.aten.mul.int %int4_5797, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5798 = torch.constant.int 4096
    %8240 = torch.prim.ListConstruct %8239, %int4096_5798 : (!torch.int, !torch.int) -> !torch.list<int>
    %8241 = torch.aten.view %8057, %8240 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8241, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8242 = torch.aten.mm %8241, %8200 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8242, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5799 = torch.constant.int 4
    %int128_5800 = torch.constant.int 128
    %8243 = torch.prim.ListConstruct %int4_5799, %2482, %int128_5800 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8244 = torch.aten.view %8242, %8243 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_5801 = torch.constant.int 4
    %8245 = torch.aten.mul.int %int4_5801, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_5802 = torch.constant.int 4096
    %8246 = torch.prim.ListConstruct %8245, %int4096_5802 : (!torch.int, !torch.int) -> !torch.list<int>
    %8247 = torch.aten.view %8058, %8246 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %8247, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %8248 = torch.aten.mm %8247, %8202 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %8248, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_5803 = torch.constant.int 4
    %int128_5804 = torch.constant.int 128
    %8249 = torch.prim.ListConstruct %int4_5803, %2482, %int128_5804 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8250 = torch.aten.view %8248, %8249 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %8250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
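    // Reshape each Q shard from [4, ?, 512] to [4, ?, 4, 128]: four query
    // heads of dimension 128 per device.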
    %int4_5805 = torch.constant.int 4
    %int4_5806 = torch.constant.int 4
    %int128_5807 = torch.constant.int 128
    %8251 = torch.prim.ListConstruct %int4_5805, %2482, %int4_5806, %int128_5807 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8252 = torch.aten.view %8080, %8251 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_5808 = torch.constant.int 4
    %int4_5809 = torch.constant.int 4
    %int128_5810 = torch.constant.int 128
    %8253 = torch.prim.ListConstruct %int4_5808, %2482, %int4_5809, %int128_5810 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8254 = torch.aten.view %8086, %8253 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_5811 = torch.constant.int 4
    %int4_5812 = torch.constant.int 4
    %int128_5813 = torch.constant.int 128
    %8255 = torch.prim.ListConstruct %int4_5811, %2482, %int4_5812, %int128_5813 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8256 = torch.aten.view %8092, %8255 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_5814 = torch.constant.int 4
    %int4_5815 = torch.constant.int 4
    %int128_5816 = torch.constant.int 128
    %8257 = torch.prim.ListConstruct %int4_5814, %2482, %int4_5815, %int128_5816 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8258 = torch.aten.view %8098, %8257 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_5817 = torch.constant.int 4
    %int4_5818 = torch.constant.int 4
    %int128_5819 = torch.constant.int 128
    %8259 = torch.prim.ListConstruct %int4_5817, %2482, %int4_5818, %int128_5819 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8260 = torch.aten.view %8104, %8259 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_5820 = torch.constant.int 4
    %int4_5821 = torch.constant.int 4
    %int128_5822 = torch.constant.int 128
    %8261 = torch.prim.ListConstruct %int4_5820, %2482, %int4_5821, %int128_5822 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8262 = torch.aten.view %8110, %8261 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_5823 = torch.constant.int 4
    %int4_5824 = torch.constant.int 4
    %int128_5825 = torch.constant.int 128
    %8263 = torch.prim.ListConstruct %int4_5823, %2482, %int4_5824, %int128_5825 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8264 = torch.aten.view %8116, %8263 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_5826 = torch.constant.int 4
    %int4_5827 = torch.constant.int 4
    %int128_5828 = torch.constant.int 128
    %8265 = torch.prim.ListConstruct %int4_5826, %2482, %int4_5827, %int128_5828 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8266 = torch.aten.view %8122, %8265 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
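    // Reshape each K shard from [4, ?, 128] to [4, ?, 1, 128], making the
    // single per-shard KV head explicit as a head dimension.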
    %int4_5829 = torch.constant.int 4
    %int1_5830 = torch.constant.int 1
    %int128_5831 = torch.constant.int 128
    %8267 = torch.prim.ListConstruct %int4_5829, %2482, %int1_5830, %int128_5831 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8268 = torch.aten.view %8144, %8267 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5832 = torch.constant.int 4
    %int1_5833 = torch.constant.int 1
    %int128_5834 = torch.constant.int 128
    %8269 = torch.prim.ListConstruct %int4_5832, %2482, %int1_5833, %int128_5834 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8270 = torch.aten.view %8150, %8269 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5835 = torch.constant.int 4
    %int1_5836 = torch.constant.int 1
    %int128_5837 = torch.constant.int 128
    %8271 = torch.prim.ListConstruct %int4_5835, %2482, %int1_5836, %int128_5837 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8272 = torch.aten.view %8156, %8271 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5838 = torch.constant.int 4
    %int1_5839 = torch.constant.int 1
    %int128_5840 = torch.constant.int 128
    %8273 = torch.prim.ListConstruct %int4_5838, %2482, %int1_5839, %int128_5840 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8274 = torch.aten.view %8162, %8273 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5841 = torch.constant.int 4
    %int1_5842 = torch.constant.int 1
    %int128_5843 = torch.constant.int 128
    %8275 = torch.prim.ListConstruct %int4_5841, %2482, %int1_5842, %int128_5843 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8276 = torch.aten.view %8168, %8275 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5844 = torch.constant.int 4
    %int1_5845 = torch.constant.int 1
    %int128_5846 = torch.constant.int 128
    %8277 = torch.prim.ListConstruct %int4_5844, %2482, %int1_5845, %int128_5846 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8278 = torch.aten.view %8174, %8277 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5847 = torch.constant.int 4
    %int1_5848 = torch.constant.int 1
    %int128_5849 = torch.constant.int 128
    %8279 = torch.prim.ListConstruct %int4_5847, %2482, %int1_5848, %int128_5849 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8280 = torch.aten.view %8180, %8279 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5850 = torch.constant.int 4
    %int1_5851 = torch.constant.int 1
    %int128_5852 = torch.constant.int 128
    %8281 = torch.prim.ListConstruct %int4_5850, %2482, %int1_5851, %int128_5852 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8282 = torch.aten.view %8186, %8281 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
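    // Reshape each V shard to [4, ?, 1, 128] in the same way.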
    %int4_5853 = torch.constant.int 4
    %int1_5854 = torch.constant.int 1
    %int128_5855 = torch.constant.int 128
    %8283 = torch.prim.ListConstruct %int4_5853, %2482, %int1_5854, %int128_5855 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8284 = torch.aten.view %8208, %8283 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5856 = torch.constant.int 4
    %int1_5857 = torch.constant.int 1
    %int128_5858 = torch.constant.int 128
    %8285 = torch.prim.ListConstruct %int4_5856, %2482, %int1_5857, %int128_5858 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8286 = torch.aten.view %8214, %8285 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5859 = torch.constant.int 4
    %int1_5860 = torch.constant.int 1
    %int128_5861 = torch.constant.int 128
    %8287 = torch.prim.ListConstruct %int4_5859, %2482, %int1_5860, %int128_5861 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8288 = torch.aten.view %8220, %8287 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5862 = torch.constant.int 4
    %int1_5863 = torch.constant.int 1
    %int128_5864 = torch.constant.int 128
    %8289 = torch.prim.ListConstruct %int4_5862, %2482, %int1_5863, %int128_5864 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8290 = torch.aten.view %8226, %8289 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5865 = torch.constant.int 4
    %int1_5866 = torch.constant.int 1
    %int128_5867 = torch.constant.int 128
    %8291 = torch.prim.ListConstruct %int4_5865, %2482, %int1_5866, %int128_5867 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8292 = torch.aten.view %8232, %8291 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5868 = torch.constant.int 4
    %int1_5869 = torch.constant.int 1
    %int128_5870 = torch.constant.int 128
    %8293 = torch.prim.ListConstruct %int4_5868, %2482, %int1_5869, %int128_5870 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8294 = torch.aten.view %8238, %8293 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5871 = torch.constant.int 4
    %int1_5872 = torch.constant.int 1
    %int128_5873 = torch.constant.int 128
    %8295 = torch.prim.ListConstruct %int4_5871, %2482, %int1_5872, %int128_5873 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8296 = torch.aten.view %8244, %8295 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_5874 = torch.constant.int 4
    %int1_5875 = torch.constant.int 1
    %int128_5876 = torch.constant.int 128
    %8297 = torch.prim.ListConstruct %int4_5874, %2482, %int1_5875, %int128_5876 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8298 = torch.aten.view %8250, %8297 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
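    // Rotary position embedding (RoPE) table: positions arange(131072) are
    // combined with inverse frequencies 1 / 500000^(2j/128), j in [0, 64),
    // via an outer product into a [131072, 64] angle matrix; cos/sin of the
    // angles are then packed into a complex<f32> rotation table. The 5.0e+05
    // base and 131072 max positions match a 128k-context Llama-3-style
    // configuration (an inference from the constants, not stated in the IR).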
    %int131072_5877 = torch.constant.int 131072
    %none_5878 = torch.constant.none
    %none_5879 = torch.constant.none
    %cpu_5880 = torch.constant.device "cpu"
    %false_5881 = torch.constant.bool false
    %8299 = torch.aten.arange %int131072_5877, %none_5878, %none_5879, %cpu_5880, %false_5881 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_5882 = torch.constant.int 0
    %int128_5883 = torch.constant.int 128
    %int2_5884 = torch.constant.int 2
    %none_5885 = torch.constant.none
    %none_5886 = torch.constant.none
    %cpu_5887 = torch.constant.device "cpu"
    %false_5888 = torch.constant.bool false
    %8300 = torch.aten.arange.start_step %int0_5882, %int128_5883, %int2_5884, %none_5885, %none_5886, %cpu_5887, %false_5888 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_5889 = torch.constant.int 0
    %int0_5890 = torch.constant.int 0
    %int64_5891 = torch.constant.int 64
    %int1_5892 = torch.constant.int 1
    %8301 = torch.aten.slice.Tensor %8300, %int0_5889, %int0_5890, %int64_5891, %int1_5892 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_5893 = torch.constant.int 6
    %8302 = torch.prims.convert_element_type %8301, %int6_5893 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_5894 = torch.constant.int 128
    %8303 = torch.aten.div.Scalar %8302, %int128_5894 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_5895 = torch.constant.float 5.000000e+05
    %8304 = torch.aten.pow.Scalar %float5.000000e05_5895, %8303 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %8305 = torch.aten.reciprocal %8304 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_5896 = torch.constant.float 1.000000e+00
    %8306 = torch.aten.mul.Scalar %8305, %float1.000000e00_5896 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_5897 = torch.constant.int 131072
    %int1_5898 = torch.constant.int 1
    %8307 = torch.prim.ListConstruct %int131072_5897, %int1_5898 : (!torch.int, !torch.int) -> !torch.list<int>
    %8308 = torch.aten.view %8299, %8307 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %8309 = torch.aten.mul.Tensor %8308, %8306 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %8310 = torch.aten.cos %8309 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %8311 = torch.aten.sin %8309 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %8312 = torch.aten.complex %8310, %8311 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
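    // The rotation table is computed once with cpu-device constants and then
    // copied to each of the eight device affinities (@__device_0 through
    // @__device_7) via flow.tensor.transfer.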
    %8313 = torch_c.to_builtin_tensor %8312 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8314 = flow.tensor.transfer %8313 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %8315 = torch_c.from_builtin_tensor %8314 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8316 = torch_c.to_builtin_tensor %8312 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8317 = flow.tensor.transfer %8316 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %8318 = torch_c.from_builtin_tensor %8317 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8319 = torch_c.to_builtin_tensor %8312 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8320 = flow.tensor.transfer %8319 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %8321 = torch_c.from_builtin_tensor %8320 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8322 = torch_c.to_builtin_tensor %8312 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8323 = flow.tensor.transfer %8322 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %8324 = torch_c.from_builtin_tensor %8323 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8325 = torch_c.to_builtin_tensor %8312 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8326 = flow.tensor.transfer %8325 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %8327 = torch_c.from_builtin_tensor %8326 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8328 = torch_c.to_builtin_tensor %8312 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8329 = flow.tensor.transfer %8328 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %8330 = torch_c.from_builtin_tensor %8329 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8331 = torch_c.to_builtin_tensor %8312 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8332 = flow.tensor.transfer %8331 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %8333 = torch_c.from_builtin_tensor %8332 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8334 = torch_c.to_builtin_tensor %8312 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8335 = flow.tensor.transfer %8334 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %8336 = torch_c.from_builtin_tensor %8335 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
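    // Per-shard RoPE application (shard 0 first): the table is sliced to the
    // current sequence length (taken from dim 1 of the shard's Q output) and
    // unsqueezed to a broadcastable [1, ?, 1, 64]; the Q activations are
    // bitcast from [4, ?, 4, 128] f16 to [4, ?, 4, 64] complex<f16> so the
    // rotation is a single complex multiply, and the complex<f32> product is
    // bitcast back to [4, ?, 4, 128] f32 and converted to f16.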
    %int1_5899 = torch.constant.int 1
    %8337 = torch.aten.size.int %8080, %int1_5899 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_5900 = torch.constant.int 0
    %8338 = torch.aten.add.int %int0_5900, %8337 : !torch.int, !torch.int -> !torch.int
    %int0_5901 = torch.constant.int 0
    %int0_5902 = torch.constant.int 0
    %int1_5903 = torch.constant.int 1
    %8339 = torch.aten.slice.Tensor %8315, %int0_5901, %int0_5902, %8338, %int1_5903 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8339, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_5904 = torch.constant.int 1
    %int0_5905 = torch.constant.int 0
    %int9223372036854775807_5906 = torch.constant.int 9223372036854775807
    %int1_5907 = torch.constant.int 1
    %8340 = torch.aten.slice.Tensor %8339, %int1_5904, %int0_5905, %int9223372036854775807_5906, %int1_5907 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8340, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_5908 = torch.constant.int 0
    %8341 = torch.aten.unsqueeze %8340, %int0_5908 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8341, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_5909 = torch.constant.int 2
    %8342 = torch.aten.unsqueeze %8341, %int2_5909 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8342, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_5910 = torch.constant.int 3
    %int0_5911 = torch.constant.int 0
    %int9223372036854775807_5912 = torch.constant.int 9223372036854775807
    %int1_5913 = torch.constant.int 1
    %8343 = torch.aten.slice.Tensor %8342, %int3_5910, %int0_5911, %int9223372036854775807_5912, %int1_5913 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8343, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8344 = torch_c.to_builtin_tensor %8252 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_5914 = arith.constant 1 : index
    %dim_5915 = tensor.dim %8344, %c1_5914 : tensor<4x?x4x128xf16>
    %8345 = flow.tensor.bitcast %8344 : tensor<4x?x4x128xf16>{%dim_5915} -> tensor<4x?x4x64xcomplex<f16>>{%dim_5915}
    %8346 = torch_c.from_builtin_tensor %8345 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %8346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %8347 = torch.aten.mul.Tensor %8346, %8343 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %8347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %8348 = torch_c.to_builtin_tensor %8347 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_5916 = arith.constant 1 : index
    %dim_5917 = tensor.dim %8348, %c1_5916 : tensor<4x?x4x64xcomplex<f32>>
    %8349 = flow.tensor.bitcast %8348 : tensor<4x?x4x64xcomplex<f32>>{%dim_5917} -> tensor<4x?x4x128xf32>{%dim_5917}
    %8350 = torch_c.from_builtin_tensor %8349 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %8350, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_5918 = torch.constant.int 5
    %8351 = torch.prims.convert_element_type %8350, %int5_5918 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
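    // The block above is one instance of a pattern that repeats once per
    // device shard below: it applies rotary position embedding (RoPE) by
    // slicing the precomputed [131072,64] complex<f32> table to the current
    // sequence length, reshaping it to [1,seq,1,64], bitcasting the f16
    // activations [4,seq,4,128] into 64 complex<f16> pairs per head,
    // rotating them with a complex multiply, bitcasting back to f32 pairs,
    // and truncating to f16. A hedged PyTorch sketch of the equivalent
    // computation (identifiers are illustrative, not from this module):
    //
    //   import torch
    //   def apply_rope(x, table, seq_len):   # x: [4, seq, heads, 128] f16
    //       freqs = table[:seq_len][None, :, None, :]      # [1,seq,1,64]
    //       xc = torch.view_as_complex(
    //           x.float().reshape(*x.shape[:-1], 64, 2))   # [4,seq,h,64]
    //       out = torch.view_as_real(xc * freqs)           # [4,seq,h,64,2]
    //       return out.reshape(x.shape).to(torch.float16)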
    %int1_5919 = torch.constant.int 1
    %8352 = torch.aten.size.int %8086, %int1_5919 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_5920 = torch.constant.int 0
    %8353 = torch.aten.add.int %int0_5920, %8352 : !torch.int, !torch.int -> !torch.int
    %int0_5921 = torch.constant.int 0
    %int0_5922 = torch.constant.int 0
    %int1_5923 = torch.constant.int 1
    %8354 = torch.aten.slice.Tensor %8318, %int0_5921, %int0_5922, %8353, %int1_5923 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8354, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_5924 = torch.constant.int 1
    %int0_5925 = torch.constant.int 0
    %int9223372036854775807_5926 = torch.constant.int 9223372036854775807
    %int1_5927 = torch.constant.int 1
    %8355 = torch.aten.slice.Tensor %8354, %int1_5924, %int0_5925, %int9223372036854775807_5926, %int1_5927 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8355, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_5928 = torch.constant.int 0
    %8356 = torch.aten.unsqueeze %8355, %int0_5928 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8356, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_5929 = torch.constant.int 2
    %8357 = torch.aten.unsqueeze %8356, %int2_5929 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8357, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_5930 = torch.constant.int 3
    %int0_5931 = torch.constant.int 0
    %int9223372036854775807_5932 = torch.constant.int 9223372036854775807
    %int1_5933 = torch.constant.int 1
    %8358 = torch.aten.slice.Tensor %8357, %int3_5930, %int0_5931, %int9223372036854775807_5932, %int1_5933 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8358, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8359 = torch_c.to_builtin_tensor %8254 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_5934 = arith.constant 1 : index
    %dim_5935 = tensor.dim %8359, %c1_5934 : tensor<4x?x4x128xf16>
    %8360 = flow.tensor.bitcast %8359 : tensor<4x?x4x128xf16>{%dim_5935} -> tensor<4x?x4x64xcomplex<f16>>{%dim_5935}
    %8361 = torch_c.from_builtin_tensor %8360 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %8361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %8362 = torch.aten.mul.Tensor %8361, %8358 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %8362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %8363 = torch_c.to_builtin_tensor %8362 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_5936 = arith.constant 1 : index
    %dim_5937 = tensor.dim %8363, %c1_5936 : tensor<4x?x4x64xcomplex<f32>>
    %8364 = flow.tensor.bitcast %8363 : tensor<4x?x4x64xcomplex<f32>>{%dim_5937} -> tensor<4x?x4x128xf32>{%dim_5937}
    %8365 = torch_c.from_builtin_tensor %8364 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %8365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_5938 = torch.constant.int 5
    %8366 = torch.prims.convert_element_type %8365, %int5_5938 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_5939 = torch.constant.int 1
    %8367 = torch.aten.size.int %8092, %int1_5939 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_5940 = torch.constant.int 0
    %8368 = torch.aten.add.int %int0_5940, %8367 : !torch.int, !torch.int -> !torch.int
    %int0_5941 = torch.constant.int 0
    %int0_5942 = torch.constant.int 0
    %int1_5943 = torch.constant.int 1
    %8369 = torch.aten.slice.Tensor %8321, %int0_5941, %int0_5942, %8368, %int1_5943 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8369, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_5944 = torch.constant.int 1
    %int0_5945 = torch.constant.int 0
    %int9223372036854775807_5946 = torch.constant.int 9223372036854775807
    %int1_5947 = torch.constant.int 1
    %8370 = torch.aten.slice.Tensor %8369, %int1_5944, %int0_5945, %int9223372036854775807_5946, %int1_5947 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8370, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_5948 = torch.constant.int 0
    %8371 = torch.aten.unsqueeze %8370, %int0_5948 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8371, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_5949 = torch.constant.int 2
    %8372 = torch.aten.unsqueeze %8371, %int2_5949 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8372, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_5950 = torch.constant.int 3
    %int0_5951 = torch.constant.int 0
    %int9223372036854775807_5952 = torch.constant.int 9223372036854775807
    %int1_5953 = torch.constant.int 1
    %8373 = torch.aten.slice.Tensor %8372, %int3_5950, %int0_5951, %int9223372036854775807_5952, %int1_5953 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8373, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8374 = torch_c.to_builtin_tensor %8256 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_5954 = arith.constant 1 : index
    %dim_5955 = tensor.dim %8374, %c1_5954 : tensor<4x?x4x128xf16>
    %8375 = flow.tensor.bitcast %8374 : tensor<4x?x4x128xf16>{%dim_5955} -> tensor<4x?x4x64xcomplex<f16>>{%dim_5955}
    %8376 = torch_c.from_builtin_tensor %8375 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %8376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %8377 = torch.aten.mul.Tensor %8376, %8373 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %8377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %8378 = torch_c.to_builtin_tensor %8377 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_5956 = arith.constant 1 : index
    %dim_5957 = tensor.dim %8378, %c1_5956 : tensor<4x?x4x64xcomplex<f32>>
    %8379 = flow.tensor.bitcast %8378 : tensor<4x?x4x64xcomplex<f32>>{%dim_5957} -> tensor<4x?x4x128xf32>{%dim_5957}
    %8380 = torch_c.from_builtin_tensor %8379 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %8380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_5958 = torch.constant.int 5
    %8381 = torch.prims.convert_element_type %8380, %int5_5958 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_5959 = torch.constant.int 1
    %8382 = torch.aten.size.int %8098, %int1_5959 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_5960 = torch.constant.int 0
    %8383 = torch.aten.add.int %int0_5960, %8382 : !torch.int, !torch.int -> !torch.int
    %int0_5961 = torch.constant.int 0
    %int0_5962 = torch.constant.int 0
    %int1_5963 = torch.constant.int 1
    %8384 = torch.aten.slice.Tensor %8324, %int0_5961, %int0_5962, %8383, %int1_5963 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8384, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_5964 = torch.constant.int 1
    %int0_5965 = torch.constant.int 0
    %int9223372036854775807_5966 = torch.constant.int 9223372036854775807
    %int1_5967 = torch.constant.int 1
    %8385 = torch.aten.slice.Tensor %8384, %int1_5964, %int0_5965, %int9223372036854775807_5966, %int1_5967 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8385, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_5968 = torch.constant.int 0
    %8386 = torch.aten.unsqueeze %8385, %int0_5968 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8386, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_5969 = torch.constant.int 2
    %8387 = torch.aten.unsqueeze %8386, %int2_5969 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8387, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_5970 = torch.constant.int 3
    %int0_5971 = torch.constant.int 0
    %int9223372036854775807_5972 = torch.constant.int 9223372036854775807
    %int1_5973 = torch.constant.int 1
    %8388 = torch.aten.slice.Tensor %8387, %int3_5970, %int0_5971, %int9223372036854775807_5972, %int1_5973 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8388, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8389 = torch_c.to_builtin_tensor %8258 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_5974 = arith.constant 1 : index
    %dim_5975 = tensor.dim %8389, %c1_5974 : tensor<4x?x4x128xf16>
    %8390 = flow.tensor.bitcast %8389 : tensor<4x?x4x128xf16>{%dim_5975} -> tensor<4x?x4x64xcomplex<f16>>{%dim_5975}
    %8391 = torch_c.from_builtin_tensor %8390 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %8391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %8392 = torch.aten.mul.Tensor %8391, %8388 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %8392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %8393 = torch_c.to_builtin_tensor %8392 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_5976 = arith.constant 1 : index
    %dim_5977 = tensor.dim %8393, %c1_5976 : tensor<4x?x4x64xcomplex<f32>>
    %8394 = flow.tensor.bitcast %8393 : tensor<4x?x4x64xcomplex<f32>>{%dim_5977} -> tensor<4x?x4x128xf32>{%dim_5977}
    %8395 = torch_c.from_builtin_tensor %8394 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %8395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_5978 = torch.constant.int 5
    %8396 = torch.prims.convert_element_type %8395, %int5_5978 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_5979 = torch.constant.int 1
    %8397 = torch.aten.size.int %8104, %int1_5979 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_5980 = torch.constant.int 0
    %8398 = torch.aten.add.int %int0_5980, %8397 : !torch.int, !torch.int -> !torch.int
    %int0_5981 = torch.constant.int 0
    %int0_5982 = torch.constant.int 0
    %int1_5983 = torch.constant.int 1
    %8399 = torch.aten.slice.Tensor %8327, %int0_5981, %int0_5982, %8398, %int1_5983 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8399, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_5984 = torch.constant.int 1
    %int0_5985 = torch.constant.int 0
    %int9223372036854775807_5986 = torch.constant.int 9223372036854775807
    %int1_5987 = torch.constant.int 1
    %8400 = torch.aten.slice.Tensor %8399, %int1_5984, %int0_5985, %int9223372036854775807_5986, %int1_5987 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8400, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_5988 = torch.constant.int 0
    %8401 = torch.aten.unsqueeze %8400, %int0_5988 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8401, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_5989 = torch.constant.int 2
    %8402 = torch.aten.unsqueeze %8401, %int2_5989 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8402, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_5990 = torch.constant.int 3
    %int0_5991 = torch.constant.int 0
    %int9223372036854775807_5992 = torch.constant.int 9223372036854775807
    %int1_5993 = torch.constant.int 1
    %8403 = torch.aten.slice.Tensor %8402, %int3_5990, %int0_5991, %int9223372036854775807_5992, %int1_5993 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8403, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8404 = torch_c.to_builtin_tensor %8260 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_5994 = arith.constant 1 : index
    %dim_5995 = tensor.dim %8404, %c1_5994 : tensor<4x?x4x128xf16>
    %8405 = flow.tensor.bitcast %8404 : tensor<4x?x4x128xf16>{%dim_5995} -> tensor<4x?x4x64xcomplex<f16>>{%dim_5995}
    %8406 = torch_c.from_builtin_tensor %8405 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %8406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %8407 = torch.aten.mul.Tensor %8406, %8403 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %8407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %8408 = torch_c.to_builtin_tensor %8407 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_5996 = arith.constant 1 : index
    %dim_5997 = tensor.dim %8408, %c1_5996 : tensor<4x?x4x64xcomplex<f32>>
    %8409 = flow.tensor.bitcast %8408 : tensor<4x?x4x64xcomplex<f32>>{%dim_5997} -> tensor<4x?x4x128xf32>{%dim_5997}
    %8410 = torch_c.from_builtin_tensor %8409 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %8410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_5998 = torch.constant.int 5
    %8411 = torch.prims.convert_element_type %8410, %int5_5998 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_5999 = torch.constant.int 1
    %8412 = torch.aten.size.int %8110, %int1_5999 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_6000 = torch.constant.int 0
    %8413 = torch.aten.add.int %int0_6000, %8412 : !torch.int, !torch.int -> !torch.int
    %int0_6001 = torch.constant.int 0
    %int0_6002 = torch.constant.int 0
    %int1_6003 = torch.constant.int 1
    %8414 = torch.aten.slice.Tensor %8330, %int0_6001, %int0_6002, %8413, %int1_6003 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8414, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_6004 = torch.constant.int 1
    %int0_6005 = torch.constant.int 0
    %int9223372036854775807_6006 = torch.constant.int 9223372036854775807
    %int1_6007 = torch.constant.int 1
    %8415 = torch.aten.slice.Tensor %8414, %int1_6004, %int0_6005, %int9223372036854775807_6006, %int1_6007 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8415, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_6008 = torch.constant.int 0
    %8416 = torch.aten.unsqueeze %8415, %int0_6008 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8416, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_6009 = torch.constant.int 2
    %8417 = torch.aten.unsqueeze %8416, %int2_6009 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8417, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_6010 = torch.constant.int 3
    %int0_6011 = torch.constant.int 0
    %int9223372036854775807_6012 = torch.constant.int 9223372036854775807
    %int1_6013 = torch.constant.int 1
    %8418 = torch.aten.slice.Tensor %8417, %int3_6010, %int0_6011, %int9223372036854775807_6012, %int1_6013 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8418, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8419 = torch_c.to_builtin_tensor %8262 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_6014 = arith.constant 1 : index
    %dim_6015 = tensor.dim %8419, %c1_6014 : tensor<4x?x4x128xf16>
    %8420 = flow.tensor.bitcast %8419 : tensor<4x?x4x128xf16>{%dim_6015} -> tensor<4x?x4x64xcomplex<f16>>{%dim_6015}
    %8421 = torch_c.from_builtin_tensor %8420 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %8421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %8422 = torch.aten.mul.Tensor %8421, %8418 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %8422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %8423 = torch_c.to_builtin_tensor %8422 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_6016 = arith.constant 1 : index
    %dim_6017 = tensor.dim %8423, %c1_6016 : tensor<4x?x4x64xcomplex<f32>>
    %8424 = flow.tensor.bitcast %8423 : tensor<4x?x4x64xcomplex<f32>>{%dim_6017} -> tensor<4x?x4x128xf32>{%dim_6017}
    %8425 = torch_c.from_builtin_tensor %8424 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %8425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_6018 = torch.constant.int 5
    %8426 = torch.prims.convert_element_type %8425, %int5_6018 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_6019 = torch.constant.int 1
    %8427 = torch.aten.size.int %8116, %int1_6019 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_6020 = torch.constant.int 0
    %8428 = torch.aten.add.int %int0_6020, %8427 : !torch.int, !torch.int -> !torch.int
    %int0_6021 = torch.constant.int 0
    %int0_6022 = torch.constant.int 0
    %int1_6023 = torch.constant.int 1
    %8429 = torch.aten.slice.Tensor %8333, %int0_6021, %int0_6022, %8428, %int1_6023 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8429, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_6024 = torch.constant.int 1
    %int0_6025 = torch.constant.int 0
    %int9223372036854775807_6026 = torch.constant.int 9223372036854775807
    %int1_6027 = torch.constant.int 1
    %8430 = torch.aten.slice.Tensor %8429, %int1_6024, %int0_6025, %int9223372036854775807_6026, %int1_6027 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8430, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_6028 = torch.constant.int 0
    %8431 = torch.aten.unsqueeze %8430, %int0_6028 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8431, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_6029 = torch.constant.int 2
    %8432 = torch.aten.unsqueeze %8431, %int2_6029 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8432, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_6030 = torch.constant.int 3
    %int0_6031 = torch.constant.int 0
    %int9223372036854775807_6032 = torch.constant.int 9223372036854775807
    %int1_6033 = torch.constant.int 1
    %8433 = torch.aten.slice.Tensor %8432, %int3_6030, %int0_6031, %int9223372036854775807_6032, %int1_6033 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8433, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8434 = torch_c.to_builtin_tensor %8264 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_6034 = arith.constant 1 : index
    %dim_6035 = tensor.dim %8434, %c1_6034 : tensor<4x?x4x128xf16>
    %8435 = flow.tensor.bitcast %8434 : tensor<4x?x4x128xf16>{%dim_6035} -> tensor<4x?x4x64xcomplex<f16>>{%dim_6035}
    %8436 = torch_c.from_builtin_tensor %8435 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %8436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %8437 = torch.aten.mul.Tensor %8436, %8433 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %8437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %8438 = torch_c.to_builtin_tensor %8437 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_6036 = arith.constant 1 : index
    %dim_6037 = tensor.dim %8438, %c1_6036 : tensor<4x?x4x64xcomplex<f32>>
    %8439 = flow.tensor.bitcast %8438 : tensor<4x?x4x64xcomplex<f32>>{%dim_6037} -> tensor<4x?x4x128xf32>{%dim_6037}
    %8440 = torch_c.from_builtin_tensor %8439 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %8440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_6038 = torch.constant.int 5
    %8441 = torch.prims.convert_element_type %8440, %int5_6038 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_6039 = torch.constant.int 1
    %8442 = torch.aten.size.int %8122, %int1_6039 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_6040 = torch.constant.int 0
    %8443 = torch.aten.add.int %int0_6040, %8442 : !torch.int, !torch.int -> !torch.int
    %int0_6041 = torch.constant.int 0
    %int0_6042 = torch.constant.int 0
    %int1_6043 = torch.constant.int 1
    %8444 = torch.aten.slice.Tensor %8336, %int0_6041, %int0_6042, %8443, %int1_6043 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8444, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_6044 = torch.constant.int 1
    %int0_6045 = torch.constant.int 0
    %int9223372036854775807_6046 = torch.constant.int 9223372036854775807
    %int1_6047 = torch.constant.int 1
    %8445 = torch.aten.slice.Tensor %8444, %int1_6044, %int0_6045, %int9223372036854775807_6046, %int1_6047 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8445, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_6048 = torch.constant.int 0
    %8446 = torch.aten.unsqueeze %8445, %int0_6048 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8446, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_6049 = torch.constant.int 2
    %8447 = torch.aten.unsqueeze %8446, %int2_6049 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8447, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_6050 = torch.constant.int 3
    %int0_6051 = torch.constant.int 0
    %int9223372036854775807_6052 = torch.constant.int 9223372036854775807
    %int1_6053 = torch.constant.int 1
    %8448 = torch.aten.slice.Tensor %8447, %int3_6050, %int0_6051, %int9223372036854775807_6052, %int1_6053 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8448, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8449 = torch_c.to_builtin_tensor %8266 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_6054 = arith.constant 1 : index
    %dim_6055 = tensor.dim %8449, %c1_6054 : tensor<4x?x4x128xf16>
    %8450 = flow.tensor.bitcast %8449 : tensor<4x?x4x128xf16>{%dim_6055} -> tensor<4x?x4x64xcomplex<f16>>{%dim_6055}
    %8451 = torch_c.from_builtin_tensor %8450 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %8451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %8452 = torch.aten.mul.Tensor %8451, %8448 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %8452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %8453 = torch_c.to_builtin_tensor %8452 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_6056 = arith.constant 1 : index
    %dim_6057 = tensor.dim %8453, %c1_6056 : tensor<4x?x4x64xcomplex<f32>>
    %8454 = flow.tensor.bitcast %8453 : tensor<4x?x4x64xcomplex<f32>>{%dim_6057} -> tensor<4x?x4x128xf32>{%dim_6057}
    %8455 = torch_c.from_builtin_tensor %8454 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %8455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_6058 = torch.constant.int 5
    %8456 = torch.prims.convert_element_type %8455, %int5_6058 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
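    // End of the eight per-shard rotations for the 4-head tensors; the
    // module next rebuilds the complex frequency table and replicates it
    // across devices before rotating the single-head tensors below.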
    %int131072_6059 = torch.constant.int 131072
    %none_6060 = torch.constant.none
    %none_6061 = torch.constant.none
    %cpu_6062 = torch.constant.device "cpu"
    %false_6063 = torch.constant.bool false
    %8457 = torch.aten.arange %int131072_6059, %none_6060, %none_6061, %cpu_6062, %false_6063 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_6064 = torch.constant.int 0
    %int128_6065 = torch.constant.int 128
    %int2_6066 = torch.constant.int 2
    %none_6067 = torch.constant.none
    %none_6068 = torch.constant.none
    %cpu_6069 = torch.constant.device "cpu"
    %false_6070 = torch.constant.bool false
    %8458 = torch.aten.arange.start_step %int0_6064, %int128_6065, %int2_6066, %none_6067, %none_6068, %cpu_6069, %false_6070 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_6071 = torch.constant.int 0
    %int0_6072 = torch.constant.int 0
    %int64_6073 = torch.constant.int 64
    %int1_6074 = torch.constant.int 1
    %8459 = torch.aten.slice.Tensor %8458, %int0_6071, %int0_6072, %int64_6073, %int1_6074 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_6075 = torch.constant.int 6
    %8460 = torch.prims.convert_element_type %8459, %int6_6075 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_6076 = torch.constant.int 128
    %8461 = torch.aten.div.Scalar %8460, %int128_6076 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_6077 = torch.constant.float 5.000000e+05
    %8462 = torch.aten.pow.Scalar %float5.000000e05_6077, %8461 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %8463 = torch.aten.reciprocal %8462 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_6078 = torch.constant.float 1.000000e+00
    %8464 = torch.aten.mul.Scalar %8463, %float1.000000e00_6078 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_6079 = torch.constant.int 131072
    %int1_6080 = torch.constant.int 1
    %8465 = torch.prim.ListConstruct %int131072_6079, %int1_6080 : (!torch.int, !torch.int) -> !torch.list<int>
    %8466 = torch.aten.view %8457, %8465 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %8467 = torch.aten.mul.Tensor %8466, %8464 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %8468 = torch.aten.cos %8467 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %8469 = torch.aten.sin %8467 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %8470 = torch.aten.complex %8468, %8469 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
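    // The sequence above recomputes the RoPE frequency table on the host:
    // for positions p in [0, 131072) and pair index i in [0, 64),
    // table[p, i] = cos(p * theta_i) + j * sin(p * theta_i), with
    // theta_i = 500000^(-2i/128) and a scale factor of 1.0. A hedged
    // PyTorch sketch of the same math (names are illustrative):
    //
    //   inv_freq = 1.0 / (500000.0 ** (torch.arange(0, 128, 2).float() / 128))
    //   angles   = torch.arange(131072).float()[:, None] * inv_freq  # [131072,64]
    //   table    = torch.complex(angles.cos(), angles.sin())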
    %8471 = torch_c.to_builtin_tensor %8470 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8472 = flow.tensor.transfer %8471 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %8473 = torch_c.from_builtin_tensor %8472 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8474 = torch_c.to_builtin_tensor %8470 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8475 = flow.tensor.transfer %8474 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %8476 = torch_c.from_builtin_tensor %8475 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8477 = torch_c.to_builtin_tensor %8470 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8478 = flow.tensor.transfer %8477 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %8479 = torch_c.from_builtin_tensor %8478 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8480 = torch_c.to_builtin_tensor %8470 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8481 = flow.tensor.transfer %8480 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %8482 = torch_c.from_builtin_tensor %8481 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8483 = torch_c.to_builtin_tensor %8470 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8484 = flow.tensor.transfer %8483 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %8485 = torch_c.from_builtin_tensor %8484 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8486 = torch_c.to_builtin_tensor %8470 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8487 = flow.tensor.transfer %8486 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %8488 = torch_c.from_builtin_tensor %8487 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8489 = torch_c.to_builtin_tensor %8470 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8490 = flow.tensor.transfer %8489 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %8491 = torch_c.from_builtin_tensor %8490 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %8492 = torch_c.to_builtin_tensor %8470 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %8493 = flow.tensor.transfer %8492 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %8494 = torch_c.from_builtin_tensor %8493 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
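    // The table is materialized once and then copied to each of the eight
    // devices via flow.tensor.transfer (results %8473 through %8494), so
    // every shard rotates its activations against a local replica rather
    // than reading the table across devices.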
    %int1_6081 = torch.constant.int 1
    %8495 = torch.aten.size.int %8144, %int1_6081 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_6082 = torch.constant.int 0
    %8496 = torch.aten.add.int %int0_6082, %8495 : !torch.int, !torch.int -> !torch.int
    %int0_6083 = torch.constant.int 0
    %int0_6084 = torch.constant.int 0
    %int1_6085 = torch.constant.int 1
    %8497 = torch.aten.slice.Tensor %8473, %int0_6083, %int0_6084, %8496, %int1_6085 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8497, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_6086 = torch.constant.int 1
    %int0_6087 = torch.constant.int 0
    %int9223372036854775807_6088 = torch.constant.int 9223372036854775807
    %int1_6089 = torch.constant.int 1
    %8498 = torch.aten.slice.Tensor %8497, %int1_6086, %int0_6087, %int9223372036854775807_6088, %int1_6089 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8498, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_6090 = torch.constant.int 0
    %8499 = torch.aten.unsqueeze %8498, %int0_6090 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8499, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_6091 = torch.constant.int 2
    %8500 = torch.aten.unsqueeze %8499, %int2_6091 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8500, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_6092 = torch.constant.int 3
    %int0_6093 = torch.constant.int 0
    %int9223372036854775807_6094 = torch.constant.int 9223372036854775807
    %int1_6095 = torch.constant.int 1
    %8501 = torch.aten.slice.Tensor %8500, %int3_6092, %int0_6093, %int9223372036854775807_6094, %int1_6095 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8501, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8502 = torch_c.to_builtin_tensor %8268 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_6096 = arith.constant 1 : index
    %dim_6097 = tensor.dim %8502, %c1_6096 : tensor<4x?x1x128xf16>
    %8503 = flow.tensor.bitcast %8502 : tensor<4x?x1x128xf16>{%dim_6097} -> tensor<4x?x1x64xcomplex<f16>>{%dim_6097}
    %8504 = torch_c.from_builtin_tensor %8503 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %8504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %8505 = torch.aten.mul.Tensor %8504, %8501 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %8506 = torch_c.to_builtin_tensor %8505 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_6098 = arith.constant 1 : index
    %dim_6099 = tensor.dim %8506, %c1_6098 : tensor<4x?x1x64xcomplex<f32>>
    %8507 = flow.tensor.bitcast %8506 : tensor<4x?x1x64xcomplex<f32>>{%dim_6099} -> tensor<4x?x1x128xf32>{%dim_6099}
    %8508 = torch_c.from_builtin_tensor %8507 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %8508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_6100 = torch.constant.int 5
    %8509 = torch.prims.convert_element_type %8508, %int5_6100 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
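    // From here the same rotation is applied, once per device, to the
    // single-head tensors [4, seq, 1, 128] — one KV head per shard, if the
    // usual Llama-style grouped-query attention layout is assumed — using
    // the per-device table replicas produced above.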
    %int1_6101 = torch.constant.int 1
    %8510 = torch.aten.size.int %8150, %int1_6101 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_6102 = torch.constant.int 0
    %8511 = torch.aten.add.int %int0_6102, %8510 : !torch.int, !torch.int -> !torch.int
    %int0_6103 = torch.constant.int 0
    %int0_6104 = torch.constant.int 0
    %int1_6105 = torch.constant.int 1
    %8512 = torch.aten.slice.Tensor %8476, %int0_6103, %int0_6104, %8511, %int1_6105 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8512, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_6106 = torch.constant.int 1
    %int0_6107 = torch.constant.int 0
    %int9223372036854775807_6108 = torch.constant.int 9223372036854775807
    %int1_6109 = torch.constant.int 1
    %8513 = torch.aten.slice.Tensor %8512, %int1_6106, %int0_6107, %int9223372036854775807_6108, %int1_6109 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8513, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_6110 = torch.constant.int 0
    %8514 = torch.aten.unsqueeze %8513, %int0_6110 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8514, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_6111 = torch.constant.int 2
    %8515 = torch.aten.unsqueeze %8514, %int2_6111 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8515, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_6112 = torch.constant.int 3
    %int0_6113 = torch.constant.int 0
    %int9223372036854775807_6114 = torch.constant.int 9223372036854775807
    %int1_6115 = torch.constant.int 1
    %8516 = torch.aten.slice.Tensor %8515, %int3_6112, %int0_6113, %int9223372036854775807_6114, %int1_6115 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8516, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8517 = torch_c.to_builtin_tensor %8270 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_6116 = arith.constant 1 : index
    %dim_6117 = tensor.dim %8517, %c1_6116 : tensor<4x?x1x128xf16>
    %8518 = flow.tensor.bitcast %8517 : tensor<4x?x1x128xf16>{%dim_6117} -> tensor<4x?x1x64xcomplex<f16>>{%dim_6117}
    %8519 = torch_c.from_builtin_tensor %8518 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %8519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %8520 = torch.aten.mul.Tensor %8519, %8516 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %8521 = torch_c.to_builtin_tensor %8520 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_6118 = arith.constant 1 : index
    %dim_6119 = tensor.dim %8521, %c1_6118 : tensor<4x?x1x64xcomplex<f32>>
    %8522 = flow.tensor.bitcast %8521 : tensor<4x?x1x64xcomplex<f32>>{%dim_6119} -> tensor<4x?x1x128xf32>{%dim_6119}
    %8523 = torch_c.from_builtin_tensor %8522 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %8523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_6120 = torch.constant.int 5
    %8524 = torch.prims.convert_element_type %8523, %int5_6120 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_6121 = torch.constant.int 1
    %8525 = torch.aten.size.int %8156, %int1_6121 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_6122 = torch.constant.int 0
    %8526 = torch.aten.add.int %int0_6122, %8525 : !torch.int, !torch.int -> !torch.int
    %int0_6123 = torch.constant.int 0
    %int0_6124 = torch.constant.int 0
    %int1_6125 = torch.constant.int 1
    %8527 = torch.aten.slice.Tensor %8479, %int0_6123, %int0_6124, %8526, %int1_6125 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8527, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_6126 = torch.constant.int 1
    %int0_6127 = torch.constant.int 0
    %int9223372036854775807_6128 = torch.constant.int 9223372036854775807
    %int1_6129 = torch.constant.int 1
    %8528 = torch.aten.slice.Tensor %8527, %int1_6126, %int0_6127, %int9223372036854775807_6128, %int1_6129 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8528, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_6130 = torch.constant.int 0
    %8529 = torch.aten.unsqueeze %8528, %int0_6130 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8529, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_6131 = torch.constant.int 2
    %8530 = torch.aten.unsqueeze %8529, %int2_6131 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8530, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_6132 = torch.constant.int 3
    %int0_6133 = torch.constant.int 0
    %int9223372036854775807_6134 = torch.constant.int 9223372036854775807
    %int1_6135 = torch.constant.int 1
    %8531 = torch.aten.slice.Tensor %8530, %int3_6132, %int0_6133, %int9223372036854775807_6134, %int1_6135 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8531, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8532 = torch_c.to_builtin_tensor %8272 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_6136 = arith.constant 1 : index
    %dim_6137 = tensor.dim %8532, %c1_6136 : tensor<4x?x1x128xf16>
    %8533 = flow.tensor.bitcast %8532 : tensor<4x?x1x128xf16>{%dim_6137} -> tensor<4x?x1x64xcomplex<f16>>{%dim_6137}
    %8534 = torch_c.from_builtin_tensor %8533 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %8534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %8535 = torch.aten.mul.Tensor %8534, %8531 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %8536 = torch_c.to_builtin_tensor %8535 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_6138 = arith.constant 1 : index
    %dim_6139 = tensor.dim %8536, %c1_6138 : tensor<4x?x1x64xcomplex<f32>>
    %8537 = flow.tensor.bitcast %8536 : tensor<4x?x1x64xcomplex<f32>>{%dim_6139} -> tensor<4x?x1x128xf32>{%dim_6139}
    %8538 = torch_c.from_builtin_tensor %8537 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %8538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_6140 = torch.constant.int 5
    %8539 = torch.prims.convert_element_type %8538, %int5_6140 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_6141 = torch.constant.int 1
    %8540 = torch.aten.size.int %8162, %int1_6141 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_6142 = torch.constant.int 0
    %8541 = torch.aten.add.int %int0_6142, %8540 : !torch.int, !torch.int -> !torch.int
    %int0_6143 = torch.constant.int 0
    %int0_6144 = torch.constant.int 0
    %int1_6145 = torch.constant.int 1
    %8542 = torch.aten.slice.Tensor %8482, %int0_6143, %int0_6144, %8541, %int1_6145 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8542, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_6146 = torch.constant.int 1
    %int0_6147 = torch.constant.int 0
    %int9223372036854775807_6148 = torch.constant.int 9223372036854775807
    %int1_6149 = torch.constant.int 1
    %8543 = torch.aten.slice.Tensor %8542, %int1_6146, %int0_6147, %int9223372036854775807_6148, %int1_6149 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8543, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_6150 = torch.constant.int 0
    %8544 = torch.aten.unsqueeze %8543, %int0_6150 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8544, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_6151 = torch.constant.int 2
    %8545 = torch.aten.unsqueeze %8544, %int2_6151 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8545, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_6152 = torch.constant.int 3
    %int0_6153 = torch.constant.int 0
    %int9223372036854775807_6154 = torch.constant.int 9223372036854775807
    %int1_6155 = torch.constant.int 1
    %8546 = torch.aten.slice.Tensor %8545, %int3_6152, %int0_6153, %int9223372036854775807_6154, %int1_6155 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8546, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8547 = torch_c.to_builtin_tensor %8274 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_6156 = arith.constant 1 : index
    %dim_6157 = tensor.dim %8547, %c1_6156 : tensor<4x?x1x128xf16>
    %8548 = flow.tensor.bitcast %8547 : tensor<4x?x1x128xf16>{%dim_6157} -> tensor<4x?x1x64xcomplex<f16>>{%dim_6157}
    %8549 = torch_c.from_builtin_tensor %8548 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %8549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %8550 = torch.aten.mul.Tensor %8549, %8546 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %8551 = torch_c.to_builtin_tensor %8550 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_6158 = arith.constant 1 : index
    %dim_6159 = tensor.dim %8551, %c1_6158 : tensor<4x?x1x64xcomplex<f32>>
    %8552 = flow.tensor.bitcast %8551 : tensor<4x?x1x64xcomplex<f32>>{%dim_6159} -> tensor<4x?x1x128xf32>{%dim_6159}
    %8553 = torch_c.from_builtin_tensor %8552 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %8553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_6160 = torch.constant.int 5
    %8554 = torch.prims.convert_element_type %8553, %int5_6160 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_6161 = torch.constant.int 1
    %8555 = torch.aten.size.int %8168, %int1_6161 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_6162 = torch.constant.int 0
    %8556 = torch.aten.add.int %int0_6162, %8555 : !torch.int, !torch.int -> !torch.int
    %int0_6163 = torch.constant.int 0
    %int0_6164 = torch.constant.int 0
    %int1_6165 = torch.constant.int 1
    %8557 = torch.aten.slice.Tensor %8485, %int0_6163, %int0_6164, %8556, %int1_6165 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8557, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_6166 = torch.constant.int 1
    %int0_6167 = torch.constant.int 0
    %int9223372036854775807_6168 = torch.constant.int 9223372036854775807
    %int1_6169 = torch.constant.int 1
    %8558 = torch.aten.slice.Tensor %8557, %int1_6166, %int0_6167, %int9223372036854775807_6168, %int1_6169 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8558, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_6170 = torch.constant.int 0
    %8559 = torch.aten.unsqueeze %8558, %int0_6170 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8559, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_6171 = torch.constant.int 2
    %8560 = torch.aten.unsqueeze %8559, %int2_6171 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8560, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_6172 = torch.constant.int 3
    %int0_6173 = torch.constant.int 0
    %int9223372036854775807_6174 = torch.constant.int 9223372036854775807
    %int1_6175 = torch.constant.int 1
    %8561 = torch.aten.slice.Tensor %8560, %int3_6172, %int0_6173, %int9223372036854775807_6174, %int1_6175 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8561, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8562 = torch_c.to_builtin_tensor %8276 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_6176 = arith.constant 1 : index
    %dim_6177 = tensor.dim %8562, %c1_6176 : tensor<4x?x1x128xf16>
    %8563 = flow.tensor.bitcast %8562 : tensor<4x?x1x128xf16>{%dim_6177} -> tensor<4x?x1x64xcomplex<f16>>{%dim_6177}
    %8564 = torch_c.from_builtin_tensor %8563 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %8564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %8565 = torch.aten.mul.Tensor %8564, %8561 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %8566 = torch_c.to_builtin_tensor %8565 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_6178 = arith.constant 1 : index
    %dim_6179 = tensor.dim %8566, %c1_6178 : tensor<4x?x1x64xcomplex<f32>>
    %8567 = flow.tensor.bitcast %8566 : tensor<4x?x1x64xcomplex<f32>>{%dim_6179} -> tensor<4x?x1x128xf32>{%dim_6179}
    %8568 = torch_c.from_builtin_tensor %8567 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %8568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_6180 = torch.constant.int 5
    %8569 = torch.prims.convert_element_type %8568, %int5_6180 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_6181 = torch.constant.int 1
    %8570 = torch.aten.size.int %8174, %int1_6181 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_6182 = torch.constant.int 0
    %8571 = torch.aten.add.int %int0_6182, %8570 : !torch.int, !torch.int -> !torch.int
    %int0_6183 = torch.constant.int 0
    %int0_6184 = torch.constant.int 0
    %int1_6185 = torch.constant.int 1
    %8572 = torch.aten.slice.Tensor %8488, %int0_6183, %int0_6184, %8571, %int1_6185 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8572, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_6186 = torch.constant.int 1
    %int0_6187 = torch.constant.int 0
    %int9223372036854775807_6188 = torch.constant.int 9223372036854775807
    %int1_6189 = torch.constant.int 1
    %8573 = torch.aten.slice.Tensor %8572, %int1_6186, %int0_6187, %int9223372036854775807_6188, %int1_6189 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8573, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_6190 = torch.constant.int 0
    %8574 = torch.aten.unsqueeze %8573, %int0_6190 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8574, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_6191 = torch.constant.int 2
    %8575 = torch.aten.unsqueeze %8574, %int2_6191 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8575, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_6192 = torch.constant.int 3
    %int0_6193 = torch.constant.int 0
    %int9223372036854775807_6194 = torch.constant.int 9223372036854775807
    %int1_6195 = torch.constant.int 1
    %8576 = torch.aten.slice.Tensor %8575, %int3_6192, %int0_6193, %int9223372036854775807_6194, %int1_6195 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8576, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8577 = torch_c.to_builtin_tensor %8278 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_6196 = arith.constant 1 : index
    %dim_6197 = tensor.dim %8577, %c1_6196 : tensor<4x?x1x128xf16>
    %8578 = flow.tensor.bitcast %8577 : tensor<4x?x1x128xf16>{%dim_6197} -> tensor<4x?x1x64xcomplex<f16>>{%dim_6197}
    %8579 = torch_c.from_builtin_tensor %8578 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %8579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %8580 = torch.aten.mul.Tensor %8579, %8576 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %8581 = torch_c.to_builtin_tensor %8580 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_6198 = arith.constant 1 : index
    %dim_6199 = tensor.dim %8581, %c1_6198 : tensor<4x?x1x64xcomplex<f32>>
    %8582 = flow.tensor.bitcast %8581 : tensor<4x?x1x64xcomplex<f32>>{%dim_6199} -> tensor<4x?x1x128xf32>{%dim_6199}
    %8583 = torch_c.from_builtin_tensor %8582 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %8583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_6200 = torch.constant.int 5
    %8584 = torch.prims.convert_element_type %8583, %int5_6200 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_6201 = torch.constant.int 1
    %8585 = torch.aten.size.int %8180, %int1_6201 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_6202 = torch.constant.int 0
    %8586 = torch.aten.add.int %int0_6202, %8585 : !torch.int, !torch.int -> !torch.int
    %int0_6203 = torch.constant.int 0
    %int0_6204 = torch.constant.int 0
    %int1_6205 = torch.constant.int 1
    %8587 = torch.aten.slice.Tensor %8491, %int0_6203, %int0_6204, %8586, %int1_6205 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8587, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_6206 = torch.constant.int 1
    %int0_6207 = torch.constant.int 0
    %int9223372036854775807_6208 = torch.constant.int 9223372036854775807
    %int1_6209 = torch.constant.int 1
    %8588 = torch.aten.slice.Tensor %8587, %int1_6206, %int0_6207, %int9223372036854775807_6208, %int1_6209 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8588, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_6210 = torch.constant.int 0
    %8589 = torch.aten.unsqueeze %8588, %int0_6210 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8589, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_6211 = torch.constant.int 2
    %8590 = torch.aten.unsqueeze %8589, %int2_6211 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8590, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_6212 = torch.constant.int 3
    %int0_6213 = torch.constant.int 0
    %int9223372036854775807_6214 = torch.constant.int 9223372036854775807
    %int1_6215 = torch.constant.int 1
    %8591 = torch.aten.slice.Tensor %8590, %int3_6212, %int0_6213, %int9223372036854775807_6214, %int1_6215 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8591, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8592 = torch_c.to_builtin_tensor %8280 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_6216 = arith.constant 1 : index
    %dim_6217 = tensor.dim %8592, %c1_6216 : tensor<4x?x1x128xf16>
    %8593 = flow.tensor.bitcast %8592 : tensor<4x?x1x128xf16>{%dim_6217} -> tensor<4x?x1x64xcomplex<f16>>{%dim_6217}
    %8594 = torch_c.from_builtin_tensor %8593 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %8594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %8595 = torch.aten.mul.Tensor %8594, %8591 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %8596 = torch_c.to_builtin_tensor %8595 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_6218 = arith.constant 1 : index
    %dim_6219 = tensor.dim %8596, %c1_6218 : tensor<4x?x1x64xcomplex<f32>>
    %8597 = flow.tensor.bitcast %8596 : tensor<4x?x1x64xcomplex<f32>>{%dim_6219} -> tensor<4x?x1x128xf32>{%dim_6219}
    %8598 = torch_c.from_builtin_tensor %8597 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %8598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_6220 = torch.constant.int 5
    %8599 = torch.prims.convert_element_type %8598, %int5_6220 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_6221 = torch.constant.int 1
    %8600 = torch.aten.size.int %8186, %int1_6221 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_6222 = torch.constant.int 0
    %8601 = torch.aten.add.int %int0_6222, %8600 : !torch.int, !torch.int -> !torch.int
    %int0_6223 = torch.constant.int 0
    %int0_6224 = torch.constant.int 0
    %int1_6225 = torch.constant.int 1
    %8602 = torch.aten.slice.Tensor %8494, %int0_6223, %int0_6224, %8601, %int1_6225 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8602, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_6226 = torch.constant.int 1
    %int0_6227 = torch.constant.int 0
    %int9223372036854775807_6228 = torch.constant.int 9223372036854775807
    %int1_6229 = torch.constant.int 1
    %8603 = torch.aten.slice.Tensor %8602, %int1_6226, %int0_6227, %int9223372036854775807_6228, %int1_6229 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %8603, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_6230 = torch.constant.int 0
    %8604 = torch.aten.unsqueeze %8603, %int0_6230 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %8604, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_6231 = torch.constant.int 2
    %8605 = torch.aten.unsqueeze %8604, %int2_6231 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8605, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_6232 = torch.constant.int 3
    %int0_6233 = torch.constant.int 0
    %int9223372036854775807_6234 = torch.constant.int 9223372036854775807
    %int1_6235 = torch.constant.int 1
    %8606 = torch.aten.slice.Tensor %8605, %int3_6232, %int0_6233, %int9223372036854775807_6234, %int1_6235 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8606, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %8607 = torch_c.to_builtin_tensor %8282 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_6236 = arith.constant 1 : index
    %dim_6237 = tensor.dim %8607, %c1_6236 : tensor<4x?x1x128xf16>
    %8608 = flow.tensor.bitcast %8607 : tensor<4x?x1x128xf16>{%dim_6237} -> tensor<4x?x1x64xcomplex<f16>>{%dim_6237}
    %8609 = torch_c.from_builtin_tensor %8608 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %8609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %8610 = torch.aten.mul.Tensor %8609, %8606 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %8610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %8611 = torch_c.to_builtin_tensor %8610 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_6238 = arith.constant 1 : index
    %dim_6239 = tensor.dim %8611, %c1_6238 : tensor<4x?x1x64xcomplex<f32>>
    %8612 = flow.tensor.bitcast %8611 : tensor<4x?x1x64xcomplex<f32>>{%dim_6239} -> tensor<4x?x1x128xf32>{%dim_6239}
    %8613 = torch_c.from_builtin_tensor %8612 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %8613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_6240 = torch.constant.int 5
    %8614 = torch.prims.convert_element_type %8613, %int5_6240 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %8614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
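    // [annotation] Page-table arithmetic for the paged KV cache: each of the
    // eight per-device page-id tensors ([4,?] si64) is scaled by 64, which
    // matches the 64 = 32 x 2 (blocks x K/V) slots per page implied by the
    // [?,32,2,16,1,128] cache view used further below.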
    %int64_6241 = torch.constant.int 64
    %8615 = torch.aten.mul.Scalar %2364, %int64_6241 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8615, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_6242 = torch.constant.int 64
    %8616 = torch.aten.mul.Scalar %2367, %int64_6242 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8616, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_6243 = torch.constant.int 64
    %8617 = torch.aten.mul.Scalar %2370, %int64_6243 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8617, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_6244 = torch.constant.int 64
    %8618 = torch.aten.mul.Scalar %2373, %int64_6244 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8618, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_6245 = torch.constant.int 64
    %8619 = torch.aten.mul.Scalar %2376, %int64_6245 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8619, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_6246 = torch.constant.int 64
    %8620 = torch.aten.mul.Scalar %2379, %int64_6246 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8620, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_6247 = torch.constant.int 64
    %8621 = torch.aten.mul.Scalar %2382, %int64_6247 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8621, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_6248 = torch.constant.int 64
    %8622 = torch.aten.mul.Scalar %2385, %int64_6248 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8622, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
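    // [annotation] Adding the constant 6 appears to select this layer's K
    // slot within each page: with slots ordered (block, k/v) and 2 slots per
    // block, 6 = 2*3 would be transformer block 3's K entry. The matching V
    // slot (offset 7) is produced below by adding 1 to these indices.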
    %int6_6249 = torch.constant.int 6
    %int1_6250 = torch.constant.int 1
    %8623 = torch.aten.add.Scalar %8615, %int6_6249, %int1_6250 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8623, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int6_6251 = torch.constant.int 6
    %int1_6252 = torch.constant.int 1
    %8624 = torch.aten.add.Scalar %8616, %int6_6251, %int1_6252 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8624, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int6_6253 = torch.constant.int 6
    %int1_6254 = torch.constant.int 1
    %8625 = torch.aten.add.Scalar %8617, %int6_6253, %int1_6254 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8625, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int6_6255 = torch.constant.int 6
    %int1_6256 = torch.constant.int 1
    %8626 = torch.aten.add.Scalar %8618, %int6_6255, %int1_6256 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8626, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int6_6257 = torch.constant.int 6
    %int1_6258 = torch.constant.int 1
    %8627 = torch.aten.add.Scalar %8619, %int6_6257, %int1_6258 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8627, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int6_6259 = torch.constant.int 6
    %int1_6260 = torch.constant.int 1
    %8628 = torch.aten.add.Scalar %8620, %int6_6259, %int1_6260 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8628, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int6_6261 = torch.constant.int 6
    %int1_6262 = torch.constant.int 1
    %8629 = torch.aten.add.Scalar %8621, %int6_6261, %int1_6262 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8629, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int6_6263 = torch.constant.int 6
    %int1_6264 = torch.constant.int 1
    %8630 = torch.aten.add.Scalar %8622, %int6_6263, %int1_6264 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8630, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
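    // [annotation] The rotated K shards ([4,?,1,128] f16 with ? = s0*16
    // tokens) are next viewed as [4, s0, 16, 1, 128]: s0 pages of 16 tokens,
    // 1 (sharded) head, 128 head dim.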
    %int4_6265 = torch.constant.int 4
    %int16_6266 = torch.constant.int 16
    %int1_6267 = torch.constant.int 1
    %int128_6268 = torch.constant.int 128
    %8631 = torch.prim.ListConstruct %int4_6265, %3095, %int16_6266, %int1_6267, %int128_6268 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8632 = torch.aten.view %8509, %8631 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8632, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6269 = torch.constant.int 4
    %int16_6270 = torch.constant.int 16
    %int1_6271 = torch.constant.int 1
    %int128_6272 = torch.constant.int 128
    %8633 = torch.prim.ListConstruct %int4_6269, %3095, %int16_6270, %int1_6271, %int128_6272 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8634 = torch.aten.view %8524, %8633 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8634, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6273 = torch.constant.int 4
    %int16_6274 = torch.constant.int 16
    %int1_6275 = torch.constant.int 1
    %int128_6276 = torch.constant.int 128
    %8635 = torch.prim.ListConstruct %int4_6273, %3095, %int16_6274, %int1_6275, %int128_6276 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8636 = torch.aten.view %8539, %8635 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8636, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6277 = torch.constant.int 4
    %int16_6278 = torch.constant.int 16
    %int1_6279 = torch.constant.int 1
    %int128_6280 = torch.constant.int 128
    %8637 = torch.prim.ListConstruct %int4_6277, %3095, %int16_6278, %int1_6279, %int128_6280 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8638 = torch.aten.view %8554, %8637 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8638, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6281 = torch.constant.int 4
    %int16_6282 = torch.constant.int 16
    %int1_6283 = torch.constant.int 1
    %int128_6284 = torch.constant.int 128
    %8639 = torch.prim.ListConstruct %int4_6281, %3095, %int16_6282, %int1_6283, %int128_6284 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8640 = torch.aten.view %8569, %8639 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8640, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6285 = torch.constant.int 4
    %int16_6286 = torch.constant.int 16
    %int1_6287 = torch.constant.int 1
    %int128_6288 = torch.constant.int 128
    %8641 = torch.prim.ListConstruct %int4_6285, %3095, %int16_6286, %int1_6287, %int128_6288 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8642 = torch.aten.view %8584, %8641 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8642, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6289 = torch.constant.int 4
    %int16_6290 = torch.constant.int 16
    %int1_6291 = torch.constant.int 1
    %int128_6292 = torch.constant.int 128
    %8643 = torch.prim.ListConstruct %int4_6289, %3095, %int16_6290, %int1_6291, %int128_6292 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8644 = torch.aten.view %8599, %8643 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8644, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6293 = torch.constant.int 4
    %int16_6294 = torch.constant.int 16
    %int1_6295 = torch.constant.int 1
    %int128_6296 = torch.constant.int 128
    %8645 = torch.prim.ListConstruct %int4_6293, %3095, %int16_6294, %int1_6295, %int128_6296 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8646 = torch.aten.view %8614, %8645 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8646, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
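    // [annotation] Batch and page dims are then folded together,
    // [4,s0,16,1,128] -> [s0*4,16,1,128], so each row is one page-sized
    // chunk ready to scatter into the flattened cache.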
    %int4_6297 = torch.constant.int 4
    %8647 = torch.aten.mul.int %int4_6297, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6298 = torch.constant.int 16
    %int1_6299 = torch.constant.int 1
    %int128_6300 = torch.constant.int 128
    %8648 = torch.prim.ListConstruct %8647, %int16_6298, %int1_6299, %int128_6300 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8649 = torch.aten.view %8632, %8648 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8649, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6301 = torch.constant.int 4
    %8650 = torch.aten.mul.int %int4_6301, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6302 = torch.constant.int 16
    %int1_6303 = torch.constant.int 1
    %int128_6304 = torch.constant.int 128
    %8651 = torch.prim.ListConstruct %8650, %int16_6302, %int1_6303, %int128_6304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8652 = torch.aten.view %8634, %8651 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8652, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6305 = torch.constant.int 4
    %8653 = torch.aten.mul.int %int4_6305, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6306 = torch.constant.int 16
    %int1_6307 = torch.constant.int 1
    %int128_6308 = torch.constant.int 128
    %8654 = torch.prim.ListConstruct %8653, %int16_6306, %int1_6307, %int128_6308 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8655 = torch.aten.view %8636, %8654 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8655, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6309 = torch.constant.int 4
    %8656 = torch.aten.mul.int %int4_6309, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6310 = torch.constant.int 16
    %int1_6311 = torch.constant.int 1
    %int128_6312 = torch.constant.int 128
    %8657 = torch.prim.ListConstruct %8656, %int16_6310, %int1_6311, %int128_6312 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8658 = torch.aten.view %8638, %8657 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8658, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6313 = torch.constant.int 4
    %8659 = torch.aten.mul.int %int4_6313, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6314 = torch.constant.int 16
    %int1_6315 = torch.constant.int 1
    %int128_6316 = torch.constant.int 128
    %8660 = torch.prim.ListConstruct %8659, %int16_6314, %int1_6315, %int128_6316 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8661 = torch.aten.view %8640, %8660 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8661, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6317 = torch.constant.int 4
    %8662 = torch.aten.mul.int %int4_6317, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6318 = torch.constant.int 16
    %int1_6319 = torch.constant.int 1
    %int128_6320 = torch.constant.int 128
    %8663 = torch.prim.ListConstruct %8662, %int16_6318, %int1_6319, %int128_6320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8664 = torch.aten.view %8642, %8663 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8664, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6321 = torch.constant.int 4
    %8665 = torch.aten.mul.int %int4_6321, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6322 = torch.constant.int 16
    %int1_6323 = torch.constant.int 1
    %int128_6324 = torch.constant.int 128
    %8666 = torch.prim.ListConstruct %8665, %int16_6322, %int1_6323, %int128_6324 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8667 = torch.aten.view %8644, %8666 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8667, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6325 = torch.constant.int 4
    %8668 = torch.aten.mul.int %int4_6325, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6326 = torch.constant.int 16
    %int1_6327 = torch.constant.int 1
    %int128_6328 = torch.constant.int 128
    %8669 = torch.prim.ListConstruct %8668, %int16_6326, %int1_6327, %int128_6328 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8670 = torch.aten.view %8646, %8669 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8670, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
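    // [annotation] The K-slot index tensors are flattened the same way,
    // [4,s0] -> [s0*4], lining up one linear cache index per page row above.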
    %int4_6329 = torch.constant.int 4
    %8671 = torch.aten.mul.int %int4_6329, %3095 : !torch.int, !torch.int -> !torch.int
    %8672 = torch.prim.ListConstruct %8671 : (!torch.int) -> !torch.list<int>
    %8673 = torch.aten.view %8623, %8672 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8673, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6330 = torch.constant.int 4
    %8674 = torch.aten.mul.int %int4_6330, %3095 : !torch.int, !torch.int -> !torch.int
    %8675 = torch.prim.ListConstruct %8674 : (!torch.int) -> !torch.list<int>
    %8676 = torch.aten.view %8624, %8675 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8676, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6331 = torch.constant.int 4
    %8677 = torch.aten.mul.int %int4_6331, %3095 : !torch.int, !torch.int -> !torch.int
    %8678 = torch.prim.ListConstruct %8677 : (!torch.int) -> !torch.list<int>
    %8679 = torch.aten.view %8625, %8678 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8679, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6332 = torch.constant.int 4
    %8680 = torch.aten.mul.int %int4_6332, %3095 : !torch.int, !torch.int -> !torch.int
    %8681 = torch.prim.ListConstruct %8680 : (!torch.int) -> !torch.list<int>
    %8682 = torch.aten.view %8626, %8681 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8682, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6333 = torch.constant.int 4
    %8683 = torch.aten.mul.int %int4_6333, %3095 : !torch.int, !torch.int -> !torch.int
    %8684 = torch.prim.ListConstruct %8683 : (!torch.int) -> !torch.list<int>
    %8685 = torch.aten.view %8627, %8684 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8685, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6334 = torch.constant.int 4
    %8686 = torch.aten.mul.int %int4_6334, %3095 : !torch.int, !torch.int -> !torch.int
    %8687 = torch.prim.ListConstruct %8686 : (!torch.int) -> !torch.list<int>
    %8688 = torch.aten.view %8628, %8687 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8688, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6335 = torch.constant.int 4
    %8689 = torch.aten.mul.int %int4_6335, %3095 : !torch.int, !torch.int -> !torch.int
    %8690 = torch.prim.ListConstruct %8689 : (!torch.int) -> !torch.list<int>
    %8691 = torch.aten.view %8629, %8690 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8691, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6336 = torch.constant.int 4
    %8692 = torch.aten.mul.int %int4_6336, %3095 : !torch.int, !torch.int -> !torch.int
    %8693 = torch.prim.ListConstruct %8692 : (!torch.int) -> !torch.list<int>
    %8694 = torch.aten.view %8630, %8693 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8694, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
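    // [annotation] The eight tensors %8284..%8298, presumably the V shards
    // (values receive no rotation), get the same paging treatment below:
    // view [4,?,1,128] -> [4, s0, 16, 1, 128].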
    %int4_6337 = torch.constant.int 4
    %int16_6338 = torch.constant.int 16
    %int1_6339 = torch.constant.int 1
    %int128_6340 = torch.constant.int 128
    %8695 = torch.prim.ListConstruct %int4_6337, %3095, %int16_6338, %int1_6339, %int128_6340 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8696 = torch.aten.view %8284, %8695 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8696, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6341 = torch.constant.int 4
    %int16_6342 = torch.constant.int 16
    %int1_6343 = torch.constant.int 1
    %int128_6344 = torch.constant.int 128
    %8697 = torch.prim.ListConstruct %int4_6341, %3095, %int16_6342, %int1_6343, %int128_6344 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8698 = torch.aten.view %8286, %8697 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8698, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6345 = torch.constant.int 4
    %int16_6346 = torch.constant.int 16
    %int1_6347 = torch.constant.int 1
    %int128_6348 = torch.constant.int 128
    %8699 = torch.prim.ListConstruct %int4_6345, %3095, %int16_6346, %int1_6347, %int128_6348 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8700 = torch.aten.view %8288, %8699 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8700, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6349 = torch.constant.int 4
    %int16_6350 = torch.constant.int 16
    %int1_6351 = torch.constant.int 1
    %int128_6352 = torch.constant.int 128
    %8701 = torch.prim.ListConstruct %int4_6349, %3095, %int16_6350, %int1_6351, %int128_6352 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8702 = torch.aten.view %8290, %8701 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8702, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6353 = torch.constant.int 4
    %int16_6354 = torch.constant.int 16
    %int1_6355 = torch.constant.int 1
    %int128_6356 = torch.constant.int 128
    %8703 = torch.prim.ListConstruct %int4_6353, %3095, %int16_6354, %int1_6355, %int128_6356 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8704 = torch.aten.view %8292, %8703 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8704, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6357 = torch.constant.int 4
    %int16_6358 = torch.constant.int 16
    %int1_6359 = torch.constant.int 1
    %int128_6360 = torch.constant.int 128
    %8705 = torch.prim.ListConstruct %int4_6357, %3095, %int16_6358, %int1_6359, %int128_6360 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8706 = torch.aten.view %8294, %8705 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8706, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6361 = torch.constant.int 4
    %int16_6362 = torch.constant.int 16
    %int1_6363 = torch.constant.int 1
    %int128_6364 = torch.constant.int 128
    %8707 = torch.prim.ListConstruct %int4_6361, %3095, %int16_6362, %int1_6363, %int128_6364 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8708 = torch.aten.view %8296, %8707 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8708, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_6365 = torch.constant.int 4
    %int16_6366 = torch.constant.int 16
    %int1_6367 = torch.constant.int 1
    %int128_6368 = torch.constant.int 128
    %8709 = torch.prim.ListConstruct %int4_6365, %3095, %int16_6366, %int1_6367, %int128_6368 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8710 = torch.aten.view %8298, %8709 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %8710, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
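    // [annotation] ...then fold batch into pages, [4,s0,16,1,128] ->
    // [s0*4,16,1,128], mirroring the K payload reshapes above.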
    %int4_6369 = torch.constant.int 4
    %8711 = torch.aten.mul.int %int4_6369, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6370 = torch.constant.int 16
    %int1_6371 = torch.constant.int 1
    %int128_6372 = torch.constant.int 128
    %8712 = torch.prim.ListConstruct %8711, %int16_6370, %int1_6371, %int128_6372 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8713 = torch.aten.view %8696, %8712 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8713, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6373 = torch.constant.int 4
    %8714 = torch.aten.mul.int %int4_6373, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6374 = torch.constant.int 16
    %int1_6375 = torch.constant.int 1
    %int128_6376 = torch.constant.int 128
    %8715 = torch.prim.ListConstruct %8714, %int16_6374, %int1_6375, %int128_6376 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8716 = torch.aten.view %8698, %8715 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8716, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6377 = torch.constant.int 4
    %8717 = torch.aten.mul.int %int4_6377, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6378 = torch.constant.int 16
    %int1_6379 = torch.constant.int 1
    %int128_6380 = torch.constant.int 128
    %8718 = torch.prim.ListConstruct %8717, %int16_6378, %int1_6379, %int128_6380 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8719 = torch.aten.view %8700, %8718 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8719, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6381 = torch.constant.int 4
    %8720 = torch.aten.mul.int %int4_6381, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6382 = torch.constant.int 16
    %int1_6383 = torch.constant.int 1
    %int128_6384 = torch.constant.int 128
    %8721 = torch.prim.ListConstruct %8720, %int16_6382, %int1_6383, %int128_6384 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8722 = torch.aten.view %8702, %8721 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8722, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6385 = torch.constant.int 4
    %8723 = torch.aten.mul.int %int4_6385, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6386 = torch.constant.int 16
    %int1_6387 = torch.constant.int 1
    %int128_6388 = torch.constant.int 128
    %8724 = torch.prim.ListConstruct %8723, %int16_6386, %int1_6387, %int128_6388 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8725 = torch.aten.view %8704, %8724 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8725, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6389 = torch.constant.int 4
    %8726 = torch.aten.mul.int %int4_6389, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6390 = torch.constant.int 16
    %int1_6391 = torch.constant.int 1
    %int128_6392 = torch.constant.int 128
    %8727 = torch.prim.ListConstruct %8726, %int16_6390, %int1_6391, %int128_6392 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8728 = torch.aten.view %8706, %8727 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8728, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6393 = torch.constant.int 4
    %8729 = torch.aten.mul.int %int4_6393, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6394 = torch.constant.int 16
    %int1_6395 = torch.constant.int 1
    %int128_6396 = torch.constant.int 128
    %8730 = torch.prim.ListConstruct %8729, %int16_6394, %int1_6395, %int128_6396 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8731 = torch.aten.view %8708, %8730 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8731, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_6397 = torch.constant.int 4
    %8732 = torch.aten.mul.int %int4_6397, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_6398 = torch.constant.int 16
    %int1_6399 = torch.constant.int 1
    %int128_6400 = torch.constant.int 128
    %8733 = torch.prim.ListConstruct %8732, %int16_6398, %int1_6399, %int128_6400 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8734 = torch.aten.view %8710, %8733 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8734, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
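    // [annotation] V-slot indices: adding 1 to each K-slot index tensor moves
    // from offset 6 to offset 7 inside the page, i.e. this block's V entry.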
    %int1_6401 = torch.constant.int 1
    %int1_6402 = torch.constant.int 1
    %8735 = torch.aten.add.Scalar %8623, %int1_6401, %int1_6402 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8735, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_6403 = torch.constant.int 1
    %int1_6404 = torch.constant.int 1
    %8736 = torch.aten.add.Scalar %8624, %int1_6403, %int1_6404 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8736, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_6405 = torch.constant.int 1
    %int1_6406 = torch.constant.int 1
    %8737 = torch.aten.add.Scalar %8625, %int1_6405, %int1_6406 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8737, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_6407 = torch.constant.int 1
    %int1_6408 = torch.constant.int 1
    %8738 = torch.aten.add.Scalar %8626, %int1_6407, %int1_6408 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8738, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_6409 = torch.constant.int 1
    %int1_6410 = torch.constant.int 1
    %8739 = torch.aten.add.Scalar %8627, %int1_6409, %int1_6410 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8739, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_6411 = torch.constant.int 1
    %int1_6412 = torch.constant.int 1
    %8740 = torch.aten.add.Scalar %8628, %int1_6411, %int1_6412 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8740, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_6413 = torch.constant.int 1
    %int1_6414 = torch.constant.int 1
    %8741 = torch.aten.add.Scalar %8629, %int1_6413, %int1_6414 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8741, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_6415 = torch.constant.int 1
    %int1_6416 = torch.constant.int 1
    %8742 = torch.aten.add.Scalar %8630, %int1_6415, %int1_6416 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %8742, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
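    // [annotation] The V-slot indices are flattened to [s0*4], matching the
    // K-index flattening above.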
    %int4_6417 = torch.constant.int 4
    %8743 = torch.aten.mul.int %int4_6417, %3095 : !torch.int, !torch.int -> !torch.int
    %8744 = torch.prim.ListConstruct %8743 : (!torch.int) -> !torch.list<int>
    %8745 = torch.aten.view %8735, %8744 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8745, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6418 = torch.constant.int 4
    %8746 = torch.aten.mul.int %int4_6418, %3095 : !torch.int, !torch.int -> !torch.int
    %8747 = torch.prim.ListConstruct %8746 : (!torch.int) -> !torch.list<int>
    %8748 = torch.aten.view %8736, %8747 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8748, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6419 = torch.constant.int 4
    %8749 = torch.aten.mul.int %int4_6419, %3095 : !torch.int, !torch.int -> !torch.int
    %8750 = torch.prim.ListConstruct %8749 : (!torch.int) -> !torch.list<int>
    %8751 = torch.aten.view %8737, %8750 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8751, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6420 = torch.constant.int 4
    %8752 = torch.aten.mul.int %int4_6420, %3095 : !torch.int, !torch.int -> !torch.int
    %8753 = torch.prim.ListConstruct %8752 : (!torch.int) -> !torch.list<int>
    %8754 = torch.aten.view %8738, %8753 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8754, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6421 = torch.constant.int 4
    %8755 = torch.aten.mul.int %int4_6421, %3095 : !torch.int, !torch.int -> !torch.int
    %8756 = torch.prim.ListConstruct %8755 : (!torch.int) -> !torch.list<int>
    %8757 = torch.aten.view %8739, %8756 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8757, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6422 = torch.constant.int 4
    %8758 = torch.aten.mul.int %int4_6422, %3095 : !torch.int, !torch.int -> !torch.int
    %8759 = torch.prim.ListConstruct %8758 : (!torch.int) -> !torch.list<int>
    %8760 = torch.aten.view %8740, %8759 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8760, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6423 = torch.constant.int 4
    %8761 = torch.aten.mul.int %int4_6423, %3095 : !torch.int, !torch.int -> !torch.int
    %8762 = torch.prim.ListConstruct %8761 : (!torch.int) -> !torch.list<int>
    %8763 = torch.aten.view %8741, %8762 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8763, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_6424 = torch.constant.int 4
    %8764 = torch.aten.mul.int %int4_6424, %3095 : !torch.int, !torch.int -> !torch.int
    %8765 = torch.prim.ListConstruct %8764 : (!torch.int) -> !torch.list<int>
    %8766 = torch.aten.view %8742, %8765 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8766, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
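    // [annotation] K and V slot indices are concatenated into a single
    // [s0*8] index vector per device, so one index_put can write both.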
    %8767 = torch.prim.ListConstruct %8673, %8745 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_6425 = torch.constant.int 0
    %8768 = torch.aten.cat %8767, %int0_6425 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8768, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %8769 = torch.prim.ListConstruct %8676, %8748 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_6426 = torch.constant.int 0
    %8770 = torch.aten.cat %8769, %int0_6426 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8770, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %8771 = torch.prim.ListConstruct %8679, %8751 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_6427 = torch.constant.int 0
    %8772 = torch.aten.cat %8771, %int0_6427 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8772, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %8773 = torch.prim.ListConstruct %8682, %8754 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_6428 = torch.constant.int 0
    %8774 = torch.aten.cat %8773, %int0_6428 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8774, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %8775 = torch.prim.ListConstruct %8685, %8757 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_6429 = torch.constant.int 0
    %8776 = torch.aten.cat %8775, %int0_6429 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8776, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %8777 = torch.prim.ListConstruct %8688, %8760 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_6430 = torch.constant.int 0
    %8778 = torch.aten.cat %8777, %int0_6430 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8778, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %8779 = torch.prim.ListConstruct %8691, %8763 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_6431 = torch.constant.int 0
    %8780 = torch.aten.cat %8779, %int0_6431 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8780, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %8781 = torch.prim.ListConstruct %8694, %8766 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_6432 = torch.constant.int 0
    %8782 = torch.aten.cat %8781, %int0_6432 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %8782, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
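    // [annotation] Likewise the K and V page payloads are concatenated to
    // [s0*8,16,1,128], aligned element-for-element with the index vectors.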
    %8783 = torch.prim.ListConstruct %8649, %8713 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_6433 = torch.constant.int 0
    %8784 = torch.aten.cat %8783, %int0_6433 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8784, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8785 = torch.prim.ListConstruct %8652, %8716 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_6434 = torch.constant.int 0
    %8786 = torch.aten.cat %8785, %int0_6434 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8786, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8787 = torch.prim.ListConstruct %8655, %8719 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_6435 = torch.constant.int 0
    %8788 = torch.aten.cat %8787, %int0_6435 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8788, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8789 = torch.prim.ListConstruct %8658, %8722 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_6436 = torch.constant.int 0
    %8790 = torch.aten.cat %8789, %int0_6436 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8790, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8791 = torch.prim.ListConstruct %8661, %8725 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_6437 = torch.constant.int 0
    %8792 = torch.aten.cat %8791, %int0_6437 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8792, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8793 = torch.prim.ListConstruct %8664, %8728 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_6438 = torch.constant.int 0
    %8794 = torch.aten.cat %8793, %int0_6438 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8794, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8795 = torch.prim.ListConstruct %8667, %8731 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_6439 = torch.constant.int 0
    %8796 = torch.aten.cat %8795, %int0_6439 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8796, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8797 = torch.prim.ListConstruct %8670, %8734 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_6440 = torch.constant.int 0
    %8798 = torch.aten.cat %8797, %int0_6440 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8798, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
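    // [annotation] Cache update, repeated below for each device's cache
    // buffer (%6949, %6961, %6973, ...): the flat [?,131072] f16 buffer is
    // viewed as [pages,32,2,16,1,128] (131072 = 32*2*16*1*128 elements per
    // page), collapsed to [pages*64,16,1,128], scattered into with
    // torch.aten.index_put (accumulate = false) using the indices and
    // payloads built above, then viewed back to its flat layout. Note these
    // ops bind the cache's own page-count symbol [%2337] rather than the
    // sequence symbol [%2336] used by the data tensors.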
    %int32_6441 = torch.constant.int 32
    %int2_6442 = torch.constant.int 2
    %int16_6443 = torch.constant.int 16
    %int1_6444 = torch.constant.int 1
    %int128_6445 = torch.constant.int 128
    %8799 = torch.prim.ListConstruct %3023, %int32_6441, %int2_6442, %int16_6443, %int1_6444, %int128_6445 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8800 = torch.aten.view %6949, %8799 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8800, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_6446 = torch.constant.int 32
    %8801 = torch.aten.mul.int %3023, %int32_6446 : !torch.int, !torch.int -> !torch.int
    %int2_6447 = torch.constant.int 2
    %8802 = torch.aten.mul.int %8801, %int2_6447 : !torch.int, !torch.int -> !torch.int
    %int16_6448 = torch.constant.int 16
    %int1_6449 = torch.constant.int 1
    %int128_6450 = torch.constant.int 128
    %8803 = torch.prim.ListConstruct %8802, %int16_6448, %int1_6449, %int128_6450 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8804 = torch.aten.view %8800, %8803 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8804, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8805 = torch.prim.ListConstruct %8768 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_6451 = torch.constant.bool false
    %8806 = torch.aten.index_put %8804, %8805, %8784, %false_6451 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8806, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_6452 = torch.constant.int 32
    %int2_6453 = torch.constant.int 2
    %int16_6454 = torch.constant.int 16
    %int1_6455 = torch.constant.int 1
    %int128_6456 = torch.constant.int 128
    %8807 = torch.prim.ListConstruct %3023, %int32_6452, %int2_6453, %int16_6454, %int1_6455, %int128_6456 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8808 = torch.aten.view %8806, %8807 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8808, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_6457 = torch.constant.int 131072
    %8809 = torch.prim.ListConstruct %3023, %int131072_6457 : (!torch.int, !torch.int) -> !torch.list<int>
    %8810 = torch.aten.view %8808, %8809 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %8810, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_6458 = torch.constant.int 32
    %int2_6459 = torch.constant.int 2
    %int16_6460 = torch.constant.int 16
    %int1_6461 = torch.constant.int 1
    %int128_6462 = torch.constant.int 128
    %8811 = torch.prim.ListConstruct %3026, %int32_6458, %int2_6459, %int16_6460, %int1_6461, %int128_6462 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8812 = torch.aten.view %6961, %8811 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8812, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_6463 = torch.constant.int 32
    %8813 = torch.aten.mul.int %3026, %int32_6463 : !torch.int, !torch.int -> !torch.int
    %int2_6464 = torch.constant.int 2
    %8814 = torch.aten.mul.int %8813, %int2_6464 : !torch.int, !torch.int -> !torch.int
    %int16_6465 = torch.constant.int 16
    %int1_6466 = torch.constant.int 1
    %int128_6467 = torch.constant.int 128
    %8815 = torch.prim.ListConstruct %8814, %int16_6465, %int1_6466, %int128_6467 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8816 = torch.aten.view %8812, %8815 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8816, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8817 = torch.prim.ListConstruct %8770 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_6468 = torch.constant.bool false
    %8818 = torch.aten.index_put %8816, %8817, %8786, %false_6468 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8818, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_6469 = torch.constant.int 32
    %int2_6470 = torch.constant.int 2
    %int16_6471 = torch.constant.int 16
    %int1_6472 = torch.constant.int 1
    %int128_6473 = torch.constant.int 128
    %8819 = torch.prim.ListConstruct %3026, %int32_6469, %int2_6470, %int16_6471, %int1_6472, %int128_6473 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8820 = torch.aten.view %8818, %8819 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8820, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_6474 = torch.constant.int 131072
    %8821 = torch.prim.ListConstruct %3026, %int131072_6474 : (!torch.int, !torch.int) -> !torch.list<int>
    %8822 = torch.aten.view %8820, %8821 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %8822, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_6475 = torch.constant.int 32
    %int2_6476 = torch.constant.int 2
    %int16_6477 = torch.constant.int 16
    %int1_6478 = torch.constant.int 1
    %int128_6479 = torch.constant.int 128
    %8823 = torch.prim.ListConstruct %3029, %int32_6475, %int2_6476, %int16_6477, %int1_6478, %int128_6479 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8824 = torch.aten.view %6973, %8823 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8824, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_6480 = torch.constant.int 32
    %8825 = torch.aten.mul.int %3029, %int32_6480 : !torch.int, !torch.int -> !torch.int
    %int2_6481 = torch.constant.int 2
    %8826 = torch.aten.mul.int %8825, %int2_6481 : !torch.int, !torch.int -> !torch.int
    %int16_6482 = torch.constant.int 16
    %int1_6483 = torch.constant.int 1
    %int128_6484 = torch.constant.int 128
    %8827 = torch.prim.ListConstruct %8826, %int16_6482, %int1_6483, %int128_6484 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8828 = torch.aten.view %8824, %8827 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8828, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8829 = torch.prim.ListConstruct %8772 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_6485 = torch.constant.bool false
    %8830 = torch.aten.index_put %8828, %8829, %8788, %false_6485 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8830, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_6486 = torch.constant.int 32
    %int2_6487 = torch.constant.int 2
    %int16_6488 = torch.constant.int 16
    %int1_6489 = torch.constant.int 1
    %int128_6490 = torch.constant.int 128
    %8831 = torch.prim.ListConstruct %3029, %int32_6486, %int2_6487, %int16_6488, %int1_6489, %int128_6490 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8832 = torch.aten.view %8830, %8831 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8832, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_6491 = torch.constant.int 131072
    %8833 = torch.prim.ListConstruct %3029, %int131072_6491 : (!torch.int, !torch.int) -> !torch.list<int>
    %8834 = torch.aten.view %8832, %8833 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %8834, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_6492 = torch.constant.int 32
    %int2_6493 = torch.constant.int 2
    %int16_6494 = torch.constant.int 16
    %int1_6495 = torch.constant.int 1
    %int128_6496 = torch.constant.int 128
    %8835 = torch.prim.ListConstruct %3032, %int32_6492, %int2_6493, %int16_6494, %int1_6495, %int128_6496 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8836 = torch.aten.view %6985, %8835 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8836, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_6497 = torch.constant.int 32
    %8837 = torch.aten.mul.int %3032, %int32_6497 : !torch.int, !torch.int -> !torch.int
    %int2_6498 = torch.constant.int 2
    %8838 = torch.aten.mul.int %8837, %int2_6498 : !torch.int, !torch.int -> !torch.int
    %int16_6499 = torch.constant.int 16
    %int1_6500 = torch.constant.int 1
    %int128_6501 = torch.constant.int 128
    %8839 = torch.prim.ListConstruct %8838, %int16_6499, %int1_6500, %int128_6501 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8840 = torch.aten.view %8836, %8839 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8840, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8841 = torch.prim.ListConstruct %8774 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_6502 = torch.constant.bool false
    %8842 = torch.aten.index_put %8840, %8841, %8790, %false_6502 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8842, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_6503 = torch.constant.int 32
    %int2_6504 = torch.constant.int 2
    %int16_6505 = torch.constant.int 16
    %int1_6506 = torch.constant.int 1
    %int128_6507 = torch.constant.int 128
    %8843 = torch.prim.ListConstruct %3032, %int32_6503, %int2_6504, %int16_6505, %int1_6506, %int128_6507 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8844 = torch.aten.view %8842, %8843 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8844, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_6508 = torch.constant.int 131072
    %8845 = torch.prim.ListConstruct %3032, %int131072_6508 : (!torch.int, !torch.int) -> !torch.list<int>
    %8846 = torch.aten.view %8844, %8845 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %8846, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_6509 = torch.constant.int 32
    %int2_6510 = torch.constant.int 2
    %int16_6511 = torch.constant.int 16
    %int1_6512 = torch.constant.int 1
    %int128_6513 = torch.constant.int 128
    %8847 = torch.prim.ListConstruct %3035, %int32_6509, %int2_6510, %int16_6511, %int1_6512, %int128_6513 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8848 = torch.aten.view %6997, %8847 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8848, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_6514 = torch.constant.int 32
    %8849 = torch.aten.mul.int %3035, %int32_6514 : !torch.int, !torch.int -> !torch.int
    %int2_6515 = torch.constant.int 2
    %8850 = torch.aten.mul.int %8849, %int2_6515 : !torch.int, !torch.int -> !torch.int
    %int16_6516 = torch.constant.int 16
    %int1_6517 = torch.constant.int 1
    %int128_6518 = torch.constant.int 128
    %8851 = torch.prim.ListConstruct %8850, %int16_6516, %int1_6517, %int128_6518 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8852 = torch.aten.view %8848, %8851 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8852, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8853 = torch.prim.ListConstruct %8776 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_6519 = torch.constant.bool false
    %8854 = torch.aten.index_put %8852, %8853, %8792, %false_6519 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8854, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_6520 = torch.constant.int 32
    %int2_6521 = torch.constant.int 2
    %int16_6522 = torch.constant.int 16
    %int1_6523 = torch.constant.int 1
    %int128_6524 = torch.constant.int 128
    %8855 = torch.prim.ListConstruct %3035, %int32_6520, %int2_6521, %int16_6522, %int1_6523, %int128_6524 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8856 = torch.aten.view %8854, %8855 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8856, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_6525 = torch.constant.int 131072
    %8857 = torch.prim.ListConstruct %3035, %int131072_6525 : (!torch.int, !torch.int) -> !torch.list<int>
    %8858 = torch.aten.view %8856, %8857 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %8858, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_6526 = torch.constant.int 32
    %int2_6527 = torch.constant.int 2
    %int16_6528 = torch.constant.int 16
    %int1_6529 = torch.constant.int 1
    %int128_6530 = torch.constant.int 128
    %8859 = torch.prim.ListConstruct %3038, %int32_6526, %int2_6527, %int16_6528, %int1_6529, %int128_6530 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8860 = torch.aten.view %7009, %8859 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8860, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_6531 = torch.constant.int 32
    %8861 = torch.aten.mul.int %3038, %int32_6531 : !torch.int, !torch.int -> !torch.int
    %int2_6532 = torch.constant.int 2
    %8862 = torch.aten.mul.int %8861, %int2_6532 : !torch.int, !torch.int -> !torch.int
    %int16_6533 = torch.constant.int 16
    %int1_6534 = torch.constant.int 1
    %int128_6535 = torch.constant.int 128
    %8863 = torch.prim.ListConstruct %8862, %int16_6533, %int1_6534, %int128_6535 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8864 = torch.aten.view %8860, %8863 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8864, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8865 = torch.prim.ListConstruct %8778 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_6536 = torch.constant.bool false
    %8866 = torch.aten.index_put %8864, %8865, %8794, %false_6536 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8866, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_6537 = torch.constant.int 32
    %int2_6538 = torch.constant.int 2
    %int16_6539 = torch.constant.int 16
    %int1_6540 = torch.constant.int 1
    %int128_6541 = torch.constant.int 128
    %8867 = torch.prim.ListConstruct %3038, %int32_6537, %int2_6538, %int16_6539, %int1_6540, %int128_6541 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8868 = torch.aten.view %8866, %8867 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8868, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_6542 = torch.constant.int 131072
    %8869 = torch.prim.ListConstruct %3038, %int131072_6542 : (!torch.int, !torch.int) -> !torch.list<int>
    %8870 = torch.aten.view %8868, %8869 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %8870, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_6543 = torch.constant.int 32
    %int2_6544 = torch.constant.int 2
    %int16_6545 = torch.constant.int 16
    %int1_6546 = torch.constant.int 1
    %int128_6547 = torch.constant.int 128
    %8871 = torch.prim.ListConstruct %3041, %int32_6543, %int2_6544, %int16_6545, %int1_6546, %int128_6547 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8872 = torch.aten.view %7021, %8871 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8872, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_6548 = torch.constant.int 32
    %8873 = torch.aten.mul.int %3041, %int32_6548 : !torch.int, !torch.int -> !torch.int
    %int2_6549 = torch.constant.int 2
    %8874 = torch.aten.mul.int %8873, %int2_6549 : !torch.int, !torch.int -> !torch.int
    %int16_6550 = torch.constant.int 16
    %int1_6551 = torch.constant.int 1
    %int128_6552 = torch.constant.int 128
    %8875 = torch.prim.ListConstruct %8874, %int16_6550, %int1_6551, %int128_6552 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8876 = torch.aten.view %8872, %8875 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8876, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8877 = torch.prim.ListConstruct %8780 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_6553 = torch.constant.bool false
    %8878 = torch.aten.index_put %8876, %8877, %8796, %false_6553 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8878, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_6554 = torch.constant.int 32
    %int2_6555 = torch.constant.int 2
    %int16_6556 = torch.constant.int 16
    %int1_6557 = torch.constant.int 1
    %int128_6558 = torch.constant.int 128
    %8879 = torch.prim.ListConstruct %3041, %int32_6554, %int2_6555, %int16_6556, %int1_6557, %int128_6558 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8880 = torch.aten.view %8878, %8879 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8880, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_6559 = torch.constant.int 131072
    %8881 = torch.prim.ListConstruct %3041, %int131072_6559 : (!torch.int, !torch.int) -> !torch.list<int>
    %8882 = torch.aten.view %8880, %8881 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %8882, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_6560 = torch.constant.int 32
    %int2_6561 = torch.constant.int 2
    %int16_6562 = torch.constant.int 16
    %int1_6563 = torch.constant.int 1
    %int128_6564 = torch.constant.int 128
    %8883 = torch.prim.ListConstruct %3044, %int32_6560, %int2_6561, %int16_6562, %int1_6563, %int128_6564 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8884 = torch.aten.view %7033, %8883 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8884, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_6565 = torch.constant.int 32
    %8885 = torch.aten.mul.int %3044, %int32_6565 : !torch.int, !torch.int -> !torch.int
    %int2_6566 = torch.constant.int 2
    %8886 = torch.aten.mul.int %8885, %int2_6566 : !torch.int, !torch.int -> !torch.int
    %int16_6567 = torch.constant.int 16
    %int1_6568 = torch.constant.int 1
    %int128_6569 = torch.constant.int 128
    %8887 = torch.prim.ListConstruct %8886, %int16_6567, %int1_6568, %int128_6569 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8888 = torch.aten.view %8884, %8887 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8888, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %8889 = torch.prim.ListConstruct %8782 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_6570 = torch.constant.bool false
    %8890 = torch.aten.index_put %8888, %8889, %8798, %false_6570 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %8890, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_6571 = torch.constant.int 32
    %int2_6572 = torch.constant.int 2
    %int16_6573 = torch.constant.int 16
    %int1_6574 = torch.constant.int 1
    %int128_6575 = torch.constant.int 128
    %8891 = torch.prim.ListConstruct %3044, %int32_6571, %int2_6572, %int16_6573, %int1_6574, %int128_6575 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8892 = torch.aten.view %8890, %8891 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %8892, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_6576 = torch.constant.int 131072
    %8893 = torch.prim.ListConstruct %3044, %int131072_6576 : (!torch.int, !torch.int) -> !torch.list<int>
    %8894 = torch.aten.view %8892, %8893 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %8894, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
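    // With all eight shard caches updated, the K tensors ([4,?,1,128], one KV
    // head per shard) are prepared for grouped-query attention: unsqueeze at
    // dim -2 yields [4,?,1,1,128], so the single KV head can be broadcast
    // across the 4 query heads that share it.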
    %int-2_6577 = torch.constant.int -2
    %8895 = torch.aten.unsqueeze %8509, %int-2_6577 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6578 = torch.constant.int -2
    %8896 = torch.aten.unsqueeze %8524, %int-2_6578 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6579 = torch.constant.int -2
    %8897 = torch.aten.unsqueeze %8539, %int-2_6579 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6580 = torch.constant.int -2
    %8898 = torch.aten.unsqueeze %8554, %int-2_6580 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6581 = torch.constant.int -2
    %8899 = torch.aten.unsqueeze %8569, %int-2_6581 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6582 = torch.constant.int -2
    %8900 = torch.aten.unsqueeze %8584, %int-2_6582 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6583 = torch.constant.int -2
    %8901 = torch.aten.unsqueeze %8599, %int-2_6583 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6584 = torch.constant.int -2
    %8902 = torch.aten.unsqueeze %8614, %int-2_6584 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
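    // expand broadcasts each unsqueezed K to [4,?,1,4,128] without copying
    // data; the trailing bool (implicit = false) means only the explicit
    // size-1 head dimension is widened.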
    %int4_6585 = torch.constant.int 4
    %int1_6586 = torch.constant.int 1
    %int4_6587 = torch.constant.int 4
    %int128_6588 = torch.constant.int 128
    %8903 = torch.prim.ListConstruct %int4_6585, %8495, %int1_6586, %int4_6587, %int128_6588 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6589 = torch.constant.bool false
    %8904 = torch.aten.expand %8895, %8903, %false_6589 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6590 = torch.constant.int 4
    %int1_6591 = torch.constant.int 1
    %int4_6592 = torch.constant.int 4
    %int128_6593 = torch.constant.int 128
    %8905 = torch.prim.ListConstruct %int4_6590, %8495, %int1_6591, %int4_6592, %int128_6593 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6594 = torch.constant.bool false
    %8906 = torch.aten.expand %8896, %8905, %false_6594 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6595 = torch.constant.int 4
    %int1_6596 = torch.constant.int 1
    %int4_6597 = torch.constant.int 4
    %int128_6598 = torch.constant.int 128
    %8907 = torch.prim.ListConstruct %int4_6595, %8495, %int1_6596, %int4_6597, %int128_6598 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6599 = torch.constant.bool false
    %8908 = torch.aten.expand %8897, %8907, %false_6599 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6600 = torch.constant.int 4
    %int1_6601 = torch.constant.int 1
    %int4_6602 = torch.constant.int 4
    %int128_6603 = torch.constant.int 128
    %8909 = torch.prim.ListConstruct %int4_6600, %8495, %int1_6601, %int4_6602, %int128_6603 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6604 = torch.constant.bool false
    %8910 = torch.aten.expand %8898, %8909, %false_6604 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6605 = torch.constant.int 4
    %int1_6606 = torch.constant.int 1
    %int4_6607 = torch.constant.int 4
    %int128_6608 = torch.constant.int 128
    %8911 = torch.prim.ListConstruct %int4_6605, %8495, %int1_6606, %int4_6607, %int128_6608 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6609 = torch.constant.bool false
    %8912 = torch.aten.expand %8899, %8911, %false_6609 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6610 = torch.constant.int 4
    %int1_6611 = torch.constant.int 1
    %int4_6612 = torch.constant.int 4
    %int128_6613 = torch.constant.int 128
    %8913 = torch.prim.ListConstruct %int4_6610, %8495, %int1_6611, %int4_6612, %int128_6613 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6614 = torch.constant.bool false
    %8914 = torch.aten.expand %8900, %8913, %false_6614 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6615 = torch.constant.int 4
    %int1_6616 = torch.constant.int 1
    %int4_6617 = torch.constant.int 4
    %int128_6618 = torch.constant.int 128
    %8915 = torch.prim.ListConstruct %int4_6615, %8495, %int1_6616, %int4_6617, %int128_6618 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6619 = torch.constant.bool false
    %8916 = torch.aten.expand %8901, %8915, %false_6619 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6620 = torch.constant.int 4
    %int1_6621 = torch.constant.int 1
    %int4_6622 = torch.constant.int 4
    %int128_6623 = torch.constant.int 128
    %8917 = torch.prim.ListConstruct %int4_6620, %8495, %int1_6621, %int4_6622, %int128_6623 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6624 = torch.constant.bool false
    %8918 = torch.aten.expand %8902, %8917, %false_6624 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
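    // The broadcast K is then flattened to [4,?,4,128]: this is the repeat_kv
    // step of GQA, after which each shard's single KV head appears once per
    // query head.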
    %int4_6625 = torch.constant.int 4
    %int4_6626 = torch.constant.int 4
    %int128_6627 = torch.constant.int 128
    %8919 = torch.prim.ListConstruct %int4_6625, %8495, %int4_6626, %int128_6627 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8920 = torch.aten.view %8904, %8919 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6628 = torch.constant.int 4
    %int4_6629 = torch.constant.int 4
    %int128_6630 = torch.constant.int 128
    %8921 = torch.prim.ListConstruct %int4_6628, %8495, %int4_6629, %int128_6630 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8922 = torch.aten.view %8906, %8921 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6631 = torch.constant.int 4
    %int4_6632 = torch.constant.int 4
    %int128_6633 = torch.constant.int 128
    %8923 = torch.prim.ListConstruct %int4_6631, %8495, %int4_6632, %int128_6633 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8924 = torch.aten.view %8908, %8923 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6634 = torch.constant.int 4
    %int4_6635 = torch.constant.int 4
    %int128_6636 = torch.constant.int 128
    %8925 = torch.prim.ListConstruct %int4_6634, %8495, %int4_6635, %int128_6636 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8926 = torch.aten.view %8910, %8925 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6637 = torch.constant.int 4
    %int4_6638 = torch.constant.int 4
    %int128_6639 = torch.constant.int 128
    %8927 = torch.prim.ListConstruct %int4_6637, %8495, %int4_6638, %int128_6639 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8928 = torch.aten.view %8912, %8927 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6640 = torch.constant.int 4
    %int4_6641 = torch.constant.int 4
    %int128_6642 = torch.constant.int 128
    %8929 = torch.prim.ListConstruct %int4_6640, %8495, %int4_6641, %int128_6642 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8930 = torch.aten.view %8914, %8929 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6643 = torch.constant.int 4
    %int4_6644 = torch.constant.int 4
    %int128_6645 = torch.constant.int 128
    %8931 = torch.prim.ListConstruct %int4_6643, %8495, %int4_6644, %int128_6645 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8932 = torch.aten.view %8916, %8931 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6646 = torch.constant.int 4
    %int4_6647 = torch.constant.int 4
    %int128_6648 = torch.constant.int 128
    %8933 = torch.prim.ListConstruct %int4_6646, %8495, %int4_6647, %int128_6648 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8934 = torch.aten.view %8918, %8933 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
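    // The same unsqueeze / expand / view expansion is now applied to the V
    // tensors (%8284 through %8298) below.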
    %int-2_6649 = torch.constant.int -2
    %8935 = torch.aten.unsqueeze %8284, %int-2_6649 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6650 = torch.constant.int -2
    %8936 = torch.aten.unsqueeze %8286, %int-2_6650 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6651 = torch.constant.int -2
    %8937 = torch.aten.unsqueeze %8288, %int-2_6651 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6652 = torch.constant.int -2
    %8938 = torch.aten.unsqueeze %8290, %int-2_6652 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6653 = torch.constant.int -2
    %8939 = torch.aten.unsqueeze %8292, %int-2_6653 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6654 = torch.constant.int -2
    %8940 = torch.aten.unsqueeze %8294, %int-2_6654 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6655 = torch.constant.int -2
    %8941 = torch.aten.unsqueeze %8296, %int-2_6655 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_6656 = torch.constant.int -2
    %8942 = torch.aten.unsqueeze %8298, %int-2_6656 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %8942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
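    // The dynamic sequence length is re-read from dim 1 of %8208 rather than
    // reusing %8495; both appear to be bound to the same symbol (s0 * 16)
    // through %2336, so the two size values should agree.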
    %int1_6657 = torch.constant.int 1
    %8943 = torch.aten.size.int %8208, %int1_6657 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_6658 = torch.constant.int 4
    %int1_6659 = torch.constant.int 1
    %int4_6660 = torch.constant.int 4
    %int128_6661 = torch.constant.int 128
    %8944 = torch.prim.ListConstruct %int4_6658, %8943, %int1_6659, %int4_6660, %int128_6661 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6662 = torch.constant.bool false
    %8945 = torch.aten.expand %8935, %8944, %false_6662 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6663 = torch.constant.int 4
    %int1_6664 = torch.constant.int 1
    %int4_6665 = torch.constant.int 4
    %int128_6666 = torch.constant.int 128
    %8946 = torch.prim.ListConstruct %int4_6663, %8943, %int1_6664, %int4_6665, %int128_6666 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6667 = torch.constant.bool false
    %8947 = torch.aten.expand %8936, %8946, %false_6667 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6668 = torch.constant.int 4
    %int1_6669 = torch.constant.int 1
    %int4_6670 = torch.constant.int 4
    %int128_6671 = torch.constant.int 128
    %8948 = torch.prim.ListConstruct %int4_6668, %8943, %int1_6669, %int4_6670, %int128_6671 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6672 = torch.constant.bool false
    %8949 = torch.aten.expand %8937, %8948, %false_6672 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6673 = torch.constant.int 4
    %int1_6674 = torch.constant.int 1
    %int4_6675 = torch.constant.int 4
    %int128_6676 = torch.constant.int 128
    %8950 = torch.prim.ListConstruct %int4_6673, %8943, %int1_6674, %int4_6675, %int128_6676 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6677 = torch.constant.bool false
    %8951 = torch.aten.expand %8938, %8950, %false_6677 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6678 = torch.constant.int 4
    %int1_6679 = torch.constant.int 1
    %int4_6680 = torch.constant.int 4
    %int128_6681 = torch.constant.int 128
    %8952 = torch.prim.ListConstruct %int4_6678, %8943, %int1_6679, %int4_6680, %int128_6681 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6682 = torch.constant.bool false
    %8953 = torch.aten.expand %8939, %8952, %false_6682 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6683 = torch.constant.int 4
    %int1_6684 = torch.constant.int 1
    %int4_6685 = torch.constant.int 4
    %int128_6686 = torch.constant.int 128
    %8954 = torch.prim.ListConstruct %int4_6683, %8943, %int1_6684, %int4_6685, %int128_6686 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6687 = torch.constant.bool false
    %8955 = torch.aten.expand %8940, %8954, %false_6687 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6688 = torch.constant.int 4
    %int1_6689 = torch.constant.int 1
    %int4_6690 = torch.constant.int 4
    %int128_6691 = torch.constant.int 128
    %8956 = torch.prim.ListConstruct %int4_6688, %8943, %int1_6689, %int4_6690, %int128_6691 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6692 = torch.constant.bool false
    %8957 = torch.aten.expand %8941, %8956, %false_6692 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_6693 = torch.constant.int 4
    %int1_6694 = torch.constant.int 1
    %int4_6695 = torch.constant.int 4
    %int128_6696 = torch.constant.int 128
    %8958 = torch.prim.ListConstruct %int4_6693, %8943, %int1_6694, %int4_6695, %int128_6696 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_6697 = torch.constant.bool false
    %8959 = torch.aten.expand %8942, %8958, %false_6697 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %8959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
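    // As with K, the expanded V is collapsed from [4,?,1,4,128] to
    // [4,?,4,128] below.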
    %int4_6698 = torch.constant.int 4
    %int4_6699 = torch.constant.int 4
    %int128_6700 = torch.constant.int 128
    %8960 = torch.prim.ListConstruct %int4_6698, %8943, %int4_6699, %int128_6700 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8961 = torch.aten.view %8945, %8960 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6701 = torch.constant.int 4
    %int4_6702 = torch.constant.int 4
    %int128_6703 = torch.constant.int 128
    %8962 = torch.prim.ListConstruct %int4_6701, %8943, %int4_6702, %int128_6703 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8963 = torch.aten.view %8947, %8962 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6704 = torch.constant.int 4
    %int4_6705 = torch.constant.int 4
    %int128_6706 = torch.constant.int 128
    %8964 = torch.prim.ListConstruct %int4_6704, %8943, %int4_6705, %int128_6706 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8965 = torch.aten.view %8949, %8964 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6707 = torch.constant.int 4
    %int4_6708 = torch.constant.int 4
    %int128_6709 = torch.constant.int 128
    %8966 = torch.prim.ListConstruct %int4_6707, %8943, %int4_6708, %int128_6709 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8967 = torch.aten.view %8951, %8966 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6710 = torch.constant.int 4
    %int4_6711 = torch.constant.int 4
    %int128_6712 = torch.constant.int 128
    %8968 = torch.prim.ListConstruct %int4_6710, %8943, %int4_6711, %int128_6712 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8969 = torch.aten.view %8953, %8968 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6713 = torch.constant.int 4
    %int4_6714 = torch.constant.int 4
    %int128_6715 = torch.constant.int 128
    %8970 = torch.prim.ListConstruct %int4_6713, %8943, %int4_6714, %int128_6715 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8971 = torch.aten.view %8955, %8970 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6716 = torch.constant.int 4
    %int4_6717 = torch.constant.int 4
    %int128_6718 = torch.constant.int 128
    %8972 = torch.prim.ListConstruct %int4_6716, %8943, %int4_6717, %int128_6718 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8973 = torch.aten.view %8957, %8972 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_6719 = torch.constant.int 4
    %int4_6720 = torch.constant.int 4
    %int128_6721 = torch.constant.int 128
    %8974 = torch.prim.ListConstruct %int4_6719, %8943, %int4_6720, %int128_6721 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %8975 = torch.aten.view %8959, %8974 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %8975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
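    // Q (%8351..%8456), the expanded K (%8920..%8934), and the expanded V
    // (%8961..%8975) are transposed from [batch, seq, heads, head_dim] to
    // [batch, heads, seq, head_dim], the layout the attention op expects.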
    %int1_6722 = torch.constant.int 1
    %int2_6723 = torch.constant.int 2
    %8976 = torch.aten.transpose.int %8351, %int1_6722, %int2_6723 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8976, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6724 = torch.constant.int 1
    %int2_6725 = torch.constant.int 2
    %8977 = torch.aten.transpose.int %8366, %int1_6724, %int2_6725 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8977, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6726 = torch.constant.int 1
    %int2_6727 = torch.constant.int 2
    %8978 = torch.aten.transpose.int %8381, %int1_6726, %int2_6727 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8978, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6728 = torch.constant.int 1
    %int2_6729 = torch.constant.int 2
    %8979 = torch.aten.transpose.int %8396, %int1_6728, %int2_6729 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8979, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6730 = torch.constant.int 1
    %int2_6731 = torch.constant.int 2
    %8980 = torch.aten.transpose.int %8411, %int1_6730, %int2_6731 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8980, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6732 = torch.constant.int 1
    %int2_6733 = torch.constant.int 2
    %8981 = torch.aten.transpose.int %8426, %int1_6732, %int2_6733 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8981, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6734 = torch.constant.int 1
    %int2_6735 = torch.constant.int 2
    %8982 = torch.aten.transpose.int %8441, %int1_6734, %int2_6735 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8982, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6736 = torch.constant.int 1
    %int2_6737 = torch.constant.int 2
    %8983 = torch.aten.transpose.int %8456, %int1_6736, %int2_6737 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8983, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6738 = torch.constant.int 1
    %int2_6739 = torch.constant.int 2
    %8984 = torch.aten.transpose.int %8920, %int1_6738, %int2_6739 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8984, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6740 = torch.constant.int 1
    %int2_6741 = torch.constant.int 2
    %8985 = torch.aten.transpose.int %8922, %int1_6740, %int2_6741 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8985, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6742 = torch.constant.int 1
    %int2_6743 = torch.constant.int 2
    %8986 = torch.aten.transpose.int %8924, %int1_6742, %int2_6743 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8986, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6744 = torch.constant.int 1
    %int2_6745 = torch.constant.int 2
    %8987 = torch.aten.transpose.int %8926, %int1_6744, %int2_6745 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8987, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6746 = torch.constant.int 1
    %int2_6747 = torch.constant.int 2
    %8988 = torch.aten.transpose.int %8928, %int1_6746, %int2_6747 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8988, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6748 = torch.constant.int 1
    %int2_6749 = torch.constant.int 2
    %8989 = torch.aten.transpose.int %8930, %int1_6748, %int2_6749 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8989, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6750 = torch.constant.int 1
    %int2_6751 = torch.constant.int 2
    %8990 = torch.aten.transpose.int %8932, %int1_6750, %int2_6751 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8990, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6752 = torch.constant.int 1
    %int2_6753 = torch.constant.int 2
    %8991 = torch.aten.transpose.int %8934, %int1_6752, %int2_6753 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8991, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6754 = torch.constant.int 1
    %int2_6755 = torch.constant.int 2
    %8992 = torch.aten.transpose.int %8961, %int1_6754, %int2_6755 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8992, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6756 = torch.constant.int 1
    %int2_6757 = torch.constant.int 2
    %8993 = torch.aten.transpose.int %8963, %int1_6756, %int2_6757 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8993, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6758 = torch.constant.int 1
    %int2_6759 = torch.constant.int 2
    %8994 = torch.aten.transpose.int %8965, %int1_6758, %int2_6759 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8994, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6760 = torch.constant.int 1
    %int2_6761 = torch.constant.int 2
    %8995 = torch.aten.transpose.int %8967, %int1_6760, %int2_6761 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8995, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6762 = torch.constant.int 1
    %int2_6763 = torch.constant.int 2
    %8996 = torch.aten.transpose.int %8969, %int1_6762, %int2_6763 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8996, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6764 = torch.constant.int 1
    %int2_6765 = torch.constant.int 2
    %8997 = torch.aten.transpose.int %8971, %int1_6764, %int2_6765 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8997, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6766 = torch.constant.int 1
    %int2_6767 = torch.constant.int 2
    %8998 = torch.aten.transpose.int %8973, %int1_6766, %int2_6767 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8998, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_6768 = torch.constant.int 1
    %int2_6769 = torch.constant.int 2
    %8999 = torch.aten.transpose.int %8975, %int1_6768, %int2_6769 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %8999, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
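    // One causal flash-attention call per shard: dropout_p = 0.0,
    // is_causal = true, no explicit mask or scale. Each call returns the
    // attention output plus a per-row logsumexp ([4,4,?],f32); only result #0
    // is consumed in this section. Roughly, in PyTorch terms (a sketch, not
    // the exported source):
    //   out = F.scaled_dot_product_attention(q, k, v, dropout_p=0.0, is_causal=True)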
    %float0.000000e00_6770 = torch.constant.float 0.000000e+00
    %true_6771 = torch.constant.bool true
    %none_6772 = torch.constant.none
    %none_6773 = torch.constant.none
    %9000:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%8976, %8984, %8992, %float0.000000e00_6770, %true_6771, %none_6772, %none_6773) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %9000#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_6774 = torch.constant.float 0.000000e+00
    %true_6775 = torch.constant.bool true
    %none_6776 = torch.constant.none
    %none_6777 = torch.constant.none
    %9001:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%8977, %8985, %8993, %float0.000000e00_6774, %true_6775, %none_6776, %none_6777) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %9001#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_6778 = torch.constant.float 0.000000e+00
    %true_6779 = torch.constant.bool true
    %none_6780 = torch.constant.none
    %none_6781 = torch.constant.none
    %9002:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%8978, %8986, %8994, %float0.000000e00_6778, %true_6779, %none_6780, %none_6781) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %9002#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_6782 = torch.constant.float 0.000000e+00
    %true_6783 = torch.constant.bool true
    %none_6784 = torch.constant.none
    %none_6785 = torch.constant.none
    %9003:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%8979, %8987, %8995, %float0.000000e00_6782, %true_6783, %none_6784, %none_6785) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %9003#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_6786 = torch.constant.float 0.000000e+00
    %true_6787 = torch.constant.bool true
    %none_6788 = torch.constant.none
    %none_6789 = torch.constant.none
    %9004:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%8980, %8988, %8996, %float0.000000e00_6786, %true_6787, %none_6788, %none_6789) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %9004#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_6790 = torch.constant.float 0.000000e+00
    %true_6791 = torch.constant.bool true
    %none_6792 = torch.constant.none
    %none_6793 = torch.constant.none
    %9005:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%8981, %8989, %8997, %float0.000000e00_6790, %true_6791, %none_6792, %none_6793) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %9005#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_6794 = torch.constant.float 0.000000e+00
    %true_6795 = torch.constant.bool true
    %none_6796 = torch.constant.none
    %none_6797 = torch.constant.none
    %9006:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%8982, %8990, %8998, %float0.000000e00_6794, %true_6795, %none_6796, %none_6797) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %9006#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_6798 = torch.constant.float 0.000000e+00
    %true_6799 = torch.constant.bool true
    %none_6800 = torch.constant.none
    %none_6801 = torch.constant.none
    %9007:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%8983, %8991, %8999, %float0.000000e00_6798, %true_6799, %none_6800, %none_6801) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %9007#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
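    // Swap dims 1 and 2 of each attention output, [4,4,?,128] -> [4,?,4,128],
    // moving the head axis inward so it can be merged with the head dim.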
    %int1_6802 = torch.constant.int 1
    %int2_6803 = torch.constant.int 2
    %9008 = torch.aten.transpose.int %9000#0, %int1_6802, %int2_6803 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %9008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_6804 = torch.constant.int 1
    %int2_6805 = torch.constant.int 2
    %9009 = torch.aten.transpose.int %9001#0, %int1_6804, %int2_6805 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %9009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_6806 = torch.constant.int 1
    %int2_6807 = torch.constant.int 2
    %9010 = torch.aten.transpose.int %9002#0, %int1_6806, %int2_6807 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %9010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_6808 = torch.constant.int 1
    %int2_6809 = torch.constant.int 2
    %9011 = torch.aten.transpose.int %9003#0, %int1_6808, %int2_6809 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %9011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_6810 = torch.constant.int 1
    %int2_6811 = torch.constant.int 2
    %9012 = torch.aten.transpose.int %9004#0, %int1_6810, %int2_6811 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %9012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_6812 = torch.constant.int 1
    %int2_6813 = torch.constant.int 2
    %9013 = torch.aten.transpose.int %9005#0, %int1_6812, %int2_6813 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %9013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_6814 = torch.constant.int 1
    %int2_6815 = torch.constant.int 2
    %9014 = torch.aten.transpose.int %9006#0, %int1_6814, %int2_6815 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %9014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_6816 = torch.constant.int 1
    %int2_6817 = torch.constant.int 2
    %9015 = torch.aten.transpose.int %9007#0, %int1_6816, %int2_6817 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %9015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
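    // Collapse the trailing head axes of each transposed output
    // (4 heads x 128 head_dim = 512): [4,?,4,128] -> [4,?,512].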
    %int4_6818 = torch.constant.int 4
    %int512_6819 = torch.constant.int 512
    %9016 = torch.prim.ListConstruct %int4_6818, %8337, %int512_6819 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9017 = torch.aten.view %9008, %9016 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_6820 = torch.constant.int 4
    %int512_6821 = torch.constant.int 512
    %9018 = torch.prim.ListConstruct %int4_6820, %8352, %int512_6821 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9019 = torch.aten.view %9009, %9018 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_6822 = torch.constant.int 4
    %int512_6823 = torch.constant.int 512
    %9020 = torch.prim.ListConstruct %int4_6822, %8367, %int512_6823 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9021 = torch.aten.view %9010, %9020 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_6824 = torch.constant.int 4
    %int512_6825 = torch.constant.int 512
    %9022 = torch.prim.ListConstruct %int4_6824, %8382, %int512_6825 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9023 = torch.aten.view %9011, %9022 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_6826 = torch.constant.int 4
    %int512_6827 = torch.constant.int 512
    %9024 = torch.prim.ListConstruct %int4_6826, %8397, %int512_6827 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9025 = torch.aten.view %9012, %9024 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_6828 = torch.constant.int 4
    %int512_6829 = torch.constant.int 512
    %9026 = torch.prim.ListConstruct %int4_6828, %8412, %int512_6829 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9027 = torch.aten.view %9013, %9026 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_6830 = torch.constant.int 4
    %int512_6831 = torch.constant.int 512
    %9028 = torch.prim.ListConstruct %int4_6830, %8427, %int512_6831 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9029 = torch.aten.view %9014, %9028 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_6832 = torch.constant.int 4
    %int512_6833 = torch.constant.int 512
    %9030 = torch.prim.ListConstruct %int4_6832, %8442, %int512_6833 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9031 = torch.aten.view %9015, %9030 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
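    // Transpose the eight per-device weight shards %256-%263 (presumably the
    // attn_output projection, one [4096,512] shard per device) to [512,4096]
    // for the matmuls below.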
    %int1_6834 = torch.constant.int 1
    %int0_6835 = torch.constant.int 0
    %9032 = torch.prim.ListConstruct %int1_6834, %int0_6835 : (!torch.int, !torch.int) -> !torch.list<int>
    %9033 = torch.aten.permute %256, %9032 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_6836 = torch.constant.int 1
    %int0_6837 = torch.constant.int 0
    %9034 = torch.prim.ListConstruct %int1_6836, %int0_6837 : (!torch.int, !torch.int) -> !torch.list<int>
    %9035 = torch.aten.permute %257, %9034 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_6838 = torch.constant.int 1
    %int0_6839 = torch.constant.int 0
    %9036 = torch.prim.ListConstruct %int1_6838, %int0_6839 : (!torch.int, !torch.int) -> !torch.list<int>
    %9037 = torch.aten.permute %258, %9036 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_6840 = torch.constant.int 1
    %int0_6841 = torch.constant.int 0
    %9038 = torch.prim.ListConstruct %int1_6840, %int0_6841 : (!torch.int, !torch.int) -> !torch.list<int>
    %9039 = torch.aten.permute %259, %9038 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_6842 = torch.constant.int 1
    %int0_6843 = torch.constant.int 0
    %9040 = torch.prim.ListConstruct %int1_6842, %int0_6843 : (!torch.int, !torch.int) -> !torch.list<int>
    %9041 = torch.aten.permute %260, %9040 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_6844 = torch.constant.int 1
    %int0_6845 = torch.constant.int 0
    %9042 = torch.prim.ListConstruct %int1_6844, %int0_6845 : (!torch.int, !torch.int) -> !torch.list<int>
    %9043 = torch.aten.permute %261, %9042 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_6846 = torch.constant.int 1
    %int0_6847 = torch.constant.int 0
    %9044 = torch.prim.ListConstruct %int1_6846, %int0_6847 : (!torch.int, !torch.int) -> !torch.list<int>
    %9045 = torch.aten.permute %262, %9044 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_6848 = torch.constant.int 1
    %int0_6849 = torch.constant.int 0
    %9046 = torch.prim.ListConstruct %int1_6848, %int0_6849 : (!torch.int, !torch.int) -> !torch.list<int>
    %9047 = torch.aten.permute %263, %9046 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
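    // Per-shard output projection: flatten [4,?,512] to [4*seq, 512], matmul
    // against the [512,4096] transposed weight shard, and reshape back to
    // [4,?,4096]. Each result (%9053, %9059, ..., %9095) is a partial sum
    // that still needs to be reduced across the eight devices.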
    %int4_6850 = torch.constant.int 4
    %9048 = torch.aten.mul.int %int4_6850, %8337 : !torch.int, !torch.int -> !torch.int
    %int512_6851 = torch.constant.int 512
    %9049 = torch.prim.ListConstruct %9048, %int512_6851 : (!torch.int, !torch.int) -> !torch.list<int>
    %9050 = torch.aten.view %9017, %9049 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9050, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %9051 = torch.aten.mm %9050, %9033 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9051, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_6852 = torch.constant.int 4
    %int4096_6853 = torch.constant.int 4096
    %9052 = torch.prim.ListConstruct %int4_6852, %8337, %int4096_6853 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9053 = torch.aten.view %9051, %9052 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_6854 = torch.constant.int 4
    %9054 = torch.aten.mul.int %int4_6854, %8352 : !torch.int, !torch.int -> !torch.int
    %int512_6855 = torch.constant.int 512
    %9055 = torch.prim.ListConstruct %9054, %int512_6855 : (!torch.int, !torch.int) -> !torch.list<int>
    %9056 = torch.aten.view %9019, %9055 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9056, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %9057 = torch.aten.mm %9056, %9035 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9057, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_6856 = torch.constant.int 4
    %int4096_6857 = torch.constant.int 4096
    %9058 = torch.prim.ListConstruct %int4_6856, %8352, %int4096_6857 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9059 = torch.aten.view %9057, %9058 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_6858 = torch.constant.int 4
    %9060 = torch.aten.mul.int %int4_6858, %8367 : !torch.int, !torch.int -> !torch.int
    %int512_6859 = torch.constant.int 512
    %9061 = torch.prim.ListConstruct %9060, %int512_6859 : (!torch.int, !torch.int) -> !torch.list<int>
    %9062 = torch.aten.view %9021, %9061 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9062, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %9063 = torch.aten.mm %9062, %9037 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9063, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_6860 = torch.constant.int 4
    %int4096_6861 = torch.constant.int 4096
    %9064 = torch.prim.ListConstruct %int4_6860, %8367, %int4096_6861 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9065 = torch.aten.view %9063, %9064 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_6862 = torch.constant.int 4
    %9066 = torch.aten.mul.int %int4_6862, %8382 : !torch.int, !torch.int -> !torch.int
    %int512_6863 = torch.constant.int 512
    %9067 = torch.prim.ListConstruct %9066, %int512_6863 : (!torch.int, !torch.int) -> !torch.list<int>
    %9068 = torch.aten.view %9023, %9067 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9068, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %9069 = torch.aten.mm %9068, %9039 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9069, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_6864 = torch.constant.int 4
    %int4096_6865 = torch.constant.int 4096
    %9070 = torch.prim.ListConstruct %int4_6864, %8382, %int4096_6865 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9071 = torch.aten.view %9069, %9070 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_6866 = torch.constant.int 4
    %9072 = torch.aten.mul.int %int4_6866, %8397 : !torch.int, !torch.int -> !torch.int
    %int512_6867 = torch.constant.int 512
    %9073 = torch.prim.ListConstruct %9072, %int512_6867 : (!torch.int, !torch.int) -> !torch.list<int>
    %9074 = torch.aten.view %9025, %9073 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9074, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %9075 = torch.aten.mm %9074, %9041 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9075, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_6868 = torch.constant.int 4
    %int4096_6869 = torch.constant.int 4096
    %9076 = torch.prim.ListConstruct %int4_6868, %8397, %int4096_6869 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9077 = torch.aten.view %9075, %9076 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_6870 = torch.constant.int 4
    %9078 = torch.aten.mul.int %int4_6870, %8412 : !torch.int, !torch.int -> !torch.int
    %int512_6871 = torch.constant.int 512
    %9079 = torch.prim.ListConstruct %9078, %int512_6871 : (!torch.int, !torch.int) -> !torch.list<int>
    %9080 = torch.aten.view %9027, %9079 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9080, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %9081 = torch.aten.mm %9080, %9043 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9081, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_6872 = torch.constant.int 4
    %int4096_6873 = torch.constant.int 4096
    %9082 = torch.prim.ListConstruct %int4_6872, %8412, %int4096_6873 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9083 = torch.aten.view %9081, %9082 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_6874 = torch.constant.int 4
    %9084 = torch.aten.mul.int %int4_6874, %8427 : !torch.int, !torch.int -> !torch.int
    %int512_6875 = torch.constant.int 512
    %9085 = torch.prim.ListConstruct %9084, %int512_6875 : (!torch.int, !torch.int) -> !torch.list<int>
    %9086 = torch.aten.view %9029, %9085 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9086, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %9087 = torch.aten.mm %9086, %9045 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9087, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_6876 = torch.constant.int 4
    %int4096_6877 = torch.constant.int 4096
    %9088 = torch.prim.ListConstruct %int4_6876, %8427, %int4096_6877 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9089 = torch.aten.view %9087, %9088 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_6878 = torch.constant.int 4
    %9090 = torch.aten.mul.int %int4_6878, %8442 : !torch.int, !torch.int -> !torch.int
    %int512_6879 = torch.constant.int 512
    %9091 = torch.prim.ListConstruct %9090, %int512_6879 : (!torch.int, !torch.int) -> !torch.list<int>
    %9092 = torch.aten.view %9031, %9091 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9092, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %9093 = torch.aten.mm %9092, %9047 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9093, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_6880 = torch.constant.int 4
    %int4096_6881 = torch.constant.int 4096
    %9094 = torch.prim.ListConstruct %int4_6880, %8442, %int4096_6881 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9095 = torch.aten.view %9093, %9094 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
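    // Reduction for @__device_0: move the seven partials produced on the
    // other devices (%9059 through %9095) to device 0 with
    // flow.tensor.transfer, then fold all eight partials together with
    // chained adds into %9123. Together with the analogous per-device blocks
    // that follow, this in effect spells out an all-reduce of the sharded
    // projection output.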
    %9096 = torch_c.to_builtin_tensor %9059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6882 = arith.constant 1 : index
    %dim_6883 = tensor.dim %9096, %c1_6882 : tensor<4x?x4096xf16>
    %9097 = flow.tensor.transfer %9096 : tensor<4x?x4096xf16>{%dim_6883} to #hal.device.promise<@__device_0>
    %9098 = torch_c.from_builtin_tensor %9097 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9099 = torch_c.to_builtin_tensor %9065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6884 = arith.constant 1 : index
    %dim_6885 = tensor.dim %9099, %c1_6884 : tensor<4x?x4096xf16>
    %9100 = flow.tensor.transfer %9099 : tensor<4x?x4096xf16>{%dim_6885} to #hal.device.promise<@__device_0>
    %9101 = torch_c.from_builtin_tensor %9100 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9102 = torch_c.to_builtin_tensor %9071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6886 = arith.constant 1 : index
    %dim_6887 = tensor.dim %9102, %c1_6886 : tensor<4x?x4096xf16>
    %9103 = flow.tensor.transfer %9102 : tensor<4x?x4096xf16>{%dim_6887} to #hal.device.promise<@__device_0>
    %9104 = torch_c.from_builtin_tensor %9103 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9105 = torch_c.to_builtin_tensor %9077 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6888 = arith.constant 1 : index
    %dim_6889 = tensor.dim %9105, %c1_6888 : tensor<4x?x4096xf16>
    %9106 = flow.tensor.transfer %9105 : tensor<4x?x4096xf16>{%dim_6889} to #hal.device.promise<@__device_0>
    %9107 = torch_c.from_builtin_tensor %9106 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9108 = torch_c.to_builtin_tensor %9083 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6890 = arith.constant 1 : index
    %dim_6891 = tensor.dim %9108, %c1_6890 : tensor<4x?x4096xf16>
    %9109 = flow.tensor.transfer %9108 : tensor<4x?x4096xf16>{%dim_6891} to #hal.device.promise<@__device_0>
    %9110 = torch_c.from_builtin_tensor %9109 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9111 = torch_c.to_builtin_tensor %9089 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6892 = arith.constant 1 : index
    %dim_6893 = tensor.dim %9111, %c1_6892 : tensor<4x?x4096xf16>
    %9112 = flow.tensor.transfer %9111 : tensor<4x?x4096xf16>{%dim_6893} to #hal.device.promise<@__device_0>
    %9113 = torch_c.from_builtin_tensor %9112 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9114 = torch_c.to_builtin_tensor %9095 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6894 = arith.constant 1 : index
    %dim_6895 = tensor.dim %9114, %c1_6894 : tensor<4x?x4096xf16>
    %9115 = flow.tensor.transfer %9114 : tensor<4x?x4096xf16>{%dim_6895} to #hal.device.promise<@__device_0>
    %9116 = torch_c.from_builtin_tensor %9115 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6896 = torch.constant.int 1
    %9117 = torch.aten.add.Tensor %9053, %9098, %int1_6896 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6897 = torch.constant.int 1
    %9118 = torch.aten.add.Tensor %9117, %9101, %int1_6897 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6898 = torch.constant.int 1
    %9119 = torch.aten.add.Tensor %9118, %9104, %int1_6898 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6899 = torch.constant.int 1
    %9120 = torch.aten.add.Tensor %9119, %9107, %int1_6899 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6900 = torch.constant.int 1
    %9121 = torch.aten.add.Tensor %9120, %9110, %int1_6900 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6901 = torch.constant.int 1
    %9122 = torch.aten.add.Tensor %9121, %9113, %int1_6901 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6902 = torch.constant.int 1
    %9123 = torch.aten.add.Tensor %9122, %9116, %int1_6902 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
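    // Same pattern for @__device_1: seven transfers plus the local partial
    // %9059, accumulated into %9151.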
    %9124 = torch_c.to_builtin_tensor %9053 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6903 = arith.constant 1 : index
    %dim_6904 = tensor.dim %9124, %c1_6903 : tensor<4x?x4096xf16>
    %9125 = flow.tensor.transfer %9124 : tensor<4x?x4096xf16>{%dim_6904} to #hal.device.promise<@__device_1>
    %9126 = torch_c.from_builtin_tensor %9125 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9127 = torch_c.to_builtin_tensor %9065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6905 = arith.constant 1 : index
    %dim_6906 = tensor.dim %9127, %c1_6905 : tensor<4x?x4096xf16>
    %9128 = flow.tensor.transfer %9127 : tensor<4x?x4096xf16>{%dim_6906} to #hal.device.promise<@__device_1>
    %9129 = torch_c.from_builtin_tensor %9128 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9130 = torch_c.to_builtin_tensor %9071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6907 = arith.constant 1 : index
    %dim_6908 = tensor.dim %9130, %c1_6907 : tensor<4x?x4096xf16>
    %9131 = flow.tensor.transfer %9130 : tensor<4x?x4096xf16>{%dim_6908} to #hal.device.promise<@__device_1>
    %9132 = torch_c.from_builtin_tensor %9131 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9133 = torch_c.to_builtin_tensor %9077 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6909 = arith.constant 1 : index
    %dim_6910 = tensor.dim %9133, %c1_6909 : tensor<4x?x4096xf16>
    %9134 = flow.tensor.transfer %9133 : tensor<4x?x4096xf16>{%dim_6910} to #hal.device.promise<@__device_1>
    %9135 = torch_c.from_builtin_tensor %9134 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9136 = torch_c.to_builtin_tensor %9083 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6911 = arith.constant 1 : index
    %dim_6912 = tensor.dim %9136, %c1_6911 : tensor<4x?x4096xf16>
    %9137 = flow.tensor.transfer %9136 : tensor<4x?x4096xf16>{%dim_6912} to #hal.device.promise<@__device_1>
    %9138 = torch_c.from_builtin_tensor %9137 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9139 = torch_c.to_builtin_tensor %9089 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6913 = arith.constant 1 : index
    %dim_6914 = tensor.dim %9139, %c1_6913 : tensor<4x?x4096xf16>
    %9140 = flow.tensor.transfer %9139 : tensor<4x?x4096xf16>{%dim_6914} to #hal.device.promise<@__device_1>
    %9141 = torch_c.from_builtin_tensor %9140 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9142 = torch_c.to_builtin_tensor %9095 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6915 = arith.constant 1 : index
    %dim_6916 = tensor.dim %9142, %c1_6915 : tensor<4x?x4096xf16>
    %9143 = flow.tensor.transfer %9142 : tensor<4x?x4096xf16>{%dim_6916} to #hal.device.promise<@__device_1>
    %9144 = torch_c.from_builtin_tensor %9143 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6917 = torch.constant.int 1
    %9145 = torch.aten.add.Tensor %9126, %9059, %int1_6917 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6918 = torch.constant.int 1
    %9146 = torch.aten.add.Tensor %9145, %9129, %int1_6918 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6919 = torch.constant.int 1
    %9147 = torch.aten.add.Tensor %9146, %9132, %int1_6919 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6920 = torch.constant.int 1
    %9148 = torch.aten.add.Tensor %9147, %9135, %int1_6920 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6921 = torch.constant.int 1
    %9149 = torch.aten.add.Tensor %9148, %9138, %int1_6921 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6922 = torch.constant.int 1
    %9150 = torch.aten.add.Tensor %9149, %9141, %int1_6922 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6923 = torch.constant.int 1
    %9151 = torch.aten.add.Tensor %9150, %9144, %int1_6923 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
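    // Same pattern for @__device_2: seven transfers plus the local partial
    // %9065, accumulated into %9179.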
    %9152 = torch_c.to_builtin_tensor %9053 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6924 = arith.constant 1 : index
    %dim_6925 = tensor.dim %9152, %c1_6924 : tensor<4x?x4096xf16>
    %9153 = flow.tensor.transfer %9152 : tensor<4x?x4096xf16>{%dim_6925} to #hal.device.promise<@__device_2>
    %9154 = torch_c.from_builtin_tensor %9153 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9155 = torch_c.to_builtin_tensor %9059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6926 = arith.constant 1 : index
    %dim_6927 = tensor.dim %9155, %c1_6926 : tensor<4x?x4096xf16>
    %9156 = flow.tensor.transfer %9155 : tensor<4x?x4096xf16>{%dim_6927} to #hal.device.promise<@__device_2>
    %9157 = torch_c.from_builtin_tensor %9156 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9158 = torch_c.to_builtin_tensor %9071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6928 = arith.constant 1 : index
    %dim_6929 = tensor.dim %9158, %c1_6928 : tensor<4x?x4096xf16>
    %9159 = flow.tensor.transfer %9158 : tensor<4x?x4096xf16>{%dim_6929} to #hal.device.promise<@__device_2>
    %9160 = torch_c.from_builtin_tensor %9159 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9161 = torch_c.to_builtin_tensor %9077 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6930 = arith.constant 1 : index
    %dim_6931 = tensor.dim %9161, %c1_6930 : tensor<4x?x4096xf16>
    %9162 = flow.tensor.transfer %9161 : tensor<4x?x4096xf16>{%dim_6931} to #hal.device.promise<@__device_2>
    %9163 = torch_c.from_builtin_tensor %9162 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9164 = torch_c.to_builtin_tensor %9083 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6932 = arith.constant 1 : index
    %dim_6933 = tensor.dim %9164, %c1_6932 : tensor<4x?x4096xf16>
    %9165 = flow.tensor.transfer %9164 : tensor<4x?x4096xf16>{%dim_6933} to #hal.device.promise<@__device_2>
    %9166 = torch_c.from_builtin_tensor %9165 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9167 = torch_c.to_builtin_tensor %9089 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6934 = arith.constant 1 : index
    %dim_6935 = tensor.dim %9167, %c1_6934 : tensor<4x?x4096xf16>
    %9168 = flow.tensor.transfer %9167 : tensor<4x?x4096xf16>{%dim_6935} to #hal.device.promise<@__device_2>
    %9169 = torch_c.from_builtin_tensor %9168 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9170 = torch_c.to_builtin_tensor %9095 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6936 = arith.constant 1 : index
    %dim_6937 = tensor.dim %9170, %c1_6936 : tensor<4x?x4096xf16>
    %9171 = flow.tensor.transfer %9170 : tensor<4x?x4096xf16>{%dim_6937} to #hal.device.promise<@__device_2>
    %9172 = torch_c.from_builtin_tensor %9171 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6938 = torch.constant.int 1
    %9173 = torch.aten.add.Tensor %9154, %9157, %int1_6938 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6939 = torch.constant.int 1
    %9174 = torch.aten.add.Tensor %9173, %9065, %int1_6939 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6940 = torch.constant.int 1
    %9175 = torch.aten.add.Tensor %9174, %9160, %int1_6940 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6941 = torch.constant.int 1
    %9176 = torch.aten.add.Tensor %9175, %9163, %int1_6941 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6942 = torch.constant.int 1
    %9177 = torch.aten.add.Tensor %9176, %9166, %int1_6942 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6943 = torch.constant.int 1
    %9178 = torch.aten.add.Tensor %9177, %9169, %int1_6943 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6944 = torch.constant.int 1
    %9179 = torch.aten.add.Tensor %9178, %9172, %int1_6944 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
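    // Same pattern for @__device_3: seven transfers plus the local partial
    // %9071, accumulated into %9207.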
    %9180 = torch_c.to_builtin_tensor %9053 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6945 = arith.constant 1 : index
    %dim_6946 = tensor.dim %9180, %c1_6945 : tensor<4x?x4096xf16>
    %9181 = flow.tensor.transfer %9180 : tensor<4x?x4096xf16>{%dim_6946} to #hal.device.promise<@__device_3>
    %9182 = torch_c.from_builtin_tensor %9181 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9183 = torch_c.to_builtin_tensor %9059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6947 = arith.constant 1 : index
    %dim_6948 = tensor.dim %9183, %c1_6947 : tensor<4x?x4096xf16>
    %9184 = flow.tensor.transfer %9183 : tensor<4x?x4096xf16>{%dim_6948} to #hal.device.promise<@__device_3>
    %9185 = torch_c.from_builtin_tensor %9184 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9186 = torch_c.to_builtin_tensor %9065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6949 = arith.constant 1 : index
    %dim_6950 = tensor.dim %9186, %c1_6949 : tensor<4x?x4096xf16>
    %9187 = flow.tensor.transfer %9186 : tensor<4x?x4096xf16>{%dim_6950} to #hal.device.promise<@__device_3>
    %9188 = torch_c.from_builtin_tensor %9187 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9189 = torch_c.to_builtin_tensor %9077 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6951 = arith.constant 1 : index
    %dim_6952 = tensor.dim %9189, %c1_6951 : tensor<4x?x4096xf16>
    %9190 = flow.tensor.transfer %9189 : tensor<4x?x4096xf16>{%dim_6952} to #hal.device.promise<@__device_3>
    %9191 = torch_c.from_builtin_tensor %9190 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9192 = torch_c.to_builtin_tensor %9083 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6953 = arith.constant 1 : index
    %dim_6954 = tensor.dim %9192, %c1_6953 : tensor<4x?x4096xf16>
    %9193 = flow.tensor.transfer %9192 : tensor<4x?x4096xf16>{%dim_6954} to #hal.device.promise<@__device_3>
    %9194 = torch_c.from_builtin_tensor %9193 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9195 = torch_c.to_builtin_tensor %9089 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6955 = arith.constant 1 : index
    %dim_6956 = tensor.dim %9195, %c1_6955 : tensor<4x?x4096xf16>
    %9196 = flow.tensor.transfer %9195 : tensor<4x?x4096xf16>{%dim_6956} to #hal.device.promise<@__device_3>
    %9197 = torch_c.from_builtin_tensor %9196 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9198 = torch_c.to_builtin_tensor %9095 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6957 = arith.constant 1 : index
    %dim_6958 = tensor.dim %9198, %c1_6957 : tensor<4x?x4096xf16>
    %9199 = flow.tensor.transfer %9198 : tensor<4x?x4096xf16>{%dim_6958} to #hal.device.promise<@__device_3>
    %9200 = torch_c.from_builtin_tensor %9199 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6959 = torch.constant.int 1
    %9201 = torch.aten.add.Tensor %9182, %9185, %int1_6959 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6960 = torch.constant.int 1
    %9202 = torch.aten.add.Tensor %9201, %9188, %int1_6960 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6961 = torch.constant.int 1
    %9203 = torch.aten.add.Tensor %9202, %9071, %int1_6961 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6962 = torch.constant.int 1
    %9204 = torch.aten.add.Tensor %9203, %9191, %int1_6962 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6963 = torch.constant.int 1
    %9205 = torch.aten.add.Tensor %9204, %9194, %int1_6963 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6964 = torch.constant.int 1
    %9206 = torch.aten.add.Tensor %9205, %9197, %int1_6964 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6965 = torch.constant.int 1
    %9207 = torch.aten.add.Tensor %9206, %9200, %int1_6965 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
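    // Same pattern for @__device_4: seven transfers plus the local partial
    // %9077, accumulated into %9235.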
    %9208 = torch_c.to_builtin_tensor %9053 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6966 = arith.constant 1 : index
    %dim_6967 = tensor.dim %9208, %c1_6966 : tensor<4x?x4096xf16>
    %9209 = flow.tensor.transfer %9208 : tensor<4x?x4096xf16>{%dim_6967} to #hal.device.promise<@__device_4>
    %9210 = torch_c.from_builtin_tensor %9209 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9211 = torch_c.to_builtin_tensor %9059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6968 = arith.constant 1 : index
    %dim_6969 = tensor.dim %9211, %c1_6968 : tensor<4x?x4096xf16>
    %9212 = flow.tensor.transfer %9211 : tensor<4x?x4096xf16>{%dim_6969} to #hal.device.promise<@__device_4>
    %9213 = torch_c.from_builtin_tensor %9212 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9214 = torch_c.to_builtin_tensor %9065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6970 = arith.constant 1 : index
    %dim_6971 = tensor.dim %9214, %c1_6970 : tensor<4x?x4096xf16>
    %9215 = flow.tensor.transfer %9214 : tensor<4x?x4096xf16>{%dim_6971} to #hal.device.promise<@__device_4>
    %9216 = torch_c.from_builtin_tensor %9215 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9217 = torch_c.to_builtin_tensor %9071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6972 = arith.constant 1 : index
    %dim_6973 = tensor.dim %9217, %c1_6972 : tensor<4x?x4096xf16>
    %9218 = flow.tensor.transfer %9217 : tensor<4x?x4096xf16>{%dim_6973} to #hal.device.promise<@__device_4>
    %9219 = torch_c.from_builtin_tensor %9218 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9220 = torch_c.to_builtin_tensor %9083 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6974 = arith.constant 1 : index
    %dim_6975 = tensor.dim %9220, %c1_6974 : tensor<4x?x4096xf16>
    %9221 = flow.tensor.transfer %9220 : tensor<4x?x4096xf16>{%dim_6975} to #hal.device.promise<@__device_4>
    %9222 = torch_c.from_builtin_tensor %9221 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9223 = torch_c.to_builtin_tensor %9089 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6976 = arith.constant 1 : index
    %dim_6977 = tensor.dim %9223, %c1_6976 : tensor<4x?x4096xf16>
    %9224 = flow.tensor.transfer %9223 : tensor<4x?x4096xf16>{%dim_6977} to #hal.device.promise<@__device_4>
    %9225 = torch_c.from_builtin_tensor %9224 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9226 = torch_c.to_builtin_tensor %9095 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6978 = arith.constant 1 : index
    %dim_6979 = tensor.dim %9226, %c1_6978 : tensor<4x?x4096xf16>
    %9227 = flow.tensor.transfer %9226 : tensor<4x?x4096xf16>{%dim_6979} to #hal.device.promise<@__device_4>
    %9228 = torch_c.from_builtin_tensor %9227 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6980 = torch.constant.int 1
    %9229 = torch.aten.add.Tensor %9210, %9213, %int1_6980 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6981 = torch.constant.int 1
    %9230 = torch.aten.add.Tensor %9229, %9216, %int1_6981 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6982 = torch.constant.int 1
    %9231 = torch.aten.add.Tensor %9230, %9219, %int1_6982 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6983 = torch.constant.int 1
    %9232 = torch.aten.add.Tensor %9231, %9077, %int1_6983 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6984 = torch.constant.int 1
    %9233 = torch.aten.add.Tensor %9232, %9222, %int1_6984 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6985 = torch.constant.int 1
    %9234 = torch.aten.add.Tensor %9233, %9225, %int1_6985 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_6986 = torch.constant.int 1
    %9235 = torch.aten.add.Tensor %9234, %9228, %int1_6986 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
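    // Same pattern for @__device_5: seven transfers plus the local partial
    // %9083, accumulated into %9263.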
    %9236 = torch_c.to_builtin_tensor %9053 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6987 = arith.constant 1 : index
    %dim_6988 = tensor.dim %9236, %c1_6987 : tensor<4x?x4096xf16>
    %9237 = flow.tensor.transfer %9236 : tensor<4x?x4096xf16>{%dim_6988} to #hal.device.promise<@__device_5>
    %9238 = torch_c.from_builtin_tensor %9237 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9239 = torch_c.to_builtin_tensor %9059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6989 = arith.constant 1 : index
    %dim_6990 = tensor.dim %9239, %c1_6989 : tensor<4x?x4096xf16>
    %9240 = flow.tensor.transfer %9239 : tensor<4x?x4096xf16>{%dim_6990} to #hal.device.promise<@__device_5>
    %9241 = torch_c.from_builtin_tensor %9240 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9242 = torch_c.to_builtin_tensor %9065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6991 = arith.constant 1 : index
    %dim_6992 = tensor.dim %9242, %c1_6991 : tensor<4x?x4096xf16>
    %9243 = flow.tensor.transfer %9242 : tensor<4x?x4096xf16>{%dim_6992} to #hal.device.promise<@__device_5>
    %9244 = torch_c.from_builtin_tensor %9243 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9245 = torch_c.to_builtin_tensor %9071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6993 = arith.constant 1 : index
    %dim_6994 = tensor.dim %9245, %c1_6993 : tensor<4x?x4096xf16>
    %9246 = flow.tensor.transfer %9245 : tensor<4x?x4096xf16>{%dim_6994} to #hal.device.promise<@__device_5>
    %9247 = torch_c.from_builtin_tensor %9246 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9248 = torch_c.to_builtin_tensor %9077 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6995 = arith.constant 1 : index
    %dim_6996 = tensor.dim %9248, %c1_6995 : tensor<4x?x4096xf16>
    %9249 = flow.tensor.transfer %9248 : tensor<4x?x4096xf16>{%dim_6996} to #hal.device.promise<@__device_5>
    %9250 = torch_c.from_builtin_tensor %9249 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9251 = torch_c.to_builtin_tensor %9089 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6997 = arith.constant 1 : index
    %dim_6998 = tensor.dim %9251, %c1_6997 : tensor<4x?x4096xf16>
    %9252 = flow.tensor.transfer %9251 : tensor<4x?x4096xf16>{%dim_6998} to #hal.device.promise<@__device_5>
    %9253 = torch_c.from_builtin_tensor %9252 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9254 = torch_c.to_builtin_tensor %9095 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_6999 = arith.constant 1 : index
    %dim_7000 = tensor.dim %9254, %c1_6999 : tensor<4x?x4096xf16>
    %9255 = flow.tensor.transfer %9254 : tensor<4x?x4096xf16>{%dim_7000} to #hal.device.promise<@__device_5>
    %9256 = torch_c.from_builtin_tensor %9255 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7001 = torch.constant.int 1
    %9257 = torch.aten.add.Tensor %9238, %9241, %int1_7001 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7002 = torch.constant.int 1
    %9258 = torch.aten.add.Tensor %9257, %9244, %int1_7002 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7003 = torch.constant.int 1
    %9259 = torch.aten.add.Tensor %9258, %9247, %int1_7003 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7004 = torch.constant.int 1
    %9260 = torch.aten.add.Tensor %9259, %9250, %int1_7004 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7005 = torch.constant.int 1
    %9261 = torch.aten.add.Tensor %9260, %9083, %int1_7005 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7006 = torch.constant.int 1
    %9262 = torch.aten.add.Tensor %9261, %9253, %int1_7006 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7007 = torch.constant.int 1
    %9263 = torch.aten.add.Tensor %9262, %9256, %int1_7007 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9263, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
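    // Same pattern for @__device_6: seven transfers plus the local partial
    // %9089 feeding the accumulation that begins at %9285.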
    %9264 = torch_c.to_builtin_tensor %9053 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7008 = arith.constant 1 : index
    %dim_7009 = tensor.dim %9264, %c1_7008 : tensor<4x?x4096xf16>
    %9265 = flow.tensor.transfer %9264 : tensor<4x?x4096xf16>{%dim_7009} to #hal.device.promise<@__device_6>
    %9266 = torch_c.from_builtin_tensor %9265 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9267 = torch_c.to_builtin_tensor %9059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7010 = arith.constant 1 : index
    %dim_7011 = tensor.dim %9267, %c1_7010 : tensor<4x?x4096xf16>
    %9268 = flow.tensor.transfer %9267 : tensor<4x?x4096xf16>{%dim_7011} to #hal.device.promise<@__device_6>
    %9269 = torch_c.from_builtin_tensor %9268 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9270 = torch_c.to_builtin_tensor %9065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7012 = arith.constant 1 : index
    %dim_7013 = tensor.dim %9270, %c1_7012 : tensor<4x?x4096xf16>
    %9271 = flow.tensor.transfer %9270 : tensor<4x?x4096xf16>{%dim_7013} to #hal.device.promise<@__device_6>
    %9272 = torch_c.from_builtin_tensor %9271 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9273 = torch_c.to_builtin_tensor %9071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7014 = arith.constant 1 : index
    %dim_7015 = tensor.dim %9273, %c1_7014 : tensor<4x?x4096xf16>
    %9274 = flow.tensor.transfer %9273 : tensor<4x?x4096xf16>{%dim_7015} to #hal.device.promise<@__device_6>
    %9275 = torch_c.from_builtin_tensor %9274 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9276 = torch_c.to_builtin_tensor %9077 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7016 = arith.constant 1 : index
    %dim_7017 = tensor.dim %9276, %c1_7016 : tensor<4x?x4096xf16>
    %9277 = flow.tensor.transfer %9276 : tensor<4x?x4096xf16>{%dim_7017} to #hal.device.promise<@__device_6>
    %9278 = torch_c.from_builtin_tensor %9277 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9279 = torch_c.to_builtin_tensor %9083 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7018 = arith.constant 1 : index
    %dim_7019 = tensor.dim %9279, %c1_7018 : tensor<4x?x4096xf16>
    %9280 = flow.tensor.transfer %9279 : tensor<4x?x4096xf16>{%dim_7019} to #hal.device.promise<@__device_6>
    %9281 = torch_c.from_builtin_tensor %9280 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9282 = torch_c.to_builtin_tensor %9095 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7020 = arith.constant 1 : index
    %dim_7021 = tensor.dim %9282, %c1_7020 : tensor<4x?x4096xf16>
    %9283 = flow.tensor.transfer %9282 : tensor<4x?x4096xf16>{%dim_7021} to #hal.device.promise<@__device_6>
    %9284 = torch_c.from_builtin_tensor %9283 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
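    // Sum the transferred partials on @__device_6 (the shard %9089 is added
    // directly, without a transfer).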
    %int1_7022 = torch.constant.int 1
    %9285 = torch.aten.add.Tensor %9266, %9269, %int1_7022 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7023 = torch.constant.int 1
    %9286 = torch.aten.add.Tensor %9285, %9272, %int1_7023 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7024 = torch.constant.int 1
    %9287 = torch.aten.add.Tensor %9286, %9275, %int1_7024 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7025 = torch.constant.int 1
    %9288 = torch.aten.add.Tensor %9287, %9278, %int1_7025 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7026 = torch.constant.int 1
    %9289 = torch.aten.add.Tensor %9288, %9281, %int1_7026 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7027 = torch.constant.int 1
    %9290 = torch.aten.add.Tensor %9289, %9089, %int1_7027 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7028 = torch.constant.int 1
    %9291 = torch.aten.add.Tensor %9290, %9284, %int1_7028 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
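    // %9291 is the reduced result on @__device_6; gather the partials once more
    // for @__device_7.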
    %9292 = torch_c.to_builtin_tensor %9053 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7029 = arith.constant 1 : index
    %dim_7030 = tensor.dim %9292, %c1_7029 : tensor<4x?x4096xf16>
    %9293 = flow.tensor.transfer %9292 : tensor<4x?x4096xf16>{%dim_7030} to #hal.device.promise<@__device_7>
    %9294 = torch_c.from_builtin_tensor %9293 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9295 = torch_c.to_builtin_tensor %9059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7031 = arith.constant 1 : index
    %dim_7032 = tensor.dim %9295, %c1_7031 : tensor<4x?x4096xf16>
    %9296 = flow.tensor.transfer %9295 : tensor<4x?x4096xf16>{%dim_7032} to #hal.device.promise<@__device_7>
    %9297 = torch_c.from_builtin_tensor %9296 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9298 = torch_c.to_builtin_tensor %9065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7033 = arith.constant 1 : index
    %dim_7034 = tensor.dim %9298, %c1_7033 : tensor<4x?x4096xf16>
    %9299 = flow.tensor.transfer %9298 : tensor<4x?x4096xf16>{%dim_7034} to #hal.device.promise<@__device_7>
    %9300 = torch_c.from_builtin_tensor %9299 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9301 = torch_c.to_builtin_tensor %9071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7035 = arith.constant 1 : index
    %dim_7036 = tensor.dim %9301, %c1_7035 : tensor<4x?x4096xf16>
    %9302 = flow.tensor.transfer %9301 : tensor<4x?x4096xf16>{%dim_7036} to #hal.device.promise<@__device_7>
    %9303 = torch_c.from_builtin_tensor %9302 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9304 = torch_c.to_builtin_tensor %9077 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7037 = arith.constant 1 : index
    %dim_7038 = tensor.dim %9304, %c1_7037 : tensor<4x?x4096xf16>
    %9305 = flow.tensor.transfer %9304 : tensor<4x?x4096xf16>{%dim_7038} to #hal.device.promise<@__device_7>
    %9306 = torch_c.from_builtin_tensor %9305 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9307 = torch_c.to_builtin_tensor %9083 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7039 = arith.constant 1 : index
    %dim_7040 = tensor.dim %9307, %c1_7039 : tensor<4x?x4096xf16>
    %9308 = flow.tensor.transfer %9307 : tensor<4x?x4096xf16>{%dim_7040} to #hal.device.promise<@__device_7>
    %9309 = torch_c.from_builtin_tensor %9308 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9310 = torch_c.to_builtin_tensor %9089 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7041 = arith.constant 1 : index
    %dim_7042 = tensor.dim %9310, %c1_7041 : tensor<4x?x4096xf16>
    %9311 = flow.tensor.transfer %9310 : tensor<4x?x4096xf16>{%dim_7042} to #hal.device.promise<@__device_7>
    %9312 = torch_c.from_builtin_tensor %9311 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
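    // Reduce on @__device_7; %9095 enters the chain untransferred, suggesting it
    // is already resident on this device.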
    %int1_7043 = torch.constant.int 1
    %9313 = torch.aten.add.Tensor %9294, %9297, %int1_7043 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7044 = torch.constant.int 1
    %9314 = torch.aten.add.Tensor %9313, %9300, %int1_7044 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7045 = torch.constant.int 1
    %9315 = torch.aten.add.Tensor %9314, %9303, %int1_7045 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7046 = torch.constant.int 1
    %9316 = torch.aten.add.Tensor %9315, %9306, %int1_7046 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7047 = torch.constant.int 1
    %9317 = torch.aten.add.Tensor %9316, %9309, %int1_7047 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7048 = torch.constant.int 1
    %9318 = torch.aten.add.Tensor %9317, %9312, %int1_7048 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7049 = torch.constant.int 1
    %9319 = torch.aten.add.Tensor %9318, %9095, %int1_7049 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
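    // Per-device residual adds: %7979..%7986 appear to be the residual stream
    // from the previous block, combined with each device's reduced output
    // (%9123, %9151, ..., %9319).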
    %int1_7050 = torch.constant.int 1
    %9320 = torch.aten.add.Tensor %7979, %9123, %int1_7050 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7051 = torch.constant.int 1
    %9321 = torch.aten.add.Tensor %7980, %9151, %int1_7051 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7052 = torch.constant.int 1
    %9322 = torch.aten.add.Tensor %7981, %9179, %int1_7052 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7053 = torch.constant.int 1
    %9323 = torch.aten.add.Tensor %7982, %9207, %int1_7053 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7054 = torch.constant.int 1
    %9324 = torch.aten.add.Tensor %7983, %9235, %int1_7054 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7055 = torch.constant.int 1
    %9325 = torch.aten.add.Tensor %7984, %9263, %int1_7055 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7056 = torch.constant.int 1
    %9326 = torch.aten.add.Tensor %7985, %9291, %int1_7056 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7057 = torch.constant.int 1
    %9327 = torch.aten.add.Tensor %7986, %9319, %int1_7057 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
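    // What follows is consistent with a sharded RMSNorm over the eight residual
    // copies: upcast to f32 (dtype code 6), square, mean over the hidden dim,
    // add eps, rsqrt, rescale, apply the norm weight, and downcast to f16.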
    %int6_7058 = torch.constant.int 6
    %9328 = torch.prims.convert_element_type %9320, %int6_7058 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7059 = torch.constant.int 6
    %9329 = torch.prims.convert_element_type %9321, %int6_7059 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7060 = torch.constant.int 6
    %9330 = torch.prims.convert_element_type %9322, %int6_7060 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7061 = torch.constant.int 6
    %9331 = torch.prims.convert_element_type %9323, %int6_7061 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7062 = torch.constant.int 6
    %9332 = torch.prims.convert_element_type %9324, %int6_7062 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7063 = torch.constant.int 6
    %9333 = torch.prims.convert_element_type %9325, %int6_7063 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7064 = torch.constant.int 6
    %9334 = torch.prims.convert_element_type %9326, %int6_7064 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7065 = torch.constant.int 6
    %9335 = torch.prims.convert_element_type %9327, %int6_7065 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
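    // Element-wise square of each f32 copy (the x^2 term of the RMS statistic).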
    %int2_7066 = torch.constant.int 2
    %9336 = torch.aten.pow.Tensor_Scalar %9328, %int2_7066 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7067 = torch.constant.int 2
    %9337 = torch.aten.pow.Tensor_Scalar %9329, %int2_7067 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7068 = torch.constant.int 2
    %9338 = torch.aten.pow.Tensor_Scalar %9330, %int2_7068 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7069 = torch.constant.int 2
    %9339 = torch.aten.pow.Tensor_Scalar %9331, %int2_7069 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7070 = torch.constant.int 2
    %9340 = torch.aten.pow.Tensor_Scalar %9332, %int2_7070 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7071 = torch.constant.int 2
    %9341 = torch.aten.pow.Tensor_Scalar %9333, %int2_7071 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7072 = torch.constant.int 2
    %9342 = torch.aten.pow.Tensor_Scalar %9334, %int2_7072 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7073 = torch.constant.int 2
    %9343 = torch.aten.pow.Tensor_Scalar %9335, %int2_7073 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
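    // Mean over the last (4096) dimension with keepdim=true, giving mean(x^2)
    // per token as a [4,?,1] tensor.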
    %int-1_7074 = torch.constant.int -1
    %9344 = torch.prim.ListConstruct %int-1_7074 : (!torch.int) -> !torch.list<int>
    %true_7075 = torch.constant.bool true
    %none_7076 = torch.constant.none
    %9345 = torch.aten.mean.dim %9336, %9344, %true_7075, %none_7076 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7077 = torch.constant.int -1
    %9346 = torch.prim.ListConstruct %int-1_7077 : (!torch.int) -> !torch.list<int>
    %true_7078 = torch.constant.bool true
    %none_7079 = torch.constant.none
    %9347 = torch.aten.mean.dim %9337, %9346, %true_7078, %none_7079 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7080 = torch.constant.int -1
    %9348 = torch.prim.ListConstruct %int-1_7080 : (!torch.int) -> !torch.list<int>
    %true_7081 = torch.constant.bool true
    %none_7082 = torch.constant.none
    %9349 = torch.aten.mean.dim %9338, %9348, %true_7081, %none_7082 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7083 = torch.constant.int -1
    %9350 = torch.prim.ListConstruct %int-1_7083 : (!torch.int) -> !torch.list<int>
    %true_7084 = torch.constant.bool true
    %none_7085 = torch.constant.none
    %9351 = torch.aten.mean.dim %9339, %9350, %true_7084, %none_7085 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7086 = torch.constant.int -1
    %9352 = torch.prim.ListConstruct %int-1_7086 : (!torch.int) -> !torch.list<int>
    %true_7087 = torch.constant.bool true
    %none_7088 = torch.constant.none
    %9353 = torch.aten.mean.dim %9340, %9352, %true_7087, %none_7088 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7089 = torch.constant.int -1
    %9354 = torch.prim.ListConstruct %int-1_7089 : (!torch.int) -> !torch.list<int>
    %true_7090 = torch.constant.bool true
    %none_7091 = torch.constant.none
    %9355 = torch.aten.mean.dim %9341, %9354, %true_7090, %none_7091 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7092 = torch.constant.int -1
    %9356 = torch.prim.ListConstruct %int-1_7092 : (!torch.int) -> !torch.list<int>
    %true_7093 = torch.constant.bool true
    %none_7094 = torch.constant.none
    %9357 = torch.aten.mean.dim %9342, %9356, %true_7093, %none_7094 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7095 = torch.constant.int -1
    %9358 = torch.prim.ListConstruct %int-1_7095 : (!torch.int) -> !torch.list<int>
    %true_7096 = torch.constant.bool true
    %none_7097 = torch.constant.none
    %9359 = torch.aten.mean.dim %9343, %9358, %true_7096, %none_7097 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
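    // Add the epsilon (~1.0e-5, printed at full f32 precision) before the rsqrt.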
    %float9.999990e-06_7098 = torch.constant.float 9.9999997473787516E-6
    %int1_7099 = torch.constant.int 1
    %9360 = torch.aten.add.Scalar %9345, %float9.999990e-06_7098, %int1_7099 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7100 = torch.constant.float 9.9999997473787516E-6
    %int1_7101 = torch.constant.int 1
    %9361 = torch.aten.add.Scalar %9347, %float9.999990e-06_7100, %int1_7101 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7102 = torch.constant.float 9.9999997473787516E-6
    %int1_7103 = torch.constant.int 1
    %9362 = torch.aten.add.Scalar %9349, %float9.999990e-06_7102, %int1_7103 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7104 = torch.constant.float 9.9999997473787516E-6
    %int1_7105 = torch.constant.int 1
    %9363 = torch.aten.add.Scalar %9351, %float9.999990e-06_7104, %int1_7105 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7106 = torch.constant.float 9.9999997473787516E-6
    %int1_7107 = torch.constant.int 1
    %9364 = torch.aten.add.Scalar %9353, %float9.999990e-06_7106, %int1_7107 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7108 = torch.constant.float 9.9999997473787516E-6
    %int1_7109 = torch.constant.int 1
    %9365 = torch.aten.add.Scalar %9355, %float9.999990e-06_7108, %int1_7109 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7110 = torch.constant.float 9.9999997473787516E-6
    %int1_7111 = torch.constant.int 1
    %9366 = torch.aten.add.Scalar %9357, %float9.999990e-06_7110, %int1_7111 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7112 = torch.constant.float 9.9999997473787516E-6
    %int1_7113 = torch.constant.int 1
    %9367 = torch.aten.add.Scalar %9359, %float9.999990e-06_7112, %int1_7113 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
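    // rsqrt(mean(x^2) + eps): the reciprocal RMS per token.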
    %9368 = torch.aten.rsqrt %9360 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9369 = torch.aten.rsqrt %9361 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9370 = torch.aten.rsqrt %9362 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9371 = torch.aten.rsqrt %9363 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9372 = torch.aten.rsqrt %9364 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9373 = torch.aten.rsqrt %9365 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9374 = torch.aten.rsqrt %9366 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9375 = torch.aten.rsqrt %9367 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
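    // Normalize: multiply each f32 activation by its reciprocal RMS, broadcast
    // over the hidden dimension.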
    %9376 = torch.aten.mul.Tensor %9328, %9368 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9377 = torch.aten.mul.Tensor %9329, %9369 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9378 = torch.aten.mul.Tensor %9330, %9370 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9379 = torch.aten.mul.Tensor %9331, %9371 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9380 = torch.aten.mul.Tensor %9332, %9372 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9381 = torch.aten.mul.Tensor %9333, %9373 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9382 = torch.aten.mul.Tensor %9334, %9374 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9383 = torch.aten.mul.Tensor %9335, %9375 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
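    // Scale by the norm weights %264..%271 — 4096-element f32 values, presumably
    // per-device replicas of this block's ffn_norm gamma.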
    %9384 = torch.aten.mul.Tensor %264, %9376 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9385 = torch.aten.mul.Tensor %265, %9377 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9386 = torch.aten.mul.Tensor %266, %9378 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9387 = torch.aten.mul.Tensor %267, %9379 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9388 = torch.aten.mul.Tensor %268, %9380 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9389 = torch.aten.mul.Tensor %269, %9381 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9390 = torch.aten.mul.Tensor %270, %9382 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9391 = torch.aten.mul.Tensor %271, %9383 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
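    // Downcast the normalized activations back to f16 (dtype code 5).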
    %int5_7114 = torch.constant.int 5
    %9392 = torch.prims.convert_element_type %9384, %int5_7114 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7115 = torch.constant.int 5
    %9393 = torch.prims.convert_element_type %9385, %int5_7115 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7116 = torch.constant.int 5
    %9394 = torch.prims.convert_element_type %9386, %int5_7116 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7117 = torch.constant.int 5
    %9395 = torch.prims.convert_element_type %9387, %int5_7117 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7118 = torch.constant.int 5
    %9396 = torch.prims.convert_element_type %9388, %int5_7118 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7119 = torch.constant.int 5
    %9397 = torch.prims.convert_element_type %9389, %int5_7119 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7120 = torch.constant.int 5
    %9398 = torch.prims.convert_element_type %9390, %int5_7120 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7121 = torch.constant.int 5
    %9399 = torch.prims.convert_element_type %9391, %int5_7121 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
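    // Transpose the eight [1792,4096] weight shards %272..%279 for the matmuls
    // below; 8 shards x 1792 rows = 14336, so these are presumably the
    // column-sharded FFN gate projection weights.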
    %int1_7122 = torch.constant.int 1
    %int0_7123 = torch.constant.int 0
    %9400 = torch.prim.ListConstruct %int1_7122, %int0_7123 : (!torch.int, !torch.int) -> !torch.list<int>
    %9401 = torch.aten.permute %272, %9400 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7124 = torch.constant.int 1
    %int0_7125 = torch.constant.int 0
    %9402 = torch.prim.ListConstruct %int1_7124, %int0_7125 : (!torch.int, !torch.int) -> !torch.list<int>
    %9403 = torch.aten.permute %273, %9402 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7126 = torch.constant.int 1
    %int0_7127 = torch.constant.int 0
    %9404 = torch.prim.ListConstruct %int1_7126, %int0_7127 : (!torch.int, !torch.int) -> !torch.list<int>
    %9405 = torch.aten.permute %274, %9404 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7128 = torch.constant.int 1
    %int0_7129 = torch.constant.int 0
    %9406 = torch.prim.ListConstruct %int1_7128, %int0_7129 : (!torch.int, !torch.int) -> !torch.list<int>
    %9407 = torch.aten.permute %275, %9406 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7130 = torch.constant.int 1
    %int0_7131 = torch.constant.int 0
    %9408 = torch.prim.ListConstruct %int1_7130, %int0_7131 : (!torch.int, !torch.int) -> !torch.list<int>
    %9409 = torch.aten.permute %276, %9408 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7132 = torch.constant.int 1
    %int0_7133 = torch.constant.int 0
    %9410 = torch.prim.ListConstruct %int1_7132, %int0_7133 : (!torch.int, !torch.int) -> !torch.list<int>
    %9411 = torch.aten.permute %277, %9410 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7134 = torch.constant.int 1
    %int0_7135 = torch.constant.int 0
    %9412 = torch.prim.ListConstruct %int1_7134, %int0_7135 : (!torch.int, !torch.int) -> !torch.list<int>
    %9413 = torch.aten.permute %278, %9412 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7136 = torch.constant.int 1
    %int0_7137 = torch.constant.int 0
    %9414 = torch.prim.ListConstruct %int1_7136, %int0_7137 : (!torch.int, !torch.int) -> !torch.list<int>
    %9415 = torch.aten.permute %279, %9414 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
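    // Per device: flatten [4,?,4096] to [4*s, 4096] (%2482 is the dynamic
    // sequence extent), matmul against the transposed shard, then reshape the
    // [?,1792] result back to [4,?,1792].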
    %int4_7138 = torch.constant.int 4
    %9416 = torch.aten.mul.int %int4_7138, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7139 = torch.constant.int 4096
    %9417 = torch.prim.ListConstruct %9416, %int4096_7139 : (!torch.int, !torch.int) -> !torch.list<int>
    %9418 = torch.aten.view %9392, %9417 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9418, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9419 = torch.aten.mm %9418, %9401 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9419, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7140 = torch.constant.int 4
    %int1792_7141 = torch.constant.int 1792
    %9420 = torch.prim.ListConstruct %int4_7140, %2482, %int1792_7141 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9421 = torch.aten.view %9419, %9420 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7142 = torch.constant.int 4
    %9422 = torch.aten.mul.int %int4_7142, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7143 = torch.constant.int 4096
    %9423 = torch.prim.ListConstruct %9422, %int4096_7143 : (!torch.int, !torch.int) -> !torch.list<int>
    %9424 = torch.aten.view %9393, %9423 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9424, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9425 = torch.aten.mm %9424, %9403 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9425, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7144 = torch.constant.int 4
    %int1792_7145 = torch.constant.int 1792
    %9426 = torch.prim.ListConstruct %int4_7144, %2482, %int1792_7145 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9427 = torch.aten.view %9425, %9426 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7146 = torch.constant.int 4
    %9428 = torch.aten.mul.int %int4_7146, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7147 = torch.constant.int 4096
    %9429 = torch.prim.ListConstruct %9428, %int4096_7147 : (!torch.int, !torch.int) -> !torch.list<int>
    %9430 = torch.aten.view %9394, %9429 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9430, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9431 = torch.aten.mm %9430, %9405 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9431, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7148 = torch.constant.int 4
    %int1792_7149 = torch.constant.int 1792
    %9432 = torch.prim.ListConstruct %int4_7148, %2482, %int1792_7149 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9433 = torch.aten.view %9431, %9432 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7150 = torch.constant.int 4
    %9434 = torch.aten.mul.int %int4_7150, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7151 = torch.constant.int 4096
    %9435 = torch.prim.ListConstruct %9434, %int4096_7151 : (!torch.int, !torch.int) -> !torch.list<int>
    %9436 = torch.aten.view %9395, %9435 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9436, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9437 = torch.aten.mm %9436, %9407 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9437, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7152 = torch.constant.int 4
    %int1792_7153 = torch.constant.int 1792
    %9438 = torch.prim.ListConstruct %int4_7152, %2482, %int1792_7153 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9439 = torch.aten.view %9437, %9438 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7154 = torch.constant.int 4
    %9440 = torch.aten.mul.int %int4_7154, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7155 = torch.constant.int 4096
    %9441 = torch.prim.ListConstruct %9440, %int4096_7155 : (!torch.int, !torch.int) -> !torch.list<int>
    %9442 = torch.aten.view %9396, %9441 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9442, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9443 = torch.aten.mm %9442, %9409 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9443, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7156 = torch.constant.int 4
    %int1792_7157 = torch.constant.int 1792
    %9444 = torch.prim.ListConstruct %int4_7156, %2482, %int1792_7157 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9445 = torch.aten.view %9443, %9444 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7158 = torch.constant.int 4
    %9446 = torch.aten.mul.int %int4_7158, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7159 = torch.constant.int 4096
    %9447 = torch.prim.ListConstruct %9446, %int4096_7159 : (!torch.int, !torch.int) -> !torch.list<int>
    %9448 = torch.aten.view %9397, %9447 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9448, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9449 = torch.aten.mm %9448, %9411 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9449, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7160 = torch.constant.int 4
    %int1792_7161 = torch.constant.int 1792
    %9450 = torch.prim.ListConstruct %int4_7160, %2482, %int1792_7161 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9451 = torch.aten.view %9449, %9450 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7162 = torch.constant.int 4
    %9452 = torch.aten.mul.int %int4_7162, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7163 = torch.constant.int 4096
    %9453 = torch.prim.ListConstruct %9452, %int4096_7163 : (!torch.int, !torch.int) -> !torch.list<int>
    %9454 = torch.aten.view %9398, %9453 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9454, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9455 = torch.aten.mm %9454, %9413 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9455, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7164 = torch.constant.int 4
    %int1792_7165 = torch.constant.int 1792
    %9456 = torch.prim.ListConstruct %int4_7164, %2482, %int1792_7165 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9457 = torch.aten.view %9455, %9456 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7166 = torch.constant.int 4
    %9458 = torch.aten.mul.int %int4_7166, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7167 = torch.constant.int 4096
    %9459 = torch.prim.ListConstruct %9458, %int4096_7167 : (!torch.int, !torch.int) -> !torch.list<int>
    %9460 = torch.aten.view %9399, %9459 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9460, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9461 = torch.aten.mm %9460, %9415 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9461, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7168 = torch.constant.int 4
    %int1792_7169 = torch.constant.int 1792
    %9462 = torch.prim.ListConstruct %int4_7168, %2482, %int1792_7169 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9463 = torch.aten.view %9461, %9462 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
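    // SiLU on each gate projection, as in a SwiGLU-style FFN.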
    %9464 = torch.aten.silu %9421 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9465 = torch.aten.silu %9427 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9466 = torch.aten.silu %9433 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9467 = torch.aten.silu %9439 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9468 = torch.aten.silu %9445 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9469 = torch.aten.silu %9451 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9470 = torch.aten.silu %9457 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9471 = torch.aten.silu %9463 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
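    // Transpose the second set of [1792,4096] shards %280..%287, presumably the
    // FFN up projection, to be combined element-wise with the SiLU outputs.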
    %int1_7170 = torch.constant.int 1
    %int0_7171 = torch.constant.int 0
    %9472 = torch.prim.ListConstruct %int1_7170, %int0_7171 : (!torch.int, !torch.int) -> !torch.list<int>
    %9473 = torch.aten.permute %280, %9472 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7172 = torch.constant.int 1
    %int0_7173 = torch.constant.int 0
    %9474 = torch.prim.ListConstruct %int1_7172, %int0_7173 : (!torch.int, !torch.int) -> !torch.list<int>
    %9475 = torch.aten.permute %281, %9474 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7174 = torch.constant.int 1
    %int0_7175 = torch.constant.int 0
    %9476 = torch.prim.ListConstruct %int1_7174, %int0_7175 : (!torch.int, !torch.int) -> !torch.list<int>
    %9477 = torch.aten.permute %282, %9476 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7176 = torch.constant.int 1
    %int0_7177 = torch.constant.int 0
    %9478 = torch.prim.ListConstruct %int1_7176, %int0_7177 : (!torch.int, !torch.int) -> !torch.list<int>
    %9479 = torch.aten.permute %283, %9478 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7178 = torch.constant.int 1
    %int0_7179 = torch.constant.int 0
    %9480 = torch.prim.ListConstruct %int1_7178, %int0_7179 : (!torch.int, !torch.int) -> !torch.list<int>
    %9481 = torch.aten.permute %284, %9480 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7180 = torch.constant.int 1
    %int0_7181 = torch.constant.int 0
    %9482 = torch.prim.ListConstruct %int1_7180, %int0_7181 : (!torch.int, !torch.int) -> !torch.list<int>
    %9483 = torch.aten.permute %285, %9482 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7182 = torch.constant.int 1
    %int0_7183 = torch.constant.int 0
    %9484 = torch.prim.ListConstruct %int1_7182, %int0_7183 : (!torch.int, !torch.int) -> !torch.list<int>
    %9485 = torch.aten.permute %286, %9484 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_7184 = torch.constant.int 1
    %int0_7185 = torch.constant.int 0
    %9486 = torch.prim.ListConstruct %int1_7184, %int0_7185 : (!torch.int, !torch.int) -> !torch.list<int>
    %9487 = torch.aten.permute %287, %9486 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
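    // Up-projection matmuls: the same flatten / mm / reshape pattern, applied to
    // %9392..%9399 against the transposed shards %9473..%9487.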
    %int4_7186 = torch.constant.int 4
    %9488 = torch.aten.mul.int %int4_7186, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7187 = torch.constant.int 4096
    %9489 = torch.prim.ListConstruct %9488, %int4096_7187 : (!torch.int, !torch.int) -> !torch.list<int>
    %9490 = torch.aten.view %9392, %9489 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9490, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9491 = torch.aten.mm %9490, %9473 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9491, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7188 = torch.constant.int 4
    %int1792_7189 = torch.constant.int 1792
    %9492 = torch.prim.ListConstruct %int4_7188, %2482, %int1792_7189 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9493 = torch.aten.view %9491, %9492 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7190 = torch.constant.int 4
    %9494 = torch.aten.mul.int %int4_7190, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7191 = torch.constant.int 4096
    %9495 = torch.prim.ListConstruct %9494, %int4096_7191 : (!torch.int, !torch.int) -> !torch.list<int>
    %9496 = torch.aten.view %9393, %9495 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9496, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9497 = torch.aten.mm %9496, %9475 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9497, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7192 = torch.constant.int 4
    %int1792_7193 = torch.constant.int 1792
    %9498 = torch.prim.ListConstruct %int4_7192, %2482, %int1792_7193 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9499 = torch.aten.view %9497, %9498 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7194 = torch.constant.int 4
    %9500 = torch.aten.mul.int %int4_7194, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7195 = torch.constant.int 4096
    %9501 = torch.prim.ListConstruct %9500, %int4096_7195 : (!torch.int, !torch.int) -> !torch.list<int>
    %9502 = torch.aten.view %9394, %9501 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9502, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9503 = torch.aten.mm %9502, %9477 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9503, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7196 = torch.constant.int 4
    %int1792_7197 = torch.constant.int 1792
    %9504 = torch.prim.ListConstruct %int4_7196, %2482, %int1792_7197 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9505 = torch.aten.view %9503, %9504 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7198 = torch.constant.int 4
    %9506 = torch.aten.mul.int %int4_7198, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7199 = torch.constant.int 4096
    %9507 = torch.prim.ListConstruct %9506, %int4096_7199 : (!torch.int, !torch.int) -> !torch.list<int>
    %9508 = torch.aten.view %9395, %9507 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9508, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9509 = torch.aten.mm %9508, %9479 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9509, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7200 = torch.constant.int 4
    %int1792_7201 = torch.constant.int 1792
    %9510 = torch.prim.ListConstruct %int4_7200, %2482, %int1792_7201 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9511 = torch.aten.view %9509, %9510 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7202 = torch.constant.int 4
    %9512 = torch.aten.mul.int %int4_7202, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7203 = torch.constant.int 4096
    %9513 = torch.prim.ListConstruct %9512, %int4096_7203 : (!torch.int, !torch.int) -> !torch.list<int>
    %9514 = torch.aten.view %9396, %9513 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9514, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9515 = torch.aten.mm %9514, %9481 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9515, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7204 = torch.constant.int 4
    %int1792_7205 = torch.constant.int 1792
    %9516 = torch.prim.ListConstruct %int4_7204, %2482, %int1792_7205 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9517 = torch.aten.view %9515, %9516 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7206 = torch.constant.int 4
    %9518 = torch.aten.mul.int %int4_7206, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7207 = torch.constant.int 4096
    %9519 = torch.prim.ListConstruct %9518, %int4096_7207 : (!torch.int, !torch.int) -> !torch.list<int>
    %9520 = torch.aten.view %9397, %9519 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9520, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9521 = torch.aten.mm %9520, %9483 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9521, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7208 = torch.constant.int 4
    %int1792_7209 = torch.constant.int 1792
    %9522 = torch.prim.ListConstruct %int4_7208, %2482, %int1792_7209 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9523 = torch.aten.view %9521, %9522 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7210 = torch.constant.int 4
    %9524 = torch.aten.mul.int %int4_7210, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7211 = torch.constant.int 4096
    %9525 = torch.prim.ListConstruct %9524, %int4096_7211 : (!torch.int, !torch.int) -> !torch.list<int>
    %9526 = torch.aten.view %9398, %9525 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9526, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9527 = torch.aten.mm %9526, %9485 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9527, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7212 = torch.constant.int 4
    %int1792_7213 = torch.constant.int 1792
    %9528 = torch.prim.ListConstruct %int4_7212, %2482, %int1792_7213 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9529 = torch.aten.view %9527, %9528 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_7214 = torch.constant.int 4
    %9530 = torch.aten.mul.int %int4_7214, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7215 = torch.constant.int 4096
    %9531 = torch.prim.ListConstruct %9530, %int4096_7215 : (!torch.int, !torch.int) -> !torch.list<int>
    %9532 = torch.aten.view %9399, %9531 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9532, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9533 = torch.aten.mm %9532, %9487 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9533, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_7216 = torch.constant.int 4
    %int1792_7217 = torch.constant.int 1792
    %9534 = torch.prim.ListConstruct %int4_7216, %2482, %int1792_7217 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9535 = torch.aten.view %9533, %9534 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
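    // Elementwise product of the two per-shard projections (%9464-%9471 with
    // %9493-%9535), yielding eight [4,?,1792] partials; this is the usual
    // gated-FFN (SwiGLU-style) combine, assuming the left operands are the
    // activated gate projections.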
    %9536 = torch.aten.mul.Tensor %9464, %9493 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9537 = torch.aten.mul.Tensor %9465, %9499 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9538 = torch.aten.mul.Tensor %9466, %9505 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9539 = torch.aten.mul.Tensor %9467, %9511 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9540 = torch.aten.mul.Tensor %9468, %9517 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9541 = torch.aten.mul.Tensor %9469, %9523 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9542 = torch.aten.mul.Tensor %9470, %9529 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %9543 = torch.aten.mul.Tensor %9471, %9535 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %9543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
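    // Transpose each of the eight [4096,1792] shard weights (%288-%295) to
    // [1792,4096] for the down-projection matmuls below.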
    %int1_7218 = torch.constant.int 1
    %int0_7219 = torch.constant.int 0
    %9544 = torch.prim.ListConstruct %int1_7218, %int0_7219 : (!torch.int, !torch.int) -> !torch.list<int>
    %9545 = torch.aten.permute %288, %9544 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_7220 = torch.constant.int 1
    %int0_7221 = torch.constant.int 0
    %9546 = torch.prim.ListConstruct %int1_7220, %int0_7221 : (!torch.int, !torch.int) -> !torch.list<int>
    %9547 = torch.aten.permute %289, %9546 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_7222 = torch.constant.int 1
    %int0_7223 = torch.constant.int 0
    %9548 = torch.prim.ListConstruct %int1_7222, %int0_7223 : (!torch.int, !torch.int) -> !torch.list<int>
    %9549 = torch.aten.permute %290, %9548 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_7224 = torch.constant.int 1
    %int0_7225 = torch.constant.int 0
    %9550 = torch.prim.ListConstruct %int1_7224, %int0_7225 : (!torch.int, !torch.int) -> !torch.list<int>
    %9551 = torch.aten.permute %291, %9550 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_7226 = torch.constant.int 1
    %int0_7227 = torch.constant.int 0
    %9552 = torch.prim.ListConstruct %int1_7226, %int0_7227 : (!torch.int, !torch.int) -> !torch.list<int>
    %9553 = torch.aten.permute %292, %9552 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_7228 = torch.constant.int 1
    %int0_7229 = torch.constant.int 0
    %9554 = torch.prim.ListConstruct %int1_7228, %int0_7229 : (!torch.int, !torch.int) -> !torch.list<int>
    %9555 = torch.aten.permute %293, %9554 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_7230 = torch.constant.int 1
    %int0_7231 = torch.constant.int 0
    %9556 = torch.prim.ListConstruct %int1_7230, %int0_7231 : (!torch.int, !torch.int) -> !torch.list<int>
    %9557 = torch.aten.permute %294, %9556 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_7232 = torch.constant.int 1
    %int0_7233 = torch.constant.int 0
    %9558 = torch.prim.ListConstruct %int1_7232, %int0_7233 : (!torch.int, !torch.int) -> !torch.list<int>
    %9559 = torch.aten.permute %295, %9558 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
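    // Down-projection per shard: flatten each gated [4,?,1792] partial to
    // [?,1792], matmul with the transposed [1792,4096] weight, and reshape to
    // [4,?,4096]. This produces the partial sums %9566, %9573, %9580, %9587,
    // %9594, %9601, %9608, %9615, one per device shard.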
    %int1_7234 = torch.constant.int 1
    %9560 = torch.aten.size.int %9421, %int1_7234 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_7235 = torch.constant.int 4
    %9561 = torch.aten.mul.int %int4_7235, %9560 : !torch.int, !torch.int -> !torch.int
    %int1792_7236 = torch.constant.int 1792
    %9562 = torch.prim.ListConstruct %9561, %int1792_7236 : (!torch.int, !torch.int) -> !torch.list<int>
    %9563 = torch.aten.view %9536, %9562 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9563, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %9564 = torch.aten.mm %9563, %9545 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9564, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_7237 = torch.constant.int 4
    %int4096_7238 = torch.constant.int 4096
    %9565 = torch.prim.ListConstruct %int4_7237, %9560, %int4096_7238 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9566 = torch.aten.view %9564, %9565 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7239 = torch.constant.int 1
    %9567 = torch.aten.size.int %9427, %int1_7239 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_7240 = torch.constant.int 4
    %9568 = torch.aten.mul.int %int4_7240, %9567 : !torch.int, !torch.int -> !torch.int
    %int1792_7241 = torch.constant.int 1792
    %9569 = torch.prim.ListConstruct %9568, %int1792_7241 : (!torch.int, !torch.int) -> !torch.list<int>
    %9570 = torch.aten.view %9537, %9569 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9570, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %9571 = torch.aten.mm %9570, %9547 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9571, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_7242 = torch.constant.int 4
    %int4096_7243 = torch.constant.int 4096
    %9572 = torch.prim.ListConstruct %int4_7242, %9567, %int4096_7243 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9573 = torch.aten.view %9571, %9572 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7244 = torch.constant.int 1
    %9574 = torch.aten.size.int %9433, %int1_7244 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_7245 = torch.constant.int 4
    %9575 = torch.aten.mul.int %int4_7245, %9574 : !torch.int, !torch.int -> !torch.int
    %int1792_7246 = torch.constant.int 1792
    %9576 = torch.prim.ListConstruct %9575, %int1792_7246 : (!torch.int, !torch.int) -> !torch.list<int>
    %9577 = torch.aten.view %9538, %9576 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9577, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %9578 = torch.aten.mm %9577, %9549 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9578, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_7247 = torch.constant.int 4
    %int4096_7248 = torch.constant.int 4096
    %9579 = torch.prim.ListConstruct %int4_7247, %9574, %int4096_7248 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9580 = torch.aten.view %9578, %9579 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7249 = torch.constant.int 1
    %9581 = torch.aten.size.int %9439, %int1_7249 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_7250 = torch.constant.int 4
    %9582 = torch.aten.mul.int %int4_7250, %9581 : !torch.int, !torch.int -> !torch.int
    %int1792_7251 = torch.constant.int 1792
    %9583 = torch.prim.ListConstruct %9582, %int1792_7251 : (!torch.int, !torch.int) -> !torch.list<int>
    %9584 = torch.aten.view %9539, %9583 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9584, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %9585 = torch.aten.mm %9584, %9551 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9585, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_7252 = torch.constant.int 4
    %int4096_7253 = torch.constant.int 4096
    %9586 = torch.prim.ListConstruct %int4_7252, %9581, %int4096_7253 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9587 = torch.aten.view %9585, %9586 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7254 = torch.constant.int 1
    %9588 = torch.aten.size.int %9445, %int1_7254 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_7255 = torch.constant.int 4
    %9589 = torch.aten.mul.int %int4_7255, %9588 : !torch.int, !torch.int -> !torch.int
    %int1792_7256 = torch.constant.int 1792
    %9590 = torch.prim.ListConstruct %9589, %int1792_7256 : (!torch.int, !torch.int) -> !torch.list<int>
    %9591 = torch.aten.view %9540, %9590 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9591, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %9592 = torch.aten.mm %9591, %9553 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9592, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_7257 = torch.constant.int 4
    %int4096_7258 = torch.constant.int 4096
    %9593 = torch.prim.ListConstruct %int4_7257, %9588, %int4096_7258 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9594 = torch.aten.view %9592, %9593 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7259 = torch.constant.int 1
    %9595 = torch.aten.size.int %9451, %int1_7259 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_7260 = torch.constant.int 4
    %9596 = torch.aten.mul.int %int4_7260, %9595 : !torch.int, !torch.int -> !torch.int
    %int1792_7261 = torch.constant.int 1792
    %9597 = torch.prim.ListConstruct %9596, %int1792_7261 : (!torch.int, !torch.int) -> !torch.list<int>
    %9598 = torch.aten.view %9541, %9597 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9598, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %9599 = torch.aten.mm %9598, %9555 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9599, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_7262 = torch.constant.int 4
    %int4096_7263 = torch.constant.int 4096
    %9600 = torch.prim.ListConstruct %int4_7262, %9595, %int4096_7263 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9601 = torch.aten.view %9599, %9600 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7264 = torch.constant.int 1
    %9602 = torch.aten.size.int %9457, %int1_7264 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_7265 = torch.constant.int 4
    %9603 = torch.aten.mul.int %int4_7265, %9602 : !torch.int, !torch.int -> !torch.int
    %int1792_7266 = torch.constant.int 1792
    %9604 = torch.prim.ListConstruct %9603, %int1792_7266 : (!torch.int, !torch.int) -> !torch.list<int>
    %9605 = torch.aten.view %9542, %9604 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9605, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %9606 = torch.aten.mm %9605, %9557 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9606, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_7267 = torch.constant.int 4
    %int4096_7268 = torch.constant.int 4096
    %9607 = torch.prim.ListConstruct %int4_7267, %9602, %int4096_7268 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9608 = torch.aten.view %9606, %9607 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7269 = torch.constant.int 1
    %9609 = torch.aten.size.int %9463, %int1_7269 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_7270 = torch.constant.int 4
    %9610 = torch.aten.mul.int %int4_7270, %9609 : !torch.int, !torch.int -> !torch.int
    %int1792_7271 = torch.constant.int 1792
    %9611 = torch.prim.ListConstruct %9610, %int1792_7271 : (!torch.int, !torch.int) -> !torch.list<int>
    %9612 = torch.aten.view %9543, %9611 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %9612, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %9613 = torch.aten.mm %9612, %9559 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9613, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_7272 = torch.constant.int 4
    %int4096_7273 = torch.constant.int 4096
    %9614 = torch.prim.ListConstruct %int4_7272, %9609, %int4096_7273 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9615 = torch.aten.view %9613, %9614 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
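    // Sum reduction on @__device_0: transfer the seven remote partials in and
    // accumulate all eight [4,?,4096] tensors. The same reduction is
    // replicated on the other devices below, so the region as a whole is
    // effectively a sum all-reduce across the eight shards.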
    %9616 = torch_c.to_builtin_tensor %9573 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7274 = arith.constant 1 : index
    %dim_7275 = tensor.dim %9616, %c1_7274 : tensor<4x?x4096xf16>
    %9617 = flow.tensor.transfer %9616 : tensor<4x?x4096xf16>{%dim_7275} to #hal.device.promise<@__device_0>
    %9618 = torch_c.from_builtin_tensor %9617 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9619 = torch_c.to_builtin_tensor %9580 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7276 = arith.constant 1 : index
    %dim_7277 = tensor.dim %9619, %c1_7276 : tensor<4x?x4096xf16>
    %9620 = flow.tensor.transfer %9619 : tensor<4x?x4096xf16>{%dim_7277} to #hal.device.promise<@__device_0>
    %9621 = torch_c.from_builtin_tensor %9620 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9622 = torch_c.to_builtin_tensor %9587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7278 = arith.constant 1 : index
    %dim_7279 = tensor.dim %9622, %c1_7278 : tensor<4x?x4096xf16>
    %9623 = flow.tensor.transfer %9622 : tensor<4x?x4096xf16>{%dim_7279} to #hal.device.promise<@__device_0>
    %9624 = torch_c.from_builtin_tensor %9623 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9625 = torch_c.to_builtin_tensor %9594 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7280 = arith.constant 1 : index
    %dim_7281 = tensor.dim %9625, %c1_7280 : tensor<4x?x4096xf16>
    %9626 = flow.tensor.transfer %9625 : tensor<4x?x4096xf16>{%dim_7281} to #hal.device.promise<@__device_0>
    %9627 = torch_c.from_builtin_tensor %9626 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9628 = torch_c.to_builtin_tensor %9601 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7282 = arith.constant 1 : index
    %dim_7283 = tensor.dim %9628, %c1_7282 : tensor<4x?x4096xf16>
    %9629 = flow.tensor.transfer %9628 : tensor<4x?x4096xf16>{%dim_7283} to #hal.device.promise<@__device_0>
    %9630 = torch_c.from_builtin_tensor %9629 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9631 = torch_c.to_builtin_tensor %9608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7284 = arith.constant 1 : index
    %dim_7285 = tensor.dim %9631, %c1_7284 : tensor<4x?x4096xf16>
    %9632 = flow.tensor.transfer %9631 : tensor<4x?x4096xf16>{%dim_7285} to #hal.device.promise<@__device_0>
    %9633 = torch_c.from_builtin_tensor %9632 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9634 = torch_c.to_builtin_tensor %9615 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7286 = arith.constant 1 : index
    %dim_7287 = tensor.dim %9634, %c1_7286 : tensor<4x?x4096xf16>
    %9635 = flow.tensor.transfer %9634 : tensor<4x?x4096xf16>{%dim_7287} to #hal.device.promise<@__device_0>
    %9636 = torch_c.from_builtin_tensor %9635 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7288 = torch.constant.int 1
    %9637 = torch.aten.add.Tensor %9566, %9618, %int1_7288 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7289 = torch.constant.int 1
    %9638 = torch.aten.add.Tensor %9637, %9621, %int1_7289 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7290 = torch.constant.int 1
    %9639 = torch.aten.add.Tensor %9638, %9624, %int1_7290 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7291 = torch.constant.int 1
    %9640 = torch.aten.add.Tensor %9639, %9627, %int1_7291 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7292 = torch.constant.int 1
    %9641 = torch.aten.add.Tensor %9640, %9630, %int1_7292 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7293 = torch.constant.int 1
    %9642 = torch.aten.add.Tensor %9641, %9633, %int1_7293 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7294 = torch.constant.int 1
    %9643 = torch.aten.add.Tensor %9642, %9636, %int1_7294 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
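    // Reduction replica on @__device_1 (%9573 is already local, so only the
    // other seven partials are transferred).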
    %9644 = torch_c.to_builtin_tensor %9566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7295 = arith.constant 1 : index
    %dim_7296 = tensor.dim %9644, %c1_7295 : tensor<4x?x4096xf16>
    %9645 = flow.tensor.transfer %9644 : tensor<4x?x4096xf16>{%dim_7296} to #hal.device.promise<@__device_1>
    %9646 = torch_c.from_builtin_tensor %9645 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9647 = torch_c.to_builtin_tensor %9580 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7297 = arith.constant 1 : index
    %dim_7298 = tensor.dim %9647, %c1_7297 : tensor<4x?x4096xf16>
    %9648 = flow.tensor.transfer %9647 : tensor<4x?x4096xf16>{%dim_7298} to #hal.device.promise<@__device_1>
    %9649 = torch_c.from_builtin_tensor %9648 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9650 = torch_c.to_builtin_tensor %9587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7299 = arith.constant 1 : index
    %dim_7300 = tensor.dim %9650, %c1_7299 : tensor<4x?x4096xf16>
    %9651 = flow.tensor.transfer %9650 : tensor<4x?x4096xf16>{%dim_7300} to #hal.device.promise<@__device_1>
    %9652 = torch_c.from_builtin_tensor %9651 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9653 = torch_c.to_builtin_tensor %9594 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7301 = arith.constant 1 : index
    %dim_7302 = tensor.dim %9653, %c1_7301 : tensor<4x?x4096xf16>
    %9654 = flow.tensor.transfer %9653 : tensor<4x?x4096xf16>{%dim_7302} to #hal.device.promise<@__device_1>
    %9655 = torch_c.from_builtin_tensor %9654 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9656 = torch_c.to_builtin_tensor %9601 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7303 = arith.constant 1 : index
    %dim_7304 = tensor.dim %9656, %c1_7303 : tensor<4x?x4096xf16>
    %9657 = flow.tensor.transfer %9656 : tensor<4x?x4096xf16>{%dim_7304} to #hal.device.promise<@__device_1>
    %9658 = torch_c.from_builtin_tensor %9657 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9659 = torch_c.to_builtin_tensor %9608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7305 = arith.constant 1 : index
    %dim_7306 = tensor.dim %9659, %c1_7305 : tensor<4x?x4096xf16>
    %9660 = flow.tensor.transfer %9659 : tensor<4x?x4096xf16>{%dim_7306} to #hal.device.promise<@__device_1>
    %9661 = torch_c.from_builtin_tensor %9660 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9662 = torch_c.to_builtin_tensor %9615 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7307 = arith.constant 1 : index
    %dim_7308 = tensor.dim %9662, %c1_7307 : tensor<4x?x4096xf16>
    %9663 = flow.tensor.transfer %9662 : tensor<4x?x4096xf16>{%dim_7308} to #hal.device.promise<@__device_1>
    %9664 = torch_c.from_builtin_tensor %9663 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7309 = torch.constant.int 1
    %9665 = torch.aten.add.Tensor %9646, %9573, %int1_7309 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7310 = torch.constant.int 1
    %9666 = torch.aten.add.Tensor %9665, %9649, %int1_7310 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7311 = torch.constant.int 1
    %9667 = torch.aten.add.Tensor %9666, %9652, %int1_7311 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7312 = torch.constant.int 1
    %9668 = torch.aten.add.Tensor %9667, %9655, %int1_7312 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7313 = torch.constant.int 1
    %9669 = torch.aten.add.Tensor %9668, %9658, %int1_7313 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7314 = torch.constant.int 1
    %9670 = torch.aten.add.Tensor %9669, %9661, %int1_7314 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7315 = torch.constant.int 1
    %9671 = torch.aten.add.Tensor %9670, %9664, %int1_7315 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
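    // Reduction replica on @__device_2 (local partial: %9580).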
    %9672 = torch_c.to_builtin_tensor %9566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7316 = arith.constant 1 : index
    %dim_7317 = tensor.dim %9672, %c1_7316 : tensor<4x?x4096xf16>
    %9673 = flow.tensor.transfer %9672 : tensor<4x?x4096xf16>{%dim_7317} to #hal.device.promise<@__device_2>
    %9674 = torch_c.from_builtin_tensor %9673 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9675 = torch_c.to_builtin_tensor %9573 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7318 = arith.constant 1 : index
    %dim_7319 = tensor.dim %9675, %c1_7318 : tensor<4x?x4096xf16>
    %9676 = flow.tensor.transfer %9675 : tensor<4x?x4096xf16>{%dim_7319} to #hal.device.promise<@__device_2>
    %9677 = torch_c.from_builtin_tensor %9676 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9678 = torch_c.to_builtin_tensor %9587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7320 = arith.constant 1 : index
    %dim_7321 = tensor.dim %9678, %c1_7320 : tensor<4x?x4096xf16>
    %9679 = flow.tensor.transfer %9678 : tensor<4x?x4096xf16>{%dim_7321} to #hal.device.promise<@__device_2>
    %9680 = torch_c.from_builtin_tensor %9679 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9681 = torch_c.to_builtin_tensor %9594 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7322 = arith.constant 1 : index
    %dim_7323 = tensor.dim %9681, %c1_7322 : tensor<4x?x4096xf16>
    %9682 = flow.tensor.transfer %9681 : tensor<4x?x4096xf16>{%dim_7323} to #hal.device.promise<@__device_2>
    %9683 = torch_c.from_builtin_tensor %9682 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9684 = torch_c.to_builtin_tensor %9601 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7324 = arith.constant 1 : index
    %dim_7325 = tensor.dim %9684, %c1_7324 : tensor<4x?x4096xf16>
    %9685 = flow.tensor.transfer %9684 : tensor<4x?x4096xf16>{%dim_7325} to #hal.device.promise<@__device_2>
    %9686 = torch_c.from_builtin_tensor %9685 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9687 = torch_c.to_builtin_tensor %9608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7326 = arith.constant 1 : index
    %dim_7327 = tensor.dim %9687, %c1_7326 : tensor<4x?x4096xf16>
    %9688 = flow.tensor.transfer %9687 : tensor<4x?x4096xf16>{%dim_7327} to #hal.device.promise<@__device_2>
    %9689 = torch_c.from_builtin_tensor %9688 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9690 = torch_c.to_builtin_tensor %9615 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7328 = arith.constant 1 : index
    %dim_7329 = tensor.dim %9690, %c1_7328 : tensor<4x?x4096xf16>
    %9691 = flow.tensor.transfer %9690 : tensor<4x?x4096xf16>{%dim_7329} to #hal.device.promise<@__device_2>
    %9692 = torch_c.from_builtin_tensor %9691 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7330 = torch.constant.int 1
    %9693 = torch.aten.add.Tensor %9674, %9677, %int1_7330 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7331 = torch.constant.int 1
    %9694 = torch.aten.add.Tensor %9693, %9580, %int1_7331 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7332 = torch.constant.int 1
    %9695 = torch.aten.add.Tensor %9694, %9680, %int1_7332 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7333 = torch.constant.int 1
    %9696 = torch.aten.add.Tensor %9695, %9683, %int1_7333 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7334 = torch.constant.int 1
    %9697 = torch.aten.add.Tensor %9696, %9686, %int1_7334 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7335 = torch.constant.int 1
    %9698 = torch.aten.add.Tensor %9697, %9689, %int1_7335 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7336 = torch.constant.int 1
    %9699 = torch.aten.add.Tensor %9698, %9692, %int1_7336 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
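    // Reduction replica on @__device_3 (local partial: %9587).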
    %9700 = torch_c.to_builtin_tensor %9566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7337 = arith.constant 1 : index
    %dim_7338 = tensor.dim %9700, %c1_7337 : tensor<4x?x4096xf16>
    %9701 = flow.tensor.transfer %9700 : tensor<4x?x4096xf16>{%dim_7338} to #hal.device.promise<@__device_3>
    %9702 = torch_c.from_builtin_tensor %9701 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9703 = torch_c.to_builtin_tensor %9573 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7339 = arith.constant 1 : index
    %dim_7340 = tensor.dim %9703, %c1_7339 : tensor<4x?x4096xf16>
    %9704 = flow.tensor.transfer %9703 : tensor<4x?x4096xf16>{%dim_7340} to #hal.device.promise<@__device_3>
    %9705 = torch_c.from_builtin_tensor %9704 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9706 = torch_c.to_builtin_tensor %9580 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7341 = arith.constant 1 : index
    %dim_7342 = tensor.dim %9706, %c1_7341 : tensor<4x?x4096xf16>
    %9707 = flow.tensor.transfer %9706 : tensor<4x?x4096xf16>{%dim_7342} to #hal.device.promise<@__device_3>
    %9708 = torch_c.from_builtin_tensor %9707 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9709 = torch_c.to_builtin_tensor %9594 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7343 = arith.constant 1 : index
    %dim_7344 = tensor.dim %9709, %c1_7343 : tensor<4x?x4096xf16>
    %9710 = flow.tensor.transfer %9709 : tensor<4x?x4096xf16>{%dim_7344} to #hal.device.promise<@__device_3>
    %9711 = torch_c.from_builtin_tensor %9710 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9712 = torch_c.to_builtin_tensor %9601 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7345 = arith.constant 1 : index
    %dim_7346 = tensor.dim %9712, %c1_7345 : tensor<4x?x4096xf16>
    %9713 = flow.tensor.transfer %9712 : tensor<4x?x4096xf16>{%dim_7346} to #hal.device.promise<@__device_3>
    %9714 = torch_c.from_builtin_tensor %9713 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9715 = torch_c.to_builtin_tensor %9608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7347 = arith.constant 1 : index
    %dim_7348 = tensor.dim %9715, %c1_7347 : tensor<4x?x4096xf16>
    %9716 = flow.tensor.transfer %9715 : tensor<4x?x4096xf16>{%dim_7348} to #hal.device.promise<@__device_3>
    %9717 = torch_c.from_builtin_tensor %9716 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9718 = torch_c.to_builtin_tensor %9615 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7349 = arith.constant 1 : index
    %dim_7350 = tensor.dim %9718, %c1_7349 : tensor<4x?x4096xf16>
    %9719 = flow.tensor.transfer %9718 : tensor<4x?x4096xf16>{%dim_7350} to #hal.device.promise<@__device_3>
    %9720 = torch_c.from_builtin_tensor %9719 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7351 = torch.constant.int 1
    %9721 = torch.aten.add.Tensor %9702, %9705, %int1_7351 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7352 = torch.constant.int 1
    %9722 = torch.aten.add.Tensor %9721, %9708, %int1_7352 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7353 = torch.constant.int 1
    %9723 = torch.aten.add.Tensor %9722, %9587, %int1_7353 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7354 = torch.constant.int 1
    %9724 = torch.aten.add.Tensor %9723, %9711, %int1_7354 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7355 = torch.constant.int 1
    %9725 = torch.aten.add.Tensor %9724, %9714, %int1_7355 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7356 = torch.constant.int 1
    %9726 = torch.aten.add.Tensor %9725, %9717, %int1_7356 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7357 = torch.constant.int 1
    %9727 = torch.aten.add.Tensor %9726, %9720, %int1_7357 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
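    // Reduction replica on @__device_4 (local partial: %9594).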
    %9728 = torch_c.to_builtin_tensor %9566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7358 = arith.constant 1 : index
    %dim_7359 = tensor.dim %9728, %c1_7358 : tensor<4x?x4096xf16>
    %9729 = flow.tensor.transfer %9728 : tensor<4x?x4096xf16>{%dim_7359} to #hal.device.promise<@__device_4>
    %9730 = torch_c.from_builtin_tensor %9729 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9731 = torch_c.to_builtin_tensor %9573 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7360 = arith.constant 1 : index
    %dim_7361 = tensor.dim %9731, %c1_7360 : tensor<4x?x4096xf16>
    %9732 = flow.tensor.transfer %9731 : tensor<4x?x4096xf16>{%dim_7361} to #hal.device.promise<@__device_4>
    %9733 = torch_c.from_builtin_tensor %9732 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9734 = torch_c.to_builtin_tensor %9580 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7362 = arith.constant 1 : index
    %dim_7363 = tensor.dim %9734, %c1_7362 : tensor<4x?x4096xf16>
    %9735 = flow.tensor.transfer %9734 : tensor<4x?x4096xf16>{%dim_7363} to #hal.device.promise<@__device_4>
    %9736 = torch_c.from_builtin_tensor %9735 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9737 = torch_c.to_builtin_tensor %9587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7364 = arith.constant 1 : index
    %dim_7365 = tensor.dim %9737, %c1_7364 : tensor<4x?x4096xf16>
    %9738 = flow.tensor.transfer %9737 : tensor<4x?x4096xf16>{%dim_7365} to #hal.device.promise<@__device_4>
    %9739 = torch_c.from_builtin_tensor %9738 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9740 = torch_c.to_builtin_tensor %9601 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7366 = arith.constant 1 : index
    %dim_7367 = tensor.dim %9740, %c1_7366 : tensor<4x?x4096xf16>
    %9741 = flow.tensor.transfer %9740 : tensor<4x?x4096xf16>{%dim_7367} to #hal.device.promise<@__device_4>
    %9742 = torch_c.from_builtin_tensor %9741 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9743 = torch_c.to_builtin_tensor %9608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7368 = arith.constant 1 : index
    %dim_7369 = tensor.dim %9743, %c1_7368 : tensor<4x?x4096xf16>
    %9744 = flow.tensor.transfer %9743 : tensor<4x?x4096xf16>{%dim_7369} to #hal.device.promise<@__device_4>
    %9745 = torch_c.from_builtin_tensor %9744 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9746 = torch_c.to_builtin_tensor %9615 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7370 = arith.constant 1 : index
    %dim_7371 = tensor.dim %9746, %c1_7370 : tensor<4x?x4096xf16>
    %9747 = flow.tensor.transfer %9746 : tensor<4x?x4096xf16>{%dim_7371} to #hal.device.promise<@__device_4>
    %9748 = torch_c.from_builtin_tensor %9747 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7372 = torch.constant.int 1
    %9749 = torch.aten.add.Tensor %9730, %9733, %int1_7372 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7373 = torch.constant.int 1
    %9750 = torch.aten.add.Tensor %9749, %9736, %int1_7373 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7374 = torch.constant.int 1
    %9751 = torch.aten.add.Tensor %9750, %9739, %int1_7374 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7375 = torch.constant.int 1
    %9752 = torch.aten.add.Tensor %9751, %9594, %int1_7375 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7376 = torch.constant.int 1
    %9753 = torch.aten.add.Tensor %9752, %9742, %int1_7376 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7377 = torch.constant.int 1
    %9754 = torch.aten.add.Tensor %9753, %9745, %int1_7377 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7378 = torch.constant.int 1
    %9755 = torch.aten.add.Tensor %9754, %9748, %int1_7378 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
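    // Reduction replica on @__device_5 (local partial: %9601).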
    %9756 = torch_c.to_builtin_tensor %9566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7379 = arith.constant 1 : index
    %dim_7380 = tensor.dim %9756, %c1_7379 : tensor<4x?x4096xf16>
    %9757 = flow.tensor.transfer %9756 : tensor<4x?x4096xf16>{%dim_7380} to #hal.device.promise<@__device_5>
    %9758 = torch_c.from_builtin_tensor %9757 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9759 = torch_c.to_builtin_tensor %9573 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7381 = arith.constant 1 : index
    %dim_7382 = tensor.dim %9759, %c1_7381 : tensor<4x?x4096xf16>
    %9760 = flow.tensor.transfer %9759 : tensor<4x?x4096xf16>{%dim_7382} to #hal.device.promise<@__device_5>
    %9761 = torch_c.from_builtin_tensor %9760 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9762 = torch_c.to_builtin_tensor %9580 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7383 = arith.constant 1 : index
    %dim_7384 = tensor.dim %9762, %c1_7383 : tensor<4x?x4096xf16>
    %9763 = flow.tensor.transfer %9762 : tensor<4x?x4096xf16>{%dim_7384} to #hal.device.promise<@__device_5>
    %9764 = torch_c.from_builtin_tensor %9763 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9765 = torch_c.to_builtin_tensor %9587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7385 = arith.constant 1 : index
    %dim_7386 = tensor.dim %9765, %c1_7385 : tensor<4x?x4096xf16>
    %9766 = flow.tensor.transfer %9765 : tensor<4x?x4096xf16>{%dim_7386} to #hal.device.promise<@__device_5>
    %9767 = torch_c.from_builtin_tensor %9766 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9768 = torch_c.to_builtin_tensor %9594 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7387 = arith.constant 1 : index
    %dim_7388 = tensor.dim %9768, %c1_7387 : tensor<4x?x4096xf16>
    %9769 = flow.tensor.transfer %9768 : tensor<4x?x4096xf16>{%dim_7388} to #hal.device.promise<@__device_5>
    %9770 = torch_c.from_builtin_tensor %9769 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9771 = torch_c.to_builtin_tensor %9608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7389 = arith.constant 1 : index
    %dim_7390 = tensor.dim %9771, %c1_7389 : tensor<4x?x4096xf16>
    %9772 = flow.tensor.transfer %9771 : tensor<4x?x4096xf16>{%dim_7390} to #hal.device.promise<@__device_5>
    %9773 = torch_c.from_builtin_tensor %9772 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9774 = torch_c.to_builtin_tensor %9615 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7391 = arith.constant 1 : index
    %dim_7392 = tensor.dim %9774, %c1_7391 : tensor<4x?x4096xf16>
    %9775 = flow.tensor.transfer %9774 : tensor<4x?x4096xf16>{%dim_7392} to #hal.device.promise<@__device_5>
    %9776 = torch_c.from_builtin_tensor %9775 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7393 = torch.constant.int 1
    %9777 = torch.aten.add.Tensor %9758, %9761, %int1_7393 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7394 = torch.constant.int 1
    %9778 = torch.aten.add.Tensor %9777, %9764, %int1_7394 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7395 = torch.constant.int 1
    %9779 = torch.aten.add.Tensor %9778, %9767, %int1_7395 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7396 = torch.constant.int 1
    %9780 = torch.aten.add.Tensor %9779, %9770, %int1_7396 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7397 = torch.constant.int 1
    %9781 = torch.aten.add.Tensor %9780, %9601, %int1_7397 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7398 = torch.constant.int 1
    %9782 = torch.aten.add.Tensor %9781, %9773, %int1_7398 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7399 = torch.constant.int 1
    %9783 = torch.aten.add.Tensor %9782, %9776, %int1_7399 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
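    // Reduction replica on @__device_6 (local partial: %9608).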
    %9784 = torch_c.to_builtin_tensor %9566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7400 = arith.constant 1 : index
    %dim_7401 = tensor.dim %9784, %c1_7400 : tensor<4x?x4096xf16>
    %9785 = flow.tensor.transfer %9784 : tensor<4x?x4096xf16>{%dim_7401} to #hal.device.promise<@__device_6>
    %9786 = torch_c.from_builtin_tensor %9785 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9787 = torch_c.to_builtin_tensor %9573 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7402 = arith.constant 1 : index
    %dim_7403 = tensor.dim %9787, %c1_7402 : tensor<4x?x4096xf16>
    %9788 = flow.tensor.transfer %9787 : tensor<4x?x4096xf16>{%dim_7403} to #hal.device.promise<@__device_6>
    %9789 = torch_c.from_builtin_tensor %9788 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9790 = torch_c.to_builtin_tensor %9580 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7404 = arith.constant 1 : index
    %dim_7405 = tensor.dim %9790, %c1_7404 : tensor<4x?x4096xf16>
    %9791 = flow.tensor.transfer %9790 : tensor<4x?x4096xf16>{%dim_7405} to #hal.device.promise<@__device_6>
    %9792 = torch_c.from_builtin_tensor %9791 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9793 = torch_c.to_builtin_tensor %9587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7406 = arith.constant 1 : index
    %dim_7407 = tensor.dim %9793, %c1_7406 : tensor<4x?x4096xf16>
    %9794 = flow.tensor.transfer %9793 : tensor<4x?x4096xf16>{%dim_7407} to #hal.device.promise<@__device_6>
    %9795 = torch_c.from_builtin_tensor %9794 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9796 = torch_c.to_builtin_tensor %9594 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7408 = arith.constant 1 : index
    %dim_7409 = tensor.dim %9796, %c1_7408 : tensor<4x?x4096xf16>
    %9797 = flow.tensor.transfer %9796 : tensor<4x?x4096xf16>{%dim_7409} to #hal.device.promise<@__device_6>
    %9798 = torch_c.from_builtin_tensor %9797 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9799 = torch_c.to_builtin_tensor %9601 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7410 = arith.constant 1 : index
    %dim_7411 = tensor.dim %9799, %c1_7410 : tensor<4x?x4096xf16>
    %9800 = flow.tensor.transfer %9799 : tensor<4x?x4096xf16>{%dim_7411} to #hal.device.promise<@__device_6>
    %9801 = torch_c.from_builtin_tensor %9800 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9802 = torch_c.to_builtin_tensor %9615 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7412 = arith.constant 1 : index
    %dim_7413 = tensor.dim %9802, %c1_7412 : tensor<4x?x4096xf16>
    %9803 = flow.tensor.transfer %9802 : tensor<4x?x4096xf16>{%dim_7413} to #hal.device.promise<@__device_6>
    %9804 = torch_c.from_builtin_tensor %9803 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7414 = torch.constant.int 1
    %9805 = torch.aten.add.Tensor %9786, %9789, %int1_7414 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7415 = torch.constant.int 1
    %9806 = torch.aten.add.Tensor %9805, %9792, %int1_7415 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7416 = torch.constant.int 1
    %9807 = torch.aten.add.Tensor %9806, %9795, %int1_7416 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7417 = torch.constant.int 1
    %9808 = torch.aten.add.Tensor %9807, %9798, %int1_7417 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7418 = torch.constant.int 1
    %9809 = torch.aten.add.Tensor %9808, %9801, %int1_7418 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7419 = torch.constant.int 1
    %9810 = torch.aten.add.Tensor %9809, %9608, %int1_7419 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7420 = torch.constant.int 1
    %9811 = torch.aten.add.Tensor %9810, %9804, %int1_7420 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
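    // Gather-then-sum for @__device_7: transfer the seven remote partials, then
    // fold in the local partial (%9615) with the final add (%9839).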
    %9812 = torch_c.to_builtin_tensor %9566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7421 = arith.constant 1 : index
    %dim_7422 = tensor.dim %9812, %c1_7421 : tensor<4x?x4096xf16>
    %9813 = flow.tensor.transfer %9812 : tensor<4x?x4096xf16>{%dim_7422} to #hal.device.promise<@__device_7>
    %9814 = torch_c.from_builtin_tensor %9813 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9815 = torch_c.to_builtin_tensor %9573 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7423 = arith.constant 1 : index
    %dim_7424 = tensor.dim %9815, %c1_7423 : tensor<4x?x4096xf16>
    %9816 = flow.tensor.transfer %9815 : tensor<4x?x4096xf16>{%dim_7424} to #hal.device.promise<@__device_7>
    %9817 = torch_c.from_builtin_tensor %9816 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9818 = torch_c.to_builtin_tensor %9580 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7425 = arith.constant 1 : index
    %dim_7426 = tensor.dim %9818, %c1_7425 : tensor<4x?x4096xf16>
    %9819 = flow.tensor.transfer %9818 : tensor<4x?x4096xf16>{%dim_7426} to #hal.device.promise<@__device_7>
    %9820 = torch_c.from_builtin_tensor %9819 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9821 = torch_c.to_builtin_tensor %9587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7427 = arith.constant 1 : index
    %dim_7428 = tensor.dim %9821, %c1_7427 : tensor<4x?x4096xf16>
    %9822 = flow.tensor.transfer %9821 : tensor<4x?x4096xf16>{%dim_7428} to #hal.device.promise<@__device_7>
    %9823 = torch_c.from_builtin_tensor %9822 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9824 = torch_c.to_builtin_tensor %9594 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7429 = arith.constant 1 : index
    %dim_7430 = tensor.dim %9824, %c1_7429 : tensor<4x?x4096xf16>
    %9825 = flow.tensor.transfer %9824 : tensor<4x?x4096xf16>{%dim_7430} to #hal.device.promise<@__device_7>
    %9826 = torch_c.from_builtin_tensor %9825 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9827 = torch_c.to_builtin_tensor %9601 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7431 = arith.constant 1 : index
    %dim_7432 = tensor.dim %9827, %c1_7431 : tensor<4x?x4096xf16>
    %9828 = flow.tensor.transfer %9827 : tensor<4x?x4096xf16>{%dim_7432} to #hal.device.promise<@__device_7>
    %9829 = torch_c.from_builtin_tensor %9828 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %9830 = torch_c.to_builtin_tensor %9608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_7433 = arith.constant 1 : index
    %dim_7434 = tensor.dim %9830, %c1_7433 : tensor<4x?x4096xf16>
    %9831 = flow.tensor.transfer %9830 : tensor<4x?x4096xf16>{%dim_7434} to #hal.device.promise<@__device_7>
    %9832 = torch_c.from_builtin_tensor %9831 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7435 = torch.constant.int 1
    %9833 = torch.aten.add.Tensor %9814, %9817, %int1_7435 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7436 = torch.constant.int 1
    %9834 = torch.aten.add.Tensor %9833, %9820, %int1_7436 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7437 = torch.constant.int 1
    %9835 = torch.aten.add.Tensor %9834, %9823, %int1_7437 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7438 = torch.constant.int 1
    %9836 = torch.aten.add.Tensor %9835, %9826, %int1_7438 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7439 = torch.constant.int 1
    %9837 = torch.aten.add.Tensor %9836, %9829, %int1_7439 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7440 = torch.constant.int 1
    %9838 = torch.aten.add.Tensor %9837, %9832, %int1_7440 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7441 = torch.constant.int 1
    %9839 = torch.aten.add.Tensor %9838, %9615, %int1_7441 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
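    // Residual connection, one add per device: the reduced outputs (%9643,
    // %9671, %9699, %9727, %9755, %9783, %9811, %9839) are added back into each
    // device's copy of the residual stream (%9320 ... %9327), apparently closing
    // this decoder layer before the next layer's input norm below.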
    %int1_7442 = torch.constant.int 1
    %9840 = torch.aten.add.Tensor %9320, %9643, %int1_7442 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7443 = torch.constant.int 1
    %9841 = torch.aten.add.Tensor %9321, %9671, %int1_7443 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7444 = torch.constant.int 1
    %9842 = torch.aten.add.Tensor %9322, %9699, %int1_7444 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7445 = torch.constant.int 1
    %9843 = torch.aten.add.Tensor %9323, %9727, %int1_7445 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7446 = torch.constant.int 1
    %9844 = torch.aten.add.Tensor %9324, %9755, %int1_7446 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7447 = torch.constant.int 1
    %9845 = torch.aten.add.Tensor %9325, %9783, %int1_7447 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7448 = torch.constant.int 1
    %9846 = torch.aten.add.Tensor %9326, %9811, %int1_7448 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9846, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_7449 = torch.constant.int 1
    %9847 = torch.aten.add.Tensor %9327, %9839, %int1_7449 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
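    // RMSNorm, replicated on all eight devices: upcast to f32 (dtype code 6),
    // square, mean over the hidden dim with keepdim, add eps, rsqrt, scale the
    // activations, multiply by the replicated norm weight (%296 ... %303), and
    // downcast to f16. In formula form:
    //
    //   y = w * x / sqrt(mean(x^2, dim=-1) + eps)
    //
    // A minimal PyTorch-style sketch of the same computation (hypothetical
    // helper, not part of this module):
    //
    //   def rms_norm(x, w, eps=1e-5):
    //       x32 = x.float()                                    # f16 -> f32
    //       inv = torch.rsqrt(x32.pow(2).mean(-1, keepdim=True) + eps)
    //       return (w * (x32 * inv)).to(x.dtype)               # back to f16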
    %int6_7450 = torch.constant.int 6
    %9848 = torch.prims.convert_element_type %9840, %int6_7450 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7451 = torch.constant.int 6
    %9849 = torch.prims.convert_element_type %9841, %int6_7451 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7452 = torch.constant.int 6
    %9850 = torch.prims.convert_element_type %9842, %int6_7452 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7453 = torch.constant.int 6
    %9851 = torch.prims.convert_element_type %9843, %int6_7453 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7454 = torch.constant.int 6
    %9852 = torch.prims.convert_element_type %9844, %int6_7454 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7455 = torch.constant.int 6
    %9853 = torch.prims.convert_element_type %9845, %int6_7455 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7456 = torch.constant.int 6
    %9854 = torch.prims.convert_element_type %9846, %int6_7456 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_7457 = torch.constant.int 6
    %9855 = torch.prims.convert_element_type %9847, %int6_7457 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7458 = torch.constant.int 2
    %9856 = torch.aten.pow.Tensor_Scalar %9848, %int2_7458 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7459 = torch.constant.int 2
    %9857 = torch.aten.pow.Tensor_Scalar %9849, %int2_7459 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7460 = torch.constant.int 2
    %9858 = torch.aten.pow.Tensor_Scalar %9850, %int2_7460 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7461 = torch.constant.int 2
    %9859 = torch.aten.pow.Tensor_Scalar %9851, %int2_7461 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7462 = torch.constant.int 2
    %9860 = torch.aten.pow.Tensor_Scalar %9852, %int2_7462 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7463 = torch.constant.int 2
    %9861 = torch.aten.pow.Tensor_Scalar %9853, %int2_7463 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7464 = torch.constant.int 2
    %9862 = torch.aten.pow.Tensor_Scalar %9854, %int2_7464 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_7465 = torch.constant.int 2
    %9863 = torch.aten.pow.Tensor_Scalar %9855, %int2_7465 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
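    // Mean of squares over the last dimension (ListConstruct [-1], keepdim=true):
    // [4,?,4096] -> [4,?,1] on each device.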
    %int-1_7466 = torch.constant.int -1
    %9864 = torch.prim.ListConstruct %int-1_7466 : (!torch.int) -> !torch.list<int>
    %true_7467 = torch.constant.bool true
    %none_7468 = torch.constant.none
    %9865 = torch.aten.mean.dim %9856, %9864, %true_7467, %none_7468 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7469 = torch.constant.int -1
    %9866 = torch.prim.ListConstruct %int-1_7469 : (!torch.int) -> !torch.list<int>
    %true_7470 = torch.constant.bool true
    %none_7471 = torch.constant.none
    %9867 = torch.aten.mean.dim %9857, %9866, %true_7470, %none_7471 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7472 = torch.constant.int -1
    %9868 = torch.prim.ListConstruct %int-1_7472 : (!torch.int) -> !torch.list<int>
    %true_7473 = torch.constant.bool true
    %none_7474 = torch.constant.none
    %9869 = torch.aten.mean.dim %9858, %9868, %true_7473, %none_7474 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7475 = torch.constant.int -1
    %9870 = torch.prim.ListConstruct %int-1_7475 : (!torch.int) -> !torch.list<int>
    %true_7476 = torch.constant.bool true
    %none_7477 = torch.constant.none
    %9871 = torch.aten.mean.dim %9859, %9870, %true_7476, %none_7477 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7478 = torch.constant.int -1
    %9872 = torch.prim.ListConstruct %int-1_7478 : (!torch.int) -> !torch.list<int>
    %true_7479 = torch.constant.bool true
    %none_7480 = torch.constant.none
    %9873 = torch.aten.mean.dim %9860, %9872, %true_7479, %none_7480 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7481 = torch.constant.int -1
    %9874 = torch.prim.ListConstruct %int-1_7481 : (!torch.int) -> !torch.list<int>
    %true_7482 = torch.constant.bool true
    %none_7483 = torch.constant.none
    %9875 = torch.aten.mean.dim %9861, %9874, %true_7482, %none_7483 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7484 = torch.constant.int -1
    %9876 = torch.prim.ListConstruct %int-1_7484 : (!torch.int) -> !torch.list<int>
    %true_7485 = torch.constant.bool true
    %none_7486 = torch.constant.none
    %9877 = torch.aten.mean.dim %9862, %9876, %true_7485, %none_7486 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_7487 = torch.constant.int -1
    %9878 = torch.prim.ListConstruct %int-1_7487 : (!torch.int) -> !torch.list<int>
    %true_7488 = torch.constant.bool true
    %none_7489 = torch.constant.none
    %9879 = torch.aten.mean.dim %9863, %9878, %true_7488, %none_7489 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
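    // 9.9999997473787516E-6 is the nearest f32 to 1e-5, i.e. eps = 1e-5.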
    %float9.999990e-06_7490 = torch.constant.float 9.9999997473787516E-6
    %int1_7491 = torch.constant.int 1
    %9880 = torch.aten.add.Scalar %9865, %float9.999990e-06_7490, %int1_7491 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7492 = torch.constant.float 9.9999997473787516E-6
    %int1_7493 = torch.constant.int 1
    %9881 = torch.aten.add.Scalar %9867, %float9.999990e-06_7492, %int1_7493 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7494 = torch.constant.float 9.9999997473787516E-6
    %int1_7495 = torch.constant.int 1
    %9882 = torch.aten.add.Scalar %9869, %float9.999990e-06_7494, %int1_7495 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7496 = torch.constant.float 9.9999997473787516E-6
    %int1_7497 = torch.constant.int 1
    %9883 = torch.aten.add.Scalar %9871, %float9.999990e-06_7496, %int1_7497 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7498 = torch.constant.float 9.9999997473787516E-6
    %int1_7499 = torch.constant.int 1
    %9884 = torch.aten.add.Scalar %9873, %float9.999990e-06_7498, %int1_7499 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7500 = torch.constant.float 9.9999997473787516E-6
    %int1_7501 = torch.constant.int 1
    %9885 = torch.aten.add.Scalar %9875, %float9.999990e-06_7500, %int1_7501 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7502 = torch.constant.float 9.9999997473787516E-6
    %int1_7503 = torch.constant.int 1
    %9886 = torch.aten.add.Scalar %9877, %float9.999990e-06_7502, %int1_7503 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_7504 = torch.constant.float 9.9999997473787516E-6
    %int1_7505 = torch.constant.int 1
    %9887 = torch.aten.add.Scalar %9879, %float9.999990e-06_7504, %int1_7505 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9888 = torch.aten.rsqrt %9880 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9889 = torch.aten.rsqrt %9881 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9890 = torch.aten.rsqrt %9882 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9891 = torch.aten.rsqrt %9883 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9892 = torch.aten.rsqrt %9884 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9893 = torch.aten.rsqrt %9885 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9894 = torch.aten.rsqrt %9886 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9895 = torch.aten.rsqrt %9887 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %9895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %9896 = torch.aten.mul.Tensor %9848, %9888 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9897 = torch.aten.mul.Tensor %9849, %9889 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9898 = torch.aten.mul.Tensor %9850, %9890 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9899 = torch.aten.mul.Tensor %9851, %9891 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9900 = torch.aten.mul.Tensor %9852, %9892 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9901 = torch.aten.mul.Tensor %9853, %9893 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9902 = torch.aten.mul.Tensor %9854, %9894 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9903 = torch.aten.mul.Tensor %9855, %9895 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9904 = torch.aten.mul.Tensor %296, %9896 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9905 = torch.aten.mul.Tensor %297, %9897 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9906 = torch.aten.mul.Tensor %298, %9898 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9907 = torch.aten.mul.Tensor %299, %9899 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9908 = torch.aten.mul.Tensor %300, %9900 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9909 = torch.aten.mul.Tensor %301, %9901 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9910 = torch.aten.mul.Tensor %302, %9902 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %9911 = torch.aten.mul.Tensor %303, %9903 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %9911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
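    // convert_element_type with dtype code 5 casts the normalized activations
    // back to f16 (code 6 above selected f32).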
    %int5_7506 = torch.constant.int 5
    %9912 = torch.prims.convert_element_type %9904, %int5_7506 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7507 = torch.constant.int 5
    %9913 = torch.prims.convert_element_type %9905, %int5_7507 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7508 = torch.constant.int 5
    %9914 = torch.prims.convert_element_type %9906, %int5_7508 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7509 = torch.constant.int 5
    %9915 = torch.prims.convert_element_type %9907, %int5_7509 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7510 = torch.constant.int 5
    %9916 = torch.prims.convert_element_type %9908, %int5_7510 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7511 = torch.constant.int 5
    %9917 = torch.prims.convert_element_type %9909, %int5_7511 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7512 = torch.constant.int 5
    %9918 = torch.prims.convert_element_type %9910, %int5_7512 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_7513 = torch.constant.int 5
    %9919 = torch.prims.convert_element_type %9911, %int5_7513 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %9919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
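    // Q projection per device: each [512,4096] shard (%304 ... %311) is
    // transposed to [4096,512] via aten.permute [1, 0]; the activation is
    // flattened from [4,?,4096] to [4*?,4096] (mul.int by the dynamic length
    // %2482), multiplied with torch.aten.mm, and reshaped to [4,?,512]. A
    // 512-wide shard is consistent with 4 query heads of head_dim 128 per
    // device (32 heads over 8 devices). Equivalent PyTorch-style sketch
    // (hypothetical names x, wq_shard, seq):
    //
    //   q = (x.view(-1, 4096) @ wq_shard.t()).view(4, seq, 512)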
    %int1_7514 = torch.constant.int 1
    %int0_7515 = torch.constant.int 0
    %9920 = torch.prim.ListConstruct %int1_7514, %int0_7515 : (!torch.int, !torch.int) -> !torch.list<int>
    %9921 = torch.aten.permute %304, %9920 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_7516 = torch.constant.int 1
    %int0_7517 = torch.constant.int 0
    %9922 = torch.prim.ListConstruct %int1_7516, %int0_7517 : (!torch.int, !torch.int) -> !torch.list<int>
    %9923 = torch.aten.permute %305, %9922 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_7518 = torch.constant.int 1
    %int0_7519 = torch.constant.int 0
    %9924 = torch.prim.ListConstruct %int1_7518, %int0_7519 : (!torch.int, !torch.int) -> !torch.list<int>
    %9925 = torch.aten.permute %306, %9924 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_7520 = torch.constant.int 1
    %int0_7521 = torch.constant.int 0
    %9926 = torch.prim.ListConstruct %int1_7520, %int0_7521 : (!torch.int, !torch.int) -> !torch.list<int>
    %9927 = torch.aten.permute %307, %9926 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_7522 = torch.constant.int 1
    %int0_7523 = torch.constant.int 0
    %9928 = torch.prim.ListConstruct %int1_7522, %int0_7523 : (!torch.int, !torch.int) -> !torch.list<int>
    %9929 = torch.aten.permute %308, %9928 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_7524 = torch.constant.int 1
    %int0_7525 = torch.constant.int 0
    %9930 = torch.prim.ListConstruct %int1_7524, %int0_7525 : (!torch.int, !torch.int) -> !torch.list<int>
    %9931 = torch.aten.permute %309, %9930 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_7526 = torch.constant.int 1
    %int0_7527 = torch.constant.int 0
    %9932 = torch.prim.ListConstruct %int1_7526, %int0_7527 : (!torch.int, !torch.int) -> !torch.list<int>
    %9933 = torch.aten.permute %310, %9932 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_7528 = torch.constant.int 1
    %int0_7529 = torch.constant.int 0
    %9934 = torch.prim.ListConstruct %int1_7528, %int0_7529 : (!torch.int, !torch.int) -> !torch.list<int>
    %9935 = torch.aten.permute %311, %9934 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int4_7530 = torch.constant.int 4
    %9936 = torch.aten.mul.int %int4_7530, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7531 = torch.constant.int 4096
    %9937 = torch.prim.ListConstruct %9936, %int4096_7531 : (!torch.int, !torch.int) -> !torch.list<int>
    %9938 = torch.aten.view %9912, %9937 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9938, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9939 = torch.aten.mm %9938, %9921 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9939, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_7532 = torch.constant.int 4
    %int512_7533 = torch.constant.int 512
    %9940 = torch.prim.ListConstruct %int4_7532, %2482, %int512_7533 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9941 = torch.aten.view %9939, %9940 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_7534 = torch.constant.int 4
    %9942 = torch.aten.mul.int %int4_7534, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7535 = torch.constant.int 4096
    %9943 = torch.prim.ListConstruct %9942, %int4096_7535 : (!torch.int, !torch.int) -> !torch.list<int>
    %9944 = torch.aten.view %9913, %9943 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9944, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9945 = torch.aten.mm %9944, %9923 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9945, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_7536 = torch.constant.int 4
    %int512_7537 = torch.constant.int 512
    %9946 = torch.prim.ListConstruct %int4_7536, %2482, %int512_7537 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9947 = torch.aten.view %9945, %9946 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_7538 = torch.constant.int 4
    %9948 = torch.aten.mul.int %int4_7538, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7539 = torch.constant.int 4096
    %9949 = torch.prim.ListConstruct %9948, %int4096_7539 : (!torch.int, !torch.int) -> !torch.list<int>
    %9950 = torch.aten.view %9914, %9949 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9950, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9951 = torch.aten.mm %9950, %9925 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9951, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_7540 = torch.constant.int 4
    %int512_7541 = torch.constant.int 512
    %9952 = torch.prim.ListConstruct %int4_7540, %2482, %int512_7541 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9953 = torch.aten.view %9951, %9952 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_7542 = torch.constant.int 4
    %9954 = torch.aten.mul.int %int4_7542, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7543 = torch.constant.int 4096
    %9955 = torch.prim.ListConstruct %9954, %int4096_7543 : (!torch.int, !torch.int) -> !torch.list<int>
    %9956 = torch.aten.view %9915, %9955 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9956, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9957 = torch.aten.mm %9956, %9927 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9957, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_7544 = torch.constant.int 4
    %int512_7545 = torch.constant.int 512
    %9958 = torch.prim.ListConstruct %int4_7544, %2482, %int512_7545 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9959 = torch.aten.view %9957, %9958 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_7546 = torch.constant.int 4
    %9960 = torch.aten.mul.int %int4_7546, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7547 = torch.constant.int 4096
    %9961 = torch.prim.ListConstruct %9960, %int4096_7547 : (!torch.int, !torch.int) -> !torch.list<int>
    %9962 = torch.aten.view %9916, %9961 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9962, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9963 = torch.aten.mm %9962, %9929 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9963, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_7548 = torch.constant.int 4
    %int512_7549 = torch.constant.int 512
    %9964 = torch.prim.ListConstruct %int4_7548, %2482, %int512_7549 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9965 = torch.aten.view %9963, %9964 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_7550 = torch.constant.int 4
    %9966 = torch.aten.mul.int %int4_7550, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7551 = torch.constant.int 4096
    %9967 = torch.prim.ListConstruct %9966, %int4096_7551 : (!torch.int, !torch.int) -> !torch.list<int>
    %9968 = torch.aten.view %9917, %9967 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9968, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9969 = torch.aten.mm %9968, %9931 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9969, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_7552 = torch.constant.int 4
    %int512_7553 = torch.constant.int 512
    %9970 = torch.prim.ListConstruct %int4_7552, %2482, %int512_7553 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9971 = torch.aten.view %9969, %9970 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_7554 = torch.constant.int 4
    %9972 = torch.aten.mul.int %int4_7554, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7555 = torch.constant.int 4096
    %9973 = torch.prim.ListConstruct %9972, %int4096_7555 : (!torch.int, !torch.int) -> !torch.list<int>
    %9974 = torch.aten.view %9918, %9973 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9974, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9975 = torch.aten.mm %9974, %9933 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9975, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_7556 = torch.constant.int 4
    %int512_7557 = torch.constant.int 512
    %9976 = torch.prim.ListConstruct %int4_7556, %2482, %int512_7557 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9977 = torch.aten.view %9975, %9976 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_7558 = torch.constant.int 4
    %9978 = torch.aten.mul.int %int4_7558, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7559 = torch.constant.int 4096
    %9979 = torch.prim.ListConstruct %9978, %int4096_7559 : (!torch.int, !torch.int) -> !torch.list<int>
    %9980 = torch.aten.view %9919, %9979 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %9980, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %9981 = torch.aten.mm %9980, %9935 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %9981, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_7560 = torch.constant.int 4
    %int512_7561 = torch.constant.int 512
    %9982 = torch.prim.ListConstruct %int4_7560, %2482, %int512_7561 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %9983 = torch.aten.view %9981, %9982 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %9983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
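    // K projection per device: the same transpose / flatten / mm / reshape
    // pattern with [128,4096] shards (%312 ... %319), producing [4,?,128] keys.
    // One 128-wide projection per device is consistent with grouped-query
    // attention using 8 KV heads of head_dim 128, one head per device.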
    %int1_7562 = torch.constant.int 1
    %int0_7563 = torch.constant.int 0
    %9984 = torch.prim.ListConstruct %int1_7562, %int0_7563 : (!torch.int, !torch.int) -> !torch.list<int>
    %9985 = torch.aten.permute %312, %9984 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7564 = torch.constant.int 1
    %int0_7565 = torch.constant.int 0
    %9986 = torch.prim.ListConstruct %int1_7564, %int0_7565 : (!torch.int, !torch.int) -> !torch.list<int>
    %9987 = torch.aten.permute %313, %9986 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7566 = torch.constant.int 1
    %int0_7567 = torch.constant.int 0
    %9988 = torch.prim.ListConstruct %int1_7566, %int0_7567 : (!torch.int, !torch.int) -> !torch.list<int>
    %9989 = torch.aten.permute %314, %9988 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7568 = torch.constant.int 1
    %int0_7569 = torch.constant.int 0
    %9990 = torch.prim.ListConstruct %int1_7568, %int0_7569 : (!torch.int, !torch.int) -> !torch.list<int>
    %9991 = torch.aten.permute %315, %9990 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7570 = torch.constant.int 1
    %int0_7571 = torch.constant.int 0
    %9992 = torch.prim.ListConstruct %int1_7570, %int0_7571 : (!torch.int, !torch.int) -> !torch.list<int>
    %9993 = torch.aten.permute %316, %9992 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7572 = torch.constant.int 1
    %int0_7573 = torch.constant.int 0
    %9994 = torch.prim.ListConstruct %int1_7572, %int0_7573 : (!torch.int, !torch.int) -> !torch.list<int>
    %9995 = torch.aten.permute %317, %9994 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7574 = torch.constant.int 1
    %int0_7575 = torch.constant.int 0
    %9996 = torch.prim.ListConstruct %int1_7574, %int0_7575 : (!torch.int, !torch.int) -> !torch.list<int>
    %9997 = torch.aten.permute %318, %9996 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7576 = torch.constant.int 1
    %int0_7577 = torch.constant.int 0
    %9998 = torch.prim.ListConstruct %int1_7576, %int0_7577 : (!torch.int, !torch.int) -> !torch.list<int>
    %9999 = torch.aten.permute %319, %9998 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_7578 = torch.constant.int 4
    %10000 = torch.aten.mul.int %int4_7578, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7579 = torch.constant.int 4096
    %10001 = torch.prim.ListConstruct %10000, %int4096_7579 : (!torch.int, !torch.int) -> !torch.list<int>
    %10002 = torch.aten.view %9912, %10001 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10002, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10003 = torch.aten.mm %10002, %9985 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10003, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7580 = torch.constant.int 4
    %int128_7581 = torch.constant.int 128
    %10004 = torch.prim.ListConstruct %int4_7580, %2482, %int128_7581 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10005 = torch.aten.view %10003, %10004 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7582 = torch.constant.int 4
    %10006 = torch.aten.mul.int %int4_7582, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7583 = torch.constant.int 4096
    %10007 = torch.prim.ListConstruct %10006, %int4096_7583 : (!torch.int, !torch.int) -> !torch.list<int>
    %10008 = torch.aten.view %9913, %10007 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10008, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10009 = torch.aten.mm %10008, %9987 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10009, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7584 = torch.constant.int 4
    %int128_7585 = torch.constant.int 128
    %10010 = torch.prim.ListConstruct %int4_7584, %2482, %int128_7585 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10011 = torch.aten.view %10009, %10010 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7586 = torch.constant.int 4
    %10012 = torch.aten.mul.int %int4_7586, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7587 = torch.constant.int 4096
    %10013 = torch.prim.ListConstruct %10012, %int4096_7587 : (!torch.int, !torch.int) -> !torch.list<int>
    %10014 = torch.aten.view %9914, %10013 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10014, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10015 = torch.aten.mm %10014, %9989 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10015, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7588 = torch.constant.int 4
    %int128_7589 = torch.constant.int 128
    %10016 = torch.prim.ListConstruct %int4_7588, %2482, %int128_7589 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10017 = torch.aten.view %10015, %10016 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7590 = torch.constant.int 4
    %10018 = torch.aten.mul.int %int4_7590, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7591 = torch.constant.int 4096
    %10019 = torch.prim.ListConstruct %10018, %int4096_7591 : (!torch.int, !torch.int) -> !torch.list<int>
    %10020 = torch.aten.view %9915, %10019 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10020, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10021 = torch.aten.mm %10020, %9991 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10021, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7592 = torch.constant.int 4
    %int128_7593 = torch.constant.int 128
    %10022 = torch.prim.ListConstruct %int4_7592, %2482, %int128_7593 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10023 = torch.aten.view %10021, %10022 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7594 = torch.constant.int 4
    %10024 = torch.aten.mul.int %int4_7594, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7595 = torch.constant.int 4096
    %10025 = torch.prim.ListConstruct %10024, %int4096_7595 : (!torch.int, !torch.int) -> !torch.list<int>
    %10026 = torch.aten.view %9916, %10025 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10026, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10027 = torch.aten.mm %10026, %9993 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10027, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7596 = torch.constant.int 4
    %int128_7597 = torch.constant.int 128
    %10028 = torch.prim.ListConstruct %int4_7596, %2482, %int128_7597 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10029 = torch.aten.view %10027, %10028 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7598 = torch.constant.int 4
    %10030 = torch.aten.mul.int %int4_7598, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7599 = torch.constant.int 4096
    %10031 = torch.prim.ListConstruct %10030, %int4096_7599 : (!torch.int, !torch.int) -> !torch.list<int>
    %10032 = torch.aten.view %9917, %10031 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10032, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10033 = torch.aten.mm %10032, %9995 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10033, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7600 = torch.constant.int 4
    %int128_7601 = torch.constant.int 128
    %10034 = torch.prim.ListConstruct %int4_7600, %2482, %int128_7601 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10035 = torch.aten.view %10033, %10034 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7602 = torch.constant.int 4
    %10036 = torch.aten.mul.int %int4_7602, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7603 = torch.constant.int 4096
    %10037 = torch.prim.ListConstruct %10036, %int4096_7603 : (!torch.int, !torch.int) -> !torch.list<int>
    %10038 = torch.aten.view %9918, %10037 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10038, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10039 = torch.aten.mm %10038, %9997 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10039, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7604 = torch.constant.int 4
    %int128_7605 = torch.constant.int 128
    %10040 = torch.prim.ListConstruct %int4_7604, %2482, %int128_7605 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10041 = torch.aten.view %10039, %10040 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7606 = torch.constant.int 4
    %10042 = torch.aten.mul.int %int4_7606, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7607 = torch.constant.int 4096
    %10043 = torch.prim.ListConstruct %10042, %int4096_7607 : (!torch.int, !torch.int) -> !torch.list<int>
    %10044 = torch.aten.view %9919, %10043 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10044, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10045 = torch.aten.mm %10044, %9999 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10045, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7608 = torch.constant.int 4
    %int128_7609 = torch.constant.int 128
    %10046 = torch.prim.ListConstruct %int4_7608, %2482, %int128_7609 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10047 = torch.aten.view %10045, %10046 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
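    // Transpose the eight per-device [128,4096]f16 weights %320-%327 to
    // [4096,128] so they can serve as matmul right-hand sides in the chains
    // below (by the shapes, apparently the second of two sharded 128-wide
    // attention projections, one slice per device).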
    %int1_7610 = torch.constant.int 1
    %int0_7611 = torch.constant.int 0
    %10048 = torch.prim.ListConstruct %int1_7610, %int0_7611 : (!torch.int, !torch.int) -> !torch.list<int>
    %10049 = torch.aten.permute %320, %10048 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7612 = torch.constant.int 1
    %int0_7613 = torch.constant.int 0
    %10050 = torch.prim.ListConstruct %int1_7612, %int0_7613 : (!torch.int, !torch.int) -> !torch.list<int>
    %10051 = torch.aten.permute %321, %10050 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7614 = torch.constant.int 1
    %int0_7615 = torch.constant.int 0
    %10052 = torch.prim.ListConstruct %int1_7614, %int0_7615 : (!torch.int, !torch.int) -> !torch.list<int>
    %10053 = torch.aten.permute %322, %10052 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7616 = torch.constant.int 1
    %int0_7617 = torch.constant.int 0
    %10054 = torch.prim.ListConstruct %int1_7616, %int0_7617 : (!torch.int, !torch.int) -> !torch.list<int>
    %10055 = torch.aten.permute %323, %10054 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7618 = torch.constant.int 1
    %int0_7619 = torch.constant.int 0
    %10056 = torch.prim.ListConstruct %int1_7618, %int0_7619 : (!torch.int, !torch.int) -> !torch.list<int>
    %10057 = torch.aten.permute %324, %10056 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7620 = torch.constant.int 1
    %int0_7621 = torch.constant.int 0
    %10058 = torch.prim.ListConstruct %int1_7620, %int0_7621 : (!torch.int, !torch.int) -> !torch.list<int>
    %10059 = torch.aten.permute %325, %10058 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7622 = torch.constant.int 1
    %int0_7623 = torch.constant.int 0
    %10060 = torch.prim.ListConstruct %int1_7622, %int0_7623 : (!torch.int, !torch.int) -> !torch.list<int>
    %10061 = torch.aten.permute %326, %10060 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_7624 = torch.constant.int 1
    %int0_7625 = torch.constant.int 0
    %10062 = torch.prim.ListConstruct %int1_7624, %int0_7625 : (!torch.int, !torch.int) -> !torch.list<int>
    %10063 = torch.aten.permute %327, %10062 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
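    // For each device: flatten the [4,?,4096]f16 activations to [?,4096],
    // project down to 128 columns with torch.aten.mm against the transposed
    // weight, then restore the batch dimension to a [4,?,128] result.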
    %int4_7626 = torch.constant.int 4
    %10064 = torch.aten.mul.int %int4_7626, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7627 = torch.constant.int 4096
    %10065 = torch.prim.ListConstruct %10064, %int4096_7627 : (!torch.int, !torch.int) -> !torch.list<int>
    %10066 = torch.aten.view %9912, %10065 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10066, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10067 = torch.aten.mm %10066, %10049 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10067, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7628 = torch.constant.int 4
    %int128_7629 = torch.constant.int 128
    %10068 = torch.prim.ListConstruct %int4_7628, %2482, %int128_7629 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10069 = torch.aten.view %10067, %10068 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7630 = torch.constant.int 4
    %10070 = torch.aten.mul.int %int4_7630, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7631 = torch.constant.int 4096
    %10071 = torch.prim.ListConstruct %10070, %int4096_7631 : (!torch.int, !torch.int) -> !torch.list<int>
    %10072 = torch.aten.view %9913, %10071 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10072, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10073 = torch.aten.mm %10072, %10051 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10073, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7632 = torch.constant.int 4
    %int128_7633 = torch.constant.int 128
    %10074 = torch.prim.ListConstruct %int4_7632, %2482, %int128_7633 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10075 = torch.aten.view %10073, %10074 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7634 = torch.constant.int 4
    %10076 = torch.aten.mul.int %int4_7634, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7635 = torch.constant.int 4096
    %10077 = torch.prim.ListConstruct %10076, %int4096_7635 : (!torch.int, !torch.int) -> !torch.list<int>
    %10078 = torch.aten.view %9914, %10077 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10078, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10079 = torch.aten.mm %10078, %10053 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10079, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7636 = torch.constant.int 4
    %int128_7637 = torch.constant.int 128
    %10080 = torch.prim.ListConstruct %int4_7636, %2482, %int128_7637 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10081 = torch.aten.view %10079, %10080 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7638 = torch.constant.int 4
    %10082 = torch.aten.mul.int %int4_7638, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7639 = torch.constant.int 4096
    %10083 = torch.prim.ListConstruct %10082, %int4096_7639 : (!torch.int, !torch.int) -> !torch.list<int>
    %10084 = torch.aten.view %9915, %10083 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10084, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10085 = torch.aten.mm %10084, %10055 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10085, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7640 = torch.constant.int 4
    %int128_7641 = torch.constant.int 128
    %10086 = torch.prim.ListConstruct %int4_7640, %2482, %int128_7641 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10087 = torch.aten.view %10085, %10086 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7642 = torch.constant.int 4
    %10088 = torch.aten.mul.int %int4_7642, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7643 = torch.constant.int 4096
    %10089 = torch.prim.ListConstruct %10088, %int4096_7643 : (!torch.int, !torch.int) -> !torch.list<int>
    %10090 = torch.aten.view %9916, %10089 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10090, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10091 = torch.aten.mm %10090, %10057 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10091, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7644 = torch.constant.int 4
    %int128_7645 = torch.constant.int 128
    %10092 = torch.prim.ListConstruct %int4_7644, %2482, %int128_7645 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10093 = torch.aten.view %10091, %10092 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7646 = torch.constant.int 4
    %10094 = torch.aten.mul.int %int4_7646, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7647 = torch.constant.int 4096
    %10095 = torch.prim.ListConstruct %10094, %int4096_7647 : (!torch.int, !torch.int) -> !torch.list<int>
    %10096 = torch.aten.view %9917, %10095 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10096, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10097 = torch.aten.mm %10096, %10059 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10097, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7648 = torch.constant.int 4
    %int128_7649 = torch.constant.int 128
    %10098 = torch.prim.ListConstruct %int4_7648, %2482, %int128_7649 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10099 = torch.aten.view %10097, %10098 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7650 = torch.constant.int 4
    %10100 = torch.aten.mul.int %int4_7650, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7651 = torch.constant.int 4096
    %10101 = torch.prim.ListConstruct %10100, %int4096_7651 : (!torch.int, !torch.int) -> !torch.list<int>
    %10102 = torch.aten.view %9918, %10101 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10102, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10103 = torch.aten.mm %10102, %10061 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10103, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7652 = torch.constant.int 4
    %int128_7653 = torch.constant.int 128
    %10104 = torch.prim.ListConstruct %int4_7652, %2482, %int128_7653 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10105 = torch.aten.view %10103, %10104 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_7654 = torch.constant.int 4
    %10106 = torch.aten.mul.int %int4_7654, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_7655 = torch.constant.int 4096
    %10107 = torch.prim.ListConstruct %10106, %int4096_7655 : (!torch.int, !torch.int) -> !torch.list<int>
    %10108 = torch.aten.view %9919, %10107 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10108, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %10109 = torch.aten.mm %10108, %10063 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %10109, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_7656 = torch.constant.int 4
    %int128_7657 = torch.constant.int 128
    %10110 = torch.prim.ListConstruct %int4_7656, %2482, %int128_7657 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10111 = torch.aten.view %10109, %10110 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %10111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
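    // Unflatten the eight per-device [4,?,512]f16 projections into four heads
    // of 128 each: [4,?,4,128] (consistent with 32 query heads split evenly
    // across the eight devices).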
    %int4_7658 = torch.constant.int 4
    %int4_7659 = torch.constant.int 4
    %int128_7660 = torch.constant.int 128
    %10112 = torch.prim.ListConstruct %int4_7658, %2482, %int4_7659, %int128_7660 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10113 = torch.aten.view %9941, %10112 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_7661 = torch.constant.int 4
    %int4_7662 = torch.constant.int 4
    %int128_7663 = torch.constant.int 128
    %10114 = torch.prim.ListConstruct %int4_7661, %2482, %int4_7662, %int128_7663 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10115 = torch.aten.view %9947, %10114 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_7664 = torch.constant.int 4
    %int4_7665 = torch.constant.int 4
    %int128_7666 = torch.constant.int 128
    %10116 = torch.prim.ListConstruct %int4_7664, %2482, %int4_7665, %int128_7666 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10117 = torch.aten.view %9953, %10116 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_7667 = torch.constant.int 4
    %int4_7668 = torch.constant.int 4
    %int128_7669 = torch.constant.int 128
    %10118 = torch.prim.ListConstruct %int4_7667, %2482, %int4_7668, %int128_7669 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10119 = torch.aten.view %9959, %10118 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_7670 = torch.constant.int 4
    %int4_7671 = torch.constant.int 4
    %int128_7672 = torch.constant.int 128
    %10120 = torch.prim.ListConstruct %int4_7670, %2482, %int4_7671, %int128_7672 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10121 = torch.aten.view %9965, %10120 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_7673 = torch.constant.int 4
    %int4_7674 = torch.constant.int 4
    %int128_7675 = torch.constant.int 128
    %10122 = torch.prim.ListConstruct %int4_7673, %2482, %int4_7674, %int128_7675 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10123 = torch.aten.view %9971, %10122 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_7676 = torch.constant.int 4
    %int4_7677 = torch.constant.int 4
    %int128_7678 = torch.constant.int 128
    %10124 = torch.prim.ListConstruct %int4_7676, %2482, %int4_7677, %int128_7678 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10125 = torch.aten.view %9977, %10124 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_7679 = torch.constant.int 4
    %int4_7680 = torch.constant.int 4
    %int128_7681 = torch.constant.int 128
    %10126 = torch.prim.ListConstruct %int4_7679, %2482, %int4_7680, %int128_7681 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10127 = torch.aten.view %9983, %10126 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
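    // Unflatten the sixteen per-device [4,?,128]f16 projections into a single
    // 128-wide head each: [4,?,1,128] (consistent with one KV head per device
    // under grouped-query attention).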
    %int4_7682 = torch.constant.int 4
    %int1_7683 = torch.constant.int 1
    %int128_7684 = torch.constant.int 128
    %10128 = torch.prim.ListConstruct %int4_7682, %2482, %int1_7683, %int128_7684 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10129 = torch.aten.view %10005, %10128 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7685 = torch.constant.int 4
    %int1_7686 = torch.constant.int 1
    %int128_7687 = torch.constant.int 128
    %10130 = torch.prim.ListConstruct %int4_7685, %2482, %int1_7686, %int128_7687 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10131 = torch.aten.view %10011, %10130 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7688 = torch.constant.int 4
    %int1_7689 = torch.constant.int 1
    %int128_7690 = torch.constant.int 128
    %10132 = torch.prim.ListConstruct %int4_7688, %2482, %int1_7689, %int128_7690 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10133 = torch.aten.view %10017, %10132 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7691 = torch.constant.int 4
    %int1_7692 = torch.constant.int 1
    %int128_7693 = torch.constant.int 128
    %10134 = torch.prim.ListConstruct %int4_7691, %2482, %int1_7692, %int128_7693 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10135 = torch.aten.view %10023, %10134 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7694 = torch.constant.int 4
    %int1_7695 = torch.constant.int 1
    %int128_7696 = torch.constant.int 128
    %10136 = torch.prim.ListConstruct %int4_7694, %2482, %int1_7695, %int128_7696 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10137 = torch.aten.view %10029, %10136 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7697 = torch.constant.int 4
    %int1_7698 = torch.constant.int 1
    %int128_7699 = torch.constant.int 128
    %10138 = torch.prim.ListConstruct %int4_7697, %2482, %int1_7698, %int128_7699 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10139 = torch.aten.view %10035, %10138 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7700 = torch.constant.int 4
    %int1_7701 = torch.constant.int 1
    %int128_7702 = torch.constant.int 128
    %10140 = torch.prim.ListConstruct %int4_7700, %2482, %int1_7701, %int128_7702 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10141 = torch.aten.view %10041, %10140 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7703 = torch.constant.int 4
    %int1_7704 = torch.constant.int 1
    %int128_7705 = torch.constant.int 128
    %10142 = torch.prim.ListConstruct %int4_7703, %2482, %int1_7704, %int128_7705 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10143 = torch.aten.view %10047, %10142 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7706 = torch.constant.int 4
    %int1_7707 = torch.constant.int 1
    %int128_7708 = torch.constant.int 128
    %10144 = torch.prim.ListConstruct %int4_7706, %2482, %int1_7707, %int128_7708 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10145 = torch.aten.view %10069, %10144 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7709 = torch.constant.int 4
    %int1_7710 = torch.constant.int 1
    %int128_7711 = torch.constant.int 128
    %10146 = torch.prim.ListConstruct %int4_7709, %2482, %int1_7710, %int128_7711 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10147 = torch.aten.view %10075, %10146 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7712 = torch.constant.int 4
    %int1_7713 = torch.constant.int 1
    %int128_7714 = torch.constant.int 128
    %10148 = torch.prim.ListConstruct %int4_7712, %2482, %int1_7713, %int128_7714 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10149 = torch.aten.view %10081, %10148 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7715 = torch.constant.int 4
    %int1_7716 = torch.constant.int 1
    %int128_7717 = torch.constant.int 128
    %10150 = torch.prim.ListConstruct %int4_7715, %2482, %int1_7716, %int128_7717 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10151 = torch.aten.view %10087, %10150 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7718 = torch.constant.int 4
    %int1_7719 = torch.constant.int 1
    %int128_7720 = torch.constant.int 128
    %10152 = torch.prim.ListConstruct %int4_7718, %2482, %int1_7719, %int128_7720 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10153 = torch.aten.view %10093, %10152 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7721 = torch.constant.int 4
    %int1_7722 = torch.constant.int 1
    %int128_7723 = torch.constant.int 128
    %10154 = torch.prim.ListConstruct %int4_7721, %2482, %int1_7722, %int128_7723 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10155 = torch.aten.view %10099, %10154 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7724 = torch.constant.int 4
    %int1_7725 = torch.constant.int 1
    %int128_7726 = torch.constant.int 128
    %10156 = torch.prim.ListConstruct %int4_7724, %2482, %int1_7725, %int128_7726 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10157 = torch.aten.view %10105, %10156 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_7727 = torch.constant.int 4
    %int1_7728 = torch.constant.int 1
    %int128_7729 = torch.constant.int 128
    %10158 = torch.prim.ListConstruct %int4_7727, %2482, %int1_7728, %int128_7729 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10159 = torch.aten.view %10111, %10158 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
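    // Build the rotary position embedding (RoPE) table on the host ("cpu"):
    // positions 0..131071 are multiplied by the 64 inverse frequencies
    // 500000^(-2i/128), i = 0..63, and cos/sin of the products are packed
    // into a [131072,64] complex<f32> tensor of unit phasors.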
    %int131072_7730 = torch.constant.int 131072
    %none_7731 = torch.constant.none
    %none_7732 = torch.constant.none
    %cpu_7733 = torch.constant.device "cpu"
    %false_7734 = torch.constant.bool false
    %10160 = torch.aten.arange %int131072_7730, %none_7731, %none_7732, %cpu_7733, %false_7734 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_7735 = torch.constant.int 0
    %int128_7736 = torch.constant.int 128
    %int2_7737 = torch.constant.int 2
    %none_7738 = torch.constant.none
    %none_7739 = torch.constant.none
    %cpu_7740 = torch.constant.device "cpu"
    %false_7741 = torch.constant.bool false
    %10161 = torch.aten.arange.start_step %int0_7735, %int128_7736, %int2_7737, %none_7738, %none_7739, %cpu_7740, %false_7741 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_7742 = torch.constant.int 0
    %int0_7743 = torch.constant.int 0
    %int64_7744 = torch.constant.int 64
    %int1_7745 = torch.constant.int 1
    %10162 = torch.aten.slice.Tensor %10161, %int0_7742, %int0_7743, %int64_7744, %int1_7745 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_7746 = torch.constant.int 6
    %10163 = torch.prims.convert_element_type %10162, %int6_7746 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_7747 = torch.constant.int 128
    %10164 = torch.aten.div.Scalar %10163, %int128_7747 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_7748 = torch.constant.float 5.000000e+05
    %10165 = torch.aten.pow.Scalar %float5.000000e05_7748, %10164 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %10166 = torch.aten.reciprocal %10165 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_7749 = torch.constant.float 1.000000e+00
    %10167 = torch.aten.mul.Scalar %10166, %float1.000000e00_7749 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_7750 = torch.constant.int 131072
    %int1_7751 = torch.constant.int 1
    %10168 = torch.prim.ListConstruct %int131072_7750, %int1_7751 : (!torch.int, !torch.int) -> !torch.list<int>
    %10169 = torch.aten.view %10160, %10168 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %10170 = torch.aten.mul.Tensor %10169, %10167 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %10171 = torch.aten.cos %10170 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %10172 = torch.aten.sin %10170 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %10173 = torch.aten.complex %10171, %10172 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
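    // Replicate the [131072,64] complex frequency table from the host to all
    // eight devices (@__device_0 .. @__device_7).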
    %10174 = torch_c.to_builtin_tensor %10173 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10175 = flow.tensor.transfer %10174 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %10176 = torch_c.from_builtin_tensor %10175 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10177 = torch_c.to_builtin_tensor %10173 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10178 = flow.tensor.transfer %10177 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %10179 = torch_c.from_builtin_tensor %10178 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10180 = torch_c.to_builtin_tensor %10173 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10181 = flow.tensor.transfer %10180 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %10182 = torch_c.from_builtin_tensor %10181 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10183 = torch_c.to_builtin_tensor %10173 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10184 = flow.tensor.transfer %10183 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %10185 = torch_c.from_builtin_tensor %10184 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10186 = torch_c.to_builtin_tensor %10173 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10187 = flow.tensor.transfer %10186 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %10188 = torch_c.from_builtin_tensor %10187 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10189 = torch_c.to_builtin_tensor %10173 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10190 = flow.tensor.transfer %10189 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %10191 = torch_c.from_builtin_tensor %10190 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10192 = torch_c.to_builtin_tensor %10173 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10193 = flow.tensor.transfer %10192 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %10194 = torch_c.from_builtin_tensor %10193 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10195 = torch_c.to_builtin_tensor %10173 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10196 = flow.tensor.transfer %10195 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %10197 = torch_c.from_builtin_tensor %10196 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
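    // Apply RoPE to each per-device [4,?,4,128]f16 activation: slice that
    // device's frequency table to the current sequence length, reshape it to
    // broadcast as [1,?,1,64], bitcast the f16 activation to complex<f16>
    // pairs, multiply by the table, bitcast back to interleaved f32, and
    // truncate to f16. The same sequence repeats once per device below.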
    %int1_7752 = torch.constant.int 1
    %10198 = torch.aten.size.int %9941, %int1_7752 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_7753 = torch.constant.int 0
    %10199 = torch.aten.add.int %int0_7753, %10198 : !torch.int, !torch.int -> !torch.int
    %int0_7754 = torch.constant.int 0
    %int0_7755 = torch.constant.int 0
    %int1_7756 = torch.constant.int 1
    %10200 = torch.aten.slice.Tensor %10176, %int0_7754, %int0_7755, %10199, %int1_7756 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10200, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_7757 = torch.constant.int 1
    %int0_7758 = torch.constant.int 0
    %int9223372036854775807_7759 = torch.constant.int 9223372036854775807
    %int1_7760 = torch.constant.int 1
    %10201 = torch.aten.slice.Tensor %10200, %int1_7757, %int0_7758, %int9223372036854775807_7759, %int1_7760 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10201, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_7761 = torch.constant.int 0
    %10202 = torch.aten.unsqueeze %10201, %int0_7761 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10202, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_7762 = torch.constant.int 2
    %10203 = torch.aten.unsqueeze %10202, %int2_7762 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10203, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_7763 = torch.constant.int 3
    %int0_7764 = torch.constant.int 0
    %int9223372036854775807_7765 = torch.constant.int 9223372036854775807
    %int1_7766 = torch.constant.int 1
    %10204 = torch.aten.slice.Tensor %10203, %int3_7763, %int0_7764, %int9223372036854775807_7765, %int1_7766 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10204, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10205 = torch_c.to_builtin_tensor %10113 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_7767 = arith.constant 1 : index
    %dim_7768 = tensor.dim %10205, %c1_7767 : tensor<4x?x4x128xf16>
    %10206 = flow.tensor.bitcast %10205 : tensor<4x?x4x128xf16>{%dim_7768} -> tensor<4x?x4x64xcomplex<f16>>{%dim_7768}
    %10207 = torch_c.from_builtin_tensor %10206 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %10207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %10208 = torch.aten.mul.Tensor %10207, %10204 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %10208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %10209 = torch_c.to_builtin_tensor %10208 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_7769 = arith.constant 1 : index
    %dim_7770 = tensor.dim %10209, %c1_7769 : tensor<4x?x4x64xcomplex<f32>>
    %10210 = flow.tensor.bitcast %10209 : tensor<4x?x4x64xcomplex<f32>>{%dim_7770} -> tensor<4x?x4x128xf32>{%dim_7770}
    %10211 = torch_c.from_builtin_tensor %10210 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %10211, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_7771 = torch.constant.int 5
    %10212 = torch.prims.convert_element_type %10211, %int5_7771 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_7772 = torch.constant.int 1
    %10213 = torch.aten.size.int %9947, %int1_7772 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_7773 = torch.constant.int 0
    %10214 = torch.aten.add.int %int0_7773, %10213 : !torch.int, !torch.int -> !torch.int
    %int0_7774 = torch.constant.int 0
    %int0_7775 = torch.constant.int 0
    %int1_7776 = torch.constant.int 1
    %10215 = torch.aten.slice.Tensor %10179, %int0_7774, %int0_7775, %10214, %int1_7776 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10215, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_7777 = torch.constant.int 1
    %int0_7778 = torch.constant.int 0
    %int9223372036854775807_7779 = torch.constant.int 9223372036854775807
    %int1_7780 = torch.constant.int 1
    %10216 = torch.aten.slice.Tensor %10215, %int1_7777, %int0_7778, %int9223372036854775807_7779, %int1_7780 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10216, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_7781 = torch.constant.int 0
    %10217 = torch.aten.unsqueeze %10216, %int0_7781 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10217, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_7782 = torch.constant.int 2
    %10218 = torch.aten.unsqueeze %10217, %int2_7782 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10218, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_7783 = torch.constant.int 3
    %int0_7784 = torch.constant.int 0
    %int9223372036854775807_7785 = torch.constant.int 9223372036854775807
    %int1_7786 = torch.constant.int 1
    %10219 = torch.aten.slice.Tensor %10218, %int3_7783, %int0_7784, %int9223372036854775807_7785, %int1_7786 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10219, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10220 = torch_c.to_builtin_tensor %10115 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_7787 = arith.constant 1 : index
    %dim_7788 = tensor.dim %10220, %c1_7787 : tensor<4x?x4x128xf16>
    %10221 = flow.tensor.bitcast %10220 : tensor<4x?x4x128xf16>{%dim_7788} -> tensor<4x?x4x64xcomplex<f16>>{%dim_7788}
    %10222 = torch_c.from_builtin_tensor %10221 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %10222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %10223 = torch.aten.mul.Tensor %10222, %10219 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %10223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %10224 = torch_c.to_builtin_tensor %10223 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_7789 = arith.constant 1 : index
    %dim_7790 = tensor.dim %10224, %c1_7789 : tensor<4x?x4x64xcomplex<f32>>
    %10225 = flow.tensor.bitcast %10224 : tensor<4x?x4x64xcomplex<f32>>{%dim_7790} -> tensor<4x?x4x128xf32>{%dim_7790}
    %10226 = torch_c.from_builtin_tensor %10225 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %10226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_7791 = torch.constant.int 5
    %10227 = torch.prims.convert_element_type %10226, %int5_7791 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_7792 = torch.constant.int 1
    %10228 = torch.aten.size.int %9953, %int1_7792 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_7793 = torch.constant.int 0
    %10229 = torch.aten.add.int %int0_7793, %10228 : !torch.int, !torch.int -> !torch.int
    %int0_7794 = torch.constant.int 0
    %int0_7795 = torch.constant.int 0
    %int1_7796 = torch.constant.int 1
    %10230 = torch.aten.slice.Tensor %10182, %int0_7794, %int0_7795, %10229, %int1_7796 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10230, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_7797 = torch.constant.int 1
    %int0_7798 = torch.constant.int 0
    %int9223372036854775807_7799 = torch.constant.int 9223372036854775807
    %int1_7800 = torch.constant.int 1
    %10231 = torch.aten.slice.Tensor %10230, %int1_7797, %int0_7798, %int9223372036854775807_7799, %int1_7800 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10231, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_7801 = torch.constant.int 0
    %10232 = torch.aten.unsqueeze %10231, %int0_7801 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10232, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_7802 = torch.constant.int 2
    %10233 = torch.aten.unsqueeze %10232, %int2_7802 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10233, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_7803 = torch.constant.int 3
    %int0_7804 = torch.constant.int 0
    %int9223372036854775807_7805 = torch.constant.int 9223372036854775807
    %int1_7806 = torch.constant.int 1
    %10234 = torch.aten.slice.Tensor %10233, %int3_7803, %int0_7804, %int9223372036854775807_7805, %int1_7806 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10234, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10235 = torch_c.to_builtin_tensor %10117 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_7807 = arith.constant 1 : index
    %dim_7808 = tensor.dim %10235, %c1_7807 : tensor<4x?x4x128xf16>
    %10236 = flow.tensor.bitcast %10235 : tensor<4x?x4x128xf16>{%dim_7808} -> tensor<4x?x4x64xcomplex<f16>>{%dim_7808}
    %10237 = torch_c.from_builtin_tensor %10236 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %10237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %10238 = torch.aten.mul.Tensor %10237, %10234 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %10238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %10239 = torch_c.to_builtin_tensor %10238 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_7809 = arith.constant 1 : index
    %dim_7810 = tensor.dim %10239, %c1_7809 : tensor<4x?x4x64xcomplex<f32>>
    %10240 = flow.tensor.bitcast %10239 : tensor<4x?x4x64xcomplex<f32>>{%dim_7810} -> tensor<4x?x4x128xf32>{%dim_7810}
    %10241 = torch_c.from_builtin_tensor %10240 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %10241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_7811 = torch.constant.int 5
    %10242 = torch.prims.convert_element_type %10241, %int5_7811 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_7812 = torch.constant.int 1
    %10243 = torch.aten.size.int %9959, %int1_7812 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_7813 = torch.constant.int 0
    %10244 = torch.aten.add.int %int0_7813, %10243 : !torch.int, !torch.int -> !torch.int
    %int0_7814 = torch.constant.int 0
    %int0_7815 = torch.constant.int 0
    %int1_7816 = torch.constant.int 1
    %10245 = torch.aten.slice.Tensor %10185, %int0_7814, %int0_7815, %10244, %int1_7816 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10245, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_7817 = torch.constant.int 1
    %int0_7818 = torch.constant.int 0
    %int9223372036854775807_7819 = torch.constant.int 9223372036854775807
    %int1_7820 = torch.constant.int 1
    %10246 = torch.aten.slice.Tensor %10245, %int1_7817, %int0_7818, %int9223372036854775807_7819, %int1_7820 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10246, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_7821 = torch.constant.int 0
    %10247 = torch.aten.unsqueeze %10246, %int0_7821 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10247, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_7822 = torch.constant.int 2
    %10248 = torch.aten.unsqueeze %10247, %int2_7822 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10248, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_7823 = torch.constant.int 3
    %int0_7824 = torch.constant.int 0
    %int9223372036854775807_7825 = torch.constant.int 9223372036854775807
    %int1_7826 = torch.constant.int 1
    %10249 = torch.aten.slice.Tensor %10248, %int3_7823, %int0_7824, %int9223372036854775807_7825, %int1_7826 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10249, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10250 = torch_c.to_builtin_tensor %10119 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_7827 = arith.constant 1 : index
    %dim_7828 = tensor.dim %10250, %c1_7827 : tensor<4x?x4x128xf16>
    %10251 = flow.tensor.bitcast %10250 : tensor<4x?x4x128xf16>{%dim_7828} -> tensor<4x?x4x64xcomplex<f16>>{%dim_7828}
    %10252 = torch_c.from_builtin_tensor %10251 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %10252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %10253 = torch.aten.mul.Tensor %10252, %10249 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %10253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %10254 = torch_c.to_builtin_tensor %10253 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_7829 = arith.constant 1 : index
    %dim_7830 = tensor.dim %10254, %c1_7829 : tensor<4x?x4x64xcomplex<f32>>
    %10255 = flow.tensor.bitcast %10254 : tensor<4x?x4x64xcomplex<f32>>{%dim_7830} -> tensor<4x?x4x128xf32>{%dim_7830}
    %10256 = torch_c.from_builtin_tensor %10255 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %10256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_7831 = torch.constant.int 5
    %10257 = torch.prims.convert_element_type %10256, %int5_7831 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_7832 = torch.constant.int 1
    %10258 = torch.aten.size.int %9965, %int1_7832 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_7833 = torch.constant.int 0
    %10259 = torch.aten.add.int %int0_7833, %10258 : !torch.int, !torch.int -> !torch.int
    %int0_7834 = torch.constant.int 0
    %int0_7835 = torch.constant.int 0
    %int1_7836 = torch.constant.int 1
    %10260 = torch.aten.slice.Tensor %10188, %int0_7834, %int0_7835, %10259, %int1_7836 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10260, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_7837 = torch.constant.int 1
    %int0_7838 = torch.constant.int 0
    %int9223372036854775807_7839 = torch.constant.int 9223372036854775807
    %int1_7840 = torch.constant.int 1
    %10261 = torch.aten.slice.Tensor %10260, %int1_7837, %int0_7838, %int9223372036854775807_7839, %int1_7840 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10261, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_7841 = torch.constant.int 0
    %10262 = torch.aten.unsqueeze %10261, %int0_7841 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10262, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_7842 = torch.constant.int 2
    %10263 = torch.aten.unsqueeze %10262, %int2_7842 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10263, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_7843 = torch.constant.int 3
    %int0_7844 = torch.constant.int 0
    %int9223372036854775807_7845 = torch.constant.int 9223372036854775807
    %int1_7846 = torch.constant.int 1
    %10264 = torch.aten.slice.Tensor %10263, %int3_7843, %int0_7844, %int9223372036854775807_7845, %int1_7846 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10264, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10265 = torch_c.to_builtin_tensor %10121 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_7847 = arith.constant 1 : index
    %dim_7848 = tensor.dim %10265, %c1_7847 : tensor<4x?x4x128xf16>
    %10266 = flow.tensor.bitcast %10265 : tensor<4x?x4x128xf16>{%dim_7848} -> tensor<4x?x4x64xcomplex<f16>>{%dim_7848}
    %10267 = torch_c.from_builtin_tensor %10266 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %10267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %10268 = torch.aten.mul.Tensor %10267, %10264 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %10268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %10269 = torch_c.to_builtin_tensor %10268 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_7849 = arith.constant 1 : index
    %dim_7850 = tensor.dim %10269, %c1_7849 : tensor<4x?x4x64xcomplex<f32>>
    %10270 = flow.tensor.bitcast %10269 : tensor<4x?x4x64xcomplex<f32>>{%dim_7850} -> tensor<4x?x4x128xf32>{%dim_7850}
    %10271 = torch_c.from_builtin_tensor %10270 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %10271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_7851 = torch.constant.int 5
    %10272 = torch.prims.convert_element_type %10271, %int5_7851 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_7852 = torch.constant.int 1
    %10273 = torch.aten.size.int %9971, %int1_7852 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_7853 = torch.constant.int 0
    %10274 = torch.aten.add.int %int0_7853, %10273 : !torch.int, !torch.int -> !torch.int
    %int0_7854 = torch.constant.int 0
    %int0_7855 = torch.constant.int 0
    %int1_7856 = torch.constant.int 1
    %10275 = torch.aten.slice.Tensor %10191, %int0_7854, %int0_7855, %10274, %int1_7856 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10275, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_7857 = torch.constant.int 1
    %int0_7858 = torch.constant.int 0
    %int9223372036854775807_7859 = torch.constant.int 9223372036854775807
    %int1_7860 = torch.constant.int 1
    %10276 = torch.aten.slice.Tensor %10275, %int1_7857, %int0_7858, %int9223372036854775807_7859, %int1_7860 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10276, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_7861 = torch.constant.int 0
    %10277 = torch.aten.unsqueeze %10276, %int0_7861 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10277, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_7862 = torch.constant.int 2
    %10278 = torch.aten.unsqueeze %10277, %int2_7862 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10278, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_7863 = torch.constant.int 3
    %int0_7864 = torch.constant.int 0
    %int9223372036854775807_7865 = torch.constant.int 9223372036854775807
    %int1_7866 = torch.constant.int 1
    %10279 = torch.aten.slice.Tensor %10278, %int3_7863, %int0_7864, %int9223372036854775807_7865, %int1_7866 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10279, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10280 = torch_c.to_builtin_tensor %10123 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_7867 = arith.constant 1 : index
    %dim_7868 = tensor.dim %10280, %c1_7867 : tensor<4x?x4x128xf16>
    %10281 = flow.tensor.bitcast %10280 : tensor<4x?x4x128xf16>{%dim_7868} -> tensor<4x?x4x64xcomplex<f16>>{%dim_7868}
    %10282 = torch_c.from_builtin_tensor %10281 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %10282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %10283 = torch.aten.mul.Tensor %10282, %10279 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %10283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %10284 = torch_c.to_builtin_tensor %10283 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_7869 = arith.constant 1 : index
    %dim_7870 = tensor.dim %10284, %c1_7869 : tensor<4x?x4x64xcomplex<f32>>
    %10285 = flow.tensor.bitcast %10284 : tensor<4x?x4x64xcomplex<f32>>{%dim_7870} -> tensor<4x?x4x128xf32>{%dim_7870}
    %10286 = torch_c.from_builtin_tensor %10285 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %10286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_7871 = torch.constant.int 5
    %10287 = torch.prims.convert_element_type %10286, %int5_7871 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
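    // The slice-and-rotate sequence above repeats below for the remaining
    // 4-head shards, identical except for the angle table (%10194, %10197) and
    // the activation SSA values consumed.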
    %int1_7872 = torch.constant.int 1
    %10288 = torch.aten.size.int %9977, %int1_7872 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_7873 = torch.constant.int 0
    %10289 = torch.aten.add.int %int0_7873, %10288 : !torch.int, !torch.int -> !torch.int
    %int0_7874 = torch.constant.int 0
    %int0_7875 = torch.constant.int 0
    %int1_7876 = torch.constant.int 1
    %10290 = torch.aten.slice.Tensor %10194, %int0_7874, %int0_7875, %10289, %int1_7876 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10290, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_7877 = torch.constant.int 1
    %int0_7878 = torch.constant.int 0
    %int9223372036854775807_7879 = torch.constant.int 9223372036854775807
    %int1_7880 = torch.constant.int 1
    %10291 = torch.aten.slice.Tensor %10290, %int1_7877, %int0_7878, %int9223372036854775807_7879, %int1_7880 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10291, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_7881 = torch.constant.int 0
    %10292 = torch.aten.unsqueeze %10291, %int0_7881 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10292, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_7882 = torch.constant.int 2
    %10293 = torch.aten.unsqueeze %10292, %int2_7882 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10293, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_7883 = torch.constant.int 3
    %int0_7884 = torch.constant.int 0
    %int9223372036854775807_7885 = torch.constant.int 9223372036854775807
    %int1_7886 = torch.constant.int 1
    %10294 = torch.aten.slice.Tensor %10293, %int3_7883, %int0_7884, %int9223372036854775807_7885, %int1_7886 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10294, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10295 = torch_c.to_builtin_tensor %10125 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_7887 = arith.constant 1 : index
    %dim_7888 = tensor.dim %10295, %c1_7887 : tensor<4x?x4x128xf16>
    %10296 = flow.tensor.bitcast %10295 : tensor<4x?x4x128xf16>{%dim_7888} -> tensor<4x?x4x64xcomplex<f16>>{%dim_7888}
    %10297 = torch_c.from_builtin_tensor %10296 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %10297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %10298 = torch.aten.mul.Tensor %10297, %10294 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %10298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %10299 = torch_c.to_builtin_tensor %10298 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_7889 = arith.constant 1 : index
    %dim_7890 = tensor.dim %10299, %c1_7889 : tensor<4x?x4x64xcomplex<f32>>
    %10300 = flow.tensor.bitcast %10299 : tensor<4x?x4x64xcomplex<f32>>{%dim_7890} -> tensor<4x?x4x128xf32>{%dim_7890}
    %10301 = torch_c.from_builtin_tensor %10300 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %10301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_7891 = torch.constant.int 5
    %10302 = torch.prims.convert_element_type %10301, %int5_7891 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_7892 = torch.constant.int 1
    %10303 = torch.aten.size.int %9983, %int1_7892 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_7893 = torch.constant.int 0
    %10304 = torch.aten.add.int %int0_7893, %10303 : !torch.int, !torch.int -> !torch.int
    %int0_7894 = torch.constant.int 0
    %int0_7895 = torch.constant.int 0
    %int1_7896 = torch.constant.int 1
    %10305 = torch.aten.slice.Tensor %10197, %int0_7894, %int0_7895, %10304, %int1_7896 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10305, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_7897 = torch.constant.int 1
    %int0_7898 = torch.constant.int 0
    %int9223372036854775807_7899 = torch.constant.int 9223372036854775807
    %int1_7900 = torch.constant.int 1
    %10306 = torch.aten.slice.Tensor %10305, %int1_7897, %int0_7898, %int9223372036854775807_7899, %int1_7900 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10306, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_7901 = torch.constant.int 0
    %10307 = torch.aten.unsqueeze %10306, %int0_7901 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10307, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_7902 = torch.constant.int 2
    %10308 = torch.aten.unsqueeze %10307, %int2_7902 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10308, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_7903 = torch.constant.int 3
    %int0_7904 = torch.constant.int 0
    %int9223372036854775807_7905 = torch.constant.int 9223372036854775807
    %int1_7906 = torch.constant.int 1
    %10309 = torch.aten.slice.Tensor %10308, %int3_7903, %int0_7904, %int9223372036854775807_7905, %int1_7906 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10309, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10310 = torch_c.to_builtin_tensor %10127 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_7907 = arith.constant 1 : index
    %dim_7908 = tensor.dim %10310, %c1_7907 : tensor<4x?x4x128xf16>
    %10311 = flow.tensor.bitcast %10310 : tensor<4x?x4x128xf16>{%dim_7908} -> tensor<4x?x4x64xcomplex<f16>>{%dim_7908}
    %10312 = torch_c.from_builtin_tensor %10311 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %10312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %10313 = torch.aten.mul.Tensor %10312, %10309 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %10313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %10314 = torch_c.to_builtin_tensor %10313 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_7909 = arith.constant 1 : index
    %dim_7910 = tensor.dim %10314, %c1_7909 : tensor<4x?x4x64xcomplex<f32>>
    %10315 = flow.tensor.bitcast %10314 : tensor<4x?x4x64xcomplex<f32>>{%dim_7910} -> tensor<4x?x4x128xf32>{%dim_7910}
    %10316 = torch_c.from_builtin_tensor %10315 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %10316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_7911 = torch.constant.int 5
    %10317 = torch.prims.convert_element_type %10316, %int5_7911 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
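    // Rebuild the [131072,64] RoPE angle table on the host CPU; the ops below
    // are a direct translation of:
    //   pos      = arange(131072)                        # max context length
    //   inv_freq = 1 / 500000^(arange(0, 128, 2) / 128)  # 64 frequencies
    //   angles   = pos[:, None] * inv_freq               # [131072, 64]
    //   table    = cos(angles) + i*sin(angles)           # complex<f32>
    // The trailing mul.Scalar by 1.0 is presumably a RoPE scaling factor left
    // at its default.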
    %int131072_7912 = torch.constant.int 131072
    %none_7913 = torch.constant.none
    %none_7914 = torch.constant.none
    %cpu_7915 = torch.constant.device "cpu"
    %false_7916 = torch.constant.bool false
    %10318 = torch.aten.arange %int131072_7912, %none_7913, %none_7914, %cpu_7915, %false_7916 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_7917 = torch.constant.int 0
    %int128_7918 = torch.constant.int 128
    %int2_7919 = torch.constant.int 2
    %none_7920 = torch.constant.none
    %none_7921 = torch.constant.none
    %cpu_7922 = torch.constant.device "cpu"
    %false_7923 = torch.constant.bool false
    %10319 = torch.aten.arange.start_step %int0_7917, %int128_7918, %int2_7919, %none_7920, %none_7921, %cpu_7922, %false_7923 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_7924 = torch.constant.int 0
    %int0_7925 = torch.constant.int 0
    %int64_7926 = torch.constant.int 64
    %int1_7927 = torch.constant.int 1
    %10320 = torch.aten.slice.Tensor %10319, %int0_7924, %int0_7925, %int64_7926, %int1_7927 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_7928 = torch.constant.int 6
    %10321 = torch.prims.convert_element_type %10320, %int6_7928 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_7929 = torch.constant.int 128
    %10322 = torch.aten.div.Scalar %10321, %int128_7929 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_7930 = torch.constant.float 5.000000e+05
    %10323 = torch.aten.pow.Scalar %float5.000000e05_7930, %10322 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %10324 = torch.aten.reciprocal %10323 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_7931 = torch.constant.float 1.000000e+00
    %10325 = torch.aten.mul.Scalar %10324, %float1.000000e00_7931 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_7932 = torch.constant.int 131072
    %int1_7933 = torch.constant.int 1
    %10326 = torch.prim.ListConstruct %int131072_7932, %int1_7933 : (!torch.int, !torch.int) -> !torch.list<int>
    %10327 = torch.aten.view %10318, %10326 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %10328 = torch.aten.mul.Tensor %10327, %10325 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %10329 = torch.aten.cos %10328 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %10330 = torch.aten.sin %10328 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %10331 = torch.aten.complex %10329, %10330 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
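    // Replicate the host-built table to all eight devices: each
    // to_builtin_tensor / flow.tensor.transfer / from_builtin_tensor triple
    // below lands one copy on a #hal.device.promise target (@__device_0
    // through @__device_7).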
    %10332 = torch_c.to_builtin_tensor %10331 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10333 = flow.tensor.transfer %10332 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %10334 = torch_c.from_builtin_tensor %10333 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10335 = torch_c.to_builtin_tensor %10331 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10336 = flow.tensor.transfer %10335 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %10337 = torch_c.from_builtin_tensor %10336 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10338 = torch_c.to_builtin_tensor %10331 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10339 = flow.tensor.transfer %10338 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %10340 = torch_c.from_builtin_tensor %10339 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10341 = torch_c.to_builtin_tensor %10331 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10342 = flow.tensor.transfer %10341 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %10343 = torch_c.from_builtin_tensor %10342 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10344 = torch_c.to_builtin_tensor %10331 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10345 = flow.tensor.transfer %10344 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %10346 = torch_c.from_builtin_tensor %10345 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10347 = torch_c.to_builtin_tensor %10331 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10348 = flow.tensor.transfer %10347 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %10349 = torch_c.from_builtin_tensor %10348 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10350 = torch_c.to_builtin_tensor %10331 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10351 = flow.tensor.transfer %10350 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %10352 = torch_c.from_builtin_tensor %10351 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %10353 = torch_c.to_builtin_tensor %10331 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %10354 = flow.tensor.transfer %10353 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %10355 = torch_c.from_builtin_tensor %10354 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
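    // Same rotation, now applied to the single KV head of each device's shard:
    // the activations here are [4,?,1,128] f16, and the slice/unsqueeze/rotate
    // sequence repeats once per device against that device's replica of the
    // table (%10334, %10337, ...).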
    %int1_7934 = torch.constant.int 1
    %10356 = torch.aten.size.int %10005, %int1_7934 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_7935 = torch.constant.int 0
    %10357 = torch.aten.add.int %int0_7935, %10356 : !torch.int, !torch.int -> !torch.int
    %int0_7936 = torch.constant.int 0
    %int0_7937 = torch.constant.int 0
    %int1_7938 = torch.constant.int 1
    %10358 = torch.aten.slice.Tensor %10334, %int0_7936, %int0_7937, %10357, %int1_7938 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10358, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_7939 = torch.constant.int 1
    %int0_7940 = torch.constant.int 0
    %int9223372036854775807_7941 = torch.constant.int 9223372036854775807
    %int1_7942 = torch.constant.int 1
    %10359 = torch.aten.slice.Tensor %10358, %int1_7939, %int0_7940, %int9223372036854775807_7941, %int1_7942 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10359, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_7943 = torch.constant.int 0
    %10360 = torch.aten.unsqueeze %10359, %int0_7943 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10360, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_7944 = torch.constant.int 2
    %10361 = torch.aten.unsqueeze %10360, %int2_7944 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10361, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_7945 = torch.constant.int 3
    %int0_7946 = torch.constant.int 0
    %int9223372036854775807_7947 = torch.constant.int 9223372036854775807
    %int1_7948 = torch.constant.int 1
    %10362 = torch.aten.slice.Tensor %10361, %int3_7945, %int0_7946, %int9223372036854775807_7947, %int1_7948 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10362, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10363 = torch_c.to_builtin_tensor %10129 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_7949 = arith.constant 1 : index
    %dim_7950 = tensor.dim %10363, %c1_7949 : tensor<4x?x1x128xf16>
    %10364 = flow.tensor.bitcast %10363 : tensor<4x?x1x128xf16>{%dim_7950} -> tensor<4x?x1x64xcomplex<f16>>{%dim_7950}
    %10365 = torch_c.from_builtin_tensor %10364 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %10365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %10366 = torch.aten.mul.Tensor %10365, %10362 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %10367 = torch_c.to_builtin_tensor %10366 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_7951 = arith.constant 1 : index
    %dim_7952 = tensor.dim %10367, %c1_7951 : tensor<4x?x1x64xcomplex<f32>>
    %10368 = flow.tensor.bitcast %10367 : tensor<4x?x1x64xcomplex<f32>>{%dim_7952} -> tensor<4x?x1x128xf32>{%dim_7952}
    %10369 = torch_c.from_builtin_tensor %10368 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %10369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_7953 = torch.constant.int 5
    %10370 = torch.prims.convert_element_type %10369, %int5_7953 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_7954 = torch.constant.int 1
    %10371 = torch.aten.size.int %10011, %int1_7954 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_7955 = torch.constant.int 0
    %10372 = torch.aten.add.int %int0_7955, %10371 : !torch.int, !torch.int -> !torch.int
    %int0_7956 = torch.constant.int 0
    %int0_7957 = torch.constant.int 0
    %int1_7958 = torch.constant.int 1
    %10373 = torch.aten.slice.Tensor %10337, %int0_7956, %int0_7957, %10372, %int1_7958 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10373, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_7959 = torch.constant.int 1
    %int0_7960 = torch.constant.int 0
    %int9223372036854775807_7961 = torch.constant.int 9223372036854775807
    %int1_7962 = torch.constant.int 1
    %10374 = torch.aten.slice.Tensor %10373, %int1_7959, %int0_7960, %int9223372036854775807_7961, %int1_7962 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10374, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_7963 = torch.constant.int 0
    %10375 = torch.aten.unsqueeze %10374, %int0_7963 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10375, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_7964 = torch.constant.int 2
    %10376 = torch.aten.unsqueeze %10375, %int2_7964 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10376, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_7965 = torch.constant.int 3
    %int0_7966 = torch.constant.int 0
    %int9223372036854775807_7967 = torch.constant.int 9223372036854775807
    %int1_7968 = torch.constant.int 1
    %10377 = torch.aten.slice.Tensor %10376, %int3_7965, %int0_7966, %int9223372036854775807_7967, %int1_7968 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10377, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10378 = torch_c.to_builtin_tensor %10131 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_7969 = arith.constant 1 : index
    %dim_7970 = tensor.dim %10378, %c1_7969 : tensor<4x?x1x128xf16>
    %10379 = flow.tensor.bitcast %10378 : tensor<4x?x1x128xf16>{%dim_7970} -> tensor<4x?x1x64xcomplex<f16>>{%dim_7970}
    %10380 = torch_c.from_builtin_tensor %10379 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %10380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %10381 = torch.aten.mul.Tensor %10380, %10377 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %10382 = torch_c.to_builtin_tensor %10381 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_7971 = arith.constant 1 : index
    %dim_7972 = tensor.dim %10382, %c1_7971 : tensor<4x?x1x64xcomplex<f32>>
    %10383 = flow.tensor.bitcast %10382 : tensor<4x?x1x64xcomplex<f32>>{%dim_7972} -> tensor<4x?x1x128xf32>{%dim_7972}
    %10384 = torch_c.from_builtin_tensor %10383 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %10384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_7973 = torch.constant.int 5
    %10385 = torch.prims.convert_element_type %10384, %int5_7973 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_7974 = torch.constant.int 1
    %10386 = torch.aten.size.int %10017, %int1_7974 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_7975 = torch.constant.int 0
    %10387 = torch.aten.add.int %int0_7975, %10386 : !torch.int, !torch.int -> !torch.int
    %int0_7976 = torch.constant.int 0
    %int0_7977 = torch.constant.int 0
    %int1_7978 = torch.constant.int 1
    %10388 = torch.aten.slice.Tensor %10340, %int0_7976, %int0_7977, %10387, %int1_7978 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10388, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_7979 = torch.constant.int 1
    %int0_7980 = torch.constant.int 0
    %int9223372036854775807_7981 = torch.constant.int 9223372036854775807
    %int1_7982 = torch.constant.int 1
    %10389 = torch.aten.slice.Tensor %10388, %int1_7979, %int0_7980, %int9223372036854775807_7981, %int1_7982 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10389, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_7983 = torch.constant.int 0
    %10390 = torch.aten.unsqueeze %10389, %int0_7983 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10390, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_7984 = torch.constant.int 2
    %10391 = torch.aten.unsqueeze %10390, %int2_7984 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10391, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_7985 = torch.constant.int 3
    %int0_7986 = torch.constant.int 0
    %int9223372036854775807_7987 = torch.constant.int 9223372036854775807
    %int1_7988 = torch.constant.int 1
    %10392 = torch.aten.slice.Tensor %10391, %int3_7985, %int0_7986, %int9223372036854775807_7987, %int1_7988 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10392, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10393 = torch_c.to_builtin_tensor %10133 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_7989 = arith.constant 1 : index
    %dim_7990 = tensor.dim %10393, %c1_7989 : tensor<4x?x1x128xf16>
    %10394 = flow.tensor.bitcast %10393 : tensor<4x?x1x128xf16>{%dim_7990} -> tensor<4x?x1x64xcomplex<f16>>{%dim_7990}
    %10395 = torch_c.from_builtin_tensor %10394 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %10395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %10396 = torch.aten.mul.Tensor %10395, %10392 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %10397 = torch_c.to_builtin_tensor %10396 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_7991 = arith.constant 1 : index
    %dim_7992 = tensor.dim %10397, %c1_7991 : tensor<4x?x1x64xcomplex<f32>>
    %10398 = flow.tensor.bitcast %10397 : tensor<4x?x1x64xcomplex<f32>>{%dim_7992} -> tensor<4x?x1x128xf32>{%dim_7992}
    %10399 = torch_c.from_builtin_tensor %10398 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %10399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_7993 = torch.constant.int 5
    %10400 = torch.prims.convert_element_type %10399, %int5_7993 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_7994 = torch.constant.int 1
    %10401 = torch.aten.size.int %10023, %int1_7994 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_7995 = torch.constant.int 0
    %10402 = torch.aten.add.int %int0_7995, %10401 : !torch.int, !torch.int -> !torch.int
    %int0_7996 = torch.constant.int 0
    %int0_7997 = torch.constant.int 0
    %int1_7998 = torch.constant.int 1
    %10403 = torch.aten.slice.Tensor %10343, %int0_7996, %int0_7997, %10402, %int1_7998 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10403, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_7999 = torch.constant.int 1
    %int0_8000 = torch.constant.int 0
    %int9223372036854775807_8001 = torch.constant.int 9223372036854775807
    %int1_8002 = torch.constant.int 1
    %10404 = torch.aten.slice.Tensor %10403, %int1_7999, %int0_8000, %int9223372036854775807_8001, %int1_8002 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10404, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_8003 = torch.constant.int 0
    %10405 = torch.aten.unsqueeze %10404, %int0_8003 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10405, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_8004 = torch.constant.int 2
    %10406 = torch.aten.unsqueeze %10405, %int2_8004 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10406, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_8005 = torch.constant.int 3
    %int0_8006 = torch.constant.int 0
    %int9223372036854775807_8007 = torch.constant.int 9223372036854775807
    %int1_8008 = torch.constant.int 1
    %10407 = torch.aten.slice.Tensor %10406, %int3_8005, %int0_8006, %int9223372036854775807_8007, %int1_8008 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10407, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10408 = torch_c.to_builtin_tensor %10135 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_8009 = arith.constant 1 : index
    %dim_8010 = tensor.dim %10408, %c1_8009 : tensor<4x?x1x128xf16>
    %10409 = flow.tensor.bitcast %10408 : tensor<4x?x1x128xf16>{%dim_8010} -> tensor<4x?x1x64xcomplex<f16>>{%dim_8010}
    %10410 = torch_c.from_builtin_tensor %10409 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %10410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %10411 = torch.aten.mul.Tensor %10410, %10407 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %10412 = torch_c.to_builtin_tensor %10411 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_8011 = arith.constant 1 : index
    %dim_8012 = tensor.dim %10412, %c1_8011 : tensor<4x?x1x64xcomplex<f32>>
    %10413 = flow.tensor.bitcast %10412 : tensor<4x?x1x64xcomplex<f32>>{%dim_8012} -> tensor<4x?x1x128xf32>{%dim_8012}
    %10414 = torch_c.from_builtin_tensor %10413 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %10414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_8013 = torch.constant.int 5
    %10415 = torch.prims.convert_element_type %10414, %int5_8013 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_8014 = torch.constant.int 1
    %10416 = torch.aten.size.int %10029, %int1_8014 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_8015 = torch.constant.int 0
    %10417 = torch.aten.add.int %int0_8015, %10416 : !torch.int, !torch.int -> !torch.int
    %int0_8016 = torch.constant.int 0
    %int0_8017 = torch.constant.int 0
    %int1_8018 = torch.constant.int 1
    %10418 = torch.aten.slice.Tensor %10346, %int0_8016, %int0_8017, %10417, %int1_8018 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10418, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_8019 = torch.constant.int 1
    %int0_8020 = torch.constant.int 0
    %int9223372036854775807_8021 = torch.constant.int 9223372036854775807
    %int1_8022 = torch.constant.int 1
    %10419 = torch.aten.slice.Tensor %10418, %int1_8019, %int0_8020, %int9223372036854775807_8021, %int1_8022 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10419, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_8023 = torch.constant.int 0
    %10420 = torch.aten.unsqueeze %10419, %int0_8023 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10420, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_8024 = torch.constant.int 2
    %10421 = torch.aten.unsqueeze %10420, %int2_8024 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10421, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_8025 = torch.constant.int 3
    %int0_8026 = torch.constant.int 0
    %int9223372036854775807_8027 = torch.constant.int 9223372036854775807
    %int1_8028 = torch.constant.int 1
    %10422 = torch.aten.slice.Tensor %10421, %int3_8025, %int0_8026, %int9223372036854775807_8027, %int1_8028 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10422, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10423 = torch_c.to_builtin_tensor %10137 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_8029 = arith.constant 1 : index
    %dim_8030 = tensor.dim %10423, %c1_8029 : tensor<4x?x1x128xf16>
    %10424 = flow.tensor.bitcast %10423 : tensor<4x?x1x128xf16>{%dim_8030} -> tensor<4x?x1x64xcomplex<f16>>{%dim_8030}
    %10425 = torch_c.from_builtin_tensor %10424 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %10425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %10426 = torch.aten.mul.Tensor %10425, %10422 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %10427 = torch_c.to_builtin_tensor %10426 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_8031 = arith.constant 1 : index
    %dim_8032 = tensor.dim %10427, %c1_8031 : tensor<4x?x1x64xcomplex<f32>>
    %10428 = flow.tensor.bitcast %10427 : tensor<4x?x1x64xcomplex<f32>>{%dim_8032} -> tensor<4x?x1x128xf32>{%dim_8032}
    %10429 = torch_c.from_builtin_tensor %10428 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %10429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_8033 = torch.constant.int 5
    %10430 = torch.prims.convert_element_type %10429, %int5_8033 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_8034 = torch.constant.int 1
    %10431 = torch.aten.size.int %10035, %int1_8034 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_8035 = torch.constant.int 0
    %10432 = torch.aten.add.int %int0_8035, %10431 : !torch.int, !torch.int -> !torch.int
    %int0_8036 = torch.constant.int 0
    %int0_8037 = torch.constant.int 0
    %int1_8038 = torch.constant.int 1
    %10433 = torch.aten.slice.Tensor %10349, %int0_8036, %int0_8037, %10432, %int1_8038 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10433, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_8039 = torch.constant.int 1
    %int0_8040 = torch.constant.int 0
    %int9223372036854775807_8041 = torch.constant.int 9223372036854775807
    %int1_8042 = torch.constant.int 1
    %10434 = torch.aten.slice.Tensor %10433, %int1_8039, %int0_8040, %int9223372036854775807_8041, %int1_8042 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10434, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_8043 = torch.constant.int 0
    %10435 = torch.aten.unsqueeze %10434, %int0_8043 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10435, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_8044 = torch.constant.int 2
    %10436 = torch.aten.unsqueeze %10435, %int2_8044 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10436, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_8045 = torch.constant.int 3
    %int0_8046 = torch.constant.int 0
    %int9223372036854775807_8047 = torch.constant.int 9223372036854775807
    %int1_8048 = torch.constant.int 1
    %10437 = torch.aten.slice.Tensor %10436, %int3_8045, %int0_8046, %int9223372036854775807_8047, %int1_8048 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10437, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10438 = torch_c.to_builtin_tensor %10139 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_8049 = arith.constant 1 : index
    %dim_8050 = tensor.dim %10438, %c1_8049 : tensor<4x?x1x128xf16>
    %10439 = flow.tensor.bitcast %10438 : tensor<4x?x1x128xf16>{%dim_8050} -> tensor<4x?x1x64xcomplex<f16>>{%dim_8050}
    %10440 = torch_c.from_builtin_tensor %10439 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %10440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %10441 = torch.aten.mul.Tensor %10440, %10437 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %10442 = torch_c.to_builtin_tensor %10441 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_8051 = arith.constant 1 : index
    %dim_8052 = tensor.dim %10442, %c1_8051 : tensor<4x?x1x64xcomplex<f32>>
    %10443 = flow.tensor.bitcast %10442 : tensor<4x?x1x64xcomplex<f32>>{%dim_8052} -> tensor<4x?x1x128xf32>{%dim_8052}
    %10444 = torch_c.from_builtin_tensor %10443 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %10444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_8053 = torch.constant.int 5
    %10445 = torch.prims.convert_element_type %10444, %int5_8053 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_8054 = torch.constant.int 1
    %10446 = torch.aten.size.int %10041, %int1_8054 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_8055 = torch.constant.int 0
    %10447 = torch.aten.add.int %int0_8055, %10446 : !torch.int, !torch.int -> !torch.int
    %int0_8056 = torch.constant.int 0
    %int0_8057 = torch.constant.int 0
    %int1_8058 = torch.constant.int 1
    %10448 = torch.aten.slice.Tensor %10352, %int0_8056, %int0_8057, %10447, %int1_8058 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10448, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_8059 = torch.constant.int 1
    %int0_8060 = torch.constant.int 0
    %int9223372036854775807_8061 = torch.constant.int 9223372036854775807
    %int1_8062 = torch.constant.int 1
    %10449 = torch.aten.slice.Tensor %10448, %int1_8059, %int0_8060, %int9223372036854775807_8061, %int1_8062 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10449, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_8063 = torch.constant.int 0
    %10450 = torch.aten.unsqueeze %10449, %int0_8063 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10450, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_8064 = torch.constant.int 2
    %10451 = torch.aten.unsqueeze %10450, %int2_8064 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10451, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_8065 = torch.constant.int 3
    %int0_8066 = torch.constant.int 0
    %int9223372036854775807_8067 = torch.constant.int 9223372036854775807
    %int1_8068 = torch.constant.int 1
    %10452 = torch.aten.slice.Tensor %10451, %int3_8065, %int0_8066, %int9223372036854775807_8067, %int1_8068 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10452, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10453 = torch_c.to_builtin_tensor %10141 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_8069 = arith.constant 1 : index
    %dim_8070 = tensor.dim %10453, %c1_8069 : tensor<4x?x1x128xf16>
    %10454 = flow.tensor.bitcast %10453 : tensor<4x?x1x128xf16>{%dim_8070} -> tensor<4x?x1x64xcomplex<f16>>{%dim_8070}
    %10455 = torch_c.from_builtin_tensor %10454 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %10455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %10456 = torch.aten.mul.Tensor %10455, %10452 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %10457 = torch_c.to_builtin_tensor %10456 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_8071 = arith.constant 1 : index
    %dim_8072 = tensor.dim %10457, %c1_8071 : tensor<4x?x1x64xcomplex<f32>>
    %10458 = flow.tensor.bitcast %10457 : tensor<4x?x1x64xcomplex<f32>>{%dim_8072} -> tensor<4x?x1x128xf32>{%dim_8072}
    %10459 = torch_c.from_builtin_tensor %10458 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %10459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_8073 = torch.constant.int 5
    %10460 = torch.prims.convert_element_type %10459, %int5_8073 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_8074 = torch.constant.int 1
    %10461 = torch.aten.size.int %10047, %int1_8074 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_8075 = torch.constant.int 0
    %10462 = torch.aten.add.int %int0_8075, %10461 : !torch.int, !torch.int -> !torch.int
    %int0_8076 = torch.constant.int 0
    %int0_8077 = torch.constant.int 0
    %int1_8078 = torch.constant.int 1
    %10463 = torch.aten.slice.Tensor %10355, %int0_8076, %int0_8077, %10462, %int1_8078 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10463, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_8079 = torch.constant.int 1
    %int0_8080 = torch.constant.int 0
    %int9223372036854775807_8081 = torch.constant.int 9223372036854775807
    %int1_8082 = torch.constant.int 1
    %10464 = torch.aten.slice.Tensor %10463, %int1_8079, %int0_8080, %int9223372036854775807_8081, %int1_8082 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %10464, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_8083 = torch.constant.int 0
    %10465 = torch.aten.unsqueeze %10464, %int0_8083 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %10465, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_8084 = torch.constant.int 2
    %10466 = torch.aten.unsqueeze %10465, %int2_8084 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10466, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_8085 = torch.constant.int 3
    %int0_8086 = torch.constant.int 0
    %int9223372036854775807_8087 = torch.constant.int 9223372036854775807
    %int1_8088 = torch.constant.int 1
    %10467 = torch.aten.slice.Tensor %10466, %int3_8085, %int0_8086, %int9223372036854775807_8087, %int1_8088 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10467, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %10468 = torch_c.to_builtin_tensor %10143 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_8089 = arith.constant 1 : index
    %dim_8090 = tensor.dim %10468, %c1_8089 : tensor<4x?x1x128xf16>
    %10469 = flow.tensor.bitcast %10468 : tensor<4x?x1x128xf16>{%dim_8090} -> tensor<4x?x1x64xcomplex<f16>>{%dim_8090}
    %10470 = torch_c.from_builtin_tensor %10469 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %10470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %10471 = torch.aten.mul.Tensor %10470, %10467 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %10471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %10472 = torch_c.to_builtin_tensor %10471 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_8091 = arith.constant 1 : index
    %dim_8092 = tensor.dim %10472, %c1_8091 : tensor<4x?x1x64xcomplex<f32>>
    %10473 = flow.tensor.bitcast %10472 : tensor<4x?x1x64xcomplex<f32>>{%dim_8092} -> tensor<4x?x1x128xf32>{%dim_8092}
    %10474 = torch_c.from_builtin_tensor %10473 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %10474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_8093 = torch.constant.int 5
    %10475 = torch.prims.convert_element_type %10474, %int5_8093 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %10475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
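    // What follows appears to compute flat KV-cache slot indices per device:
    // each [4,?] si64 page-index tensor is scaled by 64 and then offset by 8.
    // Assuming the paged-cache layout suggested by the [?,16,1,128] views
    // further below, this is consistent with
    //   slot = page_id * page_stride + subblock_offset
    // for this layer's K entries, but the exact cache layout is not
    // recoverable from this excerpt alone.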
    %int64_8094 = torch.constant.int 64
    %10476 = torch.aten.mul.Scalar %2364, %int64_8094 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10476, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_8095 = torch.constant.int 64
    %10477 = torch.aten.mul.Scalar %2367, %int64_8095 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10477, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_8096 = torch.constant.int 64
    %10478 = torch.aten.mul.Scalar %2370, %int64_8096 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10478, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_8097 = torch.constant.int 64
    %10479 = torch.aten.mul.Scalar %2373, %int64_8097 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10479, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_8098 = torch.constant.int 64
    %10480 = torch.aten.mul.Scalar %2376, %int64_8098 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10480, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_8099 = torch.constant.int 64
    %10481 = torch.aten.mul.Scalar %2379, %int64_8099 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10481, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_8100 = torch.constant.int 64
    %10482 = torch.aten.mul.Scalar %2382, %int64_8100 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10482, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_8101 = torch.constant.int 64
    %10483 = torch.aten.mul.Scalar %2385, %int64_8101 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10483, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int8 = torch.constant.int 8
    %int1_8102 = torch.constant.int 1
    %10484 = torch.aten.add.Scalar %10476, %int8, %int1_8102 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10484, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int8_8103 = torch.constant.int 8
    %int1_8104 = torch.constant.int 1
    %10485 = torch.aten.add.Scalar %10477, %int8_8103, %int1_8104 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10485, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int8_8105 = torch.constant.int 8
    %int1_8106 = torch.constant.int 1
    %10486 = torch.aten.add.Scalar %10478, %int8_8105, %int1_8106 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10486, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int8_8107 = torch.constant.int 8
    %int1_8108 = torch.constant.int 1
    %10487 = torch.aten.add.Scalar %10479, %int8_8107, %int1_8108 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10487, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int8_8109 = torch.constant.int 8
    %int1_8110 = torch.constant.int 1
    %10488 = torch.aten.add.Scalar %10480, %int8_8109, %int1_8110 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10488, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int8_8111 = torch.constant.int 8
    %int1_8112 = torch.constant.int 1
    %10489 = torch.aten.add.Scalar %10481, %int8_8111, %int1_8112 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10489, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int8_8113 = torch.constant.int 8
    %int1_8114 = torch.constant.int 1
    %10490 = torch.aten.add.Scalar %10482, %int8_8113, %int1_8114 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10490, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int8_8115 = torch.constant.int 8
    %int1_8116 = torch.constant.int 1
    %10491 = torch.aten.add.Scalar %10483, %int8_8115, %int1_8116 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10491, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
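    // Reshape each device's rotated K tensor into the paged layout
    // [4, pages, 16, 1, 128]: 16 tokens per page, 1 KV head, head dim 128,
    // with %3095 carrying the dynamic page count (seq_len = pages * 16 per the
    // symbolic-shape bindings above).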
    %int4_8117 = torch.constant.int 4
    %int16_8118 = torch.constant.int 16
    %int1_8119 = torch.constant.int 1
    %int128_8120 = torch.constant.int 128
    %10492 = torch.prim.ListConstruct %int4_8117, %3095, %int16_8118, %int1_8119, %int128_8120 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10493 = torch.aten.view %10370, %10492 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10493, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8121 = torch.constant.int 4
    %int16_8122 = torch.constant.int 16
    %int1_8123 = torch.constant.int 1
    %int128_8124 = torch.constant.int 128
    %10494 = torch.prim.ListConstruct %int4_8121, %3095, %int16_8122, %int1_8123, %int128_8124 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10495 = torch.aten.view %10385, %10494 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10495, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8125 = torch.constant.int 4
    %int16_8126 = torch.constant.int 16
    %int1_8127 = torch.constant.int 1
    %int128_8128 = torch.constant.int 128
    %10496 = torch.prim.ListConstruct %int4_8125, %3095, %int16_8126, %int1_8127, %int128_8128 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10497 = torch.aten.view %10400, %10496 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10497, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8129 = torch.constant.int 4
    %int16_8130 = torch.constant.int 16
    %int1_8131 = torch.constant.int 1
    %int128_8132 = torch.constant.int 128
    %10498 = torch.prim.ListConstruct %int4_8129, %3095, %int16_8130, %int1_8131, %int128_8132 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10499 = torch.aten.view %10415, %10498 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10499, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8133 = torch.constant.int 4
    %int16_8134 = torch.constant.int 16
    %int1_8135 = torch.constant.int 1
    %int128_8136 = torch.constant.int 128
    %10500 = torch.prim.ListConstruct %int4_8133, %3095, %int16_8134, %int1_8135, %int128_8136 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10501 = torch.aten.view %10430, %10500 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10501, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8137 = torch.constant.int 4
    %int16_8138 = torch.constant.int 16
    %int1_8139 = torch.constant.int 1
    %int128_8140 = torch.constant.int 128
    %10502 = torch.prim.ListConstruct %int4_8137, %3095, %int16_8138, %int1_8139, %int128_8140 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10503 = torch.aten.view %10445, %10502 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10503, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8141 = torch.constant.int 4
    %int16_8142 = torch.constant.int 16
    %int1_8143 = torch.constant.int 1
    %int128_8144 = torch.constant.int 128
    %10504 = torch.prim.ListConstruct %int4_8141, %3095, %int16_8142, %int1_8143, %int128_8144 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10505 = torch.aten.view %10460, %10504 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10505, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8145 = torch.constant.int 4
    %int16_8146 = torch.constant.int 16
    %int1_8147 = torch.constant.int 1
    %int128_8148 = torch.constant.int 128
    %10506 = torch.prim.ListConstruct %int4_8145, %3095, %int16_8146, %int1_8147, %int128_8148 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10507 = torch.aten.view %10475, %10506 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10507, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
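    // Collapse the batch and page-count dimensions of each reshaped tensor:
    // [4, s0, 16, 1, 128] -> [4*s0, 16, 1, 128], yielding one row per page.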
    %int4_8149 = torch.constant.int 4
    %10508 = torch.aten.mul.int %int4_8149, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8150 = torch.constant.int 16
    %int1_8151 = torch.constant.int 1
    %int128_8152 = torch.constant.int 128
    %10509 = torch.prim.ListConstruct %10508, %int16_8150, %int1_8151, %int128_8152 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10510 = torch.aten.view %10493, %10509 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10510, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8153 = torch.constant.int 4
    %10511 = torch.aten.mul.int %int4_8153, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8154 = torch.constant.int 16
    %int1_8155 = torch.constant.int 1
    %int128_8156 = torch.constant.int 128
    %10512 = torch.prim.ListConstruct %10511, %int16_8154, %int1_8155, %int128_8156 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10513 = torch.aten.view %10495, %10512 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10513, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8157 = torch.constant.int 4
    %10514 = torch.aten.mul.int %int4_8157, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8158 = torch.constant.int 16
    %int1_8159 = torch.constant.int 1
    %int128_8160 = torch.constant.int 128
    %10515 = torch.prim.ListConstruct %10514, %int16_8158, %int1_8159, %int128_8160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10516 = torch.aten.view %10497, %10515 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10516, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8161 = torch.constant.int 4
    %10517 = torch.aten.mul.int %int4_8161, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8162 = torch.constant.int 16
    %int1_8163 = torch.constant.int 1
    %int128_8164 = torch.constant.int 128
    %10518 = torch.prim.ListConstruct %10517, %int16_8162, %int1_8163, %int128_8164 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10519 = torch.aten.view %10499, %10518 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10519, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8165 = torch.constant.int 4
    %10520 = torch.aten.mul.int %int4_8165, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8166 = torch.constant.int 16
    %int1_8167 = torch.constant.int 1
    %int128_8168 = torch.constant.int 128
    %10521 = torch.prim.ListConstruct %10520, %int16_8166, %int1_8167, %int128_8168 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10522 = torch.aten.view %10501, %10521 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10522, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8169 = torch.constant.int 4
    %10523 = torch.aten.mul.int %int4_8169, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8170 = torch.constant.int 16
    %int1_8171 = torch.constant.int 1
    %int128_8172 = torch.constant.int 128
    %10524 = torch.prim.ListConstruct %10523, %int16_8170, %int1_8171, %int128_8172 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10525 = torch.aten.view %10503, %10524 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10525, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8173 = torch.constant.int 4
    %10526 = torch.aten.mul.int %int4_8173, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8174 = torch.constant.int 16
    %int1_8175 = torch.constant.int 1
    %int128_8176 = torch.constant.int 128
    %10527 = torch.prim.ListConstruct %10526, %int16_8174, %int1_8175, %int128_8176 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10528 = torch.aten.view %10505, %10527 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10528, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8177 = torch.constant.int 4
    %10529 = torch.aten.mul.int %int4_8177, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8178 = torch.constant.int 16
    %int1_8179 = torch.constant.int 1
    %int128_8180 = torch.constant.int 128
    %10530 = torch.prim.ListConstruct %10529, %int16_8178, %int1_8179, %int128_8180 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10531 = torch.aten.view %10507, %10530 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10531, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
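    // Flatten the eight [4,?] si64 index tensors (%10484 .. %10491) the same way,
    // to [s0*4]; these presumably hold the destination slot index for each page
    // being written into the paged cache below.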
    %int4_8181 = torch.constant.int 4
    %10532 = torch.aten.mul.int %int4_8181, %3095 : !torch.int, !torch.int -> !torch.int
    %10533 = torch.prim.ListConstruct %10532 : (!torch.int) -> !torch.list<int>
    %10534 = torch.aten.view %10484, %10533 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10534, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8182 = torch.constant.int 4
    %10535 = torch.aten.mul.int %int4_8182, %3095 : !torch.int, !torch.int -> !torch.int
    %10536 = torch.prim.ListConstruct %10535 : (!torch.int) -> !torch.list<int>
    %10537 = torch.aten.view %10485, %10536 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10537, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8183 = torch.constant.int 4
    %10538 = torch.aten.mul.int %int4_8183, %3095 : !torch.int, !torch.int -> !torch.int
    %10539 = torch.prim.ListConstruct %10538 : (!torch.int) -> !torch.list<int>
    %10540 = torch.aten.view %10486, %10539 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10540, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8184 = torch.constant.int 4
    %10541 = torch.aten.mul.int %int4_8184, %3095 : !torch.int, !torch.int -> !torch.int
    %10542 = torch.prim.ListConstruct %10541 : (!torch.int) -> !torch.list<int>
    %10543 = torch.aten.view %10487, %10542 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10543, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8185 = torch.constant.int 4
    %10544 = torch.aten.mul.int %int4_8185, %3095 : !torch.int, !torch.int -> !torch.int
    %10545 = torch.prim.ListConstruct %10544 : (!torch.int) -> !torch.list<int>
    %10546 = torch.aten.view %10488, %10545 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10546, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8186 = torch.constant.int 4
    %10547 = torch.aten.mul.int %int4_8186, %3095 : !torch.int, !torch.int -> !torch.int
    %10548 = torch.prim.ListConstruct %10547 : (!torch.int) -> !torch.list<int>
    %10549 = torch.aten.view %10489, %10548 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10549, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8187 = torch.constant.int 4
    %10550 = torch.aten.mul.int %int4_8187, %3095 : !torch.int, !torch.int -> !torch.int
    %10551 = torch.prim.ListConstruct %10550 : (!torch.int) -> !torch.list<int>
    %10552 = torch.aten.view %10490, %10551 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10552, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8188 = torch.constant.int 4
    %10553 = torch.aten.mul.int %int4_8188, %3095 : !torch.int, !torch.int -> !torch.int
    %10554 = torch.prim.ListConstruct %10553 : (!torch.int) -> !torch.list<int>
    %10555 = torch.aten.view %10491, %10554 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10555, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
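    // Second set of page payloads (%10145 .. %10159), reshaped identically to
    // [4, s0, 16, 1, 128]. Given the +1 index offset applied further down, the
    // first set is presumably the K halves and this set the V halves of the
    // cache update.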
    %int4_8189 = torch.constant.int 4
    %int16_8190 = torch.constant.int 16
    %int1_8191 = torch.constant.int 1
    %int128_8192 = torch.constant.int 128
    %10556 = torch.prim.ListConstruct %int4_8189, %3095, %int16_8190, %int1_8191, %int128_8192 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10557 = torch.aten.view %10145, %10556 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10557, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8193 = torch.constant.int 4
    %int16_8194 = torch.constant.int 16
    %int1_8195 = torch.constant.int 1
    %int128_8196 = torch.constant.int 128
    %10558 = torch.prim.ListConstruct %int4_8193, %3095, %int16_8194, %int1_8195, %int128_8196 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10559 = torch.aten.view %10147, %10558 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10559, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8197 = torch.constant.int 4
    %int16_8198 = torch.constant.int 16
    %int1_8199 = torch.constant.int 1
    %int128_8200 = torch.constant.int 128
    %10560 = torch.prim.ListConstruct %int4_8197, %3095, %int16_8198, %int1_8199, %int128_8200 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10561 = torch.aten.view %10149, %10560 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10561, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8201 = torch.constant.int 4
    %int16_8202 = torch.constant.int 16
    %int1_8203 = torch.constant.int 1
    %int128_8204 = torch.constant.int 128
    %10562 = torch.prim.ListConstruct %int4_8201, %3095, %int16_8202, %int1_8203, %int128_8204 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10563 = torch.aten.view %10151, %10562 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10563, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8205 = torch.constant.int 4
    %int16_8206 = torch.constant.int 16
    %int1_8207 = torch.constant.int 1
    %int128_8208 = torch.constant.int 128
    %10564 = torch.prim.ListConstruct %int4_8205, %3095, %int16_8206, %int1_8207, %int128_8208 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10565 = torch.aten.view %10153, %10564 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10565, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8209 = torch.constant.int 4
    %int16_8210 = torch.constant.int 16
    %int1_8211 = torch.constant.int 1
    %int128_8212 = torch.constant.int 128
    %10566 = torch.prim.ListConstruct %int4_8209, %3095, %int16_8210, %int1_8211, %int128_8212 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10567 = torch.aten.view %10155, %10566 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10567, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8213 = torch.constant.int 4
    %int16_8214 = torch.constant.int 16
    %int1_8215 = torch.constant.int 1
    %int128_8216 = torch.constant.int 128
    %10568 = torch.prim.ListConstruct %int4_8213, %3095, %int16_8214, %int1_8215, %int128_8216 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10569 = torch.aten.view %10157, %10568 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10569, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_8217 = torch.constant.int 4
    %int16_8218 = torch.constant.int 16
    %int1_8219 = torch.constant.int 1
    %int128_8220 = torch.constant.int 128
    %10570 = torch.prim.ListConstruct %int4_8217, %3095, %int16_8218, %int1_8219, %int128_8220 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10571 = torch.aten.view %10159, %10570 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %10571, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
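    // Collapse batch and page dims of the second payload set as well:
    // [4, s0, 16, 1, 128] -> [4*s0, 16, 1, 128].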
    %int4_8221 = torch.constant.int 4
    %10572 = torch.aten.mul.int %int4_8221, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8222 = torch.constant.int 16
    %int1_8223 = torch.constant.int 1
    %int128_8224 = torch.constant.int 128
    %10573 = torch.prim.ListConstruct %10572, %int16_8222, %int1_8223, %int128_8224 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10574 = torch.aten.view %10557, %10573 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10574, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8225 = torch.constant.int 4
    %10575 = torch.aten.mul.int %int4_8225, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8226 = torch.constant.int 16
    %int1_8227 = torch.constant.int 1
    %int128_8228 = torch.constant.int 128
    %10576 = torch.prim.ListConstruct %10575, %int16_8226, %int1_8227, %int128_8228 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10577 = torch.aten.view %10559, %10576 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10577, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8229 = torch.constant.int 4
    %10578 = torch.aten.mul.int %int4_8229, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8230 = torch.constant.int 16
    %int1_8231 = torch.constant.int 1
    %int128_8232 = torch.constant.int 128
    %10579 = torch.prim.ListConstruct %10578, %int16_8230, %int1_8231, %int128_8232 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10580 = torch.aten.view %10561, %10579 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10580, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8233 = torch.constant.int 4
    %10581 = torch.aten.mul.int %int4_8233, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8234 = torch.constant.int 16
    %int1_8235 = torch.constant.int 1
    %int128_8236 = torch.constant.int 128
    %10582 = torch.prim.ListConstruct %10581, %int16_8234, %int1_8235, %int128_8236 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10583 = torch.aten.view %10563, %10582 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10583, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8237 = torch.constant.int 4
    %10584 = torch.aten.mul.int %int4_8237, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8238 = torch.constant.int 16
    %int1_8239 = torch.constant.int 1
    %int128_8240 = torch.constant.int 128
    %10585 = torch.prim.ListConstruct %10584, %int16_8238, %int1_8239, %int128_8240 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10586 = torch.aten.view %10565, %10585 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10586, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8241 = torch.constant.int 4
    %10587 = torch.aten.mul.int %int4_8241, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8242 = torch.constant.int 16
    %int1_8243 = torch.constant.int 1
    %int128_8244 = torch.constant.int 128
    %10588 = torch.prim.ListConstruct %10587, %int16_8242, %int1_8243, %int128_8244 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10589 = torch.aten.view %10567, %10588 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10589, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8245 = torch.constant.int 4
    %10590 = torch.aten.mul.int %int4_8245, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8246 = torch.constant.int 16
    %int1_8247 = torch.constant.int 1
    %int128_8248 = torch.constant.int 128
    %10591 = torch.prim.ListConstruct %10590, %int16_8246, %int1_8247, %int128_8248 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10592 = torch.aten.view %10569, %10591 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10592, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_8249 = torch.constant.int 4
    %10593 = torch.aten.mul.int %int4_8249, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_8250 = torch.constant.int 16
    %int1_8251 = torch.constant.int 1
    %int128_8252 = torch.constant.int 128
    %10594 = torch.prim.ListConstruct %10593, %int16_8250, %int1_8251, %int128_8252 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10595 = torch.aten.view %10571, %10594 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10595, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
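    // Offset every slot index by 1 (add.Scalar with alpha = 1). In the
    // [?, 32, 2, 16, 1, 128] cache layout used below, consecutive flattened slots
    // differ in the size-2 axis, so +1 appears to select the companion sub-slot
    // (e.g. V next to K) within the same page and block.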
    %int1_8253 = torch.constant.int 1
    %int1_8254 = torch.constant.int 1
    %10596 = torch.aten.add.Scalar %10484, %int1_8253, %int1_8254 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10596, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_8255 = torch.constant.int 1
    %int1_8256 = torch.constant.int 1
    %10597 = torch.aten.add.Scalar %10485, %int1_8255, %int1_8256 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10597, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_8257 = torch.constant.int 1
    %int1_8258 = torch.constant.int 1
    %10598 = torch.aten.add.Scalar %10486, %int1_8257, %int1_8258 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10598, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_8259 = torch.constant.int 1
    %int1_8260 = torch.constant.int 1
    %10599 = torch.aten.add.Scalar %10487, %int1_8259, %int1_8260 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10599, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_8261 = torch.constant.int 1
    %int1_8262 = torch.constant.int 1
    %10600 = torch.aten.add.Scalar %10488, %int1_8261, %int1_8262 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10600, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_8263 = torch.constant.int 1
    %int1_8264 = torch.constant.int 1
    %10601 = torch.aten.add.Scalar %10489, %int1_8263, %int1_8264 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10601, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_8265 = torch.constant.int 1
    %int1_8266 = torch.constant.int 1
    %10602 = torch.aten.add.Scalar %10490, %int1_8265, %int1_8266 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10602, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_8267 = torch.constant.int 1
    %int1_8268 = torch.constant.int 1
    %10603 = torch.aten.add.Scalar %10491, %int1_8267, %int1_8268 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %10603, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
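    // Flatten the offset index tensors to [s0*4], matching the flattened payloads.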
    %int4_8269 = torch.constant.int 4
    %10604 = torch.aten.mul.int %int4_8269, %3095 : !torch.int, !torch.int -> !torch.int
    %10605 = torch.prim.ListConstruct %10604 : (!torch.int) -> !torch.list<int>
    %10606 = torch.aten.view %10596, %10605 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10606, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8270 = torch.constant.int 4
    %10607 = torch.aten.mul.int %int4_8270, %3095 : !torch.int, !torch.int -> !torch.int
    %10608 = torch.prim.ListConstruct %10607 : (!torch.int) -> !torch.list<int>
    %10609 = torch.aten.view %10597, %10608 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10609, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8271 = torch.constant.int 4
    %10610 = torch.aten.mul.int %int4_8271, %3095 : !torch.int, !torch.int -> !torch.int
    %10611 = torch.prim.ListConstruct %10610 : (!torch.int) -> !torch.list<int>
    %10612 = torch.aten.view %10598, %10611 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10612, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8272 = torch.constant.int 4
    %10613 = torch.aten.mul.int %int4_8272, %3095 : !torch.int, !torch.int -> !torch.int
    %10614 = torch.prim.ListConstruct %10613 : (!torch.int) -> !torch.list<int>
    %10615 = torch.aten.view %10599, %10614 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10615, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8273 = torch.constant.int 4
    %10616 = torch.aten.mul.int %int4_8273, %3095 : !torch.int, !torch.int -> !torch.int
    %10617 = torch.prim.ListConstruct %10616 : (!torch.int) -> !torch.list<int>
    %10618 = torch.aten.view %10600, %10617 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10618, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8274 = torch.constant.int 4
    %10619 = torch.aten.mul.int %int4_8274, %3095 : !torch.int, !torch.int -> !torch.int
    %10620 = torch.prim.ListConstruct %10619 : (!torch.int) -> !torch.list<int>
    %10621 = torch.aten.view %10601, %10620 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10621, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8275 = torch.constant.int 4
    %10622 = torch.aten.mul.int %int4_8275, %3095 : !torch.int, !torch.int -> !torch.int
    %10623 = torch.prim.ListConstruct %10622 : (!torch.int) -> !torch.list<int>
    %10624 = torch.aten.view %10602, %10623 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10624, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_8276 = torch.constant.int 4
    %10625 = torch.aten.mul.int %int4_8276, %3095 : !torch.int, !torch.int -> !torch.int
    %10626 = torch.prim.ListConstruct %10625 : (!torch.int) -> !torch.list<int>
    %10627 = torch.aten.view %10603, %10626 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10627, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
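    // Concatenate base and offset indices per shard: [s0*4] ++ [s0*4] -> [s0*8].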
    %10628 = torch.prim.ListConstruct %10534, %10606 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_8277 = torch.constant.int 0
    %10629 = torch.aten.cat %10628, %int0_8277 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10629, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %10630 = torch.prim.ListConstruct %10537, %10609 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_8278 = torch.constant.int 0
    %10631 = torch.aten.cat %10630, %int0_8278 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10631, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %10632 = torch.prim.ListConstruct %10540, %10612 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_8279 = torch.constant.int 0
    %10633 = torch.aten.cat %10632, %int0_8279 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10633, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %10634 = torch.prim.ListConstruct %10543, %10615 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_8280 = torch.constant.int 0
    %10635 = torch.aten.cat %10634, %int0_8280 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10635, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %10636 = torch.prim.ListConstruct %10546, %10618 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_8281 = torch.constant.int 0
    %10637 = torch.aten.cat %10636, %int0_8281 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10637, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %10638 = torch.prim.ListConstruct %10549, %10621 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_8282 = torch.constant.int 0
    %10639 = torch.aten.cat %10638, %int0_8282 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10639, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %10640 = torch.prim.ListConstruct %10552, %10624 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_8283 = torch.constant.int 0
    %10641 = torch.aten.cat %10640, %int0_8283 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10641, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %10642 = torch.prim.ListConstruct %10555, %10627 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_8284 = torch.constant.int 0
    %10643 = torch.aten.cat %10642, %int0_8284 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %10643, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
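    // Concatenate the two payload sets the same way:
    // [s0*4, 16, 1, 128] ++ [s0*4, 16, 1, 128] -> [s0*8, 16, 1, 128].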
    %10644 = torch.prim.ListConstruct %10510, %10574 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_8285 = torch.constant.int 0
    %10645 = torch.aten.cat %10644, %int0_8285 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10645, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10646 = torch.prim.ListConstruct %10513, %10577 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_8286 = torch.constant.int 0
    %10647 = torch.aten.cat %10646, %int0_8286 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10647, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10648 = torch.prim.ListConstruct %10516, %10580 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_8287 = torch.constant.int 0
    %10649 = torch.aten.cat %10648, %int0_8287 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10649, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10650 = torch.prim.ListConstruct %10519, %10583 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_8288 = torch.constant.int 0
    %10651 = torch.aten.cat %10650, %int0_8288 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10651, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10652 = torch.prim.ListConstruct %10522, %10586 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_8289 = torch.constant.int 0
    %10653 = torch.aten.cat %10652, %int0_8289 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10653, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10654 = torch.prim.ListConstruct %10525, %10589 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_8290 = torch.constant.int 0
    %10655 = torch.aten.cat %10654, %int0_8290 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10655, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10656 = torch.prim.ListConstruct %10528, %10592 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_8291 = torch.constant.int 0
    %10657 = torch.aten.cat %10656, %int0_8291 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10657, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10658 = torch.prim.ListConstruct %10531, %10595 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_8292 = torch.constant.int 0
    %10659 = torch.aten.cat %10658, %int0_8292 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10659, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
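    // Scatter the combined updates into the paged caches. For each of the eight
    // cache slabs (%8810, %8822, ..., %8894 -- presumably one per device), the
    // flat [?, 131072] buffer is viewed as [pages, 32, 2, 16, 1, 128], flattened
    // to slot granularity [pages*64, 16, 1, 128], updated in place with index_put
    // (accumulate = false), then viewed back to the flat layout. Note that
    // 32 * 2 * 16 * 1 * 128 = 131072, so the views are size-preserving.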
    %int32_8293 = torch.constant.int 32
    %int2_8294 = torch.constant.int 2
    %int16_8295 = torch.constant.int 16
    %int1_8296 = torch.constant.int 1
    %int128_8297 = torch.constant.int 128
    %10660 = torch.prim.ListConstruct %3023, %int32_8293, %int2_8294, %int16_8295, %int1_8296, %int128_8297 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10661 = torch.aten.view %8810, %10660 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10661, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_8298 = torch.constant.int 32
    %10662 = torch.aten.mul.int %3023, %int32_8298 : !torch.int, !torch.int -> !torch.int
    %int2_8299 = torch.constant.int 2
    %10663 = torch.aten.mul.int %10662, %int2_8299 : !torch.int, !torch.int -> !torch.int
    %int16_8300 = torch.constant.int 16
    %int1_8301 = torch.constant.int 1
    %int128_8302 = torch.constant.int 128
    %10664 = torch.prim.ListConstruct %10663, %int16_8300, %int1_8301, %int128_8302 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10665 = torch.aten.view %10661, %10664 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10665, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10666 = torch.prim.ListConstruct %10629 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_8303 = torch.constant.bool false
    %10667 = torch.aten.index_put %10665, %10666, %10645, %false_8303 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10667, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_8304 = torch.constant.int 32
    %int2_8305 = torch.constant.int 2
    %int16_8306 = torch.constant.int 16
    %int1_8307 = torch.constant.int 1
    %int128_8308 = torch.constant.int 128
    %10668 = torch.prim.ListConstruct %3023, %int32_8304, %int2_8305, %int16_8306, %int1_8307, %int128_8308 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10669 = torch.aten.view %10667, %10668 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10669, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_8309 = torch.constant.int 131072
    %10670 = torch.prim.ListConstruct %3023, %int131072_8309 : (!torch.int, !torch.int) -> !torch.list<int>
    %10671 = torch.aten.view %10669, %10670 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %10671, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_8310 = torch.constant.int 32
    %int2_8311 = torch.constant.int 2
    %int16_8312 = torch.constant.int 16
    %int1_8313 = torch.constant.int 1
    %int128_8314 = torch.constant.int 128
    %10672 = torch.prim.ListConstruct %3026, %int32_8310, %int2_8311, %int16_8312, %int1_8313, %int128_8314 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10673 = torch.aten.view %8822, %10672 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10673, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_8315 = torch.constant.int 32
    %10674 = torch.aten.mul.int %3026, %int32_8315 : !torch.int, !torch.int -> !torch.int
    %int2_8316 = torch.constant.int 2
    %10675 = torch.aten.mul.int %10674, %int2_8316 : !torch.int, !torch.int -> !torch.int
    %int16_8317 = torch.constant.int 16
    %int1_8318 = torch.constant.int 1
    %int128_8319 = torch.constant.int 128
    %10676 = torch.prim.ListConstruct %10675, %int16_8317, %int1_8318, %int128_8319 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10677 = torch.aten.view %10673, %10676 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10677, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10678 = torch.prim.ListConstruct %10631 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_8320 = torch.constant.bool false
    %10679 = torch.aten.index_put %10677, %10678, %10647, %false_8320 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10679, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_8321 = torch.constant.int 32
    %int2_8322 = torch.constant.int 2
    %int16_8323 = torch.constant.int 16
    %int1_8324 = torch.constant.int 1
    %int128_8325 = torch.constant.int 128
    %10680 = torch.prim.ListConstruct %3026, %int32_8321, %int2_8322, %int16_8323, %int1_8324, %int128_8325 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10681 = torch.aten.view %10679, %10680 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10681, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_8326 = torch.constant.int 131072
    %10682 = torch.prim.ListConstruct %3026, %int131072_8326 : (!torch.int, !torch.int) -> !torch.list<int>
    %10683 = torch.aten.view %10681, %10682 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %10683, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_8327 = torch.constant.int 32
    %int2_8328 = torch.constant.int 2
    %int16_8329 = torch.constant.int 16
    %int1_8330 = torch.constant.int 1
    %int128_8331 = torch.constant.int 128
    %10684 = torch.prim.ListConstruct %3029, %int32_8327, %int2_8328, %int16_8329, %int1_8330, %int128_8331 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10685 = torch.aten.view %8834, %10684 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10685, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_8332 = torch.constant.int 32
    %10686 = torch.aten.mul.int %3029, %int32_8332 : !torch.int, !torch.int -> !torch.int
    %int2_8333 = torch.constant.int 2
    %10687 = torch.aten.mul.int %10686, %int2_8333 : !torch.int, !torch.int -> !torch.int
    %int16_8334 = torch.constant.int 16
    %int1_8335 = torch.constant.int 1
    %int128_8336 = torch.constant.int 128
    %10688 = torch.prim.ListConstruct %10687, %int16_8334, %int1_8335, %int128_8336 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10689 = torch.aten.view %10685, %10688 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10689, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10690 = torch.prim.ListConstruct %10633 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_8337 = torch.constant.bool false
    %10691 = torch.aten.index_put %10689, %10690, %10649, %false_8337 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10691, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_8338 = torch.constant.int 32
    %int2_8339 = torch.constant.int 2
    %int16_8340 = torch.constant.int 16
    %int1_8341 = torch.constant.int 1
    %int128_8342 = torch.constant.int 128
    %10692 = torch.prim.ListConstruct %3029, %int32_8338, %int2_8339, %int16_8340, %int1_8341, %int128_8342 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10693 = torch.aten.view %10691, %10692 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10693, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_8343 = torch.constant.int 131072
    %10694 = torch.prim.ListConstruct %3029, %int131072_8343 : (!torch.int, !torch.int) -> !torch.list<int>
    %10695 = torch.aten.view %10693, %10694 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %10695, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_8344 = torch.constant.int 32
    %int2_8345 = torch.constant.int 2
    %int16_8346 = torch.constant.int 16
    %int1_8347 = torch.constant.int 1
    %int128_8348 = torch.constant.int 128
    %10696 = torch.prim.ListConstruct %3032, %int32_8344, %int2_8345, %int16_8346, %int1_8347, %int128_8348 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10697 = torch.aten.view %8846, %10696 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10697, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_8349 = torch.constant.int 32
    %10698 = torch.aten.mul.int %3032, %int32_8349 : !torch.int, !torch.int -> !torch.int
    %int2_8350 = torch.constant.int 2
    %10699 = torch.aten.mul.int %10698, %int2_8350 : !torch.int, !torch.int -> !torch.int
    %int16_8351 = torch.constant.int 16
    %int1_8352 = torch.constant.int 1
    %int128_8353 = torch.constant.int 128
    %10700 = torch.prim.ListConstruct %10699, %int16_8351, %int1_8352, %int128_8353 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10701 = torch.aten.view %10697, %10700 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10701, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10702 = torch.prim.ListConstruct %10635 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_8354 = torch.constant.bool false
    %10703 = torch.aten.index_put %10701, %10702, %10651, %false_8354 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10703, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_8355 = torch.constant.int 32
    %int2_8356 = torch.constant.int 2
    %int16_8357 = torch.constant.int 16
    %int1_8358 = torch.constant.int 1
    %int128_8359 = torch.constant.int 128
    %10704 = torch.prim.ListConstruct %3032, %int32_8355, %int2_8356, %int16_8357, %int1_8358, %int128_8359 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10705 = torch.aten.view %10703, %10704 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10705, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_8360 = torch.constant.int 131072
    %10706 = torch.prim.ListConstruct %3032, %int131072_8360 : (!torch.int, !torch.int) -> !torch.list<int>
    %10707 = torch.aten.view %10705, %10706 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %10707, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_8361 = torch.constant.int 32
    %int2_8362 = torch.constant.int 2
    %int16_8363 = torch.constant.int 16
    %int1_8364 = torch.constant.int 1
    %int128_8365 = torch.constant.int 128
    %10708 = torch.prim.ListConstruct %3035, %int32_8361, %int2_8362, %int16_8363, %int1_8364, %int128_8365 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10709 = torch.aten.view %8858, %10708 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10709, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_8366 = torch.constant.int 32
    %10710 = torch.aten.mul.int %3035, %int32_8366 : !torch.int, !torch.int -> !torch.int
    %int2_8367 = torch.constant.int 2
    %10711 = torch.aten.mul.int %10710, %int2_8367 : !torch.int, !torch.int -> !torch.int
    %int16_8368 = torch.constant.int 16
    %int1_8369 = torch.constant.int 1
    %int128_8370 = torch.constant.int 128
    %10712 = torch.prim.ListConstruct %10711, %int16_8368, %int1_8369, %int128_8370 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10713 = torch.aten.view %10709, %10712 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10713, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10714 = torch.prim.ListConstruct %10637 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_8371 = torch.constant.bool false
    %10715 = torch.aten.index_put %10713, %10714, %10653, %false_8371 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10715, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_8372 = torch.constant.int 32
    %int2_8373 = torch.constant.int 2
    %int16_8374 = torch.constant.int 16
    %int1_8375 = torch.constant.int 1
    %int128_8376 = torch.constant.int 128
    %10716 = torch.prim.ListConstruct %3035, %int32_8372, %int2_8373, %int16_8374, %int1_8375, %int128_8376 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10717 = torch.aten.view %10715, %10716 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10717, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_8377 = torch.constant.int 131072
    %10718 = torch.prim.ListConstruct %3035, %int131072_8377 : (!torch.int, !torch.int) -> !torch.list<int>
    %10719 = torch.aten.view %10717, %10718 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %10719, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_8378 = torch.constant.int 32
    %int2_8379 = torch.constant.int 2
    %int16_8380 = torch.constant.int 16
    %int1_8381 = torch.constant.int 1
    %int128_8382 = torch.constant.int 128
    %10720 = torch.prim.ListConstruct %3038, %int32_8378, %int2_8379, %int16_8380, %int1_8381, %int128_8382 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10721 = torch.aten.view %8870, %10720 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10721, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_8383 = torch.constant.int 32
    %10722 = torch.aten.mul.int %3038, %int32_8383 : !torch.int, !torch.int -> !torch.int
    %int2_8384 = torch.constant.int 2
    %10723 = torch.aten.mul.int %10722, %int2_8384 : !torch.int, !torch.int -> !torch.int
    %int16_8385 = torch.constant.int 16
    %int1_8386 = torch.constant.int 1
    %int128_8387 = torch.constant.int 128
    %10724 = torch.prim.ListConstruct %10723, %int16_8385, %int1_8386, %int128_8387 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10725 = torch.aten.view %10721, %10724 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10725, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10726 = torch.prim.ListConstruct %10639 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_8388 = torch.constant.bool false
    %10727 = torch.aten.index_put %10725, %10726, %10655, %false_8388 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10727, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_8389 = torch.constant.int 32
    %int2_8390 = torch.constant.int 2
    %int16_8391 = torch.constant.int 16
    %int1_8392 = torch.constant.int 1
    %int128_8393 = torch.constant.int 128
    %10728 = torch.prim.ListConstruct %3038, %int32_8389, %int2_8390, %int16_8391, %int1_8392, %int128_8393 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10729 = torch.aten.view %10727, %10728 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10729, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_8394 = torch.constant.int 131072
    %10730 = torch.prim.ListConstruct %3038, %int131072_8394 : (!torch.int, !torch.int) -> !torch.list<int>
    %10731 = torch.aten.view %10729, %10730 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %10731, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_8395 = torch.constant.int 32
    %int2_8396 = torch.constant.int 2
    %int16_8397 = torch.constant.int 16
    %int1_8398 = torch.constant.int 1
    %int128_8399 = torch.constant.int 128
    %10732 = torch.prim.ListConstruct %3041, %int32_8395, %int2_8396, %int16_8397, %int1_8398, %int128_8399 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10733 = torch.aten.view %8882, %10732 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10733, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_8400 = torch.constant.int 32
    %10734 = torch.aten.mul.int %3041, %int32_8400 : !torch.int, !torch.int -> !torch.int
    %int2_8401 = torch.constant.int 2
    %10735 = torch.aten.mul.int %10734, %int2_8401 : !torch.int, !torch.int -> !torch.int
    %int16_8402 = torch.constant.int 16
    %int1_8403 = torch.constant.int 1
    %int128_8404 = torch.constant.int 128
    %10736 = torch.prim.ListConstruct %10735, %int16_8402, %int1_8403, %int128_8404 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10737 = torch.aten.view %10733, %10736 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10737, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10738 = torch.prim.ListConstruct %10641 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_8405 = torch.constant.bool false
    %10739 = torch.aten.index_put %10737, %10738, %10657, %false_8405 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10739, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_8406 = torch.constant.int 32
    %int2_8407 = torch.constant.int 2
    %int16_8408 = torch.constant.int 16
    %int1_8409 = torch.constant.int 1
    %int128_8410 = torch.constant.int 128
    %10740 = torch.prim.ListConstruct %3041, %int32_8406, %int2_8407, %int16_8408, %int1_8409, %int128_8410 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10741 = torch.aten.view %10739, %10740 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10741, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_8411 = torch.constant.int 131072
    %10742 = torch.prim.ListConstruct %3041, %int131072_8411 : (!torch.int, !torch.int) -> !torch.list<int>
    %10743 = torch.aten.view %10741, %10742 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %10743, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_8412 = torch.constant.int 32
    %int2_8413 = torch.constant.int 2
    %int16_8414 = torch.constant.int 16
    %int1_8415 = torch.constant.int 1
    %int128_8416 = torch.constant.int 128
    %10744 = torch.prim.ListConstruct %3044, %int32_8412, %int2_8413, %int16_8414, %int1_8415, %int128_8416 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10745 = torch.aten.view %8894, %10744 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10745, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_8417 = torch.constant.int 32
    %10746 = torch.aten.mul.int %3044, %int32_8417 : !torch.int, !torch.int -> !torch.int
    %int2_8418 = torch.constant.int 2
    %10747 = torch.aten.mul.int %10746, %int2_8418 : !torch.int, !torch.int -> !torch.int
    %int16_8419 = torch.constant.int 16
    %int1_8420 = torch.constant.int 1
    %int128_8421 = torch.constant.int 128
    %10748 = torch.prim.ListConstruct %10747, %int16_8419, %int1_8420, %int128_8421 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10749 = torch.aten.view %10745, %10748 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10749, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %10750 = torch.prim.ListConstruct %10643 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_8422 = torch.constant.bool false
    %10751 = torch.aten.index_put %10749, %10750, %10659, %false_8422 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %10751, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_8423 = torch.constant.int 32
    %int2_8424 = torch.constant.int 2
    %int16_8425 = torch.constant.int 16
    %int1_8426 = torch.constant.int 1
    %int128_8427 = torch.constant.int 128
    %10752 = torch.prim.ListConstruct %3044, %int32_8423, %int2_8424, %int16_8425, %int1_8426, %int128_8427 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10753 = torch.aten.view %10751, %10752 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %10753, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_8428 = torch.constant.int 131072
    %10754 = torch.prim.ListConstruct %3044, %int131072_8428 : (!torch.int, !torch.int) -> !torch.list<int>
    %10755 = torch.aten.view %10753, %10754 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %10755, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
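    // Grouped-query attention, key path: each per-shard key tensor [4, seq, 1, 128]
    // (%10370..%10475) gains a broadcast dim at -2, giving [4, seq, 1, 1, 128], ahead
    // of repeating the single KV head across the 4 query heads of the shard.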
    %int-2_8429 = torch.constant.int -2
    %10756 = torch.aten.unsqueeze %10370, %int-2_8429 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8430 = torch.constant.int -2
    %10757 = torch.aten.unsqueeze %10385, %int-2_8430 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8431 = torch.constant.int -2
    %10758 = torch.aten.unsqueeze %10400, %int-2_8431 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8432 = torch.constant.int -2
    %10759 = torch.aten.unsqueeze %10415, %int-2_8432 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8433 = torch.constant.int -2
    %10760 = torch.aten.unsqueeze %10430, %int-2_8433 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8434 = torch.constant.int -2
    %10761 = torch.aten.unsqueeze %10445, %int-2_8434 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8435 = torch.constant.int -2
    %10762 = torch.aten.unsqueeze %10460, %int-2_8435 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8436 = torch.constant.int -2
    %10763 = torch.aten.unsqueeze %10475, %int-2_8436 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
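    // Broadcast the singleton KV-head dim to 4 (torch.aten.expand is a zero-copy
    // broadcast; the trailing `false` is the implicit flag), yielding [4, seq, 1, 4, 128].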
    %int4_8437 = torch.constant.int 4
    %int1_8438 = torch.constant.int 1
    %int4_8439 = torch.constant.int 4
    %int128_8440 = torch.constant.int 128
    %10764 = torch.prim.ListConstruct %int4_8437, %10356, %int1_8438, %int4_8439, %int128_8440 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8441 = torch.constant.bool false
    %10765 = torch.aten.expand %10756, %10764, %false_8441 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8442 = torch.constant.int 4
    %int1_8443 = torch.constant.int 1
    %int4_8444 = torch.constant.int 4
    %int128_8445 = torch.constant.int 128
    %10766 = torch.prim.ListConstruct %int4_8442, %10356, %int1_8443, %int4_8444, %int128_8445 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8446 = torch.constant.bool false
    %10767 = torch.aten.expand %10757, %10766, %false_8446 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8447 = torch.constant.int 4
    %int1_8448 = torch.constant.int 1
    %int4_8449 = torch.constant.int 4
    %int128_8450 = torch.constant.int 128
    %10768 = torch.prim.ListConstruct %int4_8447, %10356, %int1_8448, %int4_8449, %int128_8450 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8451 = torch.constant.bool false
    %10769 = torch.aten.expand %10758, %10768, %false_8451 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8452 = torch.constant.int 4
    %int1_8453 = torch.constant.int 1
    %int4_8454 = torch.constant.int 4
    %int128_8455 = torch.constant.int 128
    %10770 = torch.prim.ListConstruct %int4_8452, %10356, %int1_8453, %int4_8454, %int128_8455 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8456 = torch.constant.bool false
    %10771 = torch.aten.expand %10759, %10770, %false_8456 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8457 = torch.constant.int 4
    %int1_8458 = torch.constant.int 1
    %int4_8459 = torch.constant.int 4
    %int128_8460 = torch.constant.int 128
    %10772 = torch.prim.ListConstruct %int4_8457, %10356, %int1_8458, %int4_8459, %int128_8460 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8461 = torch.constant.bool false
    %10773 = torch.aten.expand %10760, %10772, %false_8461 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8462 = torch.constant.int 4
    %int1_8463 = torch.constant.int 1
    %int4_8464 = torch.constant.int 4
    %int128_8465 = torch.constant.int 128
    %10774 = torch.prim.ListConstruct %int4_8462, %10356, %int1_8463, %int4_8464, %int128_8465 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8466 = torch.constant.bool false
    %10775 = torch.aten.expand %10761, %10774, %false_8466 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8467 = torch.constant.int 4
    %int1_8468 = torch.constant.int 1
    %int4_8469 = torch.constant.int 4
    %int128_8470 = torch.constant.int 128
    %10776 = torch.prim.ListConstruct %int4_8467, %10356, %int1_8468, %int4_8469, %int128_8470 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8471 = torch.constant.bool false
    %10777 = torch.aten.expand %10762, %10776, %false_8471 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8472 = torch.constant.int 4
    %int1_8473 = torch.constant.int 1
    %int4_8474 = torch.constant.int 4
    %int128_8475 = torch.constant.int 128
    %10778 = torch.prim.ListConstruct %int4_8472, %10356, %int1_8473, %int4_8474, %int128_8475 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8476 = torch.constant.bool false
    %10779 = torch.aten.expand %10763, %10778, %false_8476 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
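    // Collapse the broadcast layout [4, seq, 1, 4, 128] to [4, seq, 4, 128]: each shard
    // now presents 4 repeated copies of its key head to its 4 query heads.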
    %int4_8477 = torch.constant.int 4
    %int4_8478 = torch.constant.int 4
    %int128_8479 = torch.constant.int 128
    %10780 = torch.prim.ListConstruct %int4_8477, %10356, %int4_8478, %int128_8479 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10781 = torch.aten.view %10765, %10780 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8480 = torch.constant.int 4
    %int4_8481 = torch.constant.int 4
    %int128_8482 = torch.constant.int 128
    %10782 = torch.prim.ListConstruct %int4_8480, %10356, %int4_8481, %int128_8482 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10783 = torch.aten.view %10767, %10782 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8483 = torch.constant.int 4
    %int4_8484 = torch.constant.int 4
    %int128_8485 = torch.constant.int 128
    %10784 = torch.prim.ListConstruct %int4_8483, %10356, %int4_8484, %int128_8485 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10785 = torch.aten.view %10769, %10784 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8486 = torch.constant.int 4
    %int4_8487 = torch.constant.int 4
    %int128_8488 = torch.constant.int 128
    %10786 = torch.prim.ListConstruct %int4_8486, %10356, %int4_8487, %int128_8488 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10787 = torch.aten.view %10771, %10786 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8489 = torch.constant.int 4
    %int4_8490 = torch.constant.int 4
    %int128_8491 = torch.constant.int 128
    %10788 = torch.prim.ListConstruct %int4_8489, %10356, %int4_8490, %int128_8491 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10789 = torch.aten.view %10773, %10788 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8492 = torch.constant.int 4
    %int4_8493 = torch.constant.int 4
    %int128_8494 = torch.constant.int 128
    %10790 = torch.prim.ListConstruct %int4_8492, %10356, %int4_8493, %int128_8494 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10791 = torch.aten.view %10775, %10790 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8495 = torch.constant.int 4
    %int4_8496 = torch.constant.int 4
    %int128_8497 = torch.constant.int 128
    %10792 = torch.prim.ListConstruct %int4_8495, %10356, %int4_8496, %int128_8497 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10793 = torch.aten.view %10777, %10792 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8498 = torch.constant.int 4
    %int4_8499 = torch.constant.int 4
    %int128_8500 = torch.constant.int 128
    %10794 = torch.prim.ListConstruct %int4_8498, %10356, %int4_8499, %int128_8500 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10795 = torch.aten.view %10779, %10794 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
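    // Same repeat-KV pattern for the value tensors (%10145..%10159): unsqueeze at -2
    // to [4, seq, 1, 1, 128] before broadcasting across the query heads.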
    %int-2_8501 = torch.constant.int -2
    %10796 = torch.aten.unsqueeze %10145, %int-2_8501 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8502 = torch.constant.int -2
    %10797 = torch.aten.unsqueeze %10147, %int-2_8502 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8503 = torch.constant.int -2
    %10798 = torch.aten.unsqueeze %10149, %int-2_8503 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8504 = torch.constant.int -2
    %10799 = torch.aten.unsqueeze %10151, %int-2_8504 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8505 = torch.constant.int -2
    %10800 = torch.aten.unsqueeze %10153, %int-2_8505 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8506 = torch.constant.int -2
    %10801 = torch.aten.unsqueeze %10155, %int-2_8506 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8507 = torch.constant.int -2
    %10802 = torch.aten.unsqueeze %10157, %int-2_8507 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_8508 = torch.constant.int -2
    %10803 = torch.aten.unsqueeze %10159, %int-2_8508 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %10803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
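    // Re-read the sequence length from %10069, then expand and flatten the values the
    // same way as the keys, ending at [4, seq, 4, 128] per shard.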
    %int1_8509 = torch.constant.int 1
    %10804 = torch.aten.size.int %10069, %int1_8509 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_8510 = torch.constant.int 4
    %int1_8511 = torch.constant.int 1
    %int4_8512 = torch.constant.int 4
    %int128_8513 = torch.constant.int 128
    %10805 = torch.prim.ListConstruct %int4_8510, %10804, %int1_8511, %int4_8512, %int128_8513 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8514 = torch.constant.bool false
    %10806 = torch.aten.expand %10796, %10805, %false_8514 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8515 = torch.constant.int 4
    %int1_8516 = torch.constant.int 1
    %int4_8517 = torch.constant.int 4
    %int128_8518 = torch.constant.int 128
    %10807 = torch.prim.ListConstruct %int4_8515, %10804, %int1_8516, %int4_8517, %int128_8518 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8519 = torch.constant.bool false
    %10808 = torch.aten.expand %10797, %10807, %false_8519 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8520 = torch.constant.int 4
    %int1_8521 = torch.constant.int 1
    %int4_8522 = torch.constant.int 4
    %int128_8523 = torch.constant.int 128
    %10809 = torch.prim.ListConstruct %int4_8520, %10804, %int1_8521, %int4_8522, %int128_8523 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8524 = torch.constant.bool false
    %10810 = torch.aten.expand %10798, %10809, %false_8524 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8525 = torch.constant.int 4
    %int1_8526 = torch.constant.int 1
    %int4_8527 = torch.constant.int 4
    %int128_8528 = torch.constant.int 128
    %10811 = torch.prim.ListConstruct %int4_8525, %10804, %int1_8526, %int4_8527, %int128_8528 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8529 = torch.constant.bool false
    %10812 = torch.aten.expand %10799, %10811, %false_8529 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8530 = torch.constant.int 4
    %int1_8531 = torch.constant.int 1
    %int4_8532 = torch.constant.int 4
    %int128_8533 = torch.constant.int 128
    %10813 = torch.prim.ListConstruct %int4_8530, %10804, %int1_8531, %int4_8532, %int128_8533 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8534 = torch.constant.bool false
    %10814 = torch.aten.expand %10800, %10813, %false_8534 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8535 = torch.constant.int 4
    %int1_8536 = torch.constant.int 1
    %int4_8537 = torch.constant.int 4
    %int128_8538 = torch.constant.int 128
    %10815 = torch.prim.ListConstruct %int4_8535, %10804, %int1_8536, %int4_8537, %int128_8538 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8539 = torch.constant.bool false
    %10816 = torch.aten.expand %10801, %10815, %false_8539 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8540 = torch.constant.int 4
    %int1_8541 = torch.constant.int 1
    %int4_8542 = torch.constant.int 4
    %int128_8543 = torch.constant.int 128
    %10817 = torch.prim.ListConstruct %int4_8540, %10804, %int1_8541, %int4_8542, %int128_8543 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8544 = torch.constant.bool false
    %10818 = torch.aten.expand %10802, %10817, %false_8544 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8545 = torch.constant.int 4
    %int1_8546 = torch.constant.int 1
    %int4_8547 = torch.constant.int 4
    %int128_8548 = torch.constant.int 128
    %10819 = torch.prim.ListConstruct %int4_8545, %10804, %int1_8546, %int4_8547, %int128_8548 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_8549 = torch.constant.bool false
    %10820 = torch.aten.expand %10803, %10819, %false_8549 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %10820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_8550 = torch.constant.int 4
    %int4_8551 = torch.constant.int 4
    %int128_8552 = torch.constant.int 128
    %10821 = torch.prim.ListConstruct %int4_8550, %10804, %int4_8551, %int128_8552 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10822 = torch.aten.view %10806, %10821 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8553 = torch.constant.int 4
    %int4_8554 = torch.constant.int 4
    %int128_8555 = torch.constant.int 128
    %10823 = torch.prim.ListConstruct %int4_8553, %10804, %int4_8554, %int128_8555 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10824 = torch.aten.view %10808, %10823 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8556 = torch.constant.int 4
    %int4_8557 = torch.constant.int 4
    %int128_8558 = torch.constant.int 128
    %10825 = torch.prim.ListConstruct %int4_8556, %10804, %int4_8557, %int128_8558 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10826 = torch.aten.view %10810, %10825 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8559 = torch.constant.int 4
    %int4_8560 = torch.constant.int 4
    %int128_8561 = torch.constant.int 128
    %10827 = torch.prim.ListConstruct %int4_8559, %10804, %int4_8560, %int128_8561 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10828 = torch.aten.view %10812, %10827 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8562 = torch.constant.int 4
    %int4_8563 = torch.constant.int 4
    %int128_8564 = torch.constant.int 128
    %10829 = torch.prim.ListConstruct %int4_8562, %10804, %int4_8563, %int128_8564 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10830 = torch.aten.view %10814, %10829 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8565 = torch.constant.int 4
    %int4_8566 = torch.constant.int 4
    %int128_8567 = torch.constant.int 128
    %10831 = torch.prim.ListConstruct %int4_8565, %10804, %int4_8566, %int128_8567 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10832 = torch.aten.view %10816, %10831 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8568 = torch.constant.int 4
    %int4_8569 = torch.constant.int 4
    %int128_8570 = torch.constant.int 128
    %10833 = torch.prim.ListConstruct %int4_8568, %10804, %int4_8569, %int128_8570 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10834 = torch.aten.view %10818, %10833 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_8571 = torch.constant.int 4
    %int4_8572 = torch.constant.int 4
    %int128_8573 = torch.constant.int 128
    %10835 = torch.prim.ListConstruct %int4_8571, %10804, %int4_8572, %int128_8573 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10836 = torch.aten.view %10820, %10835 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
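    // Transpose Q (%10212..%10317), the repeated K, and the repeated V from
    // [4, seq, 4, 128] to the [batch, heads, seq, head_dim] layout that the flash
    // attention kernel below expects.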
    %int1_8574 = torch.constant.int 1
    %int2_8575 = torch.constant.int 2
    %10837 = torch.aten.transpose.int %10212, %int1_8574, %int2_8575 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10837, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8576 = torch.constant.int 1
    %int2_8577 = torch.constant.int 2
    %10838 = torch.aten.transpose.int %10227, %int1_8576, %int2_8577 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10838, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8578 = torch.constant.int 1
    %int2_8579 = torch.constant.int 2
    %10839 = torch.aten.transpose.int %10242, %int1_8578, %int2_8579 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10839, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8580 = torch.constant.int 1
    %int2_8581 = torch.constant.int 2
    %10840 = torch.aten.transpose.int %10257, %int1_8580, %int2_8581 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10840, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8582 = torch.constant.int 1
    %int2_8583 = torch.constant.int 2
    %10841 = torch.aten.transpose.int %10272, %int1_8582, %int2_8583 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10841, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8584 = torch.constant.int 1
    %int2_8585 = torch.constant.int 2
    %10842 = torch.aten.transpose.int %10287, %int1_8584, %int2_8585 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10842, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8586 = torch.constant.int 1
    %int2_8587 = torch.constant.int 2
    %10843 = torch.aten.transpose.int %10302, %int1_8586, %int2_8587 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10843, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8588 = torch.constant.int 1
    %int2_8589 = torch.constant.int 2
    %10844 = torch.aten.transpose.int %10317, %int1_8588, %int2_8589 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10844, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8590 = torch.constant.int 1
    %int2_8591 = torch.constant.int 2
    %10845 = torch.aten.transpose.int %10781, %int1_8590, %int2_8591 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10845, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8592 = torch.constant.int 1
    %int2_8593 = torch.constant.int 2
    %10846 = torch.aten.transpose.int %10783, %int1_8592, %int2_8593 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10846, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8594 = torch.constant.int 1
    %int2_8595 = torch.constant.int 2
    %10847 = torch.aten.transpose.int %10785, %int1_8594, %int2_8595 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10847, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8596 = torch.constant.int 1
    %int2_8597 = torch.constant.int 2
    %10848 = torch.aten.transpose.int %10787, %int1_8596, %int2_8597 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10848, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8598 = torch.constant.int 1
    %int2_8599 = torch.constant.int 2
    %10849 = torch.aten.transpose.int %10789, %int1_8598, %int2_8599 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10849, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8600 = torch.constant.int 1
    %int2_8601 = torch.constant.int 2
    %10850 = torch.aten.transpose.int %10791, %int1_8600, %int2_8601 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10850, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8602 = torch.constant.int 1
    %int2_8603 = torch.constant.int 2
    %10851 = torch.aten.transpose.int %10793, %int1_8602, %int2_8603 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10851, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8604 = torch.constant.int 1
    %int2_8605 = torch.constant.int 2
    %10852 = torch.aten.transpose.int %10795, %int1_8604, %int2_8605 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10852, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8606 = torch.constant.int 1
    %int2_8607 = torch.constant.int 2
    %10853 = torch.aten.transpose.int %10822, %int1_8606, %int2_8607 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10853, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8608 = torch.constant.int 1
    %int2_8609 = torch.constant.int 2
    %10854 = torch.aten.transpose.int %10824, %int1_8608, %int2_8609 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10854, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8610 = torch.constant.int 1
    %int2_8611 = torch.constant.int 2
    %10855 = torch.aten.transpose.int %10826, %int1_8610, %int2_8611 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10855, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8612 = torch.constant.int 1
    %int2_8613 = torch.constant.int 2
    %10856 = torch.aten.transpose.int %10828, %int1_8612, %int2_8613 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10856, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8614 = torch.constant.int 1
    %int2_8615 = torch.constant.int 2
    %10857 = torch.aten.transpose.int %10830, %int1_8614, %int2_8615 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10857, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8616 = torch.constant.int 1
    %int2_8617 = torch.constant.int 2
    %10858 = torch.aten.transpose.int %10832, %int1_8616, %int2_8617 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10858, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8618 = torch.constant.int 1
    %int2_8619 = torch.constant.int 2
    %10859 = torch.aten.transpose.int %10834, %int1_8618, %int2_8619 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10859, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_8620 = torch.constant.int 1
    %int2_8621 = torch.constant.int 2
    %10860 = torch.aten.transpose.int %10836, %int1_8620, %int2_8621 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %10860, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
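    // Eight causal flash-attention calls, one per device shard. Per shard this is
    // equivalent to the ATen CPU kernel (a sketch, assuming the standard binding):
    //   out, logsumexp = torch.ops.aten._scaled_dot_product_flash_attention_for_cpu(
    //       q, k, v, dropout_p=0.0, is_causal=True, attn_mask=None, scale=None)
    // with q, k, v of shape [4, 4, seq, 128]; only out (result #0) is used below.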
    %float0.000000e00_8622 = torch.constant.float 0.000000e+00
    %true_8623 = torch.constant.bool true
    %none_8624 = torch.constant.none
    %none_8625 = torch.constant.none
    %10861:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%10837, %10845, %10853, %float0.000000e00_8622, %true_8623, %none_8624, %none_8625) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %10861#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_8626 = torch.constant.float 0.000000e+00
    %true_8627 = torch.constant.bool true
    %none_8628 = torch.constant.none
    %none_8629 = torch.constant.none
    %10862:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%10838, %10846, %10854, %float0.000000e00_8626, %true_8627, %none_8628, %none_8629) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %10862#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_8630 = torch.constant.float 0.000000e+00
    %true_8631 = torch.constant.bool true
    %none_8632 = torch.constant.none
    %none_8633 = torch.constant.none
    %10863:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%10839, %10847, %10855, %float0.000000e00_8630, %true_8631, %none_8632, %none_8633) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %10863#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_8634 = torch.constant.float 0.000000e+00
    %true_8635 = torch.constant.bool true
    %none_8636 = torch.constant.none
    %none_8637 = torch.constant.none
    %10864:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%10840, %10848, %10856, %float0.000000e00_8634, %true_8635, %none_8636, %none_8637) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %10864#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_8638 = torch.constant.float 0.000000e+00
    %true_8639 = torch.constant.bool true
    %none_8640 = torch.constant.none
    %none_8641 = torch.constant.none
    %10865:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%10841, %10849, %10857, %float0.000000e00_8638, %true_8639, %none_8640, %none_8641) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %10865#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_8642 = torch.constant.float 0.000000e+00
    %true_8643 = torch.constant.bool true
    %none_8644 = torch.constant.none
    %none_8645 = torch.constant.none
    %10866:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%10842, %10850, %10858, %float0.000000e00_8642, %true_8643, %none_8644, %none_8645) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %10866#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_8646 = torch.constant.float 0.000000e+00
    %true_8647 = torch.constant.bool true
    %none_8648 = torch.constant.none
    %none_8649 = torch.constant.none
    %10867:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%10843, %10851, %10859, %float0.000000e00_8646, %true_8647, %none_8648, %none_8649) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %10867#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_8650 = torch.constant.float 0.000000e+00
    %true_8651 = torch.constant.bool true
    %none_8652 = torch.constant.none
    %none_8653 = torch.constant.none
    %10868:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%10844, %10852, %10860, %float0.000000e00_8650, %true_8651, %none_8652, %none_8653) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %10868#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
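    // Back to [4, seq, 4, 128]: undo the heads-first transpose on each attention output.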
    %int1_8654 = torch.constant.int 1
    %int2_8655 = torch.constant.int 2
    %10869 = torch.aten.transpose.int %10861#0, %int1_8654, %int2_8655 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_8656 = torch.constant.int 1
    %int2_8657 = torch.constant.int 2
    %10870 = torch.aten.transpose.int %10862#0, %int1_8656, %int2_8657 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_8658 = torch.constant.int 1
    %int2_8659 = torch.constant.int 2
    %10871 = torch.aten.transpose.int %10863#0, %int1_8658, %int2_8659 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_8660 = torch.constant.int 1
    %int2_8661 = torch.constant.int 2
    %10872 = torch.aten.transpose.int %10864#0, %int1_8660, %int2_8661 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_8662 = torch.constant.int 1
    %int2_8663 = torch.constant.int 2
    %10873 = torch.aten.transpose.int %10865#0, %int1_8662, %int2_8663 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_8664 = torch.constant.int 1
    %int2_8665 = torch.constant.int 2
    %10874 = torch.aten.transpose.int %10866#0, %int1_8664, %int2_8665 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_8666 = torch.constant.int 1
    %int2_8667 = torch.constant.int 2
    %10875 = torch.aten.transpose.int %10867#0, %int1_8666, %int2_8667 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_8668 = torch.constant.int 1
    %int2_8669 = torch.constant.int 2
    %10876 = torch.aten.transpose.int %10868#0, %int1_8668, %int2_8669 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %10876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
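    // Merge the 4 heads into a single 512-wide feature dim (4 * 128 = 512) per shard.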
    %int4_8670 = torch.constant.int 4
    %int512_8671 = torch.constant.int 512
    %10877 = torch.prim.ListConstruct %int4_8670, %10198, %int512_8671 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10878 = torch.aten.view %10869, %10877 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %10878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_8672 = torch.constant.int 4
    %int512_8673 = torch.constant.int 512
    %10879 = torch.prim.ListConstruct %int4_8672, %10213, %int512_8673 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10880 = torch.aten.view %10870, %10879 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %10880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_8674 = torch.constant.int 4
    %int512_8675 = torch.constant.int 512
    %10881 = torch.prim.ListConstruct %int4_8674, %10228, %int512_8675 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10882 = torch.aten.view %10871, %10881 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %10882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_8676 = torch.constant.int 4
    %int512_8677 = torch.constant.int 512
    %10883 = torch.prim.ListConstruct %int4_8676, %10243, %int512_8677 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10884 = torch.aten.view %10872, %10883 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %10884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_8678 = torch.constant.int 4
    %int512_8679 = torch.constant.int 512
    %10885 = torch.prim.ListConstruct %int4_8678, %10258, %int512_8679 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10886 = torch.aten.view %10873, %10885 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %10886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_8680 = torch.constant.int 4
    %int512_8681 = torch.constant.int 512
    %10887 = torch.prim.ListConstruct %int4_8680, %10273, %int512_8681 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10888 = torch.aten.view %10874, %10887 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %10888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_8682 = torch.constant.int 4
    %int512_8683 = torch.constant.int 512
    %10889 = torch.prim.ListConstruct %int4_8682, %10288, %int512_8683 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10890 = torch.aten.view %10875, %10889 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %10890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_8684 = torch.constant.int 4
    %int512_8685 = torch.constant.int 512
    %10891 = torch.prim.ListConstruct %int4_8684, %10303, %int512_8685 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10892 = torch.aten.view %10876, %10891 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %10892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
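    // Transpose the per-shard projection weights %328..%335 from [4096, 512] to
    // [512, 4096] for the matmuls below; these are likely the attn_output weight
    // shards of this block (a row-parallel split of the 4096x4096 projection).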
    %int1_8686 = torch.constant.int 1
    %int0_8687 = torch.constant.int 0
    %10893 = torch.prim.ListConstruct %int1_8686, %int0_8687 : (!torch.int, !torch.int) -> !torch.list<int>
    %10894 = torch.aten.permute %328, %10893 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_8688 = torch.constant.int 1
    %int0_8689 = torch.constant.int 0
    %10895 = torch.prim.ListConstruct %int1_8688, %int0_8689 : (!torch.int, !torch.int) -> !torch.list<int>
    %10896 = torch.aten.permute %329, %10895 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_8690 = torch.constant.int 1
    %int0_8691 = torch.constant.int 0
    %10897 = torch.prim.ListConstruct %int1_8690, %int0_8691 : (!torch.int, !torch.int) -> !torch.list<int>
    %10898 = torch.aten.permute %330, %10897 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_8692 = torch.constant.int 1
    %int0_8693 = torch.constant.int 0
    %10899 = torch.prim.ListConstruct %int1_8692, %int0_8693 : (!torch.int, !torch.int) -> !torch.list<int>
    %10900 = torch.aten.permute %331, %10899 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_8694 = torch.constant.int 1
    %int0_8695 = torch.constant.int 0
    %10901 = torch.prim.ListConstruct %int1_8694, %int0_8695 : (!torch.int, !torch.int) -> !torch.list<int>
    %10902 = torch.aten.permute %332, %10901 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_8696 = torch.constant.int 1
    %int0_8697 = torch.constant.int 0
    %10903 = torch.prim.ListConstruct %int1_8696, %int0_8697 : (!torch.int, !torch.int) -> !torch.list<int>
    %10904 = torch.aten.permute %333, %10903 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_8698 = torch.constant.int 1
    %int0_8699 = torch.constant.int 0
    %10905 = torch.prim.ListConstruct %int1_8698, %int0_8699 : (!torch.int, !torch.int) -> !torch.list<int>
    %10906 = torch.aten.permute %334, %10905 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_8700 = torch.constant.int 1
    %int0_8701 = torch.constant.int 0
    %10907 = torch.prim.ListConstruct %int1_8700, %int0_8701 : (!torch.int, !torch.int) -> !torch.list<int>
    %10908 = torch.aten.permute %335, %10907 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
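    // Per-shard output projection: flatten [4, seq, 512] to [4 * seq, 512], multiply
    // by the permuted [512, 4096] weight, and reshape to [4, seq, 4096]. Roughly,
    // per shard:
    //   y = (x.reshape(-1, 512) @ w_t).reshape(4, seq, 4096)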
    %int4_8702 = torch.constant.int 4
    %10909 = torch.aten.mul.int %int4_8702, %10198 : !torch.int, !torch.int -> !torch.int
    %int512_8703 = torch.constant.int 512
    %10910 = torch.prim.ListConstruct %10909, %int512_8703 : (!torch.int, !torch.int) -> !torch.list<int>
    %10911 = torch.aten.view %10878, %10910 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %10911, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %10912 = torch.aten.mm %10911, %10894 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10912, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_8704 = torch.constant.int 4
    %int4096_8705 = torch.constant.int 4096
    %10913 = torch.prim.ListConstruct %int4_8704, %10198, %int4096_8705 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10914 = torch.aten.view %10912, %10913 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_8706 = torch.constant.int 4
    %10915 = torch.aten.mul.int %int4_8706, %10213 : !torch.int, !torch.int -> !torch.int
    %int512_8707 = torch.constant.int 512
    %10916 = torch.prim.ListConstruct %10915, %int512_8707 : (!torch.int, !torch.int) -> !torch.list<int>
    %10917 = torch.aten.view %10880, %10916 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %10917, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %10918 = torch.aten.mm %10917, %10896 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10918, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_8708 = torch.constant.int 4
    %int4096_8709 = torch.constant.int 4096
    %10919 = torch.prim.ListConstruct %int4_8708, %10213, %int4096_8709 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10920 = torch.aten.view %10918, %10919 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_8710 = torch.constant.int 4
    %10921 = torch.aten.mul.int %int4_8710, %10228 : !torch.int, !torch.int -> !torch.int
    %int512_8711 = torch.constant.int 512
    %10922 = torch.prim.ListConstruct %10921, %int512_8711 : (!torch.int, !torch.int) -> !torch.list<int>
    %10923 = torch.aten.view %10882, %10922 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %10923, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %10924 = torch.aten.mm %10923, %10898 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10924, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_8712 = torch.constant.int 4
    %int4096_8713 = torch.constant.int 4096
    %10925 = torch.prim.ListConstruct %int4_8712, %10228, %int4096_8713 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10926 = torch.aten.view %10924, %10925 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_8714 = torch.constant.int 4
    %10927 = torch.aten.mul.int %int4_8714, %10243 : !torch.int, !torch.int -> !torch.int
    %int512_8715 = torch.constant.int 512
    %10928 = torch.prim.ListConstruct %10927, %int512_8715 : (!torch.int, !torch.int) -> !torch.list<int>
    %10929 = torch.aten.view %10884, %10928 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %10929, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %10930 = torch.aten.mm %10929, %10900 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10930, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_8716 = torch.constant.int 4
    %int4096_8717 = torch.constant.int 4096
    %10931 = torch.prim.ListConstruct %int4_8716, %10243, %int4096_8717 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10932 = torch.aten.view %10930, %10931 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_8718 = torch.constant.int 4
    %10933 = torch.aten.mul.int %int4_8718, %10258 : !torch.int, !torch.int -> !torch.int
    %int512_8719 = torch.constant.int 512
    %10934 = torch.prim.ListConstruct %10933, %int512_8719 : (!torch.int, !torch.int) -> !torch.list<int>
    %10935 = torch.aten.view %10886, %10934 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %10935, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %10936 = torch.aten.mm %10935, %10902 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10936, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_8720 = torch.constant.int 4
    %int4096_8721 = torch.constant.int 4096
    %10937 = torch.prim.ListConstruct %int4_8720, %10258, %int4096_8721 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10938 = torch.aten.view %10936, %10937 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_8722 = torch.constant.int 4
    %10939 = torch.aten.mul.int %int4_8722, %10273 : !torch.int, !torch.int -> !torch.int
    %int512_8723 = torch.constant.int 512
    %10940 = torch.prim.ListConstruct %10939, %int512_8723 : (!torch.int, !torch.int) -> !torch.list<int>
    %10941 = torch.aten.view %10888, %10940 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %10941, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %10942 = torch.aten.mm %10941, %10904 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10942, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_8724 = torch.constant.int 4
    %int4096_8725 = torch.constant.int 4096
    %10943 = torch.prim.ListConstruct %int4_8724, %10273, %int4096_8725 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10944 = torch.aten.view %10942, %10943 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_8726 = torch.constant.int 4
    %10945 = torch.aten.mul.int %int4_8726, %10288 : !torch.int, !torch.int -> !torch.int
    %int512_8727 = torch.constant.int 512
    %10946 = torch.prim.ListConstruct %10945, %int512_8727 : (!torch.int, !torch.int) -> !torch.list<int>
    %10947 = torch.aten.view %10890, %10946 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %10947, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %10948 = torch.aten.mm %10947, %10906 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10948, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_8728 = torch.constant.int 4
    %int4096_8729 = torch.constant.int 4096
    %10949 = torch.prim.ListConstruct %int4_8728, %10288, %int4096_8729 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10950 = torch.aten.view %10948, %10949 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_8730 = torch.constant.int 4
    %10951 = torch.aten.mul.int %int4_8730, %10303 : !torch.int, !torch.int -> !torch.int
    %int512_8731 = torch.constant.int 512
    %10952 = torch.prim.ListConstruct %10951, %int512_8731 : (!torch.int, !torch.int) -> !torch.list<int>
    %10953 = torch.aten.view %10892, %10952 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %10953, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %10954 = torch.aten.mm %10953, %10908 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %10954, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_8732 = torch.constant.int 4
    %int4096_8733 = torch.constant.int 4096
    %10955 = torch.prim.ListConstruct %int4_8732, %10303, %int4096_8733 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %10956 = torch.aten.view %10954, %10955 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %10957 = torch_c.to_builtin_tensor %10920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8734 = arith.constant 1 : index
    %dim_8735 = tensor.dim %10957, %c1_8734 : tensor<4x?x4096xf16>
    %10958 = flow.tensor.transfer %10957 : tensor<4x?x4096xf16>{%dim_8735} to #hal.device.promise<@__device_0>
    %10959 = torch_c.from_builtin_tensor %10958 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %10960 = torch_c.to_builtin_tensor %10926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8736 = arith.constant 1 : index
    %dim_8737 = tensor.dim %10960, %c1_8736 : tensor<4x?x4096xf16>
    %10961 = flow.tensor.transfer %10960 : tensor<4x?x4096xf16>{%dim_8737} to #hal.device.promise<@__device_0>
    %10962 = torch_c.from_builtin_tensor %10961 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %10963 = torch_c.to_builtin_tensor %10932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8738 = arith.constant 1 : index
    %dim_8739 = tensor.dim %10963, %c1_8738 : tensor<4x?x4096xf16>
    %10964 = flow.tensor.transfer %10963 : tensor<4x?x4096xf16>{%dim_8739} to #hal.device.promise<@__device_0>
    %10965 = torch_c.from_builtin_tensor %10964 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %10966 = torch_c.to_builtin_tensor %10938 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8740 = arith.constant 1 : index
    %dim_8741 = tensor.dim %10966, %c1_8740 : tensor<4x?x4096xf16>
    %10967 = flow.tensor.transfer %10966 : tensor<4x?x4096xf16>{%dim_8741} to #hal.device.promise<@__device_0>
    %10968 = torch_c.from_builtin_tensor %10967 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %10969 = torch_c.to_builtin_tensor %10944 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8742 = arith.constant 1 : index
    %dim_8743 = tensor.dim %10969, %c1_8742 : tensor<4x?x4096xf16>
    %10970 = flow.tensor.transfer %10969 : tensor<4x?x4096xf16>{%dim_8743} to #hal.device.promise<@__device_0>
    %10971 = torch_c.from_builtin_tensor %10970 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %10972 = torch_c.to_builtin_tensor %10950 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8744 = arith.constant 1 : index
    %dim_8745 = tensor.dim %10972, %c1_8744 : tensor<4x?x4096xf16>
    %10973 = flow.tensor.transfer %10972 : tensor<4x?x4096xf16>{%dim_8745} to #hal.device.promise<@__device_0>
    %10974 = torch_c.from_builtin_tensor %10973 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %10975 = torch_c.to_builtin_tensor %10956 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8746 = arith.constant 1 : index
    %dim_8747 = tensor.dim %10975, %c1_8746 : tensor<4x?x4096xf16>
    %10976 = flow.tensor.transfer %10975 : tensor<4x?x4096xf16>{%dim_8747} to #hal.device.promise<@__device_0>
    %10977 = torch_c.from_builtin_tensor %10976 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8748 = torch.constant.int 1
    %10978 = torch.aten.add.Tensor %10914, %10959, %int1_8748 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8749 = torch.constant.int 1
    %10979 = torch.aten.add.Tensor %10978, %10962, %int1_8749 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8750 = torch.constant.int 1
    %10980 = torch.aten.add.Tensor %10979, %10965, %int1_8750 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8751 = torch.constant.int 1
    %10981 = torch.aten.add.Tensor %10980, %10968, %int1_8751 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8752 = torch.constant.int 1
    %10982 = torch.aten.add.Tensor %10981, %10971, %int1_8752 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8753 = torch.constant.int 1
    %10983 = torch.aten.add.Tensor %10982, %10974, %int1_8753 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8754 = torch.constant.int 1
    %10984 = torch.aten.add.Tensor %10983, %10977, %int1_8754 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
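    // Same reduction replicated for @__device_1: transfer the seven remote
    // partials, then fold them into the local partial %10920.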
    %10985 = torch_c.to_builtin_tensor %10914 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8755 = arith.constant 1 : index
    %dim_8756 = tensor.dim %10985, %c1_8755 : tensor<4x?x4096xf16>
    %10986 = flow.tensor.transfer %10985 : tensor<4x?x4096xf16>{%dim_8756} to #hal.device.promise<@__device_1>
    %10987 = torch_c.from_builtin_tensor %10986 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %10988 = torch_c.to_builtin_tensor %10926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8757 = arith.constant 1 : index
    %dim_8758 = tensor.dim %10988, %c1_8757 : tensor<4x?x4096xf16>
    %10989 = flow.tensor.transfer %10988 : tensor<4x?x4096xf16>{%dim_8758} to #hal.device.promise<@__device_1>
    %10990 = torch_c.from_builtin_tensor %10989 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %10991 = torch_c.to_builtin_tensor %10932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8759 = arith.constant 1 : index
    %dim_8760 = tensor.dim %10991, %c1_8759 : tensor<4x?x4096xf16>
    %10992 = flow.tensor.transfer %10991 : tensor<4x?x4096xf16>{%dim_8760} to #hal.device.promise<@__device_1>
    %10993 = torch_c.from_builtin_tensor %10992 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %10994 = torch_c.to_builtin_tensor %10938 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8761 = arith.constant 1 : index
    %dim_8762 = tensor.dim %10994, %c1_8761 : tensor<4x?x4096xf16>
    %10995 = flow.tensor.transfer %10994 : tensor<4x?x4096xf16>{%dim_8762} to #hal.device.promise<@__device_1>
    %10996 = torch_c.from_builtin_tensor %10995 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %10997 = torch_c.to_builtin_tensor %10944 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8763 = arith.constant 1 : index
    %dim_8764 = tensor.dim %10997, %c1_8763 : tensor<4x?x4096xf16>
    %10998 = flow.tensor.transfer %10997 : tensor<4x?x4096xf16>{%dim_8764} to #hal.device.promise<@__device_1>
    %10999 = torch_c.from_builtin_tensor %10998 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %10999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11000 = torch_c.to_builtin_tensor %10950 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8765 = arith.constant 1 : index
    %dim_8766 = tensor.dim %11000, %c1_8765 : tensor<4x?x4096xf16>
    %11001 = flow.tensor.transfer %11000 : tensor<4x?x4096xf16>{%dim_8766} to #hal.device.promise<@__device_1>
    %11002 = torch_c.from_builtin_tensor %11001 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11003 = torch_c.to_builtin_tensor %10956 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8767 = arith.constant 1 : index
    %dim_8768 = tensor.dim %11003, %c1_8767 : tensor<4x?x4096xf16>
    %11004 = flow.tensor.transfer %11003 : tensor<4x?x4096xf16>{%dim_8768} to #hal.device.promise<@__device_1>
    %11005 = torch_c.from_builtin_tensor %11004 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8769 = torch.constant.int 1
    %11006 = torch.aten.add.Tensor %10987, %10920, %int1_8769 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8770 = torch.constant.int 1
    %11007 = torch.aten.add.Tensor %11006, %10990, %int1_8770 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8771 = torch.constant.int 1
    %11008 = torch.aten.add.Tensor %11007, %10993, %int1_8771 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8772 = torch.constant.int 1
    %11009 = torch.aten.add.Tensor %11008, %10996, %int1_8772 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8773 = torch.constant.int 1
    %11010 = torch.aten.add.Tensor %11009, %10999, %int1_8773 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8774 = torch.constant.int 1
    %11011 = torch.aten.add.Tensor %11010, %11002, %int1_8774 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8775 = torch.constant.int 1
    %11012 = torch.aten.add.Tensor %11011, %11005, %int1_8775 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
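    // Replicated reduction for @__device_2 (local partial %10926).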
    %11013 = torch_c.to_builtin_tensor %10914 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8776 = arith.constant 1 : index
    %dim_8777 = tensor.dim %11013, %c1_8776 : tensor<4x?x4096xf16>
    %11014 = flow.tensor.transfer %11013 : tensor<4x?x4096xf16>{%dim_8777} to #hal.device.promise<@__device_2>
    %11015 = torch_c.from_builtin_tensor %11014 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11016 = torch_c.to_builtin_tensor %10920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8778 = arith.constant 1 : index
    %dim_8779 = tensor.dim %11016, %c1_8778 : tensor<4x?x4096xf16>
    %11017 = flow.tensor.transfer %11016 : tensor<4x?x4096xf16>{%dim_8779} to #hal.device.promise<@__device_2>
    %11018 = torch_c.from_builtin_tensor %11017 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11019 = torch_c.to_builtin_tensor %10932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8780 = arith.constant 1 : index
    %dim_8781 = tensor.dim %11019, %c1_8780 : tensor<4x?x4096xf16>
    %11020 = flow.tensor.transfer %11019 : tensor<4x?x4096xf16>{%dim_8781} to #hal.device.promise<@__device_2>
    %11021 = torch_c.from_builtin_tensor %11020 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11022 = torch_c.to_builtin_tensor %10938 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8782 = arith.constant 1 : index
    %dim_8783 = tensor.dim %11022, %c1_8782 : tensor<4x?x4096xf16>
    %11023 = flow.tensor.transfer %11022 : tensor<4x?x4096xf16>{%dim_8783} to #hal.device.promise<@__device_2>
    %11024 = torch_c.from_builtin_tensor %11023 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11025 = torch_c.to_builtin_tensor %10944 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8784 = arith.constant 1 : index
    %dim_8785 = tensor.dim %11025, %c1_8784 : tensor<4x?x4096xf16>
    %11026 = flow.tensor.transfer %11025 : tensor<4x?x4096xf16>{%dim_8785} to #hal.device.promise<@__device_2>
    %11027 = torch_c.from_builtin_tensor %11026 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11028 = torch_c.to_builtin_tensor %10950 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8786 = arith.constant 1 : index
    %dim_8787 = tensor.dim %11028, %c1_8786 : tensor<4x?x4096xf16>
    %11029 = flow.tensor.transfer %11028 : tensor<4x?x4096xf16>{%dim_8787} to #hal.device.promise<@__device_2>
    %11030 = torch_c.from_builtin_tensor %11029 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11031 = torch_c.to_builtin_tensor %10956 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8788 = arith.constant 1 : index
    %dim_8789 = tensor.dim %11031, %c1_8788 : tensor<4x?x4096xf16>
    %11032 = flow.tensor.transfer %11031 : tensor<4x?x4096xf16>{%dim_8789} to #hal.device.promise<@__device_2>
    %11033 = torch_c.from_builtin_tensor %11032 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8790 = torch.constant.int 1
    %11034 = torch.aten.add.Tensor %11015, %11018, %int1_8790 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8791 = torch.constant.int 1
    %11035 = torch.aten.add.Tensor %11034, %10926, %int1_8791 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8792 = torch.constant.int 1
    %11036 = torch.aten.add.Tensor %11035, %11021, %int1_8792 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8793 = torch.constant.int 1
    %11037 = torch.aten.add.Tensor %11036, %11024, %int1_8793 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8794 = torch.constant.int 1
    %11038 = torch.aten.add.Tensor %11037, %11027, %int1_8794 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8795 = torch.constant.int 1
    %11039 = torch.aten.add.Tensor %11038, %11030, %int1_8795 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8796 = torch.constant.int 1
    %11040 = torch.aten.add.Tensor %11039, %11033, %int1_8796 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
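    // Replicated reduction for @__device_3 (local partial %10932).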
    %11041 = torch_c.to_builtin_tensor %10914 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8797 = arith.constant 1 : index
    %dim_8798 = tensor.dim %11041, %c1_8797 : tensor<4x?x4096xf16>
    %11042 = flow.tensor.transfer %11041 : tensor<4x?x4096xf16>{%dim_8798} to #hal.device.promise<@__device_3>
    %11043 = torch_c.from_builtin_tensor %11042 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11044 = torch_c.to_builtin_tensor %10920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8799 = arith.constant 1 : index
    %dim_8800 = tensor.dim %11044, %c1_8799 : tensor<4x?x4096xf16>
    %11045 = flow.tensor.transfer %11044 : tensor<4x?x4096xf16>{%dim_8800} to #hal.device.promise<@__device_3>
    %11046 = torch_c.from_builtin_tensor %11045 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11047 = torch_c.to_builtin_tensor %10926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8801 = arith.constant 1 : index
    %dim_8802 = tensor.dim %11047, %c1_8801 : tensor<4x?x4096xf16>
    %11048 = flow.tensor.transfer %11047 : tensor<4x?x4096xf16>{%dim_8802} to #hal.device.promise<@__device_3>
    %11049 = torch_c.from_builtin_tensor %11048 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11050 = torch_c.to_builtin_tensor %10938 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8803 = arith.constant 1 : index
    %dim_8804 = tensor.dim %11050, %c1_8803 : tensor<4x?x4096xf16>
    %11051 = flow.tensor.transfer %11050 : tensor<4x?x4096xf16>{%dim_8804} to #hal.device.promise<@__device_3>
    %11052 = torch_c.from_builtin_tensor %11051 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11053 = torch_c.to_builtin_tensor %10944 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8805 = arith.constant 1 : index
    %dim_8806 = tensor.dim %11053, %c1_8805 : tensor<4x?x4096xf16>
    %11054 = flow.tensor.transfer %11053 : tensor<4x?x4096xf16>{%dim_8806} to #hal.device.promise<@__device_3>
    %11055 = torch_c.from_builtin_tensor %11054 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11056 = torch_c.to_builtin_tensor %10950 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8807 = arith.constant 1 : index
    %dim_8808 = tensor.dim %11056, %c1_8807 : tensor<4x?x4096xf16>
    %11057 = flow.tensor.transfer %11056 : tensor<4x?x4096xf16>{%dim_8808} to #hal.device.promise<@__device_3>
    %11058 = torch_c.from_builtin_tensor %11057 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11059 = torch_c.to_builtin_tensor %10956 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8809 = arith.constant 1 : index
    %dim_8810 = tensor.dim %11059, %c1_8809 : tensor<4x?x4096xf16>
    %11060 = flow.tensor.transfer %11059 : tensor<4x?x4096xf16>{%dim_8810} to #hal.device.promise<@__device_3>
    %11061 = torch_c.from_builtin_tensor %11060 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8811 = torch.constant.int 1
    %11062 = torch.aten.add.Tensor %11043, %11046, %int1_8811 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8812 = torch.constant.int 1
    %11063 = torch.aten.add.Tensor %11062, %11049, %int1_8812 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8813 = torch.constant.int 1
    %11064 = torch.aten.add.Tensor %11063, %10932, %int1_8813 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8814 = torch.constant.int 1
    %11065 = torch.aten.add.Tensor %11064, %11052, %int1_8814 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8815 = torch.constant.int 1
    %11066 = torch.aten.add.Tensor %11065, %11055, %int1_8815 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8816 = torch.constant.int 1
    %11067 = torch.aten.add.Tensor %11066, %11058, %int1_8816 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8817 = torch.constant.int 1
    %11068 = torch.aten.add.Tensor %11067, %11061, %int1_8817 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
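    // Replicated reduction for @__device_4 (local partial %10938).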
    %11069 = torch_c.to_builtin_tensor %10914 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8818 = arith.constant 1 : index
    %dim_8819 = tensor.dim %11069, %c1_8818 : tensor<4x?x4096xf16>
    %11070 = flow.tensor.transfer %11069 : tensor<4x?x4096xf16>{%dim_8819} to #hal.device.promise<@__device_4>
    %11071 = torch_c.from_builtin_tensor %11070 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11072 = torch_c.to_builtin_tensor %10920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8820 = arith.constant 1 : index
    %dim_8821 = tensor.dim %11072, %c1_8820 : tensor<4x?x4096xf16>
    %11073 = flow.tensor.transfer %11072 : tensor<4x?x4096xf16>{%dim_8821} to #hal.device.promise<@__device_4>
    %11074 = torch_c.from_builtin_tensor %11073 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11075 = torch_c.to_builtin_tensor %10926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8822 = arith.constant 1 : index
    %dim_8823 = tensor.dim %11075, %c1_8822 : tensor<4x?x4096xf16>
    %11076 = flow.tensor.transfer %11075 : tensor<4x?x4096xf16>{%dim_8823} to #hal.device.promise<@__device_4>
    %11077 = torch_c.from_builtin_tensor %11076 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11078 = torch_c.to_builtin_tensor %10932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8824 = arith.constant 1 : index
    %dim_8825 = tensor.dim %11078, %c1_8824 : tensor<4x?x4096xf16>
    %11079 = flow.tensor.transfer %11078 : tensor<4x?x4096xf16>{%dim_8825} to #hal.device.promise<@__device_4>
    %11080 = torch_c.from_builtin_tensor %11079 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11081 = torch_c.to_builtin_tensor %10944 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8826 = arith.constant 1 : index
    %dim_8827 = tensor.dim %11081, %c1_8826 : tensor<4x?x4096xf16>
    %11082 = flow.tensor.transfer %11081 : tensor<4x?x4096xf16>{%dim_8827} to #hal.device.promise<@__device_4>
    %11083 = torch_c.from_builtin_tensor %11082 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11084 = torch_c.to_builtin_tensor %10950 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8828 = arith.constant 1 : index
    %dim_8829 = tensor.dim %11084, %c1_8828 : tensor<4x?x4096xf16>
    %11085 = flow.tensor.transfer %11084 : tensor<4x?x4096xf16>{%dim_8829} to #hal.device.promise<@__device_4>
    %11086 = torch_c.from_builtin_tensor %11085 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11087 = torch_c.to_builtin_tensor %10956 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8830 = arith.constant 1 : index
    %dim_8831 = tensor.dim %11087, %c1_8830 : tensor<4x?x4096xf16>
    %11088 = flow.tensor.transfer %11087 : tensor<4x?x4096xf16>{%dim_8831} to #hal.device.promise<@__device_4>
    %11089 = torch_c.from_builtin_tensor %11088 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8832 = torch.constant.int 1
    %11090 = torch.aten.add.Tensor %11071, %11074, %int1_8832 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8833 = torch.constant.int 1
    %11091 = torch.aten.add.Tensor %11090, %11077, %int1_8833 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8834 = torch.constant.int 1
    %11092 = torch.aten.add.Tensor %11091, %11080, %int1_8834 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8835 = torch.constant.int 1
    %11093 = torch.aten.add.Tensor %11092, %10938, %int1_8835 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8836 = torch.constant.int 1
    %11094 = torch.aten.add.Tensor %11093, %11083, %int1_8836 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8837 = torch.constant.int 1
    %11095 = torch.aten.add.Tensor %11094, %11086, %int1_8837 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8838 = torch.constant.int 1
    %11096 = torch.aten.add.Tensor %11095, %11089, %int1_8838 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
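    // Replicated reduction for @__device_5 (local partial %10944).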
    %11097 = torch_c.to_builtin_tensor %10914 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8839 = arith.constant 1 : index
    %dim_8840 = tensor.dim %11097, %c1_8839 : tensor<4x?x4096xf16>
    %11098 = flow.tensor.transfer %11097 : tensor<4x?x4096xf16>{%dim_8840} to #hal.device.promise<@__device_5>
    %11099 = torch_c.from_builtin_tensor %11098 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11100 = torch_c.to_builtin_tensor %10920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8841 = arith.constant 1 : index
    %dim_8842 = tensor.dim %11100, %c1_8841 : tensor<4x?x4096xf16>
    %11101 = flow.tensor.transfer %11100 : tensor<4x?x4096xf16>{%dim_8842} to #hal.device.promise<@__device_5>
    %11102 = torch_c.from_builtin_tensor %11101 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11103 = torch_c.to_builtin_tensor %10926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8843 = arith.constant 1 : index
    %dim_8844 = tensor.dim %11103, %c1_8843 : tensor<4x?x4096xf16>
    %11104 = flow.tensor.transfer %11103 : tensor<4x?x4096xf16>{%dim_8844} to #hal.device.promise<@__device_5>
    %11105 = torch_c.from_builtin_tensor %11104 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11106 = torch_c.to_builtin_tensor %10932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8845 = arith.constant 1 : index
    %dim_8846 = tensor.dim %11106, %c1_8845 : tensor<4x?x4096xf16>
    %11107 = flow.tensor.transfer %11106 : tensor<4x?x4096xf16>{%dim_8846} to #hal.device.promise<@__device_5>
    %11108 = torch_c.from_builtin_tensor %11107 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11109 = torch_c.to_builtin_tensor %10938 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8847 = arith.constant 1 : index
    %dim_8848 = tensor.dim %11109, %c1_8847 : tensor<4x?x4096xf16>
    %11110 = flow.tensor.transfer %11109 : tensor<4x?x4096xf16>{%dim_8848} to #hal.device.promise<@__device_5>
    %11111 = torch_c.from_builtin_tensor %11110 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11112 = torch_c.to_builtin_tensor %10950 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8849 = arith.constant 1 : index
    %dim_8850 = tensor.dim %11112, %c1_8849 : tensor<4x?x4096xf16>
    %11113 = flow.tensor.transfer %11112 : tensor<4x?x4096xf16>{%dim_8850} to #hal.device.promise<@__device_5>
    %11114 = torch_c.from_builtin_tensor %11113 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11115 = torch_c.to_builtin_tensor %10956 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8851 = arith.constant 1 : index
    %dim_8852 = tensor.dim %11115, %c1_8851 : tensor<4x?x4096xf16>
    %11116 = flow.tensor.transfer %11115 : tensor<4x?x4096xf16>{%dim_8852} to #hal.device.promise<@__device_5>
    %11117 = torch_c.from_builtin_tensor %11116 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8853 = torch.constant.int 1
    %11118 = torch.aten.add.Tensor %11099, %11102, %int1_8853 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8854 = torch.constant.int 1
    %11119 = torch.aten.add.Tensor %11118, %11105, %int1_8854 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8855 = torch.constant.int 1
    %11120 = torch.aten.add.Tensor %11119, %11108, %int1_8855 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8856 = torch.constant.int 1
    %11121 = torch.aten.add.Tensor %11120, %11111, %int1_8856 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8857 = torch.constant.int 1
    %11122 = torch.aten.add.Tensor %11121, %10944, %int1_8857 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8858 = torch.constant.int 1
    %11123 = torch.aten.add.Tensor %11122, %11114, %int1_8858 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8859 = torch.constant.int 1
    %11124 = torch.aten.add.Tensor %11123, %11117, %int1_8859 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11124, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
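    // Replicated reduction for @__device_6 (local partial %10950).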
    %11125 = torch_c.to_builtin_tensor %10914 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8860 = arith.constant 1 : index
    %dim_8861 = tensor.dim %11125, %c1_8860 : tensor<4x?x4096xf16>
    %11126 = flow.tensor.transfer %11125 : tensor<4x?x4096xf16>{%dim_8861} to #hal.device.promise<@__device_6>
    %11127 = torch_c.from_builtin_tensor %11126 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11128 = torch_c.to_builtin_tensor %10920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8862 = arith.constant 1 : index
    %dim_8863 = tensor.dim %11128, %c1_8862 : tensor<4x?x4096xf16>
    %11129 = flow.tensor.transfer %11128 : tensor<4x?x4096xf16>{%dim_8863} to #hal.device.promise<@__device_6>
    %11130 = torch_c.from_builtin_tensor %11129 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11131 = torch_c.to_builtin_tensor %10926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8864 = arith.constant 1 : index
    %dim_8865 = tensor.dim %11131, %c1_8864 : tensor<4x?x4096xf16>
    %11132 = flow.tensor.transfer %11131 : tensor<4x?x4096xf16>{%dim_8865} to #hal.device.promise<@__device_6>
    %11133 = torch_c.from_builtin_tensor %11132 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11134 = torch_c.to_builtin_tensor %10932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8866 = arith.constant 1 : index
    %dim_8867 = tensor.dim %11134, %c1_8866 : tensor<4x?x4096xf16>
    %11135 = flow.tensor.transfer %11134 : tensor<4x?x4096xf16>{%dim_8867} to #hal.device.promise<@__device_6>
    %11136 = torch_c.from_builtin_tensor %11135 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11137 = torch_c.to_builtin_tensor %10938 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8868 = arith.constant 1 : index
    %dim_8869 = tensor.dim %11137, %c1_8868 : tensor<4x?x4096xf16>
    %11138 = flow.tensor.transfer %11137 : tensor<4x?x4096xf16>{%dim_8869} to #hal.device.promise<@__device_6>
    %11139 = torch_c.from_builtin_tensor %11138 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11140 = torch_c.to_builtin_tensor %10944 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8870 = arith.constant 1 : index
    %dim_8871 = tensor.dim %11140, %c1_8870 : tensor<4x?x4096xf16>
    %11141 = flow.tensor.transfer %11140 : tensor<4x?x4096xf16>{%dim_8871} to #hal.device.promise<@__device_6>
    %11142 = torch_c.from_builtin_tensor %11141 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11143 = torch_c.to_builtin_tensor %10956 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8872 = arith.constant 1 : index
    %dim_8873 = tensor.dim %11143, %c1_8872 : tensor<4x?x4096xf16>
    %11144 = flow.tensor.transfer %11143 : tensor<4x?x4096xf16>{%dim_8873} to #hal.device.promise<@__device_6>
    %11145 = torch_c.from_builtin_tensor %11144 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8874 = torch.constant.int 1
    %11146 = torch.aten.add.Tensor %11127, %11130, %int1_8874 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8875 = torch.constant.int 1
    %11147 = torch.aten.add.Tensor %11146, %11133, %int1_8875 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8876 = torch.constant.int 1
    %11148 = torch.aten.add.Tensor %11147, %11136, %int1_8876 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8877 = torch.constant.int 1
    %11149 = torch.aten.add.Tensor %11148, %11139, %int1_8877 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8878 = torch.constant.int 1
    %11150 = torch.aten.add.Tensor %11149, %11142, %int1_8878 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8879 = torch.constant.int 1
    %11151 = torch.aten.add.Tensor %11150, %10950, %int1_8879 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8880 = torch.constant.int 1
    %11152 = torch.aten.add.Tensor %11151, %11145, %int1_8880 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
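    // Replicated reduction for @__device_7 (local partial %10956). After this
    // point every device holds an identical [4,?,4096] sum of all eight shards.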
    %11153 = torch_c.to_builtin_tensor %10914 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8881 = arith.constant 1 : index
    %dim_8882 = tensor.dim %11153, %c1_8881 : tensor<4x?x4096xf16>
    %11154 = flow.tensor.transfer %11153 : tensor<4x?x4096xf16>{%dim_8882} to #hal.device.promise<@__device_7>
    %11155 = torch_c.from_builtin_tensor %11154 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11156 = torch_c.to_builtin_tensor %10920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8883 = arith.constant 1 : index
    %dim_8884 = tensor.dim %11156, %c1_8883 : tensor<4x?x4096xf16>
    %11157 = flow.tensor.transfer %11156 : tensor<4x?x4096xf16>{%dim_8884} to #hal.device.promise<@__device_7>
    %11158 = torch_c.from_builtin_tensor %11157 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11159 = torch_c.to_builtin_tensor %10926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8885 = arith.constant 1 : index
    %dim_8886 = tensor.dim %11159, %c1_8885 : tensor<4x?x4096xf16>
    %11160 = flow.tensor.transfer %11159 : tensor<4x?x4096xf16>{%dim_8886} to #hal.device.promise<@__device_7>
    %11161 = torch_c.from_builtin_tensor %11160 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11162 = torch_c.to_builtin_tensor %10932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8887 = arith.constant 1 : index
    %dim_8888 = tensor.dim %11162, %c1_8887 : tensor<4x?x4096xf16>
    %11163 = flow.tensor.transfer %11162 : tensor<4x?x4096xf16>{%dim_8888} to #hal.device.promise<@__device_7>
    %11164 = torch_c.from_builtin_tensor %11163 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11165 = torch_c.to_builtin_tensor %10938 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8889 = arith.constant 1 : index
    %dim_8890 = tensor.dim %11165, %c1_8889 : tensor<4x?x4096xf16>
    %11166 = flow.tensor.transfer %11165 : tensor<4x?x4096xf16>{%dim_8890} to #hal.device.promise<@__device_7>
    %11167 = torch_c.from_builtin_tensor %11166 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11168 = torch_c.to_builtin_tensor %10944 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8891 = arith.constant 1 : index
    %dim_8892 = tensor.dim %11168, %c1_8891 : tensor<4x?x4096xf16>
    %11169 = flow.tensor.transfer %11168 : tensor<4x?x4096xf16>{%dim_8892} to #hal.device.promise<@__device_7>
    %11170 = torch_c.from_builtin_tensor %11169 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11171 = torch_c.to_builtin_tensor %10950 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_8893 = arith.constant 1 : index
    %dim_8894 = tensor.dim %11171, %c1_8893 : tensor<4x?x4096xf16>
    %11172 = flow.tensor.transfer %11171 : tensor<4x?x4096xf16>{%dim_8894} to #hal.device.promise<@__device_7>
    %11173 = torch_c.from_builtin_tensor %11172 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8895 = torch.constant.int 1
    %11174 = torch.aten.add.Tensor %11155, %11158, %int1_8895 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8896 = torch.constant.int 1
    %11175 = torch.aten.add.Tensor %11174, %11161, %int1_8896 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8897 = torch.constant.int 1
    %11176 = torch.aten.add.Tensor %11175, %11164, %int1_8897 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8898 = torch.constant.int 1
    %11177 = torch.aten.add.Tensor %11176, %11167, %int1_8898 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8899 = torch.constant.int 1
    %11178 = torch.aten.add.Tensor %11177, %11170, %int1_8899 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8900 = torch.constant.int 1
    %11179 = torch.aten.add.Tensor %11178, %11173, %int1_8900 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8901 = torch.constant.int 1
    %11180 = torch.aten.add.Tensor %11179, %10956, %int1_8901 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
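    // Residual add: fold each device's reduced output into its copy of the
    // incoming residual stream (%9840..%9847), one result per device.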
    %int1_8902 = torch.constant.int 1
    %11181 = torch.aten.add.Tensor %9840, %10984, %int1_8902 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8903 = torch.constant.int 1
    %11182 = torch.aten.add.Tensor %9841, %11012, %int1_8903 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8904 = torch.constant.int 1
    %11183 = torch.aten.add.Tensor %9842, %11040, %int1_8904 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8905 = torch.constant.int 1
    %11184 = torch.aten.add.Tensor %9843, %11068, %int1_8905 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8906 = torch.constant.int 1
    %11185 = torch.aten.add.Tensor %9844, %11096, %int1_8906 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8907 = torch.constant.int 1
    %11186 = torch.aten.add.Tensor %9845, %11124, %int1_8907 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8908 = torch.constant.int 1
    %11187 = torch.aten.add.Tensor %9846, %11152, %int1_8908 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_8909 = torch.constant.int 1
    %11188 = torch.aten.add.Tensor %9847, %11180, %int1_8909 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
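    // Upcast the eight residual tensors to f32 (torch dtype code 6); the
    // square/mean sequence that follows is computed in higher precision.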
    %int6_8910 = torch.constant.int 6
    %11189 = torch.prims.convert_element_type %11181, %int6_8910 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_8911 = torch.constant.int 6
    %11190 = torch.prims.convert_element_type %11182, %int6_8911 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_8912 = torch.constant.int 6
    %11191 = torch.prims.convert_element_type %11183, %int6_8912 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_8913 = torch.constant.int 6
    %11192 = torch.prims.convert_element_type %11184, %int6_8913 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_8914 = torch.constant.int 6
    %11193 = torch.prims.convert_element_type %11185, %int6_8914 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_8915 = torch.constant.int 6
    %11194 = torch.prims.convert_element_type %11186, %int6_8915 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_8916 = torch.constant.int 6
    %11195 = torch.prims.convert_element_type %11187, %int6_8916 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_8917 = torch.constant.int 6
    %11196 = torch.prims.convert_element_type %11188, %int6_8917 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
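    // Elementwise square (pow 2) of each device's f32 residual, feeding the
    // mean-square reduction below.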
    %int2_8918 = torch.constant.int 2
    %11197 = torch.aten.pow.Tensor_Scalar %11189, %int2_8918 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_8919 = torch.constant.int 2
    %11198 = torch.aten.pow.Tensor_Scalar %11190, %int2_8919 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_8920 = torch.constant.int 2
    %11199 = torch.aten.pow.Tensor_Scalar %11191, %int2_8920 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_8921 = torch.constant.int 2
    %11200 = torch.aten.pow.Tensor_Scalar %11192, %int2_8921 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_8922 = torch.constant.int 2
    %11201 = torch.aten.pow.Tensor_Scalar %11193, %int2_8922 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_8923 = torch.constant.int 2
    %11202 = torch.aten.pow.Tensor_Scalar %11194, %int2_8923 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_8924 = torch.constant.int 2
    %11203 = torch.aten.pow.Tensor_Scalar %11195, %int2_8924 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_8925 = torch.constant.int 2
    %11204 = torch.aten.pow.Tensor_Scalar %11196, %int2_8925 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
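    // RMSNorm, step 2: mean of x^2 over the hidden dim (dim -1, keepdim) per shard -> [4,?,1].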
    %int-1_8926 = torch.constant.int -1
    %11205 = torch.prim.ListConstruct %int-1_8926 : (!torch.int) -> !torch.list<int>
    %true_8927 = torch.constant.bool true
    %none_8928 = torch.constant.none
    %11206 = torch.aten.mean.dim %11197, %11205, %true_8927, %none_8928 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_8929 = torch.constant.int -1
    %11207 = torch.prim.ListConstruct %int-1_8929 : (!torch.int) -> !torch.list<int>
    %true_8930 = torch.constant.bool true
    %none_8931 = torch.constant.none
    %11208 = torch.aten.mean.dim %11198, %11207, %true_8930, %none_8931 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_8932 = torch.constant.int -1
    %11209 = torch.prim.ListConstruct %int-1_8932 : (!torch.int) -> !torch.list<int>
    %true_8933 = torch.constant.bool true
    %none_8934 = torch.constant.none
    %11210 = torch.aten.mean.dim %11199, %11209, %true_8933, %none_8934 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_8935 = torch.constant.int -1
    %11211 = torch.prim.ListConstruct %int-1_8935 : (!torch.int) -> !torch.list<int>
    %true_8936 = torch.constant.bool true
    %none_8937 = torch.constant.none
    %11212 = torch.aten.mean.dim %11200, %11211, %true_8936, %none_8937 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_8938 = torch.constant.int -1
    %11213 = torch.prim.ListConstruct %int-1_8938 : (!torch.int) -> !torch.list<int>
    %true_8939 = torch.constant.bool true
    %none_8940 = torch.constant.none
    %11214 = torch.aten.mean.dim %11201, %11213, %true_8939, %none_8940 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_8941 = torch.constant.int -1
    %11215 = torch.prim.ListConstruct %int-1_8941 : (!torch.int) -> !torch.list<int>
    %true_8942 = torch.constant.bool true
    %none_8943 = torch.constant.none
    %11216 = torch.aten.mean.dim %11202, %11215, %true_8942, %none_8943 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_8944 = torch.constant.int -1
    %11217 = torch.prim.ListConstruct %int-1_8944 : (!torch.int) -> !torch.list<int>
    %true_8945 = torch.constant.bool true
    %none_8946 = torch.constant.none
    %11218 = torch.aten.mean.dim %11203, %11217, %true_8945, %none_8946 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_8947 = torch.constant.int -1
    %11219 = torch.prim.ListConstruct %int-1_8947 : (!torch.int) -> !torch.list<int>
    %true_8948 = torch.constant.bool true
    %none_8949 = torch.constant.none
    %11220 = torch.aten.mean.dim %11204, %11219, %true_8948, %none_8949 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
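    // RMSNorm, step 3: add the variance epsilon (f32 constant, ~1e-5) to each per-shard mean.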
    %float9.999990e-06_8950 = torch.constant.float 9.9999997473787516E-6
    %int1_8951 = torch.constant.int 1
    %11221 = torch.aten.add.Scalar %11206, %float9.999990e-06_8950, %int1_8951 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_8952 = torch.constant.float 9.9999997473787516E-6
    %int1_8953 = torch.constant.int 1
    %11222 = torch.aten.add.Scalar %11208, %float9.999990e-06_8952, %int1_8953 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_8954 = torch.constant.float 9.9999997473787516E-6
    %int1_8955 = torch.constant.int 1
    %11223 = torch.aten.add.Scalar %11210, %float9.999990e-06_8954, %int1_8955 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_8956 = torch.constant.float 9.9999997473787516E-6
    %int1_8957 = torch.constant.int 1
    %11224 = torch.aten.add.Scalar %11212, %float9.999990e-06_8956, %int1_8957 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_8958 = torch.constant.float 9.9999997473787516E-6
    %int1_8959 = torch.constant.int 1
    %11225 = torch.aten.add.Scalar %11214, %float9.999990e-06_8958, %int1_8959 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_8960 = torch.constant.float 9.9999997473787516E-6
    %int1_8961 = torch.constant.int 1
    %11226 = torch.aten.add.Scalar %11216, %float9.999990e-06_8960, %int1_8961 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_8962 = torch.constant.float 9.9999997473787516E-6
    %int1_8963 = torch.constant.int 1
    %11227 = torch.aten.add.Scalar %11218, %float9.999990e-06_8962, %int1_8963 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_8964 = torch.constant.float 9.9999997473787516E-6
    %int1_8965 = torch.constant.int 1
    %11228 = torch.aten.add.Scalar %11220, %float9.999990e-06_8964, %int1_8965 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
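    // RMSNorm, step 4: rsqrt(mean(x^2) + eps), the reciprocal scale per shard.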
    %11229 = torch.aten.rsqrt %11221 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11230 = torch.aten.rsqrt %11222 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11231 = torch.aten.rsqrt %11223 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11232 = torch.aten.rsqrt %11224 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11233 = torch.aten.rsqrt %11225 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11234 = torch.aten.rsqrt %11226 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11235 = torch.aten.rsqrt %11227 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11236 = torch.aten.rsqrt %11228 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
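    // RMSNorm, step 5: normalize by multiplying each f32 shard with its broadcast [4,?,1] rsqrt scale.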
    %11237 = torch.aten.mul.Tensor %11189, %11229 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11238 = torch.aten.mul.Tensor %11190, %11230 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11239 = torch.aten.mul.Tensor %11191, %11231 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11240 = torch.aten.mul.Tensor %11192, %11232 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11241 = torch.aten.mul.Tensor %11193, %11233 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11242 = torch.aten.mul.Tensor %11194, %11234 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11243 = torch.aten.mul.Tensor %11195, %11235 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11244 = torch.aten.mul.Tensor %11196, %11236 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
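    // Scale by the replicated [4096] norm weight (%336..%343, one copy per device; presumably this block's ffn_norm).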
    %11245 = torch.aten.mul.Tensor %336, %11237 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11246 = torch.aten.mul.Tensor %337, %11238 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11247 = torch.aten.mul.Tensor %338, %11239 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11248 = torch.aten.mul.Tensor %339, %11240 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11249 = torch.aten.mul.Tensor %340, %11241 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11250 = torch.aten.mul.Tensor %341, %11242 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11251 = torch.aten.mul.Tensor %342, %11243 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11252 = torch.aten.mul.Tensor %343, %11244 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
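    // Cast the normalized activations back to f16 (dtype 5) for the FFN matmuls below.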
    %int5_8966 = torch.constant.int 5
    %11253 = torch.prims.convert_element_type %11245, %int5_8966 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_8967 = torch.constant.int 5
    %11254 = torch.prims.convert_element_type %11246, %int5_8967 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_8968 = torch.constant.int 5
    %11255 = torch.prims.convert_element_type %11247, %int5_8968 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_8969 = torch.constant.int 5
    %11256 = torch.prims.convert_element_type %11248, %int5_8969 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_8970 = torch.constant.int 5
    %11257 = torch.prims.convert_element_type %11249, %int5_8970 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_8971 = torch.constant.int 5
    %11258 = torch.prims.convert_element_type %11250, %int5_8971 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_8972 = torch.constant.int 5
    %11259 = torch.prims.convert_element_type %11251, %int5_8972 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_8973 = torch.constant.int 5
    %11260 = torch.prims.convert_element_type %11252, %int5_8973 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
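    // Transpose the first FFN weight shard on each device: [1792,4096] -> [4096,1792]
    // (column-sharded 8 ways; presumably 8 x 1792 = 14336 total FFN width).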
    %int1_8974 = torch.constant.int 1
    %int0_8975 = torch.constant.int 0
    %11261 = torch.prim.ListConstruct %int1_8974, %int0_8975 : (!torch.int, !torch.int) -> !torch.list<int>
    %11262 = torch.aten.permute %344, %11261 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_8976 = torch.constant.int 1
    %int0_8977 = torch.constant.int 0
    %11263 = torch.prim.ListConstruct %int1_8976, %int0_8977 : (!torch.int, !torch.int) -> !torch.list<int>
    %11264 = torch.aten.permute %345, %11263 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_8978 = torch.constant.int 1
    %int0_8979 = torch.constant.int 0
    %11265 = torch.prim.ListConstruct %int1_8978, %int0_8979 : (!torch.int, !torch.int) -> !torch.list<int>
    %11266 = torch.aten.permute %346, %11265 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_8980 = torch.constant.int 1
    %int0_8981 = torch.constant.int 0
    %11267 = torch.prim.ListConstruct %int1_8980, %int0_8981 : (!torch.int, !torch.int) -> !torch.list<int>
    %11268 = torch.aten.permute %347, %11267 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_8982 = torch.constant.int 1
    %int0_8983 = torch.constant.int 0
    %11269 = torch.prim.ListConstruct %int1_8982, %int0_8983 : (!torch.int, !torch.int) -> !torch.list<int>
    %11270 = torch.aten.permute %348, %11269 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_8984 = torch.constant.int 1
    %int0_8985 = torch.constant.int 0
    %11271 = torch.prim.ListConstruct %int1_8984, %int0_8985 : (!torch.int, !torch.int) -> !torch.list<int>
    %11272 = torch.aten.permute %349, %11271 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_8986 = torch.constant.int 1
    %int0_8987 = torch.constant.int 0
    %11273 = torch.prim.ListConstruct %int1_8986, %int0_8987 : (!torch.int, !torch.int) -> !torch.list<int>
    %11274 = torch.aten.permute %350, %11273 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_8988 = torch.constant.int 1
    %int0_8989 = torch.constant.int 0
    %11275 = torch.prim.ListConstruct %int1_8988, %int0_8989 : (!torch.int, !torch.int) -> !torch.list<int>
    %11276 = torch.aten.permute %351, %11275 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
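    // First projection per device (the SiLU branch, i.e. the gate in SwiGLU):
    // flatten [4,?,4096] -> [?,4096], mm against [4096,1792], reshape back to [4,?,1792].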
    %int4_8990 = torch.constant.int 4
    %11277 = torch.aten.mul.int %int4_8990, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_8991 = torch.constant.int 4096
    %11278 = torch.prim.ListConstruct %11277, %int4096_8991 : (!torch.int, !torch.int) -> !torch.list<int>
    %11279 = torch.aten.view %11253, %11278 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11279, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11280 = torch.aten.mm %11279, %11262 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11280, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_8992 = torch.constant.int 4
    %int1792_8993 = torch.constant.int 1792
    %11281 = torch.prim.ListConstruct %int4_8992, %2482, %int1792_8993 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11282 = torch.aten.view %11280, %11281 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_8994 = torch.constant.int 4
    %11283 = torch.aten.mul.int %int4_8994, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_8995 = torch.constant.int 4096
    %11284 = torch.prim.ListConstruct %11283, %int4096_8995 : (!torch.int, !torch.int) -> !torch.list<int>
    %11285 = torch.aten.view %11254, %11284 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11285, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11286 = torch.aten.mm %11285, %11264 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11286, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_8996 = torch.constant.int 4
    %int1792_8997 = torch.constant.int 1792
    %11287 = torch.prim.ListConstruct %int4_8996, %2482, %int1792_8997 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11288 = torch.aten.view %11286, %11287 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_8998 = torch.constant.int 4
    %11289 = torch.aten.mul.int %int4_8998, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_8999 = torch.constant.int 4096
    %11290 = torch.prim.ListConstruct %11289, %int4096_8999 : (!torch.int, !torch.int) -> !torch.list<int>
    %11291 = torch.aten.view %11255, %11290 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11291, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11292 = torch.aten.mm %11291, %11266 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11292, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9000 = torch.constant.int 4
    %int1792_9001 = torch.constant.int 1792
    %11293 = torch.prim.ListConstruct %int4_9000, %2482, %int1792_9001 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11294 = torch.aten.view %11292, %11293 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_9002 = torch.constant.int 4
    %11295 = torch.aten.mul.int %int4_9002, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9003 = torch.constant.int 4096
    %11296 = torch.prim.ListConstruct %11295, %int4096_9003 : (!torch.int, !torch.int) -> !torch.list<int>
    %11297 = torch.aten.view %11256, %11296 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11297, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11298 = torch.aten.mm %11297, %11268 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11298, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9004 = torch.constant.int 4
    %int1792_9005 = torch.constant.int 1792
    %11299 = torch.prim.ListConstruct %int4_9004, %2482, %int1792_9005 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11300 = torch.aten.view %11298, %11299 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_9006 = torch.constant.int 4
    %11301 = torch.aten.mul.int %int4_9006, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9007 = torch.constant.int 4096
    %11302 = torch.prim.ListConstruct %11301, %int4096_9007 : (!torch.int, !torch.int) -> !torch.list<int>
    %11303 = torch.aten.view %11257, %11302 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11303, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11304 = torch.aten.mm %11303, %11270 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11304, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9008 = torch.constant.int 4
    %int1792_9009 = torch.constant.int 1792
    %11305 = torch.prim.ListConstruct %int4_9008, %2482, %int1792_9009 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11306 = torch.aten.view %11304, %11305 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_9010 = torch.constant.int 4
    %11307 = torch.aten.mul.int %int4_9010, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9011 = torch.constant.int 4096
    %11308 = torch.prim.ListConstruct %11307, %int4096_9011 : (!torch.int, !torch.int) -> !torch.list<int>
    %11309 = torch.aten.view %11258, %11308 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11309, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11310 = torch.aten.mm %11309, %11272 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11310, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9012 = torch.constant.int 4
    %int1792_9013 = torch.constant.int 1792
    %11311 = torch.prim.ListConstruct %int4_9012, %2482, %int1792_9013 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11312 = torch.aten.view %11310, %11311 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_9014 = torch.constant.int 4
    %11313 = torch.aten.mul.int %int4_9014, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9015 = torch.constant.int 4096
    %11314 = torch.prim.ListConstruct %11313, %int4096_9015 : (!torch.int, !torch.int) -> !torch.list<int>
    %11315 = torch.aten.view %11259, %11314 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11315, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11316 = torch.aten.mm %11315, %11274 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11316, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9016 = torch.constant.int 4
    %int1792_9017 = torch.constant.int 1792
    %11317 = torch.prim.ListConstruct %int4_9016, %2482, %int1792_9017 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11318 = torch.aten.view %11316, %11317 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_9018 = torch.constant.int 4
    %11319 = torch.aten.mul.int %int4_9018, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9019 = torch.constant.int 4096
    %11320 = torch.prim.ListConstruct %11319, %int4096_9019 : (!torch.int, !torch.int) -> !torch.list<int>
    %11321 = torch.aten.view %11260, %11320 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11321, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11322 = torch.aten.mm %11321, %11276 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11322, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9020 = torch.constant.int 4
    %int1792_9021 = torch.constant.int 1792
    %11323 = torch.prim.ListConstruct %int4_9020, %2482, %int1792_9021 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11324 = torch.aten.view %11322, %11323 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
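    // SiLU activation on all 8 gate-projection outputs.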
    %11325 = torch.aten.silu %11282 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11326 = torch.aten.silu %11288 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11327 = torch.aten.silu %11294 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11328 = torch.aten.silu %11300 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11329 = torch.aten.silu %11306 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11330 = torch.aten.silu %11312 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11331 = torch.aten.silu %11318 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11332 = torch.aten.silu %11324 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
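    // Transpose the second (up-projection) weight shards, same [1792,4096] -> [4096,1792] layout.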
    %int1_9022 = torch.constant.int 1
    %int0_9023 = torch.constant.int 0
    %11333 = torch.prim.ListConstruct %int1_9022, %int0_9023 : (!torch.int, !torch.int) -> !torch.list<int>
    %11334 = torch.aten.permute %352, %11333 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_9024 = torch.constant.int 1
    %int0_9025 = torch.constant.int 0
    %11335 = torch.prim.ListConstruct %int1_9024, %int0_9025 : (!torch.int, !torch.int) -> !torch.list<int>
    %11336 = torch.aten.permute %353, %11335 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_9026 = torch.constant.int 1
    %int0_9027 = torch.constant.int 0
    %11337 = torch.prim.ListConstruct %int1_9026, %int0_9027 : (!torch.int, !torch.int) -> !torch.list<int>
    %11338 = torch.aten.permute %354, %11337 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_9028 = torch.constant.int 1
    %int0_9029 = torch.constant.int 0
    %11339 = torch.prim.ListConstruct %int1_9028, %int0_9029 : (!torch.int, !torch.int) -> !torch.list<int>
    %11340 = torch.aten.permute %355, %11339 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_9030 = torch.constant.int 1
    %int0_9031 = torch.constant.int 0
    %11341 = torch.prim.ListConstruct %int1_9030, %int0_9031 : (!torch.int, !torch.int) -> !torch.list<int>
    %11342 = torch.aten.permute %356, %11341 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_9032 = torch.constant.int 1
    %int0_9033 = torch.constant.int 0
    %11343 = torch.prim.ListConstruct %int1_9032, %int0_9033 : (!torch.int, !torch.int) -> !torch.list<int>
    %11344 = torch.aten.permute %357, %11343 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_9034 = torch.constant.int 1
    %int0_9035 = torch.constant.int 0
    %11345 = torch.prim.ListConstruct %int1_9034, %int0_9035 : (!torch.int, !torch.int) -> !torch.list<int>
    %11346 = torch.aten.permute %358, %11345 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_9036 = torch.constant.int 1
    %int0_9037 = torch.constant.int 0
    %11347 = torch.prim.ListConstruct %int1_9036, %int0_9037 : (!torch.int, !torch.int) -> !torch.list<int>
    %11348 = torch.aten.permute %359, %11347 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
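    // Second (up) projection per device, mirroring the gate projection's flatten/mm/reshape.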
    %int4_9038 = torch.constant.int 4
    %11349 = torch.aten.mul.int %int4_9038, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9039 = torch.constant.int 4096
    %11350 = torch.prim.ListConstruct %11349, %int4096_9039 : (!torch.int, !torch.int) -> !torch.list<int>
    %11351 = torch.aten.view %11253, %11350 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11351, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11352 = torch.aten.mm %11351, %11334 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11352, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9040 = torch.constant.int 4
    %int1792_9041 = torch.constant.int 1792
    %11353 = torch.prim.ListConstruct %int4_9040, %2482, %int1792_9041 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11354 = torch.aten.view %11352, %11353 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_9042 = torch.constant.int 4
    %11355 = torch.aten.mul.int %int4_9042, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9043 = torch.constant.int 4096
    %11356 = torch.prim.ListConstruct %11355, %int4096_9043 : (!torch.int, !torch.int) -> !torch.list<int>
    %11357 = torch.aten.view %11254, %11356 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11357, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11358 = torch.aten.mm %11357, %11336 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11358, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9044 = torch.constant.int 4
    %int1792_9045 = torch.constant.int 1792
    %11359 = torch.prim.ListConstruct %int4_9044, %2482, %int1792_9045 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11360 = torch.aten.view %11358, %11359 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_9046 = torch.constant.int 4
    %11361 = torch.aten.mul.int %int4_9046, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9047 = torch.constant.int 4096
    %11362 = torch.prim.ListConstruct %11361, %int4096_9047 : (!torch.int, !torch.int) -> !torch.list<int>
    %11363 = torch.aten.view %11255, %11362 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11363, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11364 = torch.aten.mm %11363, %11338 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11364, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9048 = torch.constant.int 4
    %int1792_9049 = torch.constant.int 1792
    %11365 = torch.prim.ListConstruct %int4_9048, %2482, %int1792_9049 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11366 = torch.aten.view %11364, %11365 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_9050 = torch.constant.int 4
    %11367 = torch.aten.mul.int %int4_9050, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9051 = torch.constant.int 4096
    %11368 = torch.prim.ListConstruct %11367, %int4096_9051 : (!torch.int, !torch.int) -> !torch.list<int>
    %11369 = torch.aten.view %11256, %11368 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11369, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11370 = torch.aten.mm %11369, %11340 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11370, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9052 = torch.constant.int 4
    %int1792_9053 = torch.constant.int 1792
    %11371 = torch.prim.ListConstruct %int4_9052, %2482, %int1792_9053 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11372 = torch.aten.view %11370, %11371 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_9054 = torch.constant.int 4
    %11373 = torch.aten.mul.int %int4_9054, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9055 = torch.constant.int 4096
    %11374 = torch.prim.ListConstruct %11373, %int4096_9055 : (!torch.int, !torch.int) -> !torch.list<int>
    %11375 = torch.aten.view %11257, %11374 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11375, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11376 = torch.aten.mm %11375, %11342 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11376, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9056 = torch.constant.int 4
    %int1792_9057 = torch.constant.int 1792
    %11377 = torch.prim.ListConstruct %int4_9056, %2482, %int1792_9057 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11378 = torch.aten.view %11376, %11377 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_9058 = torch.constant.int 4
    %11379 = torch.aten.mul.int %int4_9058, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9059 = torch.constant.int 4096
    %11380 = torch.prim.ListConstruct %11379, %int4096_9059 : (!torch.int, !torch.int) -> !torch.list<int>
    %11381 = torch.aten.view %11258, %11380 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11381, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11382 = torch.aten.mm %11381, %11344 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11382, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9060 = torch.constant.int 4
    %int1792_9061 = torch.constant.int 1792
    %11383 = torch.prim.ListConstruct %int4_9060, %2482, %int1792_9061 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11384 = torch.aten.view %11382, %11383 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_9062 = torch.constant.int 4
    %11385 = torch.aten.mul.int %int4_9062, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9063 = torch.constant.int 4096
    %11386 = torch.prim.ListConstruct %11385, %int4096_9063 : (!torch.int, !torch.int) -> !torch.list<int>
    %11387 = torch.aten.view %11259, %11386 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11387, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11388 = torch.aten.mm %11387, %11346 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11388, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9064 = torch.constant.int 4
    %int1792_9065 = torch.constant.int 1792
    %11389 = torch.prim.ListConstruct %int4_9064, %2482, %int1792_9065 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11390 = torch.aten.view %11388, %11389 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_9066 = torch.constant.int 4
    %11391 = torch.aten.mul.int %int4_9066, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9067 = torch.constant.int 4096
    %11392 = torch.prim.ListConstruct %11391, %int4096_9067 : (!torch.int, !torch.int) -> !torch.list<int>
    %11393 = torch.aten.view %11260, %11392 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11393, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11394 = torch.aten.mm %11393, %11348 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11394, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_9068 = torch.constant.int 4
    %int1792_9069 = torch.constant.int 1792
    %11395 = torch.prim.ListConstruct %int4_9068, %2482, %int1792_9069 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11396 = torch.aten.view %11394, %11395 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
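    // SwiGLU gating: silu(gate) * up, elementwise per shard.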
    %11397 = torch.aten.mul.Tensor %11325, %11354 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11398 = torch.aten.mul.Tensor %11326, %11360 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11399 = torch.aten.mul.Tensor %11327, %11366 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11400 = torch.aten.mul.Tensor %11328, %11372 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11401 = torch.aten.mul.Tensor %11329, %11378 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11402 = torch.aten.mul.Tensor %11330, %11384 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11403 = torch.aten.mul.Tensor %11331, %11390 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %11404 = torch.aten.mul.Tensor %11332, %11396 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %11404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
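    // Transpose the down-projection shards: [4096,1792] -> [1792,4096].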
    %int1_9070 = torch.constant.int 1
    %int0_9071 = torch.constant.int 0
    %11405 = torch.prim.ListConstruct %int1_9070, %int0_9071 : (!torch.int, !torch.int) -> !torch.list<int>
    %11406 = torch.aten.permute %360, %11405 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_9072 = torch.constant.int 1
    %int0_9073 = torch.constant.int 0
    %11407 = torch.prim.ListConstruct %int1_9072, %int0_9073 : (!torch.int, !torch.int) -> !torch.list<int>
    %11408 = torch.aten.permute %361, %11407 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_9074 = torch.constant.int 1
    %int0_9075 = torch.constant.int 0
    %11409 = torch.prim.ListConstruct %int1_9074, %int0_9075 : (!torch.int, !torch.int) -> !torch.list<int>
    %11410 = torch.aten.permute %362, %11409 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_9076 = torch.constant.int 1
    %int0_9077 = torch.constant.int 0
    %11411 = torch.prim.ListConstruct %int1_9076, %int0_9077 : (!torch.int, !torch.int) -> !torch.list<int>
    %11412 = torch.aten.permute %363, %11411 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_9078 = torch.constant.int 1
    %int0_9079 = torch.constant.int 0
    %11413 = torch.prim.ListConstruct %int1_9078, %int0_9079 : (!torch.int, !torch.int) -> !torch.list<int>
    %11414 = torch.aten.permute %364, %11413 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_9080 = torch.constant.int 1
    %int0_9081 = torch.constant.int 0
    %11415 = torch.prim.ListConstruct %int1_9080, %int0_9081 : (!torch.int, !torch.int) -> !torch.list<int>
    %11416 = torch.aten.permute %365, %11415 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_9082 = torch.constant.int 1
    %int0_9083 = torch.constant.int 0
    %11417 = torch.prim.ListConstruct %int1_9082, %int0_9083 : (!torch.int, !torch.int) -> !torch.list<int>
    %11418 = torch.aten.permute %366, %11417 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_9084 = torch.constant.int 1
    %int0_9085 = torch.constant.int 0
    %11419 = torch.prim.ListConstruct %int1_9084, %int0_9085 : (!torch.int, !torch.int) -> !torch.list<int>
    %11420 = torch.aten.permute %367, %11419 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
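    // Down projection: each device maps its gated [4,?,1792] slice to a partial [4,?,4096] output.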
    %int1_9086 = torch.constant.int 1
    %11421 = torch.aten.size.int %11282, %int1_9086 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_9087 = torch.constant.int 4
    %11422 = torch.aten.mul.int %int4_9087, %11421 : !torch.int, !torch.int -> !torch.int
    %int1792_9088 = torch.constant.int 1792
    %11423 = torch.prim.ListConstruct %11422, %int1792_9088 : (!torch.int, !torch.int) -> !torch.list<int>
    %11424 = torch.aten.view %11397, %11423 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11424, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %11425 = torch.aten.mm %11424, %11406 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11425, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_9089 = torch.constant.int 4
    %int4096_9090 = torch.constant.int 4096
    %11426 = torch.prim.ListConstruct %int4_9089, %11421, %int4096_9090 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11427 = torch.aten.view %11425, %11426 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9091 = torch.constant.int 1
    %11428 = torch.aten.size.int %11288, %int1_9091 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_9092 = torch.constant.int 4
    %11429 = torch.aten.mul.int %int4_9092, %11428 : !torch.int, !torch.int -> !torch.int
    %int1792_9093 = torch.constant.int 1792
    %11430 = torch.prim.ListConstruct %11429, %int1792_9093 : (!torch.int, !torch.int) -> !torch.list<int>
    %11431 = torch.aten.view %11398, %11430 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11431, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %11432 = torch.aten.mm %11431, %11408 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11432, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_9094 = torch.constant.int 4
    %int4096_9095 = torch.constant.int 4096
    %11433 = torch.prim.ListConstruct %int4_9094, %11428, %int4096_9095 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11434 = torch.aten.view %11432, %11433 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9096 = torch.constant.int 1
    %11435 = torch.aten.size.int %11294, %int1_9096 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_9097 = torch.constant.int 4
    %11436 = torch.aten.mul.int %int4_9097, %11435 : !torch.int, !torch.int -> !torch.int
    %int1792_9098 = torch.constant.int 1792
    %11437 = torch.prim.ListConstruct %11436, %int1792_9098 : (!torch.int, !torch.int) -> !torch.list<int>
    %11438 = torch.aten.view %11399, %11437 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11438, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %11439 = torch.aten.mm %11438, %11410 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11439, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_9099 = torch.constant.int 4
    %int4096_9100 = torch.constant.int 4096
    %11440 = torch.prim.ListConstruct %int4_9099, %11435, %int4096_9100 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11441 = torch.aten.view %11439, %11440 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9101 = torch.constant.int 1
    %11442 = torch.aten.size.int %11300, %int1_9101 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_9102 = torch.constant.int 4
    %11443 = torch.aten.mul.int %int4_9102, %11442 : !torch.int, !torch.int -> !torch.int
    %int1792_9103 = torch.constant.int 1792
    %11444 = torch.prim.ListConstruct %11443, %int1792_9103 : (!torch.int, !torch.int) -> !torch.list<int>
    %11445 = torch.aten.view %11400, %11444 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11445, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %11446 = torch.aten.mm %11445, %11412 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11446, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_9104 = torch.constant.int 4
    %int4096_9105 = torch.constant.int 4096
    %11447 = torch.prim.ListConstruct %int4_9104, %11442, %int4096_9105 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11448 = torch.aten.view %11446, %11447 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9106 = torch.constant.int 1
    %11449 = torch.aten.size.int %11306, %int1_9106 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_9107 = torch.constant.int 4
    %11450 = torch.aten.mul.int %int4_9107, %11449 : !torch.int, !torch.int -> !torch.int
    %int1792_9108 = torch.constant.int 1792
    %11451 = torch.prim.ListConstruct %11450, %int1792_9108 : (!torch.int, !torch.int) -> !torch.list<int>
    %11452 = torch.aten.view %11401, %11451 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11452, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %11453 = torch.aten.mm %11452, %11414 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11453, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_9109 = torch.constant.int 4
    %int4096_9110 = torch.constant.int 4096
    %11454 = torch.prim.ListConstruct %int4_9109, %11449, %int4096_9110 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11455 = torch.aten.view %11453, %11454 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9111 = torch.constant.int 1
    %11456 = torch.aten.size.int %11312, %int1_9111 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_9112 = torch.constant.int 4
    %11457 = torch.aten.mul.int %int4_9112, %11456 : !torch.int, !torch.int -> !torch.int
    %int1792_9113 = torch.constant.int 1792
    %11458 = torch.prim.ListConstruct %11457, %int1792_9113 : (!torch.int, !torch.int) -> !torch.list<int>
    %11459 = torch.aten.view %11402, %11458 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11459, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %11460 = torch.aten.mm %11459, %11416 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11460, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_9114 = torch.constant.int 4
    %int4096_9115 = torch.constant.int 4096
    %11461 = torch.prim.ListConstruct %int4_9114, %11456, %int4096_9115 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11462 = torch.aten.view %11460, %11461 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9116 = torch.constant.int 1
    %11463 = torch.aten.size.int %11318, %int1_9116 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_9117 = torch.constant.int 4
    %11464 = torch.aten.mul.int %int4_9117, %11463 : !torch.int, !torch.int -> !torch.int
    %int1792_9118 = torch.constant.int 1792
    %11465 = torch.prim.ListConstruct %11464, %int1792_9118 : (!torch.int, !torch.int) -> !torch.list<int>
    %11466 = torch.aten.view %11403, %11465 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11466, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %11467 = torch.aten.mm %11466, %11418 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11467, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_9119 = torch.constant.int 4
    %int4096_9120 = torch.constant.int 4096
    %11468 = torch.prim.ListConstruct %int4_9119, %11463, %int4096_9120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11469 = torch.aten.view %11467, %11468 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9121 = torch.constant.int 1
    %11470 = torch.aten.size.int %11324, %int1_9121 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_9122 = torch.constant.int 4
    %11471 = torch.aten.mul.int %int4_9122, %11470 : !torch.int, !torch.int -> !torch.int
    %int1792_9123 = torch.constant.int 1792
    %11472 = torch.prim.ListConstruct %11471, %int1792_9123 : (!torch.int, !torch.int) -> !torch.list<int>
    %11473 = torch.aten.view %11404, %11472 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %11473, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %11474 = torch.aten.mm %11473, %11420 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11474, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_9124 = torch.constant.int 4
    %int4096_9125 = torch.constant.int 4096
    %11475 = torch.prim.ListConstruct %int4_9124, %11470, %int4096_9125 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11476 = torch.aten.view %11474, %11475 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
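    // All eight partial results (%11427, %11434, %11441, %11448, %11455,
    // %11462, %11469, %11476) are summed on every device: effectively an
    // all-reduce spelled out as point-to-point flow.tensor.transfer copies
    // followed by an add chain. First, @__device_0 gathers the seven partials
    // it does not already hold.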
    %11477 = torch_c.to_builtin_tensor %11434 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9126 = arith.constant 1 : index
    %dim_9127 = tensor.dim %11477, %c1_9126 : tensor<4x?x4096xf16>
    %11478 = flow.tensor.transfer %11477 : tensor<4x?x4096xf16>{%dim_9127} to #hal.device.promise<@__device_0>
    %11479 = torch_c.from_builtin_tensor %11478 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11480 = torch_c.to_builtin_tensor %11441 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9128 = arith.constant 1 : index
    %dim_9129 = tensor.dim %11480, %c1_9128 : tensor<4x?x4096xf16>
    %11481 = flow.tensor.transfer %11480 : tensor<4x?x4096xf16>{%dim_9129} to #hal.device.promise<@__device_0>
    %11482 = torch_c.from_builtin_tensor %11481 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11483 = torch_c.to_builtin_tensor %11448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9130 = arith.constant 1 : index
    %dim_9131 = tensor.dim %11483, %c1_9130 : tensor<4x?x4096xf16>
    %11484 = flow.tensor.transfer %11483 : tensor<4x?x4096xf16>{%dim_9131} to #hal.device.promise<@__device_0>
    %11485 = torch_c.from_builtin_tensor %11484 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11486 = torch_c.to_builtin_tensor %11455 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9132 = arith.constant 1 : index
    %dim_9133 = tensor.dim %11486, %c1_9132 : tensor<4x?x4096xf16>
    %11487 = flow.tensor.transfer %11486 : tensor<4x?x4096xf16>{%dim_9133} to #hal.device.promise<@__device_0>
    %11488 = torch_c.from_builtin_tensor %11487 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11489 = torch_c.to_builtin_tensor %11462 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9134 = arith.constant 1 : index
    %dim_9135 = tensor.dim %11489, %c1_9134 : tensor<4x?x4096xf16>
    %11490 = flow.tensor.transfer %11489 : tensor<4x?x4096xf16>{%dim_9135} to #hal.device.promise<@__device_0>
    %11491 = torch_c.from_builtin_tensor %11490 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11492 = torch_c.to_builtin_tensor %11469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9136 = arith.constant 1 : index
    %dim_9137 = tensor.dim %11492, %c1_9136 : tensor<4x?x4096xf16>
    %11493 = flow.tensor.transfer %11492 : tensor<4x?x4096xf16>{%dim_9137} to #hal.device.promise<@__device_0>
    %11494 = torch_c.from_builtin_tensor %11493 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11495 = torch_c.to_builtin_tensor %11476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9138 = arith.constant 1 : index
    %dim_9139 = tensor.dim %11495, %c1_9138 : tensor<4x?x4096xf16>
    %11496 = flow.tensor.transfer %11495 : tensor<4x?x4096xf16>{%dim_9139} to #hal.device.promise<@__device_0>
    %11497 = torch_c.from_builtin_tensor %11496 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9140 = torch.constant.int 1
    %11498 = torch.aten.add.Tensor %11427, %11479, %int1_9140 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9141 = torch.constant.int 1
    %11499 = torch.aten.add.Tensor %11498, %11482, %int1_9141 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9142 = torch.constant.int 1
    %11500 = torch.aten.add.Tensor %11499, %11485, %int1_9142 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9143 = torch.constant.int 1
    %11501 = torch.aten.add.Tensor %11500, %11488, %int1_9143 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9144 = torch.constant.int 1
    %11502 = torch.aten.add.Tensor %11501, %11491, %int1_9144 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9145 = torch.constant.int 1
    %11503 = torch.aten.add.Tensor %11502, %11494, %int1_9145 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9146 = torch.constant.int 1
    %11504 = torch.aten.add.Tensor %11503, %11497, %int1_9146 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
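    // @__device_0 reduction complete (%11504). The same gather-and-reduce
    // sequence now repeats for @__device_1 through @__device_7; each device
    // adds its locally resident partial directly and transfers in the other
    // seven.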
    %11505 = torch_c.to_builtin_tensor %11427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9147 = arith.constant 1 : index
    %dim_9148 = tensor.dim %11505, %c1_9147 : tensor<4x?x4096xf16>
    %11506 = flow.tensor.transfer %11505 : tensor<4x?x4096xf16>{%dim_9148} to #hal.device.promise<@__device_1>
    %11507 = torch_c.from_builtin_tensor %11506 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11508 = torch_c.to_builtin_tensor %11441 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9149 = arith.constant 1 : index
    %dim_9150 = tensor.dim %11508, %c1_9149 : tensor<4x?x4096xf16>
    %11509 = flow.tensor.transfer %11508 : tensor<4x?x4096xf16>{%dim_9150} to #hal.device.promise<@__device_1>
    %11510 = torch_c.from_builtin_tensor %11509 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11511 = torch_c.to_builtin_tensor %11448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9151 = arith.constant 1 : index
    %dim_9152 = tensor.dim %11511, %c1_9151 : tensor<4x?x4096xf16>
    %11512 = flow.tensor.transfer %11511 : tensor<4x?x4096xf16>{%dim_9152} to #hal.device.promise<@__device_1>
    %11513 = torch_c.from_builtin_tensor %11512 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11514 = torch_c.to_builtin_tensor %11455 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9153 = arith.constant 1 : index
    %dim_9154 = tensor.dim %11514, %c1_9153 : tensor<4x?x4096xf16>
    %11515 = flow.tensor.transfer %11514 : tensor<4x?x4096xf16>{%dim_9154} to #hal.device.promise<@__device_1>
    %11516 = torch_c.from_builtin_tensor %11515 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11517 = torch_c.to_builtin_tensor %11462 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9155 = arith.constant 1 : index
    %dim_9156 = tensor.dim %11517, %c1_9155 : tensor<4x?x4096xf16>
    %11518 = flow.tensor.transfer %11517 : tensor<4x?x4096xf16>{%dim_9156} to #hal.device.promise<@__device_1>
    %11519 = torch_c.from_builtin_tensor %11518 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11520 = torch_c.to_builtin_tensor %11469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9157 = arith.constant 1 : index
    %dim_9158 = tensor.dim %11520, %c1_9157 : tensor<4x?x4096xf16>
    %11521 = flow.tensor.transfer %11520 : tensor<4x?x4096xf16>{%dim_9158} to #hal.device.promise<@__device_1>
    %11522 = torch_c.from_builtin_tensor %11521 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11523 = torch_c.to_builtin_tensor %11476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9159 = arith.constant 1 : index
    %dim_9160 = tensor.dim %11523, %c1_9159 : tensor<4x?x4096xf16>
    %11524 = flow.tensor.transfer %11523 : tensor<4x?x4096xf16>{%dim_9160} to #hal.device.promise<@__device_1>
    %11525 = torch_c.from_builtin_tensor %11524 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9161 = torch.constant.int 1
    %11526 = torch.aten.add.Tensor %11507, %11434, %int1_9161 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9162 = torch.constant.int 1
    %11527 = torch.aten.add.Tensor %11526, %11510, %int1_9162 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9163 = torch.constant.int 1
    %11528 = torch.aten.add.Tensor %11527, %11513, %int1_9163 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9164 = torch.constant.int 1
    %11529 = torch.aten.add.Tensor %11528, %11516, %int1_9164 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9165 = torch.constant.int 1
    %11530 = torch.aten.add.Tensor %11529, %11519, %int1_9165 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9166 = torch.constant.int 1
    %11531 = torch.aten.add.Tensor %11530, %11522, %int1_9166 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9167 = torch.constant.int 1
    %11532 = torch.aten.add.Tensor %11531, %11525, %int1_9167 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
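    // Gather-and-reduce on @__device_2; the local partial %11441 joins the
    // add chain untransferred at %11555.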
    %11533 = torch_c.to_builtin_tensor %11427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9168 = arith.constant 1 : index
    %dim_9169 = tensor.dim %11533, %c1_9168 : tensor<4x?x4096xf16>
    %11534 = flow.tensor.transfer %11533 : tensor<4x?x4096xf16>{%dim_9169} to #hal.device.promise<@__device_2>
    %11535 = torch_c.from_builtin_tensor %11534 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11536 = torch_c.to_builtin_tensor %11434 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9170 = arith.constant 1 : index
    %dim_9171 = tensor.dim %11536, %c1_9170 : tensor<4x?x4096xf16>
    %11537 = flow.tensor.transfer %11536 : tensor<4x?x4096xf16>{%dim_9171} to #hal.device.promise<@__device_2>
    %11538 = torch_c.from_builtin_tensor %11537 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11539 = torch_c.to_builtin_tensor %11448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9172 = arith.constant 1 : index
    %dim_9173 = tensor.dim %11539, %c1_9172 : tensor<4x?x4096xf16>
    %11540 = flow.tensor.transfer %11539 : tensor<4x?x4096xf16>{%dim_9173} to #hal.device.promise<@__device_2>
    %11541 = torch_c.from_builtin_tensor %11540 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11542 = torch_c.to_builtin_tensor %11455 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9174 = arith.constant 1 : index
    %dim_9175 = tensor.dim %11542, %c1_9174 : tensor<4x?x4096xf16>
    %11543 = flow.tensor.transfer %11542 : tensor<4x?x4096xf16>{%dim_9175} to #hal.device.promise<@__device_2>
    %11544 = torch_c.from_builtin_tensor %11543 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11545 = torch_c.to_builtin_tensor %11462 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9176 = arith.constant 1 : index
    %dim_9177 = tensor.dim %11545, %c1_9176 : tensor<4x?x4096xf16>
    %11546 = flow.tensor.transfer %11545 : tensor<4x?x4096xf16>{%dim_9177} to #hal.device.promise<@__device_2>
    %11547 = torch_c.from_builtin_tensor %11546 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11548 = torch_c.to_builtin_tensor %11469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9178 = arith.constant 1 : index
    %dim_9179 = tensor.dim %11548, %c1_9178 : tensor<4x?x4096xf16>
    %11549 = flow.tensor.transfer %11548 : tensor<4x?x4096xf16>{%dim_9179} to #hal.device.promise<@__device_2>
    %11550 = torch_c.from_builtin_tensor %11549 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11551 = torch_c.to_builtin_tensor %11476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9180 = arith.constant 1 : index
    %dim_9181 = tensor.dim %11551, %c1_9180 : tensor<4x?x4096xf16>
    %11552 = flow.tensor.transfer %11551 : tensor<4x?x4096xf16>{%dim_9181} to #hal.device.promise<@__device_2>
    %11553 = torch_c.from_builtin_tensor %11552 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9182 = torch.constant.int 1
    %11554 = torch.aten.add.Tensor %11535, %11538, %int1_9182 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9183 = torch.constant.int 1
    %11555 = torch.aten.add.Tensor %11554, %11441, %int1_9183 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9184 = torch.constant.int 1
    %11556 = torch.aten.add.Tensor %11555, %11541, %int1_9184 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9185 = torch.constant.int 1
    %11557 = torch.aten.add.Tensor %11556, %11544, %int1_9185 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9186 = torch.constant.int 1
    %11558 = torch.aten.add.Tensor %11557, %11547, %int1_9186 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9187 = torch.constant.int 1
    %11559 = torch.aten.add.Tensor %11558, %11550, %int1_9187 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9188 = torch.constant.int 1
    %11560 = torch.aten.add.Tensor %11559, %11553, %int1_9188 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
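    // Gather-and-reduce on @__device_3; local partial %11448 joins at %11584.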
    %11561 = torch_c.to_builtin_tensor %11427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9189 = arith.constant 1 : index
    %dim_9190 = tensor.dim %11561, %c1_9189 : tensor<4x?x4096xf16>
    %11562 = flow.tensor.transfer %11561 : tensor<4x?x4096xf16>{%dim_9190} to #hal.device.promise<@__device_3>
    %11563 = torch_c.from_builtin_tensor %11562 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11564 = torch_c.to_builtin_tensor %11434 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9191 = arith.constant 1 : index
    %dim_9192 = tensor.dim %11564, %c1_9191 : tensor<4x?x4096xf16>
    %11565 = flow.tensor.transfer %11564 : tensor<4x?x4096xf16>{%dim_9192} to #hal.device.promise<@__device_3>
    %11566 = torch_c.from_builtin_tensor %11565 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11567 = torch_c.to_builtin_tensor %11441 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9193 = arith.constant 1 : index
    %dim_9194 = tensor.dim %11567, %c1_9193 : tensor<4x?x4096xf16>
    %11568 = flow.tensor.transfer %11567 : tensor<4x?x4096xf16>{%dim_9194} to #hal.device.promise<@__device_3>
    %11569 = torch_c.from_builtin_tensor %11568 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11570 = torch_c.to_builtin_tensor %11455 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9195 = arith.constant 1 : index
    %dim_9196 = tensor.dim %11570, %c1_9195 : tensor<4x?x4096xf16>
    %11571 = flow.tensor.transfer %11570 : tensor<4x?x4096xf16>{%dim_9196} to #hal.device.promise<@__device_3>
    %11572 = torch_c.from_builtin_tensor %11571 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11573 = torch_c.to_builtin_tensor %11462 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9197 = arith.constant 1 : index
    %dim_9198 = tensor.dim %11573, %c1_9197 : tensor<4x?x4096xf16>
    %11574 = flow.tensor.transfer %11573 : tensor<4x?x4096xf16>{%dim_9198} to #hal.device.promise<@__device_3>
    %11575 = torch_c.from_builtin_tensor %11574 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11576 = torch_c.to_builtin_tensor %11469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9199 = arith.constant 1 : index
    %dim_9200 = tensor.dim %11576, %c1_9199 : tensor<4x?x4096xf16>
    %11577 = flow.tensor.transfer %11576 : tensor<4x?x4096xf16>{%dim_9200} to #hal.device.promise<@__device_3>
    %11578 = torch_c.from_builtin_tensor %11577 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11579 = torch_c.to_builtin_tensor %11476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9201 = arith.constant 1 : index
    %dim_9202 = tensor.dim %11579, %c1_9201 : tensor<4x?x4096xf16>
    %11580 = flow.tensor.transfer %11579 : tensor<4x?x4096xf16>{%dim_9202} to #hal.device.promise<@__device_3>
    %11581 = torch_c.from_builtin_tensor %11580 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9203 = torch.constant.int 1
    %11582 = torch.aten.add.Tensor %11563, %11566, %int1_9203 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9204 = torch.constant.int 1
    %11583 = torch.aten.add.Tensor %11582, %11569, %int1_9204 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9205 = torch.constant.int 1
    %11584 = torch.aten.add.Tensor %11583, %11448, %int1_9205 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9206 = torch.constant.int 1
    %11585 = torch.aten.add.Tensor %11584, %11572, %int1_9206 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9207 = torch.constant.int 1
    %11586 = torch.aten.add.Tensor %11585, %11575, %int1_9207 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9208 = torch.constant.int 1
    %11587 = torch.aten.add.Tensor %11586, %11578, %int1_9208 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9209 = torch.constant.int 1
    %11588 = torch.aten.add.Tensor %11587, %11581, %int1_9209 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
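    // Gather-and-reduce on @__device_4; local partial %11455 joins at %11613.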
    %11589 = torch_c.to_builtin_tensor %11427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9210 = arith.constant 1 : index
    %dim_9211 = tensor.dim %11589, %c1_9210 : tensor<4x?x4096xf16>
    %11590 = flow.tensor.transfer %11589 : tensor<4x?x4096xf16>{%dim_9211} to #hal.device.promise<@__device_4>
    %11591 = torch_c.from_builtin_tensor %11590 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11592 = torch_c.to_builtin_tensor %11434 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9212 = arith.constant 1 : index
    %dim_9213 = tensor.dim %11592, %c1_9212 : tensor<4x?x4096xf16>
    %11593 = flow.tensor.transfer %11592 : tensor<4x?x4096xf16>{%dim_9213} to #hal.device.promise<@__device_4>
    %11594 = torch_c.from_builtin_tensor %11593 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11595 = torch_c.to_builtin_tensor %11441 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9214 = arith.constant 1 : index
    %dim_9215 = tensor.dim %11595, %c1_9214 : tensor<4x?x4096xf16>
    %11596 = flow.tensor.transfer %11595 : tensor<4x?x4096xf16>{%dim_9215} to #hal.device.promise<@__device_4>
    %11597 = torch_c.from_builtin_tensor %11596 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11598 = torch_c.to_builtin_tensor %11448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9216 = arith.constant 1 : index
    %dim_9217 = tensor.dim %11598, %c1_9216 : tensor<4x?x4096xf16>
    %11599 = flow.tensor.transfer %11598 : tensor<4x?x4096xf16>{%dim_9217} to #hal.device.promise<@__device_4>
    %11600 = torch_c.from_builtin_tensor %11599 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11601 = torch_c.to_builtin_tensor %11462 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9218 = arith.constant 1 : index
    %dim_9219 = tensor.dim %11601, %c1_9218 : tensor<4x?x4096xf16>
    %11602 = flow.tensor.transfer %11601 : tensor<4x?x4096xf16>{%dim_9219} to #hal.device.promise<@__device_4>
    %11603 = torch_c.from_builtin_tensor %11602 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11604 = torch_c.to_builtin_tensor %11469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9220 = arith.constant 1 : index
    %dim_9221 = tensor.dim %11604, %c1_9220 : tensor<4x?x4096xf16>
    %11605 = flow.tensor.transfer %11604 : tensor<4x?x4096xf16>{%dim_9221} to #hal.device.promise<@__device_4>
    %11606 = torch_c.from_builtin_tensor %11605 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11607 = torch_c.to_builtin_tensor %11476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9222 = arith.constant 1 : index
    %dim_9223 = tensor.dim %11607, %c1_9222 : tensor<4x?x4096xf16>
    %11608 = flow.tensor.transfer %11607 : tensor<4x?x4096xf16>{%dim_9223} to #hal.device.promise<@__device_4>
    %11609 = torch_c.from_builtin_tensor %11608 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9224 = torch.constant.int 1
    %11610 = torch.aten.add.Tensor %11591, %11594, %int1_9224 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9225 = torch.constant.int 1
    %11611 = torch.aten.add.Tensor %11610, %11597, %int1_9225 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9226 = torch.constant.int 1
    %11612 = torch.aten.add.Tensor %11611, %11600, %int1_9226 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9227 = torch.constant.int 1
    %11613 = torch.aten.add.Tensor %11612, %11455, %int1_9227 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9228 = torch.constant.int 1
    %11614 = torch.aten.add.Tensor %11613, %11603, %int1_9228 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9229 = torch.constant.int 1
    %11615 = torch.aten.add.Tensor %11614, %11606, %int1_9229 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9230 = torch.constant.int 1
    %11616 = torch.aten.add.Tensor %11615, %11609, %int1_9230 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
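    // Gather-and-reduce on @__device_5; local partial %11462 joins at %11642.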
    %11617 = torch_c.to_builtin_tensor %11427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9231 = arith.constant 1 : index
    %dim_9232 = tensor.dim %11617, %c1_9231 : tensor<4x?x4096xf16>
    %11618 = flow.tensor.transfer %11617 : tensor<4x?x4096xf16>{%dim_9232} to #hal.device.promise<@__device_5>
    %11619 = torch_c.from_builtin_tensor %11618 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11620 = torch_c.to_builtin_tensor %11434 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9233 = arith.constant 1 : index
    %dim_9234 = tensor.dim %11620, %c1_9233 : tensor<4x?x4096xf16>
    %11621 = flow.tensor.transfer %11620 : tensor<4x?x4096xf16>{%dim_9234} to #hal.device.promise<@__device_5>
    %11622 = torch_c.from_builtin_tensor %11621 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11623 = torch_c.to_builtin_tensor %11441 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9235 = arith.constant 1 : index
    %dim_9236 = tensor.dim %11623, %c1_9235 : tensor<4x?x4096xf16>
    %11624 = flow.tensor.transfer %11623 : tensor<4x?x4096xf16>{%dim_9236} to #hal.device.promise<@__device_5>
    %11625 = torch_c.from_builtin_tensor %11624 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11626 = torch_c.to_builtin_tensor %11448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9237 = arith.constant 1 : index
    %dim_9238 = tensor.dim %11626, %c1_9237 : tensor<4x?x4096xf16>
    %11627 = flow.tensor.transfer %11626 : tensor<4x?x4096xf16>{%dim_9238} to #hal.device.promise<@__device_5>
    %11628 = torch_c.from_builtin_tensor %11627 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11629 = torch_c.to_builtin_tensor %11455 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9239 = arith.constant 1 : index
    %dim_9240 = tensor.dim %11629, %c1_9239 : tensor<4x?x4096xf16>
    %11630 = flow.tensor.transfer %11629 : tensor<4x?x4096xf16>{%dim_9240} to #hal.device.promise<@__device_5>
    %11631 = torch_c.from_builtin_tensor %11630 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11632 = torch_c.to_builtin_tensor %11469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9241 = arith.constant 1 : index
    %dim_9242 = tensor.dim %11632, %c1_9241 : tensor<4x?x4096xf16>
    %11633 = flow.tensor.transfer %11632 : tensor<4x?x4096xf16>{%dim_9242} to #hal.device.promise<@__device_5>
    %11634 = torch_c.from_builtin_tensor %11633 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11635 = torch_c.to_builtin_tensor %11476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9243 = arith.constant 1 : index
    %dim_9244 = tensor.dim %11635, %c1_9243 : tensor<4x?x4096xf16>
    %11636 = flow.tensor.transfer %11635 : tensor<4x?x4096xf16>{%dim_9244} to #hal.device.promise<@__device_5>
    %11637 = torch_c.from_builtin_tensor %11636 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9245 = torch.constant.int 1
    %11638 = torch.aten.add.Tensor %11619, %11622, %int1_9245 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9246 = torch.constant.int 1
    %11639 = torch.aten.add.Tensor %11638, %11625, %int1_9246 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9247 = torch.constant.int 1
    %11640 = torch.aten.add.Tensor %11639, %11628, %int1_9247 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9248 = torch.constant.int 1
    %11641 = torch.aten.add.Tensor %11640, %11631, %int1_9248 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9249 = torch.constant.int 1
    %11642 = torch.aten.add.Tensor %11641, %11462, %int1_9249 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9250 = torch.constant.int 1
    %11643 = torch.aten.add.Tensor %11642, %11634, %int1_9250 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9251 = torch.constant.int 1
    %11644 = torch.aten.add.Tensor %11643, %11637, %int1_9251 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
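    // Gather-and-reduce on @__device_6; local partial %11469 joins at %11671.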
    %11645 = torch_c.to_builtin_tensor %11427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9252 = arith.constant 1 : index
    %dim_9253 = tensor.dim %11645, %c1_9252 : tensor<4x?x4096xf16>
    %11646 = flow.tensor.transfer %11645 : tensor<4x?x4096xf16>{%dim_9253} to #hal.device.promise<@__device_6>
    %11647 = torch_c.from_builtin_tensor %11646 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11648 = torch_c.to_builtin_tensor %11434 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9254 = arith.constant 1 : index
    %dim_9255 = tensor.dim %11648, %c1_9254 : tensor<4x?x4096xf16>
    %11649 = flow.tensor.transfer %11648 : tensor<4x?x4096xf16>{%dim_9255} to #hal.device.promise<@__device_6>
    %11650 = torch_c.from_builtin_tensor %11649 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11651 = torch_c.to_builtin_tensor %11441 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9256 = arith.constant 1 : index
    %dim_9257 = tensor.dim %11651, %c1_9256 : tensor<4x?x4096xf16>
    %11652 = flow.tensor.transfer %11651 : tensor<4x?x4096xf16>{%dim_9257} to #hal.device.promise<@__device_6>
    %11653 = torch_c.from_builtin_tensor %11652 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11654 = torch_c.to_builtin_tensor %11448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9258 = arith.constant 1 : index
    %dim_9259 = tensor.dim %11654, %c1_9258 : tensor<4x?x4096xf16>
    %11655 = flow.tensor.transfer %11654 : tensor<4x?x4096xf16>{%dim_9259} to #hal.device.promise<@__device_6>
    %11656 = torch_c.from_builtin_tensor %11655 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11657 = torch_c.to_builtin_tensor %11455 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9260 = arith.constant 1 : index
    %dim_9261 = tensor.dim %11657, %c1_9260 : tensor<4x?x4096xf16>
    %11658 = flow.tensor.transfer %11657 : tensor<4x?x4096xf16>{%dim_9261} to #hal.device.promise<@__device_6>
    %11659 = torch_c.from_builtin_tensor %11658 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11660 = torch_c.to_builtin_tensor %11462 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9262 = arith.constant 1 : index
    %dim_9263 = tensor.dim %11660, %c1_9262 : tensor<4x?x4096xf16>
    %11661 = flow.tensor.transfer %11660 : tensor<4x?x4096xf16>{%dim_9263} to #hal.device.promise<@__device_6>
    %11662 = torch_c.from_builtin_tensor %11661 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11663 = torch_c.to_builtin_tensor %11476 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9264 = arith.constant 1 : index
    %dim_9265 = tensor.dim %11663, %c1_9264 : tensor<4x?x4096xf16>
    %11664 = flow.tensor.transfer %11663 : tensor<4x?x4096xf16>{%dim_9265} to #hal.device.promise<@__device_6>
    %11665 = torch_c.from_builtin_tensor %11664 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9266 = torch.constant.int 1
    %11666 = torch.aten.add.Tensor %11647, %11650, %int1_9266 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9267 = torch.constant.int 1
    %11667 = torch.aten.add.Tensor %11666, %11653, %int1_9267 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9268 = torch.constant.int 1
    %11668 = torch.aten.add.Tensor %11667, %11656, %int1_9268 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9269 = torch.constant.int 1
    %11669 = torch.aten.add.Tensor %11668, %11659, %int1_9269 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9270 = torch.constant.int 1
    %11670 = torch.aten.add.Tensor %11669, %11662, %int1_9270 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9271 = torch.constant.int 1
    %11671 = torch.aten.add.Tensor %11670, %11469, %int1_9271 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9272 = torch.constant.int 1
    %11672 = torch.aten.add.Tensor %11671, %11665, %int1_9272 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
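    // Gather-and-reduce on @__device_7; local partial %11476 joins at %11700.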
    %11673 = torch_c.to_builtin_tensor %11427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9273 = arith.constant 1 : index
    %dim_9274 = tensor.dim %11673, %c1_9273 : tensor<4x?x4096xf16>
    %11674 = flow.tensor.transfer %11673 : tensor<4x?x4096xf16>{%dim_9274} to #hal.device.promise<@__device_7>
    %11675 = torch_c.from_builtin_tensor %11674 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11676 = torch_c.to_builtin_tensor %11434 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9275 = arith.constant 1 : index
    %dim_9276 = tensor.dim %11676, %c1_9275 : tensor<4x?x4096xf16>
    %11677 = flow.tensor.transfer %11676 : tensor<4x?x4096xf16>{%dim_9276} to #hal.device.promise<@__device_7>
    %11678 = torch_c.from_builtin_tensor %11677 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11679 = torch_c.to_builtin_tensor %11441 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9277 = arith.constant 1 : index
    %dim_9278 = tensor.dim %11679, %c1_9277 : tensor<4x?x4096xf16>
    %11680 = flow.tensor.transfer %11679 : tensor<4x?x4096xf16>{%dim_9278} to #hal.device.promise<@__device_7>
    %11681 = torch_c.from_builtin_tensor %11680 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11682 = torch_c.to_builtin_tensor %11448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9279 = arith.constant 1 : index
    %dim_9280 = tensor.dim %11682, %c1_9279 : tensor<4x?x4096xf16>
    %11683 = flow.tensor.transfer %11682 : tensor<4x?x4096xf16>{%dim_9280} to #hal.device.promise<@__device_7>
    %11684 = torch_c.from_builtin_tensor %11683 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11685 = torch_c.to_builtin_tensor %11455 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9281 = arith.constant 1 : index
    %dim_9282 = tensor.dim %11685, %c1_9281 : tensor<4x?x4096xf16>
    %11686 = flow.tensor.transfer %11685 : tensor<4x?x4096xf16>{%dim_9282} to #hal.device.promise<@__device_7>
    %11687 = torch_c.from_builtin_tensor %11686 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11688 = torch_c.to_builtin_tensor %11462 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9283 = arith.constant 1 : index
    %dim_9284 = tensor.dim %11688, %c1_9283 : tensor<4x?x4096xf16>
    %11689 = flow.tensor.transfer %11688 : tensor<4x?x4096xf16>{%dim_9284} to #hal.device.promise<@__device_7>
    %11690 = torch_c.from_builtin_tensor %11689 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %11691 = torch_c.to_builtin_tensor %11469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_9285 = arith.constant 1 : index
    %dim_9286 = tensor.dim %11691, %c1_9285 : tensor<4x?x4096xf16>
    %11692 = flow.tensor.transfer %11691 : tensor<4x?x4096xf16>{%dim_9286} to #hal.device.promise<@__device_7>
    %11693 = torch_c.from_builtin_tensor %11692 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9287 = torch.constant.int 1
    %11694 = torch.aten.add.Tensor %11675, %11678, %int1_9287 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9288 = torch.constant.int 1
    %11695 = torch.aten.add.Tensor %11694, %11681, %int1_9288 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9289 = torch.constant.int 1
    %11696 = torch.aten.add.Tensor %11695, %11684, %int1_9289 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9290 = torch.constant.int 1
    %11697 = torch.aten.add.Tensor %11696, %11687, %int1_9290 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9291 = torch.constant.int 1
    %11698 = torch.aten.add.Tensor %11697, %11690, %int1_9291 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9292 = torch.constant.int 1
    %11699 = torch.aten.add.Tensor %11698, %11693, %int1_9292 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9293 = torch.constant.int 1
    %11700 = torch.aten.add.Tensor %11699, %11476, %int1_9293 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
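    // Per-device residual add: the reduced FFN output is added to the
    // presumed residual-stream replicas %11181 through %11188 from earlier in
    // the layer, producing one [4,?,4096] block output per device
    // (%11701 through %11708).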
    %int1_9294 = torch.constant.int 1
    %11701 = torch.aten.add.Tensor %11181, %11504, %int1_9294 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9295 = torch.constant.int 1
    %11702 = torch.aten.add.Tensor %11182, %11532, %int1_9295 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9296 = torch.constant.int 1
    %11703 = torch.aten.add.Tensor %11183, %11560, %int1_9296 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9297 = torch.constant.int 1
    %11704 = torch.aten.add.Tensor %11184, %11588, %int1_9297 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9298 = torch.constant.int 1
    %11705 = torch.aten.add.Tensor %11185, %11616, %int1_9298 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9299 = torch.constant.int 1
    %11706 = torch.aten.add.Tensor %11186, %11644, %int1_9299 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9300 = torch.constant.int 1
    %11707 = torch.aten.add.Tensor %11187, %11672, %int1_9300 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_9301 = torch.constant.int 1
    %11708 = torch.aten.add.Tensor %11188, %11700, %int1_9301 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
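    // The f32 upcast, square, and keepdim mean that follow appear to begin a
    // per-device RMSNorm over the 4096-dim hidden axis. A rough PyTorch-style
    // sketch of the per-replica math (hypothetical names; the rsqrt/eps step
    // lies beyond this excerpt and its eps value is assumed):
    //   x32 = x.to(torch.float32)                 # [4, s0*16, 4096]
    //   var = x32.pow(2).mean(-1, keepdim=True)   # [4, s0*16, 1]
    //   y   = x32 * torch.rsqrt(var + eps)        # continuation, not shown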
    %int6_9302 = torch.constant.int 6
    %11709 = torch.prims.convert_element_type %11701, %int6_9302 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_9303 = torch.constant.int 6
    %11710 = torch.prims.convert_element_type %11702, %int6_9303 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_9304 = torch.constant.int 6
    %11711 = torch.prims.convert_element_type %11703, %int6_9304 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_9305 = torch.constant.int 6
    %11712 = torch.prims.convert_element_type %11704, %int6_9305 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_9306 = torch.constant.int 6
    %11713 = torch.prims.convert_element_type %11705, %int6_9306 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_9307 = torch.constant.int 6
    %11714 = torch.prims.convert_element_type %11706, %int6_9307 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_9308 = torch.constant.int 6
    %11715 = torch.prims.convert_element_type %11707, %int6_9308 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_9309 = torch.constant.int 6
    %11716 = torch.prims.convert_element_type %11708, %int6_9309 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
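    // Elementwise square of each f32 replica (aten.pow with exponent 2).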
    %int2_9310 = torch.constant.int 2
    %11717 = torch.aten.pow.Tensor_Scalar %11709, %int2_9310 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_9311 = torch.constant.int 2
    %11718 = torch.aten.pow.Tensor_Scalar %11710, %int2_9311 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_9312 = torch.constant.int 2
    %11719 = torch.aten.pow.Tensor_Scalar %11711, %int2_9312 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_9313 = torch.constant.int 2
    %11720 = torch.aten.pow.Tensor_Scalar %11712, %int2_9313 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_9314 = torch.constant.int 2
    %11721 = torch.aten.pow.Tensor_Scalar %11713, %int2_9314 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_9315 = torch.constant.int 2
    %11722 = torch.aten.pow.Tensor_Scalar %11714, %int2_9315 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_9316 = torch.constant.int 2
    %11723 = torch.aten.pow.Tensor_Scalar %11715, %int2_9316 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_9317 = torch.constant.int 2
    %11724 = torch.aten.pow.Tensor_Scalar %11716, %int2_9317 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
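    // Reduce mean(x^2) over the hidden dimension (dim -1, keepdim=true):
    // [4,?,4096] -> [4,?,1], one scalar per token per shard.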
    %int-1_9318 = torch.constant.int -1
    %11725 = torch.prim.ListConstruct %int-1_9318 : (!torch.int) -> !torch.list<int>
    %true_9319 = torch.constant.bool true
    %none_9320 = torch.constant.none
    %11726 = torch.aten.mean.dim %11717, %11725, %true_9319, %none_9320 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_9321 = torch.constant.int -1
    %11727 = torch.prim.ListConstruct %int-1_9321 : (!torch.int) -> !torch.list<int>
    %true_9322 = torch.constant.bool true
    %none_9323 = torch.constant.none
    %11728 = torch.aten.mean.dim %11718, %11727, %true_9322, %none_9323 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_9324 = torch.constant.int -1
    %11729 = torch.prim.ListConstruct %int-1_9324 : (!torch.int) -> !torch.list<int>
    %true_9325 = torch.constant.bool true
    %none_9326 = torch.constant.none
    %11730 = torch.aten.mean.dim %11719, %11729, %true_9325, %none_9326 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_9327 = torch.constant.int -1
    %11731 = torch.prim.ListConstruct %int-1_9327 : (!torch.int) -> !torch.list<int>
    %true_9328 = torch.constant.bool true
    %none_9329 = torch.constant.none
    %11732 = torch.aten.mean.dim %11720, %11731, %true_9328, %none_9329 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_9330 = torch.constant.int -1
    %11733 = torch.prim.ListConstruct %int-1_9330 : (!torch.int) -> !torch.list<int>
    %true_9331 = torch.constant.bool true
    %none_9332 = torch.constant.none
    %11734 = torch.aten.mean.dim %11721, %11733, %true_9331, %none_9332 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_9333 = torch.constant.int -1
    %11735 = torch.prim.ListConstruct %int-1_9333 : (!torch.int) -> !torch.list<int>
    %true_9334 = torch.constant.bool true
    %none_9335 = torch.constant.none
    %11736 = torch.aten.mean.dim %11722, %11735, %true_9334, %none_9335 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_9336 = torch.constant.int -1
    %11737 = torch.prim.ListConstruct %int-1_9336 : (!torch.int) -> !torch.list<int>
    %true_9337 = torch.constant.bool true
    %none_9338 = torch.constant.none
    %11738 = torch.aten.mean.dim %11723, %11737, %true_9337, %none_9338 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_9339 = torch.constant.int -1
    %11739 = torch.prim.ListConstruct %int-1_9339 : (!torch.int) -> !torch.list<int>
    %true_9340 = torch.constant.bool true
    %none_9341 = torch.constant.none
    %11740 = torch.aten.mean.dim %11724, %11739, %true_9340, %none_9341 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
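    // Add the epsilon; 9.9999997473787516E-6 is 1.0e-5 rounded to f32.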
    %float9.999990e-06_9342 = torch.constant.float 9.9999997473787516E-6
    %int1_9343 = torch.constant.int 1
    %11741 = torch.aten.add.Scalar %11726, %float9.999990e-06_9342, %int1_9343 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_9344 = torch.constant.float 9.9999997473787516E-6
    %int1_9345 = torch.constant.int 1
    %11742 = torch.aten.add.Scalar %11728, %float9.999990e-06_9344, %int1_9345 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_9346 = torch.constant.float 9.9999997473787516E-6
    %int1_9347 = torch.constant.int 1
    %11743 = torch.aten.add.Scalar %11730, %float9.999990e-06_9346, %int1_9347 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_9348 = torch.constant.float 9.9999997473787516E-6
    %int1_9349 = torch.constant.int 1
    %11744 = torch.aten.add.Scalar %11732, %float9.999990e-06_9348, %int1_9349 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_9350 = torch.constant.float 9.9999997473787516E-6
    %int1_9351 = torch.constant.int 1
    %11745 = torch.aten.add.Scalar %11734, %float9.999990e-06_9350, %int1_9351 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_9352 = torch.constant.float 9.9999997473787516E-6
    %int1_9353 = torch.constant.int 1
    %11746 = torch.aten.add.Scalar %11736, %float9.999990e-06_9352, %int1_9353 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_9354 = torch.constant.float 9.9999997473787516E-6
    %int1_9355 = torch.constant.int 1
    %11747 = torch.aten.add.Scalar %11738, %float9.999990e-06_9354, %int1_9355 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_9356 = torch.constant.float 9.9999997473787516E-6
    %int1_9357 = torch.constant.int 1
    %11748 = torch.aten.add.Scalar %11740, %float9.999990e-06_9356, %int1_9357 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
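    // rsqrt(mean(x^2) + eps), per shard.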
    %11749 = torch.aten.rsqrt %11741 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11750 = torch.aten.rsqrt %11742 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11751 = torch.aten.rsqrt %11743 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11752 = torch.aten.rsqrt %11744 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11753 = torch.aten.rsqrt %11745 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11754 = torch.aten.rsqrt %11746 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11755 = torch.aten.rsqrt %11747 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %11756 = torch.aten.rsqrt %11748 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %11756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
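    // Normalize: broadcast-multiply each [4,?,4096] tensor by its [4,?,1] rsqrt factor.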
    %11757 = torch.aten.mul.Tensor %11709, %11749 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11758 = torch.aten.mul.Tensor %11710, %11750 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11759 = torch.aten.mul.Tensor %11711, %11751 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11760 = torch.aten.mul.Tensor %11712, %11752 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11761 = torch.aten.mul.Tensor %11713, %11753 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11762 = torch.aten.mul.Tensor %11714, %11754 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11763 = torch.aten.mul.Tensor %11715, %11755 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11764 = torch.aten.mul.Tensor %11716, %11756 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
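    // Scale by the norm weights (%368-%375, one [4096]xf32 tensor per shard),
    // presumably this block's attn_norm parameters; this completes the RMSNorm.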
    %11765 = torch.aten.mul.Tensor %368, %11757 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11766 = torch.aten.mul.Tensor %369, %11758 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11767 = torch.aten.mul.Tensor %370, %11759 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11768 = torch.aten.mul.Tensor %371, %11760 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11769 = torch.aten.mul.Tensor %372, %11761 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11770 = torch.aten.mul.Tensor %373, %11762 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11771 = torch.aten.mul.Tensor %374, %11763 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %11772 = torch.aten.mul.Tensor %375, %11764 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %11772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
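    // Cast the normalized activations back to f16 for the projection matmuls.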
    %int5_9358 = torch.constant.int 5
    %11773 = torch.prims.convert_element_type %11765, %int5_9358 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_9359 = torch.constant.int 5
    %11774 = torch.prims.convert_element_type %11766, %int5_9359 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_9360 = torch.constant.int 5
    %11775 = torch.prims.convert_element_type %11767, %int5_9360 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_9361 = torch.constant.int 5
    %11776 = torch.prims.convert_element_type %11768, %int5_9361 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_9362 = torch.constant.int 5
    %11777 = torch.prims.convert_element_type %11769, %int5_9362 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_9363 = torch.constant.int 5
    %11778 = torch.prims.convert_element_type %11770, %int5_9363 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_9364 = torch.constant.int 5
    %11779 = torch.prims.convert_element_type %11771, %int5_9364 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_9365 = torch.constant.int 5
    %11780 = torch.prims.convert_element_type %11772, %int5_9365 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %11780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
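    // Transpose the eight [512,4096] weight shards (%376-%383) to [4096,512];
    // given the later reshape into 128-wide heads, these appear to be the Q
    // projection shards (8 shards x 512 = 4096 = 32 heads x 128).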
    %int1_9366 = torch.constant.int 1
    %int0_9367 = torch.constant.int 0
    %11781 = torch.prim.ListConstruct %int1_9366, %int0_9367 : (!torch.int, !torch.int) -> !torch.list<int>
    %11782 = torch.aten.permute %376, %11781 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_9368 = torch.constant.int 1
    %int0_9369 = torch.constant.int 0
    %11783 = torch.prim.ListConstruct %int1_9368, %int0_9369 : (!torch.int, !torch.int) -> !torch.list<int>
    %11784 = torch.aten.permute %377, %11783 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_9370 = torch.constant.int 1
    %int0_9371 = torch.constant.int 0
    %11785 = torch.prim.ListConstruct %int1_9370, %int0_9371 : (!torch.int, !torch.int) -> !torch.list<int>
    %11786 = torch.aten.permute %378, %11785 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_9372 = torch.constant.int 1
    %int0_9373 = torch.constant.int 0
    %11787 = torch.prim.ListConstruct %int1_9372, %int0_9373 : (!torch.int, !torch.int) -> !torch.list<int>
    %11788 = torch.aten.permute %379, %11787 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_9374 = torch.constant.int 1
    %int0_9375 = torch.constant.int 0
    %11789 = torch.prim.ListConstruct %int1_9374, %int0_9375 : (!torch.int, !torch.int) -> !torch.list<int>
    %11790 = torch.aten.permute %380, %11789 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_9376 = torch.constant.int 1
    %int0_9377 = torch.constant.int 0
    %11791 = torch.prim.ListConstruct %int1_9376, %int0_9377 : (!torch.int, !torch.int) -> !torch.list<int>
    %11792 = torch.aten.permute %381, %11791 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_9378 = torch.constant.int 1
    %int0_9379 = torch.constant.int 0
    %11793 = torch.prim.ListConstruct %int1_9378, %int0_9379 : (!torch.int, !torch.int) -> !torch.list<int>
    %11794 = torch.aten.permute %382, %11793 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_9380 = torch.constant.int 1
    %int0_9381 = torch.constant.int 0
    %11795 = torch.prim.ListConstruct %int1_9380, %int0_9381 : (!torch.int, !torch.int) -> !torch.list<int>
    %11796 = torch.aten.permute %383, %11795 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
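    // Per-shard Q matmul: flatten [4,?,4096] to [4*?,4096], torch.aten.mm against
    // the transposed weight, then view the [?,512] result back to [4,?,512].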
    %int4_9382 = torch.constant.int 4
    %11797 = torch.aten.mul.int %int4_9382, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9383 = torch.constant.int 4096
    %11798 = torch.prim.ListConstruct %11797, %int4096_9383 : (!torch.int, !torch.int) -> !torch.list<int>
    %11799 = torch.aten.view %11773, %11798 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11799, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11800 = torch.aten.mm %11799, %11782 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %11800, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_9384 = torch.constant.int 4
    %int512_9385 = torch.constant.int 512
    %11801 = torch.prim.ListConstruct %int4_9384, %2482, %int512_9385 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11802 = torch.aten.view %11800, %11801 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %11802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_9386 = torch.constant.int 4
    %11803 = torch.aten.mul.int %int4_9386, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9387 = torch.constant.int 4096
    %11804 = torch.prim.ListConstruct %11803, %int4096_9387 : (!torch.int, !torch.int) -> !torch.list<int>
    %11805 = torch.aten.view %11774, %11804 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11805, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11806 = torch.aten.mm %11805, %11784 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %11806, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_9388 = torch.constant.int 4
    %int512_9389 = torch.constant.int 512
    %11807 = torch.prim.ListConstruct %int4_9388, %2482, %int512_9389 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11808 = torch.aten.view %11806, %11807 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %11808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_9390 = torch.constant.int 4
    %11809 = torch.aten.mul.int %int4_9390, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9391 = torch.constant.int 4096
    %11810 = torch.prim.ListConstruct %11809, %int4096_9391 : (!torch.int, !torch.int) -> !torch.list<int>
    %11811 = torch.aten.view %11775, %11810 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11811, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11812 = torch.aten.mm %11811, %11786 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %11812, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_9392 = torch.constant.int 4
    %int512_9393 = torch.constant.int 512
    %11813 = torch.prim.ListConstruct %int4_9392, %2482, %int512_9393 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11814 = torch.aten.view %11812, %11813 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %11814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_9394 = torch.constant.int 4
    %11815 = torch.aten.mul.int %int4_9394, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9395 = torch.constant.int 4096
    %11816 = torch.prim.ListConstruct %11815, %int4096_9395 : (!torch.int, !torch.int) -> !torch.list<int>
    %11817 = torch.aten.view %11776, %11816 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11817, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11818 = torch.aten.mm %11817, %11788 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %11818, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_9396 = torch.constant.int 4
    %int512_9397 = torch.constant.int 512
    %11819 = torch.prim.ListConstruct %int4_9396, %2482, %int512_9397 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11820 = torch.aten.view %11818, %11819 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %11820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_9398 = torch.constant.int 4
    %11821 = torch.aten.mul.int %int4_9398, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9399 = torch.constant.int 4096
    %11822 = torch.prim.ListConstruct %11821, %int4096_9399 : (!torch.int, !torch.int) -> !torch.list<int>
    %11823 = torch.aten.view %11777, %11822 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11823, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11824 = torch.aten.mm %11823, %11790 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %11824, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_9400 = torch.constant.int 4
    %int512_9401 = torch.constant.int 512
    %11825 = torch.prim.ListConstruct %int4_9400, %2482, %int512_9401 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11826 = torch.aten.view %11824, %11825 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %11826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_9402 = torch.constant.int 4
    %11827 = torch.aten.mul.int %int4_9402, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9403 = torch.constant.int 4096
    %11828 = torch.prim.ListConstruct %11827, %int4096_9403 : (!torch.int, !torch.int) -> !torch.list<int>
    %11829 = torch.aten.view %11778, %11828 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11829, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11830 = torch.aten.mm %11829, %11792 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %11830, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_9404 = torch.constant.int 4
    %int512_9405 = torch.constant.int 512
    %11831 = torch.prim.ListConstruct %int4_9404, %2482, %int512_9405 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11832 = torch.aten.view %11830, %11831 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %11832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_9406 = torch.constant.int 4
    %11833 = torch.aten.mul.int %int4_9406, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9407 = torch.constant.int 4096
    %11834 = torch.prim.ListConstruct %11833, %int4096_9407 : (!torch.int, !torch.int) -> !torch.list<int>
    %11835 = torch.aten.view %11779, %11834 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11835, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11836 = torch.aten.mm %11835, %11794 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %11836, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_9408 = torch.constant.int 4
    %int512_9409 = torch.constant.int 512
    %11837 = torch.prim.ListConstruct %int4_9408, %2482, %int512_9409 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11838 = torch.aten.view %11836, %11837 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %11838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_9410 = torch.constant.int 4
    %11839 = torch.aten.mul.int %int4_9410, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9411 = torch.constant.int 4096
    %11840 = torch.prim.ListConstruct %11839, %int4096_9411 : (!torch.int, !torch.int) -> !torch.list<int>
    %11841 = torch.aten.view %11780, %11840 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11841, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11842 = torch.aten.mm %11841, %11796 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %11842, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_9412 = torch.constant.int 4
    %int512_9413 = torch.constant.int 512
    %11843 = torch.prim.ListConstruct %int4_9412, %2482, %int512_9413 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11844 = torch.aten.view %11842, %11843 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %11844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
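    // Transpose the [128,4096] weight shards (%384-%391) to [4096,128]; the
    // 128-wide output and the later [4,?,1,128] reshape suggest the K projection,
    // one KV head per shard.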
    %int1_9414 = torch.constant.int 1
    %int0_9415 = torch.constant.int 0
    %11845 = torch.prim.ListConstruct %int1_9414, %int0_9415 : (!torch.int, !torch.int) -> !torch.list<int>
    %11846 = torch.aten.permute %384, %11845 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9416 = torch.constant.int 1
    %int0_9417 = torch.constant.int 0
    %11847 = torch.prim.ListConstruct %int1_9416, %int0_9417 : (!torch.int, !torch.int) -> !torch.list<int>
    %11848 = torch.aten.permute %385, %11847 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9418 = torch.constant.int 1
    %int0_9419 = torch.constant.int 0
    %11849 = torch.prim.ListConstruct %int1_9418, %int0_9419 : (!torch.int, !torch.int) -> !torch.list<int>
    %11850 = torch.aten.permute %386, %11849 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9420 = torch.constant.int 1
    %int0_9421 = torch.constant.int 0
    %11851 = torch.prim.ListConstruct %int1_9420, %int0_9421 : (!torch.int, !torch.int) -> !torch.list<int>
    %11852 = torch.aten.permute %387, %11851 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9422 = torch.constant.int 1
    %int0_9423 = torch.constant.int 0
    %11853 = torch.prim.ListConstruct %int1_9422, %int0_9423 : (!torch.int, !torch.int) -> !torch.list<int>
    %11854 = torch.aten.permute %388, %11853 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9424 = torch.constant.int 1
    %int0_9425 = torch.constant.int 0
    %11855 = torch.prim.ListConstruct %int1_9424, %int0_9425 : (!torch.int, !torch.int) -> !torch.list<int>
    %11856 = torch.aten.permute %389, %11855 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9426 = torch.constant.int 1
    %int0_9427 = torch.constant.int 0
    %11857 = torch.prim.ListConstruct %int1_9426, %int0_9427 : (!torch.int, !torch.int) -> !torch.list<int>
    %11858 = torch.aten.permute %390, %11857 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9428 = torch.constant.int 1
    %int0_9429 = torch.constant.int 0
    %11859 = torch.prim.ListConstruct %int1_9428, %int0_9429 : (!torch.int, !torch.int) -> !torch.list<int>
    %11860 = torch.aten.permute %391, %11859 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
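    // Per-shard K matmul: same flatten/mm/unflatten pattern as Q, with a
    // 128-wide output -> [4,?,128].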
    %int4_9430 = torch.constant.int 4
    %11861 = torch.aten.mul.int %int4_9430, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9431 = torch.constant.int 4096
    %11862 = torch.prim.ListConstruct %11861, %int4096_9431 : (!torch.int, !torch.int) -> !torch.list<int>
    %11863 = torch.aten.view %11773, %11862 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11863, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11864 = torch.aten.mm %11863, %11846 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11864, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9432 = torch.constant.int 4
    %int128_9433 = torch.constant.int 128
    %11865 = torch.prim.ListConstruct %int4_9432, %2482, %int128_9433 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11866 = torch.aten.view %11864, %11865 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9434 = torch.constant.int 4
    %11867 = torch.aten.mul.int %int4_9434, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9435 = torch.constant.int 4096
    %11868 = torch.prim.ListConstruct %11867, %int4096_9435 : (!torch.int, !torch.int) -> !torch.list<int>
    %11869 = torch.aten.view %11774, %11868 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11869, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11870 = torch.aten.mm %11869, %11848 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11870, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9436 = torch.constant.int 4
    %int128_9437 = torch.constant.int 128
    %11871 = torch.prim.ListConstruct %int4_9436, %2482, %int128_9437 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11872 = torch.aten.view %11870, %11871 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9438 = torch.constant.int 4
    %11873 = torch.aten.mul.int %int4_9438, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9439 = torch.constant.int 4096
    %11874 = torch.prim.ListConstruct %11873, %int4096_9439 : (!torch.int, !torch.int) -> !torch.list<int>
    %11875 = torch.aten.view %11775, %11874 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11875, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11876 = torch.aten.mm %11875, %11850 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11876, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9440 = torch.constant.int 4
    %int128_9441 = torch.constant.int 128
    %11877 = torch.prim.ListConstruct %int4_9440, %2482, %int128_9441 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11878 = torch.aten.view %11876, %11877 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9442 = torch.constant.int 4
    %11879 = torch.aten.mul.int %int4_9442, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9443 = torch.constant.int 4096
    %11880 = torch.prim.ListConstruct %11879, %int4096_9443 : (!torch.int, !torch.int) -> !torch.list<int>
    %11881 = torch.aten.view %11776, %11880 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11881, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11882 = torch.aten.mm %11881, %11852 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11882, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9444 = torch.constant.int 4
    %int128_9445 = torch.constant.int 128
    %11883 = torch.prim.ListConstruct %int4_9444, %2482, %int128_9445 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11884 = torch.aten.view %11882, %11883 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9446 = torch.constant.int 4
    %11885 = torch.aten.mul.int %int4_9446, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9447 = torch.constant.int 4096
    %11886 = torch.prim.ListConstruct %11885, %int4096_9447 : (!torch.int, !torch.int) -> !torch.list<int>
    %11887 = torch.aten.view %11777, %11886 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11887, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11888 = torch.aten.mm %11887, %11854 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11888, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9448 = torch.constant.int 4
    %int128_9449 = torch.constant.int 128
    %11889 = torch.prim.ListConstruct %int4_9448, %2482, %int128_9449 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11890 = torch.aten.view %11888, %11889 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9450 = torch.constant.int 4
    %11891 = torch.aten.mul.int %int4_9450, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9451 = torch.constant.int 4096
    %11892 = torch.prim.ListConstruct %11891, %int4096_9451 : (!torch.int, !torch.int) -> !torch.list<int>
    %11893 = torch.aten.view %11778, %11892 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11893, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11894 = torch.aten.mm %11893, %11856 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11894, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9452 = torch.constant.int 4
    %int128_9453 = torch.constant.int 128
    %11895 = torch.prim.ListConstruct %int4_9452, %2482, %int128_9453 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11896 = torch.aten.view %11894, %11895 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9454 = torch.constant.int 4
    %11897 = torch.aten.mul.int %int4_9454, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9455 = torch.constant.int 4096
    %11898 = torch.prim.ListConstruct %11897, %int4096_9455 : (!torch.int, !torch.int) -> !torch.list<int>
    %11899 = torch.aten.view %11779, %11898 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11899, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11900 = torch.aten.mm %11899, %11858 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11900, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9456 = torch.constant.int 4
    %int128_9457 = torch.constant.int 128
    %11901 = torch.prim.ListConstruct %int4_9456, %2482, %int128_9457 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11902 = torch.aten.view %11900, %11901 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9458 = torch.constant.int 4
    %11903 = torch.aten.mul.int %int4_9458, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9459 = torch.constant.int 4096
    %11904 = torch.prim.ListConstruct %11903, %int4096_9459 : (!torch.int, !torch.int) -> !torch.list<int>
    %11905 = torch.aten.view %11780, %11904 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11905, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11906 = torch.aten.mm %11905, %11860 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11906, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9460 = torch.constant.int 4
    %int128_9461 = torch.constant.int 128
    %11907 = torch.prim.ListConstruct %int4_9460, %2482, %int128_9461 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11908 = torch.aten.view %11906, %11907 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
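    // Transpose a second set of [128,4096] shards (%392-%399), shaped like K;
    // presumably the V projection.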
    %int1_9462 = torch.constant.int 1
    %int0_9463 = torch.constant.int 0
    %11909 = torch.prim.ListConstruct %int1_9462, %int0_9463 : (!torch.int, !torch.int) -> !torch.list<int>
    %11910 = torch.aten.permute %392, %11909 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9464 = torch.constant.int 1
    %int0_9465 = torch.constant.int 0
    %11911 = torch.prim.ListConstruct %int1_9464, %int0_9465 : (!torch.int, !torch.int) -> !torch.list<int>
    %11912 = torch.aten.permute %393, %11911 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9466 = torch.constant.int 1
    %int0_9467 = torch.constant.int 0
    %11913 = torch.prim.ListConstruct %int1_9466, %int0_9467 : (!torch.int, !torch.int) -> !torch.list<int>
    %11914 = torch.aten.permute %394, %11913 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9468 = torch.constant.int 1
    %int0_9469 = torch.constant.int 0
    %11915 = torch.prim.ListConstruct %int1_9468, %int0_9469 : (!torch.int, !torch.int) -> !torch.list<int>
    %11916 = torch.aten.permute %395, %11915 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9470 = torch.constant.int 1
    %int0_9471 = torch.constant.int 0
    %11917 = torch.prim.ListConstruct %int1_9470, %int0_9471 : (!torch.int, !torch.int) -> !torch.list<int>
    %11918 = torch.aten.permute %396, %11917 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9472 = torch.constant.int 1
    %int0_9473 = torch.constant.int 0
    %11919 = torch.prim.ListConstruct %int1_9472, %int0_9473 : (!torch.int, !torch.int) -> !torch.list<int>
    %11920 = torch.aten.permute %397, %11919 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9474 = torch.constant.int 1
    %int0_9475 = torch.constant.int 0
    %11921 = torch.prim.ListConstruct %int1_9474, %int0_9475 : (!torch.int, !torch.int) -> !torch.list<int>
    %11922 = torch.aten.permute %398, %11921 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_9476 = torch.constant.int 1
    %int0_9477 = torch.constant.int 0
    %11923 = torch.prim.ListConstruct %int1_9476, %int0_9477 : (!torch.int, !torch.int) -> !torch.list<int>
    %11924 = torch.aten.permute %399, %11923 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
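    // Per-shard V matmul: [4*?,4096] x [4096,128] -> [?,128], reshaped to [4,?,128].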
    %int4_9478 = torch.constant.int 4
    %11925 = torch.aten.mul.int %int4_9478, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9479 = torch.constant.int 4096
    %11926 = torch.prim.ListConstruct %11925, %int4096_9479 : (!torch.int, !torch.int) -> !torch.list<int>
    %11927 = torch.aten.view %11773, %11926 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11927, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11928 = torch.aten.mm %11927, %11910 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11928, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9480 = torch.constant.int 4
    %int128_9481 = torch.constant.int 128
    %11929 = torch.prim.ListConstruct %int4_9480, %2482, %int128_9481 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11930 = torch.aten.view %11928, %11929 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9482 = torch.constant.int 4
    %11931 = torch.aten.mul.int %int4_9482, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9483 = torch.constant.int 4096
    %11932 = torch.prim.ListConstruct %11931, %int4096_9483 : (!torch.int, !torch.int) -> !torch.list<int>
    %11933 = torch.aten.view %11774, %11932 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11933, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11934 = torch.aten.mm %11933, %11912 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11934, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9484 = torch.constant.int 4
    %int128_9485 = torch.constant.int 128
    %11935 = torch.prim.ListConstruct %int4_9484, %2482, %int128_9485 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11936 = torch.aten.view %11934, %11935 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9486 = torch.constant.int 4
    %11937 = torch.aten.mul.int %int4_9486, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9487 = torch.constant.int 4096
    %11938 = torch.prim.ListConstruct %11937, %int4096_9487 : (!torch.int, !torch.int) -> !torch.list<int>
    %11939 = torch.aten.view %11775, %11938 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11939, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11940 = torch.aten.mm %11939, %11914 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11940, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9488 = torch.constant.int 4
    %int128_9489 = torch.constant.int 128
    %11941 = torch.prim.ListConstruct %int4_9488, %2482, %int128_9489 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11942 = torch.aten.view %11940, %11941 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9490 = torch.constant.int 4
    %11943 = torch.aten.mul.int %int4_9490, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9491 = torch.constant.int 4096
    %11944 = torch.prim.ListConstruct %11943, %int4096_9491 : (!torch.int, !torch.int) -> !torch.list<int>
    %11945 = torch.aten.view %11776, %11944 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11945, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11946 = torch.aten.mm %11945, %11916 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11946, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9492 = torch.constant.int 4
    %int128_9493 = torch.constant.int 128
    %11947 = torch.prim.ListConstruct %int4_9492, %2482, %int128_9493 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11948 = torch.aten.view %11946, %11947 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9494 = torch.constant.int 4
    %11949 = torch.aten.mul.int %int4_9494, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9495 = torch.constant.int 4096
    %11950 = torch.prim.ListConstruct %11949, %int4096_9495 : (!torch.int, !torch.int) -> !torch.list<int>
    %11951 = torch.aten.view %11777, %11950 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11951, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11952 = torch.aten.mm %11951, %11918 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11952, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9496 = torch.constant.int 4
    %int128_9497 = torch.constant.int 128
    %11953 = torch.prim.ListConstruct %int4_9496, %2482, %int128_9497 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11954 = torch.aten.view %11952, %11953 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9498 = torch.constant.int 4
    %11955 = torch.aten.mul.int %int4_9498, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9499 = torch.constant.int 4096
    %11956 = torch.prim.ListConstruct %11955, %int4096_9499 : (!torch.int, !torch.int) -> !torch.list<int>
    %11957 = torch.aten.view %11778, %11956 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11957, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11958 = torch.aten.mm %11957, %11920 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11958, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9500 = torch.constant.int 4
    %int128_9501 = torch.constant.int 128
    %11959 = torch.prim.ListConstruct %int4_9500, %2482, %int128_9501 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11960 = torch.aten.view %11958, %11959 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9502 = torch.constant.int 4
    %11961 = torch.aten.mul.int %int4_9502, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9503 = torch.constant.int 4096
    %11962 = torch.prim.ListConstruct %11961, %int4096_9503 : (!torch.int, !torch.int) -> !torch.list<int>
    %11963 = torch.aten.view %11779, %11962 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11963, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11964 = torch.aten.mm %11963, %11922 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11964, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9504 = torch.constant.int 4
    %int128_9505 = torch.constant.int 128
    %11965 = torch.prim.ListConstruct %int4_9504, %2482, %int128_9505 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11966 = torch.aten.view %11964, %11965 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_9506 = torch.constant.int 4
    %11967 = torch.aten.mul.int %int4_9506, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_9507 = torch.constant.int 4096
    %11968 = torch.prim.ListConstruct %11967, %int4096_9507 : (!torch.int, !torch.int) -> !torch.list<int>
    %11969 = torch.aten.view %11780, %11968 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %11969, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %11970 = torch.aten.mm %11969, %11924 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %11970, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_9508 = torch.constant.int 4
    %int128_9509 = torch.constant.int 128
    %11971 = torch.prim.ListConstruct %int4_9508, %2482, %int128_9509 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11972 = torch.aten.view %11970, %11971 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %11972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
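    // Split each Q shard's feature dimension into heads: [4,?,512] -> [4,?,4,128],
    // four 128-wide query heads per shard.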
    %int4_9510 = torch.constant.int 4
    %int4_9511 = torch.constant.int 4
    %int128_9512 = torch.constant.int 128
    %11973 = torch.prim.ListConstruct %int4_9510, %2482, %int4_9511, %int128_9512 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11974 = torch.aten.view %11802, %11973 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %11974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_9513 = torch.constant.int 4
    %int4_9514 = torch.constant.int 4
    %int128_9515 = torch.constant.int 128
    %11975 = torch.prim.ListConstruct %int4_9513, %2482, %int4_9514, %int128_9515 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11976 = torch.aten.view %11808, %11975 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %11976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_9516 = torch.constant.int 4
    %int4_9517 = torch.constant.int 4
    %int128_9518 = torch.constant.int 128
    %11977 = torch.prim.ListConstruct %int4_9516, %2482, %int4_9517, %int128_9518 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11978 = torch.aten.view %11814, %11977 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %11978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_9519 = torch.constant.int 4
    %int4_9520 = torch.constant.int 4
    %int128_9521 = torch.constant.int 128
    %11979 = torch.prim.ListConstruct %int4_9519, %2482, %int4_9520, %int128_9521 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11980 = torch.aten.view %11820, %11979 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %11980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_9522 = torch.constant.int 4
    %int4_9523 = torch.constant.int 4
    %int128_9524 = torch.constant.int 128
    %11981 = torch.prim.ListConstruct %int4_9522, %2482, %int4_9523, %int128_9524 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11982 = torch.aten.view %11826, %11981 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %11982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_9525 = torch.constant.int 4
    %int4_9526 = torch.constant.int 4
    %int128_9527 = torch.constant.int 128
    %11983 = torch.prim.ListConstruct %int4_9525, %2482, %int4_9526, %int128_9527 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11984 = torch.aten.view %11832, %11983 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %11984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_9528 = torch.constant.int 4
    %int4_9529 = torch.constant.int 4
    %int128_9530 = torch.constant.int 128
    %11985 = torch.prim.ListConstruct %int4_9528, %2482, %int4_9529, %int128_9530 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11986 = torch.aten.view %11838, %11985 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %11986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_9531 = torch.constant.int 4
    %int4_9532 = torch.constant.int 4
    %int128_9533 = torch.constant.int 128
    %11987 = torch.prim.ListConstruct %int4_9531, %2482, %int4_9532, %int128_9533 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11988 = torch.aten.view %11844, %11987 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %11988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
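    // Unflatten sixteen [4,?,128] tensors to [4,?,1,128]. Interpretation (not stated
    // in the IR): the per-device key and value projections, one KV head of
    // head_dim 128 per shard.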
    %int4_9534 = torch.constant.int 4
    %int1_9535 = torch.constant.int 1
    %int128_9536 = torch.constant.int 128
    %11989 = torch.prim.ListConstruct %int4_9534, %2482, %int1_9535, %int128_9536 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11990 = torch.aten.view %11866, %11989 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %11990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9537 = torch.constant.int 4
    %int1_9538 = torch.constant.int 1
    %int128_9539 = torch.constant.int 128
    %11991 = torch.prim.ListConstruct %int4_9537, %2482, %int1_9538, %int128_9539 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11992 = torch.aten.view %11872, %11991 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %11992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9540 = torch.constant.int 4
    %int1_9541 = torch.constant.int 1
    %int128_9542 = torch.constant.int 128
    %11993 = torch.prim.ListConstruct %int4_9540, %2482, %int1_9541, %int128_9542 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11994 = torch.aten.view %11878, %11993 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %11994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9543 = torch.constant.int 4
    %int1_9544 = torch.constant.int 1
    %int128_9545 = torch.constant.int 128
    %11995 = torch.prim.ListConstruct %int4_9543, %2482, %int1_9544, %int128_9545 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11996 = torch.aten.view %11884, %11995 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %11996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9546 = torch.constant.int 4
    %int1_9547 = torch.constant.int 1
    %int128_9548 = torch.constant.int 128
    %11997 = torch.prim.ListConstruct %int4_9546, %2482, %int1_9547, %int128_9548 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %11998 = torch.aten.view %11890, %11997 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %11998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9549 = torch.constant.int 4
    %int1_9550 = torch.constant.int 1
    %int128_9551 = torch.constant.int 128
    %11999 = torch.prim.ListConstruct %int4_9549, %2482, %int1_9550, %int128_9551 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12000 = torch.aten.view %11896, %11999 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9552 = torch.constant.int 4
    %int1_9553 = torch.constant.int 1
    %int128_9554 = torch.constant.int 128
    %12001 = torch.prim.ListConstruct %int4_9552, %2482, %int1_9553, %int128_9554 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12002 = torch.aten.view %11902, %12001 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9555 = torch.constant.int 4
    %int1_9556 = torch.constant.int 1
    %int128_9557 = torch.constant.int 128
    %12003 = torch.prim.ListConstruct %int4_9555, %2482, %int1_9556, %int128_9557 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12004 = torch.aten.view %11908, %12003 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9558 = torch.constant.int 4
    %int1_9559 = torch.constant.int 1
    %int128_9560 = torch.constant.int 128
    %12005 = torch.prim.ListConstruct %int4_9558, %2482, %int1_9559, %int128_9560 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12006 = torch.aten.view %11930, %12005 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9561 = torch.constant.int 4
    %int1_9562 = torch.constant.int 1
    %int128_9563 = torch.constant.int 128
    %12007 = torch.prim.ListConstruct %int4_9561, %2482, %int1_9562, %int128_9563 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12008 = torch.aten.view %11936, %12007 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9564 = torch.constant.int 4
    %int1_9565 = torch.constant.int 1
    %int128_9566 = torch.constant.int 128
    %12009 = torch.prim.ListConstruct %int4_9564, %2482, %int1_9565, %int128_9566 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12010 = torch.aten.view %11942, %12009 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9567 = torch.constant.int 4
    %int1_9568 = torch.constant.int 1
    %int128_9569 = torch.constant.int 128
    %12011 = torch.prim.ListConstruct %int4_9567, %2482, %int1_9568, %int128_9569 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12012 = torch.aten.view %11948, %12011 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9570 = torch.constant.int 4
    %int1_9571 = torch.constant.int 1
    %int128_9572 = torch.constant.int 128
    %12013 = torch.prim.ListConstruct %int4_9570, %2482, %int1_9571, %int128_9572 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12014 = torch.aten.view %11954, %12013 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9573 = torch.constant.int 4
    %int1_9574 = torch.constant.int 1
    %int128_9575 = torch.constant.int 128
    %12015 = torch.prim.ListConstruct %int4_9573, %2482, %int1_9574, %int128_9575 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12016 = torch.aten.view %11960, %12015 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9576 = torch.constant.int 4
    %int1_9577 = torch.constant.int 1
    %int128_9578 = torch.constant.int 128
    %12017 = torch.prim.ListConstruct %int4_9576, %2482, %int1_9577, %int128_9578 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12018 = torch.aten.view %11966, %12017 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_9579 = torch.constant.int 4
    %int1_9580 = torch.constant.int 1
    %int128_9581 = torch.constant.int 128
    %12019 = torch.prim.ListConstruct %int4_9579, %2482, %int1_9580, %int128_9581 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12020 = torch.aten.view %11972, %12019 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
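    // Build the RoPE frequency table on the host: angles theta[p, i] = p * 5.0e5^(-2i/128)
    // for positions p < 131072 and i < 64, materialized as e^(i*theta) in a
    // [131072,64] complex<f32> tensor (rope_theta = 5.0e5).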
    %int131072_9582 = torch.constant.int 131072
    %none_9583 = torch.constant.none
    %none_9584 = torch.constant.none
    %cpu_9585 = torch.constant.device "cpu"
    %false_9586 = torch.constant.bool false
    %12021 = torch.aten.arange %int131072_9582, %none_9583, %none_9584, %cpu_9585, %false_9586 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_9587 = torch.constant.int 0
    %int128_9588 = torch.constant.int 128
    %int2_9589 = torch.constant.int 2
    %none_9590 = torch.constant.none
    %none_9591 = torch.constant.none
    %cpu_9592 = torch.constant.device "cpu"
    %false_9593 = torch.constant.bool false
    %12022 = torch.aten.arange.start_step %int0_9587, %int128_9588, %int2_9589, %none_9590, %none_9591, %cpu_9592, %false_9593 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_9594 = torch.constant.int 0
    %int0_9595 = torch.constant.int 0
    %int64_9596 = torch.constant.int 64
    %int1_9597 = torch.constant.int 1
    %12023 = torch.aten.slice.Tensor %12022, %int0_9594, %int0_9595, %int64_9596, %int1_9597 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_9598 = torch.constant.int 6
    %12024 = torch.prims.convert_element_type %12023, %int6_9598 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_9599 = torch.constant.int 128
    %12025 = torch.aten.div.Scalar %12024, %int128_9599 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_9600 = torch.constant.float 5.000000e+05
    %12026 = torch.aten.pow.Scalar %float5.000000e05_9600, %12025 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %12027 = torch.aten.reciprocal %12026 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_9601 = torch.constant.float 1.000000e+00
    %12028 = torch.aten.mul.Scalar %12027, %float1.000000e00_9601 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_9602 = torch.constant.int 131072
    %int1_9603 = torch.constant.int 1
    %12029 = torch.prim.ListConstruct %int131072_9602, %int1_9603 : (!torch.int, !torch.int) -> !torch.list<int>
    %12030 = torch.aten.view %12021, %12029 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %12031 = torch.aten.mul.Tensor %12030, %12028 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %12032 = torch.aten.cos %12031 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %12033 = torch.aten.sin %12031 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %12034 = torch.aten.complex %12032, %12033 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
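    // Transfer one copy of the complex table to each of the eight devices
    // (%12037, %12040, %12043, %12046, %12049, %12052, %12055, %12058).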
    %12035 = torch_c.to_builtin_tensor %12034 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12036 = flow.tensor.transfer %12035 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %12037 = torch_c.from_builtin_tensor %12036 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12038 = torch_c.to_builtin_tensor %12034 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12039 = flow.tensor.transfer %12038 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %12040 = torch_c.from_builtin_tensor %12039 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12041 = torch_c.to_builtin_tensor %12034 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12042 = flow.tensor.transfer %12041 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %12043 = torch_c.from_builtin_tensor %12042 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12044 = torch_c.to_builtin_tensor %12034 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12045 = flow.tensor.transfer %12044 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %12046 = torch_c.from_builtin_tensor %12045 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12047 = torch_c.to_builtin_tensor %12034 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12048 = flow.tensor.transfer %12047 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %12049 = torch_c.from_builtin_tensor %12048 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12050 = torch_c.to_builtin_tensor %12034 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12051 = flow.tensor.transfer %12050 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %12052 = torch_c.from_builtin_tensor %12051 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12053 = torch_c.to_builtin_tensor %12034 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12054 = flow.tensor.transfer %12053 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %12055 = torch_c.from_builtin_tensor %12054 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12056 = torch_c.to_builtin_tensor %12034 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12057 = flow.tensor.transfer %12056 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %12058 = torch_c.from_builtin_tensor %12057 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
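    // Apply RoPE to the device-0 query shard %11974: slice the table to the runtime
    // sequence length (%12059 = dim 1 of %11802), broadcast it to [1,?,1,64], bitcast
    // adjacent f16 pairs to complex<f16>, multiply by the table, bitcast back to
    // [4,?,4,128] f32, and truncate to f16 (%12073).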
    %int1_9604 = torch.constant.int 1
    %12059 = torch.aten.size.int %11802, %int1_9604 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_9605 = torch.constant.int 0
    %12060 = torch.aten.add.int %int0_9605, %12059 : !torch.int, !torch.int -> !torch.int
    %int0_9606 = torch.constant.int 0
    %int0_9607 = torch.constant.int 0
    %int1_9608 = torch.constant.int 1
    %12061 = torch.aten.slice.Tensor %12037, %int0_9606, %int0_9607, %12060, %int1_9608 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12061, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9609 = torch.constant.int 1
    %int0_9610 = torch.constant.int 0
    %int9223372036854775807_9611 = torch.constant.int 9223372036854775807
    %int1_9612 = torch.constant.int 1
    %12062 = torch.aten.slice.Tensor %12061, %int1_9609, %int0_9610, %int9223372036854775807_9611, %int1_9612 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12062, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9613 = torch.constant.int 0
    %12063 = torch.aten.unsqueeze %12062, %int0_9613 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12063, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9614 = torch.constant.int 2
    %12064 = torch.aten.unsqueeze %12063, %int2_9614 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12064, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9615 = torch.constant.int 3
    %int0_9616 = torch.constant.int 0
    %int9223372036854775807_9617 = torch.constant.int 9223372036854775807
    %int1_9618 = torch.constant.int 1
    %12065 = torch.aten.slice.Tensor %12064, %int3_9615, %int0_9616, %int9223372036854775807_9617, %int1_9618 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12065, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12066 = torch_c.to_builtin_tensor %11974 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_9619 = arith.constant 1 : index
    %dim_9620 = tensor.dim %12066, %c1_9619 : tensor<4x?x4x128xf16>
    %12067 = flow.tensor.bitcast %12066 : tensor<4x?x4x128xf16>{%dim_9620} -> tensor<4x?x4x64xcomplex<f16>>{%dim_9620}
    %12068 = torch_c.from_builtin_tensor %12067 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %12068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %12069 = torch.aten.mul.Tensor %12068, %12065 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %12069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %12070 = torch_c.to_builtin_tensor %12069 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_9621 = arith.constant 1 : index
    %dim_9622 = tensor.dim %12070, %c1_9621 : tensor<4x?x4x64xcomplex<f32>>
    %12071 = flow.tensor.bitcast %12070 : tensor<4x?x4x64xcomplex<f32>>{%dim_9622} -> tensor<4x?x4x128xf32>{%dim_9622}
    %12072 = torch_c.from_builtin_tensor %12071 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %12072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_9623 = torch.constant.int 5
    %12073 = torch.prims.convert_element_type %12072, %int5_9623 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
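    // Device 1: the same rotation applied to query shard %11976, yielding %12088.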
    %int1_9624 = torch.constant.int 1
    %12074 = torch.aten.size.int %11808, %int1_9624 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_9625 = torch.constant.int 0
    %12075 = torch.aten.add.int %int0_9625, %12074 : !torch.int, !torch.int -> !torch.int
    %int0_9626 = torch.constant.int 0
    %int0_9627 = torch.constant.int 0
    %int1_9628 = torch.constant.int 1
    %12076 = torch.aten.slice.Tensor %12040, %int0_9626, %int0_9627, %12075, %int1_9628 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12076, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9629 = torch.constant.int 1
    %int0_9630 = torch.constant.int 0
    %int9223372036854775807_9631 = torch.constant.int 9223372036854775807
    %int1_9632 = torch.constant.int 1
    %12077 = torch.aten.slice.Tensor %12076, %int1_9629, %int0_9630, %int9223372036854775807_9631, %int1_9632 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12077, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9633 = torch.constant.int 0
    %12078 = torch.aten.unsqueeze %12077, %int0_9633 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12078, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9634 = torch.constant.int 2
    %12079 = torch.aten.unsqueeze %12078, %int2_9634 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12079, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9635 = torch.constant.int 3
    %int0_9636 = torch.constant.int 0
    %int9223372036854775807_9637 = torch.constant.int 9223372036854775807
    %int1_9638 = torch.constant.int 1
    %12080 = torch.aten.slice.Tensor %12079, %int3_9635, %int0_9636, %int9223372036854775807_9637, %int1_9638 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12080, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12081 = torch_c.to_builtin_tensor %11976 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_9639 = arith.constant 1 : index
    %dim_9640 = tensor.dim %12081, %c1_9639 : tensor<4x?x4x128xf16>
    %12082 = flow.tensor.bitcast %12081 : tensor<4x?x4x128xf16>{%dim_9640} -> tensor<4x?x4x64xcomplex<f16>>{%dim_9640}
    %12083 = torch_c.from_builtin_tensor %12082 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %12083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %12084 = torch.aten.mul.Tensor %12083, %12080 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %12084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %12085 = torch_c.to_builtin_tensor %12084 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_9641 = arith.constant 1 : index
    %dim_9642 = tensor.dim %12085, %c1_9641 : tensor<4x?x4x64xcomplex<f32>>
    %12086 = flow.tensor.bitcast %12085 : tensor<4x?x4x64xcomplex<f32>>{%dim_9642} -> tensor<4x?x4x128xf32>{%dim_9642}
    %12087 = torch_c.from_builtin_tensor %12086 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %12087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_9643 = torch.constant.int 5
    %12088 = torch.prims.convert_element_type %12087, %int5_9643 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
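    // Device 2: the same rotation applied to query shard %11978, yielding %12103.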
    %int1_9644 = torch.constant.int 1
    %12089 = torch.aten.size.int %11814, %int1_9644 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_9645 = torch.constant.int 0
    %12090 = torch.aten.add.int %int0_9645, %12089 : !torch.int, !torch.int -> !torch.int
    %int0_9646 = torch.constant.int 0
    %int0_9647 = torch.constant.int 0
    %int1_9648 = torch.constant.int 1
    %12091 = torch.aten.slice.Tensor %12043, %int0_9646, %int0_9647, %12090, %int1_9648 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12091, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9649 = torch.constant.int 1
    %int0_9650 = torch.constant.int 0
    %int9223372036854775807_9651 = torch.constant.int 9223372036854775807
    %int1_9652 = torch.constant.int 1
    %12092 = torch.aten.slice.Tensor %12091, %int1_9649, %int0_9650, %int9223372036854775807_9651, %int1_9652 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12092, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9653 = torch.constant.int 0
    %12093 = torch.aten.unsqueeze %12092, %int0_9653 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12093, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9654 = torch.constant.int 2
    %12094 = torch.aten.unsqueeze %12093, %int2_9654 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12094, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9655 = torch.constant.int 3
    %int0_9656 = torch.constant.int 0
    %int9223372036854775807_9657 = torch.constant.int 9223372036854775807
    %int1_9658 = torch.constant.int 1
    %12095 = torch.aten.slice.Tensor %12094, %int3_9655, %int0_9656, %int9223372036854775807_9657, %int1_9658 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12095, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12096 = torch_c.to_builtin_tensor %11978 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_9659 = arith.constant 1 : index
    %dim_9660 = tensor.dim %12096, %c1_9659 : tensor<4x?x4x128xf16>
    %12097 = flow.tensor.bitcast %12096 : tensor<4x?x4x128xf16>{%dim_9660} -> tensor<4x?x4x64xcomplex<f16>>{%dim_9660}
    %12098 = torch_c.from_builtin_tensor %12097 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %12098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %12099 = torch.aten.mul.Tensor %12098, %12095 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %12099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %12100 = torch_c.to_builtin_tensor %12099 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_9661 = arith.constant 1 : index
    %dim_9662 = tensor.dim %12100, %c1_9661 : tensor<4x?x4x64xcomplex<f32>>
    %12101 = flow.tensor.bitcast %12100 : tensor<4x?x4x64xcomplex<f32>>{%dim_9662} -> tensor<4x?x4x128xf32>{%dim_9662}
    %12102 = torch_c.from_builtin_tensor %12101 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %12102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_9663 = torch.constant.int 5
    %12103 = torch.prims.convert_element_type %12102, %int5_9663 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
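    // Device 3: the same rotation applied to query shard %11980, yielding %12118.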
    %int1_9664 = torch.constant.int 1
    %12104 = torch.aten.size.int %11820, %int1_9664 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_9665 = torch.constant.int 0
    %12105 = torch.aten.add.int %int0_9665, %12104 : !torch.int, !torch.int -> !torch.int
    %int0_9666 = torch.constant.int 0
    %int0_9667 = torch.constant.int 0
    %int1_9668 = torch.constant.int 1
    %12106 = torch.aten.slice.Tensor %12046, %int0_9666, %int0_9667, %12105, %int1_9668 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12106, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9669 = torch.constant.int 1
    %int0_9670 = torch.constant.int 0
    %int9223372036854775807_9671 = torch.constant.int 9223372036854775807
    %int1_9672 = torch.constant.int 1
    %12107 = torch.aten.slice.Tensor %12106, %int1_9669, %int0_9670, %int9223372036854775807_9671, %int1_9672 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12107, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9673 = torch.constant.int 0
    %12108 = torch.aten.unsqueeze %12107, %int0_9673 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12108, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9674 = torch.constant.int 2
    %12109 = torch.aten.unsqueeze %12108, %int2_9674 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12109, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9675 = torch.constant.int 3
    %int0_9676 = torch.constant.int 0
    %int9223372036854775807_9677 = torch.constant.int 9223372036854775807
    %int1_9678 = torch.constant.int 1
    %12110 = torch.aten.slice.Tensor %12109, %int3_9675, %int0_9676, %int9223372036854775807_9677, %int1_9678 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12110, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12111 = torch_c.to_builtin_tensor %11980 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_9679 = arith.constant 1 : index
    %dim_9680 = tensor.dim %12111, %c1_9679 : tensor<4x?x4x128xf16>
    %12112 = flow.tensor.bitcast %12111 : tensor<4x?x4x128xf16>{%dim_9680} -> tensor<4x?x4x64xcomplex<f16>>{%dim_9680}
    %12113 = torch_c.from_builtin_tensor %12112 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %12113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %12114 = torch.aten.mul.Tensor %12113, %12110 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %12114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %12115 = torch_c.to_builtin_tensor %12114 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_9681 = arith.constant 1 : index
    %dim_9682 = tensor.dim %12115, %c1_9681 : tensor<4x?x4x64xcomplex<f32>>
    %12116 = flow.tensor.bitcast %12115 : tensor<4x?x4x64xcomplex<f32>>{%dim_9682} -> tensor<4x?x4x128xf32>{%dim_9682}
    %12117 = torch_c.from_builtin_tensor %12116 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %12117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_9683 = torch.constant.int 5
    %12118 = torch.prims.convert_element_type %12117, %int5_9683 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
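    // Device 4: the same rotation applied to query shard %11982, yielding %12133.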
    %int1_9684 = torch.constant.int 1
    %12119 = torch.aten.size.int %11826, %int1_9684 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_9685 = torch.constant.int 0
    %12120 = torch.aten.add.int %int0_9685, %12119 : !torch.int, !torch.int -> !torch.int
    %int0_9686 = torch.constant.int 0
    %int0_9687 = torch.constant.int 0
    %int1_9688 = torch.constant.int 1
    %12121 = torch.aten.slice.Tensor %12049, %int0_9686, %int0_9687, %12120, %int1_9688 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12121, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9689 = torch.constant.int 1
    %int0_9690 = torch.constant.int 0
    %int9223372036854775807_9691 = torch.constant.int 9223372036854775807
    %int1_9692 = torch.constant.int 1
    %12122 = torch.aten.slice.Tensor %12121, %int1_9689, %int0_9690, %int9223372036854775807_9691, %int1_9692 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12122, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9693 = torch.constant.int 0
    %12123 = torch.aten.unsqueeze %12122, %int0_9693 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12123, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9694 = torch.constant.int 2
    %12124 = torch.aten.unsqueeze %12123, %int2_9694 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12124, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9695 = torch.constant.int 3
    %int0_9696 = torch.constant.int 0
    %int9223372036854775807_9697 = torch.constant.int 9223372036854775807
    %int1_9698 = torch.constant.int 1
    %12125 = torch.aten.slice.Tensor %12124, %int3_9695, %int0_9696, %int9223372036854775807_9697, %int1_9698 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12125, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12126 = torch_c.to_builtin_tensor %11982 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_9699 = arith.constant 1 : index
    %dim_9700 = tensor.dim %12126, %c1_9699 : tensor<4x?x4x128xf16>
    %12127 = flow.tensor.bitcast %12126 : tensor<4x?x4x128xf16>{%dim_9700} -> tensor<4x?x4x64xcomplex<f16>>{%dim_9700}
    %12128 = torch_c.from_builtin_tensor %12127 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %12128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %12129 = torch.aten.mul.Tensor %12128, %12125 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %12129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %12130 = torch_c.to_builtin_tensor %12129 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_9701 = arith.constant 1 : index
    %dim_9702 = tensor.dim %12130, %c1_9701 : tensor<4x?x4x64xcomplex<f32>>
    %12131 = flow.tensor.bitcast %12130 : tensor<4x?x4x64xcomplex<f32>>{%dim_9702} -> tensor<4x?x4x128xf32>{%dim_9702}
    %12132 = torch_c.from_builtin_tensor %12131 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %12132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_9703 = torch.constant.int 5
    %12133 = torch.prims.convert_element_type %12132, %int5_9703 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
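    // Device 5: the same rotation applied to query shard %11984, yielding %12148.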
    %int1_9704 = torch.constant.int 1
    %12134 = torch.aten.size.int %11832, %int1_9704 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_9705 = torch.constant.int 0
    %12135 = torch.aten.add.int %int0_9705, %12134 : !torch.int, !torch.int -> !torch.int
    %int0_9706 = torch.constant.int 0
    %int0_9707 = torch.constant.int 0
    %int1_9708 = torch.constant.int 1
    %12136 = torch.aten.slice.Tensor %12052, %int0_9706, %int0_9707, %12135, %int1_9708 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12136, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9709 = torch.constant.int 1
    %int0_9710 = torch.constant.int 0
    %int9223372036854775807_9711 = torch.constant.int 9223372036854775807
    %int1_9712 = torch.constant.int 1
    %12137 = torch.aten.slice.Tensor %12136, %int1_9709, %int0_9710, %int9223372036854775807_9711, %int1_9712 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12137, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9713 = torch.constant.int 0
    %12138 = torch.aten.unsqueeze %12137, %int0_9713 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12138, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9714 = torch.constant.int 2
    %12139 = torch.aten.unsqueeze %12138, %int2_9714 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12139, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9715 = torch.constant.int 3
    %int0_9716 = torch.constant.int 0
    %int9223372036854775807_9717 = torch.constant.int 9223372036854775807
    %int1_9718 = torch.constant.int 1
    %12140 = torch.aten.slice.Tensor %12139, %int3_9715, %int0_9716, %int9223372036854775807_9717, %int1_9718 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12140, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12141 = torch_c.to_builtin_tensor %11984 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_9719 = arith.constant 1 : index
    %dim_9720 = tensor.dim %12141, %c1_9719 : tensor<4x?x4x128xf16>
    %12142 = flow.tensor.bitcast %12141 : tensor<4x?x4x128xf16>{%dim_9720} -> tensor<4x?x4x64xcomplex<f16>>{%dim_9720}
    %12143 = torch_c.from_builtin_tensor %12142 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %12143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %12144 = torch.aten.mul.Tensor %12143, %12140 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %12144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %12145 = torch_c.to_builtin_tensor %12144 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_9721 = arith.constant 1 : index
    %dim_9722 = tensor.dim %12145, %c1_9721 : tensor<4x?x4x64xcomplex<f32>>
    %12146 = flow.tensor.bitcast %12145 : tensor<4x?x4x64xcomplex<f32>>{%dim_9722} -> tensor<4x?x4x128xf32>{%dim_9722}
    %12147 = torch_c.from_builtin_tensor %12146 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %12147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_9723 = torch.constant.int 5
    %12148 = torch.prims.convert_element_type %12147, %int5_9723 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
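    // Device 6: the same rotation applied to query shard %11986, yielding %12163.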
    %int1_9724 = torch.constant.int 1
    %12149 = torch.aten.size.int %11838, %int1_9724 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_9725 = torch.constant.int 0
    %12150 = torch.aten.add.int %int0_9725, %12149 : !torch.int, !torch.int -> !torch.int
    %int0_9726 = torch.constant.int 0
    %int0_9727 = torch.constant.int 0
    %int1_9728 = torch.constant.int 1
    %12151 = torch.aten.slice.Tensor %12055, %int0_9726, %int0_9727, %12150, %int1_9728 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12151, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9729 = torch.constant.int 1
    %int0_9730 = torch.constant.int 0
    %int9223372036854775807_9731 = torch.constant.int 9223372036854775807
    %int1_9732 = torch.constant.int 1
    %12152 = torch.aten.slice.Tensor %12151, %int1_9729, %int0_9730, %int9223372036854775807_9731, %int1_9732 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12152, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9733 = torch.constant.int 0
    %12153 = torch.aten.unsqueeze %12152, %int0_9733 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12153, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9734 = torch.constant.int 2
    %12154 = torch.aten.unsqueeze %12153, %int2_9734 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12154, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9735 = torch.constant.int 3
    %int0_9736 = torch.constant.int 0
    %int9223372036854775807_9737 = torch.constant.int 9223372036854775807
    %int1_9738 = torch.constant.int 1
    %12155 = torch.aten.slice.Tensor %12154, %int3_9735, %int0_9736, %int9223372036854775807_9737, %int1_9738 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12155, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12156 = torch_c.to_builtin_tensor %11986 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_9739 = arith.constant 1 : index
    %dim_9740 = tensor.dim %12156, %c1_9739 : tensor<4x?x4x128xf16>
    %12157 = flow.tensor.bitcast %12156 : tensor<4x?x4x128xf16>{%dim_9740} -> tensor<4x?x4x64xcomplex<f16>>{%dim_9740}
    %12158 = torch_c.from_builtin_tensor %12157 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %12158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %12159 = torch.aten.mul.Tensor %12158, %12155 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %12159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %12160 = torch_c.to_builtin_tensor %12159 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_9741 = arith.constant 1 : index
    %dim_9742 = tensor.dim %12160, %c1_9741 : tensor<4x?x4x64xcomplex<f32>>
    %12161 = flow.tensor.bitcast %12160 : tensor<4x?x4x64xcomplex<f32>>{%dim_9742} -> tensor<4x?x4x128xf32>{%dim_9742}
    %12162 = torch_c.from_builtin_tensor %12161 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %12162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_9743 = torch.constant.int 5
    %12163 = torch.prims.convert_element_type %12162, %int5_9743 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
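    // Device 7: the same rotation applied to query shard %11988, yielding %12178.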
    %int1_9744 = torch.constant.int 1
    %12164 = torch.aten.size.int %11844, %int1_9744 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_9745 = torch.constant.int 0
    %12165 = torch.aten.add.int %int0_9745, %12164 : !torch.int, !torch.int -> !torch.int
    %int0_9746 = torch.constant.int 0
    %int0_9747 = torch.constant.int 0
    %int1_9748 = torch.constant.int 1
    %12166 = torch.aten.slice.Tensor %12058, %int0_9746, %int0_9747, %12165, %int1_9748 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12166, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9749 = torch.constant.int 1
    %int0_9750 = torch.constant.int 0
    %int9223372036854775807_9751 = torch.constant.int 9223372036854775807
    %int1_9752 = torch.constant.int 1
    %12167 = torch.aten.slice.Tensor %12166, %int1_9749, %int0_9750, %int9223372036854775807_9751, %int1_9752 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12167, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9753 = torch.constant.int 0
    %12168 = torch.aten.unsqueeze %12167, %int0_9753 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12168, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9754 = torch.constant.int 2
    %12169 = torch.aten.unsqueeze %12168, %int2_9754 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12169, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9755 = torch.constant.int 3
    %int0_9756 = torch.constant.int 0
    %int9223372036854775807_9757 = torch.constant.int 9223372036854775807
    %int1_9758 = torch.constant.int 1
    %12170 = torch.aten.slice.Tensor %12169, %int3_9755, %int0_9756, %int9223372036854775807_9757, %int1_9758 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12170, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12171 = torch_c.to_builtin_tensor %11988 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_9759 = arith.constant 1 : index
    %dim_9760 = tensor.dim %12171, %c1_9759 : tensor<4x?x4x128xf16>
    %12172 = flow.tensor.bitcast %12171 : tensor<4x?x4x128xf16>{%dim_9760} -> tensor<4x?x4x64xcomplex<f16>>{%dim_9760}
    %12173 = torch_c.from_builtin_tensor %12172 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %12173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %12174 = torch.aten.mul.Tensor %12173, %12170 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %12174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %12175 = torch_c.to_builtin_tensor %12174 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_9761 = arith.constant 1 : index
    %dim_9762 = tensor.dim %12175, %c1_9761 : tensor<4x?x4x64xcomplex<f32>>
    %12176 = flow.tensor.bitcast %12175 : tensor<4x?x4x64xcomplex<f32>>{%dim_9762} -> tensor<4x?x4x128xf32>{%dim_9762}
    %12177 = torch_c.from_builtin_tensor %12176 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %12177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_9763 = torch.constant.int 5
    %12178 = torch.prims.convert_element_type %12177, %int5_9763 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
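    // Rebuild the identical RoPE table (%12179..%12192, same constants as %12021..%12034)
    // and transfer it to all eight devices again, this time for the key shards. The
    // recomputation is redundant at this level; later compiler stages may CSE it.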
    %int131072_9764 = torch.constant.int 131072
    %none_9765 = torch.constant.none
    %none_9766 = torch.constant.none
    %cpu_9767 = torch.constant.device "cpu"
    %false_9768 = torch.constant.bool false
    %12179 = torch.aten.arange %int131072_9764, %none_9765, %none_9766, %cpu_9767, %false_9768 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_9769 = torch.constant.int 0
    %int128_9770 = torch.constant.int 128
    %int2_9771 = torch.constant.int 2
    %none_9772 = torch.constant.none
    %none_9773 = torch.constant.none
    %cpu_9774 = torch.constant.device "cpu"
    %false_9775 = torch.constant.bool false
    %12180 = torch.aten.arange.start_step %int0_9769, %int128_9770, %int2_9771, %none_9772, %none_9773, %cpu_9774, %false_9775 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_9776 = torch.constant.int 0
    %int0_9777 = torch.constant.int 0
    %int64_9778 = torch.constant.int 64
    %int1_9779 = torch.constant.int 1
    %12181 = torch.aten.slice.Tensor %12180, %int0_9776, %int0_9777, %int64_9778, %int1_9779 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_9780 = torch.constant.int 6
    %12182 = torch.prims.convert_element_type %12181, %int6_9780 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_9781 = torch.constant.int 128
    %12183 = torch.aten.div.Scalar %12182, %int128_9781 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_9782 = torch.constant.float 5.000000e+05
    %12184 = torch.aten.pow.Scalar %float5.000000e05_9782, %12183 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %12185 = torch.aten.reciprocal %12184 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_9783 = torch.constant.float 1.000000e+00
    %12186 = torch.aten.mul.Scalar %12185, %float1.000000e00_9783 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_9784 = torch.constant.int 131072
    %int1_9785 = torch.constant.int 1
    %12187 = torch.prim.ListConstruct %int131072_9784, %int1_9785 : (!torch.int, !torch.int) -> !torch.list<int>
    %12188 = torch.aten.view %12179, %12187 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %12189 = torch.aten.mul.Tensor %12188, %12186 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %12190 = torch.aten.cos %12189 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %12191 = torch.aten.sin %12189 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %12192 = torch.aten.complex %12190, %12191 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
    %12193 = torch_c.to_builtin_tensor %12192 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12194 = flow.tensor.transfer %12193 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %12195 = torch_c.from_builtin_tensor %12194 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12196 = torch_c.to_builtin_tensor %12192 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12197 = flow.tensor.transfer %12196 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %12198 = torch_c.from_builtin_tensor %12197 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12199 = torch_c.to_builtin_tensor %12192 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12200 = flow.tensor.transfer %12199 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %12201 = torch_c.from_builtin_tensor %12200 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12202 = torch_c.to_builtin_tensor %12192 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12203 = flow.tensor.transfer %12202 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %12204 = torch_c.from_builtin_tensor %12203 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12205 = torch_c.to_builtin_tensor %12192 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12206 = flow.tensor.transfer %12205 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %12207 = torch_c.from_builtin_tensor %12206 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12208 = torch_c.to_builtin_tensor %12192 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12209 = flow.tensor.transfer %12208 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %12210 = torch_c.from_builtin_tensor %12209 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12211 = torch_c.to_builtin_tensor %12192 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12212 = flow.tensor.transfer %12211 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %12213 = torch_c.from_builtin_tensor %12212 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %12214 = torch_c.to_builtin_tensor %12192 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %12215 = flow.tensor.transfer %12214 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %12216 = torch_c.from_builtin_tensor %12215 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
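    // Apply RoPE to the device-0 key shard %11990 ([4,?,1,128], one KV head per device)
    // using the same slice / bitcast / complex-multiply sequence, producing %12231 in f16.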
    %int1_9786 = torch.constant.int 1
    %12217 = torch.aten.size.int %11866, %int1_9786 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_9787 = torch.constant.int 0
    %12218 = torch.aten.add.int %int0_9787, %12217 : !torch.int, !torch.int -> !torch.int
    %int0_9788 = torch.constant.int 0
    %int0_9789 = torch.constant.int 0
    %int1_9790 = torch.constant.int 1
    %12219 = torch.aten.slice.Tensor %12195, %int0_9788, %int0_9789, %12218, %int1_9790 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12219, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9791 = torch.constant.int 1
    %int0_9792 = torch.constant.int 0
    %int9223372036854775807_9793 = torch.constant.int 9223372036854775807
    %int1_9794 = torch.constant.int 1
    %12220 = torch.aten.slice.Tensor %12219, %int1_9791, %int0_9792, %int9223372036854775807_9793, %int1_9794 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12220, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9795 = torch.constant.int 0
    %12221 = torch.aten.unsqueeze %12220, %int0_9795 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12221, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9796 = torch.constant.int 2
    %12222 = torch.aten.unsqueeze %12221, %int2_9796 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12222, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9797 = torch.constant.int 3
    %int0_9798 = torch.constant.int 0
    %int9223372036854775807_9799 = torch.constant.int 9223372036854775807
    %int1_9800 = torch.constant.int 1
    %12223 = torch.aten.slice.Tensor %12222, %int3_9797, %int0_9798, %int9223372036854775807_9799, %int1_9800 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12223, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12224 = torch_c.to_builtin_tensor %11990 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_9801 = arith.constant 1 : index
    %dim_9802 = tensor.dim %12224, %c1_9801 : tensor<4x?x1x128xf16>
    %12225 = flow.tensor.bitcast %12224 : tensor<4x?x1x128xf16>{%dim_9802} -> tensor<4x?x1x64xcomplex<f16>>{%dim_9802}
    %12226 = torch_c.from_builtin_tensor %12225 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %12226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %12227 = torch.aten.mul.Tensor %12226, %12223 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %12228 = torch_c.to_builtin_tensor %12227 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_9803 = arith.constant 1 : index
    %dim_9804 = tensor.dim %12228, %c1_9803 : tensor<4x?x1x64xcomplex<f32>>
    %12229 = flow.tensor.bitcast %12228 : tensor<4x?x1x64xcomplex<f32>>{%dim_9804} -> tensor<4x?x1x128xf32>{%dim_9804}
    %12230 = torch_c.from_builtin_tensor %12229 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %12230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_9805 = torch.constant.int 5
    %12231 = torch.prims.convert_element_type %12230, %int5_9805 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
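    // The block above applies rotary position embedding to one [4,?,1,128] f16
    // head: the table is sliced to the current sequence length, broadcast to
    // [1,?,1,64], the head is bitcast to complex<f16>, multiplied elementwise by
    // the complex frequencies, bitcast back to [4,?,1,128] f32, and truncated to
    // f16. The same pattern repeats once per device shard below.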
    %int1_9806 = torch.constant.int 1
    %12232 = torch.aten.size.int %11872, %int1_9806 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_9807 = torch.constant.int 0
    %12233 = torch.aten.add.int %int0_9807, %12232 : !torch.int, !torch.int -> !torch.int
    %int0_9808 = torch.constant.int 0
    %int0_9809 = torch.constant.int 0
    %int1_9810 = torch.constant.int 1
    %12234 = torch.aten.slice.Tensor %12198, %int0_9808, %int0_9809, %12233, %int1_9810 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12234, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9811 = torch.constant.int 1
    %int0_9812 = torch.constant.int 0
    %int9223372036854775807_9813 = torch.constant.int 9223372036854775807
    %int1_9814 = torch.constant.int 1
    %12235 = torch.aten.slice.Tensor %12234, %int1_9811, %int0_9812, %int9223372036854775807_9813, %int1_9814 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12235, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9815 = torch.constant.int 0
    %12236 = torch.aten.unsqueeze %12235, %int0_9815 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12236, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9816 = torch.constant.int 2
    %12237 = torch.aten.unsqueeze %12236, %int2_9816 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12237, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9817 = torch.constant.int 3
    %int0_9818 = torch.constant.int 0
    %int9223372036854775807_9819 = torch.constant.int 9223372036854775807
    %int1_9820 = torch.constant.int 1
    %12238 = torch.aten.slice.Tensor %12237, %int3_9817, %int0_9818, %int9223372036854775807_9819, %int1_9820 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12238, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12239 = torch_c.to_builtin_tensor %11992 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_9821 = arith.constant 1 : index
    %dim_9822 = tensor.dim %12239, %c1_9821 : tensor<4x?x1x128xf16>
    %12240 = flow.tensor.bitcast %12239 : tensor<4x?x1x128xf16>{%dim_9822} -> tensor<4x?x1x64xcomplex<f16>>{%dim_9822}
    %12241 = torch_c.from_builtin_tensor %12240 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %12241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %12242 = torch.aten.mul.Tensor %12241, %12238 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %12243 = torch_c.to_builtin_tensor %12242 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_9823 = arith.constant 1 : index
    %dim_9824 = tensor.dim %12243, %c1_9823 : tensor<4x?x1x64xcomplex<f32>>
    %12244 = flow.tensor.bitcast %12243 : tensor<4x?x1x64xcomplex<f32>>{%dim_9824} -> tensor<4x?x1x128xf32>{%dim_9824}
    %12245 = torch_c.from_builtin_tensor %12244 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %12245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_9825 = torch.constant.int 5
    %12246 = torch.prims.convert_element_type %12245, %int5_9825 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_9826 = torch.constant.int 1
    %12247 = torch.aten.size.int %11878, %int1_9826 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_9827 = torch.constant.int 0
    %12248 = torch.aten.add.int %int0_9827, %12247 : !torch.int, !torch.int -> !torch.int
    %int0_9828 = torch.constant.int 0
    %int0_9829 = torch.constant.int 0
    %int1_9830 = torch.constant.int 1
    %12249 = torch.aten.slice.Tensor %12201, %int0_9828, %int0_9829, %12248, %int1_9830 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12249, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9831 = torch.constant.int 1
    %int0_9832 = torch.constant.int 0
    %int9223372036854775807_9833 = torch.constant.int 9223372036854775807
    %int1_9834 = torch.constant.int 1
    %12250 = torch.aten.slice.Tensor %12249, %int1_9831, %int0_9832, %int9223372036854775807_9833, %int1_9834 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12250, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9835 = torch.constant.int 0
    %12251 = torch.aten.unsqueeze %12250, %int0_9835 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12251, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9836 = torch.constant.int 2
    %12252 = torch.aten.unsqueeze %12251, %int2_9836 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12252, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9837 = torch.constant.int 3
    %int0_9838 = torch.constant.int 0
    %int9223372036854775807_9839 = torch.constant.int 9223372036854775807
    %int1_9840 = torch.constant.int 1
    %12253 = torch.aten.slice.Tensor %12252, %int3_9837, %int0_9838, %int9223372036854775807_9839, %int1_9840 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12253, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12254 = torch_c.to_builtin_tensor %11994 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_9841 = arith.constant 1 : index
    %dim_9842 = tensor.dim %12254, %c1_9841 : tensor<4x?x1x128xf16>
    %12255 = flow.tensor.bitcast %12254 : tensor<4x?x1x128xf16>{%dim_9842} -> tensor<4x?x1x64xcomplex<f16>>{%dim_9842}
    %12256 = torch_c.from_builtin_tensor %12255 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %12256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %12257 = torch.aten.mul.Tensor %12256, %12253 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %12258 = torch_c.to_builtin_tensor %12257 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_9843 = arith.constant 1 : index
    %dim_9844 = tensor.dim %12258, %c1_9843 : tensor<4x?x1x64xcomplex<f32>>
    %12259 = flow.tensor.bitcast %12258 : tensor<4x?x1x64xcomplex<f32>>{%dim_9844} -> tensor<4x?x1x128xf32>{%dim_9844}
    %12260 = torch_c.from_builtin_tensor %12259 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %12260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_9845 = torch.constant.int 5
    %12261 = torch.prims.convert_element_type %12260, %int5_9845 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_9846 = torch.constant.int 1
    %12262 = torch.aten.size.int %11884, %int1_9846 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_9847 = torch.constant.int 0
    %12263 = torch.aten.add.int %int0_9847, %12262 : !torch.int, !torch.int -> !torch.int
    %int0_9848 = torch.constant.int 0
    %int0_9849 = torch.constant.int 0
    %int1_9850 = torch.constant.int 1
    %12264 = torch.aten.slice.Tensor %12204, %int0_9848, %int0_9849, %12263, %int1_9850 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12264, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9851 = torch.constant.int 1
    %int0_9852 = torch.constant.int 0
    %int9223372036854775807_9853 = torch.constant.int 9223372036854775807
    %int1_9854 = torch.constant.int 1
    %12265 = torch.aten.slice.Tensor %12264, %int1_9851, %int0_9852, %int9223372036854775807_9853, %int1_9854 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12265, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9855 = torch.constant.int 0
    %12266 = torch.aten.unsqueeze %12265, %int0_9855 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12266, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9856 = torch.constant.int 2
    %12267 = torch.aten.unsqueeze %12266, %int2_9856 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12267, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9857 = torch.constant.int 3
    %int0_9858 = torch.constant.int 0
    %int9223372036854775807_9859 = torch.constant.int 9223372036854775807
    %int1_9860 = torch.constant.int 1
    %12268 = torch.aten.slice.Tensor %12267, %int3_9857, %int0_9858, %int9223372036854775807_9859, %int1_9860 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12268, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12269 = torch_c.to_builtin_tensor %11996 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_9861 = arith.constant 1 : index
    %dim_9862 = tensor.dim %12269, %c1_9861 : tensor<4x?x1x128xf16>
    %12270 = flow.tensor.bitcast %12269 : tensor<4x?x1x128xf16>{%dim_9862} -> tensor<4x?x1x64xcomplex<f16>>{%dim_9862}
    %12271 = torch_c.from_builtin_tensor %12270 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %12271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %12272 = torch.aten.mul.Tensor %12271, %12268 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %12273 = torch_c.to_builtin_tensor %12272 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_9863 = arith.constant 1 : index
    %dim_9864 = tensor.dim %12273, %c1_9863 : tensor<4x?x1x64xcomplex<f32>>
    %12274 = flow.tensor.bitcast %12273 : tensor<4x?x1x64xcomplex<f32>>{%dim_9864} -> tensor<4x?x1x128xf32>{%dim_9864}
    %12275 = torch_c.from_builtin_tensor %12274 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %12275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_9865 = torch.constant.int 5
    %12276 = torch.prims.convert_element_type %12275, %int5_9865 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_9866 = torch.constant.int 1
    %12277 = torch.aten.size.int %11890, %int1_9866 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_9867 = torch.constant.int 0
    %12278 = torch.aten.add.int %int0_9867, %12277 : !torch.int, !torch.int -> !torch.int
    %int0_9868 = torch.constant.int 0
    %int0_9869 = torch.constant.int 0
    %int1_9870 = torch.constant.int 1
    %12279 = torch.aten.slice.Tensor %12207, %int0_9868, %int0_9869, %12278, %int1_9870 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12279, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9871 = torch.constant.int 1
    %int0_9872 = torch.constant.int 0
    %int9223372036854775807_9873 = torch.constant.int 9223372036854775807
    %int1_9874 = torch.constant.int 1
    %12280 = torch.aten.slice.Tensor %12279, %int1_9871, %int0_9872, %int9223372036854775807_9873, %int1_9874 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12280, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9875 = torch.constant.int 0
    %12281 = torch.aten.unsqueeze %12280, %int0_9875 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12281, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9876 = torch.constant.int 2
    %12282 = torch.aten.unsqueeze %12281, %int2_9876 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12282, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9877 = torch.constant.int 3
    %int0_9878 = torch.constant.int 0
    %int9223372036854775807_9879 = torch.constant.int 9223372036854775807
    %int1_9880 = torch.constant.int 1
    %12283 = torch.aten.slice.Tensor %12282, %int3_9877, %int0_9878, %int9223372036854775807_9879, %int1_9880 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12283, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12284 = torch_c.to_builtin_tensor %11998 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_9881 = arith.constant 1 : index
    %dim_9882 = tensor.dim %12284, %c1_9881 : tensor<4x?x1x128xf16>
    %12285 = flow.tensor.bitcast %12284 : tensor<4x?x1x128xf16>{%dim_9882} -> tensor<4x?x1x64xcomplex<f16>>{%dim_9882}
    %12286 = torch_c.from_builtin_tensor %12285 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %12286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %12287 = torch.aten.mul.Tensor %12286, %12283 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %12288 = torch_c.to_builtin_tensor %12287 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_9883 = arith.constant 1 : index
    %dim_9884 = tensor.dim %12288, %c1_9883 : tensor<4x?x1x64xcomplex<f32>>
    %12289 = flow.tensor.bitcast %12288 : tensor<4x?x1x64xcomplex<f32>>{%dim_9884} -> tensor<4x?x1x128xf32>{%dim_9884}
    %12290 = torch_c.from_builtin_tensor %12289 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %12290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_9885 = torch.constant.int 5
    %12291 = torch.prims.convert_element_type %12290, %int5_9885 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_9886 = torch.constant.int 1
    %12292 = torch.aten.size.int %11896, %int1_9886 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_9887 = torch.constant.int 0
    %12293 = torch.aten.add.int %int0_9887, %12292 : !torch.int, !torch.int -> !torch.int
    %int0_9888 = torch.constant.int 0
    %int0_9889 = torch.constant.int 0
    %int1_9890 = torch.constant.int 1
    %12294 = torch.aten.slice.Tensor %12210, %int0_9888, %int0_9889, %12293, %int1_9890 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12294, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9891 = torch.constant.int 1
    %int0_9892 = torch.constant.int 0
    %int9223372036854775807_9893 = torch.constant.int 9223372036854775807
    %int1_9894 = torch.constant.int 1
    %12295 = torch.aten.slice.Tensor %12294, %int1_9891, %int0_9892, %int9223372036854775807_9893, %int1_9894 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12295, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9895 = torch.constant.int 0
    %12296 = torch.aten.unsqueeze %12295, %int0_9895 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12296, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9896 = torch.constant.int 2
    %12297 = torch.aten.unsqueeze %12296, %int2_9896 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12297, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9897 = torch.constant.int 3
    %int0_9898 = torch.constant.int 0
    %int9223372036854775807_9899 = torch.constant.int 9223372036854775807
    %int1_9900 = torch.constant.int 1
    %12298 = torch.aten.slice.Tensor %12297, %int3_9897, %int0_9898, %int9223372036854775807_9899, %int1_9900 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12298, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12299 = torch_c.to_builtin_tensor %12000 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_9901 = arith.constant 1 : index
    %dim_9902 = tensor.dim %12299, %c1_9901 : tensor<4x?x1x128xf16>
    %12300 = flow.tensor.bitcast %12299 : tensor<4x?x1x128xf16>{%dim_9902} -> tensor<4x?x1x64xcomplex<f16>>{%dim_9902}
    %12301 = torch_c.from_builtin_tensor %12300 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %12301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %12302 = torch.aten.mul.Tensor %12301, %12298 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %12303 = torch_c.to_builtin_tensor %12302 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_9903 = arith.constant 1 : index
    %dim_9904 = tensor.dim %12303, %c1_9903 : tensor<4x?x1x64xcomplex<f32>>
    %12304 = flow.tensor.bitcast %12303 : tensor<4x?x1x64xcomplex<f32>>{%dim_9904} -> tensor<4x?x1x128xf32>{%dim_9904}
    %12305 = torch_c.from_builtin_tensor %12304 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %12305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_9905 = torch.constant.int 5
    %12306 = torch.prims.convert_element_type %12305, %int5_9905 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_9906 = torch.constant.int 1
    %12307 = torch.aten.size.int %11902, %int1_9906 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_9907 = torch.constant.int 0
    %12308 = torch.aten.add.int %int0_9907, %12307 : !torch.int, !torch.int -> !torch.int
    %int0_9908 = torch.constant.int 0
    %int0_9909 = torch.constant.int 0
    %int1_9910 = torch.constant.int 1
    %12309 = torch.aten.slice.Tensor %12213, %int0_9908, %int0_9909, %12308, %int1_9910 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12309, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9911 = torch.constant.int 1
    %int0_9912 = torch.constant.int 0
    %int9223372036854775807_9913 = torch.constant.int 9223372036854775807
    %int1_9914 = torch.constant.int 1
    %12310 = torch.aten.slice.Tensor %12309, %int1_9911, %int0_9912, %int9223372036854775807_9913, %int1_9914 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12310, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9915 = torch.constant.int 0
    %12311 = torch.aten.unsqueeze %12310, %int0_9915 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12311, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9916 = torch.constant.int 2
    %12312 = torch.aten.unsqueeze %12311, %int2_9916 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12312, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9917 = torch.constant.int 3
    %int0_9918 = torch.constant.int 0
    %int9223372036854775807_9919 = torch.constant.int 9223372036854775807
    %int1_9920 = torch.constant.int 1
    %12313 = torch.aten.slice.Tensor %12312, %int3_9917, %int0_9918, %int9223372036854775807_9919, %int1_9920 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12313, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12314 = torch_c.to_builtin_tensor %12002 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_9921 = arith.constant 1 : index
    %dim_9922 = tensor.dim %12314, %c1_9921 : tensor<4x?x1x128xf16>
    %12315 = flow.tensor.bitcast %12314 : tensor<4x?x1x128xf16>{%dim_9922} -> tensor<4x?x1x64xcomplex<f16>>{%dim_9922}
    %12316 = torch_c.from_builtin_tensor %12315 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %12316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %12317 = torch.aten.mul.Tensor %12316, %12313 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %12318 = torch_c.to_builtin_tensor %12317 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_9923 = arith.constant 1 : index
    %dim_9924 = tensor.dim %12318, %c1_9923 : tensor<4x?x1x64xcomplex<f32>>
    %12319 = flow.tensor.bitcast %12318 : tensor<4x?x1x64xcomplex<f32>>{%dim_9924} -> tensor<4x?x1x128xf32>{%dim_9924}
    %12320 = torch_c.from_builtin_tensor %12319 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %12320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_9925 = torch.constant.int 5
    %12321 = torch.prims.convert_element_type %12320, %int5_9925 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_9926 = torch.constant.int 1
    %12322 = torch.aten.size.int %11908, %int1_9926 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_9927 = torch.constant.int 0
    %12323 = torch.aten.add.int %int0_9927, %12322 : !torch.int, !torch.int -> !torch.int
    %int0_9928 = torch.constant.int 0
    %int0_9929 = torch.constant.int 0
    %int1_9930 = torch.constant.int 1
    %12324 = torch.aten.slice.Tensor %12216, %int0_9928, %int0_9929, %12323, %int1_9930 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12324, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_9931 = torch.constant.int 1
    %int0_9932 = torch.constant.int 0
    %int9223372036854775807_9933 = torch.constant.int 9223372036854775807
    %int1_9934 = torch.constant.int 1
    %12325 = torch.aten.slice.Tensor %12324, %int1_9931, %int0_9932, %int9223372036854775807_9933, %int1_9934 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %12325, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_9935 = torch.constant.int 0
    %12326 = torch.aten.unsqueeze %12325, %int0_9935 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %12326, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_9936 = torch.constant.int 2
    %12327 = torch.aten.unsqueeze %12326, %int2_9936 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12327, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_9937 = torch.constant.int 3
    %int0_9938 = torch.constant.int 0
    %int9223372036854775807_9939 = torch.constant.int 9223372036854775807
    %int1_9940 = torch.constant.int 1
    %12328 = torch.aten.slice.Tensor %12327, %int3_9937, %int0_9938, %int9223372036854775807_9939, %int1_9940 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12328, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %12329 = torch_c.to_builtin_tensor %12004 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_9941 = arith.constant 1 : index
    %dim_9942 = tensor.dim %12329, %c1_9941 : tensor<4x?x1x128xf16>
    %12330 = flow.tensor.bitcast %12329 : tensor<4x?x1x128xf16>{%dim_9942} -> tensor<4x?x1x64xcomplex<f16>>{%dim_9942}
    %12331 = torch_c.from_builtin_tensor %12330 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %12331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %12332 = torch.aten.mul.Tensor %12331, %12328 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %12332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %12333 = torch_c.to_builtin_tensor %12332 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_9943 = arith.constant 1 : index
    %dim_9944 = tensor.dim %12333, %c1_9943 : tensor<4x?x1x64xcomplex<f32>>
    %12334 = flow.tensor.bitcast %12333 : tensor<4x?x1x64xcomplex<f32>>{%dim_9944} -> tensor<4x?x1x128xf32>{%dim_9944}
    %12335 = torch_c.from_builtin_tensor %12334 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %12335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_9945 = torch.constant.int 5
    %12336 = torch.prims.convert_element_type %12335, %int5_9945 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %12336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
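    // Next, per-device cache slot indices: each [4,?] si64 page-id tensor is
    // scaled by 64 (plausibly the number of slots per cache page) and offset by
    // 10, which would select this layer's K slot; a matching V slot index is
    // produced further below by adding 1.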
    %int64_9946 = torch.constant.int 64
    %12337 = torch.aten.mul.Scalar %2364, %int64_9946 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12337, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_9947 = torch.constant.int 64
    %12338 = torch.aten.mul.Scalar %2367, %int64_9947 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12338, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_9948 = torch.constant.int 64
    %12339 = torch.aten.mul.Scalar %2370, %int64_9948 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12339, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_9949 = torch.constant.int 64
    %12340 = torch.aten.mul.Scalar %2373, %int64_9949 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12340, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_9950 = torch.constant.int 64
    %12341 = torch.aten.mul.Scalar %2376, %int64_9950 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12341, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_9951 = torch.constant.int 64
    %12342 = torch.aten.mul.Scalar %2379, %int64_9951 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12342, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_9952 = torch.constant.int 64
    %12343 = torch.aten.mul.Scalar %2382, %int64_9952 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12343, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_9953 = torch.constant.int 64
    %12344 = torch.aten.mul.Scalar %2385, %int64_9953 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12344, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int10 = torch.constant.int 10
    %int1_9954 = torch.constant.int 1
    %12345 = torch.aten.add.Scalar %12337, %int10, %int1_9954 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12345, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int10_9955 = torch.constant.int 10
    %int1_9956 = torch.constant.int 1
    %12346 = torch.aten.add.Scalar %12338, %int10_9955, %int1_9956 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12346, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int10_9957 = torch.constant.int 10
    %int1_9958 = torch.constant.int 1
    %12347 = torch.aten.add.Scalar %12339, %int10_9957, %int1_9958 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12347, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int10_9959 = torch.constant.int 10
    %int1_9960 = torch.constant.int 1
    %12348 = torch.aten.add.Scalar %12340, %int10_9959, %int1_9960 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12348, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int10_9961 = torch.constant.int 10
    %int1_9962 = torch.constant.int 1
    %12349 = torch.aten.add.Scalar %12341, %int10_9961, %int1_9962 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12349, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int10_9963 = torch.constant.int 10
    %int1_9964 = torch.constant.int 1
    %12350 = torch.aten.add.Scalar %12342, %int10_9963, %int1_9964 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12350, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int10_9965 = torch.constant.int 10
    %int1_9966 = torch.constant.int 1
    %12351 = torch.aten.add.Scalar %12343, %int10_9965, %int1_9966 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12351, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int10_9967 = torch.constant.int 10
    %int1_9968 = torch.constant.int 1
    %12352 = torch.aten.add.Scalar %12344, %int10_9967, %int1_9968 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12352, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
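    // The rotated K states are then unflattened from [4,?,1,128] into
    // [4,?,16,1,128] (16 tokens per cache page) and collapsed to [?,16,1,128],
    // merging the batch and page dimensions for the cache update.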
    %int4_9969 = torch.constant.int 4
    %int16_9970 = torch.constant.int 16
    %int1_9971 = torch.constant.int 1
    %int128_9972 = torch.constant.int 128
    %12353 = torch.prim.ListConstruct %int4_9969, %3095, %int16_9970, %int1_9971, %int128_9972 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12354 = torch.aten.view %12231, %12353 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12354, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_9973 = torch.constant.int 4
    %int16_9974 = torch.constant.int 16
    %int1_9975 = torch.constant.int 1
    %int128_9976 = torch.constant.int 128
    %12355 = torch.prim.ListConstruct %int4_9973, %3095, %int16_9974, %int1_9975, %int128_9976 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12356 = torch.aten.view %12246, %12355 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12356, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_9977 = torch.constant.int 4
    %int16_9978 = torch.constant.int 16
    %int1_9979 = torch.constant.int 1
    %int128_9980 = torch.constant.int 128
    %12357 = torch.prim.ListConstruct %int4_9977, %3095, %int16_9978, %int1_9979, %int128_9980 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12358 = torch.aten.view %12261, %12357 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12358, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_9981 = torch.constant.int 4
    %int16_9982 = torch.constant.int 16
    %int1_9983 = torch.constant.int 1
    %int128_9984 = torch.constant.int 128
    %12359 = torch.prim.ListConstruct %int4_9981, %3095, %int16_9982, %int1_9983, %int128_9984 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12360 = torch.aten.view %12276, %12359 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12360, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_9985 = torch.constant.int 4
    %int16_9986 = torch.constant.int 16
    %int1_9987 = torch.constant.int 1
    %int128_9988 = torch.constant.int 128
    %12361 = torch.prim.ListConstruct %int4_9985, %3095, %int16_9986, %int1_9987, %int128_9988 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12362 = torch.aten.view %12291, %12361 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12362, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_9989 = torch.constant.int 4
    %int16_9990 = torch.constant.int 16
    %int1_9991 = torch.constant.int 1
    %int128_9992 = torch.constant.int 128
    %12363 = torch.prim.ListConstruct %int4_9989, %3095, %int16_9990, %int1_9991, %int128_9992 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12364 = torch.aten.view %12306, %12363 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12364, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_9993 = torch.constant.int 4
    %int16_9994 = torch.constant.int 16
    %int1_9995 = torch.constant.int 1
    %int128_9996 = torch.constant.int 128
    %12365 = torch.prim.ListConstruct %int4_9993, %3095, %int16_9994, %int1_9995, %int128_9996 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12366 = torch.aten.view %12321, %12365 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12366, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_9997 = torch.constant.int 4
    %int16_9998 = torch.constant.int 16
    %int1_9999 = torch.constant.int 1
    %int128_10000 = torch.constant.int 128
    %12367 = torch.prim.ListConstruct %int4_9997, %3095, %int16_9998, %int1_9999, %int128_10000 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12368 = torch.aten.view %12336, %12367 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12368, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_10001 = torch.constant.int 4
    %12369 = torch.aten.mul.int %int4_10001, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10002 = torch.constant.int 16
    %int1_10003 = torch.constant.int 1
    %int128_10004 = torch.constant.int 128
    %12370 = torch.prim.ListConstruct %12369, %int16_10002, %int1_10003, %int128_10004 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12371 = torch.aten.view %12354, %12370 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12371, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10005 = torch.constant.int 4
    %12372 = torch.aten.mul.int %int4_10005, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10006 = torch.constant.int 16
    %int1_10007 = torch.constant.int 1
    %int128_10008 = torch.constant.int 128
    %12373 = torch.prim.ListConstruct %12372, %int16_10006, %int1_10007, %int128_10008 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12374 = torch.aten.view %12356, %12373 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12374, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10009 = torch.constant.int 4
    %12375 = torch.aten.mul.int %int4_10009, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10010 = torch.constant.int 16
    %int1_10011 = torch.constant.int 1
    %int128_10012 = torch.constant.int 128
    %12376 = torch.prim.ListConstruct %12375, %int16_10010, %int1_10011, %int128_10012 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12377 = torch.aten.view %12358, %12376 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12377, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10013 = torch.constant.int 4
    %12378 = torch.aten.mul.int %int4_10013, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10014 = torch.constant.int 16
    %int1_10015 = torch.constant.int 1
    %int128_10016 = torch.constant.int 128
    %12379 = torch.prim.ListConstruct %12378, %int16_10014, %int1_10015, %int128_10016 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12380 = torch.aten.view %12360, %12379 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12380, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10017 = torch.constant.int 4
    %12381 = torch.aten.mul.int %int4_10017, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10018 = torch.constant.int 16
    %int1_10019 = torch.constant.int 1
    %int128_10020 = torch.constant.int 128
    %12382 = torch.prim.ListConstruct %12381, %int16_10018, %int1_10019, %int128_10020 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12383 = torch.aten.view %12362, %12382 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12383, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10021 = torch.constant.int 4
    %12384 = torch.aten.mul.int %int4_10021, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10022 = torch.constant.int 16
    %int1_10023 = torch.constant.int 1
    %int128_10024 = torch.constant.int 128
    %12385 = torch.prim.ListConstruct %12384, %int16_10022, %int1_10023, %int128_10024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12386 = torch.aten.view %12364, %12385 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12386, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10025 = torch.constant.int 4
    %12387 = torch.aten.mul.int %int4_10025, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10026 = torch.constant.int 16
    %int1_10027 = torch.constant.int 1
    %int128_10028 = torch.constant.int 128
    %12388 = torch.prim.ListConstruct %12387, %int16_10026, %int1_10027, %int128_10028 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12389 = torch.aten.view %12366, %12388 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12389, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10029 = torch.constant.int 4
    %12390 = torch.aten.mul.int %int4_10029, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10030 = torch.constant.int 16
    %int1_10031 = torch.constant.int 1
    %int128_10032 = torch.constant.int 128
    %12391 = torch.prim.ListConstruct %12390, %int16_10030, %int1_10031, %int128_10032 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12392 = torch.aten.view %12368, %12391 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12392, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
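    // The slot-index tensors are flattened the same way ([4,?] -> [?]) so they
    // pair one-to-one with the [?,16,1,128] page views above.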
    %int4_10033 = torch.constant.int 4
    %12393 = torch.aten.mul.int %int4_10033, %3095 : !torch.int, !torch.int -> !torch.int
    %12394 = torch.prim.ListConstruct %12393 : (!torch.int) -> !torch.list<int>
    %12395 = torch.aten.view %12345, %12394 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12395, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10034 = torch.constant.int 4
    %12396 = torch.aten.mul.int %int4_10034, %3095 : !torch.int, !torch.int -> !torch.int
    %12397 = torch.prim.ListConstruct %12396 : (!torch.int) -> !torch.list<int>
    %12398 = torch.aten.view %12346, %12397 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12398, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10035 = torch.constant.int 4
    %12399 = torch.aten.mul.int %int4_10035, %3095 : !torch.int, !torch.int -> !torch.int
    %12400 = torch.prim.ListConstruct %12399 : (!torch.int) -> !torch.list<int>
    %12401 = torch.aten.view %12347, %12400 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12401, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10036 = torch.constant.int 4
    %12402 = torch.aten.mul.int %int4_10036, %3095 : !torch.int, !torch.int -> !torch.int
    %12403 = torch.prim.ListConstruct %12402 : (!torch.int) -> !torch.list<int>
    %12404 = torch.aten.view %12348, %12403 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12404, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10037 = torch.constant.int 4
    %12405 = torch.aten.mul.int %int4_10037, %3095 : !torch.int, !torch.int -> !torch.int
    %12406 = torch.prim.ListConstruct %12405 : (!torch.int) -> !torch.list<int>
    %12407 = torch.aten.view %12349, %12406 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12407, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10038 = torch.constant.int 4
    %12408 = torch.aten.mul.int %int4_10038, %3095 : !torch.int, !torch.int -> !torch.int
    %12409 = torch.prim.ListConstruct %12408 : (!torch.int) -> !torch.list<int>
    %12410 = torch.aten.view %12350, %12409 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12410, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10039 = torch.constant.int 4
    %12411 = torch.aten.mul.int %int4_10039, %3095 : !torch.int, !torch.int -> !torch.int
    %12412 = torch.prim.ListConstruct %12411 : (!torch.int) -> !torch.list<int>
    %12413 = torch.aten.view %12351, %12412 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12413, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10040 = torch.constant.int 4
    %12414 = torch.aten.mul.int %int4_10040, %3095 : !torch.int, !torch.int -> !torch.int
    %12415 = torch.prim.ListConstruct %12414 : (!torch.int) -> !torch.list<int>
    %12416 = torch.aten.view %12352, %12415 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12416, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
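    // %12006 through %12020 appear to be the corresponding per-shard V states;
    // they receive the same [4,?,1,128] -> [4,?,16,1,128] -> [?,16,1,128]
    // treatment, with no rotary step (values are not rotated).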
    %int4_10041 = torch.constant.int 4
    %int16_10042 = torch.constant.int 16
    %int1_10043 = torch.constant.int 1
    %int128_10044 = torch.constant.int 128
    %12417 = torch.prim.ListConstruct %int4_10041, %3095, %int16_10042, %int1_10043, %int128_10044 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12418 = torch.aten.view %12006, %12417 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12418, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_10045 = torch.constant.int 4
    %int16_10046 = torch.constant.int 16
    %int1_10047 = torch.constant.int 1
    %int128_10048 = torch.constant.int 128
    %12419 = torch.prim.ListConstruct %int4_10045, %3095, %int16_10046, %int1_10047, %int128_10048 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12420 = torch.aten.view %12008, %12419 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12420, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_10049 = torch.constant.int 4
    %int16_10050 = torch.constant.int 16
    %int1_10051 = torch.constant.int 1
    %int128_10052 = torch.constant.int 128
    %12421 = torch.prim.ListConstruct %int4_10049, %3095, %int16_10050, %int1_10051, %int128_10052 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12422 = torch.aten.view %12010, %12421 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12422, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_10053 = torch.constant.int 4
    %int16_10054 = torch.constant.int 16
    %int1_10055 = torch.constant.int 1
    %int128_10056 = torch.constant.int 128
    %12423 = torch.prim.ListConstruct %int4_10053, %3095, %int16_10054, %int1_10055, %int128_10056 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12424 = torch.aten.view %12012, %12423 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12424, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_10057 = torch.constant.int 4
    %int16_10058 = torch.constant.int 16
    %int1_10059 = torch.constant.int 1
    %int128_10060 = torch.constant.int 128
    %12425 = torch.prim.ListConstruct %int4_10057, %3095, %int16_10058, %int1_10059, %int128_10060 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12426 = torch.aten.view %12014, %12425 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12426, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_10061 = torch.constant.int 4
    %int16_10062 = torch.constant.int 16
    %int1_10063 = torch.constant.int 1
    %int128_10064 = torch.constant.int 128
    %12427 = torch.prim.ListConstruct %int4_10061, %3095, %int16_10062, %int1_10063, %int128_10064 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12428 = torch.aten.view %12016, %12427 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12428, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_10065 = torch.constant.int 4
    %int16_10066 = torch.constant.int 16
    %int1_10067 = torch.constant.int 1
    %int128_10068 = torch.constant.int 128
    %12429 = torch.prim.ListConstruct %int4_10065, %3095, %int16_10066, %int1_10067, %int128_10068 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12430 = torch.aten.view %12018, %12429 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12430, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_10069 = torch.constant.int 4
    %int16_10070 = torch.constant.int 16
    %int1_10071 = torch.constant.int 1
    %int128_10072 = torch.constant.int 128
    %12431 = torch.prim.ListConstruct %int4_10069, %3095, %int16_10070, %int1_10071, %int128_10072 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12432 = torch.aten.view %12020, %12431 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %12432, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_10073 = torch.constant.int 4
    %12433 = torch.aten.mul.int %int4_10073, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10074 = torch.constant.int 16
    %int1_10075 = torch.constant.int 1
    %int128_10076 = torch.constant.int 128
    %12434 = torch.prim.ListConstruct %12433, %int16_10074, %int1_10075, %int128_10076 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12435 = torch.aten.view %12418, %12434 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12435, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10077 = torch.constant.int 4
    %12436 = torch.aten.mul.int %int4_10077, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10078 = torch.constant.int 16
    %int1_10079 = torch.constant.int 1
    %int128_10080 = torch.constant.int 128
    %12437 = torch.prim.ListConstruct %12436, %int16_10078, %int1_10079, %int128_10080 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12438 = torch.aten.view %12420, %12437 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12438, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10081 = torch.constant.int 4
    %12439 = torch.aten.mul.int %int4_10081, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10082 = torch.constant.int 16
    %int1_10083 = torch.constant.int 1
    %int128_10084 = torch.constant.int 128
    %12440 = torch.prim.ListConstruct %12439, %int16_10082, %int1_10083, %int128_10084 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12441 = torch.aten.view %12422, %12440 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12441, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10085 = torch.constant.int 4
    %12442 = torch.aten.mul.int %int4_10085, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10086 = torch.constant.int 16
    %int1_10087 = torch.constant.int 1
    %int128_10088 = torch.constant.int 128
    %12443 = torch.prim.ListConstruct %12442, %int16_10086, %int1_10087, %int128_10088 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12444 = torch.aten.view %12424, %12443 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12444, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10089 = torch.constant.int 4
    %12445 = torch.aten.mul.int %int4_10089, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10090 = torch.constant.int 16
    %int1_10091 = torch.constant.int 1
    %int128_10092 = torch.constant.int 128
    %12446 = torch.prim.ListConstruct %12445, %int16_10090, %int1_10091, %int128_10092 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12447 = torch.aten.view %12426, %12446 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12447, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10093 = torch.constant.int 4
    %12448 = torch.aten.mul.int %int4_10093, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10094 = torch.constant.int 16
    %int1_10095 = torch.constant.int 1
    %int128_10096 = torch.constant.int 128
    %12449 = torch.prim.ListConstruct %12448, %int16_10094, %int1_10095, %int128_10096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12450 = torch.aten.view %12428, %12449 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12450, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10097 = torch.constant.int 4
    %12451 = torch.aten.mul.int %int4_10097, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10098 = torch.constant.int 16
    %int1_10099 = torch.constant.int 1
    %int128_10100 = torch.constant.int 128
    %12452 = torch.prim.ListConstruct %12451, %int16_10098, %int1_10099, %int128_10100 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12453 = torch.aten.view %12430, %12452 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12453, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_10101 = torch.constant.int 4
    %12454 = torch.aten.mul.int %int4_10101, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_10102 = torch.constant.int 16
    %int1_10103 = torch.constant.int 1
    %int128_10104 = torch.constant.int 128
    %12455 = torch.prim.ListConstruct %12454, %int16_10102, %int1_10103, %int128_10104 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12456 = torch.aten.view %12432, %12455 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12456, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
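    // [annotation, added] %12457 ... %12464 below add 1 to the per-device [4,?] si64
    // page-index tensors. Given the [?,32,2,16,1,128] cache layout used later (plane
    // stride 1 in the flattened row space), the offset plausibly selects the second of
    // the two K/V planes, so the first half of each concatenated batch lands in plane 0
    // and this half in plane 1. This reading is an inference, not stated in the module.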
    %int1_10105 = torch.constant.int 1
    %int1_10106 = torch.constant.int 1
    %12457 = torch.aten.add.Scalar %12345, %int1_10105, %int1_10106 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12457, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_10107 = torch.constant.int 1
    %int1_10108 = torch.constant.int 1
    %12458 = torch.aten.add.Scalar %12346, %int1_10107, %int1_10108 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12458, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_10109 = torch.constant.int 1
    %int1_10110 = torch.constant.int 1
    %12459 = torch.aten.add.Scalar %12347, %int1_10109, %int1_10110 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12459, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_10111 = torch.constant.int 1
    %int1_10112 = torch.constant.int 1
    %12460 = torch.aten.add.Scalar %12348, %int1_10111, %int1_10112 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12460, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_10113 = torch.constant.int 1
    %int1_10114 = torch.constant.int 1
    %12461 = torch.aten.add.Scalar %12349, %int1_10113, %int1_10114 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12461, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_10115 = torch.constant.int 1
    %int1_10116 = torch.constant.int 1
    %12462 = torch.aten.add.Scalar %12350, %int1_10115, %int1_10116 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12462, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_10117 = torch.constant.int 1
    %int1_10118 = torch.constant.int 1
    %12463 = torch.aten.add.Scalar %12351, %int1_10117, %int1_10118 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12463, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_10119 = torch.constant.int 1
    %int1_10120 = torch.constant.int 1
    %12464 = torch.aten.add.Scalar %12352, %int1_10119, %int1_10120 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %12464, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
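    // [annotation, added] The incremented index tensors are then flattened from [4,?]
    // to one-dimensional [4*s0] form, matching the flattened row space of the cache
    // views used by the index_put ops below.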
    %int4_10121 = torch.constant.int 4
    %12465 = torch.aten.mul.int %int4_10121, %3095 : !torch.int, !torch.int -> !torch.int
    %12466 = torch.prim.ListConstruct %12465 : (!torch.int) -> !torch.list<int>
    %12467 = torch.aten.view %12457, %12466 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12467, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10122 = torch.constant.int 4
    %12468 = torch.aten.mul.int %int4_10122, %3095 : !torch.int, !torch.int -> !torch.int
    %12469 = torch.prim.ListConstruct %12468 : (!torch.int) -> !torch.list<int>
    %12470 = torch.aten.view %12458, %12469 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12470, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10123 = torch.constant.int 4
    %12471 = torch.aten.mul.int %int4_10123, %3095 : !torch.int, !torch.int -> !torch.int
    %12472 = torch.prim.ListConstruct %12471 : (!torch.int) -> !torch.list<int>
    %12473 = torch.aten.view %12459, %12472 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12473, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10124 = torch.constant.int 4
    %12474 = torch.aten.mul.int %int4_10124, %3095 : !torch.int, !torch.int -> !torch.int
    %12475 = torch.prim.ListConstruct %12474 : (!torch.int) -> !torch.list<int>
    %12476 = torch.aten.view %12460, %12475 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12476, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10125 = torch.constant.int 4
    %12477 = torch.aten.mul.int %int4_10125, %3095 : !torch.int, !torch.int -> !torch.int
    %12478 = torch.prim.ListConstruct %12477 : (!torch.int) -> !torch.list<int>
    %12479 = torch.aten.view %12461, %12478 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12479, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10126 = torch.constant.int 4
    %12480 = torch.aten.mul.int %int4_10126, %3095 : !torch.int, !torch.int -> !torch.int
    %12481 = torch.prim.ListConstruct %12480 : (!torch.int) -> !torch.list<int>
    %12482 = torch.aten.view %12462, %12481 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12482, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10127 = torch.constant.int 4
    %12483 = torch.aten.mul.int %int4_10127, %3095 : !torch.int, !torch.int -> !torch.int
    %12484 = torch.prim.ListConstruct %12483 : (!torch.int) -> !torch.list<int>
    %12485 = torch.aten.view %12463, %12484 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12485, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_10128 = torch.constant.int 4
    %12486 = torch.aten.mul.int %int4_10128, %3095 : !torch.int, !torch.int -> !torch.int
    %12487 = torch.prim.ListConstruct %12486 : (!torch.int) -> !torch.list<int>
    %12488 = torch.aten.view %12464, %12487 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12488, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
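    // [annotation, added] Each device's original and incremented index vectors are
    // concatenated into a single [8*s0] scatter-index list, so both planes can be
    // written with one index_put per device.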
    %12489 = torch.prim.ListConstruct %12395, %12467 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_10129 = torch.constant.int 0
    %12490 = torch.aten.cat %12489, %int0_10129 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12490, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %12491 = torch.prim.ListConstruct %12398, %12470 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_10130 = torch.constant.int 0
    %12492 = torch.aten.cat %12491, %int0_10130 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12492, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %12493 = torch.prim.ListConstruct %12401, %12473 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_10131 = torch.constant.int 0
    %12494 = torch.aten.cat %12493, %int0_10131 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12494, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %12495 = torch.prim.ListConstruct %12404, %12476 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_10132 = torch.constant.int 0
    %12496 = torch.aten.cat %12495, %int0_10132 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12496, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %12497 = torch.prim.ListConstruct %12407, %12479 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_10133 = torch.constant.int 0
    %12498 = torch.aten.cat %12497, %int0_10133 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12498, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %12499 = torch.prim.ListConstruct %12410, %12482 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_10134 = torch.constant.int 0
    %12500 = torch.aten.cat %12499, %int0_10134 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12500, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %12501 = torch.prim.ListConstruct %12413, %12485 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_10135 = torch.constant.int 0
    %12502 = torch.aten.cat %12501, %int0_10135 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12502, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %12503 = torch.prim.ListConstruct %12416, %12488 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_10136 = torch.constant.int 0
    %12504 = torch.aten.cat %12503, %int0_10136 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %12504, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
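    // [annotation, added] The matching value rows are concatenated the same way:
    // [4*s0,16,1,128] + [4*s0,16,1,128] -> [8*s0,16,1,128] per device, aligned
    // element-for-element with the index lists built above.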
    %12505 = torch.prim.ListConstruct %12371, %12435 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_10137 = torch.constant.int 0
    %12506 = torch.aten.cat %12505, %int0_10137 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12506, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12507 = torch.prim.ListConstruct %12374, %12438 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_10138 = torch.constant.int 0
    %12508 = torch.aten.cat %12507, %int0_10138 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12508, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12509 = torch.prim.ListConstruct %12377, %12441 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_10139 = torch.constant.int 0
    %12510 = torch.aten.cat %12509, %int0_10139 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12510, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12511 = torch.prim.ListConstruct %12380, %12444 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_10140 = torch.constant.int 0
    %12512 = torch.aten.cat %12511, %int0_10140 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12512, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12513 = torch.prim.ListConstruct %12383, %12447 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_10141 = torch.constant.int 0
    %12514 = torch.aten.cat %12513, %int0_10141 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12514, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12515 = torch.prim.ListConstruct %12386, %12450 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_10142 = torch.constant.int 0
    %12516 = torch.aten.cat %12515, %int0_10142 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12516, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12517 = torch.prim.ListConstruct %12389, %12453 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_10143 = torch.constant.int 0
    %12518 = torch.aten.cat %12517, %int0_10143 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12518, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12519 = torch.prim.ListConstruct %12392, %12456 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_10144 = torch.constant.int 0
    %12520 = torch.aten.cat %12519, %int0_10144 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12520, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
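    // [annotation, added] What follows is the per-device KV-cache update, repeated for
    // all eight shards (%10671, %10683, %10695, %10707, %10719, %10731, %10743, %10755):
    // view the flat [?,131072] cache as [pages,32,2,16,1,128] (note 32*2*16*1*128 =
    // 131072), collapse the leading three dims into one row index, scatter the
    // concatenated rows with index_put (accumulate = false), and view back to the flat
    // shape. A hand-written PyTorch sketch of the same pattern (assumed, not part of
    // this module):
    //   cache = cache.view(pages, 32, 2, 16, 1, 128).reshape(pages * 64, 16, 1, 128)
    //   cache = torch.ops.aten.index_put(cache, [indices], values, False)
    //   cache = cache.view(pages, 131072)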
    %int32_10145 = torch.constant.int 32
    %int2_10146 = torch.constant.int 2
    %int16_10147 = torch.constant.int 16
    %int1_10148 = torch.constant.int 1
    %int128_10149 = torch.constant.int 128
    %12521 = torch.prim.ListConstruct %3023, %int32_10145, %int2_10146, %int16_10147, %int1_10148, %int128_10149 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12522 = torch.aten.view %10671, %12521 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12522, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_10150 = torch.constant.int 32
    %12523 = torch.aten.mul.int %3023, %int32_10150 : !torch.int, !torch.int -> !torch.int
    %int2_10151 = torch.constant.int 2
    %12524 = torch.aten.mul.int %12523, %int2_10151 : !torch.int, !torch.int -> !torch.int
    %int16_10152 = torch.constant.int 16
    %int1_10153 = torch.constant.int 1
    %int128_10154 = torch.constant.int 128
    %12525 = torch.prim.ListConstruct %12524, %int16_10152, %int1_10153, %int128_10154 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12526 = torch.aten.view %12522, %12525 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12526, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12527 = torch.prim.ListConstruct %12490 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_10155 = torch.constant.bool false
    %12528 = torch.aten.index_put %12526, %12527, %12506, %false_10155 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12528, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_10156 = torch.constant.int 32
    %int2_10157 = torch.constant.int 2
    %int16_10158 = torch.constant.int 16
    %int1_10159 = torch.constant.int 1
    %int128_10160 = torch.constant.int 128
    %12529 = torch.prim.ListConstruct %3023, %int32_10156, %int2_10157, %int16_10158, %int1_10159, %int128_10160 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12530 = torch.aten.view %12528, %12529 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12530, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_10161 = torch.constant.int 131072
    %12531 = torch.prim.ListConstruct %3023, %int131072_10161 : (!torch.int, !torch.int) -> !torch.list<int>
    %12532 = torch.aten.view %12530, %12531 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %12532, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
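    // [annotation, added] Devices 1-7 follow: the identical view / index_put / view
    // sequence on %10683, %10695, %10707, %10719, %10731, %10743 and %10755, each with
    // its own shard-local page count (%3026 ... %3044) and index/value pair.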
    %int32_10162 = torch.constant.int 32
    %int2_10163 = torch.constant.int 2
    %int16_10164 = torch.constant.int 16
    %int1_10165 = torch.constant.int 1
    %int128_10166 = torch.constant.int 128
    %12533 = torch.prim.ListConstruct %3026, %int32_10162, %int2_10163, %int16_10164, %int1_10165, %int128_10166 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12534 = torch.aten.view %10683, %12533 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12534, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_10167 = torch.constant.int 32
    %12535 = torch.aten.mul.int %3026, %int32_10167 : !torch.int, !torch.int -> !torch.int
    %int2_10168 = torch.constant.int 2
    %12536 = torch.aten.mul.int %12535, %int2_10168 : !torch.int, !torch.int -> !torch.int
    %int16_10169 = torch.constant.int 16
    %int1_10170 = torch.constant.int 1
    %int128_10171 = torch.constant.int 128
    %12537 = torch.prim.ListConstruct %12536, %int16_10169, %int1_10170, %int128_10171 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12538 = torch.aten.view %12534, %12537 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12538, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12539 = torch.prim.ListConstruct %12492 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_10172 = torch.constant.bool false
    %12540 = torch.aten.index_put %12538, %12539, %12508, %false_10172 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12540, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_10173 = torch.constant.int 32
    %int2_10174 = torch.constant.int 2
    %int16_10175 = torch.constant.int 16
    %int1_10176 = torch.constant.int 1
    %int128_10177 = torch.constant.int 128
    %12541 = torch.prim.ListConstruct %3026, %int32_10173, %int2_10174, %int16_10175, %int1_10176, %int128_10177 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12542 = torch.aten.view %12540, %12541 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12542, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_10178 = torch.constant.int 131072
    %12543 = torch.prim.ListConstruct %3026, %int131072_10178 : (!torch.int, !torch.int) -> !torch.list<int>
    %12544 = torch.aten.view %12542, %12543 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %12544, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_10179 = torch.constant.int 32
    %int2_10180 = torch.constant.int 2
    %int16_10181 = torch.constant.int 16
    %int1_10182 = torch.constant.int 1
    %int128_10183 = torch.constant.int 128
    %12545 = torch.prim.ListConstruct %3029, %int32_10179, %int2_10180, %int16_10181, %int1_10182, %int128_10183 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12546 = torch.aten.view %10695, %12545 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12546, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_10184 = torch.constant.int 32
    %12547 = torch.aten.mul.int %3029, %int32_10184 : !torch.int, !torch.int -> !torch.int
    %int2_10185 = torch.constant.int 2
    %12548 = torch.aten.mul.int %12547, %int2_10185 : !torch.int, !torch.int -> !torch.int
    %int16_10186 = torch.constant.int 16
    %int1_10187 = torch.constant.int 1
    %int128_10188 = torch.constant.int 128
    %12549 = torch.prim.ListConstruct %12548, %int16_10186, %int1_10187, %int128_10188 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12550 = torch.aten.view %12546, %12549 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12550, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12551 = torch.prim.ListConstruct %12494 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_10189 = torch.constant.bool false
    %12552 = torch.aten.index_put %12550, %12551, %12510, %false_10189 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12552, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_10190 = torch.constant.int 32
    %int2_10191 = torch.constant.int 2
    %int16_10192 = torch.constant.int 16
    %int1_10193 = torch.constant.int 1
    %int128_10194 = torch.constant.int 128
    %12553 = torch.prim.ListConstruct %3029, %int32_10190, %int2_10191, %int16_10192, %int1_10193, %int128_10194 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12554 = torch.aten.view %12552, %12553 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12554, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_10195 = torch.constant.int 131072
    %12555 = torch.prim.ListConstruct %3029, %int131072_10195 : (!torch.int, !torch.int) -> !torch.list<int>
    %12556 = torch.aten.view %12554, %12555 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %12556, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_10196 = torch.constant.int 32
    %int2_10197 = torch.constant.int 2
    %int16_10198 = torch.constant.int 16
    %int1_10199 = torch.constant.int 1
    %int128_10200 = torch.constant.int 128
    %12557 = torch.prim.ListConstruct %3032, %int32_10196, %int2_10197, %int16_10198, %int1_10199, %int128_10200 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12558 = torch.aten.view %10707, %12557 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12558, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_10201 = torch.constant.int 32
    %12559 = torch.aten.mul.int %3032, %int32_10201 : !torch.int, !torch.int -> !torch.int
    %int2_10202 = torch.constant.int 2
    %12560 = torch.aten.mul.int %12559, %int2_10202 : !torch.int, !torch.int -> !torch.int
    %int16_10203 = torch.constant.int 16
    %int1_10204 = torch.constant.int 1
    %int128_10205 = torch.constant.int 128
    %12561 = torch.prim.ListConstruct %12560, %int16_10203, %int1_10204, %int128_10205 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12562 = torch.aten.view %12558, %12561 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12562, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12563 = torch.prim.ListConstruct %12496 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_10206 = torch.constant.bool false
    %12564 = torch.aten.index_put %12562, %12563, %12512, %false_10206 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12564, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_10207 = torch.constant.int 32
    %int2_10208 = torch.constant.int 2
    %int16_10209 = torch.constant.int 16
    %int1_10210 = torch.constant.int 1
    %int128_10211 = torch.constant.int 128
    %12565 = torch.prim.ListConstruct %3032, %int32_10207, %int2_10208, %int16_10209, %int1_10210, %int128_10211 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12566 = torch.aten.view %12564, %12565 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12566, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_10212 = torch.constant.int 131072
    %12567 = torch.prim.ListConstruct %3032, %int131072_10212 : (!torch.int, !torch.int) -> !torch.list<int>
    %12568 = torch.aten.view %12566, %12567 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %12568, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_10213 = torch.constant.int 32
    %int2_10214 = torch.constant.int 2
    %int16_10215 = torch.constant.int 16
    %int1_10216 = torch.constant.int 1
    %int128_10217 = torch.constant.int 128
    %12569 = torch.prim.ListConstruct %3035, %int32_10213, %int2_10214, %int16_10215, %int1_10216, %int128_10217 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12570 = torch.aten.view %10719, %12569 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12570, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_10218 = torch.constant.int 32
    %12571 = torch.aten.mul.int %3035, %int32_10218 : !torch.int, !torch.int -> !torch.int
    %int2_10219 = torch.constant.int 2
    %12572 = torch.aten.mul.int %12571, %int2_10219 : !torch.int, !torch.int -> !torch.int
    %int16_10220 = torch.constant.int 16
    %int1_10221 = torch.constant.int 1
    %int128_10222 = torch.constant.int 128
    %12573 = torch.prim.ListConstruct %12572, %int16_10220, %int1_10221, %int128_10222 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12574 = torch.aten.view %12570, %12573 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12574, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12575 = torch.prim.ListConstruct %12498 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_10223 = torch.constant.bool false
    %12576 = torch.aten.index_put %12574, %12575, %12514, %false_10223 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12576, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_10224 = torch.constant.int 32
    %int2_10225 = torch.constant.int 2
    %int16_10226 = torch.constant.int 16
    %int1_10227 = torch.constant.int 1
    %int128_10228 = torch.constant.int 128
    %12577 = torch.prim.ListConstruct %3035, %int32_10224, %int2_10225, %int16_10226, %int1_10227, %int128_10228 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12578 = torch.aten.view %12576, %12577 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12578, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_10229 = torch.constant.int 131072
    %12579 = torch.prim.ListConstruct %3035, %int131072_10229 : (!torch.int, !torch.int) -> !torch.list<int>
    %12580 = torch.aten.view %12578, %12579 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %12580, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_10230 = torch.constant.int 32
    %int2_10231 = torch.constant.int 2
    %int16_10232 = torch.constant.int 16
    %int1_10233 = torch.constant.int 1
    %int128_10234 = torch.constant.int 128
    %12581 = torch.prim.ListConstruct %3038, %int32_10230, %int2_10231, %int16_10232, %int1_10233, %int128_10234 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12582 = torch.aten.view %10731, %12581 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12582, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_10235 = torch.constant.int 32
    %12583 = torch.aten.mul.int %3038, %int32_10235 : !torch.int, !torch.int -> !torch.int
    %int2_10236 = torch.constant.int 2
    %12584 = torch.aten.mul.int %12583, %int2_10236 : !torch.int, !torch.int -> !torch.int
    %int16_10237 = torch.constant.int 16
    %int1_10238 = torch.constant.int 1
    %int128_10239 = torch.constant.int 128
    %12585 = torch.prim.ListConstruct %12584, %int16_10237, %int1_10238, %int128_10239 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12586 = torch.aten.view %12582, %12585 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12586, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12587 = torch.prim.ListConstruct %12500 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_10240 = torch.constant.bool false
    %12588 = torch.aten.index_put %12586, %12587, %12516, %false_10240 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12588, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_10241 = torch.constant.int 32
    %int2_10242 = torch.constant.int 2
    %int16_10243 = torch.constant.int 16
    %int1_10244 = torch.constant.int 1
    %int128_10245 = torch.constant.int 128
    %12589 = torch.prim.ListConstruct %3038, %int32_10241, %int2_10242, %int16_10243, %int1_10244, %int128_10245 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12590 = torch.aten.view %12588, %12589 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12590, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_10246 = torch.constant.int 131072
    %12591 = torch.prim.ListConstruct %3038, %int131072_10246 : (!torch.int, !torch.int) -> !torch.list<int>
    %12592 = torch.aten.view %12590, %12591 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %12592, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_10247 = torch.constant.int 32
    %int2_10248 = torch.constant.int 2
    %int16_10249 = torch.constant.int 16
    %int1_10250 = torch.constant.int 1
    %int128_10251 = torch.constant.int 128
    %12593 = torch.prim.ListConstruct %3041, %int32_10247, %int2_10248, %int16_10249, %int1_10250, %int128_10251 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12594 = torch.aten.view %10743, %12593 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12594, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_10252 = torch.constant.int 32
    %12595 = torch.aten.mul.int %3041, %int32_10252 : !torch.int, !torch.int -> !torch.int
    %int2_10253 = torch.constant.int 2
    %12596 = torch.aten.mul.int %12595, %int2_10253 : !torch.int, !torch.int -> !torch.int
    %int16_10254 = torch.constant.int 16
    %int1_10255 = torch.constant.int 1
    %int128_10256 = torch.constant.int 128
    %12597 = torch.prim.ListConstruct %12596, %int16_10254, %int1_10255, %int128_10256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12598 = torch.aten.view %12594, %12597 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12598, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12599 = torch.prim.ListConstruct %12502 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_10257 = torch.constant.bool false
    %12600 = torch.aten.index_put %12598, %12599, %12518, %false_10257 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12600, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_10258 = torch.constant.int 32
    %int2_10259 = torch.constant.int 2
    %int16_10260 = torch.constant.int 16
    %int1_10261 = torch.constant.int 1
    %int128_10262 = torch.constant.int 128
    %12601 = torch.prim.ListConstruct %3041, %int32_10258, %int2_10259, %int16_10260, %int1_10261, %int128_10262 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12602 = torch.aten.view %12600, %12601 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12602, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_10263 = torch.constant.int 131072
    %12603 = torch.prim.ListConstruct %3041, %int131072_10263 : (!torch.int, !torch.int) -> !torch.list<int>
    %12604 = torch.aten.view %12602, %12603 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %12604, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_10264 = torch.constant.int 32
    %int2_10265 = torch.constant.int 2
    %int16_10266 = torch.constant.int 16
    %int1_10267 = torch.constant.int 1
    %int128_10268 = torch.constant.int 128
    %12605 = torch.prim.ListConstruct %3044, %int32_10264, %int2_10265, %int16_10266, %int1_10267, %int128_10268 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12606 = torch.aten.view %10755, %12605 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12606, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_10269 = torch.constant.int 32
    %12607 = torch.aten.mul.int %3044, %int32_10269 : !torch.int, !torch.int -> !torch.int
    %int2_10270 = torch.constant.int 2
    %12608 = torch.aten.mul.int %12607, %int2_10270 : !torch.int, !torch.int -> !torch.int
    %int16_10271 = torch.constant.int 16
    %int1_10272 = torch.constant.int 1
    %int128_10273 = torch.constant.int 128
    %12609 = torch.prim.ListConstruct %12608, %int16_10271, %int1_10272, %int128_10273 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12610 = torch.aten.view %12606, %12609 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12610, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %12611 = torch.prim.ListConstruct %12504 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_10274 = torch.constant.bool false
    %12612 = torch.aten.index_put %12610, %12611, %12520, %false_10274 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %12612, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_10275 = torch.constant.int 32
    %int2_10276 = torch.constant.int 2
    %int16_10277 = torch.constant.int 16
    %int1_10278 = torch.constant.int 1
    %int128_10279 = torch.constant.int 128
    %12613 = torch.prim.ListConstruct %3044, %int32_10275, %int2_10276, %int16_10277, %int1_10278, %int128_10279 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12614 = torch.aten.view %12612, %12613 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %12614, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_10280 = torch.constant.int 131072
    %12615 = torch.prim.ListConstruct %3044, %int131072_10280 : (!torch.int, !torch.int) -> !torch.list<int>
    %12616 = torch.aten.view %12614, %12615 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %12616, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
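    // [annotation, added] The remainder of this block broadcasts the single K/V head of
    // each shard to 4 query heads (grouped-query-attention style replication): unsqueeze
    // to [4,?,1,1,128], expand the inserted axis to 4 (no copy), then view-collapse to
    // [4,?,4,128]. PyTorch sketch of the same pattern (assumed):
    //   k = k.unsqueeze(-2).expand(4, seq, 1, 4, 128).reshape(4, seq, 4, 128)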
    %int-2_10281 = torch.constant.int -2
    %12617 = torch.aten.unsqueeze %12231, %int-2_10281 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10282 = torch.constant.int -2
    %12618 = torch.aten.unsqueeze %12246, %int-2_10282 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10283 = torch.constant.int -2
    %12619 = torch.aten.unsqueeze %12261, %int-2_10283 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10284 = torch.constant.int -2
    %12620 = torch.aten.unsqueeze %12276, %int-2_10284 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10285 = torch.constant.int -2
    %12621 = torch.aten.unsqueeze %12291, %int-2_10285 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10286 = torch.constant.int -2
    %12622 = torch.aten.unsqueeze %12306, %int-2_10286 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10287 = torch.constant.int -2
    %12623 = torch.aten.unsqueeze %12321, %int-2_10287 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10288 = torch.constant.int -2
    %12624 = torch.aten.unsqueeze %12336, %int-2_10288 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int4_10289 = torch.constant.int 4
    %int1_10290 = torch.constant.int 1
    %int4_10291 = torch.constant.int 4
    %int128_10292 = torch.constant.int 128
    %12625 = torch.prim.ListConstruct %int4_10289, %12217, %int1_10290, %int4_10291, %int128_10292 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10293 = torch.constant.bool false
    %12626 = torch.aten.expand %12617, %12625, %false_10293 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10294 = torch.constant.int 4
    %int1_10295 = torch.constant.int 1
    %int4_10296 = torch.constant.int 4
    %int128_10297 = torch.constant.int 128
    %12627 = torch.prim.ListConstruct %int4_10294, %12217, %int1_10295, %int4_10296, %int128_10297 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10298 = torch.constant.bool false
    %12628 = torch.aten.expand %12618, %12627, %false_10298 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10299 = torch.constant.int 4
    %int1_10300 = torch.constant.int 1
    %int4_10301 = torch.constant.int 4
    %int128_10302 = torch.constant.int 128
    %12629 = torch.prim.ListConstruct %int4_10299, %12217, %int1_10300, %int4_10301, %int128_10302 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10303 = torch.constant.bool false
    %12630 = torch.aten.expand %12619, %12629, %false_10303 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10304 = torch.constant.int 4
    %int1_10305 = torch.constant.int 1
    %int4_10306 = torch.constant.int 4
    %int128_10307 = torch.constant.int 128
    %12631 = torch.prim.ListConstruct %int4_10304, %12217, %int1_10305, %int4_10306, %int128_10307 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10308 = torch.constant.bool false
    %12632 = torch.aten.expand %12620, %12631, %false_10308 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10309 = torch.constant.int 4
    %int1_10310 = torch.constant.int 1
    %int4_10311 = torch.constant.int 4
    %int128_10312 = torch.constant.int 128
    %12633 = torch.prim.ListConstruct %int4_10309, %12217, %int1_10310, %int4_10311, %int128_10312 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10313 = torch.constant.bool false
    %12634 = torch.aten.expand %12621, %12633, %false_10313 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10314 = torch.constant.int 4
    %int1_10315 = torch.constant.int 1
    %int4_10316 = torch.constant.int 4
    %int128_10317 = torch.constant.int 128
    %12635 = torch.prim.ListConstruct %int4_10314, %12217, %int1_10315, %int4_10316, %int128_10317 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10318 = torch.constant.bool false
    %12636 = torch.aten.expand %12622, %12635, %false_10318 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10319 = torch.constant.int 4
    %int1_10320 = torch.constant.int 1
    %int4_10321 = torch.constant.int 4
    %int128_10322 = torch.constant.int 128
    %12637 = torch.prim.ListConstruct %int4_10319, %12217, %int1_10320, %int4_10321, %int128_10322 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10323 = torch.constant.bool false
    %12638 = torch.aten.expand %12623, %12637, %false_10323 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10324 = torch.constant.int 4
    %int1_10325 = torch.constant.int 1
    %int4_10326 = torch.constant.int 4
    %int128_10327 = torch.constant.int 128
    %12639 = torch.prim.ListConstruct %int4_10324, %12217, %int1_10325, %int4_10326, %int128_10327 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10328 = torch.constant.bool false
    %12640 = torch.aten.expand %12624, %12639, %false_10328 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10329 = torch.constant.int 4
    %int4_10330 = torch.constant.int 4
    %int128_10331 = torch.constant.int 128
    %12641 = torch.prim.ListConstruct %int4_10329, %12217, %int4_10330, %int128_10331 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12642 = torch.aten.view %12626, %12641 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10332 = torch.constant.int 4
    %int4_10333 = torch.constant.int 4
    %int128_10334 = torch.constant.int 128
    %12643 = torch.prim.ListConstruct %int4_10332, %12217, %int4_10333, %int128_10334 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12644 = torch.aten.view %12628, %12643 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10335 = torch.constant.int 4
    %int4_10336 = torch.constant.int 4
    %int128_10337 = torch.constant.int 128
    %12645 = torch.prim.ListConstruct %int4_10335, %12217, %int4_10336, %int128_10337 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12646 = torch.aten.view %12630, %12645 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10338 = torch.constant.int 4
    %int4_10339 = torch.constant.int 4
    %int128_10340 = torch.constant.int 128
    %12647 = torch.prim.ListConstruct %int4_10338, %12217, %int4_10339, %int128_10340 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12648 = torch.aten.view %12632, %12647 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10341 = torch.constant.int 4
    %int4_10342 = torch.constant.int 4
    %int128_10343 = torch.constant.int 128
    %12649 = torch.prim.ListConstruct %int4_10341, %12217, %int4_10342, %int128_10343 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12650 = torch.aten.view %12634, %12649 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10344 = torch.constant.int 4
    %int4_10345 = torch.constant.int 4
    %int128_10346 = torch.constant.int 128
    %12651 = torch.prim.ListConstruct %int4_10344, %12217, %int4_10345, %int128_10346 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12652 = torch.aten.view %12636, %12651 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10347 = torch.constant.int 4
    %int4_10348 = torch.constant.int 4
    %int128_10349 = torch.constant.int 128
    %12653 = torch.prim.ListConstruct %int4_10347, %12217, %int4_10348, %int128_10349 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12654 = torch.aten.view %12638, %12653 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10350 = torch.constant.int 4
    %int4_10351 = torch.constant.int 4
    %int128_10352 = torch.constant.int 128
    %12655 = torch.prim.ListConstruct %int4_10350, %12217, %int4_10351, %int128_10352 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12656 = torch.aten.view %12640, %12655 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
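    // [annotation, added] The same head broadcast is now applied to the second tensor
    // set (%12006 ... %12020), presumably the V projections paired with the K tensors
    // handled above.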
    %int-2_10353 = torch.constant.int -2
    %12657 = torch.aten.unsqueeze %12006, %int-2_10353 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10354 = torch.constant.int -2
    %12658 = torch.aten.unsqueeze %12008, %int-2_10354 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10355 = torch.constant.int -2
    %12659 = torch.aten.unsqueeze %12010, %int-2_10355 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10356 = torch.constant.int -2
    %12660 = torch.aten.unsqueeze %12012, %int-2_10356 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10357 = torch.constant.int -2
    %12661 = torch.aten.unsqueeze %12014, %int-2_10357 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10358 = torch.constant.int -2
    %12662 = torch.aten.unsqueeze %12016, %int-2_10358 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10359 = torch.constant.int -2
    %12663 = torch.aten.unsqueeze %12018, %int-2_10359 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_10360 = torch.constant.int -2
    %12664 = torch.aten.unsqueeze %12020, %int-2_10360 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %12664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
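    // [annotation, added] %12665 below reads the dynamic sequence length from %11930
    // (dim 1) at runtime; the expand shapes that follow use it directly as the symbolic
    // extent rather than the page-split form used earlier in this block.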
    %int1_10361 = torch.constant.int 1
    %12665 = torch.aten.size.int %11930, %int1_10361 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_10362 = torch.constant.int 4
    %int1_10363 = torch.constant.int 1
    %int4_10364 = torch.constant.int 4
    %int128_10365 = torch.constant.int 128
    %12666 = torch.prim.ListConstruct %int4_10362, %12665, %int1_10363, %int4_10364, %int128_10365 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10366 = torch.constant.bool false
    %12667 = torch.aten.expand %12657, %12666, %false_10366 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10367 = torch.constant.int 4
    %int1_10368 = torch.constant.int 1
    %int4_10369 = torch.constant.int 4
    %int128_10370 = torch.constant.int 128
    %12668 = torch.prim.ListConstruct %int4_10367, %12665, %int1_10368, %int4_10369, %int128_10370 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10371 = torch.constant.bool false
    %12669 = torch.aten.expand %12658, %12668, %false_10371 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10372 = torch.constant.int 4
    %int1_10373 = torch.constant.int 1
    %int4_10374 = torch.constant.int 4
    %int128_10375 = torch.constant.int 128
    %12670 = torch.prim.ListConstruct %int4_10372, %12665, %int1_10373, %int4_10374, %int128_10375 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10376 = torch.constant.bool false
    %12671 = torch.aten.expand %12659, %12670, %false_10376 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10377 = torch.constant.int 4
    %int1_10378 = torch.constant.int 1
    %int4_10379 = torch.constant.int 4
    %int128_10380 = torch.constant.int 128
    %12672 = torch.prim.ListConstruct %int4_10377, %12665, %int1_10378, %int4_10379, %int128_10380 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10381 = torch.constant.bool false
    %12673 = torch.aten.expand %12660, %12672, %false_10381 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10382 = torch.constant.int 4
    %int1_10383 = torch.constant.int 1
    %int4_10384 = torch.constant.int 4
    %int128_10385 = torch.constant.int 128
    %12674 = torch.prim.ListConstruct %int4_10382, %12665, %int1_10383, %int4_10384, %int128_10385 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10386 = torch.constant.bool false
    %12675 = torch.aten.expand %12661, %12674, %false_10386 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10387 = torch.constant.int 4
    %int1_10388 = torch.constant.int 1
    %int4_10389 = torch.constant.int 4
    %int128_10390 = torch.constant.int 128
    %12676 = torch.prim.ListConstruct %int4_10387, %12665, %int1_10388, %int4_10389, %int128_10390 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10391 = torch.constant.bool false
    %12677 = torch.aten.expand %12662, %12676, %false_10391 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10392 = torch.constant.int 4
    %int1_10393 = torch.constant.int 1
    %int4_10394 = torch.constant.int 4
    %int128_10395 = torch.constant.int 128
    %12678 = torch.prim.ListConstruct %int4_10392, %12665, %int1_10393, %int4_10394, %int128_10395 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10396 = torch.constant.bool false
    %12679 = torch.aten.expand %12663, %12678, %false_10396 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_10397 = torch.constant.int 4
    %int1_10398 = torch.constant.int 1
    %int4_10399 = torch.constant.int 4
    %int128_10400 = torch.constant.int 128
    %12680 = torch.prim.ListConstruct %int4_10397, %12665, %int1_10398, %int4_10399, %int128_10400 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_10401 = torch.constant.bool false
    %12681 = torch.aten.expand %12664, %12680, %false_10401 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %12681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
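    // Collapse the broadcast KV-head axis by viewing [4,?,1,4,128] as [4,?,4,128], yielding
    // the per-device value tensors %12683..%12697 consumed below. Roughly, in illustrative
    // PyTorch: v.unsqueeze(-2).expand(4, s, 1, 4, 128).reshape(4, s, 4, 128)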
    %int4_10402 = torch.constant.int 4
    %int4_10403 = torch.constant.int 4
    %int128_10404 = torch.constant.int 128
    %12682 = torch.prim.ListConstruct %int4_10402, %12665, %int4_10403, %int128_10404 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12683 = torch.aten.view %12667, %12682 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10405 = torch.constant.int 4
    %int4_10406 = torch.constant.int 4
    %int128_10407 = torch.constant.int 128
    %12684 = torch.prim.ListConstruct %int4_10405, %12665, %int4_10406, %int128_10407 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12685 = torch.aten.view %12669, %12684 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10408 = torch.constant.int 4
    %int4_10409 = torch.constant.int 4
    %int128_10410 = torch.constant.int 128
    %12686 = torch.prim.ListConstruct %int4_10408, %12665, %int4_10409, %int128_10410 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12687 = torch.aten.view %12671, %12686 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10411 = torch.constant.int 4
    %int4_10412 = torch.constant.int 4
    %int128_10413 = torch.constant.int 128
    %12688 = torch.prim.ListConstruct %int4_10411, %12665, %int4_10412, %int128_10413 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12689 = torch.aten.view %12673, %12688 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10414 = torch.constant.int 4
    %int4_10415 = torch.constant.int 4
    %int128_10416 = torch.constant.int 128
    %12690 = torch.prim.ListConstruct %int4_10414, %12665, %int4_10415, %int128_10416 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12691 = torch.aten.view %12675, %12690 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10417 = torch.constant.int 4
    %int4_10418 = torch.constant.int 4
    %int128_10419 = torch.constant.int 128
    %12692 = torch.prim.ListConstruct %int4_10417, %12665, %int4_10418, %int128_10419 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12693 = torch.aten.view %12677, %12692 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10420 = torch.constant.int 4
    %int4_10421 = torch.constant.int 4
    %int128_10422 = torch.constant.int 128
    %12694 = torch.prim.ListConstruct %int4_10420, %12665, %int4_10421, %int128_10422 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12695 = torch.aten.view %12679, %12694 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_10423 = torch.constant.int 4
    %int4_10424 = torch.constant.int 4
    %int128_10425 = torch.constant.int 128
    %12696 = torch.prim.ListConstruct %int4_10423, %12665, %int4_10424, %int128_10425 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12697 = torch.aten.view %12681, %12696 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
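    // Transpose dims 1 and 2 of the queries (%12073..%12178), keys (%12642..%12656), and the
    // values just built, from [4, seq, 4, 128] to [4, 4, seq, 128], the layout attention expects.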
    %int1_10426 = torch.constant.int 1
    %int2_10427 = torch.constant.int 2
    %12698 = torch.aten.transpose.int %12073, %int1_10426, %int2_10427 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12698, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10428 = torch.constant.int 1
    %int2_10429 = torch.constant.int 2
    %12699 = torch.aten.transpose.int %12088, %int1_10428, %int2_10429 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12699, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10430 = torch.constant.int 1
    %int2_10431 = torch.constant.int 2
    %12700 = torch.aten.transpose.int %12103, %int1_10430, %int2_10431 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12700, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10432 = torch.constant.int 1
    %int2_10433 = torch.constant.int 2
    %12701 = torch.aten.transpose.int %12118, %int1_10432, %int2_10433 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12701, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10434 = torch.constant.int 1
    %int2_10435 = torch.constant.int 2
    %12702 = torch.aten.transpose.int %12133, %int1_10434, %int2_10435 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12702, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10436 = torch.constant.int 1
    %int2_10437 = torch.constant.int 2
    %12703 = torch.aten.transpose.int %12148, %int1_10436, %int2_10437 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12703, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10438 = torch.constant.int 1
    %int2_10439 = torch.constant.int 2
    %12704 = torch.aten.transpose.int %12163, %int1_10438, %int2_10439 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12704, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10440 = torch.constant.int 1
    %int2_10441 = torch.constant.int 2
    %12705 = torch.aten.transpose.int %12178, %int1_10440, %int2_10441 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12705, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10442 = torch.constant.int 1
    %int2_10443 = torch.constant.int 2
    %12706 = torch.aten.transpose.int %12642, %int1_10442, %int2_10443 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12706, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10444 = torch.constant.int 1
    %int2_10445 = torch.constant.int 2
    %12707 = torch.aten.transpose.int %12644, %int1_10444, %int2_10445 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12707, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10446 = torch.constant.int 1
    %int2_10447 = torch.constant.int 2
    %12708 = torch.aten.transpose.int %12646, %int1_10446, %int2_10447 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12708, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10448 = torch.constant.int 1
    %int2_10449 = torch.constant.int 2
    %12709 = torch.aten.transpose.int %12648, %int1_10448, %int2_10449 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12709, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10450 = torch.constant.int 1
    %int2_10451 = torch.constant.int 2
    %12710 = torch.aten.transpose.int %12650, %int1_10450, %int2_10451 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12710, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10452 = torch.constant.int 1
    %int2_10453 = torch.constant.int 2
    %12711 = torch.aten.transpose.int %12652, %int1_10452, %int2_10453 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12711, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10454 = torch.constant.int 1
    %int2_10455 = torch.constant.int 2
    %12712 = torch.aten.transpose.int %12654, %int1_10454, %int2_10455 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12712, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10456 = torch.constant.int 1
    %int2_10457 = torch.constant.int 2
    %12713 = torch.aten.transpose.int %12656, %int1_10456, %int2_10457 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12713, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10458 = torch.constant.int 1
    %int2_10459 = torch.constant.int 2
    %12714 = torch.aten.transpose.int %12683, %int1_10458, %int2_10459 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12714, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10460 = torch.constant.int 1
    %int2_10461 = torch.constant.int 2
    %12715 = torch.aten.transpose.int %12685, %int1_10460, %int2_10461 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12715, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10462 = torch.constant.int 1
    %int2_10463 = torch.constant.int 2
    %12716 = torch.aten.transpose.int %12687, %int1_10462, %int2_10463 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12716, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10464 = torch.constant.int 1
    %int2_10465 = torch.constant.int 2
    %12717 = torch.aten.transpose.int %12689, %int1_10464, %int2_10465 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12717, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10466 = torch.constant.int 1
    %int2_10467 = torch.constant.int 2
    %12718 = torch.aten.transpose.int %12691, %int1_10466, %int2_10467 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12718, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10468 = torch.constant.int 1
    %int2_10469 = torch.constant.int 2
    %12719 = torch.aten.transpose.int %12693, %int1_10468, %int2_10469 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12719, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10470 = torch.constant.int 1
    %int2_10471 = torch.constant.int 2
    %12720 = torch.aten.transpose.int %12695, %int1_10470, %int2_10471 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12720, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_10472 = torch.constant.int 1
    %int2_10473 = torch.constant.int 2
    %12721 = torch.aten.transpose.int %12697, %int1_10472, %int2_10473 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %12721, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
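    // One causal flash-attention call per shard: dropout 0.0, is_causal = true, no explicit
    // mask or scale. Each call returns the attention output [4,4,?,128] plus an auxiliary
    // f32 tensor (the flash-attention logsumexp), which is never used afterwards.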
    %float0.000000e00_10474 = torch.constant.float 0.000000e+00
    %true_10475 = torch.constant.bool true
    %none_10476 = torch.constant.none
    %none_10477 = torch.constant.none
    %12722:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%12698, %12706, %12714, %float0.000000e00_10474, %true_10475, %none_10476, %none_10477) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %12722#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_10478 = torch.constant.float 0.000000e+00
    %true_10479 = torch.constant.bool true
    %none_10480 = torch.constant.none
    %none_10481 = torch.constant.none
    %12723:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%12699, %12707, %12715, %float0.000000e00_10478, %true_10479, %none_10480, %none_10481) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %12723#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_10482 = torch.constant.float 0.000000e+00
    %true_10483 = torch.constant.bool true
    %none_10484 = torch.constant.none
    %none_10485 = torch.constant.none
    %12724:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%12700, %12708, %12716, %float0.000000e00_10482, %true_10483, %none_10484, %none_10485) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %12724#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_10486 = torch.constant.float 0.000000e+00
    %true_10487 = torch.constant.bool true
    %none_10488 = torch.constant.none
    %none_10489 = torch.constant.none
    %12725:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%12701, %12709, %12717, %float0.000000e00_10486, %true_10487, %none_10488, %none_10489) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %12725#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_10490 = torch.constant.float 0.000000e+00
    %true_10491 = torch.constant.bool true
    %none_10492 = torch.constant.none
    %none_10493 = torch.constant.none
    %12726:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%12702, %12710, %12718, %float0.000000e00_10490, %true_10491, %none_10492, %none_10493) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %12726#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_10494 = torch.constant.float 0.000000e+00
    %true_10495 = torch.constant.bool true
    %none_10496 = torch.constant.none
    %none_10497 = torch.constant.none
    %12727:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%12703, %12711, %12719, %float0.000000e00_10494, %true_10495, %none_10496, %none_10497) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %12727#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_10498 = torch.constant.float 0.000000e+00
    %true_10499 = torch.constant.bool true
    %none_10500 = torch.constant.none
    %none_10501 = torch.constant.none
    %12728:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%12704, %12712, %12720, %float0.000000e00_10498, %true_10499, %none_10500, %none_10501) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %12728#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_10502 = torch.constant.float 0.000000e+00
    %true_10503 = torch.constant.bool true
    %none_10504 = torch.constant.none
    %none_10505 = torch.constant.none
    %12729:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%12705, %12713, %12721, %float0.000000e00_10502, %true_10503, %none_10504, %none_10505) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %12729#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
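    // Transpose the eight attention outputs back to [4, seq, 4, 128].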
    %int1_10506 = torch.constant.int 1
    %int2_10507 = torch.constant.int 2
    %12730 = torch.aten.transpose.int %12722#0, %int1_10506, %int2_10507 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_10508 = torch.constant.int 1
    %int2_10509 = torch.constant.int 2
    %12731 = torch.aten.transpose.int %12723#0, %int1_10508, %int2_10509 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_10510 = torch.constant.int 1
    %int2_10511 = torch.constant.int 2
    %12732 = torch.aten.transpose.int %12724#0, %int1_10510, %int2_10511 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_10512 = torch.constant.int 1
    %int2_10513 = torch.constant.int 2
    %12733 = torch.aten.transpose.int %12725#0, %int1_10512, %int2_10513 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_10514 = torch.constant.int 1
    %int2_10515 = torch.constant.int 2
    %12734 = torch.aten.transpose.int %12726#0, %int1_10514, %int2_10515 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_10516 = torch.constant.int 1
    %int2_10517 = torch.constant.int 2
    %12735 = torch.aten.transpose.int %12727#0, %int1_10516, %int2_10517 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_10518 = torch.constant.int 1
    %int2_10519 = torch.constant.int 2
    %12736 = torch.aten.transpose.int %12728#0, %int1_10518, %int2_10519 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_10520 = torch.constant.int 1
    %int2_10521 = torch.constant.int 2
    %12737 = torch.aten.transpose.int %12729#0, %int1_10520, %int2_10521 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %12737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
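    // Merge the head and head-dim axes: [4,?,4,128] -> [4,?,512]. %12059, %12074, ..., %12164
    // are the per-shard dynamic sequence lengths captured earlier (each bound to s0 * 16).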
    %int4_10522 = torch.constant.int 4
    %int512_10523 = torch.constant.int 512
    %12738 = torch.prim.ListConstruct %int4_10522, %12059, %int512_10523 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12739 = torch.aten.view %12730, %12738 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %12739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_10524 = torch.constant.int 4
    %int512_10525 = torch.constant.int 512
    %12740 = torch.prim.ListConstruct %int4_10524, %12074, %int512_10525 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12741 = torch.aten.view %12731, %12740 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %12741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_10526 = torch.constant.int 4
    %int512_10527 = torch.constant.int 512
    %12742 = torch.prim.ListConstruct %int4_10526, %12089, %int512_10527 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12743 = torch.aten.view %12732, %12742 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %12743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_10528 = torch.constant.int 4
    %int512_10529 = torch.constant.int 512
    %12744 = torch.prim.ListConstruct %int4_10528, %12104, %int512_10529 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12745 = torch.aten.view %12733, %12744 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %12745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_10530 = torch.constant.int 4
    %int512_10531 = torch.constant.int 512
    %12746 = torch.prim.ListConstruct %int4_10530, %12119, %int512_10531 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12747 = torch.aten.view %12734, %12746 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %12747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_10532 = torch.constant.int 4
    %int512_10533 = torch.constant.int 512
    %12748 = torch.prim.ListConstruct %int4_10532, %12134, %int512_10533 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12749 = torch.aten.view %12735, %12748 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %12749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_10534 = torch.constant.int 4
    %int512_10535 = torch.constant.int 512
    %12750 = torch.prim.ListConstruct %int4_10534, %12149, %int512_10535 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12751 = torch.aten.view %12736, %12750 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %12751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_10536 = torch.constant.int 4
    %int512_10537 = torch.constant.int 512
    %12752 = torch.prim.ListConstruct %int4_10536, %12164, %int512_10537 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12753 = torch.aten.view %12737, %12752 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %12753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
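    // Transpose the eight [4096,512] projection weight shards %400..%407 to [512,4096];
    // presumably the attention output-projection weights, split along the reduction (head)
    // dimension, which is why the per-shard results below must be summed across devices.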
    %int1_10538 = torch.constant.int 1
    %int0_10539 = torch.constant.int 0
    %12754 = torch.prim.ListConstruct %int1_10538, %int0_10539 : (!torch.int, !torch.int) -> !torch.list<int>
    %12755 = torch.aten.permute %400, %12754 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_10540 = torch.constant.int 1
    %int0_10541 = torch.constant.int 0
    %12756 = torch.prim.ListConstruct %int1_10540, %int0_10541 : (!torch.int, !torch.int) -> !torch.list<int>
    %12757 = torch.aten.permute %401, %12756 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_10542 = torch.constant.int 1
    %int0_10543 = torch.constant.int 0
    %12758 = torch.prim.ListConstruct %int1_10542, %int0_10543 : (!torch.int, !torch.int) -> !torch.list<int>
    %12759 = torch.aten.permute %402, %12758 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_10544 = torch.constant.int 1
    %int0_10545 = torch.constant.int 0
    %12760 = torch.prim.ListConstruct %int1_10544, %int0_10545 : (!torch.int, !torch.int) -> !torch.list<int>
    %12761 = torch.aten.permute %403, %12760 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_10546 = torch.constant.int 1
    %int0_10547 = torch.constant.int 0
    %12762 = torch.prim.ListConstruct %int1_10546, %int0_10547 : (!torch.int, !torch.int) -> !torch.list<int>
    %12763 = torch.aten.permute %404, %12762 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_10548 = torch.constant.int 1
    %int0_10549 = torch.constant.int 0
    %12764 = torch.prim.ListConstruct %int1_10548, %int0_10549 : (!torch.int, !torch.int) -> !torch.list<int>
    %12765 = torch.aten.permute %405, %12764 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_10550 = torch.constant.int 1
    %int0_10551 = torch.constant.int 0
    %12766 = torch.prim.ListConstruct %int1_10550, %int0_10551 : (!torch.int, !torch.int) -> !torch.list<int>
    %12767 = torch.aten.permute %406, %12766 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_10552 = torch.constant.int 1
    %int0_10553 = torch.constant.int 0
    %12768 = torch.prim.ListConstruct %int1_10552, %int0_10553 : (!torch.int, !torch.int) -> !torch.list<int>
    %12769 = torch.aten.permute %407, %12768 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
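    // Per shard: flatten [4,?,512] to [4*seq,512], multiply by the permuted [512,4096] weight,
    // and reshape to [4,?,4096]. Each result (%12775, %12781, ..., %12817) is a partial sum of
    // the full output projection. Roughly, in illustrative PyTorch:
    //   out_d = (attn_d.reshape(4 * s, 512) @ w_d.t()).reshape(4, s, 4096)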
    %int4_10554 = torch.constant.int 4
    %12770 = torch.aten.mul.int %int4_10554, %12059 : !torch.int, !torch.int -> !torch.int
    %int512_10555 = torch.constant.int 512
    %12771 = torch.prim.ListConstruct %12770, %int512_10555 : (!torch.int, !torch.int) -> !torch.list<int>
    %12772 = torch.aten.view %12739, %12771 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %12772, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %12773 = torch.aten.mm %12772, %12755 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %12773, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10556 = torch.constant.int 4
    %int4096_10557 = torch.constant.int 4096
    %12774 = torch.prim.ListConstruct %int4_10556, %12059, %int4096_10557 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12775 = torch.aten.view %12773, %12774 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_10558 = torch.constant.int 4
    %12776 = torch.aten.mul.int %int4_10558, %12074 : !torch.int, !torch.int -> !torch.int
    %int512_10559 = torch.constant.int 512
    %12777 = torch.prim.ListConstruct %12776, %int512_10559 : (!torch.int, !torch.int) -> !torch.list<int>
    %12778 = torch.aten.view %12741, %12777 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %12778, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %12779 = torch.aten.mm %12778, %12757 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %12779, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10560 = torch.constant.int 4
    %int4096_10561 = torch.constant.int 4096
    %12780 = torch.prim.ListConstruct %int4_10560, %12074, %int4096_10561 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12781 = torch.aten.view %12779, %12780 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_10562 = torch.constant.int 4
    %12782 = torch.aten.mul.int %int4_10562, %12089 : !torch.int, !torch.int -> !torch.int
    %int512_10563 = torch.constant.int 512
    %12783 = torch.prim.ListConstruct %12782, %int512_10563 : (!torch.int, !torch.int) -> !torch.list<int>
    %12784 = torch.aten.view %12743, %12783 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %12784, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %12785 = torch.aten.mm %12784, %12759 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %12785, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10564 = torch.constant.int 4
    %int4096_10565 = torch.constant.int 4096
    %12786 = torch.prim.ListConstruct %int4_10564, %12089, %int4096_10565 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12787 = torch.aten.view %12785, %12786 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_10566 = torch.constant.int 4
    %12788 = torch.aten.mul.int %int4_10566, %12104 : !torch.int, !torch.int -> !torch.int
    %int512_10567 = torch.constant.int 512
    %12789 = torch.prim.ListConstruct %12788, %int512_10567 : (!torch.int, !torch.int) -> !torch.list<int>
    %12790 = torch.aten.view %12745, %12789 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %12790, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %12791 = torch.aten.mm %12790, %12761 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %12791, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10568 = torch.constant.int 4
    %int4096_10569 = torch.constant.int 4096
    %12792 = torch.prim.ListConstruct %int4_10568, %12104, %int4096_10569 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12793 = torch.aten.view %12791, %12792 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_10570 = torch.constant.int 4
    %12794 = torch.aten.mul.int %int4_10570, %12119 : !torch.int, !torch.int -> !torch.int
    %int512_10571 = torch.constant.int 512
    %12795 = torch.prim.ListConstruct %12794, %int512_10571 : (!torch.int, !torch.int) -> !torch.list<int>
    %12796 = torch.aten.view %12747, %12795 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %12796, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %12797 = torch.aten.mm %12796, %12763 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %12797, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10572 = torch.constant.int 4
    %int4096_10573 = torch.constant.int 4096
    %12798 = torch.prim.ListConstruct %int4_10572, %12119, %int4096_10573 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12799 = torch.aten.view %12797, %12798 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_10574 = torch.constant.int 4
    %12800 = torch.aten.mul.int %int4_10574, %12134 : !torch.int, !torch.int -> !torch.int
    %int512_10575 = torch.constant.int 512
    %12801 = torch.prim.ListConstruct %12800, %int512_10575 : (!torch.int, !torch.int) -> !torch.list<int>
    %12802 = torch.aten.view %12749, %12801 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %12802, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %12803 = torch.aten.mm %12802, %12765 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %12803, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10576 = torch.constant.int 4
    %int4096_10577 = torch.constant.int 4096
    %12804 = torch.prim.ListConstruct %int4_10576, %12134, %int4096_10577 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12805 = torch.aten.view %12803, %12804 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_10578 = torch.constant.int 4
    %12806 = torch.aten.mul.int %int4_10578, %12149 : !torch.int, !torch.int -> !torch.int
    %int512_10579 = torch.constant.int 512
    %12807 = torch.prim.ListConstruct %12806, %int512_10579 : (!torch.int, !torch.int) -> !torch.list<int>
    %12808 = torch.aten.view %12751, %12807 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %12808, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %12809 = torch.aten.mm %12808, %12767 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %12809, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10580 = torch.constant.int 4
    %int4096_10581 = torch.constant.int 4096
    %12810 = torch.prim.ListConstruct %int4_10580, %12149, %int4096_10581 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12811 = torch.aten.view %12809, %12810 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_10582 = torch.constant.int 4
    %12812 = torch.aten.mul.int %int4_10582, %12164 : !torch.int, !torch.int -> !torch.int
    %int512_10583 = torch.constant.int 512
    %12813 = torch.prim.ListConstruct %12812, %int512_10583 : (!torch.int, !torch.int) -> !torch.list<int>
    %12814 = torch.aten.view %12753, %12813 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %12814, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %12815 = torch.aten.mm %12814, %12769 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %12815, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10584 = torch.constant.int 4
    %int4096_10585 = torch.constant.int 4096
    %12816 = torch.prim.ListConstruct %int4_10584, %12164, %int4096_10585 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %12817 = torch.aten.view %12815, %12816 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
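    // All-reduce of the eight partial projections, materialized as an all-gather plus a local
    // summation on every device. First @__device_0: transfer the seven remote partials over
    // and accumulate them into the local %12775 via seven chained adds, yielding %12845.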
    %12818 = torch_c.to_builtin_tensor %12781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10586 = arith.constant 1 : index
    %dim_10587 = tensor.dim %12818, %c1_10586 : tensor<4x?x4096xf16>
    %12819 = flow.tensor.transfer %12818 : tensor<4x?x4096xf16>{%dim_10587} to #hal.device.promise<@__device_0>
    %12820 = torch_c.from_builtin_tensor %12819 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12821 = torch_c.to_builtin_tensor %12787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10588 = arith.constant 1 : index
    %dim_10589 = tensor.dim %12821, %c1_10588 : tensor<4x?x4096xf16>
    %12822 = flow.tensor.transfer %12821 : tensor<4x?x4096xf16>{%dim_10589} to #hal.device.promise<@__device_0>
    %12823 = torch_c.from_builtin_tensor %12822 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12824 = torch_c.to_builtin_tensor %12793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10590 = arith.constant 1 : index
    %dim_10591 = tensor.dim %12824, %c1_10590 : tensor<4x?x4096xf16>
    %12825 = flow.tensor.transfer %12824 : tensor<4x?x4096xf16>{%dim_10591} to #hal.device.promise<@__device_0>
    %12826 = torch_c.from_builtin_tensor %12825 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12827 = torch_c.to_builtin_tensor %12799 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10592 = arith.constant 1 : index
    %dim_10593 = tensor.dim %12827, %c1_10592 : tensor<4x?x4096xf16>
    %12828 = flow.tensor.transfer %12827 : tensor<4x?x4096xf16>{%dim_10593} to #hal.device.promise<@__device_0>
    %12829 = torch_c.from_builtin_tensor %12828 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12830 = torch_c.to_builtin_tensor %12805 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10594 = arith.constant 1 : index
    %dim_10595 = tensor.dim %12830, %c1_10594 : tensor<4x?x4096xf16>
    %12831 = flow.tensor.transfer %12830 : tensor<4x?x4096xf16>{%dim_10595} to #hal.device.promise<@__device_0>
    %12832 = torch_c.from_builtin_tensor %12831 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12833 = torch_c.to_builtin_tensor %12811 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10596 = arith.constant 1 : index
    %dim_10597 = tensor.dim %12833, %c1_10596 : tensor<4x?x4096xf16>
    %12834 = flow.tensor.transfer %12833 : tensor<4x?x4096xf16>{%dim_10597} to #hal.device.promise<@__device_0>
    %12835 = torch_c.from_builtin_tensor %12834 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12836 = torch_c.to_builtin_tensor %12817 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10598 = arith.constant 1 : index
    %dim_10599 = tensor.dim %12836, %c1_10598 : tensor<4x?x4096xf16>
    %12837 = flow.tensor.transfer %12836 : tensor<4x?x4096xf16>{%dim_10599} to #hal.device.promise<@__device_0>
    %12838 = torch_c.from_builtin_tensor %12837 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10600 = torch.constant.int 1
    %12839 = torch.aten.add.Tensor %12775, %12820, %int1_10600 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10601 = torch.constant.int 1
    %12840 = torch.aten.add.Tensor %12839, %12823, %int1_10601 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10602 = torch.constant.int 1
    %12841 = torch.aten.add.Tensor %12840, %12826, %int1_10602 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10603 = torch.constant.int 1
    %12842 = torch.aten.add.Tensor %12841, %12829, %int1_10603 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10604 = torch.constant.int 1
    %12843 = torch.aten.add.Tensor %12842, %12832, %int1_10604 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10605 = torch.constant.int 1
    %12844 = torch.aten.add.Tensor %12843, %12835, %int1_10605 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10606 = torch.constant.int 1
    %12845 = torch.aten.add.Tensor %12844, %12838, %int1_10606 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
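    // Same gather-and-sum on @__device_1 (local partial %12781), yielding %12873.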
    %12846 = torch_c.to_builtin_tensor %12775 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10607 = arith.constant 1 : index
    %dim_10608 = tensor.dim %12846, %c1_10607 : tensor<4x?x4096xf16>
    %12847 = flow.tensor.transfer %12846 : tensor<4x?x4096xf16>{%dim_10608} to #hal.device.promise<@__device_1>
    %12848 = torch_c.from_builtin_tensor %12847 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12849 = torch_c.to_builtin_tensor %12787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10609 = arith.constant 1 : index
    %dim_10610 = tensor.dim %12849, %c1_10609 : tensor<4x?x4096xf16>
    %12850 = flow.tensor.transfer %12849 : tensor<4x?x4096xf16>{%dim_10610} to #hal.device.promise<@__device_1>
    %12851 = torch_c.from_builtin_tensor %12850 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12852 = torch_c.to_builtin_tensor %12793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10611 = arith.constant 1 : index
    %dim_10612 = tensor.dim %12852, %c1_10611 : tensor<4x?x4096xf16>
    %12853 = flow.tensor.transfer %12852 : tensor<4x?x4096xf16>{%dim_10612} to #hal.device.promise<@__device_1>
    %12854 = torch_c.from_builtin_tensor %12853 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12855 = torch_c.to_builtin_tensor %12799 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10613 = arith.constant 1 : index
    %dim_10614 = tensor.dim %12855, %c1_10613 : tensor<4x?x4096xf16>
    %12856 = flow.tensor.transfer %12855 : tensor<4x?x4096xf16>{%dim_10614} to #hal.device.promise<@__device_1>
    %12857 = torch_c.from_builtin_tensor %12856 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12858 = torch_c.to_builtin_tensor %12805 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10615 = arith.constant 1 : index
    %dim_10616 = tensor.dim %12858, %c1_10615 : tensor<4x?x4096xf16>
    %12859 = flow.tensor.transfer %12858 : tensor<4x?x4096xf16>{%dim_10616} to #hal.device.promise<@__device_1>
    %12860 = torch_c.from_builtin_tensor %12859 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12861 = torch_c.to_builtin_tensor %12811 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10617 = arith.constant 1 : index
    %dim_10618 = tensor.dim %12861, %c1_10617 : tensor<4x?x4096xf16>
    %12862 = flow.tensor.transfer %12861 : tensor<4x?x4096xf16>{%dim_10618} to #hal.device.promise<@__device_1>
    %12863 = torch_c.from_builtin_tensor %12862 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12864 = torch_c.to_builtin_tensor %12817 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10619 = arith.constant 1 : index
    %dim_10620 = tensor.dim %12864, %c1_10619 : tensor<4x?x4096xf16>
    %12865 = flow.tensor.transfer %12864 : tensor<4x?x4096xf16>{%dim_10620} to #hal.device.promise<@__device_1>
    %12866 = torch_c.from_builtin_tensor %12865 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10621 = torch.constant.int 1
    %12867 = torch.aten.add.Tensor %12848, %12781, %int1_10621 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10622 = torch.constant.int 1
    %12868 = torch.aten.add.Tensor %12867, %12851, %int1_10622 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10623 = torch.constant.int 1
    %12869 = torch.aten.add.Tensor %12868, %12854, %int1_10623 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10624 = torch.constant.int 1
    %12870 = torch.aten.add.Tensor %12869, %12857, %int1_10624 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10625 = torch.constant.int 1
    %12871 = torch.aten.add.Tensor %12870, %12860, %int1_10625 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10626 = torch.constant.int 1
    %12872 = torch.aten.add.Tensor %12871, %12863, %int1_10626 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10627 = torch.constant.int 1
    %12873 = torch.aten.add.Tensor %12872, %12866, %int1_10627 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
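    // Same gather-and-sum on @__device_2 (local partial %12787), yielding %12901.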
    %12874 = torch_c.to_builtin_tensor %12775 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10628 = arith.constant 1 : index
    %dim_10629 = tensor.dim %12874, %c1_10628 : tensor<4x?x4096xf16>
    %12875 = flow.tensor.transfer %12874 : tensor<4x?x4096xf16>{%dim_10629} to #hal.device.promise<@__device_2>
    %12876 = torch_c.from_builtin_tensor %12875 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12877 = torch_c.to_builtin_tensor %12781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10630 = arith.constant 1 : index
    %dim_10631 = tensor.dim %12877, %c1_10630 : tensor<4x?x4096xf16>
    %12878 = flow.tensor.transfer %12877 : tensor<4x?x4096xf16>{%dim_10631} to #hal.device.promise<@__device_2>
    %12879 = torch_c.from_builtin_tensor %12878 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12880 = torch_c.to_builtin_tensor %12793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10632 = arith.constant 1 : index
    %dim_10633 = tensor.dim %12880, %c1_10632 : tensor<4x?x4096xf16>
    %12881 = flow.tensor.transfer %12880 : tensor<4x?x4096xf16>{%dim_10633} to #hal.device.promise<@__device_2>
    %12882 = torch_c.from_builtin_tensor %12881 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12883 = torch_c.to_builtin_tensor %12799 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10634 = arith.constant 1 : index
    %dim_10635 = tensor.dim %12883, %c1_10634 : tensor<4x?x4096xf16>
    %12884 = flow.tensor.transfer %12883 : tensor<4x?x4096xf16>{%dim_10635} to #hal.device.promise<@__device_2>
    %12885 = torch_c.from_builtin_tensor %12884 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12886 = torch_c.to_builtin_tensor %12805 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10636 = arith.constant 1 : index
    %dim_10637 = tensor.dim %12886, %c1_10636 : tensor<4x?x4096xf16>
    %12887 = flow.tensor.transfer %12886 : tensor<4x?x4096xf16>{%dim_10637} to #hal.device.promise<@__device_2>
    %12888 = torch_c.from_builtin_tensor %12887 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12889 = torch_c.to_builtin_tensor %12811 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10638 = arith.constant 1 : index
    %dim_10639 = tensor.dim %12889, %c1_10638 : tensor<4x?x4096xf16>
    %12890 = flow.tensor.transfer %12889 : tensor<4x?x4096xf16>{%dim_10639} to #hal.device.promise<@__device_2>
    %12891 = torch_c.from_builtin_tensor %12890 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12892 = torch_c.to_builtin_tensor %12817 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10640 = arith.constant 1 : index
    %dim_10641 = tensor.dim %12892, %c1_10640 : tensor<4x?x4096xf16>
    %12893 = flow.tensor.transfer %12892 : tensor<4x?x4096xf16>{%dim_10641} to #hal.device.promise<@__device_2>
    %12894 = torch_c.from_builtin_tensor %12893 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10642 = torch.constant.int 1
    %12895 = torch.aten.add.Tensor %12876, %12879, %int1_10642 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10643 = torch.constant.int 1
    %12896 = torch.aten.add.Tensor %12895, %12787, %int1_10643 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10644 = torch.constant.int 1
    %12897 = torch.aten.add.Tensor %12896, %12882, %int1_10644 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10645 = torch.constant.int 1
    %12898 = torch.aten.add.Tensor %12897, %12885, %int1_10645 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10646 = torch.constant.int 1
    %12899 = torch.aten.add.Tensor %12898, %12888, %int1_10646 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10647 = torch.constant.int 1
    %12900 = torch.aten.add.Tensor %12899, %12891, %int1_10647 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10648 = torch.constant.int 1
    %12901 = torch.aten.add.Tensor %12900, %12894, %int1_10648 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
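    // %12901: reduced activation resident on @__device_2 (local partial %12787 plus seven transferred peers).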
    %12902 = torch_c.to_builtin_tensor %12775 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10649 = arith.constant 1 : index
    %dim_10650 = tensor.dim %12902, %c1_10649 : tensor<4x?x4096xf16>
    %12903 = flow.tensor.transfer %12902 : tensor<4x?x4096xf16>{%dim_10650} to #hal.device.promise<@__device_3>
    %12904 = torch_c.from_builtin_tensor %12903 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12905 = torch_c.to_builtin_tensor %12781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10651 = arith.constant 1 : index
    %dim_10652 = tensor.dim %12905, %c1_10651 : tensor<4x?x4096xf16>
    %12906 = flow.tensor.transfer %12905 : tensor<4x?x4096xf16>{%dim_10652} to #hal.device.promise<@__device_3>
    %12907 = torch_c.from_builtin_tensor %12906 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12908 = torch_c.to_builtin_tensor %12787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10653 = arith.constant 1 : index
    %dim_10654 = tensor.dim %12908, %c1_10653 : tensor<4x?x4096xf16>
    %12909 = flow.tensor.transfer %12908 : tensor<4x?x4096xf16>{%dim_10654} to #hal.device.promise<@__device_3>
    %12910 = torch_c.from_builtin_tensor %12909 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12911 = torch_c.to_builtin_tensor %12799 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10655 = arith.constant 1 : index
    %dim_10656 = tensor.dim %12911, %c1_10655 : tensor<4x?x4096xf16>
    %12912 = flow.tensor.transfer %12911 : tensor<4x?x4096xf16>{%dim_10656} to #hal.device.promise<@__device_3>
    %12913 = torch_c.from_builtin_tensor %12912 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12914 = torch_c.to_builtin_tensor %12805 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10657 = arith.constant 1 : index
    %dim_10658 = tensor.dim %12914, %c1_10657 : tensor<4x?x4096xf16>
    %12915 = flow.tensor.transfer %12914 : tensor<4x?x4096xf16>{%dim_10658} to #hal.device.promise<@__device_3>
    %12916 = torch_c.from_builtin_tensor %12915 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12917 = torch_c.to_builtin_tensor %12811 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10659 = arith.constant 1 : index
    %dim_10660 = tensor.dim %12917, %c1_10659 : tensor<4x?x4096xf16>
    %12918 = flow.tensor.transfer %12917 : tensor<4x?x4096xf16>{%dim_10660} to #hal.device.promise<@__device_3>
    %12919 = torch_c.from_builtin_tensor %12918 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12920 = torch_c.to_builtin_tensor %12817 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10661 = arith.constant 1 : index
    %dim_10662 = tensor.dim %12920, %c1_10661 : tensor<4x?x4096xf16>
    %12921 = flow.tensor.transfer %12920 : tensor<4x?x4096xf16>{%dim_10662} to #hal.device.promise<@__device_3>
    %12922 = torch_c.from_builtin_tensor %12921 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10663 = torch.constant.int 1
    %12923 = torch.aten.add.Tensor %12904, %12907, %int1_10663 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10664 = torch.constant.int 1
    %12924 = torch.aten.add.Tensor %12923, %12910, %int1_10664 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10665 = torch.constant.int 1
    %12925 = torch.aten.add.Tensor %12924, %12793, %int1_10665 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10666 = torch.constant.int 1
    %12926 = torch.aten.add.Tensor %12925, %12913, %int1_10666 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10667 = torch.constant.int 1
    %12927 = torch.aten.add.Tensor %12926, %12916, %int1_10667 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10668 = torch.constant.int 1
    %12928 = torch.aten.add.Tensor %12927, %12919, %int1_10668 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10669 = torch.constant.int 1
    %12929 = torch.aten.add.Tensor %12928, %12922, %int1_10669 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
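    // %12929: reduced activation resident on @__device_3 (local partial %12793 plus seven transferred peers).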
    %12930 = torch_c.to_builtin_tensor %12775 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10670 = arith.constant 1 : index
    %dim_10671 = tensor.dim %12930, %c1_10670 : tensor<4x?x4096xf16>
    %12931 = flow.tensor.transfer %12930 : tensor<4x?x4096xf16>{%dim_10671} to #hal.device.promise<@__device_4>
    %12932 = torch_c.from_builtin_tensor %12931 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12933 = torch_c.to_builtin_tensor %12781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10672 = arith.constant 1 : index
    %dim_10673 = tensor.dim %12933, %c1_10672 : tensor<4x?x4096xf16>
    %12934 = flow.tensor.transfer %12933 : tensor<4x?x4096xf16>{%dim_10673} to #hal.device.promise<@__device_4>
    %12935 = torch_c.from_builtin_tensor %12934 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12936 = torch_c.to_builtin_tensor %12787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10674 = arith.constant 1 : index
    %dim_10675 = tensor.dim %12936, %c1_10674 : tensor<4x?x4096xf16>
    %12937 = flow.tensor.transfer %12936 : tensor<4x?x4096xf16>{%dim_10675} to #hal.device.promise<@__device_4>
    %12938 = torch_c.from_builtin_tensor %12937 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12939 = torch_c.to_builtin_tensor %12793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10676 = arith.constant 1 : index
    %dim_10677 = tensor.dim %12939, %c1_10676 : tensor<4x?x4096xf16>
    %12940 = flow.tensor.transfer %12939 : tensor<4x?x4096xf16>{%dim_10677} to #hal.device.promise<@__device_4>
    %12941 = torch_c.from_builtin_tensor %12940 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12942 = torch_c.to_builtin_tensor %12805 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10678 = arith.constant 1 : index
    %dim_10679 = tensor.dim %12942, %c1_10678 : tensor<4x?x4096xf16>
    %12943 = flow.tensor.transfer %12942 : tensor<4x?x4096xf16>{%dim_10679} to #hal.device.promise<@__device_4>
    %12944 = torch_c.from_builtin_tensor %12943 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12945 = torch_c.to_builtin_tensor %12811 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10680 = arith.constant 1 : index
    %dim_10681 = tensor.dim %12945, %c1_10680 : tensor<4x?x4096xf16>
    %12946 = flow.tensor.transfer %12945 : tensor<4x?x4096xf16>{%dim_10681} to #hal.device.promise<@__device_4>
    %12947 = torch_c.from_builtin_tensor %12946 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12948 = torch_c.to_builtin_tensor %12817 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10682 = arith.constant 1 : index
    %dim_10683 = tensor.dim %12948, %c1_10682 : tensor<4x?x4096xf16>
    %12949 = flow.tensor.transfer %12948 : tensor<4x?x4096xf16>{%dim_10683} to #hal.device.promise<@__device_4>
    %12950 = torch_c.from_builtin_tensor %12949 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10684 = torch.constant.int 1
    %12951 = torch.aten.add.Tensor %12932, %12935, %int1_10684 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10685 = torch.constant.int 1
    %12952 = torch.aten.add.Tensor %12951, %12938, %int1_10685 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10686 = torch.constant.int 1
    %12953 = torch.aten.add.Tensor %12952, %12941, %int1_10686 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10687 = torch.constant.int 1
    %12954 = torch.aten.add.Tensor %12953, %12799, %int1_10687 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10688 = torch.constant.int 1
    %12955 = torch.aten.add.Tensor %12954, %12944, %int1_10688 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10689 = torch.constant.int 1
    %12956 = torch.aten.add.Tensor %12955, %12947, %int1_10689 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10690 = torch.constant.int 1
    %12957 = torch.aten.add.Tensor %12956, %12950, %int1_10690 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
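    // %12957: reduced activation resident on @__device_4 (local partial %12799 plus seven transferred peers).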
    %12958 = torch_c.to_builtin_tensor %12775 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10691 = arith.constant 1 : index
    %dim_10692 = tensor.dim %12958, %c1_10691 : tensor<4x?x4096xf16>
    %12959 = flow.tensor.transfer %12958 : tensor<4x?x4096xf16>{%dim_10692} to #hal.device.promise<@__device_5>
    %12960 = torch_c.from_builtin_tensor %12959 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12961 = torch_c.to_builtin_tensor %12781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10693 = arith.constant 1 : index
    %dim_10694 = tensor.dim %12961, %c1_10693 : tensor<4x?x4096xf16>
    %12962 = flow.tensor.transfer %12961 : tensor<4x?x4096xf16>{%dim_10694} to #hal.device.promise<@__device_5>
    %12963 = torch_c.from_builtin_tensor %12962 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12964 = torch_c.to_builtin_tensor %12787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10695 = arith.constant 1 : index
    %dim_10696 = tensor.dim %12964, %c1_10695 : tensor<4x?x4096xf16>
    %12965 = flow.tensor.transfer %12964 : tensor<4x?x4096xf16>{%dim_10696} to #hal.device.promise<@__device_5>
    %12966 = torch_c.from_builtin_tensor %12965 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12967 = torch_c.to_builtin_tensor %12793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10697 = arith.constant 1 : index
    %dim_10698 = tensor.dim %12967, %c1_10697 : tensor<4x?x4096xf16>
    %12968 = flow.tensor.transfer %12967 : tensor<4x?x4096xf16>{%dim_10698} to #hal.device.promise<@__device_5>
    %12969 = torch_c.from_builtin_tensor %12968 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12970 = torch_c.to_builtin_tensor %12799 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10699 = arith.constant 1 : index
    %dim_10700 = tensor.dim %12970, %c1_10699 : tensor<4x?x4096xf16>
    %12971 = flow.tensor.transfer %12970 : tensor<4x?x4096xf16>{%dim_10700} to #hal.device.promise<@__device_5>
    %12972 = torch_c.from_builtin_tensor %12971 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12973 = torch_c.to_builtin_tensor %12811 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10701 = arith.constant 1 : index
    %dim_10702 = tensor.dim %12973, %c1_10701 : tensor<4x?x4096xf16>
    %12974 = flow.tensor.transfer %12973 : tensor<4x?x4096xf16>{%dim_10702} to #hal.device.promise<@__device_5>
    %12975 = torch_c.from_builtin_tensor %12974 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12976 = torch_c.to_builtin_tensor %12817 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10703 = arith.constant 1 : index
    %dim_10704 = tensor.dim %12976, %c1_10703 : tensor<4x?x4096xf16>
    %12977 = flow.tensor.transfer %12976 : tensor<4x?x4096xf16>{%dim_10704} to #hal.device.promise<@__device_5>
    %12978 = torch_c.from_builtin_tensor %12977 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10705 = torch.constant.int 1
    %12979 = torch.aten.add.Tensor %12960, %12963, %int1_10705 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10706 = torch.constant.int 1
    %12980 = torch.aten.add.Tensor %12979, %12966, %int1_10706 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10707 = torch.constant.int 1
    %12981 = torch.aten.add.Tensor %12980, %12969, %int1_10707 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10708 = torch.constant.int 1
    %12982 = torch.aten.add.Tensor %12981, %12972, %int1_10708 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10709 = torch.constant.int 1
    %12983 = torch.aten.add.Tensor %12982, %12805, %int1_10709 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10710 = torch.constant.int 1
    %12984 = torch.aten.add.Tensor %12983, %12975, %int1_10710 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10711 = torch.constant.int 1
    %12985 = torch.aten.add.Tensor %12984, %12978, %int1_10711 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12985, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
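    // %12985: reduced activation resident on @__device_5 (local partial %12805 plus seven transferred peers).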
    %12986 = torch_c.to_builtin_tensor %12775 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10712 = arith.constant 1 : index
    %dim_10713 = tensor.dim %12986, %c1_10712 : tensor<4x?x4096xf16>
    %12987 = flow.tensor.transfer %12986 : tensor<4x?x4096xf16>{%dim_10713} to #hal.device.promise<@__device_6>
    %12988 = torch_c.from_builtin_tensor %12987 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12989 = torch_c.to_builtin_tensor %12781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10714 = arith.constant 1 : index
    %dim_10715 = tensor.dim %12989, %c1_10714 : tensor<4x?x4096xf16>
    %12990 = flow.tensor.transfer %12989 : tensor<4x?x4096xf16>{%dim_10715} to #hal.device.promise<@__device_6>
    %12991 = torch_c.from_builtin_tensor %12990 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12992 = torch_c.to_builtin_tensor %12787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10716 = arith.constant 1 : index
    %dim_10717 = tensor.dim %12992, %c1_10716 : tensor<4x?x4096xf16>
    %12993 = flow.tensor.transfer %12992 : tensor<4x?x4096xf16>{%dim_10717} to #hal.device.promise<@__device_6>
    %12994 = torch_c.from_builtin_tensor %12993 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12995 = torch_c.to_builtin_tensor %12793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10718 = arith.constant 1 : index
    %dim_10719 = tensor.dim %12995, %c1_10718 : tensor<4x?x4096xf16>
    %12996 = flow.tensor.transfer %12995 : tensor<4x?x4096xf16>{%dim_10719} to #hal.device.promise<@__device_6>
    %12997 = torch_c.from_builtin_tensor %12996 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %12997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %12998 = torch_c.to_builtin_tensor %12799 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10720 = arith.constant 1 : index
    %dim_10721 = tensor.dim %12998, %c1_10720 : tensor<4x?x4096xf16>
    %12999 = flow.tensor.transfer %12998 : tensor<4x?x4096xf16>{%dim_10721} to #hal.device.promise<@__device_6>
    %13000 = torch_c.from_builtin_tensor %12999 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13001 = torch_c.to_builtin_tensor %12805 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10722 = arith.constant 1 : index
    %dim_10723 = tensor.dim %13001, %c1_10722 : tensor<4x?x4096xf16>
    %13002 = flow.tensor.transfer %13001 : tensor<4x?x4096xf16>{%dim_10723} to #hal.device.promise<@__device_6>
    %13003 = torch_c.from_builtin_tensor %13002 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13004 = torch_c.to_builtin_tensor %12817 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10724 = arith.constant 1 : index
    %dim_10725 = tensor.dim %13004, %c1_10724 : tensor<4x?x4096xf16>
    %13005 = flow.tensor.transfer %13004 : tensor<4x?x4096xf16>{%dim_10725} to #hal.device.promise<@__device_6>
    %13006 = torch_c.from_builtin_tensor %13005 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10726 = torch.constant.int 1
    %13007 = torch.aten.add.Tensor %12988, %12991, %int1_10726 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10727 = torch.constant.int 1
    %13008 = torch.aten.add.Tensor %13007, %12994, %int1_10727 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10728 = torch.constant.int 1
    %13009 = torch.aten.add.Tensor %13008, %12997, %int1_10728 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10729 = torch.constant.int 1
    %13010 = torch.aten.add.Tensor %13009, %13000, %int1_10729 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10730 = torch.constant.int 1
    %13011 = torch.aten.add.Tensor %13010, %13003, %int1_10730 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10731 = torch.constant.int 1
    %13012 = torch.aten.add.Tensor %13011, %12811, %int1_10731 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10732 = torch.constant.int 1
    %13013 = torch.aten.add.Tensor %13012, %13006, %int1_10732 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
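    // %13013: reduced activation resident on @__device_6 (local partial %12811 plus seven transferred peers).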
    %13014 = torch_c.to_builtin_tensor %12775 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10733 = arith.constant 1 : index
    %dim_10734 = tensor.dim %13014, %c1_10733 : tensor<4x?x4096xf16>
    %13015 = flow.tensor.transfer %13014 : tensor<4x?x4096xf16>{%dim_10734} to #hal.device.promise<@__device_7>
    %13016 = torch_c.from_builtin_tensor %13015 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13017 = torch_c.to_builtin_tensor %12781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10735 = arith.constant 1 : index
    %dim_10736 = tensor.dim %13017, %c1_10735 : tensor<4x?x4096xf16>
    %13018 = flow.tensor.transfer %13017 : tensor<4x?x4096xf16>{%dim_10736} to #hal.device.promise<@__device_7>
    %13019 = torch_c.from_builtin_tensor %13018 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13020 = torch_c.to_builtin_tensor %12787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10737 = arith.constant 1 : index
    %dim_10738 = tensor.dim %13020, %c1_10737 : tensor<4x?x4096xf16>
    %13021 = flow.tensor.transfer %13020 : tensor<4x?x4096xf16>{%dim_10738} to #hal.device.promise<@__device_7>
    %13022 = torch_c.from_builtin_tensor %13021 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13023 = torch_c.to_builtin_tensor %12793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10739 = arith.constant 1 : index
    %dim_10740 = tensor.dim %13023, %c1_10739 : tensor<4x?x4096xf16>
    %13024 = flow.tensor.transfer %13023 : tensor<4x?x4096xf16>{%dim_10740} to #hal.device.promise<@__device_7>
    %13025 = torch_c.from_builtin_tensor %13024 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13026 = torch_c.to_builtin_tensor %12799 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10741 = arith.constant 1 : index
    %dim_10742 = tensor.dim %13026, %c1_10741 : tensor<4x?x4096xf16>
    %13027 = flow.tensor.transfer %13026 : tensor<4x?x4096xf16>{%dim_10742} to #hal.device.promise<@__device_7>
    %13028 = torch_c.from_builtin_tensor %13027 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13029 = torch_c.to_builtin_tensor %12805 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10743 = arith.constant 1 : index
    %dim_10744 = tensor.dim %13029, %c1_10743 : tensor<4x?x4096xf16>
    %13030 = flow.tensor.transfer %13029 : tensor<4x?x4096xf16>{%dim_10744} to #hal.device.promise<@__device_7>
    %13031 = torch_c.from_builtin_tensor %13030 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13032 = torch_c.to_builtin_tensor %12811 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10745 = arith.constant 1 : index
    %dim_10746 = tensor.dim %13032, %c1_10745 : tensor<4x?x4096xf16>
    %13033 = flow.tensor.transfer %13032 : tensor<4x?x4096xf16>{%dim_10746} to #hal.device.promise<@__device_7>
    %13034 = torch_c.from_builtin_tensor %13033 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10747 = torch.constant.int 1
    %13035 = torch.aten.add.Tensor %13016, %13019, %int1_10747 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10748 = torch.constant.int 1
    %13036 = torch.aten.add.Tensor %13035, %13022, %int1_10748 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10749 = torch.constant.int 1
    %13037 = torch.aten.add.Tensor %13036, %13025, %int1_10749 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10750 = torch.constant.int 1
    %13038 = torch.aten.add.Tensor %13037, %13028, %int1_10750 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10751 = torch.constant.int 1
    %13039 = torch.aten.add.Tensor %13038, %13031, %int1_10751 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10752 = torch.constant.int 1
    %13040 = torch.aten.add.Tensor %13039, %13034, %int1_10752 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10753 = torch.constant.int 1
    %13041 = torch.aten.add.Tensor %13040, %12817, %int1_10753 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
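    // %13041: reduced activation resident on @__device_7 (local partial %12817 plus seven
    // transferred peers). Next, each device adds its copy of the reduced result
    // (%12845, %12873, %12901, %12929, %12957, %12985, %13013, %13041) into its
    // residual stream (%11701..%11708).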
    %int1_10754 = torch.constant.int 1
    %13042 = torch.aten.add.Tensor %11701, %12845, %int1_10754 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10755 = torch.constant.int 1
    %13043 = torch.aten.add.Tensor %11702, %12873, %int1_10755 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10756 = torch.constant.int 1
    %13044 = torch.aten.add.Tensor %11703, %12901, %int1_10756 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10757 = torch.constant.int 1
    %13045 = torch.aten.add.Tensor %11704, %12929, %int1_10757 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10758 = torch.constant.int 1
    %13046 = torch.aten.add.Tensor %11705, %12957, %int1_10758 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10759 = torch.constant.int 1
    %13047 = torch.aten.add.Tensor %11706, %12985, %int1_10759 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10760 = torch.constant.int 1
    %13048 = torch.aten.add.Tensor %11707, %13013, %int1_10760 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10761 = torch.constant.int 1
    %13049 = torch.aten.add.Tensor %11708, %13041, %int1_10761 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
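    // What follows is an RMSNorm over the hidden dimension, replicated once per device.
    // Sketch of the equivalent computation (assuming standard RMSNorm semantics; torch
    // dtype code 6 = f32, 5 = f16; eps ~= 1e-5):
    //   h32 = h.to(f32)
    //   h_n = h32 * rsqrt(mean(h32 ** 2, dim=-1, keepdim=True) + eps)
    //   out = (w * h_n).to(f16)   // w: the per-device [4096] f32 norm weight (%408..%415)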
    %int6_10762 = torch.constant.int 6
    %13050 = torch.prims.convert_element_type %13042, %int6_10762 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_10763 = torch.constant.int 6
    %13051 = torch.prims.convert_element_type %13043, %int6_10763 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_10764 = torch.constant.int 6
    %13052 = torch.prims.convert_element_type %13044, %int6_10764 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_10765 = torch.constant.int 6
    %13053 = torch.prims.convert_element_type %13045, %int6_10765 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_10766 = torch.constant.int 6
    %13054 = torch.prims.convert_element_type %13046, %int6_10766 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_10767 = torch.constant.int 6
    %13055 = torch.prims.convert_element_type %13047, %int6_10767 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_10768 = torch.constant.int 6
    %13056 = torch.prims.convert_element_type %13048, %int6_10768 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_10769 = torch.constant.int 6
    %13057 = torch.prims.convert_element_type %13049, %int6_10769 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_10770 = torch.constant.int 2
    %13058 = torch.aten.pow.Tensor_Scalar %13050, %int2_10770 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_10771 = torch.constant.int 2
    %13059 = torch.aten.pow.Tensor_Scalar %13051, %int2_10771 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_10772 = torch.constant.int 2
    %13060 = torch.aten.pow.Tensor_Scalar %13052, %int2_10772 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_10773 = torch.constant.int 2
    %13061 = torch.aten.pow.Tensor_Scalar %13053, %int2_10773 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_10774 = torch.constant.int 2
    %13062 = torch.aten.pow.Tensor_Scalar %13054, %int2_10774 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_10775 = torch.constant.int 2
    %13063 = torch.aten.pow.Tensor_Scalar %13055, %int2_10775 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_10776 = torch.constant.int 2
    %13064 = torch.aten.pow.Tensor_Scalar %13056, %int2_10776 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_10777 = torch.constant.int 2
    %13065 = torch.aten.pow.Tensor_Scalar %13057, %int2_10777 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int-1_10778 = torch.constant.int -1
    %13066 = torch.prim.ListConstruct %int-1_10778 : (!torch.int) -> !torch.list<int>
    %true_10779 = torch.constant.bool true
    %none_10780 = torch.constant.none
    %13067 = torch.aten.mean.dim %13058, %13066, %true_10779, %none_10780 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_10781 = torch.constant.int -1
    %13068 = torch.prim.ListConstruct %int-1_10781 : (!torch.int) -> !torch.list<int>
    %true_10782 = torch.constant.bool true
    %none_10783 = torch.constant.none
    %13069 = torch.aten.mean.dim %13059, %13068, %true_10782, %none_10783 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_10784 = torch.constant.int -1
    %13070 = torch.prim.ListConstruct %int-1_10784 : (!torch.int) -> !torch.list<int>
    %true_10785 = torch.constant.bool true
    %none_10786 = torch.constant.none
    %13071 = torch.aten.mean.dim %13060, %13070, %true_10785, %none_10786 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_10787 = torch.constant.int -1
    %13072 = torch.prim.ListConstruct %int-1_10787 : (!torch.int) -> !torch.list<int>
    %true_10788 = torch.constant.bool true
    %none_10789 = torch.constant.none
    %13073 = torch.aten.mean.dim %13061, %13072, %true_10788, %none_10789 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_10790 = torch.constant.int -1
    %13074 = torch.prim.ListConstruct %int-1_10790 : (!torch.int) -> !torch.list<int>
    %true_10791 = torch.constant.bool true
    %none_10792 = torch.constant.none
    %13075 = torch.aten.mean.dim %13062, %13074, %true_10791, %none_10792 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_10793 = torch.constant.int -1
    %13076 = torch.prim.ListConstruct %int-1_10793 : (!torch.int) -> !torch.list<int>
    %true_10794 = torch.constant.bool true
    %none_10795 = torch.constant.none
    %13077 = torch.aten.mean.dim %13063, %13076, %true_10794, %none_10795 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_10796 = torch.constant.int -1
    %13078 = torch.prim.ListConstruct %int-1_10796 : (!torch.int) -> !torch.list<int>
    %true_10797 = torch.constant.bool true
    %none_10798 = torch.constant.none
    %13079 = torch.aten.mean.dim %13064, %13078, %true_10797, %none_10798 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_10799 = torch.constant.int -1
    %13080 = torch.prim.ListConstruct %int-1_10799 : (!torch.int) -> !torch.list<int>
    %true_10800 = torch.constant.bool true
    %none_10801 = torch.constant.none
    %13081 = torch.aten.mean.dim %13065, %13080, %true_10800, %none_10801 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_10802 = torch.constant.float 9.9999997473787516E-6
    %int1_10803 = torch.constant.int 1
    %13082 = torch.aten.add.Scalar %13067, %float9.999990e-06_10802, %int1_10803 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_10804 = torch.constant.float 9.9999997473787516E-6
    %int1_10805 = torch.constant.int 1
    %13083 = torch.aten.add.Scalar %13069, %float9.999990e-06_10804, %int1_10805 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_10806 = torch.constant.float 9.9999997473787516E-6
    %int1_10807 = torch.constant.int 1
    %13084 = torch.aten.add.Scalar %13071, %float9.999990e-06_10806, %int1_10807 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_10808 = torch.constant.float 9.9999997473787516E-6
    %int1_10809 = torch.constant.int 1
    %13085 = torch.aten.add.Scalar %13073, %float9.999990e-06_10808, %int1_10809 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_10810 = torch.constant.float 9.9999997473787516E-6
    %int1_10811 = torch.constant.int 1
    %13086 = torch.aten.add.Scalar %13075, %float9.999990e-06_10810, %int1_10811 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_10812 = torch.constant.float 9.9999997473787516E-6
    %int1_10813 = torch.constant.int 1
    %13087 = torch.aten.add.Scalar %13077, %float9.999990e-06_10812, %int1_10813 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_10814 = torch.constant.float 9.9999997473787516E-6
    %int1_10815 = torch.constant.int 1
    %13088 = torch.aten.add.Scalar %13079, %float9.999990e-06_10814, %int1_10815 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_10816 = torch.constant.float 9.9999997473787516E-6
    %int1_10817 = torch.constant.int 1
    %13089 = torch.aten.add.Scalar %13081, %float9.999990e-06_10816, %int1_10817 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13090 = torch.aten.rsqrt %13082 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13091 = torch.aten.rsqrt %13083 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13092 = torch.aten.rsqrt %13084 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13093 = torch.aten.rsqrt %13085 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13094 = torch.aten.rsqrt %13086 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13095 = torch.aten.rsqrt %13087 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13096 = torch.aten.rsqrt %13088 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13097 = torch.aten.rsqrt %13089 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13098 = torch.aten.mul.Tensor %13050, %13090 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13099 = torch.aten.mul.Tensor %13051, %13091 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13100 = torch.aten.mul.Tensor %13052, %13092 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13101 = torch.aten.mul.Tensor %13053, %13093 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13102 = torch.aten.mul.Tensor %13054, %13094 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13103 = torch.aten.mul.Tensor %13055, %13095 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13104 = torch.aten.mul.Tensor %13056, %13096 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13105 = torch.aten.mul.Tensor %13057, %13097 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13106 = torch.aten.mul.Tensor %408, %13098 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13107 = torch.aten.mul.Tensor %409, %13099 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13108 = torch.aten.mul.Tensor %410, %13100 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13109 = torch.aten.mul.Tensor %411, %13101 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13110 = torch.aten.mul.Tensor %412, %13102 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13111 = torch.aten.mul.Tensor %413, %13103 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13112 = torch.aten.mul.Tensor %414, %13104 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13113 = torch.aten.mul.Tensor %415, %13105 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
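    // Downcast the normalized activations back to f16 (dtype code 5) for the FFN matmuls.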
    %int5_10818 = torch.constant.int 5
    %13114 = torch.prims.convert_element_type %13106, %int5_10818 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_10819 = torch.constant.int 5
    %13115 = torch.prims.convert_element_type %13107, %int5_10819 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_10820 = torch.constant.int 5
    %13116 = torch.prims.convert_element_type %13108, %int5_10820 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_10821 = torch.constant.int 5
    %13117 = torch.prims.convert_element_type %13109, %int5_10821 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_10822 = torch.constant.int 5
    %13118 = torch.prims.convert_element_type %13110, %int5_10822 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_10823 = torch.constant.int 5
    %13119 = torch.prims.convert_element_type %13111, %int5_10823 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_10824 = torch.constant.int 5
    %13120 = torch.prims.convert_element_type %13112, %int5_10824 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_10825 = torch.constant.int 5
    %13121 = torch.prims.convert_element_type %13113, %int5_10825 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
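    // Transpose the first set of FFN weight shards (%416-%423, one [1792,4096] shard per
    // device) into [4096,1792] right-hand sides for the matmuls below; likely the gate
    // projection.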
    %int1_10826 = torch.constant.int 1
    %int0_10827 = torch.constant.int 0
    %13122 = torch.prim.ListConstruct %int1_10826, %int0_10827 : (!torch.int, !torch.int) -> !torch.list<int>
    %13123 = torch.aten.permute %416, %13122 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10828 = torch.constant.int 1
    %int0_10829 = torch.constant.int 0
    %13124 = torch.prim.ListConstruct %int1_10828, %int0_10829 : (!torch.int, !torch.int) -> !torch.list<int>
    %13125 = torch.aten.permute %417, %13124 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10830 = torch.constant.int 1
    %int0_10831 = torch.constant.int 0
    %13126 = torch.prim.ListConstruct %int1_10830, %int0_10831 : (!torch.int, !torch.int) -> !torch.list<int>
    %13127 = torch.aten.permute %418, %13126 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10832 = torch.constant.int 1
    %int0_10833 = torch.constant.int 0
    %13128 = torch.prim.ListConstruct %int1_10832, %int0_10833 : (!torch.int, !torch.int) -> !torch.list<int>
    %13129 = torch.aten.permute %419, %13128 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10834 = torch.constant.int 1
    %int0_10835 = torch.constant.int 0
    %13130 = torch.prim.ListConstruct %int1_10834, %int0_10835 : (!torch.int, !torch.int) -> !torch.list<int>
    %13131 = torch.aten.permute %420, %13130 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10836 = torch.constant.int 1
    %int0_10837 = torch.constant.int 0
    %13132 = torch.prim.ListConstruct %int1_10836, %int0_10837 : (!torch.int, !torch.int) -> !torch.list<int>
    %13133 = torch.aten.permute %421, %13132 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10838 = torch.constant.int 1
    %int0_10839 = torch.constant.int 0
    %13134 = torch.prim.ListConstruct %int1_10838, %int0_10839 : (!torch.int, !torch.int) -> !torch.list<int>
    %13135 = torch.aten.permute %422, %13134 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10840 = torch.constant.int 1
    %int0_10841 = torch.constant.int 0
    %13136 = torch.prim.ListConstruct %int1_10840, %int0_10841 : (!torch.int, !torch.int) -> !torch.list<int>
    %13137 = torch.aten.permute %423, %13136 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
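    // Gate matmul per device: flatten [4,?,4096] to [?,4096], multiply by the [4096,1792]
    // weight shard, then restore the [4,?,1792] batch shape.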
    %int4_10842 = torch.constant.int 4
    %13138 = torch.aten.mul.int %int4_10842, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10843 = torch.constant.int 4096
    %13139 = torch.prim.ListConstruct %13138, %int4096_10843 : (!torch.int, !torch.int) -> !torch.list<int>
    %13140 = torch.aten.view %13114, %13139 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13140, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13141 = torch.aten.mm %13140, %13123 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13141, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10844 = torch.constant.int 4
    %int1792_10845 = torch.constant.int 1792
    %13142 = torch.prim.ListConstruct %int4_10844, %2482, %int1792_10845 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13143 = torch.aten.view %13141, %13142 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10846 = torch.constant.int 4
    %13144 = torch.aten.mul.int %int4_10846, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10847 = torch.constant.int 4096
    %13145 = torch.prim.ListConstruct %13144, %int4096_10847 : (!torch.int, !torch.int) -> !torch.list<int>
    %13146 = torch.aten.view %13115, %13145 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13146, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13147 = torch.aten.mm %13146, %13125 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13147, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10848 = torch.constant.int 4
    %int1792_10849 = torch.constant.int 1792
    %13148 = torch.prim.ListConstruct %int4_10848, %2482, %int1792_10849 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13149 = torch.aten.view %13147, %13148 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10850 = torch.constant.int 4
    %13150 = torch.aten.mul.int %int4_10850, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10851 = torch.constant.int 4096
    %13151 = torch.prim.ListConstruct %13150, %int4096_10851 : (!torch.int, !torch.int) -> !torch.list<int>
    %13152 = torch.aten.view %13116, %13151 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13152, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13153 = torch.aten.mm %13152, %13127 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13153, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10852 = torch.constant.int 4
    %int1792_10853 = torch.constant.int 1792
    %13154 = torch.prim.ListConstruct %int4_10852, %2482, %int1792_10853 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13155 = torch.aten.view %13153, %13154 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10854 = torch.constant.int 4
    %13156 = torch.aten.mul.int %int4_10854, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10855 = torch.constant.int 4096
    %13157 = torch.prim.ListConstruct %13156, %int4096_10855 : (!torch.int, !torch.int) -> !torch.list<int>
    %13158 = torch.aten.view %13117, %13157 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13158, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13159 = torch.aten.mm %13158, %13129 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13159, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10856 = torch.constant.int 4
    %int1792_10857 = torch.constant.int 1792
    %13160 = torch.prim.ListConstruct %int4_10856, %2482, %int1792_10857 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13161 = torch.aten.view %13159, %13160 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10858 = torch.constant.int 4
    %13162 = torch.aten.mul.int %int4_10858, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10859 = torch.constant.int 4096
    %13163 = torch.prim.ListConstruct %13162, %int4096_10859 : (!torch.int, !torch.int) -> !torch.list<int>
    %13164 = torch.aten.view %13118, %13163 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13164, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13165 = torch.aten.mm %13164, %13131 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13165, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10860 = torch.constant.int 4
    %int1792_10861 = torch.constant.int 1792
    %13166 = torch.prim.ListConstruct %int4_10860, %2482, %int1792_10861 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13167 = torch.aten.view %13165, %13166 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10862 = torch.constant.int 4
    %13168 = torch.aten.mul.int %int4_10862, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10863 = torch.constant.int 4096
    %13169 = torch.prim.ListConstruct %13168, %int4096_10863 : (!torch.int, !torch.int) -> !torch.list<int>
    %13170 = torch.aten.view %13119, %13169 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13170, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13171 = torch.aten.mm %13170, %13133 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13171, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10864 = torch.constant.int 4
    %int1792_10865 = torch.constant.int 1792
    %13172 = torch.prim.ListConstruct %int4_10864, %2482, %int1792_10865 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13173 = torch.aten.view %13171, %13172 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10866 = torch.constant.int 4
    %13174 = torch.aten.mul.int %int4_10866, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10867 = torch.constant.int 4096
    %13175 = torch.prim.ListConstruct %13174, %int4096_10867 : (!torch.int, !torch.int) -> !torch.list<int>
    %13176 = torch.aten.view %13120, %13175 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13176, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13177 = torch.aten.mm %13176, %13135 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13177, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10868 = torch.constant.int 4
    %int1792_10869 = torch.constant.int 1792
    %13178 = torch.prim.ListConstruct %int4_10868, %2482, %int1792_10869 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13179 = torch.aten.view %13177, %13178 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10870 = torch.constant.int 4
    %13180 = torch.aten.mul.int %int4_10870, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10871 = torch.constant.int 4096
    %13181 = torch.prim.ListConstruct %13180, %int4096_10871 : (!torch.int, !torch.int) -> !torch.list<int>
    %13182 = torch.aten.view %13121, %13181 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13182, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13183 = torch.aten.mm %13182, %13137 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13183, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10872 = torch.constant.int 4
    %int1792_10873 = torch.constant.int 1792
    %13184 = torch.prim.ListConstruct %int4_10872, %2482, %int1792_10873 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13185 = torch.aten.view %13183, %13184 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
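    // Apply SiLU to each gate-projection shard.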
    %13186 = torch.aten.silu %13143 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13187 = torch.aten.silu %13149 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13188 = torch.aten.silu %13155 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13189 = torch.aten.silu %13161 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13190 = torch.aten.silu %13167 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13191 = torch.aten.silu %13173 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13192 = torch.aten.silu %13179 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13193 = torch.aten.silu %13185 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
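    // Transpose the second set of weight shards (%424-%431), presumably the up projection.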
    %int1_10874 = torch.constant.int 1
    %int0_10875 = torch.constant.int 0
    %13194 = torch.prim.ListConstruct %int1_10874, %int0_10875 : (!torch.int, !torch.int) -> !torch.list<int>
    %13195 = torch.aten.permute %424, %13194 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10876 = torch.constant.int 1
    %int0_10877 = torch.constant.int 0
    %13196 = torch.prim.ListConstruct %int1_10876, %int0_10877 : (!torch.int, !torch.int) -> !torch.list<int>
    %13197 = torch.aten.permute %425, %13196 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10878 = torch.constant.int 1
    %int0_10879 = torch.constant.int 0
    %13198 = torch.prim.ListConstruct %int1_10878, %int0_10879 : (!torch.int, !torch.int) -> !torch.list<int>
    %13199 = torch.aten.permute %426, %13198 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10880 = torch.constant.int 1
    %int0_10881 = torch.constant.int 0
    %13200 = torch.prim.ListConstruct %int1_10880, %int0_10881 : (!torch.int, !torch.int) -> !torch.list<int>
    %13201 = torch.aten.permute %427, %13200 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10882 = torch.constant.int 1
    %int0_10883 = torch.constant.int 0
    %13202 = torch.prim.ListConstruct %int1_10882, %int0_10883 : (!torch.int, !torch.int) -> !torch.list<int>
    %13203 = torch.aten.permute %428, %13202 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10884 = torch.constant.int 1
    %int0_10885 = torch.constant.int 0
    %13204 = torch.prim.ListConstruct %int1_10884, %int0_10885 : (!torch.int, !torch.int) -> !torch.list<int>
    %13205 = torch.aten.permute %429, %13204 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10886 = torch.constant.int 1
    %int0_10887 = torch.constant.int 0
    %13206 = torch.prim.ListConstruct %int1_10886, %int0_10887 : (!torch.int, !torch.int) -> !torch.list<int>
    %13207 = torch.aten.permute %430, %13206 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_10888 = torch.constant.int 1
    %int0_10889 = torch.constant.int 0
    %13208 = torch.prim.ListConstruct %int1_10888, %int0_10889 : (!torch.int, !torch.int) -> !torch.list<int>
    %13209 = torch.aten.permute %431, %13208 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
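    // Up matmul per device, over the same f16 activations (%13114-%13121).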
    %int4_10890 = torch.constant.int 4
    %13210 = torch.aten.mul.int %int4_10890, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10891 = torch.constant.int 4096
    %13211 = torch.prim.ListConstruct %13210, %int4096_10891 : (!torch.int, !torch.int) -> !torch.list<int>
    %13212 = torch.aten.view %13114, %13211 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13212, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13213 = torch.aten.mm %13212, %13195 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13213, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10892 = torch.constant.int 4
    %int1792_10893 = torch.constant.int 1792
    %13214 = torch.prim.ListConstruct %int4_10892, %2482, %int1792_10893 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13215 = torch.aten.view %13213, %13214 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10894 = torch.constant.int 4
    %13216 = torch.aten.mul.int %int4_10894, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10895 = torch.constant.int 4096
    %13217 = torch.prim.ListConstruct %13216, %int4096_10895 : (!torch.int, !torch.int) -> !torch.list<int>
    %13218 = torch.aten.view %13115, %13217 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13218, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13219 = torch.aten.mm %13218, %13197 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13219, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10896 = torch.constant.int 4
    %int1792_10897 = torch.constant.int 1792
    %13220 = torch.prim.ListConstruct %int4_10896, %2482, %int1792_10897 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13221 = torch.aten.view %13219, %13220 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10898 = torch.constant.int 4
    %13222 = torch.aten.mul.int %int4_10898, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10899 = torch.constant.int 4096
    %13223 = torch.prim.ListConstruct %13222, %int4096_10899 : (!torch.int, !torch.int) -> !torch.list<int>
    %13224 = torch.aten.view %13116, %13223 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13224, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13225 = torch.aten.mm %13224, %13199 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13225, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10900 = torch.constant.int 4
    %int1792_10901 = torch.constant.int 1792
    %13226 = torch.prim.ListConstruct %int4_10900, %2482, %int1792_10901 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13227 = torch.aten.view %13225, %13226 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10902 = torch.constant.int 4
    %13228 = torch.aten.mul.int %int4_10902, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10903 = torch.constant.int 4096
    %13229 = torch.prim.ListConstruct %13228, %int4096_10903 : (!torch.int, !torch.int) -> !torch.list<int>
    %13230 = torch.aten.view %13117, %13229 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13230, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13231 = torch.aten.mm %13230, %13201 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13231, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10904 = torch.constant.int 4
    %int1792_10905 = torch.constant.int 1792
    %13232 = torch.prim.ListConstruct %int4_10904, %2482, %int1792_10905 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13233 = torch.aten.view %13231, %13232 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10906 = torch.constant.int 4
    %13234 = torch.aten.mul.int %int4_10906, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10907 = torch.constant.int 4096
    %13235 = torch.prim.ListConstruct %13234, %int4096_10907 : (!torch.int, !torch.int) -> !torch.list<int>
    %13236 = torch.aten.view %13118, %13235 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13236, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13237 = torch.aten.mm %13236, %13203 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13237, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10908 = torch.constant.int 4
    %int1792_10909 = torch.constant.int 1792
    %13238 = torch.prim.ListConstruct %int4_10908, %2482, %int1792_10909 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13239 = torch.aten.view %13237, %13238 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10910 = torch.constant.int 4
    %13240 = torch.aten.mul.int %int4_10910, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10911 = torch.constant.int 4096
    %13241 = torch.prim.ListConstruct %13240, %int4096_10911 : (!torch.int, !torch.int) -> !torch.list<int>
    %13242 = torch.aten.view %13119, %13241 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13242, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13243 = torch.aten.mm %13242, %13205 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13243, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10912 = torch.constant.int 4
    %int1792_10913 = torch.constant.int 1792
    %13244 = torch.prim.ListConstruct %int4_10912, %2482, %int1792_10913 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13245 = torch.aten.view %13243, %13244 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10914 = torch.constant.int 4
    %13246 = torch.aten.mul.int %int4_10914, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10915 = torch.constant.int 4096
    %13247 = torch.prim.ListConstruct %13246, %int4096_10915 : (!torch.int, !torch.int) -> !torch.list<int>
    %13248 = torch.aten.view %13120, %13247 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13248, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13249 = torch.aten.mm %13248, %13207 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13249, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10916 = torch.constant.int 4
    %int1792_10917 = torch.constant.int 1792
    %13250 = torch.prim.ListConstruct %int4_10916, %2482, %int1792_10917 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13251 = torch.aten.view %13249, %13250 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_10918 = torch.constant.int 4
    %13252 = torch.aten.mul.int %int4_10918, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_10919 = torch.constant.int 4096
    %13253 = torch.prim.ListConstruct %13252, %int4096_10919 : (!torch.int, !torch.int) -> !torch.list<int>
    %13254 = torch.aten.view %13121, %13253 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13254, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13255 = torch.aten.mm %13254, %13209 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13255, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_10920 = torch.constant.int 4
    %int1792_10921 = torch.constant.int 1792
    %13256 = torch.prim.ListConstruct %int4_10920, %2482, %int1792_10921 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13257 = torch.aten.view %13255, %13256 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
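    // SwiGLU gating: multiply silu(gate) by up, elementwise, shard by shard.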
    %13258 = torch.aten.mul.Tensor %13186, %13215 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13259 = torch.aten.mul.Tensor %13187, %13221 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13260 = torch.aten.mul.Tensor %13188, %13227 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13261 = torch.aten.mul.Tensor %13189, %13233 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13262 = torch.aten.mul.Tensor %13190, %13239 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13263 = torch.aten.mul.Tensor %13191, %13245 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13263, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13264 = torch.aten.mul.Tensor %13192, %13251 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %13265 = torch.aten.mul.Tensor %13193, %13257 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %13265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
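    // Transpose the down-projection weight shards (%432-%439, [4096,1792] -> [1792,4096]).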
    %int1_10922 = torch.constant.int 1
    %int0_10923 = torch.constant.int 0
    %13266 = torch.prim.ListConstruct %int1_10922, %int0_10923 : (!torch.int, !torch.int) -> !torch.list<int>
    %13267 = torch.aten.permute %432, %13266 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_10924 = torch.constant.int 1
    %int0_10925 = torch.constant.int 0
    %13268 = torch.prim.ListConstruct %int1_10924, %int0_10925 : (!torch.int, !torch.int) -> !torch.list<int>
    %13269 = torch.aten.permute %433, %13268 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_10926 = torch.constant.int 1
    %int0_10927 = torch.constant.int 0
    %13270 = torch.prim.ListConstruct %int1_10926, %int0_10927 : (!torch.int, !torch.int) -> !torch.list<int>
    %13271 = torch.aten.permute %434, %13270 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_10928 = torch.constant.int 1
    %int0_10929 = torch.constant.int 0
    %13272 = torch.prim.ListConstruct %int1_10928, %int0_10929 : (!torch.int, !torch.int) -> !torch.list<int>
    %13273 = torch.aten.permute %435, %13272 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_10930 = torch.constant.int 1
    %int0_10931 = torch.constant.int 0
    %13274 = torch.prim.ListConstruct %int1_10930, %int0_10931 : (!torch.int, !torch.int) -> !torch.list<int>
    %13275 = torch.aten.permute %436, %13274 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_10932 = torch.constant.int 1
    %int0_10933 = torch.constant.int 0
    %13276 = torch.prim.ListConstruct %int1_10932, %int0_10933 : (!torch.int, !torch.int) -> !torch.list<int>
    %13277 = torch.aten.permute %437, %13276 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_10934 = torch.constant.int 1
    %int0_10935 = torch.constant.int 0
    %13278 = torch.prim.ListConstruct %int1_10934, %int0_10935 : (!torch.int, !torch.int) -> !torch.list<int>
    %13279 = torch.aten.permute %438, %13278 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_10936 = torch.constant.int 1
    %int0_10937 = torch.constant.int 0
    %13280 = torch.prim.ListConstruct %int1_10936, %int0_10937 : (!torch.int, !torch.int) -> !torch.list<int>
    %13281 = torch.aten.permute %439, %13280 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
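    // Down matmul per device: each [4,?,1792] shard projects back to the full 4096 width,
    // producing 8 partial results (%13288, %13295, ..., %13337) that must be reduced.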
    %int1_10938 = torch.constant.int 1
    %13282 = torch.aten.size.int %13143, %int1_10938 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_10939 = torch.constant.int 4
    %13283 = torch.aten.mul.int %int4_10939, %13282 : !torch.int, !torch.int -> !torch.int
    %int1792_10940 = torch.constant.int 1792
    %13284 = torch.prim.ListConstruct %13283, %int1792_10940 : (!torch.int, !torch.int) -> !torch.list<int>
    %13285 = torch.aten.view %13258, %13284 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13285, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %13286 = torch.aten.mm %13285, %13267 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13286, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10941 = torch.constant.int 4
    %int4096_10942 = torch.constant.int 4096
    %13287 = torch.prim.ListConstruct %int4_10941, %13282, %int4096_10942 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13288 = torch.aten.view %13286, %13287 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10943 = torch.constant.int 1
    %13289 = torch.aten.size.int %13149, %int1_10943 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_10944 = torch.constant.int 4
    %13290 = torch.aten.mul.int %int4_10944, %13289 : !torch.int, !torch.int -> !torch.int
    %int1792_10945 = torch.constant.int 1792
    %13291 = torch.prim.ListConstruct %13290, %int1792_10945 : (!torch.int, !torch.int) -> !torch.list<int>
    %13292 = torch.aten.view %13259, %13291 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13292, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %13293 = torch.aten.mm %13292, %13269 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13293, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10946 = torch.constant.int 4
    %int4096_10947 = torch.constant.int 4096
    %13294 = torch.prim.ListConstruct %int4_10946, %13289, %int4096_10947 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13295 = torch.aten.view %13293, %13294 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10948 = torch.constant.int 1
    %13296 = torch.aten.size.int %13155, %int1_10948 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_10949 = torch.constant.int 4
    %13297 = torch.aten.mul.int %int4_10949, %13296 : !torch.int, !torch.int -> !torch.int
    %int1792_10950 = torch.constant.int 1792
    %13298 = torch.prim.ListConstruct %13297, %int1792_10950 : (!torch.int, !torch.int) -> !torch.list<int>
    %13299 = torch.aten.view %13260, %13298 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13299, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %13300 = torch.aten.mm %13299, %13271 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13300, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10951 = torch.constant.int 4
    %int4096_10952 = torch.constant.int 4096
    %13301 = torch.prim.ListConstruct %int4_10951, %13296, %int4096_10952 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13302 = torch.aten.view %13300, %13301 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10953 = torch.constant.int 1
    %13303 = torch.aten.size.int %13161, %int1_10953 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_10954 = torch.constant.int 4
    %13304 = torch.aten.mul.int %int4_10954, %13303 : !torch.int, !torch.int -> !torch.int
    %int1792_10955 = torch.constant.int 1792
    %13305 = torch.prim.ListConstruct %13304, %int1792_10955 : (!torch.int, !torch.int) -> !torch.list<int>
    %13306 = torch.aten.view %13261, %13305 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13306, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %13307 = torch.aten.mm %13306, %13273 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13307, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10956 = torch.constant.int 4
    %int4096_10957 = torch.constant.int 4096
    %13308 = torch.prim.ListConstruct %int4_10956, %13303, %int4096_10957 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13309 = torch.aten.view %13307, %13308 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10958 = torch.constant.int 1
    %13310 = torch.aten.size.int %13167, %int1_10958 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_10959 = torch.constant.int 4
    %13311 = torch.aten.mul.int %int4_10959, %13310 : !torch.int, !torch.int -> !torch.int
    %int1792_10960 = torch.constant.int 1792
    %13312 = torch.prim.ListConstruct %13311, %int1792_10960 : (!torch.int, !torch.int) -> !torch.list<int>
    %13313 = torch.aten.view %13262, %13312 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13313, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %13314 = torch.aten.mm %13313, %13275 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13314, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10961 = torch.constant.int 4
    %int4096_10962 = torch.constant.int 4096
    %13315 = torch.prim.ListConstruct %int4_10961, %13310, %int4096_10962 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13316 = torch.aten.view %13314, %13315 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10963 = torch.constant.int 1
    %13317 = torch.aten.size.int %13173, %int1_10963 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_10964 = torch.constant.int 4
    %13318 = torch.aten.mul.int %int4_10964, %13317 : !torch.int, !torch.int -> !torch.int
    %int1792_10965 = torch.constant.int 1792
    %13319 = torch.prim.ListConstruct %13318, %int1792_10965 : (!torch.int, !torch.int) -> !torch.list<int>
    %13320 = torch.aten.view %13263, %13319 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13320, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %13321 = torch.aten.mm %13320, %13277 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13321, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10966 = torch.constant.int 4
    %int4096_10967 = torch.constant.int 4096
    %13322 = torch.prim.ListConstruct %int4_10966, %13317, %int4096_10967 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13323 = torch.aten.view %13321, %13322 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10968 = torch.constant.int 1
    %13324 = torch.aten.size.int %13179, %int1_10968 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_10969 = torch.constant.int 4
    %13325 = torch.aten.mul.int %int4_10969, %13324 : !torch.int, !torch.int -> !torch.int
    %int1792_10970 = torch.constant.int 1792
    %13326 = torch.prim.ListConstruct %13325, %int1792_10970 : (!torch.int, !torch.int) -> !torch.list<int>
    %13327 = torch.aten.view %13264, %13326 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13327, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %13328 = torch.aten.mm %13327, %13279 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13328, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10971 = torch.constant.int 4
    %int4096_10972 = torch.constant.int 4096
    %13329 = torch.prim.ListConstruct %int4_10971, %13324, %int4096_10972 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13330 = torch.aten.view %13328, %13329 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10973 = torch.constant.int 1
    %13331 = torch.aten.size.int %13185, %int1_10973 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_10974 = torch.constant.int 4
    %13332 = torch.aten.mul.int %int4_10974, %13331 : !torch.int, !torch.int -> !torch.int
    %int1792_10975 = torch.constant.int 1792
    %13333 = torch.prim.ListConstruct %13332, %int1792_10975 : (!torch.int, !torch.int) -> !torch.list<int>
    %13334 = torch.aten.view %13265, %13333 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %13334, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %13335 = torch.aten.mm %13334, %13281 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13335, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_10976 = torch.constant.int 4
    %int4096_10977 = torch.constant.int 4096
    %13336 = torch.prim.ListConstruct %int4_10976, %13331, %int4096_10977 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13337 = torch.aten.view %13335, %13336 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
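    // All-reduce, materialized once per destination device. For @__device_0: transfer the
    // other seven partials to device 0 and accumulate them into %13288 with a chain of adds.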
    %13338 = torch_c.to_builtin_tensor %13295 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10978 = arith.constant 1 : index
    %dim_10979 = tensor.dim %13338, %c1_10978 : tensor<4x?x4096xf16>
    %13339 = flow.tensor.transfer %13338 : tensor<4x?x4096xf16>{%dim_10979} to #hal.device.promise<@__device_0>
    %13340 = torch_c.from_builtin_tensor %13339 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13341 = torch_c.to_builtin_tensor %13302 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10980 = arith.constant 1 : index
    %dim_10981 = tensor.dim %13341, %c1_10980 : tensor<4x?x4096xf16>
    %13342 = flow.tensor.transfer %13341 : tensor<4x?x4096xf16>{%dim_10981} to #hal.device.promise<@__device_0>
    %13343 = torch_c.from_builtin_tensor %13342 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13344 = torch_c.to_builtin_tensor %13309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10982 = arith.constant 1 : index
    %dim_10983 = tensor.dim %13344, %c1_10982 : tensor<4x?x4096xf16>
    %13345 = flow.tensor.transfer %13344 : tensor<4x?x4096xf16>{%dim_10983} to #hal.device.promise<@__device_0>
    %13346 = torch_c.from_builtin_tensor %13345 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13347 = torch_c.to_builtin_tensor %13316 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10984 = arith.constant 1 : index
    %dim_10985 = tensor.dim %13347, %c1_10984 : tensor<4x?x4096xf16>
    %13348 = flow.tensor.transfer %13347 : tensor<4x?x4096xf16>{%dim_10985} to #hal.device.promise<@__device_0>
    %13349 = torch_c.from_builtin_tensor %13348 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13350 = torch_c.to_builtin_tensor %13323 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10986 = arith.constant 1 : index
    %dim_10987 = tensor.dim %13350, %c1_10986 : tensor<4x?x4096xf16>
    %13351 = flow.tensor.transfer %13350 : tensor<4x?x4096xf16>{%dim_10987} to #hal.device.promise<@__device_0>
    %13352 = torch_c.from_builtin_tensor %13351 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13353 = torch_c.to_builtin_tensor %13330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10988 = arith.constant 1 : index
    %dim_10989 = tensor.dim %13353, %c1_10988 : tensor<4x?x4096xf16>
    %13354 = flow.tensor.transfer %13353 : tensor<4x?x4096xf16>{%dim_10989} to #hal.device.promise<@__device_0>
    %13355 = torch_c.from_builtin_tensor %13354 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13356 = torch_c.to_builtin_tensor %13337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10990 = arith.constant 1 : index
    %dim_10991 = tensor.dim %13356, %c1_10990 : tensor<4x?x4096xf16>
    %13357 = flow.tensor.transfer %13356 : tensor<4x?x4096xf16>{%dim_10991} to #hal.device.promise<@__device_0>
    %13358 = torch_c.from_builtin_tensor %13357 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10992 = torch.constant.int 1
    %13359 = torch.aten.add.Tensor %13288, %13340, %int1_10992 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10993 = torch.constant.int 1
    %13360 = torch.aten.add.Tensor %13359, %13343, %int1_10993 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10994 = torch.constant.int 1
    %13361 = torch.aten.add.Tensor %13360, %13346, %int1_10994 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10995 = torch.constant.int 1
    %13362 = torch.aten.add.Tensor %13361, %13349, %int1_10995 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10996 = torch.constant.int 1
    %13363 = torch.aten.add.Tensor %13362, %13352, %int1_10996 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10997 = torch.constant.int 1
    %13364 = torch.aten.add.Tensor %13363, %13355, %int1_10997 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_10998 = torch.constant.int 1
    %13365 = torch.aten.add.Tensor %13364, %13358, %int1_10998 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
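    // Same reduction targeting @__device_1: peers' partials are transferred over and summed
    // into device 1's own partial (%13295).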
    %13366 = torch_c.to_builtin_tensor %13288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_10999 = arith.constant 1 : index
    %dim_11000 = tensor.dim %13366, %c1_10999 : tensor<4x?x4096xf16>
    %13367 = flow.tensor.transfer %13366 : tensor<4x?x4096xf16>{%dim_11000} to #hal.device.promise<@__device_1>
    %13368 = torch_c.from_builtin_tensor %13367 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13369 = torch_c.to_builtin_tensor %13302 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11001 = arith.constant 1 : index
    %dim_11002 = tensor.dim %13369, %c1_11001 : tensor<4x?x4096xf16>
    %13370 = flow.tensor.transfer %13369 : tensor<4x?x4096xf16>{%dim_11002} to #hal.device.promise<@__device_1>
    %13371 = torch_c.from_builtin_tensor %13370 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13372 = torch_c.to_builtin_tensor %13309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11003 = arith.constant 1 : index
    %dim_11004 = tensor.dim %13372, %c1_11003 : tensor<4x?x4096xf16>
    %13373 = flow.tensor.transfer %13372 : tensor<4x?x4096xf16>{%dim_11004} to #hal.device.promise<@__device_1>
    %13374 = torch_c.from_builtin_tensor %13373 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13375 = torch_c.to_builtin_tensor %13316 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11005 = arith.constant 1 : index
    %dim_11006 = tensor.dim %13375, %c1_11005 : tensor<4x?x4096xf16>
    %13376 = flow.tensor.transfer %13375 : tensor<4x?x4096xf16>{%dim_11006} to #hal.device.promise<@__device_1>
    %13377 = torch_c.from_builtin_tensor %13376 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13378 = torch_c.to_builtin_tensor %13323 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11007 = arith.constant 1 : index
    %dim_11008 = tensor.dim %13378, %c1_11007 : tensor<4x?x4096xf16>
    %13379 = flow.tensor.transfer %13378 : tensor<4x?x4096xf16>{%dim_11008} to #hal.device.promise<@__device_1>
    %13380 = torch_c.from_builtin_tensor %13379 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13381 = torch_c.to_builtin_tensor %13330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11009 = arith.constant 1 : index
    %dim_11010 = tensor.dim %13381, %c1_11009 : tensor<4x?x4096xf16>
    %13382 = flow.tensor.transfer %13381 : tensor<4x?x4096xf16>{%dim_11010} to #hal.device.promise<@__device_1>
    %13383 = torch_c.from_builtin_tensor %13382 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13384 = torch_c.to_builtin_tensor %13337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11011 = arith.constant 1 : index
    %dim_11012 = tensor.dim %13384, %c1_11011 : tensor<4x?x4096xf16>
    %13385 = flow.tensor.transfer %13384 : tensor<4x?x4096xf16>{%dim_11012} to #hal.device.promise<@__device_1>
    %13386 = torch_c.from_builtin_tensor %13385 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11013 = torch.constant.int 1
    %13387 = torch.aten.add.Tensor %13368, %13295, %int1_11013 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11014 = torch.constant.int 1
    %13388 = torch.aten.add.Tensor %13387, %13371, %int1_11014 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11015 = torch.constant.int 1
    %13389 = torch.aten.add.Tensor %13388, %13374, %int1_11015 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11016 = torch.constant.int 1
    %13390 = torch.aten.add.Tensor %13389, %13377, %int1_11016 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11017 = torch.constant.int 1
    %13391 = torch.aten.add.Tensor %13390, %13380, %int1_11017 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11018 = torch.constant.int 1
    %13392 = torch.aten.add.Tensor %13391, %13383, %int1_11018 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11019 = torch.constant.int 1
    %13393 = torch.aten.add.Tensor %13392, %13386, %int1_11019 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
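    // And the same for @__device_2; this transfer-and-accumulate pattern repeats once per device.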
    %13394 = torch_c.to_builtin_tensor %13288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11020 = arith.constant 1 : index
    %dim_11021 = tensor.dim %13394, %c1_11020 : tensor<4x?x4096xf16>
    %13395 = flow.tensor.transfer %13394 : tensor<4x?x4096xf16>{%dim_11021} to #hal.device.promise<@__device_2>
    %13396 = torch_c.from_builtin_tensor %13395 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13397 = torch_c.to_builtin_tensor %13295 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11022 = arith.constant 1 : index
    %dim_11023 = tensor.dim %13397, %c1_11022 : tensor<4x?x4096xf16>
    %13398 = flow.tensor.transfer %13397 : tensor<4x?x4096xf16>{%dim_11023} to #hal.device.promise<@__device_2>
    %13399 = torch_c.from_builtin_tensor %13398 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13400 = torch_c.to_builtin_tensor %13309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11024 = arith.constant 1 : index
    %dim_11025 = tensor.dim %13400, %c1_11024 : tensor<4x?x4096xf16>
    %13401 = flow.tensor.transfer %13400 : tensor<4x?x4096xf16>{%dim_11025} to #hal.device.promise<@__device_2>
    %13402 = torch_c.from_builtin_tensor %13401 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13403 = torch_c.to_builtin_tensor %13316 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11026 = arith.constant 1 : index
    %dim_11027 = tensor.dim %13403, %c1_11026 : tensor<4x?x4096xf16>
    %13404 = flow.tensor.transfer %13403 : tensor<4x?x4096xf16>{%dim_11027} to #hal.device.promise<@__device_2>
    %13405 = torch_c.from_builtin_tensor %13404 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13406 = torch_c.to_builtin_tensor %13323 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11028 = arith.constant 1 : index
    %dim_11029 = tensor.dim %13406, %c1_11028 : tensor<4x?x4096xf16>
    %13407 = flow.tensor.transfer %13406 : tensor<4x?x4096xf16>{%dim_11029} to #hal.device.promise<@__device_2>
    %13408 = torch_c.from_builtin_tensor %13407 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13409 = torch_c.to_builtin_tensor %13330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11030 = arith.constant 1 : index
    %dim_11031 = tensor.dim %13409, %c1_11030 : tensor<4x?x4096xf16>
    %13410 = flow.tensor.transfer %13409 : tensor<4x?x4096xf16>{%dim_11031} to #hal.device.promise<@__device_2>
    %13411 = torch_c.from_builtin_tensor %13410 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13412 = torch_c.to_builtin_tensor %13337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11032 = arith.constant 1 : index
    %dim_11033 = tensor.dim %13412, %c1_11032 : tensor<4x?x4096xf16>
    %13413 = flow.tensor.transfer %13412 : tensor<4x?x4096xf16>{%dim_11033} to #hal.device.promise<@__device_2>
    %13414 = torch_c.from_builtin_tensor %13413 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11034 = torch.constant.int 1
    %13415 = torch.aten.add.Tensor %13396, %13399, %int1_11034 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11035 = torch.constant.int 1
    %13416 = torch.aten.add.Tensor %13415, %13302, %int1_11035 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11036 = torch.constant.int 1
    %13417 = torch.aten.add.Tensor %13416, %13402, %int1_11036 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11037 = torch.constant.int 1
    %13418 = torch.aten.add.Tensor %13417, %13405, %int1_11037 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11038 = torch.constant.int 1
    %13419 = torch.aten.add.Tensor %13418, %13408, %int1_11038 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11039 = torch.constant.int 1
    %13420 = torch.aten.add.Tensor %13419, %13411, %int1_11039 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11040 = torch.constant.int 1
    %13421 = torch.aten.add.Tensor %13420, %13414, %int1_11040 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
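    // Same reduction materialized on @__device_3; here %13309 is the locally resident partial, so only the other seven are transferred.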
    %13422 = torch_c.to_builtin_tensor %13288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11041 = arith.constant 1 : index
    %dim_11042 = tensor.dim %13422, %c1_11041 : tensor<4x?x4096xf16>
    %13423 = flow.tensor.transfer %13422 : tensor<4x?x4096xf16>{%dim_11042} to #hal.device.promise<@__device_3>
    %13424 = torch_c.from_builtin_tensor %13423 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13425 = torch_c.to_builtin_tensor %13295 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11043 = arith.constant 1 : index
    %dim_11044 = tensor.dim %13425, %c1_11043 : tensor<4x?x4096xf16>
    %13426 = flow.tensor.transfer %13425 : tensor<4x?x4096xf16>{%dim_11044} to #hal.device.promise<@__device_3>
    %13427 = torch_c.from_builtin_tensor %13426 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13428 = torch_c.to_builtin_tensor %13302 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11045 = arith.constant 1 : index
    %dim_11046 = tensor.dim %13428, %c1_11045 : tensor<4x?x4096xf16>
    %13429 = flow.tensor.transfer %13428 : tensor<4x?x4096xf16>{%dim_11046} to #hal.device.promise<@__device_3>
    %13430 = torch_c.from_builtin_tensor %13429 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13431 = torch_c.to_builtin_tensor %13316 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11047 = arith.constant 1 : index
    %dim_11048 = tensor.dim %13431, %c1_11047 : tensor<4x?x4096xf16>
    %13432 = flow.tensor.transfer %13431 : tensor<4x?x4096xf16>{%dim_11048} to #hal.device.promise<@__device_3>
    %13433 = torch_c.from_builtin_tensor %13432 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13434 = torch_c.to_builtin_tensor %13323 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11049 = arith.constant 1 : index
    %dim_11050 = tensor.dim %13434, %c1_11049 : tensor<4x?x4096xf16>
    %13435 = flow.tensor.transfer %13434 : tensor<4x?x4096xf16>{%dim_11050} to #hal.device.promise<@__device_3>
    %13436 = torch_c.from_builtin_tensor %13435 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13437 = torch_c.to_builtin_tensor %13330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11051 = arith.constant 1 : index
    %dim_11052 = tensor.dim %13437, %c1_11051 : tensor<4x?x4096xf16>
    %13438 = flow.tensor.transfer %13437 : tensor<4x?x4096xf16>{%dim_11052} to #hal.device.promise<@__device_3>
    %13439 = torch_c.from_builtin_tensor %13438 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13440 = torch_c.to_builtin_tensor %13337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11053 = arith.constant 1 : index
    %dim_11054 = tensor.dim %13440, %c1_11053 : tensor<4x?x4096xf16>
    %13441 = flow.tensor.transfer %13440 : tensor<4x?x4096xf16>{%dim_11054} to #hal.device.promise<@__device_3>
    %13442 = torch_c.from_builtin_tensor %13441 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11055 = torch.constant.int 1
    %13443 = torch.aten.add.Tensor %13424, %13427, %int1_11055 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11056 = torch.constant.int 1
    %13444 = torch.aten.add.Tensor %13443, %13430, %int1_11056 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11057 = torch.constant.int 1
    %13445 = torch.aten.add.Tensor %13444, %13309, %int1_11057 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11058 = torch.constant.int 1
    %13446 = torch.aten.add.Tensor %13445, %13433, %int1_11058 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11059 = torch.constant.int 1
    %13447 = torch.aten.add.Tensor %13446, %13436, %int1_11059 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11060 = torch.constant.int 1
    %13448 = torch.aten.add.Tensor %13447, %13439, %int1_11060 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11061 = torch.constant.int 1
    %13449 = torch.aten.add.Tensor %13448, %13442, %int1_11061 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
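    // Same reduction materialized on @__device_4; %13316 is the locally resident partial.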
    %13450 = torch_c.to_builtin_tensor %13288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11062 = arith.constant 1 : index
    %dim_11063 = tensor.dim %13450, %c1_11062 : tensor<4x?x4096xf16>
    %13451 = flow.tensor.transfer %13450 : tensor<4x?x4096xf16>{%dim_11063} to #hal.device.promise<@__device_4>
    %13452 = torch_c.from_builtin_tensor %13451 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13453 = torch_c.to_builtin_tensor %13295 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11064 = arith.constant 1 : index
    %dim_11065 = tensor.dim %13453, %c1_11064 : tensor<4x?x4096xf16>
    %13454 = flow.tensor.transfer %13453 : tensor<4x?x4096xf16>{%dim_11065} to #hal.device.promise<@__device_4>
    %13455 = torch_c.from_builtin_tensor %13454 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13456 = torch_c.to_builtin_tensor %13302 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11066 = arith.constant 1 : index
    %dim_11067 = tensor.dim %13456, %c1_11066 : tensor<4x?x4096xf16>
    %13457 = flow.tensor.transfer %13456 : tensor<4x?x4096xf16>{%dim_11067} to #hal.device.promise<@__device_4>
    %13458 = torch_c.from_builtin_tensor %13457 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13459 = torch_c.to_builtin_tensor %13309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11068 = arith.constant 1 : index
    %dim_11069 = tensor.dim %13459, %c1_11068 : tensor<4x?x4096xf16>
    %13460 = flow.tensor.transfer %13459 : tensor<4x?x4096xf16>{%dim_11069} to #hal.device.promise<@__device_4>
    %13461 = torch_c.from_builtin_tensor %13460 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13462 = torch_c.to_builtin_tensor %13323 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11070 = arith.constant 1 : index
    %dim_11071 = tensor.dim %13462, %c1_11070 : tensor<4x?x4096xf16>
    %13463 = flow.tensor.transfer %13462 : tensor<4x?x4096xf16>{%dim_11071} to #hal.device.promise<@__device_4>
    %13464 = torch_c.from_builtin_tensor %13463 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13465 = torch_c.to_builtin_tensor %13330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11072 = arith.constant 1 : index
    %dim_11073 = tensor.dim %13465, %c1_11072 : tensor<4x?x4096xf16>
    %13466 = flow.tensor.transfer %13465 : tensor<4x?x4096xf16>{%dim_11073} to #hal.device.promise<@__device_4>
    %13467 = torch_c.from_builtin_tensor %13466 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13468 = torch_c.to_builtin_tensor %13337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11074 = arith.constant 1 : index
    %dim_11075 = tensor.dim %13468, %c1_11074 : tensor<4x?x4096xf16>
    %13469 = flow.tensor.transfer %13468 : tensor<4x?x4096xf16>{%dim_11075} to #hal.device.promise<@__device_4>
    %13470 = torch_c.from_builtin_tensor %13469 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11076 = torch.constant.int 1
    %13471 = torch.aten.add.Tensor %13452, %13455, %int1_11076 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11077 = torch.constant.int 1
    %13472 = torch.aten.add.Tensor %13471, %13458, %int1_11077 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11078 = torch.constant.int 1
    %13473 = torch.aten.add.Tensor %13472, %13461, %int1_11078 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11079 = torch.constant.int 1
    %13474 = torch.aten.add.Tensor %13473, %13316, %int1_11079 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11080 = torch.constant.int 1
    %13475 = torch.aten.add.Tensor %13474, %13464, %int1_11080 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11081 = torch.constant.int 1
    %13476 = torch.aten.add.Tensor %13475, %13467, %int1_11081 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11082 = torch.constant.int 1
    %13477 = torch.aten.add.Tensor %13476, %13470, %int1_11082 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
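    // Same reduction materialized on @__device_5; %13323 is the locally resident partial.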
    %13478 = torch_c.to_builtin_tensor %13288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11083 = arith.constant 1 : index
    %dim_11084 = tensor.dim %13478, %c1_11083 : tensor<4x?x4096xf16>
    %13479 = flow.tensor.transfer %13478 : tensor<4x?x4096xf16>{%dim_11084} to #hal.device.promise<@__device_5>
    %13480 = torch_c.from_builtin_tensor %13479 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13481 = torch_c.to_builtin_tensor %13295 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11085 = arith.constant 1 : index
    %dim_11086 = tensor.dim %13481, %c1_11085 : tensor<4x?x4096xf16>
    %13482 = flow.tensor.transfer %13481 : tensor<4x?x4096xf16>{%dim_11086} to #hal.device.promise<@__device_5>
    %13483 = torch_c.from_builtin_tensor %13482 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13484 = torch_c.to_builtin_tensor %13302 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11087 = arith.constant 1 : index
    %dim_11088 = tensor.dim %13484, %c1_11087 : tensor<4x?x4096xf16>
    %13485 = flow.tensor.transfer %13484 : tensor<4x?x4096xf16>{%dim_11088} to #hal.device.promise<@__device_5>
    %13486 = torch_c.from_builtin_tensor %13485 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13487 = torch_c.to_builtin_tensor %13309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11089 = arith.constant 1 : index
    %dim_11090 = tensor.dim %13487, %c1_11089 : tensor<4x?x4096xf16>
    %13488 = flow.tensor.transfer %13487 : tensor<4x?x4096xf16>{%dim_11090} to #hal.device.promise<@__device_5>
    %13489 = torch_c.from_builtin_tensor %13488 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13490 = torch_c.to_builtin_tensor %13316 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11091 = arith.constant 1 : index
    %dim_11092 = tensor.dim %13490, %c1_11091 : tensor<4x?x4096xf16>
    %13491 = flow.tensor.transfer %13490 : tensor<4x?x4096xf16>{%dim_11092} to #hal.device.promise<@__device_5>
    %13492 = torch_c.from_builtin_tensor %13491 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13493 = torch_c.to_builtin_tensor %13330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11093 = arith.constant 1 : index
    %dim_11094 = tensor.dim %13493, %c1_11093 : tensor<4x?x4096xf16>
    %13494 = flow.tensor.transfer %13493 : tensor<4x?x4096xf16>{%dim_11094} to #hal.device.promise<@__device_5>
    %13495 = torch_c.from_builtin_tensor %13494 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13496 = torch_c.to_builtin_tensor %13337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11095 = arith.constant 1 : index
    %dim_11096 = tensor.dim %13496, %c1_11095 : tensor<4x?x4096xf16>
    %13497 = flow.tensor.transfer %13496 : tensor<4x?x4096xf16>{%dim_11096} to #hal.device.promise<@__device_5>
    %13498 = torch_c.from_builtin_tensor %13497 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11097 = torch.constant.int 1
    %13499 = torch.aten.add.Tensor %13480, %13483, %int1_11097 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11098 = torch.constant.int 1
    %13500 = torch.aten.add.Tensor %13499, %13486, %int1_11098 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11099 = torch.constant.int 1
    %13501 = torch.aten.add.Tensor %13500, %13489, %int1_11099 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11100 = torch.constant.int 1
    %13502 = torch.aten.add.Tensor %13501, %13492, %int1_11100 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11101 = torch.constant.int 1
    %13503 = torch.aten.add.Tensor %13502, %13323, %int1_11101 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11102 = torch.constant.int 1
    %13504 = torch.aten.add.Tensor %13503, %13495, %int1_11102 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11103 = torch.constant.int 1
    %13505 = torch.aten.add.Tensor %13504, %13498, %int1_11103 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
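    // Same reduction materialized on @__device_6; %13330 is the locally resident partial.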
    %13506 = torch_c.to_builtin_tensor %13288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11104 = arith.constant 1 : index
    %dim_11105 = tensor.dim %13506, %c1_11104 : tensor<4x?x4096xf16>
    %13507 = flow.tensor.transfer %13506 : tensor<4x?x4096xf16>{%dim_11105} to #hal.device.promise<@__device_6>
    %13508 = torch_c.from_builtin_tensor %13507 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13509 = torch_c.to_builtin_tensor %13295 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11106 = arith.constant 1 : index
    %dim_11107 = tensor.dim %13509, %c1_11106 : tensor<4x?x4096xf16>
    %13510 = flow.tensor.transfer %13509 : tensor<4x?x4096xf16>{%dim_11107} to #hal.device.promise<@__device_6>
    %13511 = torch_c.from_builtin_tensor %13510 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13512 = torch_c.to_builtin_tensor %13302 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11108 = arith.constant 1 : index
    %dim_11109 = tensor.dim %13512, %c1_11108 : tensor<4x?x4096xf16>
    %13513 = flow.tensor.transfer %13512 : tensor<4x?x4096xf16>{%dim_11109} to #hal.device.promise<@__device_6>
    %13514 = torch_c.from_builtin_tensor %13513 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13515 = torch_c.to_builtin_tensor %13309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11110 = arith.constant 1 : index
    %dim_11111 = tensor.dim %13515, %c1_11110 : tensor<4x?x4096xf16>
    %13516 = flow.tensor.transfer %13515 : tensor<4x?x4096xf16>{%dim_11111} to #hal.device.promise<@__device_6>
    %13517 = torch_c.from_builtin_tensor %13516 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13518 = torch_c.to_builtin_tensor %13316 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11112 = arith.constant 1 : index
    %dim_11113 = tensor.dim %13518, %c1_11112 : tensor<4x?x4096xf16>
    %13519 = flow.tensor.transfer %13518 : tensor<4x?x4096xf16>{%dim_11113} to #hal.device.promise<@__device_6>
    %13520 = torch_c.from_builtin_tensor %13519 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13521 = torch_c.to_builtin_tensor %13323 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11114 = arith.constant 1 : index
    %dim_11115 = tensor.dim %13521, %c1_11114 : tensor<4x?x4096xf16>
    %13522 = flow.tensor.transfer %13521 : tensor<4x?x4096xf16>{%dim_11115} to #hal.device.promise<@__device_6>
    %13523 = torch_c.from_builtin_tensor %13522 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13524 = torch_c.to_builtin_tensor %13337 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11116 = arith.constant 1 : index
    %dim_11117 = tensor.dim %13524, %c1_11116 : tensor<4x?x4096xf16>
    %13525 = flow.tensor.transfer %13524 : tensor<4x?x4096xf16>{%dim_11117} to #hal.device.promise<@__device_6>
    %13526 = torch_c.from_builtin_tensor %13525 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11118 = torch.constant.int 1
    %13527 = torch.aten.add.Tensor %13508, %13511, %int1_11118 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11119 = torch.constant.int 1
    %13528 = torch.aten.add.Tensor %13527, %13514, %int1_11119 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11120 = torch.constant.int 1
    %13529 = torch.aten.add.Tensor %13528, %13517, %int1_11120 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11121 = torch.constant.int 1
    %13530 = torch.aten.add.Tensor %13529, %13520, %int1_11121 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11122 = torch.constant.int 1
    %13531 = torch.aten.add.Tensor %13530, %13523, %int1_11122 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11123 = torch.constant.int 1
    %13532 = torch.aten.add.Tensor %13531, %13330, %int1_11123 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11124 = torch.constant.int 1
    %13533 = torch.aten.add.Tensor %13532, %13526, %int1_11124 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
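    // Same reduction materialized on @__device_7; %13337 is the locally resident partial. After this block, every device holds a full copy of the summed activations (an all-reduce realized as per-device gathers).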
    %13534 = torch_c.to_builtin_tensor %13288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11125 = arith.constant 1 : index
    %dim_11126 = tensor.dim %13534, %c1_11125 : tensor<4x?x4096xf16>
    %13535 = flow.tensor.transfer %13534 : tensor<4x?x4096xf16>{%dim_11126} to #hal.device.promise<@__device_7>
    %13536 = torch_c.from_builtin_tensor %13535 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13537 = torch_c.to_builtin_tensor %13295 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11127 = arith.constant 1 : index
    %dim_11128 = tensor.dim %13537, %c1_11127 : tensor<4x?x4096xf16>
    %13538 = flow.tensor.transfer %13537 : tensor<4x?x4096xf16>{%dim_11128} to #hal.device.promise<@__device_7>
    %13539 = torch_c.from_builtin_tensor %13538 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13540 = torch_c.to_builtin_tensor %13302 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11129 = arith.constant 1 : index
    %dim_11130 = tensor.dim %13540, %c1_11129 : tensor<4x?x4096xf16>
    %13541 = flow.tensor.transfer %13540 : tensor<4x?x4096xf16>{%dim_11130} to #hal.device.promise<@__device_7>
    %13542 = torch_c.from_builtin_tensor %13541 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13543 = torch_c.to_builtin_tensor %13309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11131 = arith.constant 1 : index
    %dim_11132 = tensor.dim %13543, %c1_11131 : tensor<4x?x4096xf16>
    %13544 = flow.tensor.transfer %13543 : tensor<4x?x4096xf16>{%dim_11132} to #hal.device.promise<@__device_7>
    %13545 = torch_c.from_builtin_tensor %13544 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13546 = torch_c.to_builtin_tensor %13316 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11133 = arith.constant 1 : index
    %dim_11134 = tensor.dim %13546, %c1_11133 : tensor<4x?x4096xf16>
    %13547 = flow.tensor.transfer %13546 : tensor<4x?x4096xf16>{%dim_11134} to #hal.device.promise<@__device_7>
    %13548 = torch_c.from_builtin_tensor %13547 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13549 = torch_c.to_builtin_tensor %13323 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11135 = arith.constant 1 : index
    %dim_11136 = tensor.dim %13549, %c1_11135 : tensor<4x?x4096xf16>
    %13550 = flow.tensor.transfer %13549 : tensor<4x?x4096xf16>{%dim_11136} to #hal.device.promise<@__device_7>
    %13551 = torch_c.from_builtin_tensor %13550 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %13552 = torch_c.to_builtin_tensor %13330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_11137 = arith.constant 1 : index
    %dim_11138 = tensor.dim %13552, %c1_11137 : tensor<4x?x4096xf16>
    %13553 = flow.tensor.transfer %13552 : tensor<4x?x4096xf16>{%dim_11138} to #hal.device.promise<@__device_7>
    %13554 = torch_c.from_builtin_tensor %13553 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11139 = torch.constant.int 1
    %13555 = torch.aten.add.Tensor %13536, %13539, %int1_11139 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11140 = torch.constant.int 1
    %13556 = torch.aten.add.Tensor %13555, %13542, %int1_11140 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11141 = torch.constant.int 1
    %13557 = torch.aten.add.Tensor %13556, %13545, %int1_11141 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11142 = torch.constant.int 1
    %13558 = torch.aten.add.Tensor %13557, %13548, %int1_11142 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11143 = torch.constant.int 1
    %13559 = torch.aten.add.Tensor %13558, %13551, %int1_11143 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11144 = torch.constant.int 1
    %13560 = torch.aten.add.Tensor %13559, %13554, %int1_11144 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11145 = torch.constant.int 1
    %13561 = torch.aten.add.Tensor %13560, %13337, %int1_11145 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
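    // With the sum materialized on every device, add it into each device's carried activations (%13042 through %13049); this appears to be the decoder layer's residual connection.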
    %int1_11146 = torch.constant.int 1
    %13562 = torch.aten.add.Tensor %13042, %13365, %int1_11146 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11147 = torch.constant.int 1
    %13563 = torch.aten.add.Tensor %13043, %13393, %int1_11147 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11148 = torch.constant.int 1
    %13564 = torch.aten.add.Tensor %13044, %13421, %int1_11148 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11149 = torch.constant.int 1
    %13565 = torch.aten.add.Tensor %13045, %13449, %int1_11149 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11150 = torch.constant.int 1
    %13566 = torch.aten.add.Tensor %13046, %13477, %int1_11150 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11151 = torch.constant.int 1
    %13567 = torch.aten.add.Tensor %13047, %13505, %int1_11151 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11152 = torch.constant.int 1
    %13568 = torch.aten.add.Tensor %13048, %13533, %int1_11152 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_11153 = torch.constant.int 1
    %13569 = torch.aten.add.Tensor %13049, %13561, %int1_11153 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
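    // RMSNorm-style normalization over the post-residual activations, one replica per device: first upcast to f32 (torch dtype 6) before computing the statistics.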
    %int6_11154 = torch.constant.int 6
    %13570 = torch.prims.convert_element_type %13562, %int6_11154 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_11155 = torch.constant.int 6
    %13571 = torch.prims.convert_element_type %13563, %int6_11155 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_11156 = torch.constant.int 6
    %13572 = torch.prims.convert_element_type %13564, %int6_11156 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_11157 = torch.constant.int 6
    %13573 = torch.prims.convert_element_type %13565, %int6_11157 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_11158 = torch.constant.int 6
    %13574 = torch.prims.convert_element_type %13566, %int6_11158 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_11159 = torch.constant.int 6
    %13575 = torch.prims.convert_element_type %13567, %int6_11159 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_11160 = torch.constant.int 6
    %13576 = torch.prims.convert_element_type %13568, %int6_11160 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_11161 = torch.constant.int 6
    %13577 = torch.prims.convert_element_type %13569, %int6_11161 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
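    // Square element-wise (x^2).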
    %int2_11162 = torch.constant.int 2
    %13578 = torch.aten.pow.Tensor_Scalar %13570, %int2_11162 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_11163 = torch.constant.int 2
    %13579 = torch.aten.pow.Tensor_Scalar %13571, %int2_11163 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_11164 = torch.constant.int 2
    %13580 = torch.aten.pow.Tensor_Scalar %13572, %int2_11164 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_11165 = torch.constant.int 2
    %13581 = torch.aten.pow.Tensor_Scalar %13573, %int2_11165 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_11166 = torch.constant.int 2
    %13582 = torch.aten.pow.Tensor_Scalar %13574, %int2_11166 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_11167 = torch.constant.int 2
    %13583 = torch.aten.pow.Tensor_Scalar %13575, %int2_11167 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_11168 = torch.constant.int 2
    %13584 = torch.aten.pow.Tensor_Scalar %13576, %int2_11168 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_11169 = torch.constant.int 2
    %13585 = torch.aten.pow.Tensor_Scalar %13577, %int2_11169 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
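    // Mean of the squares over the hidden dimension (dim -1, keepdim = true), giving the per-token mean-square.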
    %int-1_11170 = torch.constant.int -1
    %13586 = torch.prim.ListConstruct %int-1_11170 : (!torch.int) -> !torch.list<int>
    %true_11171 = torch.constant.bool true
    %none_11172 = torch.constant.none
    %13587 = torch.aten.mean.dim %13578, %13586, %true_11171, %none_11172 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_11173 = torch.constant.int -1
    %13588 = torch.prim.ListConstruct %int-1_11173 : (!torch.int) -> !torch.list<int>
    %true_11174 = torch.constant.bool true
    %none_11175 = torch.constant.none
    %13589 = torch.aten.mean.dim %13579, %13588, %true_11174, %none_11175 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_11176 = torch.constant.int -1
    %13590 = torch.prim.ListConstruct %int-1_11176 : (!torch.int) -> !torch.list<int>
    %true_11177 = torch.constant.bool true
    %none_11178 = torch.constant.none
    %13591 = torch.aten.mean.dim %13580, %13590, %true_11177, %none_11178 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_11179 = torch.constant.int -1
    %13592 = torch.prim.ListConstruct %int-1_11179 : (!torch.int) -> !torch.list<int>
    %true_11180 = torch.constant.bool true
    %none_11181 = torch.constant.none
    %13593 = torch.aten.mean.dim %13581, %13592, %true_11180, %none_11181 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_11182 = torch.constant.int -1
    %13594 = torch.prim.ListConstruct %int-1_11182 : (!torch.int) -> !torch.list<int>
    %true_11183 = torch.constant.bool true
    %none_11184 = torch.constant.none
    %13595 = torch.aten.mean.dim %13582, %13594, %true_11183, %none_11184 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_11185 = torch.constant.int -1
    %13596 = torch.prim.ListConstruct %int-1_11185 : (!torch.int) -> !torch.list<int>
    %true_11186 = torch.constant.bool true
    %none_11187 = torch.constant.none
    %13597 = torch.aten.mean.dim %13583, %13596, %true_11186, %none_11187 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_11188 = torch.constant.int -1
    %13598 = torch.prim.ListConstruct %int-1_11188 : (!torch.int) -> !torch.list<int>
    %true_11189 = torch.constant.bool true
    %none_11190 = torch.constant.none
    %13599 = torch.aten.mean.dim %13584, %13598, %true_11189, %none_11190 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_11191 = torch.constant.int -1
    %13600 = torch.prim.ListConstruct %int-1_11191 : (!torch.int) -> !torch.list<int>
    %true_11192 = torch.constant.bool true
    %none_11193 = torch.constant.none
    %13601 = torch.aten.mean.dim %13585, %13600, %true_11192, %none_11193 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
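    // Add the epsilon (the float32 rounding of 1e-05) for numerical stability.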
    %float9.999990e-06_11194 = torch.constant.float 9.9999997473787516E-6
    %int1_11195 = torch.constant.int 1
    %13602 = torch.aten.add.Scalar %13587, %float9.999990e-06_11194, %int1_11195 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_11196 = torch.constant.float 9.9999997473787516E-6
    %int1_11197 = torch.constant.int 1
    %13603 = torch.aten.add.Scalar %13589, %float9.999990e-06_11196, %int1_11197 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_11198 = torch.constant.float 9.9999997473787516E-6
    %int1_11199 = torch.constant.int 1
    %13604 = torch.aten.add.Scalar %13591, %float9.999990e-06_11198, %int1_11199 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_11200 = torch.constant.float 9.9999997473787516E-6
    %int1_11201 = torch.constant.int 1
    %13605 = torch.aten.add.Scalar %13593, %float9.999990e-06_11200, %int1_11201 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_11202 = torch.constant.float 9.9999997473787516E-6
    %int1_11203 = torch.constant.int 1
    %13606 = torch.aten.add.Scalar %13595, %float9.999990e-06_11202, %int1_11203 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_11204 = torch.constant.float 9.9999997473787516E-6
    %int1_11205 = torch.constant.int 1
    %13607 = torch.aten.add.Scalar %13597, %float9.999990e-06_11204, %int1_11205 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_11206 = torch.constant.float 9.9999997473787516E-6
    %int1_11207 = torch.constant.int 1
    %13608 = torch.aten.add.Scalar %13599, %float9.999990e-06_11206, %int1_11207 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_11208 = torch.constant.float 9.9999997473787516E-6
    %int1_11209 = torch.constant.int 1
    %13609 = torch.aten.add.Scalar %13601, %float9.999990e-06_11208, %int1_11209 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
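    // rsqrt of the mean-square: the reciprocal RMS of each token.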
    %13610 = torch.aten.rsqrt %13602 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13611 = torch.aten.rsqrt %13603 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13612 = torch.aten.rsqrt %13604 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13613 = torch.aten.rsqrt %13605 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13614 = torch.aten.rsqrt %13606 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13615 = torch.aten.rsqrt %13607 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13616 = torch.aten.rsqrt %13608 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %13617 = torch.aten.rsqrt %13609 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %13617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
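    // Normalize: scale each activation by its token's reciprocal RMS.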
    %13618 = torch.aten.mul.Tensor %13570, %13610 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13619 = torch.aten.mul.Tensor %13571, %13611 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13620 = torch.aten.mul.Tensor %13572, %13612 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13621 = torch.aten.mul.Tensor %13573, %13613 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13622 = torch.aten.mul.Tensor %13574, %13614 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13623 = torch.aten.mul.Tensor %13575, %13615 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13624 = torch.aten.mul.Tensor %13576, %13616 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13625 = torch.aten.mul.Tensor %13577, %13617 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
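    // Multiply by the learned scale vectors %440 through %447 ([4096]xf32), seemingly one replica of the same norm weight per device.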
    %13626 = torch.aten.mul.Tensor %440, %13618 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13627 = torch.aten.mul.Tensor %441, %13619 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13628 = torch.aten.mul.Tensor %442, %13620 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13629 = torch.aten.mul.Tensor %443, %13621 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13630 = torch.aten.mul.Tensor %444, %13622 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13631 = torch.aten.mul.Tensor %445, %13623 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13632 = torch.aten.mul.Tensor %446, %13624 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %13633 = torch.aten.mul.Tensor %447, %13625 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %13633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
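    // Downcast the normalized result back to f16 (torch dtype 5).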
    %int5_11210 = torch.constant.int 5
    %13634 = torch.prims.convert_element_type %13626, %int5_11210 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_11211 = torch.constant.int 5
    %13635 = torch.prims.convert_element_type %13627, %int5_11211 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_11212 = torch.constant.int 5
    %13636 = torch.prims.convert_element_type %13628, %int5_11212 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_11213 = torch.constant.int 5
    %13637 = torch.prims.convert_element_type %13629, %int5_11213 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_11214 = torch.constant.int 5
    %13638 = torch.prims.convert_element_type %13630, %int5_11214 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_11215 = torch.constant.int 5
    %13639 = torch.prims.convert_element_type %13631, %int5_11215 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_11216 = torch.constant.int 5
    %13640 = torch.prims.convert_element_type %13632, %int5_11216 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_11217 = torch.constant.int 5
    %13641 = torch.prims.convert_element_type %13633, %int5_11217 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %13641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
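    // Transpose the eight [512,4096] f16 weight shards (%448-%455) to [4096,512] for
    // use as matmul right-hand sides; the shape matches the attn_q.weight.shard.N
    // sharding declared in the module globals.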
    %int1_11218 = torch.constant.int 1
    %int0_11219 = torch.constant.int 0
    %13642 = torch.prim.ListConstruct %int1_11218, %int0_11219 : (!torch.int, !torch.int) -> !torch.list<int>
    %13643 = torch.aten.permute %448, %13642 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_11220 = torch.constant.int 1
    %int0_11221 = torch.constant.int 0
    %13644 = torch.prim.ListConstruct %int1_11220, %int0_11221 : (!torch.int, !torch.int) -> !torch.list<int>
    %13645 = torch.aten.permute %449, %13644 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_11222 = torch.constant.int 1
    %int0_11223 = torch.constant.int 0
    %13646 = torch.prim.ListConstruct %int1_11222, %int0_11223 : (!torch.int, !torch.int) -> !torch.list<int>
    %13647 = torch.aten.permute %450, %13646 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_11224 = torch.constant.int 1
    %int0_11225 = torch.constant.int 0
    %13648 = torch.prim.ListConstruct %int1_11224, %int0_11225 : (!torch.int, !torch.int) -> !torch.list<int>
    %13649 = torch.aten.permute %451, %13648 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_11226 = torch.constant.int 1
    %int0_11227 = torch.constant.int 0
    %13650 = torch.prim.ListConstruct %int1_11226, %int0_11227 : (!torch.int, !torch.int) -> !torch.list<int>
    %13651 = torch.aten.permute %452, %13650 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_11228 = torch.constant.int 1
    %int0_11229 = torch.constant.int 0
    %13652 = torch.prim.ListConstruct %int1_11228, %int0_11229 : (!torch.int, !torch.int) -> !torch.list<int>
    %13653 = torch.aten.permute %453, %13652 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_11230 = torch.constant.int 1
    %int0_11231 = torch.constant.int 0
    %13654 = torch.prim.ListConstruct %int1_11230, %int0_11231 : (!torch.int, !torch.int) -> !torch.list<int>
    %13655 = torch.aten.permute %454, %13654 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_11232 = torch.constant.int 1
    %int0_11233 = torch.constant.int 0
    %13656 = torch.prim.ListConstruct %int1_11232, %int0_11233 : (!torch.int, !torch.int) -> !torch.list<int>
    %13657 = torch.aten.permute %455, %13656 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
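    // Q projection, one matmul per device: flatten [4,?,4096] to [4*?,4096],
    // multiply by the [4096,512] shard, then restore the [4,?,512] batch layout
    // (512 = 4 query heads x head_dim 128 per shard).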
    %int4_11234 = torch.constant.int 4
    %13658 = torch.aten.mul.int %int4_11234, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11235 = torch.constant.int 4096
    %13659 = torch.prim.ListConstruct %13658, %int4096_11235 : (!torch.int, !torch.int) -> !torch.list<int>
    %13660 = torch.aten.view %13634, %13659 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13660, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13661 = torch.aten.mm %13660, %13643 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %13661, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_11236 = torch.constant.int 4
    %int512_11237 = torch.constant.int 512
    %13662 = torch.prim.ListConstruct %int4_11236, %2482, %int512_11237 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13663 = torch.aten.view %13661, %13662 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %13663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_11238 = torch.constant.int 4
    %13664 = torch.aten.mul.int %int4_11238, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11239 = torch.constant.int 4096
    %13665 = torch.prim.ListConstruct %13664, %int4096_11239 : (!torch.int, !torch.int) -> !torch.list<int>
    %13666 = torch.aten.view %13635, %13665 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13666, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13667 = torch.aten.mm %13666, %13645 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %13667, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_11240 = torch.constant.int 4
    %int512_11241 = torch.constant.int 512
    %13668 = torch.prim.ListConstruct %int4_11240, %2482, %int512_11241 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13669 = torch.aten.view %13667, %13668 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %13669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_11242 = torch.constant.int 4
    %13670 = torch.aten.mul.int %int4_11242, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11243 = torch.constant.int 4096
    %13671 = torch.prim.ListConstruct %13670, %int4096_11243 : (!torch.int, !torch.int) -> !torch.list<int>
    %13672 = torch.aten.view %13636, %13671 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13672, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13673 = torch.aten.mm %13672, %13647 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %13673, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_11244 = torch.constant.int 4
    %int512_11245 = torch.constant.int 512
    %13674 = torch.prim.ListConstruct %int4_11244, %2482, %int512_11245 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13675 = torch.aten.view %13673, %13674 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %13675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_11246 = torch.constant.int 4
    %13676 = torch.aten.mul.int %int4_11246, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11247 = torch.constant.int 4096
    %13677 = torch.prim.ListConstruct %13676, %int4096_11247 : (!torch.int, !torch.int) -> !torch.list<int>
    %13678 = torch.aten.view %13637, %13677 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13678, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13679 = torch.aten.mm %13678, %13649 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %13679, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_11248 = torch.constant.int 4
    %int512_11249 = torch.constant.int 512
    %13680 = torch.prim.ListConstruct %int4_11248, %2482, %int512_11249 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13681 = torch.aten.view %13679, %13680 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %13681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_11250 = torch.constant.int 4
    %13682 = torch.aten.mul.int %int4_11250, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11251 = torch.constant.int 4096
    %13683 = torch.prim.ListConstruct %13682, %int4096_11251 : (!torch.int, !torch.int) -> !torch.list<int>
    %13684 = torch.aten.view %13638, %13683 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13684, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13685 = torch.aten.mm %13684, %13651 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %13685, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_11252 = torch.constant.int 4
    %int512_11253 = torch.constant.int 512
    %13686 = torch.prim.ListConstruct %int4_11252, %2482, %int512_11253 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13687 = torch.aten.view %13685, %13686 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %13687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_11254 = torch.constant.int 4
    %13688 = torch.aten.mul.int %int4_11254, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11255 = torch.constant.int 4096
    %13689 = torch.prim.ListConstruct %13688, %int4096_11255 : (!torch.int, !torch.int) -> !torch.list<int>
    %13690 = torch.aten.view %13639, %13689 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13690, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13691 = torch.aten.mm %13690, %13653 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %13691, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_11256 = torch.constant.int 4
    %int512_11257 = torch.constant.int 512
    %13692 = torch.prim.ListConstruct %int4_11256, %2482, %int512_11257 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13693 = torch.aten.view %13691, %13692 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %13693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_11258 = torch.constant.int 4
    %13694 = torch.aten.mul.int %int4_11258, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11259 = torch.constant.int 4096
    %13695 = torch.prim.ListConstruct %13694, %int4096_11259 : (!torch.int, !torch.int) -> !torch.list<int>
    %13696 = torch.aten.view %13640, %13695 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13696, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13697 = torch.aten.mm %13696, %13655 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %13697, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_11260 = torch.constant.int 4
    %int512_11261 = torch.constant.int 512
    %13698 = torch.prim.ListConstruct %int4_11260, %2482, %int512_11261 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13699 = torch.aten.view %13697, %13698 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %13699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_11262 = torch.constant.int 4
    %13700 = torch.aten.mul.int %int4_11262, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11263 = torch.constant.int 4096
    %13701 = torch.prim.ListConstruct %13700, %int4096_11263 : (!torch.int, !torch.int) -> !torch.list<int>
    %13702 = torch.aten.view %13641, %13701 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13702, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13703 = torch.aten.mm %13702, %13657 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %13703, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_11264 = torch.constant.int 4
    %int512_11265 = torch.constant.int 512
    %13704 = torch.prim.ListConstruct %int4_11264, %2482, %int512_11265 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13705 = torch.aten.view %13703, %13704 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %13705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
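    // Transpose the next eight weight shards (%456-%463) from [128,4096] to
    // [4096,128]; the narrower output width suggests these are the K projections.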
    %int1_11266 = torch.constant.int 1
    %int0_11267 = torch.constant.int 0
    %13706 = torch.prim.ListConstruct %int1_11266, %int0_11267 : (!torch.int, !torch.int) -> !torch.list<int>
    %13707 = torch.aten.permute %456, %13706 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11268 = torch.constant.int 1
    %int0_11269 = torch.constant.int 0
    %13708 = torch.prim.ListConstruct %int1_11268, %int0_11269 : (!torch.int, !torch.int) -> !torch.list<int>
    %13709 = torch.aten.permute %457, %13708 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11270 = torch.constant.int 1
    %int0_11271 = torch.constant.int 0
    %13710 = torch.prim.ListConstruct %int1_11270, %int0_11271 : (!torch.int, !torch.int) -> !torch.list<int>
    %13711 = torch.aten.permute %458, %13710 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11272 = torch.constant.int 1
    %int0_11273 = torch.constant.int 0
    %13712 = torch.prim.ListConstruct %int1_11272, %int0_11273 : (!torch.int, !torch.int) -> !torch.list<int>
    %13713 = torch.aten.permute %459, %13712 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11274 = torch.constant.int 1
    %int0_11275 = torch.constant.int 0
    %13714 = torch.prim.ListConstruct %int1_11274, %int0_11275 : (!torch.int, !torch.int) -> !torch.list<int>
    %13715 = torch.aten.permute %460, %13714 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11276 = torch.constant.int 1
    %int0_11277 = torch.constant.int 0
    %13716 = torch.prim.ListConstruct %int1_11276, %int0_11277 : (!torch.int, !torch.int) -> !torch.list<int>
    %13717 = torch.aten.permute %461, %13716 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11278 = torch.constant.int 1
    %int0_11279 = torch.constant.int 0
    %13718 = torch.prim.ListConstruct %int1_11278, %int0_11279 : (!torch.int, !torch.int) -> !torch.list<int>
    %13719 = torch.aten.permute %462, %13718 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11280 = torch.constant.int 1
    %int0_11281 = torch.constant.int 0
    %13720 = torch.prim.ListConstruct %int1_11280, %int0_11281 : (!torch.int, !torch.int) -> !torch.list<int>
    %13721 = torch.aten.permute %463, %13720 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
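    // K projection per device, yielding [4,?,128]: a single 128-wide KV head per
    // shard, consistent with grouped-query attention spread across 8 devices.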
    %int4_11282 = torch.constant.int 4
    %13722 = torch.aten.mul.int %int4_11282, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11283 = torch.constant.int 4096
    %13723 = torch.prim.ListConstruct %13722, %int4096_11283 : (!torch.int, !torch.int) -> !torch.list<int>
    %13724 = torch.aten.view %13634, %13723 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13724, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13725 = torch.aten.mm %13724, %13707 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13725, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11284 = torch.constant.int 4
    %int128_11285 = torch.constant.int 128
    %13726 = torch.prim.ListConstruct %int4_11284, %2482, %int128_11285 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13727 = torch.aten.view %13725, %13726 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11286 = torch.constant.int 4
    %13728 = torch.aten.mul.int %int4_11286, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11287 = torch.constant.int 4096
    %13729 = torch.prim.ListConstruct %13728, %int4096_11287 : (!torch.int, !torch.int) -> !torch.list<int>
    %13730 = torch.aten.view %13635, %13729 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13730, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13731 = torch.aten.mm %13730, %13709 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13731, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11288 = torch.constant.int 4
    %int128_11289 = torch.constant.int 128
    %13732 = torch.prim.ListConstruct %int4_11288, %2482, %int128_11289 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13733 = torch.aten.view %13731, %13732 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11290 = torch.constant.int 4
    %13734 = torch.aten.mul.int %int4_11290, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11291 = torch.constant.int 4096
    %13735 = torch.prim.ListConstruct %13734, %int4096_11291 : (!torch.int, !torch.int) -> !torch.list<int>
    %13736 = torch.aten.view %13636, %13735 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13736, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13737 = torch.aten.mm %13736, %13711 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13737, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11292 = torch.constant.int 4
    %int128_11293 = torch.constant.int 128
    %13738 = torch.prim.ListConstruct %int4_11292, %2482, %int128_11293 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13739 = torch.aten.view %13737, %13738 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11294 = torch.constant.int 4
    %13740 = torch.aten.mul.int %int4_11294, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11295 = torch.constant.int 4096
    %13741 = torch.prim.ListConstruct %13740, %int4096_11295 : (!torch.int, !torch.int) -> !torch.list<int>
    %13742 = torch.aten.view %13637, %13741 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13742, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13743 = torch.aten.mm %13742, %13713 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13743, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11296 = torch.constant.int 4
    %int128_11297 = torch.constant.int 128
    %13744 = torch.prim.ListConstruct %int4_11296, %2482, %int128_11297 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13745 = torch.aten.view %13743, %13744 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11298 = torch.constant.int 4
    %13746 = torch.aten.mul.int %int4_11298, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11299 = torch.constant.int 4096
    %13747 = torch.prim.ListConstruct %13746, %int4096_11299 : (!torch.int, !torch.int) -> !torch.list<int>
    %13748 = torch.aten.view %13638, %13747 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13748, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13749 = torch.aten.mm %13748, %13715 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13749, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11300 = torch.constant.int 4
    %int128_11301 = torch.constant.int 128
    %13750 = torch.prim.ListConstruct %int4_11300, %2482, %int128_11301 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13751 = torch.aten.view %13749, %13750 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11302 = torch.constant.int 4
    %13752 = torch.aten.mul.int %int4_11302, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11303 = torch.constant.int 4096
    %13753 = torch.prim.ListConstruct %13752, %int4096_11303 : (!torch.int, !torch.int) -> !torch.list<int>
    %13754 = torch.aten.view %13639, %13753 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13754, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13755 = torch.aten.mm %13754, %13717 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13755, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11304 = torch.constant.int 4
    %int128_11305 = torch.constant.int 128
    %13756 = torch.prim.ListConstruct %int4_11304, %2482, %int128_11305 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13757 = torch.aten.view %13755, %13756 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11306 = torch.constant.int 4
    %13758 = torch.aten.mul.int %int4_11306, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11307 = torch.constant.int 4096
    %13759 = torch.prim.ListConstruct %13758, %int4096_11307 : (!torch.int, !torch.int) -> !torch.list<int>
    %13760 = torch.aten.view %13640, %13759 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13760, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13761 = torch.aten.mm %13760, %13719 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13761, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11308 = torch.constant.int 4
    %int128_11309 = torch.constant.int 128
    %13762 = torch.prim.ListConstruct %int4_11308, %2482, %int128_11309 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13763 = torch.aten.view %13761, %13762 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11310 = torch.constant.int 4
    %13764 = torch.aten.mul.int %int4_11310, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11311 = torch.constant.int 4096
    %13765 = torch.prim.ListConstruct %13764, %int4096_11311 : (!torch.int, !torch.int) -> !torch.list<int>
    %13766 = torch.aten.view %13641, %13765 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13766, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13767 = torch.aten.mm %13766, %13721 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13767, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11312 = torch.constant.int 4
    %int128_11313 = torch.constant.int 128
    %13768 = torch.prim.ListConstruct %int4_11312, %2482, %int128_11313 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13769 = torch.aten.view %13767, %13768 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
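    // Transpose the remaining eight [128,4096] shards (%464-%471), presumably the
    // V projections, to [4096,128].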
    %int1_11314 = torch.constant.int 1
    %int0_11315 = torch.constant.int 0
    %13770 = torch.prim.ListConstruct %int1_11314, %int0_11315 : (!torch.int, !torch.int) -> !torch.list<int>
    %13771 = torch.aten.permute %464, %13770 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11316 = torch.constant.int 1
    %int0_11317 = torch.constant.int 0
    %13772 = torch.prim.ListConstruct %int1_11316, %int0_11317 : (!torch.int, !torch.int) -> !torch.list<int>
    %13773 = torch.aten.permute %465, %13772 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11318 = torch.constant.int 1
    %int0_11319 = torch.constant.int 0
    %13774 = torch.prim.ListConstruct %int1_11318, %int0_11319 : (!torch.int, !torch.int) -> !torch.list<int>
    %13775 = torch.aten.permute %466, %13774 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11320 = torch.constant.int 1
    %int0_11321 = torch.constant.int 0
    %13776 = torch.prim.ListConstruct %int1_11320, %int0_11321 : (!torch.int, !torch.int) -> !torch.list<int>
    %13777 = torch.aten.permute %467, %13776 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11322 = torch.constant.int 1
    %int0_11323 = torch.constant.int 0
    %13778 = torch.prim.ListConstruct %int1_11322, %int0_11323 : (!torch.int, !torch.int) -> !torch.list<int>
    %13779 = torch.aten.permute %468, %13778 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11324 = torch.constant.int 1
    %int0_11325 = torch.constant.int 0
    %13780 = torch.prim.ListConstruct %int1_11324, %int0_11325 : (!torch.int, !torch.int) -> !torch.list<int>
    %13781 = torch.aten.permute %469, %13780 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11326 = torch.constant.int 1
    %int0_11327 = torch.constant.int 0
    %13782 = torch.prim.ListConstruct %int1_11326, %int0_11327 : (!torch.int, !torch.int) -> !torch.list<int>
    %13783 = torch.aten.permute %470, %13782 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_11328 = torch.constant.int 1
    %int0_11329 = torch.constant.int 0
    %13784 = torch.prim.ListConstruct %int1_11328, %int0_11329 : (!torch.int, !torch.int) -> !torch.list<int>
    %13785 = torch.aten.permute %471, %13784 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
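    // V projection per device, same flatten/matmul/reshape pattern as K,
    // yielding [4,?,128] per shard.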
    %int4_11330 = torch.constant.int 4
    %13786 = torch.aten.mul.int %int4_11330, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11331 = torch.constant.int 4096
    %13787 = torch.prim.ListConstruct %13786, %int4096_11331 : (!torch.int, !torch.int) -> !torch.list<int>
    %13788 = torch.aten.view %13634, %13787 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13788, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13789 = torch.aten.mm %13788, %13771 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13789, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11332 = torch.constant.int 4
    %int128_11333 = torch.constant.int 128
    %13790 = torch.prim.ListConstruct %int4_11332, %2482, %int128_11333 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13791 = torch.aten.view %13789, %13790 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11334 = torch.constant.int 4
    %13792 = torch.aten.mul.int %int4_11334, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11335 = torch.constant.int 4096
    %13793 = torch.prim.ListConstruct %13792, %int4096_11335 : (!torch.int, !torch.int) -> !torch.list<int>
    %13794 = torch.aten.view %13635, %13793 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13794, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13795 = torch.aten.mm %13794, %13773 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13795, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11336 = torch.constant.int 4
    %int128_11337 = torch.constant.int 128
    %13796 = torch.prim.ListConstruct %int4_11336, %2482, %int128_11337 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13797 = torch.aten.view %13795, %13796 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11338 = torch.constant.int 4
    %13798 = torch.aten.mul.int %int4_11338, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11339 = torch.constant.int 4096
    %13799 = torch.prim.ListConstruct %13798, %int4096_11339 : (!torch.int, !torch.int) -> !torch.list<int>
    %13800 = torch.aten.view %13636, %13799 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13800, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13801 = torch.aten.mm %13800, %13775 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13801, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11340 = torch.constant.int 4
    %int128_11341 = torch.constant.int 128
    %13802 = torch.prim.ListConstruct %int4_11340, %2482, %int128_11341 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13803 = torch.aten.view %13801, %13802 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11342 = torch.constant.int 4
    %13804 = torch.aten.mul.int %int4_11342, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11343 = torch.constant.int 4096
    %13805 = torch.prim.ListConstruct %13804, %int4096_11343 : (!torch.int, !torch.int) -> !torch.list<int>
    %13806 = torch.aten.view %13637, %13805 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13806, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13807 = torch.aten.mm %13806, %13777 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13807, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11344 = torch.constant.int 4
    %int128_11345 = torch.constant.int 128
    %13808 = torch.prim.ListConstruct %int4_11344, %2482, %int128_11345 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13809 = torch.aten.view %13807, %13808 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11346 = torch.constant.int 4
    %13810 = torch.aten.mul.int %int4_11346, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11347 = torch.constant.int 4096
    %13811 = torch.prim.ListConstruct %13810, %int4096_11347 : (!torch.int, !torch.int) -> !torch.list<int>
    %13812 = torch.aten.view %13638, %13811 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13812, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13813 = torch.aten.mm %13812, %13779 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13813, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11348 = torch.constant.int 4
    %int128_11349 = torch.constant.int 128
    %13814 = torch.prim.ListConstruct %int4_11348, %2482, %int128_11349 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13815 = torch.aten.view %13813, %13814 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11350 = torch.constant.int 4
    %13816 = torch.aten.mul.int %int4_11350, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11351 = torch.constant.int 4096
    %13817 = torch.prim.ListConstruct %13816, %int4096_11351 : (!torch.int, !torch.int) -> !torch.list<int>
    %13818 = torch.aten.view %13639, %13817 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13818, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13819 = torch.aten.mm %13818, %13781 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13819, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11352 = torch.constant.int 4
    %int128_11353 = torch.constant.int 128
    %13820 = torch.prim.ListConstruct %int4_11352, %2482, %int128_11353 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13821 = torch.aten.view %13819, %13820 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11354 = torch.constant.int 4
    %13822 = torch.aten.mul.int %int4_11354, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11355 = torch.constant.int 4096
    %13823 = torch.prim.ListConstruct %13822, %int4096_11355 : (!torch.int, !torch.int) -> !torch.list<int>
    %13824 = torch.aten.view %13640, %13823 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13824, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13825 = torch.aten.mm %13824, %13783 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13825, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11356 = torch.constant.int 4
    %int128_11357 = torch.constant.int 128
    %13826 = torch.prim.ListConstruct %int4_11356, %2482, %int128_11357 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13827 = torch.aten.view %13825, %13826 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_11358 = torch.constant.int 4
    %13828 = torch.aten.mul.int %int4_11358, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_11359 = torch.constant.int 4096
    %13829 = torch.prim.ListConstruct %13828, %int4096_11359 : (!torch.int, !torch.int) -> !torch.list<int>
    %13830 = torch.aten.view %13641, %13829 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %13830, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %13831 = torch.aten.mm %13830, %13785 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %13831, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_11360 = torch.constant.int 4
    %int128_11361 = torch.constant.int 128
    %13832 = torch.prim.ListConstruct %int4_11360, %2482, %int128_11361 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13833 = torch.aten.view %13831, %13832 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %13833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
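    // Split each Q shard into its per-head layout: [4,?,512] -> [4,?,4,128].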
    %int4_11362 = torch.constant.int 4
    %int4_11363 = torch.constant.int 4
    %int128_11364 = torch.constant.int 128
    %13834 = torch.prim.ListConstruct %int4_11362, %2482, %int4_11363, %int128_11364 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13835 = torch.aten.view %13663, %13834 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_11365 = torch.constant.int 4
    %int4_11366 = torch.constant.int 4
    %int128_11367 = torch.constant.int 128
    %13836 = torch.prim.ListConstruct %int4_11365, %2482, %int4_11366, %int128_11367 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13837 = torch.aten.view %13669, %13836 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_11368 = torch.constant.int 4
    %int4_11369 = torch.constant.int 4
    %int128_11370 = torch.constant.int 128
    %13838 = torch.prim.ListConstruct %int4_11368, %2482, %int4_11369, %int128_11370 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13839 = torch.aten.view %13675, %13838 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_11371 = torch.constant.int 4
    %int4_11372 = torch.constant.int 4
    %int128_11373 = torch.constant.int 128
    %13840 = torch.prim.ListConstruct %int4_11371, %2482, %int4_11372, %int128_11373 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13841 = torch.aten.view %13681, %13840 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_11374 = torch.constant.int 4
    %int4_11375 = torch.constant.int 4
    %int128_11376 = torch.constant.int 128
    %13842 = torch.prim.ListConstruct %int4_11374, %2482, %int4_11375, %int128_11376 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13843 = torch.aten.view %13687, %13842 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_11377 = torch.constant.int 4
    %int4_11378 = torch.constant.int 4
    %int128_11379 = torch.constant.int 128
    %13844 = torch.prim.ListConstruct %int4_11377, %2482, %int4_11378, %int128_11379 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13845 = torch.aten.view %13693, %13844 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_11380 = torch.constant.int 4
    %int4_11381 = torch.constant.int 4
    %int128_11382 = torch.constant.int 128
    %13846 = torch.prim.ListConstruct %int4_11380, %2482, %int4_11381, %int128_11382 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13847 = torch.aten.view %13699, %13846 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_11383 = torch.constant.int 4
    %int4_11384 = torch.constant.int 4
    %int128_11385 = torch.constant.int 128
    %13848 = torch.prim.ListConstruct %int4_11383, %2482, %int4_11384, %int128_11385 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13849 = torch.aten.view %13705, %13848 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
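    // Give each K shard an explicit single-head axis: [4,?,128] -> [4,?,1,128].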
    %int4_11386 = torch.constant.int 4
    %int1_11387 = torch.constant.int 1
    %int128_11388 = torch.constant.int 128
    %13850 = torch.prim.ListConstruct %int4_11386, %2482, %int1_11387, %int128_11388 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13851 = torch.aten.view %13727, %13850 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11389 = torch.constant.int 4
    %int1_11390 = torch.constant.int 1
    %int128_11391 = torch.constant.int 128
    %13852 = torch.prim.ListConstruct %int4_11389, %2482, %int1_11390, %int128_11391 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13853 = torch.aten.view %13733, %13852 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11392 = torch.constant.int 4
    %int1_11393 = torch.constant.int 1
    %int128_11394 = torch.constant.int 128
    %13854 = torch.prim.ListConstruct %int4_11392, %2482, %int1_11393, %int128_11394 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13855 = torch.aten.view %13739, %13854 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11395 = torch.constant.int 4
    %int1_11396 = torch.constant.int 1
    %int128_11397 = torch.constant.int 128
    %13856 = torch.prim.ListConstruct %int4_11395, %2482, %int1_11396, %int128_11397 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13857 = torch.aten.view %13745, %13856 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11398 = torch.constant.int 4
    %int1_11399 = torch.constant.int 1
    %int128_11400 = torch.constant.int 128
    %13858 = torch.prim.ListConstruct %int4_11398, %2482, %int1_11399, %int128_11400 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13859 = torch.aten.view %13751, %13858 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11401 = torch.constant.int 4
    %int1_11402 = torch.constant.int 1
    %int128_11403 = torch.constant.int 128
    %13860 = torch.prim.ListConstruct %int4_11401, %2482, %int1_11402, %int128_11403 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13861 = torch.aten.view %13757, %13860 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11404 = torch.constant.int 4
    %int1_11405 = torch.constant.int 1
    %int128_11406 = torch.constant.int 128
    %13862 = torch.prim.ListConstruct %int4_11404, %2482, %int1_11405, %int128_11406 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13863 = torch.aten.view %13763, %13862 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11407 = torch.constant.int 4
    %int1_11408 = torch.constant.int 1
    %int128_11409 = torch.constant.int 128
    %13864 = torch.prim.ListConstruct %int4_11407, %2482, %int1_11408, %int128_11409 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13865 = torch.aten.view %13769, %13864 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
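    // Likewise expose the single-head axis on each V shard: [4,?,128] -> [4,?,1,128].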
    %int4_11410 = torch.constant.int 4
    %int1_11411 = torch.constant.int 1
    %int128_11412 = torch.constant.int 128
    %13866 = torch.prim.ListConstruct %int4_11410, %2482, %int1_11411, %int128_11412 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13867 = torch.aten.view %13791, %13866 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11413 = torch.constant.int 4
    %int1_11414 = torch.constant.int 1
    %int128_11415 = torch.constant.int 128
    %13868 = torch.prim.ListConstruct %int4_11413, %2482, %int1_11414, %int128_11415 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13869 = torch.aten.view %13797, %13868 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11416 = torch.constant.int 4
    %int1_11417 = torch.constant.int 1
    %int128_11418 = torch.constant.int 128
    %13870 = torch.prim.ListConstruct %int4_11416, %2482, %int1_11417, %int128_11418 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13871 = torch.aten.view %13803, %13870 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11419 = torch.constant.int 4
    %int1_11420 = torch.constant.int 1
    %int128_11421 = torch.constant.int 128
    %13872 = torch.prim.ListConstruct %int4_11419, %2482, %int1_11420, %int128_11421 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13873 = torch.aten.view %13809, %13872 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11422 = torch.constant.int 4
    %int1_11423 = torch.constant.int 1
    %int128_11424 = torch.constant.int 128
    %13874 = torch.prim.ListConstruct %int4_11422, %2482, %int1_11423, %int128_11424 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13875 = torch.aten.view %13815, %13874 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11425 = torch.constant.int 4
    %int1_11426 = torch.constant.int 1
    %int128_11427 = torch.constant.int 128
    %13876 = torch.prim.ListConstruct %int4_11425, %2482, %int1_11426, %int128_11427 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13877 = torch.aten.view %13821, %13876 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11428 = torch.constant.int 4
    %int1_11429 = torch.constant.int 1
    %int128_11430 = torch.constant.int 128
    %13878 = torch.prim.ListConstruct %int4_11428, %2482, %int1_11429, %int128_11430 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13879 = torch.aten.view %13827, %13878 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_11431 = torch.constant.int 4
    %int1_11432 = torch.constant.int 1
    %int128_11433 = torch.constant.int 128
    %13880 = torch.prim.ListConstruct %int4_11431, %2482, %int1_11432, %int128_11433 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %13881 = torch.aten.view %13833, %13880 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %13881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
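    // Build a rotary-position-embedding (RoPE) table on the host: positions
    // 0..131071 and 64 inverse frequencies 1/(500000^(2i/128)), combined via
    // cos + i*sin of their outer product into a [131072,64] complex table.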
    %int131072_11434 = torch.constant.int 131072
    %none_11435 = torch.constant.none
    %none_11436 = torch.constant.none
    %cpu_11437 = torch.constant.device "cpu"
    %false_11438 = torch.constant.bool false
    %13882 = torch.aten.arange %int131072_11434, %none_11435, %none_11436, %cpu_11437, %false_11438 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_11439 = torch.constant.int 0
    %int128_11440 = torch.constant.int 128
    %int2_11441 = torch.constant.int 2
    %none_11442 = torch.constant.none
    %none_11443 = torch.constant.none
    %cpu_11444 = torch.constant.device "cpu"
    %false_11445 = torch.constant.bool false
    %13883 = torch.aten.arange.start_step %int0_11439, %int128_11440, %int2_11441, %none_11442, %none_11443, %cpu_11444, %false_11445 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_11446 = torch.constant.int 0
    %int0_11447 = torch.constant.int 0
    %int64_11448 = torch.constant.int 64
    %int1_11449 = torch.constant.int 1
    %13884 = torch.aten.slice.Tensor %13883, %int0_11446, %int0_11447, %int64_11448, %int1_11449 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_11450 = torch.constant.int 6
    %13885 = torch.prims.convert_element_type %13884, %int6_11450 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_11451 = torch.constant.int 128
    %13886 = torch.aten.div.Scalar %13885, %int128_11451 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_11452 = torch.constant.float 5.000000e+05
    %13887 = torch.aten.pow.Scalar %float5.000000e05_11452, %13886 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %13888 = torch.aten.reciprocal %13887 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_11453 = torch.constant.float 1.000000e+00
    %13889 = torch.aten.mul.Scalar %13888, %float1.000000e00_11453 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_11454 = torch.constant.int 131072
    %int1_11455 = torch.constant.int 1
    %13890 = torch.prim.ListConstruct %int131072_11454, %int1_11455 : (!torch.int, !torch.int) -> !torch.list<int>
    %13891 = torch.aten.view %13882, %13890 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %13892 = torch.aten.mul.Tensor %13891, %13889 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %13893 = torch.aten.cos %13892 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %13894 = torch.aten.sin %13892 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %13895 = torch.aten.complex %13893, %13894 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
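    // Replicate the complex rotary table onto all eight devices with
    // flow.tensor.transfer, round-tripping through builtin tensors at each hop.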
    %13896 = torch_c.to_builtin_tensor %13895 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %13897 = flow.tensor.transfer %13896 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %13898 = torch_c.from_builtin_tensor %13897 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %13899 = torch_c.to_builtin_tensor %13895 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %13900 = flow.tensor.transfer %13899 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %13901 = torch_c.from_builtin_tensor %13900 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %13902 = torch_c.to_builtin_tensor %13895 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %13903 = flow.tensor.transfer %13902 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %13904 = torch_c.from_builtin_tensor %13903 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %13905 = torch_c.to_builtin_tensor %13895 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %13906 = flow.tensor.transfer %13905 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %13907 = torch_c.from_builtin_tensor %13906 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %13908 = torch_c.to_builtin_tensor %13895 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %13909 = flow.tensor.transfer %13908 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %13910 = torch_c.from_builtin_tensor %13909 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %13911 = torch_c.to_builtin_tensor %13895 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %13912 = flow.tensor.transfer %13911 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %13913 = torch_c.from_builtin_tensor %13912 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %13914 = torch_c.to_builtin_tensor %13895 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %13915 = flow.tensor.transfer %13914 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %13916 = torch_c.from_builtin_tensor %13915 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %13917 = torch_c.to_builtin_tensor %13895 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %13918 = flow.tensor.transfer %13917 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %13919 = torch_c.from_builtin_tensor %13918 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
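    // The table is replicated to all eight devices, one flow.tensor.transfer per
    // #hal.device.promise (@__device_0 ... @__device_7); %13898, %13901, ..., %13919
    // are the per-device copies consumed by the sliced applications below.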
    %int1_11456 = torch.constant.int 1
    %13920 = torch.aten.size.int %13663, %int1_11456 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_11457 = torch.constant.int 0
    %13921 = torch.aten.add.int %int0_11457, %13920 : !torch.int, !torch.int -> !torch.int
    %int0_11458 = torch.constant.int 0
    %int0_11459 = torch.constant.int 0
    %int1_11460 = torch.constant.int 1
    %13922 = torch.aten.slice.Tensor %13898, %int0_11458, %int0_11459, %13921, %int1_11460 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %13922, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11461 = torch.constant.int 1
    %int0_11462 = torch.constant.int 0
    %int9223372036854775807_11463 = torch.constant.int 9223372036854775807
    %int1_11464 = torch.constant.int 1
    %13923 = torch.aten.slice.Tensor %13922, %int1_11461, %int0_11462, %int9223372036854775807_11463, %int1_11464 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %13923, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11465 = torch.constant.int 0
    %13924 = torch.aten.unsqueeze %13923, %int0_11465 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %13924, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11466 = torch.constant.int 2
    %13925 = torch.aten.unsqueeze %13924, %int2_11466 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %13925, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11467 = torch.constant.int 3
    %int0_11468 = torch.constant.int 0
    %int9223372036854775807_11469 = torch.constant.int 9223372036854775807
    %int1_11470 = torch.constant.int 1
    %13926 = torch.aten.slice.Tensor %13925, %int3_11467, %int0_11468, %int9223372036854775807_11469, %int1_11470 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %13926, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %13927 = torch_c.to_builtin_tensor %13835 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_11471 = arith.constant 1 : index
    %dim_11472 = tensor.dim %13927, %c1_11471 : tensor<4x?x4x128xf16>
    %13928 = flow.tensor.bitcast %13927 : tensor<4x?x4x128xf16>{%dim_11472} -> tensor<4x?x4x64xcomplex<f16>>{%dim_11472}
    %13929 = torch_c.from_builtin_tensor %13928 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %13929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %13930 = torch.aten.mul.Tensor %13929, %13926 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %13930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %13931 = torch_c.to_builtin_tensor %13930 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_11473 = arith.constant 1 : index
    %dim_11474 = tensor.dim %13931, %c1_11473 : tensor<4x?x4x64xcomplex<f32>>
    %13932 = flow.tensor.bitcast %13931 : tensor<4x?x4x64xcomplex<f32>>{%dim_11474} -> tensor<4x?x4x128xf32>{%dim_11474}
    %13933 = torch_c.from_builtin_tensor %13932 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %13933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_11475 = torch.constant.int 5
    %13934 = torch.prims.convert_element_type %13933, %int5_11475 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
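    // Device 0 application: slice the table to the runtime sequence length
    // (s0 * 16 positions), unsqueeze to [1, seq, 1, 64] for broadcasting, bitcast
    // the [4, seq, 4, 128] f16 activations to complex<f16>, rotate via complex
    // multiply, bitcast back to f32 pairs, and truncate to f16. The same sequence
    // repeats for devices 1-7 below with only the SSA names changing.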
    %int1_11476 = torch.constant.int 1
    %13935 = torch.aten.size.int %13669, %int1_11476 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_11477 = torch.constant.int 0
    %13936 = torch.aten.add.int %int0_11477, %13935 : !torch.int, !torch.int -> !torch.int
    %int0_11478 = torch.constant.int 0
    %int0_11479 = torch.constant.int 0
    %int1_11480 = torch.constant.int 1
    %13937 = torch.aten.slice.Tensor %13901, %int0_11478, %int0_11479, %13936, %int1_11480 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %13937, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11481 = torch.constant.int 1
    %int0_11482 = torch.constant.int 0
    %int9223372036854775807_11483 = torch.constant.int 9223372036854775807
    %int1_11484 = torch.constant.int 1
    %13938 = torch.aten.slice.Tensor %13937, %int1_11481, %int0_11482, %int9223372036854775807_11483, %int1_11484 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %13938, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11485 = torch.constant.int 0
    %13939 = torch.aten.unsqueeze %13938, %int0_11485 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %13939, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11486 = torch.constant.int 2
    %13940 = torch.aten.unsqueeze %13939, %int2_11486 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %13940, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11487 = torch.constant.int 3
    %int0_11488 = torch.constant.int 0
    %int9223372036854775807_11489 = torch.constant.int 9223372036854775807
    %int1_11490 = torch.constant.int 1
    %13941 = torch.aten.slice.Tensor %13940, %int3_11487, %int0_11488, %int9223372036854775807_11489, %int1_11490 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %13941, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %13942 = torch_c.to_builtin_tensor %13837 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_11491 = arith.constant 1 : index
    %dim_11492 = tensor.dim %13942, %c1_11491 : tensor<4x?x4x128xf16>
    %13943 = flow.tensor.bitcast %13942 : tensor<4x?x4x128xf16>{%dim_11492} -> tensor<4x?x4x64xcomplex<f16>>{%dim_11492}
    %13944 = torch_c.from_builtin_tensor %13943 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %13944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %13945 = torch.aten.mul.Tensor %13944, %13941 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %13945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %13946 = torch_c.to_builtin_tensor %13945 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_11493 = arith.constant 1 : index
    %dim_11494 = tensor.dim %13946, %c1_11493 : tensor<4x?x4x64xcomplex<f32>>
    %13947 = flow.tensor.bitcast %13946 : tensor<4x?x4x64xcomplex<f32>>{%dim_11494} -> tensor<4x?x4x128xf32>{%dim_11494}
    %13948 = torch_c.from_builtin_tensor %13947 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %13948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_11495 = torch.constant.int 5
    %13949 = torch.prims.convert_element_type %13948, %int5_11495 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_11496 = torch.constant.int 1
    %13950 = torch.aten.size.int %13675, %int1_11496 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_11497 = torch.constant.int 0
    %13951 = torch.aten.add.int %int0_11497, %13950 : !torch.int, !torch.int -> !torch.int
    %int0_11498 = torch.constant.int 0
    %int0_11499 = torch.constant.int 0
    %int1_11500 = torch.constant.int 1
    %13952 = torch.aten.slice.Tensor %13904, %int0_11498, %int0_11499, %13951, %int1_11500 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %13952, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11501 = torch.constant.int 1
    %int0_11502 = torch.constant.int 0
    %int9223372036854775807_11503 = torch.constant.int 9223372036854775807
    %int1_11504 = torch.constant.int 1
    %13953 = torch.aten.slice.Tensor %13952, %int1_11501, %int0_11502, %int9223372036854775807_11503, %int1_11504 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %13953, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11505 = torch.constant.int 0
    %13954 = torch.aten.unsqueeze %13953, %int0_11505 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %13954, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11506 = torch.constant.int 2
    %13955 = torch.aten.unsqueeze %13954, %int2_11506 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %13955, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11507 = torch.constant.int 3
    %int0_11508 = torch.constant.int 0
    %int9223372036854775807_11509 = torch.constant.int 9223372036854775807
    %int1_11510 = torch.constant.int 1
    %13956 = torch.aten.slice.Tensor %13955, %int3_11507, %int0_11508, %int9223372036854775807_11509, %int1_11510 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %13956, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %13957 = torch_c.to_builtin_tensor %13839 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_11511 = arith.constant 1 : index
    %dim_11512 = tensor.dim %13957, %c1_11511 : tensor<4x?x4x128xf16>
    %13958 = flow.tensor.bitcast %13957 : tensor<4x?x4x128xf16>{%dim_11512} -> tensor<4x?x4x64xcomplex<f16>>{%dim_11512}
    %13959 = torch_c.from_builtin_tensor %13958 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %13959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %13960 = torch.aten.mul.Tensor %13959, %13956 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %13960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %13961 = torch_c.to_builtin_tensor %13960 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_11513 = arith.constant 1 : index
    %dim_11514 = tensor.dim %13961, %c1_11513 : tensor<4x?x4x64xcomplex<f32>>
    %13962 = flow.tensor.bitcast %13961 : tensor<4x?x4x64xcomplex<f32>>{%dim_11514} -> tensor<4x?x4x128xf32>{%dim_11514}
    %13963 = torch_c.from_builtin_tensor %13962 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %13963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_11515 = torch.constant.int 5
    %13964 = torch.prims.convert_element_type %13963, %int5_11515 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_11516 = torch.constant.int 1
    %13965 = torch.aten.size.int %13681, %int1_11516 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_11517 = torch.constant.int 0
    %13966 = torch.aten.add.int %int0_11517, %13965 : !torch.int, !torch.int -> !torch.int
    %int0_11518 = torch.constant.int 0
    %int0_11519 = torch.constant.int 0
    %int1_11520 = torch.constant.int 1
    %13967 = torch.aten.slice.Tensor %13907, %int0_11518, %int0_11519, %13966, %int1_11520 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %13967, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11521 = torch.constant.int 1
    %int0_11522 = torch.constant.int 0
    %int9223372036854775807_11523 = torch.constant.int 9223372036854775807
    %int1_11524 = torch.constant.int 1
    %13968 = torch.aten.slice.Tensor %13967, %int1_11521, %int0_11522, %int9223372036854775807_11523, %int1_11524 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %13968, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11525 = torch.constant.int 0
    %13969 = torch.aten.unsqueeze %13968, %int0_11525 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %13969, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11526 = torch.constant.int 2
    %13970 = torch.aten.unsqueeze %13969, %int2_11526 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %13970, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11527 = torch.constant.int 3
    %int0_11528 = torch.constant.int 0
    %int9223372036854775807_11529 = torch.constant.int 9223372036854775807
    %int1_11530 = torch.constant.int 1
    %13971 = torch.aten.slice.Tensor %13970, %int3_11527, %int0_11528, %int9223372036854775807_11529, %int1_11530 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %13971, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %13972 = torch_c.to_builtin_tensor %13841 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_11531 = arith.constant 1 : index
    %dim_11532 = tensor.dim %13972, %c1_11531 : tensor<4x?x4x128xf16>
    %13973 = flow.tensor.bitcast %13972 : tensor<4x?x4x128xf16>{%dim_11532} -> tensor<4x?x4x64xcomplex<f16>>{%dim_11532}
    %13974 = torch_c.from_builtin_tensor %13973 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %13974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %13975 = torch.aten.mul.Tensor %13974, %13971 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %13975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %13976 = torch_c.to_builtin_tensor %13975 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_11533 = arith.constant 1 : index
    %dim_11534 = tensor.dim %13976, %c1_11533 : tensor<4x?x4x64xcomplex<f32>>
    %13977 = flow.tensor.bitcast %13976 : tensor<4x?x4x64xcomplex<f32>>{%dim_11534} -> tensor<4x?x4x128xf32>{%dim_11534}
    %13978 = torch_c.from_builtin_tensor %13977 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %13978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_11535 = torch.constant.int 5
    %13979 = torch.prims.convert_element_type %13978, %int5_11535 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_11536 = torch.constant.int 1
    %13980 = torch.aten.size.int %13687, %int1_11536 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_11537 = torch.constant.int 0
    %13981 = torch.aten.add.int %int0_11537, %13980 : !torch.int, !torch.int -> !torch.int
    %int0_11538 = torch.constant.int 0
    %int0_11539 = torch.constant.int 0
    %int1_11540 = torch.constant.int 1
    %13982 = torch.aten.slice.Tensor %13910, %int0_11538, %int0_11539, %13981, %int1_11540 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %13982, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11541 = torch.constant.int 1
    %int0_11542 = torch.constant.int 0
    %int9223372036854775807_11543 = torch.constant.int 9223372036854775807
    %int1_11544 = torch.constant.int 1
    %13983 = torch.aten.slice.Tensor %13982, %int1_11541, %int0_11542, %int9223372036854775807_11543, %int1_11544 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %13983, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11545 = torch.constant.int 0
    %13984 = torch.aten.unsqueeze %13983, %int0_11545 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %13984, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11546 = torch.constant.int 2
    %13985 = torch.aten.unsqueeze %13984, %int2_11546 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %13985, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11547 = torch.constant.int 3
    %int0_11548 = torch.constant.int 0
    %int9223372036854775807_11549 = torch.constant.int 9223372036854775807
    %int1_11550 = torch.constant.int 1
    %13986 = torch.aten.slice.Tensor %13985, %int3_11547, %int0_11548, %int9223372036854775807_11549, %int1_11550 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %13986, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %13987 = torch_c.to_builtin_tensor %13843 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_11551 = arith.constant 1 : index
    %dim_11552 = tensor.dim %13987, %c1_11551 : tensor<4x?x4x128xf16>
    %13988 = flow.tensor.bitcast %13987 : tensor<4x?x4x128xf16>{%dim_11552} -> tensor<4x?x4x64xcomplex<f16>>{%dim_11552}
    %13989 = torch_c.from_builtin_tensor %13988 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %13989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %13990 = torch.aten.mul.Tensor %13989, %13986 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %13990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %13991 = torch_c.to_builtin_tensor %13990 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_11553 = arith.constant 1 : index
    %dim_11554 = tensor.dim %13991, %c1_11553 : tensor<4x?x4x64xcomplex<f32>>
    %13992 = flow.tensor.bitcast %13991 : tensor<4x?x4x64xcomplex<f32>>{%dim_11554} -> tensor<4x?x4x128xf32>{%dim_11554}
    %13993 = torch_c.from_builtin_tensor %13992 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %13993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_11555 = torch.constant.int 5
    %13994 = torch.prims.convert_element_type %13993, %int5_11555 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %13994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_11556 = torch.constant.int 1
    %13995 = torch.aten.size.int %13693, %int1_11556 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_11557 = torch.constant.int 0
    %13996 = torch.aten.add.int %int0_11557, %13995 : !torch.int, !torch.int -> !torch.int
    %int0_11558 = torch.constant.int 0
    %int0_11559 = torch.constant.int 0
    %int1_11560 = torch.constant.int 1
    %13997 = torch.aten.slice.Tensor %13913, %int0_11558, %int0_11559, %13996, %int1_11560 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %13997, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11561 = torch.constant.int 1
    %int0_11562 = torch.constant.int 0
    %int9223372036854775807_11563 = torch.constant.int 9223372036854775807
    %int1_11564 = torch.constant.int 1
    %13998 = torch.aten.slice.Tensor %13997, %int1_11561, %int0_11562, %int9223372036854775807_11563, %int1_11564 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %13998, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11565 = torch.constant.int 0
    %13999 = torch.aten.unsqueeze %13998, %int0_11565 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %13999, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11566 = torch.constant.int 2
    %14000 = torch.aten.unsqueeze %13999, %int2_11566 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14000, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11567 = torch.constant.int 3
    %int0_11568 = torch.constant.int 0
    %int9223372036854775807_11569 = torch.constant.int 9223372036854775807
    %int1_11570 = torch.constant.int 1
    %14001 = torch.aten.slice.Tensor %14000, %int3_11567, %int0_11568, %int9223372036854775807_11569, %int1_11570 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14001, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %14002 = torch_c.to_builtin_tensor %13845 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_11571 = arith.constant 1 : index
    %dim_11572 = tensor.dim %14002, %c1_11571 : tensor<4x?x4x128xf16>
    %14003 = flow.tensor.bitcast %14002 : tensor<4x?x4x128xf16>{%dim_11572} -> tensor<4x?x4x64xcomplex<f16>>{%dim_11572}
    %14004 = torch_c.from_builtin_tensor %14003 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %14004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %14005 = torch.aten.mul.Tensor %14004, %14001 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %14005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %14006 = torch_c.to_builtin_tensor %14005 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_11573 = arith.constant 1 : index
    %dim_11574 = tensor.dim %14006, %c1_11573 : tensor<4x?x4x64xcomplex<f32>>
    %14007 = flow.tensor.bitcast %14006 : tensor<4x?x4x64xcomplex<f32>>{%dim_11574} -> tensor<4x?x4x128xf32>{%dim_11574}
    %14008 = torch_c.from_builtin_tensor %14007 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %14008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_11575 = torch.constant.int 5
    %14009 = torch.prims.convert_element_type %14008, %int5_11575 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_11576 = torch.constant.int 1
    %14010 = torch.aten.size.int %13699, %int1_11576 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_11577 = torch.constant.int 0
    %14011 = torch.aten.add.int %int0_11577, %14010 : !torch.int, !torch.int -> !torch.int
    %int0_11578 = torch.constant.int 0
    %int0_11579 = torch.constant.int 0
    %int1_11580 = torch.constant.int 1
    %14012 = torch.aten.slice.Tensor %13916, %int0_11578, %int0_11579, %14011, %int1_11580 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14012, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11581 = torch.constant.int 1
    %int0_11582 = torch.constant.int 0
    %int9223372036854775807_11583 = torch.constant.int 9223372036854775807
    %int1_11584 = torch.constant.int 1
    %14013 = torch.aten.slice.Tensor %14012, %int1_11581, %int0_11582, %int9223372036854775807_11583, %int1_11584 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14013, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11585 = torch.constant.int 0
    %14014 = torch.aten.unsqueeze %14013, %int0_11585 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %14014, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11586 = torch.constant.int 2
    %14015 = torch.aten.unsqueeze %14014, %int2_11586 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14015, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11587 = torch.constant.int 3
    %int0_11588 = torch.constant.int 0
    %int9223372036854775807_11589 = torch.constant.int 9223372036854775807
    %int1_11590 = torch.constant.int 1
    %14016 = torch.aten.slice.Tensor %14015, %int3_11587, %int0_11588, %int9223372036854775807_11589, %int1_11590 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14016, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %14017 = torch_c.to_builtin_tensor %13847 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_11591 = arith.constant 1 : index
    %dim_11592 = tensor.dim %14017, %c1_11591 : tensor<4x?x4x128xf16>
    %14018 = flow.tensor.bitcast %14017 : tensor<4x?x4x128xf16>{%dim_11592} -> tensor<4x?x4x64xcomplex<f16>>{%dim_11592}
    %14019 = torch_c.from_builtin_tensor %14018 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %14019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %14020 = torch.aten.mul.Tensor %14019, %14016 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %14020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %14021 = torch_c.to_builtin_tensor %14020 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_11593 = arith.constant 1 : index
    %dim_11594 = tensor.dim %14021, %c1_11593 : tensor<4x?x4x64xcomplex<f32>>
    %14022 = flow.tensor.bitcast %14021 : tensor<4x?x4x64xcomplex<f32>>{%dim_11594} -> tensor<4x?x4x128xf32>{%dim_11594}
    %14023 = torch_c.from_builtin_tensor %14022 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %14023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_11595 = torch.constant.int 5
    %14024 = torch.prims.convert_element_type %14023, %int5_11595 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_11596 = torch.constant.int 1
    %14025 = torch.aten.size.int %13705, %int1_11596 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_11597 = torch.constant.int 0
    %14026 = torch.aten.add.int %int0_11597, %14025 : !torch.int, !torch.int -> !torch.int
    %int0_11598 = torch.constant.int 0
    %int0_11599 = torch.constant.int 0
    %int1_11600 = torch.constant.int 1
    %14027 = torch.aten.slice.Tensor %13919, %int0_11598, %int0_11599, %14026, %int1_11600 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14027, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11601 = torch.constant.int 1
    %int0_11602 = torch.constant.int 0
    %int9223372036854775807_11603 = torch.constant.int 9223372036854775807
    %int1_11604 = torch.constant.int 1
    %14028 = torch.aten.slice.Tensor %14027, %int1_11601, %int0_11602, %int9223372036854775807_11603, %int1_11604 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14028, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11605 = torch.constant.int 0
    %14029 = torch.aten.unsqueeze %14028, %int0_11605 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %14029, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11606 = torch.constant.int 2
    %14030 = torch.aten.unsqueeze %14029, %int2_11606 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14030, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11607 = torch.constant.int 3
    %int0_11608 = torch.constant.int 0
    %int9223372036854775807_11609 = torch.constant.int 9223372036854775807
    %int1_11610 = torch.constant.int 1
    %14031 = torch.aten.slice.Tensor %14030, %int3_11607, %int0_11608, %int9223372036854775807_11609, %int1_11610 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14031, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %14032 = torch_c.to_builtin_tensor %13849 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_11611 = arith.constant 1 : index
    %dim_11612 = tensor.dim %14032, %c1_11611 : tensor<4x?x4x128xf16>
    %14033 = flow.tensor.bitcast %14032 : tensor<4x?x4x128xf16>{%dim_11612} -> tensor<4x?x4x64xcomplex<f16>>{%dim_11612}
    %14034 = torch_c.from_builtin_tensor %14033 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %14034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %14035 = torch.aten.mul.Tensor %14034, %14031 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %14035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %14036 = torch_c.to_builtin_tensor %14035 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_11613 = arith.constant 1 : index
    %dim_11614 = tensor.dim %14036, %c1_11613 : tensor<4x?x4x64xcomplex<f32>>
    %14037 = flow.tensor.bitcast %14036 : tensor<4x?x4x64xcomplex<f32>>{%dim_11614} -> tensor<4x?x4x128xf32>{%dim_11614}
    %14038 = torch_c.from_builtin_tensor %14037 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %14038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_11615 = torch.constant.int 5
    %14039 = torch.prims.convert_element_type %14038, %int5_11615 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
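    // End of RoPE for the [4, seq, 4, 128] shards (4 heads per device; consistent
    // with 32 query heads split 8 ways, though that reading is inferred from the
    // shapes, not stated in the IR). Next, the same rotation is built again and
    // applied to the single-head [4, seq, 1, 128] shards.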
    %int131072_11616 = torch.constant.int 131072
    %none_11617 = torch.constant.none
    %none_11618 = torch.constant.none
    %cpu_11619 = torch.constant.device "cpu"
    %false_11620 = torch.constant.bool false
    %14040 = torch.aten.arange %int131072_11616, %none_11617, %none_11618, %cpu_11619, %false_11620 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_11621 = torch.constant.int 0
    %int128_11622 = torch.constant.int 128
    %int2_11623 = torch.constant.int 2
    %none_11624 = torch.constant.none
    %none_11625 = torch.constant.none
    %cpu_11626 = torch.constant.device "cpu"
    %false_11627 = torch.constant.bool false
    %14041 = torch.aten.arange.start_step %int0_11621, %int128_11622, %int2_11623, %none_11624, %none_11625, %cpu_11626, %false_11627 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_11628 = torch.constant.int 0
    %int0_11629 = torch.constant.int 0
    %int64_11630 = torch.constant.int 64
    %int1_11631 = torch.constant.int 1
    %14042 = torch.aten.slice.Tensor %14041, %int0_11628, %int0_11629, %int64_11630, %int1_11631 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_11632 = torch.constant.int 6
    %14043 = torch.prims.convert_element_type %14042, %int6_11632 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_11633 = torch.constant.int 128
    %14044 = torch.aten.div.Scalar %14043, %int128_11633 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_11634 = torch.constant.float 5.000000e+05
    %14045 = torch.aten.pow.Scalar %float5.000000e05_11634, %14044 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %14046 = torch.aten.reciprocal %14045 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_11635 = torch.constant.float 1.000000e+00
    %14047 = torch.aten.mul.Scalar %14046, %float1.000000e00_11635 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_11636 = torch.constant.int 131072
    %int1_11637 = torch.constant.int 1
    %14048 = torch.prim.ListConstruct %int131072_11636, %int1_11637 : (!torch.int, !torch.int) -> !torch.list<int>
    %14049 = torch.aten.view %14040, %14048 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %14050 = torch.aten.mul.Tensor %14049, %14047 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %14051 = torch.aten.cos %14050 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %14052 = torch.aten.sin %14050 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %14053 = torch.aten.complex %14051, %14052 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
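    // The RoPE table is recomputed from scratch with identical constants
    // (theta = 5.0e+05, 131072 positions, 64 frequencies) rather than reusing
    // %13895; the duplication is presumably left for later passes to CSE.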
    %14054 = torch_c.to_builtin_tensor %14053 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %14055 = flow.tensor.transfer %14054 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %14056 = torch_c.from_builtin_tensor %14055 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %14057 = torch_c.to_builtin_tensor %14053 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %14058 = flow.tensor.transfer %14057 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %14059 = torch_c.from_builtin_tensor %14058 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %14060 = torch_c.to_builtin_tensor %14053 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %14061 = flow.tensor.transfer %14060 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %14062 = torch_c.from_builtin_tensor %14061 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %14063 = torch_c.to_builtin_tensor %14053 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %14064 = flow.tensor.transfer %14063 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %14065 = torch_c.from_builtin_tensor %14064 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %14066 = torch_c.to_builtin_tensor %14053 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %14067 = flow.tensor.transfer %14066 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %14068 = torch_c.from_builtin_tensor %14067 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %14069 = torch_c.to_builtin_tensor %14053 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %14070 = flow.tensor.transfer %14069 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %14071 = torch_c.from_builtin_tensor %14070 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %14072 = torch_c.to_builtin_tensor %14053 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %14073 = flow.tensor.transfer %14072 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %14074 = torch_c.from_builtin_tensor %14073 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %14075 = torch_c.to_builtin_tensor %14053 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %14076 = flow.tensor.transfer %14075 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %14077 = torch_c.from_builtin_tensor %14076 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
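    // As before, the fresh table is broadcast to all eight devices;
    // %14056 ... %14077 are the per-device copies used below.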
    %int1_11638 = torch.constant.int 1
    %14078 = torch.aten.size.int %13727, %int1_11638 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_11639 = torch.constant.int 0
    %14079 = torch.aten.add.int %int0_11639, %14078 : !torch.int, !torch.int -> !torch.int
    %int0_11640 = torch.constant.int 0
    %int0_11641 = torch.constant.int 0
    %int1_11642 = torch.constant.int 1
    %14080 = torch.aten.slice.Tensor %14056, %int0_11640, %int0_11641, %14079, %int1_11642 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14080, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11643 = torch.constant.int 1
    %int0_11644 = torch.constant.int 0
    %int9223372036854775807_11645 = torch.constant.int 9223372036854775807
    %int1_11646 = torch.constant.int 1
    %14081 = torch.aten.slice.Tensor %14080, %int1_11643, %int0_11644, %int9223372036854775807_11645, %int1_11646 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14081, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11647 = torch.constant.int 0
    %14082 = torch.aten.unsqueeze %14081, %int0_11647 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %14082, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11648 = torch.constant.int 2
    %14083 = torch.aten.unsqueeze %14082, %int2_11648 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14083, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11649 = torch.constant.int 3
    %int0_11650 = torch.constant.int 0
    %int9223372036854775807_11651 = torch.constant.int 9223372036854775807
    %int1_11652 = torch.constant.int 1
    %14084 = torch.aten.slice.Tensor %14083, %int3_11649, %int0_11650, %int9223372036854775807_11651, %int1_11652 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14084, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %14085 = torch_c.to_builtin_tensor %13851 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_11653 = arith.constant 1 : index
    %dim_11654 = tensor.dim %14085, %c1_11653 : tensor<4x?x1x128xf16>
    %14086 = flow.tensor.bitcast %14085 : tensor<4x?x1x128xf16>{%dim_11654} -> tensor<4x?x1x64xcomplex<f16>>{%dim_11654}
    %14087 = torch_c.from_builtin_tensor %14086 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %14087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %14088 = torch.aten.mul.Tensor %14087, %14084 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %14089 = torch_c.to_builtin_tensor %14088 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_11655 = arith.constant 1 : index
    %dim_11656 = tensor.dim %14089, %c1_11655 : tensor<4x?x1x64xcomplex<f32>>
    %14090 = flow.tensor.bitcast %14089 : tensor<4x?x1x64xcomplex<f32>>{%dim_11656} -> tensor<4x?x1x128xf32>{%dim_11656}
    %14091 = torch_c.from_builtin_tensor %14090 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %14091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_11657 = torch.constant.int 5
    %14092 = torch.prims.convert_element_type %14091, %int5_11657 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %14092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
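    // Device 0, single-head shard: the same slice / unsqueeze / bitcast /
    // complex-multiply / bitcast-back pattern, now on [4, seq, 1, 128] f16
    // (one head per device). Devices 1-7 repeat the pattern below.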
    %int1_11658 = torch.constant.int 1
    %14093 = torch.aten.size.int %13733, %int1_11658 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_11659 = torch.constant.int 0
    %14094 = torch.aten.add.int %int0_11659, %14093 : !torch.int, !torch.int -> !torch.int
    %int0_11660 = torch.constant.int 0
    %int0_11661 = torch.constant.int 0
    %int1_11662 = torch.constant.int 1
    %14095 = torch.aten.slice.Tensor %14059, %int0_11660, %int0_11661, %14094, %int1_11662 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14095, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11663 = torch.constant.int 1
    %int0_11664 = torch.constant.int 0
    %int9223372036854775807_11665 = torch.constant.int 9223372036854775807
    %int1_11666 = torch.constant.int 1
    %14096 = torch.aten.slice.Tensor %14095, %int1_11663, %int0_11664, %int9223372036854775807_11665, %int1_11666 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14096, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11667 = torch.constant.int 0
    %14097 = torch.aten.unsqueeze %14096, %int0_11667 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %14097, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11668 = torch.constant.int 2
    %14098 = torch.aten.unsqueeze %14097, %int2_11668 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14098, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11669 = torch.constant.int 3
    %int0_11670 = torch.constant.int 0
    %int9223372036854775807_11671 = torch.constant.int 9223372036854775807
    %int1_11672 = torch.constant.int 1
    %14099 = torch.aten.slice.Tensor %14098, %int3_11669, %int0_11670, %int9223372036854775807_11671, %int1_11672 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14099, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %14100 = torch_c.to_builtin_tensor %13853 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_11673 = arith.constant 1 : index
    %dim_11674 = tensor.dim %14100, %c1_11673 : tensor<4x?x1x128xf16>
    %14101 = flow.tensor.bitcast %14100 : tensor<4x?x1x128xf16>{%dim_11674} -> tensor<4x?x1x64xcomplex<f16>>{%dim_11674}
    %14102 = torch_c.from_builtin_tensor %14101 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %14102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %14103 = torch.aten.mul.Tensor %14102, %14099 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %14104 = torch_c.to_builtin_tensor %14103 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_11675 = arith.constant 1 : index
    %dim_11676 = tensor.dim %14104, %c1_11675 : tensor<4x?x1x64xcomplex<f32>>
    %14105 = flow.tensor.bitcast %14104 : tensor<4x?x1x64xcomplex<f32>>{%dim_11676} -> tensor<4x?x1x128xf32>{%dim_11676}
    %14106 = torch_c.from_builtin_tensor %14105 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %14106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_11677 = torch.constant.int 5
    %14107 = torch.prims.convert_element_type %14106, %int5_11677 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %14107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_11678 = torch.constant.int 1
    %14108 = torch.aten.size.int %13739, %int1_11678 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_11679 = torch.constant.int 0
    %14109 = torch.aten.add.int %int0_11679, %14108 : !torch.int, !torch.int -> !torch.int
    %int0_11680 = torch.constant.int 0
    %int0_11681 = torch.constant.int 0
    %int1_11682 = torch.constant.int 1
    %14110 = torch.aten.slice.Tensor %14062, %int0_11680, %int0_11681, %14109, %int1_11682 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14110, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11683 = torch.constant.int 1
    %int0_11684 = torch.constant.int 0
    %int9223372036854775807_11685 = torch.constant.int 9223372036854775807
    %int1_11686 = torch.constant.int 1
    %14111 = torch.aten.slice.Tensor %14110, %int1_11683, %int0_11684, %int9223372036854775807_11685, %int1_11686 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14111, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11687 = torch.constant.int 0
    %14112 = torch.aten.unsqueeze %14111, %int0_11687 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %14112, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11688 = torch.constant.int 2
    %14113 = torch.aten.unsqueeze %14112, %int2_11688 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14113, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11689 = torch.constant.int 3
    %int0_11690 = torch.constant.int 0
    %int9223372036854775807_11691 = torch.constant.int 9223372036854775807
    %int1_11692 = torch.constant.int 1
    %14114 = torch.aten.slice.Tensor %14113, %int3_11689, %int0_11690, %int9223372036854775807_11691, %int1_11692 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14114, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %14115 = torch_c.to_builtin_tensor %13855 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_11693 = arith.constant 1 : index
    %dim_11694 = tensor.dim %14115, %c1_11693 : tensor<4x?x1x128xf16>
    %14116 = flow.tensor.bitcast %14115 : tensor<4x?x1x128xf16>{%dim_11694} -> tensor<4x?x1x64xcomplex<f16>>{%dim_11694}
    %14117 = torch_c.from_builtin_tensor %14116 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %14117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %14118 = torch.aten.mul.Tensor %14117, %14114 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %14119 = torch_c.to_builtin_tensor %14118 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_11695 = arith.constant 1 : index
    %dim_11696 = tensor.dim %14119, %c1_11695 : tensor<4x?x1x64xcomplex<f32>>
    %14120 = flow.tensor.bitcast %14119 : tensor<4x?x1x64xcomplex<f32>>{%dim_11696} -> tensor<4x?x1x128xf32>{%dim_11696}
    %14121 = torch_c.from_builtin_tensor %14120 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %14121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_11697 = torch.constant.int 5
    %14122 = torch.prims.convert_element_type %14121, %int5_11697 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %14122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_11698 = torch.constant.int 1
    %14123 = torch.aten.size.int %13745, %int1_11698 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_11699 = torch.constant.int 0
    %14124 = torch.aten.add.int %int0_11699, %14123 : !torch.int, !torch.int -> !torch.int
    %int0_11700 = torch.constant.int 0
    %int0_11701 = torch.constant.int 0
    %int1_11702 = torch.constant.int 1
    %14125 = torch.aten.slice.Tensor %14065, %int0_11700, %int0_11701, %14124, %int1_11702 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14125, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11703 = torch.constant.int 1
    %int0_11704 = torch.constant.int 0
    %int9223372036854775807_11705 = torch.constant.int 9223372036854775807
    %int1_11706 = torch.constant.int 1
    %14126 = torch.aten.slice.Tensor %14125, %int1_11703, %int0_11704, %int9223372036854775807_11705, %int1_11706 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14126, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11707 = torch.constant.int 0
    %14127 = torch.aten.unsqueeze %14126, %int0_11707 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %14127, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11708 = torch.constant.int 2
    %14128 = torch.aten.unsqueeze %14127, %int2_11708 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14128, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11709 = torch.constant.int 3
    %int0_11710 = torch.constant.int 0
    %int9223372036854775807_11711 = torch.constant.int 9223372036854775807
    %int1_11712 = torch.constant.int 1
    %14129 = torch.aten.slice.Tensor %14128, %int3_11709, %int0_11710, %int9223372036854775807_11711, %int1_11712 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14129, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %14130 = torch_c.to_builtin_tensor %13857 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_11713 = arith.constant 1 : index
    %dim_11714 = tensor.dim %14130, %c1_11713 : tensor<4x?x1x128xf16>
    %14131 = flow.tensor.bitcast %14130 : tensor<4x?x1x128xf16>{%dim_11714} -> tensor<4x?x1x64xcomplex<f16>>{%dim_11714}
    %14132 = torch_c.from_builtin_tensor %14131 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %14132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %14133 = torch.aten.mul.Tensor %14132, %14129 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %14134 = torch_c.to_builtin_tensor %14133 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_11715 = arith.constant 1 : index
    %dim_11716 = tensor.dim %14134, %c1_11715 : tensor<4x?x1x64xcomplex<f32>>
    %14135 = flow.tensor.bitcast %14134 : tensor<4x?x1x64xcomplex<f32>>{%dim_11716} -> tensor<4x?x1x128xf32>{%dim_11716}
    %14136 = torch_c.from_builtin_tensor %14135 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %14136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_11717 = torch.constant.int 5
    %14137 = torch.prims.convert_element_type %14136, %int5_11717 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %14137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_11718 = torch.constant.int 1
    %14138 = torch.aten.size.int %13751, %int1_11718 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_11719 = torch.constant.int 0
    %14139 = torch.aten.add.int %int0_11719, %14138 : !torch.int, !torch.int -> !torch.int
    %int0_11720 = torch.constant.int 0
    %int0_11721 = torch.constant.int 0
    %int1_11722 = torch.constant.int 1
    %14140 = torch.aten.slice.Tensor %14068, %int0_11720, %int0_11721, %14139, %int1_11722 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14140, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11723 = torch.constant.int 1
    %int0_11724 = torch.constant.int 0
    %int9223372036854775807_11725 = torch.constant.int 9223372036854775807
    %int1_11726 = torch.constant.int 1
    %14141 = torch.aten.slice.Tensor %14140, %int1_11723, %int0_11724, %int9223372036854775807_11725, %int1_11726 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14141, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11727 = torch.constant.int 0
    %14142 = torch.aten.unsqueeze %14141, %int0_11727 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %14142, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11728 = torch.constant.int 2
    %14143 = torch.aten.unsqueeze %14142, %int2_11728 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14143, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11729 = torch.constant.int 3
    %int0_11730 = torch.constant.int 0
    %int9223372036854775807_11731 = torch.constant.int 9223372036854775807
    %int1_11732 = torch.constant.int 1
    %14144 = torch.aten.slice.Tensor %14143, %int3_11729, %int0_11730, %int9223372036854775807_11731, %int1_11732 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14144, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %14145 = torch_c.to_builtin_tensor %13859 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_11733 = arith.constant 1 : index
    %dim_11734 = tensor.dim %14145, %c1_11733 : tensor<4x?x1x128xf16>
    %14146 = flow.tensor.bitcast %14145 : tensor<4x?x1x128xf16>{%dim_11734} -> tensor<4x?x1x64xcomplex<f16>>{%dim_11734}
    %14147 = torch_c.from_builtin_tensor %14146 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %14147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %14148 = torch.aten.mul.Tensor %14147, %14144 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %14149 = torch_c.to_builtin_tensor %14148 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_11735 = arith.constant 1 : index
    %dim_11736 = tensor.dim %14149, %c1_11735 : tensor<4x?x1x64xcomplex<f32>>
    %14150 = flow.tensor.bitcast %14149 : tensor<4x?x1x64xcomplex<f32>>{%dim_11736} -> tensor<4x?x1x128xf32>{%dim_11736}
    %14151 = torch_c.from_builtin_tensor %14150 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %14151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_11737 = torch.constant.int 5
    %14152 = torch.prims.convert_element_type %14151, %int5_11737 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %14152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_11738 = torch.constant.int 1
    %14153 = torch.aten.size.int %13757, %int1_11738 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_11739 = torch.constant.int 0
    %14154 = torch.aten.add.int %int0_11739, %14153 : !torch.int, !torch.int -> !torch.int
    %int0_11740 = torch.constant.int 0
    %int0_11741 = torch.constant.int 0
    %int1_11742 = torch.constant.int 1
    %14155 = torch.aten.slice.Tensor %14071, %int0_11740, %int0_11741, %14154, %int1_11742 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14155, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11743 = torch.constant.int 1
    %int0_11744 = torch.constant.int 0
    %int9223372036854775807_11745 = torch.constant.int 9223372036854775807
    %int1_11746 = torch.constant.int 1
    %14156 = torch.aten.slice.Tensor %14155, %int1_11743, %int0_11744, %int9223372036854775807_11745, %int1_11746 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14156, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11747 = torch.constant.int 0
    %14157 = torch.aten.unsqueeze %14156, %int0_11747 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %14157, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11748 = torch.constant.int 2
    %14158 = torch.aten.unsqueeze %14157, %int2_11748 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14158, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11749 = torch.constant.int 3
    %int0_11750 = torch.constant.int 0
    %int9223372036854775807_11751 = torch.constant.int 9223372036854775807
    %int1_11752 = torch.constant.int 1
    %14159 = torch.aten.slice.Tensor %14158, %int3_11749, %int0_11750, %int9223372036854775807_11751, %int1_11752 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14159, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %14160 = torch_c.to_builtin_tensor %13861 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_11753 = arith.constant 1 : index
    %dim_11754 = tensor.dim %14160, %c1_11753 : tensor<4x?x1x128xf16>
    %14161 = flow.tensor.bitcast %14160 : tensor<4x?x1x128xf16>{%dim_11754} -> tensor<4x?x1x64xcomplex<f16>>{%dim_11754}
    %14162 = torch_c.from_builtin_tensor %14161 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %14162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %14163 = torch.aten.mul.Tensor %14162, %14159 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %14164 = torch_c.to_builtin_tensor %14163 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_11755 = arith.constant 1 : index
    %dim_11756 = tensor.dim %14164, %c1_11755 : tensor<4x?x1x64xcomplex<f32>>
    %14165 = flow.tensor.bitcast %14164 : tensor<4x?x1x64xcomplex<f32>>{%dim_11756} -> tensor<4x?x1x128xf32>{%dim_11756}
    %14166 = torch_c.from_builtin_tensor %14165 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %14166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_11757 = torch.constant.int 5
    %14167 = torch.prims.convert_element_type %14166, %int5_11757 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %14167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_11758 = torch.constant.int 1
    %14168 = torch.aten.size.int %13763, %int1_11758 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_11759 = torch.constant.int 0
    %14169 = torch.aten.add.int %int0_11759, %14168 : !torch.int, !torch.int -> !torch.int
    %int0_11760 = torch.constant.int 0
    %int0_11761 = torch.constant.int 0
    %int1_11762 = torch.constant.int 1
    %14170 = torch.aten.slice.Tensor %14074, %int0_11760, %int0_11761, %14169, %int1_11762 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14170, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11763 = torch.constant.int 1
    %int0_11764 = torch.constant.int 0
    %int9223372036854775807_11765 = torch.constant.int 9223372036854775807
    %int1_11766 = torch.constant.int 1
    %14171 = torch.aten.slice.Tensor %14170, %int1_11763, %int0_11764, %int9223372036854775807_11765, %int1_11766 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14171, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11767 = torch.constant.int 0
    %14172 = torch.aten.unsqueeze %14171, %int0_11767 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %14172, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11768 = torch.constant.int 2
    %14173 = torch.aten.unsqueeze %14172, %int2_11768 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14173, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11769 = torch.constant.int 3
    %int0_11770 = torch.constant.int 0
    %int9223372036854775807_11771 = torch.constant.int 9223372036854775807
    %int1_11772 = torch.constant.int 1
    %14174 = torch.aten.slice.Tensor %14173, %int3_11769, %int0_11770, %int9223372036854775807_11771, %int1_11772 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14174, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %14175 = torch_c.to_builtin_tensor %13863 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_11773 = arith.constant 1 : index
    %dim_11774 = tensor.dim %14175, %c1_11773 : tensor<4x?x1x128xf16>
    %14176 = flow.tensor.bitcast %14175 : tensor<4x?x1x128xf16>{%dim_11774} -> tensor<4x?x1x64xcomplex<f16>>{%dim_11774}
    %14177 = torch_c.from_builtin_tensor %14176 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %14177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %14178 = torch.aten.mul.Tensor %14177, %14174 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %14179 = torch_c.to_builtin_tensor %14178 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_11775 = arith.constant 1 : index
    %dim_11776 = tensor.dim %14179, %c1_11775 : tensor<4x?x1x64xcomplex<f32>>
    %14180 = flow.tensor.bitcast %14179 : tensor<4x?x1x64xcomplex<f32>>{%dim_11776} -> tensor<4x?x1x128xf32>{%dim_11776}
    %14181 = torch_c.from_builtin_tensor %14180 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %14181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_11777 = torch.constant.int 5
    %14182 = torch.prims.convert_element_type %14181, %int5_11777 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %14182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_11778 = torch.constant.int 1
    %14183 = torch.aten.size.int %13769, %int1_11778 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_11779 = torch.constant.int 0
    %14184 = torch.aten.add.int %int0_11779, %14183 : !torch.int, !torch.int -> !torch.int
    %int0_11780 = torch.constant.int 0
    %int0_11781 = torch.constant.int 0
    %int1_11782 = torch.constant.int 1
    %14185 = torch.aten.slice.Tensor %14077, %int0_11780, %int0_11781, %14184, %int1_11782 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14185, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_11783 = torch.constant.int 1
    %int0_11784 = torch.constant.int 0
    %int9223372036854775807_11785 = torch.constant.int 9223372036854775807
    %int1_11786 = torch.constant.int 1
    %14186 = torch.aten.slice.Tensor %14185, %int1_11783, %int0_11784, %int9223372036854775807_11785, %int1_11786 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %14186, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_11787 = torch.constant.int 0
    %14187 = torch.aten.unsqueeze %14186, %int0_11787 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %14187, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_11788 = torch.constant.int 2
    %14188 = torch.aten.unsqueeze %14187, %int2_11788 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14188, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_11789 = torch.constant.int 3
    %int0_11790 = torch.constant.int 0
    %int9223372036854775807_11791 = torch.constant.int 9223372036854775807
    %int1_11792 = torch.constant.int 1
    %14189 = torch.aten.slice.Tensor %14188, %int3_11789, %int0_11790, %int9223372036854775807_11791, %int1_11792 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14189, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %14190 = torch_c.to_builtin_tensor %13865 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_11793 = arith.constant 1 : index
    %dim_11794 = tensor.dim %14190, %c1_11793 : tensor<4x?x1x128xf16>
    %14191 = flow.tensor.bitcast %14190 : tensor<4x?x1x128xf16>{%dim_11794} -> tensor<4x?x1x64xcomplex<f16>>{%dim_11794}
    %14192 = torch_c.from_builtin_tensor %14191 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %14192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %14193 = torch.aten.mul.Tensor %14192, %14189 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %14193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %14194 = torch_c.to_builtin_tensor %14193 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_11795 = arith.constant 1 : index
    %dim_11796 = tensor.dim %14194, %c1_11795 : tensor<4x?x1x64xcomplex<f32>>
    %14195 = flow.tensor.bitcast %14194 : tensor<4x?x1x64xcomplex<f32>>{%dim_11796} -> tensor<4x?x1x128xf32>{%dim_11796}
    %14196 = torch_c.from_builtin_tensor %14195 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %14196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_11797 = torch.constant.int 5
    %14197 = torch.prims.convert_element_type %14196, %int5_11797 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %14197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
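    // All eight shards now hold their rotated states (%14092, %14107, ...,
    // %14197). Next, the per-shard [4, pages] si64 tensors (%2364 ... %2385,
    // plausibly page-id tables) are scaled by 64. A stride of 64 is
    // consistent with the [?, 32, 2, 16, 1, 128] cache view at the end of
    // this section: 32 transformer blocks x 2 (K/V) = 64 sub-slots per page.
    // This reading is inferred from the shapes alone.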
    %int64_11798 = torch.constant.int 64
    %14198 = torch.aten.mul.Scalar %2364, %int64_11798 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14198, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_11799 = torch.constant.int 64
    %14199 = torch.aten.mul.Scalar %2367, %int64_11799 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14199, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_11800 = torch.constant.int 64
    %14200 = torch.aten.mul.Scalar %2370, %int64_11800 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14200, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_11801 = torch.constant.int 64
    %14201 = torch.aten.mul.Scalar %2373, %int64_11801 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14201, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_11802 = torch.constant.int 64
    %14202 = torch.aten.mul.Scalar %2376, %int64_11802 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14202, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_11803 = torch.constant.int 64
    %14203 = torch.aten.mul.Scalar %2379, %int64_11803 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14203, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_11804 = torch.constant.int 64
    %14204 = torch.aten.mul.Scalar %2382, %int64_11804 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14204, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_11805 = torch.constant.int 64
    %14205 = torch.aten.mul.Scalar %2385, %int64_11805 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14205, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
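    // Adding 12 to the scaled page ids picks a fixed sub-slot inside each
    // 64-entry page. Under the 32-blocks x 2 (K/V) layout inferred above,
    // slot 12 would be 6 * 2 + 0, i.e. the K entry of transformer block 6;
    // the +1 offsets computed further below would then be the matching V
    // entries (slot 13). This is a best guess from the strides.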
    %int12 = torch.constant.int 12
    %int1_11806 = torch.constant.int 1
    %14206 = torch.aten.add.Scalar %14198, %int12, %int1_11806 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14206, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int12_11807 = torch.constant.int 12
    %int1_11808 = torch.constant.int 1
    %14207 = torch.aten.add.Scalar %14199, %int12_11807, %int1_11808 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14207, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int12_11809 = torch.constant.int 12
    %int1_11810 = torch.constant.int 1
    %14208 = torch.aten.add.Scalar %14200, %int12_11809, %int1_11810 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14208, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int12_11811 = torch.constant.int 12
    %int1_11812 = torch.constant.int 1
    %14209 = torch.aten.add.Scalar %14201, %int12_11811, %int1_11812 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14209, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int12_11813 = torch.constant.int 12
    %int1_11814 = torch.constant.int 1
    %14210 = torch.aten.add.Scalar %14202, %int12_11813, %int1_11814 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14210, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int12_11815 = torch.constant.int 12
    %int1_11816 = torch.constant.int 1
    %14211 = torch.aten.add.Scalar %14203, %int12_11815, %int1_11816 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14211, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int12_11817 = torch.constant.int 12
    %int1_11818 = torch.constant.int 1
    %14212 = torch.aten.add.Scalar %14204, %int12_11817, %int1_11818 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14212, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int12_11819 = torch.constant.int 12
    %int1_11820 = torch.constant.int 1
    %14213 = torch.aten.add.Scalar %14205, %int12_11819, %int1_11820 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14213, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
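    // Reshape each rotated tensor from [4, tokens, 1, 128] to
    // [4, pages, 16, 1, 128]: the token axis (bound to s0 * 16) is split
    // into pages of 16 positions, so 16 looks like the tokens-per-page
    // stride of the cache; again inferred from the symbolic shapes.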
    %int4_11821 = torch.constant.int 4
    %int16_11822 = torch.constant.int 16
    %int1_11823 = torch.constant.int 1
    %int128_11824 = torch.constant.int 128
    %14214 = torch.prim.ListConstruct %int4_11821, %3095, %int16_11822, %int1_11823, %int128_11824 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14215 = torch.aten.view %14092, %14214 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14215, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11825 = torch.constant.int 4
    %int16_11826 = torch.constant.int 16
    %int1_11827 = torch.constant.int 1
    %int128_11828 = torch.constant.int 128
    %14216 = torch.prim.ListConstruct %int4_11825, %3095, %int16_11826, %int1_11827, %int128_11828 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14217 = torch.aten.view %14107, %14216 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14217, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11829 = torch.constant.int 4
    %int16_11830 = torch.constant.int 16
    %int1_11831 = torch.constant.int 1
    %int128_11832 = torch.constant.int 128
    %14218 = torch.prim.ListConstruct %int4_11829, %3095, %int16_11830, %int1_11831, %int128_11832 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14219 = torch.aten.view %14122, %14218 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14219, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11833 = torch.constant.int 4
    %int16_11834 = torch.constant.int 16
    %int1_11835 = torch.constant.int 1
    %int128_11836 = torch.constant.int 128
    %14220 = torch.prim.ListConstruct %int4_11833, %3095, %int16_11834, %int1_11835, %int128_11836 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14221 = torch.aten.view %14137, %14220 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14221, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11837 = torch.constant.int 4
    %int16_11838 = torch.constant.int 16
    %int1_11839 = torch.constant.int 1
    %int128_11840 = torch.constant.int 128
    %14222 = torch.prim.ListConstruct %int4_11837, %3095, %int16_11838, %int1_11839, %int128_11840 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14223 = torch.aten.view %14152, %14222 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14223, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11841 = torch.constant.int 4
    %int16_11842 = torch.constant.int 16
    %int1_11843 = torch.constant.int 1
    %int128_11844 = torch.constant.int 128
    %14224 = torch.prim.ListConstruct %int4_11841, %3095, %int16_11842, %int1_11843, %int128_11844 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14225 = torch.aten.view %14167, %14224 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14225, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11845 = torch.constant.int 4
    %int16_11846 = torch.constant.int 16
    %int1_11847 = torch.constant.int 1
    %int128_11848 = torch.constant.int 128
    %14226 = torch.prim.ListConstruct %int4_11845, %3095, %int16_11846, %int1_11847, %int128_11848 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14227 = torch.aten.view %14182, %14226 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14227, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11849 = torch.constant.int 4
    %int16_11850 = torch.constant.int 16
    %int1_11851 = torch.constant.int 1
    %int128_11852 = torch.constant.int 128
    %14228 = torch.prim.ListConstruct %int4_11849, %3095, %int16_11850, %int1_11851, %int128_11852 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14229 = torch.aten.view %14197, %14228 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14229, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
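    // Collapse batch and pages, [4, pages, 16, 1, 128] -> [4*pages, 16, 1, 128],
    // so each leading row addresses one (batch, page) pair.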
    %int4_11853 = torch.constant.int 4
    %14230 = torch.aten.mul.int %int4_11853, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11854 = torch.constant.int 16
    %int1_11855 = torch.constant.int 1
    %int128_11856 = torch.constant.int 128
    %14231 = torch.prim.ListConstruct %14230, %int16_11854, %int1_11855, %int128_11856 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14232 = torch.aten.view %14215, %14231 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14232, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11857 = torch.constant.int 4
    %14233 = torch.aten.mul.int %int4_11857, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11858 = torch.constant.int 16
    %int1_11859 = torch.constant.int 1
    %int128_11860 = torch.constant.int 128
    %14234 = torch.prim.ListConstruct %14233, %int16_11858, %int1_11859, %int128_11860 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14235 = torch.aten.view %14217, %14234 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14235, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11861 = torch.constant.int 4
    %14236 = torch.aten.mul.int %int4_11861, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11862 = torch.constant.int 16
    %int1_11863 = torch.constant.int 1
    %int128_11864 = torch.constant.int 128
    %14237 = torch.prim.ListConstruct %14236, %int16_11862, %int1_11863, %int128_11864 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14238 = torch.aten.view %14219, %14237 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14238, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11865 = torch.constant.int 4
    %14239 = torch.aten.mul.int %int4_11865, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11866 = torch.constant.int 16
    %int1_11867 = torch.constant.int 1
    %int128_11868 = torch.constant.int 128
    %14240 = torch.prim.ListConstruct %14239, %int16_11866, %int1_11867, %int128_11868 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14241 = torch.aten.view %14221, %14240 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14241, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11869 = torch.constant.int 4
    %14242 = torch.aten.mul.int %int4_11869, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11870 = torch.constant.int 16
    %int1_11871 = torch.constant.int 1
    %int128_11872 = torch.constant.int 128
    %14243 = torch.prim.ListConstruct %14242, %int16_11870, %int1_11871, %int128_11872 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14244 = torch.aten.view %14223, %14243 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14244, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11873 = torch.constant.int 4
    %14245 = torch.aten.mul.int %int4_11873, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11874 = torch.constant.int 16
    %int1_11875 = torch.constant.int 1
    %int128_11876 = torch.constant.int 128
    %14246 = torch.prim.ListConstruct %14245, %int16_11874, %int1_11875, %int128_11876 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14247 = torch.aten.view %14225, %14246 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14247, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11877 = torch.constant.int 4
    %14248 = torch.aten.mul.int %int4_11877, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11878 = torch.constant.int 16
    %int1_11879 = torch.constant.int 1
    %int128_11880 = torch.constant.int 128
    %14249 = torch.prim.ListConstruct %14248, %int16_11878, %int1_11879, %int128_11880 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14250 = torch.aten.view %14227, %14249 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14250, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11881 = torch.constant.int 4
    %14251 = torch.aten.mul.int %int4_11881, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11882 = torch.constant.int 16
    %int1_11883 = torch.constant.int 1
    %int128_11884 = torch.constant.int 128
    %14252 = torch.prim.ListConstruct %14251, %int16_11882, %int1_11883, %int128_11884 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14253 = torch.aten.view %14229, %14252 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14253, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
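    // The slot-index tensors get the same collapse, [4, pages] -> [4*pages],
    // yielding one linear cache index per (batch, page) row.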
    %int4_11885 = torch.constant.int 4
    %14254 = torch.aten.mul.int %int4_11885, %3095 : !torch.int, !torch.int -> !torch.int
    %14255 = torch.prim.ListConstruct %14254 : (!torch.int) -> !torch.list<int>
    %14256 = torch.aten.view %14206, %14255 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14256, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11886 = torch.constant.int 4
    %14257 = torch.aten.mul.int %int4_11886, %3095 : !torch.int, !torch.int -> !torch.int
    %14258 = torch.prim.ListConstruct %14257 : (!torch.int) -> !torch.list<int>
    %14259 = torch.aten.view %14207, %14258 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14259, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11887 = torch.constant.int 4
    %14260 = torch.aten.mul.int %int4_11887, %3095 : !torch.int, !torch.int -> !torch.int
    %14261 = torch.prim.ListConstruct %14260 : (!torch.int) -> !torch.list<int>
    %14262 = torch.aten.view %14208, %14261 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14262, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11888 = torch.constant.int 4
    %14263 = torch.aten.mul.int %int4_11888, %3095 : !torch.int, !torch.int -> !torch.int
    %14264 = torch.prim.ListConstruct %14263 : (!torch.int) -> !torch.list<int>
    %14265 = torch.aten.view %14209, %14264 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14265, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11889 = torch.constant.int 4
    %14266 = torch.aten.mul.int %int4_11889, %3095 : !torch.int, !torch.int -> !torch.int
    %14267 = torch.prim.ListConstruct %14266 : (!torch.int) -> !torch.list<int>
    %14268 = torch.aten.view %14210, %14267 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14268, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11890 = torch.constant.int 4
    %14269 = torch.aten.mul.int %int4_11890, %3095 : !torch.int, !torch.int -> !torch.int
    %14270 = torch.prim.ListConstruct %14269 : (!torch.int) -> !torch.list<int>
    %14271 = torch.aten.view %14211, %14270 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14271, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11891 = torch.constant.int 4
    %14272 = torch.aten.mul.int %int4_11891, %3095 : !torch.int, !torch.int -> !torch.int
    %14273 = torch.prim.ListConstruct %14272 : (!torch.int) -> !torch.list<int>
    %14274 = torch.aten.view %14212, %14273 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14274, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11892 = torch.constant.int 4
    %14275 = torch.aten.mul.int %int4_11892, %3095 : !torch.int, !torch.int -> !torch.int
    %14276 = torch.prim.ListConstruct %14275 : (!torch.int) -> !torch.list<int>
    %14277 = torch.aten.view %14213, %14276 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14277, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
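    // The same reshape is now applied to a second operand set
    // (%13867, %13869, ..., %13881). These tensors bypass the RoPE block
    // above, which is consistent with them being the per-shard V
    // projections; the IR does not name them, so this is an inference.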
    %int4_11893 = torch.constant.int 4
    %int16_11894 = torch.constant.int 16
    %int1_11895 = torch.constant.int 1
    %int128_11896 = torch.constant.int 128
    %14278 = torch.prim.ListConstruct %int4_11893, %3095, %int16_11894, %int1_11895, %int128_11896 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14279 = torch.aten.view %13867, %14278 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14279, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11897 = torch.constant.int 4
    %int16_11898 = torch.constant.int 16
    %int1_11899 = torch.constant.int 1
    %int128_11900 = torch.constant.int 128
    %14280 = torch.prim.ListConstruct %int4_11897, %3095, %int16_11898, %int1_11899, %int128_11900 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14281 = torch.aten.view %13869, %14280 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14281, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11901 = torch.constant.int 4
    %int16_11902 = torch.constant.int 16
    %int1_11903 = torch.constant.int 1
    %int128_11904 = torch.constant.int 128
    %14282 = torch.prim.ListConstruct %int4_11901, %3095, %int16_11902, %int1_11903, %int128_11904 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14283 = torch.aten.view %13871, %14282 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14283, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11905 = torch.constant.int 4
    %int16_11906 = torch.constant.int 16
    %int1_11907 = torch.constant.int 1
    %int128_11908 = torch.constant.int 128
    %14284 = torch.prim.ListConstruct %int4_11905, %3095, %int16_11906, %int1_11907, %int128_11908 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14285 = torch.aten.view %13873, %14284 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14285, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11909 = torch.constant.int 4
    %int16_11910 = torch.constant.int 16
    %int1_11911 = torch.constant.int 1
    %int128_11912 = torch.constant.int 128
    %14286 = torch.prim.ListConstruct %int4_11909, %3095, %int16_11910, %int1_11911, %int128_11912 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14287 = torch.aten.view %13875, %14286 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14287, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11913 = torch.constant.int 4
    %int16_11914 = torch.constant.int 16
    %int1_11915 = torch.constant.int 1
    %int128_11916 = torch.constant.int 128
    %14288 = torch.prim.ListConstruct %int4_11913, %3095, %int16_11914, %int1_11915, %int128_11916 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14289 = torch.aten.view %13877, %14288 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14289, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11917 = torch.constant.int 4
    %int16_11918 = torch.constant.int 16
    %int1_11919 = torch.constant.int 1
    %int128_11920 = torch.constant.int 128
    %14290 = torch.prim.ListConstruct %int4_11917, %3095, %int16_11918, %int1_11919, %int128_11920 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14291 = torch.aten.view %13879, %14290 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14291, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_11921 = torch.constant.int 4
    %int16_11922 = torch.constant.int 16
    %int1_11923 = torch.constant.int 1
    %int128_11924 = torch.constant.int 128
    %14292 = torch.prim.ListConstruct %int4_11921, %3095, %int16_11922, %int1_11923, %int128_11924 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14293 = torch.aten.view %13881, %14292 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %14293, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
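    // Collapse the second operand set to [4*pages, 16, 1, 128], mirroring
    // the K-side flattening above.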
    %int4_11925 = torch.constant.int 4
    %14294 = torch.aten.mul.int %int4_11925, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11926 = torch.constant.int 16
    %int1_11927 = torch.constant.int 1
    %int128_11928 = torch.constant.int 128
    %14295 = torch.prim.ListConstruct %14294, %int16_11926, %int1_11927, %int128_11928 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14296 = torch.aten.view %14279, %14295 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14296, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11929 = torch.constant.int 4
    %14297 = torch.aten.mul.int %int4_11929, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11930 = torch.constant.int 16
    %int1_11931 = torch.constant.int 1
    %int128_11932 = torch.constant.int 128
    %14298 = torch.prim.ListConstruct %14297, %int16_11930, %int1_11931, %int128_11932 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14299 = torch.aten.view %14281, %14298 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14299, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11933 = torch.constant.int 4
    %14300 = torch.aten.mul.int %int4_11933, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11934 = torch.constant.int 16
    %int1_11935 = torch.constant.int 1
    %int128_11936 = torch.constant.int 128
    %14301 = torch.prim.ListConstruct %14300, %int16_11934, %int1_11935, %int128_11936 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14302 = torch.aten.view %14283, %14301 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14302, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11937 = torch.constant.int 4
    %14303 = torch.aten.mul.int %int4_11937, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11938 = torch.constant.int 16
    %int1_11939 = torch.constant.int 1
    %int128_11940 = torch.constant.int 128
    %14304 = torch.prim.ListConstruct %14303, %int16_11938, %int1_11939, %int128_11940 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14305 = torch.aten.view %14285, %14304 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14305, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11941 = torch.constant.int 4
    %14306 = torch.aten.mul.int %int4_11941, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11942 = torch.constant.int 16
    %int1_11943 = torch.constant.int 1
    %int128_11944 = torch.constant.int 128
    %14307 = torch.prim.ListConstruct %14306, %int16_11942, %int1_11943, %int128_11944 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14308 = torch.aten.view %14287, %14307 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14308, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11945 = torch.constant.int 4
    %14309 = torch.aten.mul.int %int4_11945, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11946 = torch.constant.int 16
    %int1_11947 = torch.constant.int 1
    %int128_11948 = torch.constant.int 128
    %14310 = torch.prim.ListConstruct %14309, %int16_11946, %int1_11947, %int128_11948 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14311 = torch.aten.view %14289, %14310 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14311, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11949 = torch.constant.int 4
    %14312 = torch.aten.mul.int %int4_11949, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11950 = torch.constant.int 16
    %int1_11951 = torch.constant.int 1
    %int128_11952 = torch.constant.int 128
    %14313 = torch.prim.ListConstruct %14312, %int16_11950, %int1_11951, %int128_11952 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14314 = torch.aten.view %14291, %14313 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14314, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_11953 = torch.constant.int 4
    %14315 = torch.aten.mul.int %int4_11953, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_11954 = torch.constant.int 16
    %int1_11955 = torch.constant.int 1
    %int128_11956 = torch.constant.int 128
    %14316 = torch.prim.ListConstruct %14315, %int16_11954, %int1_11955, %int128_11956 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14317 = torch.aten.view %14293, %14316 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14317, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
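    // Add 1 to every slot index (12 -> 13 within each page), producing the
    // destination slots for the second operand set, i.e. the companion
    // entries of the same transformer block under the inferred K/V layout.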
    %int1_11957 = torch.constant.int 1
    %int1_11958 = torch.constant.int 1
    %14318 = torch.aten.add.Scalar %14206, %int1_11957, %int1_11958 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14318, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_11959 = torch.constant.int 1
    %int1_11960 = torch.constant.int 1
    %14319 = torch.aten.add.Scalar %14207, %int1_11959, %int1_11960 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14319, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_11961 = torch.constant.int 1
    %int1_11962 = torch.constant.int 1
    %14320 = torch.aten.add.Scalar %14208, %int1_11961, %int1_11962 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14320, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_11963 = torch.constant.int 1
    %int1_11964 = torch.constant.int 1
    %14321 = torch.aten.add.Scalar %14209, %int1_11963, %int1_11964 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14321, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_11965 = torch.constant.int 1
    %int1_11966 = torch.constant.int 1
    %14322 = torch.aten.add.Scalar %14210, %int1_11965, %int1_11966 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14322, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_11967 = torch.constant.int 1
    %int1_11968 = torch.constant.int 1
    %14323 = torch.aten.add.Scalar %14211, %int1_11967, %int1_11968 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14323, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_11969 = torch.constant.int 1
    %int1_11970 = torch.constant.int 1
    %14324 = torch.aten.add.Scalar %14212, %int1_11969, %int1_11970 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14324, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_11971 = torch.constant.int 1
    %int1_11972 = torch.constant.int 1
    %14325 = torch.aten.add.Scalar %14213, %int1_11971, %int1_11972 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %14325, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
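    // Flatten the +1 index tensors to [4*pages] as well.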
    %int4_11973 = torch.constant.int 4
    %14326 = torch.aten.mul.int %int4_11973, %3095 : !torch.int, !torch.int -> !torch.int
    %14327 = torch.prim.ListConstruct %14326 : (!torch.int) -> !torch.list<int>
    %14328 = torch.aten.view %14318, %14327 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14328, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11974 = torch.constant.int 4
    %14329 = torch.aten.mul.int %int4_11974, %3095 : !torch.int, !torch.int -> !torch.int
    %14330 = torch.prim.ListConstruct %14329 : (!torch.int) -> !torch.list<int>
    %14331 = torch.aten.view %14319, %14330 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14331, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11975 = torch.constant.int 4
    %14332 = torch.aten.mul.int %int4_11975, %3095 : !torch.int, !torch.int -> !torch.int
    %14333 = torch.prim.ListConstruct %14332 : (!torch.int) -> !torch.list<int>
    %14334 = torch.aten.view %14320, %14333 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14334, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11976 = torch.constant.int 4
    %14335 = torch.aten.mul.int %int4_11976, %3095 : !torch.int, !torch.int -> !torch.int
    %14336 = torch.prim.ListConstruct %14335 : (!torch.int) -> !torch.list<int>
    %14337 = torch.aten.view %14321, %14336 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14337, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11977 = torch.constant.int 4
    %14338 = torch.aten.mul.int %int4_11977, %3095 : !torch.int, !torch.int -> !torch.int
    %14339 = torch.prim.ListConstruct %14338 : (!torch.int) -> !torch.list<int>
    %14340 = torch.aten.view %14322, %14339 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14340, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11978 = torch.constant.int 4
    %14341 = torch.aten.mul.int %int4_11978, %3095 : !torch.int, !torch.int -> !torch.int
    %14342 = torch.prim.ListConstruct %14341 : (!torch.int) -> !torch.list<int>
    %14343 = torch.aten.view %14323, %14342 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14343, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11979 = torch.constant.int 4
    %14344 = torch.aten.mul.int %int4_11979, %3095 : !torch.int, !torch.int -> !torch.int
    %14345 = torch.prim.ListConstruct %14344 : (!torch.int) -> !torch.list<int>
    %14346 = torch.aten.view %14324, %14345 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14346, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_11980 = torch.constant.int 4
    %14347 = torch.aten.mul.int %int4_11980, %3095 : !torch.int, !torch.int -> !torch.int
    %14348 = torch.prim.ListConstruct %14347 : (!torch.int) -> !torch.list<int>
    %14349 = torch.aten.view %14325, %14348 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14349, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
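    // Per shard, concatenate the slot-12 and slot-13 index vectors into a
    // single list of twice the length, presumably so both entry kinds can
    // be written together downstream.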
    %14350 = torch.prim.ListConstruct %14256, %14328 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_11981 = torch.constant.int 0
    %14351 = torch.aten.cat %14350, %int0_11981 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14351, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %14352 = torch.prim.ListConstruct %14259, %14331 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_11982 = torch.constant.int 0
    %14353 = torch.aten.cat %14352, %int0_11982 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14353, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %14354 = torch.prim.ListConstruct %14262, %14334 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_11983 = torch.constant.int 0
    %14355 = torch.aten.cat %14354, %int0_11983 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14355, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %14356 = torch.prim.ListConstruct %14265, %14337 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_11984 = torch.constant.int 0
    %14357 = torch.aten.cat %14356, %int0_11984 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14357, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %14358 = torch.prim.ListConstruct %14268, %14340 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_11985 = torch.constant.int 0
    %14359 = torch.aten.cat %14358, %int0_11985 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14359, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %14360 = torch.prim.ListConstruct %14271, %14343 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_11986 = torch.constant.int 0
    %14361 = torch.aten.cat %14360, %int0_11986 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14361, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %14362 = torch.prim.ListConstruct %14274, %14346 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_11987 = torch.constant.int 0
    %14363 = torch.aten.cat %14362, %int0_11987 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14363, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %14364 = torch.prim.ListConstruct %14277, %14349 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_11988 = torch.constant.int 0
    %14365 = torch.aten.cat %14364, %int0_11988 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %14365, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
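    // Concatenate the matching payloads (rotated tensors first, then the
    // un-rotated second set) into [8*pages, 16, 1, 128], aligned one-to-one
    // with the combined index list above.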
    %14366 = torch.prim.ListConstruct %14232, %14296 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_11989 = torch.constant.int 0
    %14367 = torch.aten.cat %14366, %int0_11989 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14367, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14368 = torch.prim.ListConstruct %14235, %14299 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_11990 = torch.constant.int 0
    %14369 = torch.aten.cat %14368, %int0_11990 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14369, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14370 = torch.prim.ListConstruct %14238, %14302 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_11991 = torch.constant.int 0
    %14371 = torch.aten.cat %14370, %int0_11991 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14371, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14372 = torch.prim.ListConstruct %14241, %14305 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_11992 = torch.constant.int 0
    %14373 = torch.aten.cat %14372, %int0_11992 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14373, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14374 = torch.prim.ListConstruct %14244, %14308 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_11993 = torch.constant.int 0
    %14375 = torch.aten.cat %14374, %int0_11993 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14375, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14376 = torch.prim.ListConstruct %14247, %14311 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_11994 = torch.constant.int 0
    %14377 = torch.aten.cat %14376, %int0_11994 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14377, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14378 = torch.prim.ListConstruct %14250, %14314 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_11995 = torch.constant.int 0
    %14379 = torch.aten.cat %14378, %int0_11995 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14379, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14380 = torch.prim.ListConstruct %14253, %14317 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_11996 = torch.constant.int 0
    %14381 = torch.aten.cat %14380, %int0_11996 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14381, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
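    // Paged KV-cache scatter, repeated once per shard (8x). Each round:
    //   1. view the flat [?,131072] cache slab as [?,32,2,16,1,128]; the
    //      factors are likely pages x transformer blocks (32) x K/V (2) x
    //      positions per page (16) x KV heads per shard (1) x head_dim (128),
    //      and 32*2*16*1*128 = 131072 checks out;
    //   2. collapse to [pages*64, 16, 1, 128] so each (page, block, K-or-V)
    //      slot is one row;
    //   3. index_put the s0*8 new rows at the gathered indices
    //      (accumulate = false);
    //   4. view back through [?,32,2,16,1,128] to the flat [?,131072] form.
    // A rough PyTorch equivalent (illustrative sketch only; the names
    // cache_flat / write_index / new_rows are not from this module):
    //   rows = cache_flat.view(-1, 32, 2, 16, 1, 128).reshape(-1, 16, 1, 128)
    //   rows = rows.index_put((write_index,), new_rows, accumulate=False)
    //   cache_flat = rows.view(-1, 32, 2, 16, 1, 128).reshape(-1, 131072)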
    %int32_11997 = torch.constant.int 32
    %int2_11998 = torch.constant.int 2
    %int16_11999 = torch.constant.int 16
    %int1_12000 = torch.constant.int 1
    %int128_12001 = torch.constant.int 128
    %14382 = torch.prim.ListConstruct %3023, %int32_11997, %int2_11998, %int16_11999, %int1_12000, %int128_12001 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14383 = torch.aten.view %12532, %14382 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14383, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_12002 = torch.constant.int 32
    %14384 = torch.aten.mul.int %3023, %int32_12002 : !torch.int, !torch.int -> !torch.int
    %int2_12003 = torch.constant.int 2
    %14385 = torch.aten.mul.int %14384, %int2_12003 : !torch.int, !torch.int -> !torch.int
    %int16_12004 = torch.constant.int 16
    %int1_12005 = torch.constant.int 1
    %int128_12006 = torch.constant.int 128
    %14386 = torch.prim.ListConstruct %14385, %int16_12004, %int1_12005, %int128_12006 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14387 = torch.aten.view %14383, %14386 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14387, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14388 = torch.prim.ListConstruct %14351 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_12007 = torch.constant.bool false
    %14389 = torch.aten.index_put %14387, %14388, %14367, %false_12007 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14389, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_12008 = torch.constant.int 32
    %int2_12009 = torch.constant.int 2
    %int16_12010 = torch.constant.int 16
    %int1_12011 = torch.constant.int 1
    %int128_12012 = torch.constant.int 128
    %14390 = torch.prim.ListConstruct %3023, %int32_12008, %int2_12009, %int16_12010, %int1_12011, %int128_12012 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14391 = torch.aten.view %14389, %14390 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14391, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_12013 = torch.constant.int 131072
    %14392 = torch.prim.ListConstruct %3023, %int131072_12013 : (!torch.int, !torch.int) -> !torch.list<int>
    %14393 = torch.aten.view %14391, %14392 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %14393, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_12014 = torch.constant.int 32
    %int2_12015 = torch.constant.int 2
    %int16_12016 = torch.constant.int 16
    %int1_12017 = torch.constant.int 1
    %int128_12018 = torch.constant.int 128
    %14394 = torch.prim.ListConstruct %3026, %int32_12014, %int2_12015, %int16_12016, %int1_12017, %int128_12018 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14395 = torch.aten.view %12544, %14394 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14395, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_12019 = torch.constant.int 32
    %14396 = torch.aten.mul.int %3026, %int32_12019 : !torch.int, !torch.int -> !torch.int
    %int2_12020 = torch.constant.int 2
    %14397 = torch.aten.mul.int %14396, %int2_12020 : !torch.int, !torch.int -> !torch.int
    %int16_12021 = torch.constant.int 16
    %int1_12022 = torch.constant.int 1
    %int128_12023 = torch.constant.int 128
    %14398 = torch.prim.ListConstruct %14397, %int16_12021, %int1_12022, %int128_12023 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14399 = torch.aten.view %14395, %14398 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14399, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14400 = torch.prim.ListConstruct %14353 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_12024 = torch.constant.bool false
    %14401 = torch.aten.index_put %14399, %14400, %14369, %false_12024 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14401, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_12025 = torch.constant.int 32
    %int2_12026 = torch.constant.int 2
    %int16_12027 = torch.constant.int 16
    %int1_12028 = torch.constant.int 1
    %int128_12029 = torch.constant.int 128
    %14402 = torch.prim.ListConstruct %3026, %int32_12025, %int2_12026, %int16_12027, %int1_12028, %int128_12029 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14403 = torch.aten.view %14401, %14402 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14403, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_12030 = torch.constant.int 131072
    %14404 = torch.prim.ListConstruct %3026, %int131072_12030 : (!torch.int, !torch.int) -> !torch.list<int>
    %14405 = torch.aten.view %14403, %14404 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %14405, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_12031 = torch.constant.int 32
    %int2_12032 = torch.constant.int 2
    %int16_12033 = torch.constant.int 16
    %int1_12034 = torch.constant.int 1
    %int128_12035 = torch.constant.int 128
    %14406 = torch.prim.ListConstruct %3029, %int32_12031, %int2_12032, %int16_12033, %int1_12034, %int128_12035 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14407 = torch.aten.view %12556, %14406 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14407, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_12036 = torch.constant.int 32
    %14408 = torch.aten.mul.int %3029, %int32_12036 : !torch.int, !torch.int -> !torch.int
    %int2_12037 = torch.constant.int 2
    %14409 = torch.aten.mul.int %14408, %int2_12037 : !torch.int, !torch.int -> !torch.int
    %int16_12038 = torch.constant.int 16
    %int1_12039 = torch.constant.int 1
    %int128_12040 = torch.constant.int 128
    %14410 = torch.prim.ListConstruct %14409, %int16_12038, %int1_12039, %int128_12040 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14411 = torch.aten.view %14407, %14410 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14411, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14412 = torch.prim.ListConstruct %14355 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_12041 = torch.constant.bool false
    %14413 = torch.aten.index_put %14411, %14412, %14371, %false_12041 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14413, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_12042 = torch.constant.int 32
    %int2_12043 = torch.constant.int 2
    %int16_12044 = torch.constant.int 16
    %int1_12045 = torch.constant.int 1
    %int128_12046 = torch.constant.int 128
    %14414 = torch.prim.ListConstruct %3029, %int32_12042, %int2_12043, %int16_12044, %int1_12045, %int128_12046 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14415 = torch.aten.view %14413, %14414 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14415, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_12047 = torch.constant.int 131072
    %14416 = torch.prim.ListConstruct %3029, %int131072_12047 : (!torch.int, !torch.int) -> !torch.list<int>
    %14417 = torch.aten.view %14415, %14416 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %14417, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_12048 = torch.constant.int 32
    %int2_12049 = torch.constant.int 2
    %int16_12050 = torch.constant.int 16
    %int1_12051 = torch.constant.int 1
    %int128_12052 = torch.constant.int 128
    %14418 = torch.prim.ListConstruct %3032, %int32_12048, %int2_12049, %int16_12050, %int1_12051, %int128_12052 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14419 = torch.aten.view %12568, %14418 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14419, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_12053 = torch.constant.int 32
    %14420 = torch.aten.mul.int %3032, %int32_12053 : !torch.int, !torch.int -> !torch.int
    %int2_12054 = torch.constant.int 2
    %14421 = torch.aten.mul.int %14420, %int2_12054 : !torch.int, !torch.int -> !torch.int
    %int16_12055 = torch.constant.int 16
    %int1_12056 = torch.constant.int 1
    %int128_12057 = torch.constant.int 128
    %14422 = torch.prim.ListConstruct %14421, %int16_12055, %int1_12056, %int128_12057 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14423 = torch.aten.view %14419, %14422 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14423, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14424 = torch.prim.ListConstruct %14357 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_12058 = torch.constant.bool false
    %14425 = torch.aten.index_put %14423, %14424, %14373, %false_12058 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14425, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_12059 = torch.constant.int 32
    %int2_12060 = torch.constant.int 2
    %int16_12061 = torch.constant.int 16
    %int1_12062 = torch.constant.int 1
    %int128_12063 = torch.constant.int 128
    %14426 = torch.prim.ListConstruct %3032, %int32_12059, %int2_12060, %int16_12061, %int1_12062, %int128_12063 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14427 = torch.aten.view %14425, %14426 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14427, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_12064 = torch.constant.int 131072
    %14428 = torch.prim.ListConstruct %3032, %int131072_12064 : (!torch.int, !torch.int) -> !torch.list<int>
    %14429 = torch.aten.view %14427, %14428 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %14429, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_12065 = torch.constant.int 32
    %int2_12066 = torch.constant.int 2
    %int16_12067 = torch.constant.int 16
    %int1_12068 = torch.constant.int 1
    %int128_12069 = torch.constant.int 128
    %14430 = torch.prim.ListConstruct %3035, %int32_12065, %int2_12066, %int16_12067, %int1_12068, %int128_12069 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14431 = torch.aten.view %12580, %14430 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14431, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_12070 = torch.constant.int 32
    %14432 = torch.aten.mul.int %3035, %int32_12070 : !torch.int, !torch.int -> !torch.int
    %int2_12071 = torch.constant.int 2
    %14433 = torch.aten.mul.int %14432, %int2_12071 : !torch.int, !torch.int -> !torch.int
    %int16_12072 = torch.constant.int 16
    %int1_12073 = torch.constant.int 1
    %int128_12074 = torch.constant.int 128
    %14434 = torch.prim.ListConstruct %14433, %int16_12072, %int1_12073, %int128_12074 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14435 = torch.aten.view %14431, %14434 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14435, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14436 = torch.prim.ListConstruct %14359 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_12075 = torch.constant.bool false
    %14437 = torch.aten.index_put %14435, %14436, %14375, %false_12075 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14437, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_12076 = torch.constant.int 32
    %int2_12077 = torch.constant.int 2
    %int16_12078 = torch.constant.int 16
    %int1_12079 = torch.constant.int 1
    %int128_12080 = torch.constant.int 128
    %14438 = torch.prim.ListConstruct %3035, %int32_12076, %int2_12077, %int16_12078, %int1_12079, %int128_12080 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14439 = torch.aten.view %14437, %14438 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14439, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_12081 = torch.constant.int 131072
    %14440 = torch.prim.ListConstruct %3035, %int131072_12081 : (!torch.int, !torch.int) -> !torch.list<int>
    %14441 = torch.aten.view %14439, %14440 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %14441, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_12082 = torch.constant.int 32
    %int2_12083 = torch.constant.int 2
    %int16_12084 = torch.constant.int 16
    %int1_12085 = torch.constant.int 1
    %int128_12086 = torch.constant.int 128
    %14442 = torch.prim.ListConstruct %3038, %int32_12082, %int2_12083, %int16_12084, %int1_12085, %int128_12086 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14443 = torch.aten.view %12592, %14442 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14443, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_12087 = torch.constant.int 32
    %14444 = torch.aten.mul.int %3038, %int32_12087 : !torch.int, !torch.int -> !torch.int
    %int2_12088 = torch.constant.int 2
    %14445 = torch.aten.mul.int %14444, %int2_12088 : !torch.int, !torch.int -> !torch.int
    %int16_12089 = torch.constant.int 16
    %int1_12090 = torch.constant.int 1
    %int128_12091 = torch.constant.int 128
    %14446 = torch.prim.ListConstruct %14445, %int16_12089, %int1_12090, %int128_12091 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14447 = torch.aten.view %14443, %14446 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14447, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14448 = torch.prim.ListConstruct %14361 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_12092 = torch.constant.bool false
    %14449 = torch.aten.index_put %14447, %14448, %14377, %false_12092 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14449, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_12093 = torch.constant.int 32
    %int2_12094 = torch.constant.int 2
    %int16_12095 = torch.constant.int 16
    %int1_12096 = torch.constant.int 1
    %int128_12097 = torch.constant.int 128
    %14450 = torch.prim.ListConstruct %3038, %int32_12093, %int2_12094, %int16_12095, %int1_12096, %int128_12097 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14451 = torch.aten.view %14449, %14450 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14451, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_12098 = torch.constant.int 131072
    %14452 = torch.prim.ListConstruct %3038, %int131072_12098 : (!torch.int, !torch.int) -> !torch.list<int>
    %14453 = torch.aten.view %14451, %14452 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %14453, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_12099 = torch.constant.int 32
    %int2_12100 = torch.constant.int 2
    %int16_12101 = torch.constant.int 16
    %int1_12102 = torch.constant.int 1
    %int128_12103 = torch.constant.int 128
    %14454 = torch.prim.ListConstruct %3041, %int32_12099, %int2_12100, %int16_12101, %int1_12102, %int128_12103 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14455 = torch.aten.view %12604, %14454 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14455, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_12104 = torch.constant.int 32
    %14456 = torch.aten.mul.int %3041, %int32_12104 : !torch.int, !torch.int -> !torch.int
    %int2_12105 = torch.constant.int 2
    %14457 = torch.aten.mul.int %14456, %int2_12105 : !torch.int, !torch.int -> !torch.int
    %int16_12106 = torch.constant.int 16
    %int1_12107 = torch.constant.int 1
    %int128_12108 = torch.constant.int 128
    %14458 = torch.prim.ListConstruct %14457, %int16_12106, %int1_12107, %int128_12108 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14459 = torch.aten.view %14455, %14458 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14459, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14460 = torch.prim.ListConstruct %14363 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_12109 = torch.constant.bool false
    %14461 = torch.aten.index_put %14459, %14460, %14379, %false_12109 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14461, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_12110 = torch.constant.int 32
    %int2_12111 = torch.constant.int 2
    %int16_12112 = torch.constant.int 16
    %int1_12113 = torch.constant.int 1
    %int128_12114 = torch.constant.int 128
    %14462 = torch.prim.ListConstruct %3041, %int32_12110, %int2_12111, %int16_12112, %int1_12113, %int128_12114 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14463 = torch.aten.view %14461, %14462 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14463, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_12115 = torch.constant.int 131072
    %14464 = torch.prim.ListConstruct %3041, %int131072_12115 : (!torch.int, !torch.int) -> !torch.list<int>
    %14465 = torch.aten.view %14463, %14464 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %14465, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_12116 = torch.constant.int 32
    %int2_12117 = torch.constant.int 2
    %int16_12118 = torch.constant.int 16
    %int1_12119 = torch.constant.int 1
    %int128_12120 = torch.constant.int 128
    %14466 = torch.prim.ListConstruct %3044, %int32_12116, %int2_12117, %int16_12118, %int1_12119, %int128_12120 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14467 = torch.aten.view %12616, %14466 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14467, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_12121 = torch.constant.int 32
    %14468 = torch.aten.mul.int %3044, %int32_12121 : !torch.int, !torch.int -> !torch.int
    %int2_12122 = torch.constant.int 2
    %14469 = torch.aten.mul.int %14468, %int2_12122 : !torch.int, !torch.int -> !torch.int
    %int16_12123 = torch.constant.int 16
    %int1_12124 = torch.constant.int 1
    %int128_12125 = torch.constant.int 128
    %14470 = torch.prim.ListConstruct %14469, %int16_12123, %int1_12124, %int128_12125 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14471 = torch.aten.view %14467, %14470 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14471, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %14472 = torch.prim.ListConstruct %14365 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_12126 = torch.constant.bool false
    %14473 = torch.aten.index_put %14471, %14472, %14381, %false_12126 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %14473, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_12127 = torch.constant.int 32
    %int2_12128 = torch.constant.int 2
    %int16_12129 = torch.constant.int 16
    %int1_12130 = torch.constant.int 1
    %int128_12131 = torch.constant.int 128
    %14474 = torch.prim.ListConstruct %3044, %int32_12127, %int2_12128, %int16_12129, %int1_12130, %int128_12131 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14475 = torch.aten.view %14473, %14474 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %14475, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_12132 = torch.constant.int 131072
    %14476 = torch.prim.ListConstruct %3044, %int131072_12132 : (!torch.int, !torch.int) -> !torch.list<int>
    %14477 = torch.aten.view %14475, %14476 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %14477, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
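    // Grouped-query head expansion (first operand set, likely K): each
    // shard's [4,?,1,128] tensor gains a broadcast dim at -2, is expanded to
    // [4,?,1,4,128], then flattened to [4,?,4,128], replicating the single
    // per-shard KV head across the shard's 4 query heads.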
    %int-2_12133 = torch.constant.int -2
    %14478 = torch.aten.unsqueeze %14092, %int-2_12133 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12134 = torch.constant.int -2
    %14479 = torch.aten.unsqueeze %14107, %int-2_12134 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12135 = torch.constant.int -2
    %14480 = torch.aten.unsqueeze %14122, %int-2_12135 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12136 = torch.constant.int -2
    %14481 = torch.aten.unsqueeze %14137, %int-2_12136 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12137 = torch.constant.int -2
    %14482 = torch.aten.unsqueeze %14152, %int-2_12137 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12138 = torch.constant.int -2
    %14483 = torch.aten.unsqueeze %14167, %int-2_12138 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12139 = torch.constant.int -2
    %14484 = torch.aten.unsqueeze %14182, %int-2_12139 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12140 = torch.constant.int -2
    %14485 = torch.aten.unsqueeze %14197, %int-2_12140 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int4_12141 = torch.constant.int 4
    %int1_12142 = torch.constant.int 1
    %int4_12143 = torch.constant.int 4
    %int128_12144 = torch.constant.int 128
    %14486 = torch.prim.ListConstruct %int4_12141, %14078, %int1_12142, %int4_12143, %int128_12144 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12145 = torch.constant.bool false
    %14487 = torch.aten.expand %14478, %14486, %false_12145 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12146 = torch.constant.int 4
    %int1_12147 = torch.constant.int 1
    %int4_12148 = torch.constant.int 4
    %int128_12149 = torch.constant.int 128
    %14488 = torch.prim.ListConstruct %int4_12146, %14078, %int1_12147, %int4_12148, %int128_12149 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12150 = torch.constant.bool false
    %14489 = torch.aten.expand %14479, %14488, %false_12150 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12151 = torch.constant.int 4
    %int1_12152 = torch.constant.int 1
    %int4_12153 = torch.constant.int 4
    %int128_12154 = torch.constant.int 128
    %14490 = torch.prim.ListConstruct %int4_12151, %14078, %int1_12152, %int4_12153, %int128_12154 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12155 = torch.constant.bool false
    %14491 = torch.aten.expand %14480, %14490, %false_12155 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12156 = torch.constant.int 4
    %int1_12157 = torch.constant.int 1
    %int4_12158 = torch.constant.int 4
    %int128_12159 = torch.constant.int 128
    %14492 = torch.prim.ListConstruct %int4_12156, %14078, %int1_12157, %int4_12158, %int128_12159 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12160 = torch.constant.bool false
    %14493 = torch.aten.expand %14481, %14492, %false_12160 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12161 = torch.constant.int 4
    %int1_12162 = torch.constant.int 1
    %int4_12163 = torch.constant.int 4
    %int128_12164 = torch.constant.int 128
    %14494 = torch.prim.ListConstruct %int4_12161, %14078, %int1_12162, %int4_12163, %int128_12164 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12165 = torch.constant.bool false
    %14495 = torch.aten.expand %14482, %14494, %false_12165 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12166 = torch.constant.int 4
    %int1_12167 = torch.constant.int 1
    %int4_12168 = torch.constant.int 4
    %int128_12169 = torch.constant.int 128
    %14496 = torch.prim.ListConstruct %int4_12166, %14078, %int1_12167, %int4_12168, %int128_12169 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12170 = torch.constant.bool false
    %14497 = torch.aten.expand %14483, %14496, %false_12170 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12171 = torch.constant.int 4
    %int1_12172 = torch.constant.int 1
    %int4_12173 = torch.constant.int 4
    %int128_12174 = torch.constant.int 128
    %14498 = torch.prim.ListConstruct %int4_12171, %14078, %int1_12172, %int4_12173, %int128_12174 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12175 = torch.constant.bool false
    %14499 = torch.aten.expand %14484, %14498, %false_12175 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12176 = torch.constant.int 4
    %int1_12177 = torch.constant.int 1
    %int4_12178 = torch.constant.int 4
    %int128_12179 = torch.constant.int 128
    %14500 = torch.prim.ListConstruct %int4_12176, %14078, %int1_12177, %int4_12178, %int128_12179 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12180 = torch.constant.bool false
    %14501 = torch.aten.expand %14485, %14500, %false_12180 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12181 = torch.constant.int 4
    %int4_12182 = torch.constant.int 4
    %int128_12183 = torch.constant.int 128
    %14502 = torch.prim.ListConstruct %int4_12181, %14078, %int4_12182, %int128_12183 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14503 = torch.aten.view %14487, %14502 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12184 = torch.constant.int 4
    %int4_12185 = torch.constant.int 4
    %int128_12186 = torch.constant.int 128
    %14504 = torch.prim.ListConstruct %int4_12184, %14078, %int4_12185, %int128_12186 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14505 = torch.aten.view %14489, %14504 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12187 = torch.constant.int 4
    %int4_12188 = torch.constant.int 4
    %int128_12189 = torch.constant.int 128
    %14506 = torch.prim.ListConstruct %int4_12187, %14078, %int4_12188, %int128_12189 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14507 = torch.aten.view %14491, %14506 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12190 = torch.constant.int 4
    %int4_12191 = torch.constant.int 4
    %int128_12192 = torch.constant.int 128
    %14508 = torch.prim.ListConstruct %int4_12190, %14078, %int4_12191, %int128_12192 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14509 = torch.aten.view %14493, %14508 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12193 = torch.constant.int 4
    %int4_12194 = torch.constant.int 4
    %int128_12195 = torch.constant.int 128
    %14510 = torch.prim.ListConstruct %int4_12193, %14078, %int4_12194, %int128_12195 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14511 = torch.aten.view %14495, %14510 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12196 = torch.constant.int 4
    %int4_12197 = torch.constant.int 4
    %int128_12198 = torch.constant.int 128
    %14512 = torch.prim.ListConstruct %int4_12196, %14078, %int4_12197, %int128_12198 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14513 = torch.aten.view %14497, %14512 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12199 = torch.constant.int 4
    %int4_12200 = torch.constant.int 4
    %int128_12201 = torch.constant.int 128
    %14514 = torch.prim.ListConstruct %int4_12199, %14078, %int4_12200, %int128_12201 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14515 = torch.aten.view %14499, %14514 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12202 = torch.constant.int 4
    %int4_12203 = torch.constant.int 4
    %int128_12204 = torch.constant.int 128
    %14516 = torch.prim.ListConstruct %int4_12202, %14078, %int4_12203, %int128_12204 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14517 = torch.aten.view %14501, %14516 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
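    // Same unsqueeze at dim -2 for the second operand set (likely V):
    // [4,?,1,128] -> [4,?,1,1,128] per shard.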
    %int-2_12205 = torch.constant.int -2
    %14518 = torch.aten.unsqueeze %13867, %int-2_12205 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12206 = torch.constant.int -2
    %14519 = torch.aten.unsqueeze %13869, %int-2_12206 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12207 = torch.constant.int -2
    %14520 = torch.aten.unsqueeze %13871, %int-2_12207 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12208 = torch.constant.int -2
    %14521 = torch.aten.unsqueeze %13873, %int-2_12208 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12209 = torch.constant.int -2
    %14522 = torch.aten.unsqueeze %13875, %int-2_12209 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12210 = torch.constant.int -2
    %14523 = torch.aten.unsqueeze %13877, %int-2_12210 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12211 = torch.constant.int -2
    %14524 = torch.aten.unsqueeze %13879, %int-2_12211 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_12212 = torch.constant.int -2
    %14525 = torch.aten.unsqueeze %13881, %int-2_12212 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %14525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
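    // The dynamic sequence dim is re-read from %13791 ([4,?,128]), then the
    // same expand-and-flatten broadcast ([4,?,1,1,128] -> [4,?,1,4,128] ->
    // [4,?,4,128]) is applied to this operand set across all 8 shards.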
    %int1_12213 = torch.constant.int 1
    %14526 = torch.aten.size.int %13791, %int1_12213 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_12214 = torch.constant.int 4
    %int1_12215 = torch.constant.int 1
    %int4_12216 = torch.constant.int 4
    %int128_12217 = torch.constant.int 128
    %14527 = torch.prim.ListConstruct %int4_12214, %14526, %int1_12215, %int4_12216, %int128_12217 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12218 = torch.constant.bool false
    %14528 = torch.aten.expand %14518, %14527, %false_12218 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12219 = torch.constant.int 4
    %int1_12220 = torch.constant.int 1
    %int4_12221 = torch.constant.int 4
    %int128_12222 = torch.constant.int 128
    %14529 = torch.prim.ListConstruct %int4_12219, %14526, %int1_12220, %int4_12221, %int128_12222 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12223 = torch.constant.bool false
    %14530 = torch.aten.expand %14519, %14529, %false_12223 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12224 = torch.constant.int 4
    %int1_12225 = torch.constant.int 1
    %int4_12226 = torch.constant.int 4
    %int128_12227 = torch.constant.int 128
    %14531 = torch.prim.ListConstruct %int4_12224, %14526, %int1_12225, %int4_12226, %int128_12227 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12228 = torch.constant.bool false
    %14532 = torch.aten.expand %14520, %14531, %false_12228 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12229 = torch.constant.int 4
    %int1_12230 = torch.constant.int 1
    %int4_12231 = torch.constant.int 4
    %int128_12232 = torch.constant.int 128
    %14533 = torch.prim.ListConstruct %int4_12229, %14526, %int1_12230, %int4_12231, %int128_12232 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12233 = torch.constant.bool false
    %14534 = torch.aten.expand %14521, %14533, %false_12233 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12234 = torch.constant.int 4
    %int1_12235 = torch.constant.int 1
    %int4_12236 = torch.constant.int 4
    %int128_12237 = torch.constant.int 128
    %14535 = torch.prim.ListConstruct %int4_12234, %14526, %int1_12235, %int4_12236, %int128_12237 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12238 = torch.constant.bool false
    %14536 = torch.aten.expand %14522, %14535, %false_12238 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12239 = torch.constant.int 4
    %int1_12240 = torch.constant.int 1
    %int4_12241 = torch.constant.int 4
    %int128_12242 = torch.constant.int 128
    %14537 = torch.prim.ListConstruct %int4_12239, %14526, %int1_12240, %int4_12241, %int128_12242 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12243 = torch.constant.bool false
    %14538 = torch.aten.expand %14523, %14537, %false_12243 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12244 = torch.constant.int 4
    %int1_12245 = torch.constant.int 1
    %int4_12246 = torch.constant.int 4
    %int128_12247 = torch.constant.int 128
    %14539 = torch.prim.ListConstruct %int4_12244, %14526, %int1_12245, %int4_12246, %int128_12247 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12248 = torch.constant.bool false
    %14540 = torch.aten.expand %14524, %14539, %false_12248 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12249 = torch.constant.int 4
    %int1_12250 = torch.constant.int 1
    %int4_12251 = torch.constant.int 4
    %int128_12252 = torch.constant.int 128
    %14541 = torch.prim.ListConstruct %int4_12249, %14526, %int1_12250, %int4_12251, %int128_12252 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_12253 = torch.constant.bool false
    %14542 = torch.aten.expand %14525, %14541, %false_12253 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %14542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_12254 = torch.constant.int 4
    %int4_12255 = torch.constant.int 4
    %int128_12256 = torch.constant.int 128
    %14543 = torch.prim.ListConstruct %int4_12254, %14526, %int4_12255, %int128_12256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14544 = torch.aten.view %14528, %14543 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12257 = torch.constant.int 4
    %int4_12258 = torch.constant.int 4
    %int128_12259 = torch.constant.int 128
    %14545 = torch.prim.ListConstruct %int4_12257, %14526, %int4_12258, %int128_12259 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14546 = torch.aten.view %14530, %14545 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12260 = torch.constant.int 4
    %int4_12261 = torch.constant.int 4
    %int128_12262 = torch.constant.int 128
    %14547 = torch.prim.ListConstruct %int4_12260, %14526, %int4_12261, %int128_12262 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14548 = torch.aten.view %14532, %14547 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12263 = torch.constant.int 4
    %int4_12264 = torch.constant.int 4
    %int128_12265 = torch.constant.int 128
    %14549 = torch.prim.ListConstruct %int4_12263, %14526, %int4_12264, %int128_12265 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14550 = torch.aten.view %14534, %14549 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12266 = torch.constant.int 4
    %int4_12267 = torch.constant.int 4
    %int128_12268 = torch.constant.int 128
    %14551 = torch.prim.ListConstruct %int4_12266, %14526, %int4_12267, %int128_12268 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14552 = torch.aten.view %14536, %14551 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12269 = torch.constant.int 4
    %int4_12270 = torch.constant.int 4
    %int128_12271 = torch.constant.int 128
    %14553 = torch.prim.ListConstruct %int4_12269, %14526, %int4_12270, %int128_12271 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14554 = torch.aten.view %14538, %14553 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12272 = torch.constant.int 4
    %int4_12273 = torch.constant.int 4
    %int128_12274 = torch.constant.int 128
    %14555 = torch.prim.ListConstruct %int4_12272, %14526, %int4_12273, %int128_12274 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14556 = torch.aten.view %14540, %14555 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_12275 = torch.constant.int 4
    %int4_12276 = torch.constant.int 4
    %int128_12277 = torch.constant.int 128
    %14557 = torch.prim.ListConstruct %int4_12275, %14526, %int4_12276, %int128_12277 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14558 = torch.aten.view %14542, %14557 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
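    // Transpose all three operand sets from [4, seq, 4, 128] to
    // [4, 4, seq, 128] (batch, heads, seq, head_dim), the layout
    // conventionally used for scaled dot-product attention.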
    %int1_12278 = torch.constant.int 1
    %int2_12279 = torch.constant.int 2
    %14559 = torch.aten.transpose.int %13934, %int1_12278, %int2_12279 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14559, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12280 = torch.constant.int 1
    %int2_12281 = torch.constant.int 2
    %14560 = torch.aten.transpose.int %13949, %int1_12280, %int2_12281 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14560, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12282 = torch.constant.int 1
    %int2_12283 = torch.constant.int 2
    %14561 = torch.aten.transpose.int %13964, %int1_12282, %int2_12283 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14561, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12284 = torch.constant.int 1
    %int2_12285 = torch.constant.int 2
    %14562 = torch.aten.transpose.int %13979, %int1_12284, %int2_12285 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14562, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12286 = torch.constant.int 1
    %int2_12287 = torch.constant.int 2
    %14563 = torch.aten.transpose.int %13994, %int1_12286, %int2_12287 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14563, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12288 = torch.constant.int 1
    %int2_12289 = torch.constant.int 2
    %14564 = torch.aten.transpose.int %14009, %int1_12288, %int2_12289 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14564, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12290 = torch.constant.int 1
    %int2_12291 = torch.constant.int 2
    %14565 = torch.aten.transpose.int %14024, %int1_12290, %int2_12291 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14565, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12292 = torch.constant.int 1
    %int2_12293 = torch.constant.int 2
    %14566 = torch.aten.transpose.int %14039, %int1_12292, %int2_12293 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14566, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12294 = torch.constant.int 1
    %int2_12295 = torch.constant.int 2
    %14567 = torch.aten.transpose.int %14503, %int1_12294, %int2_12295 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14567, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12296 = torch.constant.int 1
    %int2_12297 = torch.constant.int 2
    %14568 = torch.aten.transpose.int %14505, %int1_12296, %int2_12297 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14568, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12298 = torch.constant.int 1
    %int2_12299 = torch.constant.int 2
    %14569 = torch.aten.transpose.int %14507, %int1_12298, %int2_12299 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14569, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12300 = torch.constant.int 1
    %int2_12301 = torch.constant.int 2
    %14570 = torch.aten.transpose.int %14509, %int1_12300, %int2_12301 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14570, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12302 = torch.constant.int 1
    %int2_12303 = torch.constant.int 2
    %14571 = torch.aten.transpose.int %14511, %int1_12302, %int2_12303 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14571, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12304 = torch.constant.int 1
    %int2_12305 = torch.constant.int 2
    %14572 = torch.aten.transpose.int %14513, %int1_12304, %int2_12305 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14572, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12306 = torch.constant.int 1
    %int2_12307 = torch.constant.int 2
    %14573 = torch.aten.transpose.int %14515, %int1_12306, %int2_12307 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14573, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12308 = torch.constant.int 1
    %int2_12309 = torch.constant.int 2
    %14574 = torch.aten.transpose.int %14517, %int1_12308, %int2_12309 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14574, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12310 = torch.constant.int 1
    %int2_12311 = torch.constant.int 2
    %14575 = torch.aten.transpose.int %14544, %int1_12310, %int2_12311 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14575, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12312 = torch.constant.int 1
    %int2_12313 = torch.constant.int 2
    %14576 = torch.aten.transpose.int %14546, %int1_12312, %int2_12313 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14576, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12314 = torch.constant.int 1
    %int2_12315 = torch.constant.int 2
    %14577 = torch.aten.transpose.int %14548, %int1_12314, %int2_12315 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14577, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12316 = torch.constant.int 1
    %int2_12317 = torch.constant.int 2
    %14578 = torch.aten.transpose.int %14550, %int1_12316, %int2_12317 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14578, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12318 = torch.constant.int 1
    %int2_12319 = torch.constant.int 2
    %14579 = torch.aten.transpose.int %14552, %int1_12318, %int2_12319 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14579, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12320 = torch.constant.int 1
    %int2_12321 = torch.constant.int 2
    %14580 = torch.aten.transpose.int %14554, %int1_12320, %int2_12321 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14580, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12322 = torch.constant.int 1
    %int2_12323 = torch.constant.int 2
    %14581 = torch.aten.transpose.int %14556, %int1_12322, %int2_12323 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14581, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_12324 = torch.constant.int 1
    %int2_12325 = torch.constant.int 2
    %14582 = torch.aten.transpose.int %14558, %int1_12324, %int2_12325 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %14582, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
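    // Scaled dot-product flash attention, one call per tensor-parallel shard
    // (8 devices). Per shard: Q = %14559..%14566, K = %14567..%14574,
    // V = %14575..%14582, each [4, 4, s0*16, 128]; dropout_p = 0.0,
    // is_causal = true, and the two trailing `none` operands are likely the
    // optional attn_mask and scale arguments.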
    %float0.000000e00_12326 = torch.constant.float 0.000000e+00
    %true_12327 = torch.constant.bool true
    %none_12328 = torch.constant.none
    %none_12329 = torch.constant.none
    %14583:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%14559, %14567, %14575, %float0.000000e00_12326, %true_12327, %none_12328, %none_12329) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %14583#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_12330 = torch.constant.float 0.000000e+00
    %true_12331 = torch.constant.bool true
    %none_12332 = torch.constant.none
    %none_12333 = torch.constant.none
    %14584:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%14560, %14568, %14576, %float0.000000e00_12330, %true_12331, %none_12332, %none_12333) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %14584#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_12334 = torch.constant.float 0.000000e+00
    %true_12335 = torch.constant.bool true
    %none_12336 = torch.constant.none
    %none_12337 = torch.constant.none
    %14585:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%14561, %14569, %14577, %float0.000000e00_12334, %true_12335, %none_12336, %none_12337) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %14585#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_12338 = torch.constant.float 0.000000e+00
    %true_12339 = torch.constant.bool true
    %none_12340 = torch.constant.none
    %none_12341 = torch.constant.none
    %14586:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%14562, %14570, %14578, %float0.000000e00_12338, %true_12339, %none_12340, %none_12341) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %14586#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_12342 = torch.constant.float 0.000000e+00
    %true_12343 = torch.constant.bool true
    %none_12344 = torch.constant.none
    %none_12345 = torch.constant.none
    %14587:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%14563, %14571, %14579, %float0.000000e00_12342, %true_12343, %none_12344, %none_12345) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %14587#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_12346 = torch.constant.float 0.000000e+00
    %true_12347 = torch.constant.bool true
    %none_12348 = torch.constant.none
    %none_12349 = torch.constant.none
    %14588:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%14564, %14572, %14580, %float0.000000e00_12346, %true_12347, %none_12348, %none_12349) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %14588#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_12350 = torch.constant.float 0.000000e+00
    %true_12351 = torch.constant.bool true
    %none_12352 = torch.constant.none
    %none_12353 = torch.constant.none
    %14589:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%14565, %14573, %14581, %float0.000000e00_12350, %true_12351, %none_12352, %none_12353) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %14589#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_12354 = torch.constant.float 0.000000e+00
    %true_12355 = torch.constant.bool true
    %none_12356 = torch.constant.none
    %none_12357 = torch.constant.none
    %14590:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%14566, %14574, %14582, %float0.000000e00_12354, %true_12355, %none_12356, %none_12357) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %14590#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
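    // Swap the head and sequence dimensions of each attention output back:
    // [4, 4, s0*16, 128] -> [4, s0*16, 4, 128] per shard.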
    %int1_12358 = torch.constant.int 1
    %int2_12359 = torch.constant.int 2
    %14591 = torch.aten.transpose.int %14583#0, %int1_12358, %int2_12359 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_12360 = torch.constant.int 1
    %int2_12361 = torch.constant.int 2
    %14592 = torch.aten.transpose.int %14584#0, %int1_12360, %int2_12361 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_12362 = torch.constant.int 1
    %int2_12363 = torch.constant.int 2
    %14593 = torch.aten.transpose.int %14585#0, %int1_12362, %int2_12363 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_12364 = torch.constant.int 1
    %int2_12365 = torch.constant.int 2
    %14594 = torch.aten.transpose.int %14586#0, %int1_12364, %int2_12365 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_12366 = torch.constant.int 1
    %int2_12367 = torch.constant.int 2
    %14595 = torch.aten.transpose.int %14587#0, %int1_12366, %int2_12367 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_12368 = torch.constant.int 1
    %int2_12369 = torch.constant.int 2
    %14596 = torch.aten.transpose.int %14588#0, %int1_12368, %int2_12369 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_12370 = torch.constant.int 1
    %int2_12371 = torch.constant.int 2
    %14597 = torch.aten.transpose.int %14589#0, %int1_12370, %int2_12371 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_12372 = torch.constant.int 1
    %int2_12373 = torch.constant.int 2
    %14598 = torch.aten.transpose.int %14590#0, %int1_12372, %int2_12373 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %14598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
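    // Merge the per-shard head dims: view [4, s0*16, 4, 128] as
    // [4, s0*16, 512] (4 heads * 128 = 512 features per shard).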
    %int4_12374 = torch.constant.int 4
    %int512_12375 = torch.constant.int 512
    %14599 = torch.prim.ListConstruct %int4_12374, %13920, %int512_12375 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14600 = torch.aten.view %14591, %14599 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %14600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_12376 = torch.constant.int 4
    %int512_12377 = torch.constant.int 512
    %14601 = torch.prim.ListConstruct %int4_12376, %13935, %int512_12377 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14602 = torch.aten.view %14592, %14601 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %14602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_12378 = torch.constant.int 4
    %int512_12379 = torch.constant.int 512
    %14603 = torch.prim.ListConstruct %int4_12378, %13950, %int512_12379 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14604 = torch.aten.view %14593, %14603 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %14604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_12380 = torch.constant.int 4
    %int512_12381 = torch.constant.int 512
    %14605 = torch.prim.ListConstruct %int4_12380, %13965, %int512_12381 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14606 = torch.aten.view %14594, %14605 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %14606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_12382 = torch.constant.int 4
    %int512_12383 = torch.constant.int 512
    %14607 = torch.prim.ListConstruct %int4_12382, %13980, %int512_12383 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14608 = torch.aten.view %14595, %14607 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %14608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_12384 = torch.constant.int 4
    %int512_12385 = torch.constant.int 512
    %14609 = torch.prim.ListConstruct %int4_12384, %13995, %int512_12385 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14610 = torch.aten.view %14596, %14609 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %14610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_12386 = torch.constant.int 4
    %int512_12387 = torch.constant.int 512
    %14611 = torch.prim.ListConstruct %int4_12386, %14010, %int512_12387 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14612 = torch.aten.view %14597, %14611 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %14612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_12388 = torch.constant.int 4
    %int512_12389 = torch.constant.int 512
    %14613 = torch.prim.ListConstruct %int4_12388, %14025, %int512_12389 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14614 = torch.aten.view %14598, %14613 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %14614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
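    // Transpose the eight [4096, 512] projection weight shards (%472..%479)
    // to [512, 4096]; these appear to be the row-sharded attention output
    // projection (8 shards * 512 = 4096 input features).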
    %int1_12390 = torch.constant.int 1
    %int0_12391 = torch.constant.int 0
    %14615 = torch.prim.ListConstruct %int1_12390, %int0_12391 : (!torch.int, !torch.int) -> !torch.list<int>
    %14616 = torch.aten.permute %472, %14615 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_12392 = torch.constant.int 1
    %int0_12393 = torch.constant.int 0
    %14617 = torch.prim.ListConstruct %int1_12392, %int0_12393 : (!torch.int, !torch.int) -> !torch.list<int>
    %14618 = torch.aten.permute %473, %14617 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_12394 = torch.constant.int 1
    %int0_12395 = torch.constant.int 0
    %14619 = torch.prim.ListConstruct %int1_12394, %int0_12395 : (!torch.int, !torch.int) -> !torch.list<int>
    %14620 = torch.aten.permute %474, %14619 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_12396 = torch.constant.int 1
    %int0_12397 = torch.constant.int 0
    %14621 = torch.prim.ListConstruct %int1_12396, %int0_12397 : (!torch.int, !torch.int) -> !torch.list<int>
    %14622 = torch.aten.permute %475, %14621 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_12398 = torch.constant.int 1
    %int0_12399 = torch.constant.int 0
    %14623 = torch.prim.ListConstruct %int1_12398, %int0_12399 : (!torch.int, !torch.int) -> !torch.list<int>
    %14624 = torch.aten.permute %476, %14623 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_12400 = torch.constant.int 1
    %int0_12401 = torch.constant.int 0
    %14625 = torch.prim.ListConstruct %int1_12400, %int0_12401 : (!torch.int, !torch.int) -> !torch.list<int>
    %14626 = torch.aten.permute %477, %14625 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_12402 = torch.constant.int 1
    %int0_12403 = torch.constant.int 0
    %14627 = torch.prim.ListConstruct %int1_12402, %int0_12403 : (!torch.int, !torch.int) -> !torch.list<int>
    %14628 = torch.aten.permute %478, %14627 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_12404 = torch.constant.int 1
    %int0_12405 = torch.constant.int 0
    %14629 = torch.prim.ListConstruct %int1_12404, %int0_12405 : (!torch.int, !torch.int) -> !torch.list<int>
    %14630 = torch.aten.permute %479, %14629 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
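    // Per-shard output projection: flatten [4, s0*16, 512] to [s0*64, 512],
    // matmul against the [512, 4096] weight shard, then restore
    // [4, s0*16, 4096]. Each device now holds one partial sum of the full
    // projection, to be reduced across devices below.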
    %int4_12406 = torch.constant.int 4
    %14631 = torch.aten.mul.int %int4_12406, %13920 : !torch.int, !torch.int -> !torch.int
    %int512_12407 = torch.constant.int 512
    %14632 = torch.prim.ListConstruct %14631, %int512_12407 : (!torch.int, !torch.int) -> !torch.list<int>
    %14633 = torch.aten.view %14600, %14632 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %14633, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %14634 = torch.aten.mm %14633, %14616 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %14634, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12408 = torch.constant.int 4
    %int4096_12409 = torch.constant.int 4096
    %14635 = torch.prim.ListConstruct %int4_12408, %13920, %int4096_12409 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14636 = torch.aten.view %14634, %14635 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_12410 = torch.constant.int 4
    %14637 = torch.aten.mul.int %int4_12410, %13935 : !torch.int, !torch.int -> !torch.int
    %int512_12411 = torch.constant.int 512
    %14638 = torch.prim.ListConstruct %14637, %int512_12411 : (!torch.int, !torch.int) -> !torch.list<int>
    %14639 = torch.aten.view %14602, %14638 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %14639, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %14640 = torch.aten.mm %14639, %14618 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %14640, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12412 = torch.constant.int 4
    %int4096_12413 = torch.constant.int 4096
    %14641 = torch.prim.ListConstruct %int4_12412, %13935, %int4096_12413 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14642 = torch.aten.view %14640, %14641 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_12414 = torch.constant.int 4
    %14643 = torch.aten.mul.int %int4_12414, %13950 : !torch.int, !torch.int -> !torch.int
    %int512_12415 = torch.constant.int 512
    %14644 = torch.prim.ListConstruct %14643, %int512_12415 : (!torch.int, !torch.int) -> !torch.list<int>
    %14645 = torch.aten.view %14604, %14644 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %14645, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %14646 = torch.aten.mm %14645, %14620 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %14646, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12416 = torch.constant.int 4
    %int4096_12417 = torch.constant.int 4096
    %14647 = torch.prim.ListConstruct %int4_12416, %13950, %int4096_12417 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14648 = torch.aten.view %14646, %14647 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_12418 = torch.constant.int 4
    %14649 = torch.aten.mul.int %int4_12418, %13965 : !torch.int, !torch.int -> !torch.int
    %int512_12419 = torch.constant.int 512
    %14650 = torch.prim.ListConstruct %14649, %int512_12419 : (!torch.int, !torch.int) -> !torch.list<int>
    %14651 = torch.aten.view %14606, %14650 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %14651, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %14652 = torch.aten.mm %14651, %14622 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %14652, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12420 = torch.constant.int 4
    %int4096_12421 = torch.constant.int 4096
    %14653 = torch.prim.ListConstruct %int4_12420, %13965, %int4096_12421 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14654 = torch.aten.view %14652, %14653 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_12422 = torch.constant.int 4
    %14655 = torch.aten.mul.int %int4_12422, %13980 : !torch.int, !torch.int -> !torch.int
    %int512_12423 = torch.constant.int 512
    %14656 = torch.prim.ListConstruct %14655, %int512_12423 : (!torch.int, !torch.int) -> !torch.list<int>
    %14657 = torch.aten.view %14608, %14656 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %14657, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %14658 = torch.aten.mm %14657, %14624 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %14658, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12424 = torch.constant.int 4
    %int4096_12425 = torch.constant.int 4096
    %14659 = torch.prim.ListConstruct %int4_12424, %13980, %int4096_12425 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14660 = torch.aten.view %14658, %14659 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_12426 = torch.constant.int 4
    %14661 = torch.aten.mul.int %int4_12426, %13995 : !torch.int, !torch.int -> !torch.int
    %int512_12427 = torch.constant.int 512
    %14662 = torch.prim.ListConstruct %14661, %int512_12427 : (!torch.int, !torch.int) -> !torch.list<int>
    %14663 = torch.aten.view %14610, %14662 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %14663, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %14664 = torch.aten.mm %14663, %14626 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %14664, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12428 = torch.constant.int 4
    %int4096_12429 = torch.constant.int 4096
    %14665 = torch.prim.ListConstruct %int4_12428, %13995, %int4096_12429 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14666 = torch.aten.view %14664, %14665 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_12430 = torch.constant.int 4
    %14667 = torch.aten.mul.int %int4_12430, %14010 : !torch.int, !torch.int -> !torch.int
    %int512_12431 = torch.constant.int 512
    %14668 = torch.prim.ListConstruct %14667, %int512_12431 : (!torch.int, !torch.int) -> !torch.list<int>
    %14669 = torch.aten.view %14612, %14668 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %14669, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %14670 = torch.aten.mm %14669, %14628 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %14670, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12432 = torch.constant.int 4
    %int4096_12433 = torch.constant.int 4096
    %14671 = torch.prim.ListConstruct %int4_12432, %14010, %int4096_12433 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14672 = torch.aten.view %14670, %14671 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_12434 = torch.constant.int 4
    %14673 = torch.aten.mul.int %int4_12434, %14025 : !torch.int, !torch.int -> !torch.int
    %int512_12435 = torch.constant.int 512
    %14674 = torch.prim.ListConstruct %14673, %int512_12435 : (!torch.int, !torch.int) -> !torch.list<int>
    %14675 = torch.aten.view %14614, %14674 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %14675, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %14676 = torch.aten.mm %14675, %14630 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %14676, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12436 = torch.constant.int 4
    %int4096_12437 = torch.constant.int 4096
    %14677 = torch.prim.ListConstruct %int4_12436, %14025, %int4096_12437 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %14678 = torch.aten.view %14676, %14677 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
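    // Reduction for @__device_0: transfer the other seven partial results
    // (%14642 through %14678) over and accumulate them onto the local partial
    // %14636 with chained adds; effectively a gather-and-add all-reduce of the
    // row-parallel matmul.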
    %14679 = torch_c.to_builtin_tensor %14642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12438 = arith.constant 1 : index
    %dim_12439 = tensor.dim %14679, %c1_12438 : tensor<4x?x4096xf16>
    %14680 = flow.tensor.transfer %14679 : tensor<4x?x4096xf16>{%dim_12439} to #hal.device.promise<@__device_0>
    %14681 = torch_c.from_builtin_tensor %14680 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14682 = torch_c.to_builtin_tensor %14648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12440 = arith.constant 1 : index
    %dim_12441 = tensor.dim %14682, %c1_12440 : tensor<4x?x4096xf16>
    %14683 = flow.tensor.transfer %14682 : tensor<4x?x4096xf16>{%dim_12441} to #hal.device.promise<@__device_0>
    %14684 = torch_c.from_builtin_tensor %14683 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14685 = torch_c.to_builtin_tensor %14654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12442 = arith.constant 1 : index
    %dim_12443 = tensor.dim %14685, %c1_12442 : tensor<4x?x4096xf16>
    %14686 = flow.tensor.transfer %14685 : tensor<4x?x4096xf16>{%dim_12443} to #hal.device.promise<@__device_0>
    %14687 = torch_c.from_builtin_tensor %14686 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14688 = torch_c.to_builtin_tensor %14660 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12444 = arith.constant 1 : index
    %dim_12445 = tensor.dim %14688, %c1_12444 : tensor<4x?x4096xf16>
    %14689 = flow.tensor.transfer %14688 : tensor<4x?x4096xf16>{%dim_12445} to #hal.device.promise<@__device_0>
    %14690 = torch_c.from_builtin_tensor %14689 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14691 = torch_c.to_builtin_tensor %14666 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12446 = arith.constant 1 : index
    %dim_12447 = tensor.dim %14691, %c1_12446 : tensor<4x?x4096xf16>
    %14692 = flow.tensor.transfer %14691 : tensor<4x?x4096xf16>{%dim_12447} to #hal.device.promise<@__device_0>
    %14693 = torch_c.from_builtin_tensor %14692 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14694 = torch_c.to_builtin_tensor %14672 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12448 = arith.constant 1 : index
    %dim_12449 = tensor.dim %14694, %c1_12448 : tensor<4x?x4096xf16>
    %14695 = flow.tensor.transfer %14694 : tensor<4x?x4096xf16>{%dim_12449} to #hal.device.promise<@__device_0>
    %14696 = torch_c.from_builtin_tensor %14695 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14697 = torch_c.to_builtin_tensor %14678 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12450 = arith.constant 1 : index
    %dim_12451 = tensor.dim %14697, %c1_12450 : tensor<4x?x4096xf16>
    %14698 = flow.tensor.transfer %14697 : tensor<4x?x4096xf16>{%dim_12451} to #hal.device.promise<@__device_0>
    %14699 = torch_c.from_builtin_tensor %14698 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12452 = torch.constant.int 1
    %14700 = torch.aten.add.Tensor %14636, %14681, %int1_12452 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12453 = torch.constant.int 1
    %14701 = torch.aten.add.Tensor %14700, %14684, %int1_12453 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12454 = torch.constant.int 1
    %14702 = torch.aten.add.Tensor %14701, %14687, %int1_12454 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12455 = torch.constant.int 1
    %14703 = torch.aten.add.Tensor %14702, %14690, %int1_12455 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12456 = torch.constant.int 1
    %14704 = torch.aten.add.Tensor %14703, %14693, %int1_12456 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12457 = torch.constant.int 1
    %14705 = torch.aten.add.Tensor %14704, %14696, %int1_12457 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12458 = torch.constant.int 1
    %14706 = torch.aten.add.Tensor %14705, %14699, %int1_12458 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
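    // Same reduction, materialized on @__device_1 (its local partial %14642 is
    // added directly, the rest are transferred in).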
    %14707 = torch_c.to_builtin_tensor %14636 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12459 = arith.constant 1 : index
    %dim_12460 = tensor.dim %14707, %c1_12459 : tensor<4x?x4096xf16>
    %14708 = flow.tensor.transfer %14707 : tensor<4x?x4096xf16>{%dim_12460} to #hal.device.promise<@__device_1>
    %14709 = torch_c.from_builtin_tensor %14708 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14710 = torch_c.to_builtin_tensor %14648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12461 = arith.constant 1 : index
    %dim_12462 = tensor.dim %14710, %c1_12461 : tensor<4x?x4096xf16>
    %14711 = flow.tensor.transfer %14710 : tensor<4x?x4096xf16>{%dim_12462} to #hal.device.promise<@__device_1>
    %14712 = torch_c.from_builtin_tensor %14711 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14713 = torch_c.to_builtin_tensor %14654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12463 = arith.constant 1 : index
    %dim_12464 = tensor.dim %14713, %c1_12463 : tensor<4x?x4096xf16>
    %14714 = flow.tensor.transfer %14713 : tensor<4x?x4096xf16>{%dim_12464} to #hal.device.promise<@__device_1>
    %14715 = torch_c.from_builtin_tensor %14714 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14716 = torch_c.to_builtin_tensor %14660 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12465 = arith.constant 1 : index
    %dim_12466 = tensor.dim %14716, %c1_12465 : tensor<4x?x4096xf16>
    %14717 = flow.tensor.transfer %14716 : tensor<4x?x4096xf16>{%dim_12466} to #hal.device.promise<@__device_1>
    %14718 = torch_c.from_builtin_tensor %14717 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14719 = torch_c.to_builtin_tensor %14666 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12467 = arith.constant 1 : index
    %dim_12468 = tensor.dim %14719, %c1_12467 : tensor<4x?x4096xf16>
    %14720 = flow.tensor.transfer %14719 : tensor<4x?x4096xf16>{%dim_12468} to #hal.device.promise<@__device_1>
    %14721 = torch_c.from_builtin_tensor %14720 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14722 = torch_c.to_builtin_tensor %14672 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12469 = arith.constant 1 : index
    %dim_12470 = tensor.dim %14722, %c1_12469 : tensor<4x?x4096xf16>
    %14723 = flow.tensor.transfer %14722 : tensor<4x?x4096xf16>{%dim_12470} to #hal.device.promise<@__device_1>
    %14724 = torch_c.from_builtin_tensor %14723 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14725 = torch_c.to_builtin_tensor %14678 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12471 = arith.constant 1 : index
    %dim_12472 = tensor.dim %14725, %c1_12471 : tensor<4x?x4096xf16>
    %14726 = flow.tensor.transfer %14725 : tensor<4x?x4096xf16>{%dim_12472} to #hal.device.promise<@__device_1>
    %14727 = torch_c.from_builtin_tensor %14726 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12473 = torch.constant.int 1
    %14728 = torch.aten.add.Tensor %14709, %14642, %int1_12473 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12474 = torch.constant.int 1
    %14729 = torch.aten.add.Tensor %14728, %14712, %int1_12474 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12475 = torch.constant.int 1
    %14730 = torch.aten.add.Tensor %14729, %14715, %int1_12475 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12476 = torch.constant.int 1
    %14731 = torch.aten.add.Tensor %14730, %14718, %int1_12476 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12477 = torch.constant.int 1
    %14732 = torch.aten.add.Tensor %14731, %14721, %int1_12477 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12478 = torch.constant.int 1
    %14733 = torch.aten.add.Tensor %14732, %14724, %int1_12478 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12479 = torch.constant.int 1
    %14734 = torch.aten.add.Tensor %14733, %14727, %int1_12479 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
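    // Same reduction, materialized on @__device_2.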
    %14735 = torch_c.to_builtin_tensor %14636 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12480 = arith.constant 1 : index
    %dim_12481 = tensor.dim %14735, %c1_12480 : tensor<4x?x4096xf16>
    %14736 = flow.tensor.transfer %14735 : tensor<4x?x4096xf16>{%dim_12481} to #hal.device.promise<@__device_2>
    %14737 = torch_c.from_builtin_tensor %14736 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14738 = torch_c.to_builtin_tensor %14642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12482 = arith.constant 1 : index
    %dim_12483 = tensor.dim %14738, %c1_12482 : tensor<4x?x4096xf16>
    %14739 = flow.tensor.transfer %14738 : tensor<4x?x4096xf16>{%dim_12483} to #hal.device.promise<@__device_2>
    %14740 = torch_c.from_builtin_tensor %14739 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14741 = torch_c.to_builtin_tensor %14654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12484 = arith.constant 1 : index
    %dim_12485 = tensor.dim %14741, %c1_12484 : tensor<4x?x4096xf16>
    %14742 = flow.tensor.transfer %14741 : tensor<4x?x4096xf16>{%dim_12485} to #hal.device.promise<@__device_2>
    %14743 = torch_c.from_builtin_tensor %14742 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14744 = torch_c.to_builtin_tensor %14660 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12486 = arith.constant 1 : index
    %dim_12487 = tensor.dim %14744, %c1_12486 : tensor<4x?x4096xf16>
    %14745 = flow.tensor.transfer %14744 : tensor<4x?x4096xf16>{%dim_12487} to #hal.device.promise<@__device_2>
    %14746 = torch_c.from_builtin_tensor %14745 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14747 = torch_c.to_builtin_tensor %14666 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12488 = arith.constant 1 : index
    %dim_12489 = tensor.dim %14747, %c1_12488 : tensor<4x?x4096xf16>
    %14748 = flow.tensor.transfer %14747 : tensor<4x?x4096xf16>{%dim_12489} to #hal.device.promise<@__device_2>
    %14749 = torch_c.from_builtin_tensor %14748 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14750 = torch_c.to_builtin_tensor %14672 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12490 = arith.constant 1 : index
    %dim_12491 = tensor.dim %14750, %c1_12490 : tensor<4x?x4096xf16>
    %14751 = flow.tensor.transfer %14750 : tensor<4x?x4096xf16>{%dim_12491} to #hal.device.promise<@__device_2>
    %14752 = torch_c.from_builtin_tensor %14751 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14753 = torch_c.to_builtin_tensor %14678 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12492 = arith.constant 1 : index
    %dim_12493 = tensor.dim %14753, %c1_12492 : tensor<4x?x4096xf16>
    %14754 = flow.tensor.transfer %14753 : tensor<4x?x4096xf16>{%dim_12493} to #hal.device.promise<@__device_2>
    %14755 = torch_c.from_builtin_tensor %14754 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12494 = torch.constant.int 1
    %14756 = torch.aten.add.Tensor %14737, %14740, %int1_12494 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12495 = torch.constant.int 1
    %14757 = torch.aten.add.Tensor %14756, %14648, %int1_12495 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12496 = torch.constant.int 1
    %14758 = torch.aten.add.Tensor %14757, %14743, %int1_12496 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12497 = torch.constant.int 1
    %14759 = torch.aten.add.Tensor %14758, %14746, %int1_12497 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12498 = torch.constant.int 1
    %14760 = torch.aten.add.Tensor %14759, %14749, %int1_12498 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12499 = torch.constant.int 1
    %14761 = torch.aten.add.Tensor %14760, %14752, %int1_12499 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12500 = torch.constant.int 1
    %14762 = torch.aten.add.Tensor %14761, %14755, %int1_12500 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
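    // Same reduction, materialized on @__device_3.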
    %14763 = torch_c.to_builtin_tensor %14636 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12501 = arith.constant 1 : index
    %dim_12502 = tensor.dim %14763, %c1_12501 : tensor<4x?x4096xf16>
    %14764 = flow.tensor.transfer %14763 : tensor<4x?x4096xf16>{%dim_12502} to #hal.device.promise<@__device_3>
    %14765 = torch_c.from_builtin_tensor %14764 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14766 = torch_c.to_builtin_tensor %14642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12503 = arith.constant 1 : index
    %dim_12504 = tensor.dim %14766, %c1_12503 : tensor<4x?x4096xf16>
    %14767 = flow.tensor.transfer %14766 : tensor<4x?x4096xf16>{%dim_12504} to #hal.device.promise<@__device_3>
    %14768 = torch_c.from_builtin_tensor %14767 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14769 = torch_c.to_builtin_tensor %14648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12505 = arith.constant 1 : index
    %dim_12506 = tensor.dim %14769, %c1_12505 : tensor<4x?x4096xf16>
    %14770 = flow.tensor.transfer %14769 : tensor<4x?x4096xf16>{%dim_12506} to #hal.device.promise<@__device_3>
    %14771 = torch_c.from_builtin_tensor %14770 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14772 = torch_c.to_builtin_tensor %14660 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12507 = arith.constant 1 : index
    %dim_12508 = tensor.dim %14772, %c1_12507 : tensor<4x?x4096xf16>
    %14773 = flow.tensor.transfer %14772 : tensor<4x?x4096xf16>{%dim_12508} to #hal.device.promise<@__device_3>
    %14774 = torch_c.from_builtin_tensor %14773 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14775 = torch_c.to_builtin_tensor %14666 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12509 = arith.constant 1 : index
    %dim_12510 = tensor.dim %14775, %c1_12509 : tensor<4x?x4096xf16>
    %14776 = flow.tensor.transfer %14775 : tensor<4x?x4096xf16>{%dim_12510} to #hal.device.promise<@__device_3>
    %14777 = torch_c.from_builtin_tensor %14776 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14778 = torch_c.to_builtin_tensor %14672 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12511 = arith.constant 1 : index
    %dim_12512 = tensor.dim %14778, %c1_12511 : tensor<4x?x4096xf16>
    %14779 = flow.tensor.transfer %14778 : tensor<4x?x4096xf16>{%dim_12512} to #hal.device.promise<@__device_3>
    %14780 = torch_c.from_builtin_tensor %14779 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14781 = torch_c.to_builtin_tensor %14678 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12513 = arith.constant 1 : index
    %dim_12514 = tensor.dim %14781, %c1_12513 : tensor<4x?x4096xf16>
    %14782 = flow.tensor.transfer %14781 : tensor<4x?x4096xf16>{%dim_12514} to #hal.device.promise<@__device_3>
    %14783 = torch_c.from_builtin_tensor %14782 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12515 = torch.constant.int 1
    %14784 = torch.aten.add.Tensor %14765, %14768, %int1_12515 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12516 = torch.constant.int 1
    %14785 = torch.aten.add.Tensor %14784, %14771, %int1_12516 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12517 = torch.constant.int 1
    %14786 = torch.aten.add.Tensor %14785, %14654, %int1_12517 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12518 = torch.constant.int 1
    %14787 = torch.aten.add.Tensor %14786, %14774, %int1_12518 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12519 = torch.constant.int 1
    %14788 = torch.aten.add.Tensor %14787, %14777, %int1_12519 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12520 = torch.constant.int 1
    %14789 = torch.aten.add.Tensor %14788, %14780, %int1_12520 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12521 = torch.constant.int 1
    %14790 = torch.aten.add.Tensor %14789, %14783, %int1_12521 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
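    // Same reduction, materialized on @__device_4.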
    %14791 = torch_c.to_builtin_tensor %14636 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12522 = arith.constant 1 : index
    %dim_12523 = tensor.dim %14791, %c1_12522 : tensor<4x?x4096xf16>
    %14792 = flow.tensor.transfer %14791 : tensor<4x?x4096xf16>{%dim_12523} to #hal.device.promise<@__device_4>
    %14793 = torch_c.from_builtin_tensor %14792 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14794 = torch_c.to_builtin_tensor %14642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12524 = arith.constant 1 : index
    %dim_12525 = tensor.dim %14794, %c1_12524 : tensor<4x?x4096xf16>
    %14795 = flow.tensor.transfer %14794 : tensor<4x?x4096xf16>{%dim_12525} to #hal.device.promise<@__device_4>
    %14796 = torch_c.from_builtin_tensor %14795 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14797 = torch_c.to_builtin_tensor %14648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12526 = arith.constant 1 : index
    %dim_12527 = tensor.dim %14797, %c1_12526 : tensor<4x?x4096xf16>
    %14798 = flow.tensor.transfer %14797 : tensor<4x?x4096xf16>{%dim_12527} to #hal.device.promise<@__device_4>
    %14799 = torch_c.from_builtin_tensor %14798 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14800 = torch_c.to_builtin_tensor %14654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12528 = arith.constant 1 : index
    %dim_12529 = tensor.dim %14800, %c1_12528 : tensor<4x?x4096xf16>
    %14801 = flow.tensor.transfer %14800 : tensor<4x?x4096xf16>{%dim_12529} to #hal.device.promise<@__device_4>
    %14802 = torch_c.from_builtin_tensor %14801 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14803 = torch_c.to_builtin_tensor %14666 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12530 = arith.constant 1 : index
    %dim_12531 = tensor.dim %14803, %c1_12530 : tensor<4x?x4096xf16>
    %14804 = flow.tensor.transfer %14803 : tensor<4x?x4096xf16>{%dim_12531} to #hal.device.promise<@__device_4>
    %14805 = torch_c.from_builtin_tensor %14804 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14806 = torch_c.to_builtin_tensor %14672 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12532 = arith.constant 1 : index
    %dim_12533 = tensor.dim %14806, %c1_12532 : tensor<4x?x4096xf16>
    %14807 = flow.tensor.transfer %14806 : tensor<4x?x4096xf16>{%dim_12533} to #hal.device.promise<@__device_4>
    %14808 = torch_c.from_builtin_tensor %14807 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14809 = torch_c.to_builtin_tensor %14678 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12534 = arith.constant 1 : index
    %dim_12535 = tensor.dim %14809, %c1_12534 : tensor<4x?x4096xf16>
    %14810 = flow.tensor.transfer %14809 : tensor<4x?x4096xf16>{%dim_12535} to #hal.device.promise<@__device_4>
    %14811 = torch_c.from_builtin_tensor %14810 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
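    // Sum of the eight partials on @__device_4. Note that %14660 is consumed
    // directly below without a transfer, which suggests it is the partial
    // already resident on this device.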
    %int1_12536 = torch.constant.int 1
    %14812 = torch.aten.add.Tensor %14793, %14796, %int1_12536 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12537 = torch.constant.int 1
    %14813 = torch.aten.add.Tensor %14812, %14799, %int1_12537 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12538 = torch.constant.int 1
    %14814 = torch.aten.add.Tensor %14813, %14802, %int1_12538 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12539 = torch.constant.int 1
    %14815 = torch.aten.add.Tensor %14814, %14660, %int1_12539 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12540 = torch.constant.int 1
    %14816 = torch.aten.add.Tensor %14815, %14805, %int1_12540 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12541 = torch.constant.int 1
    %14817 = torch.aten.add.Tensor %14816, %14808, %int1_12541 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12542 = torch.constant.int 1
    %14818 = torch.aten.add.Tensor %14817, %14811, %int1_12542 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
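    // Same transfer-then-sum all-reduce, now materialized on @__device_5
    // (%14666 is the locally resident partial and is added without a transfer).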
    %14819 = torch_c.to_builtin_tensor %14636 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12543 = arith.constant 1 : index
    %dim_12544 = tensor.dim %14819, %c1_12543 : tensor<4x?x4096xf16>
    %14820 = flow.tensor.transfer %14819 : tensor<4x?x4096xf16>{%dim_12544} to #hal.device.promise<@__device_5>
    %14821 = torch_c.from_builtin_tensor %14820 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14822 = torch_c.to_builtin_tensor %14642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12545 = arith.constant 1 : index
    %dim_12546 = tensor.dim %14822, %c1_12545 : tensor<4x?x4096xf16>
    %14823 = flow.tensor.transfer %14822 : tensor<4x?x4096xf16>{%dim_12546} to #hal.device.promise<@__device_5>
    %14824 = torch_c.from_builtin_tensor %14823 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14825 = torch_c.to_builtin_tensor %14648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12547 = arith.constant 1 : index
    %dim_12548 = tensor.dim %14825, %c1_12547 : tensor<4x?x4096xf16>
    %14826 = flow.tensor.transfer %14825 : tensor<4x?x4096xf16>{%dim_12548} to #hal.device.promise<@__device_5>
    %14827 = torch_c.from_builtin_tensor %14826 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14828 = torch_c.to_builtin_tensor %14654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12549 = arith.constant 1 : index
    %dim_12550 = tensor.dim %14828, %c1_12549 : tensor<4x?x4096xf16>
    %14829 = flow.tensor.transfer %14828 : tensor<4x?x4096xf16>{%dim_12550} to #hal.device.promise<@__device_5>
    %14830 = torch_c.from_builtin_tensor %14829 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14831 = torch_c.to_builtin_tensor %14660 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12551 = arith.constant 1 : index
    %dim_12552 = tensor.dim %14831, %c1_12551 : tensor<4x?x4096xf16>
    %14832 = flow.tensor.transfer %14831 : tensor<4x?x4096xf16>{%dim_12552} to #hal.device.promise<@__device_5>
    %14833 = torch_c.from_builtin_tensor %14832 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14834 = torch_c.to_builtin_tensor %14672 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12553 = arith.constant 1 : index
    %dim_12554 = tensor.dim %14834, %c1_12553 : tensor<4x?x4096xf16>
    %14835 = flow.tensor.transfer %14834 : tensor<4x?x4096xf16>{%dim_12554} to #hal.device.promise<@__device_5>
    %14836 = torch_c.from_builtin_tensor %14835 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14837 = torch_c.to_builtin_tensor %14678 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12555 = arith.constant 1 : index
    %dim_12556 = tensor.dim %14837, %c1_12555 : tensor<4x?x4096xf16>
    %14838 = flow.tensor.transfer %14837 : tensor<4x?x4096xf16>{%dim_12556} to #hal.device.promise<@__device_5>
    %14839 = torch_c.from_builtin_tensor %14838 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12557 = torch.constant.int 1
    %14840 = torch.aten.add.Tensor %14821, %14824, %int1_12557 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12558 = torch.constant.int 1
    %14841 = torch.aten.add.Tensor %14840, %14827, %int1_12558 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12559 = torch.constant.int 1
    %14842 = torch.aten.add.Tensor %14841, %14830, %int1_12559 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12560 = torch.constant.int 1
    %14843 = torch.aten.add.Tensor %14842, %14833, %int1_12560 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12561 = torch.constant.int 1
    %14844 = torch.aten.add.Tensor %14843, %14666, %int1_12561 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12562 = torch.constant.int 1
    %14845 = torch.aten.add.Tensor %14844, %14836, %int1_12562 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12563 = torch.constant.int 1
    %14846 = torch.aten.add.Tensor %14845, %14839, %int1_12563 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14846, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
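    // All-reduce replica for @__device_6 (%14672 stays local).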
    %14847 = torch_c.to_builtin_tensor %14636 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12564 = arith.constant 1 : index
    %dim_12565 = tensor.dim %14847, %c1_12564 : tensor<4x?x4096xf16>
    %14848 = flow.tensor.transfer %14847 : tensor<4x?x4096xf16>{%dim_12565} to #hal.device.promise<@__device_6>
    %14849 = torch_c.from_builtin_tensor %14848 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14850 = torch_c.to_builtin_tensor %14642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12566 = arith.constant 1 : index
    %dim_12567 = tensor.dim %14850, %c1_12566 : tensor<4x?x4096xf16>
    %14851 = flow.tensor.transfer %14850 : tensor<4x?x4096xf16>{%dim_12567} to #hal.device.promise<@__device_6>
    %14852 = torch_c.from_builtin_tensor %14851 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14853 = torch_c.to_builtin_tensor %14648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12568 = arith.constant 1 : index
    %dim_12569 = tensor.dim %14853, %c1_12568 : tensor<4x?x4096xf16>
    %14854 = flow.tensor.transfer %14853 : tensor<4x?x4096xf16>{%dim_12569} to #hal.device.promise<@__device_6>
    %14855 = torch_c.from_builtin_tensor %14854 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14856 = torch_c.to_builtin_tensor %14654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12570 = arith.constant 1 : index
    %dim_12571 = tensor.dim %14856, %c1_12570 : tensor<4x?x4096xf16>
    %14857 = flow.tensor.transfer %14856 : tensor<4x?x4096xf16>{%dim_12571} to #hal.device.promise<@__device_6>
    %14858 = torch_c.from_builtin_tensor %14857 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14859 = torch_c.to_builtin_tensor %14660 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12572 = arith.constant 1 : index
    %dim_12573 = tensor.dim %14859, %c1_12572 : tensor<4x?x4096xf16>
    %14860 = flow.tensor.transfer %14859 : tensor<4x?x4096xf16>{%dim_12573} to #hal.device.promise<@__device_6>
    %14861 = torch_c.from_builtin_tensor %14860 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14862 = torch_c.to_builtin_tensor %14666 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12574 = arith.constant 1 : index
    %dim_12575 = tensor.dim %14862, %c1_12574 : tensor<4x?x4096xf16>
    %14863 = flow.tensor.transfer %14862 : tensor<4x?x4096xf16>{%dim_12575} to #hal.device.promise<@__device_6>
    %14864 = torch_c.from_builtin_tensor %14863 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14865 = torch_c.to_builtin_tensor %14678 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12576 = arith.constant 1 : index
    %dim_12577 = tensor.dim %14865, %c1_12576 : tensor<4x?x4096xf16>
    %14866 = flow.tensor.transfer %14865 : tensor<4x?x4096xf16>{%dim_12577} to #hal.device.promise<@__device_6>
    %14867 = torch_c.from_builtin_tensor %14866 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12578 = torch.constant.int 1
    %14868 = torch.aten.add.Tensor %14849, %14852, %int1_12578 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12579 = torch.constant.int 1
    %14869 = torch.aten.add.Tensor %14868, %14855, %int1_12579 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12580 = torch.constant.int 1
    %14870 = torch.aten.add.Tensor %14869, %14858, %int1_12580 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12581 = torch.constant.int 1
    %14871 = torch.aten.add.Tensor %14870, %14861, %int1_12581 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12582 = torch.constant.int 1
    %14872 = torch.aten.add.Tensor %14871, %14864, %int1_12582 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12583 = torch.constant.int 1
    %14873 = torch.aten.add.Tensor %14872, %14672, %int1_12583 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12584 = torch.constant.int 1
    %14874 = torch.aten.add.Tensor %14873, %14867, %int1_12584 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
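    // All-reduce replica for @__device_7 (%14678 stays local).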
    %14875 = torch_c.to_builtin_tensor %14636 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12585 = arith.constant 1 : index
    %dim_12586 = tensor.dim %14875, %c1_12585 : tensor<4x?x4096xf16>
    %14876 = flow.tensor.transfer %14875 : tensor<4x?x4096xf16>{%dim_12586} to #hal.device.promise<@__device_7>
    %14877 = torch_c.from_builtin_tensor %14876 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14878 = torch_c.to_builtin_tensor %14642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12587 = arith.constant 1 : index
    %dim_12588 = tensor.dim %14878, %c1_12587 : tensor<4x?x4096xf16>
    %14879 = flow.tensor.transfer %14878 : tensor<4x?x4096xf16>{%dim_12588} to #hal.device.promise<@__device_7>
    %14880 = torch_c.from_builtin_tensor %14879 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14881 = torch_c.to_builtin_tensor %14648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12589 = arith.constant 1 : index
    %dim_12590 = tensor.dim %14881, %c1_12589 : tensor<4x?x4096xf16>
    %14882 = flow.tensor.transfer %14881 : tensor<4x?x4096xf16>{%dim_12590} to #hal.device.promise<@__device_7>
    %14883 = torch_c.from_builtin_tensor %14882 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14884 = torch_c.to_builtin_tensor %14654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12591 = arith.constant 1 : index
    %dim_12592 = tensor.dim %14884, %c1_12591 : tensor<4x?x4096xf16>
    %14885 = flow.tensor.transfer %14884 : tensor<4x?x4096xf16>{%dim_12592} to #hal.device.promise<@__device_7>
    %14886 = torch_c.from_builtin_tensor %14885 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14887 = torch_c.to_builtin_tensor %14660 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12593 = arith.constant 1 : index
    %dim_12594 = tensor.dim %14887, %c1_12593 : tensor<4x?x4096xf16>
    %14888 = flow.tensor.transfer %14887 : tensor<4x?x4096xf16>{%dim_12594} to #hal.device.promise<@__device_7>
    %14889 = torch_c.from_builtin_tensor %14888 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14890 = torch_c.to_builtin_tensor %14666 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12595 = arith.constant 1 : index
    %dim_12596 = tensor.dim %14890, %c1_12595 : tensor<4x?x4096xf16>
    %14891 = flow.tensor.transfer %14890 : tensor<4x?x4096xf16>{%dim_12596} to #hal.device.promise<@__device_7>
    %14892 = torch_c.from_builtin_tensor %14891 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %14893 = torch_c.to_builtin_tensor %14672 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12597 = arith.constant 1 : index
    %dim_12598 = tensor.dim %14893, %c1_12597 : tensor<4x?x4096xf16>
    %14894 = flow.tensor.transfer %14893 : tensor<4x?x4096xf16>{%dim_12598} to #hal.device.promise<@__device_7>
    %14895 = torch_c.from_builtin_tensor %14894 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12599 = torch.constant.int 1
    %14896 = torch.aten.add.Tensor %14877, %14880, %int1_12599 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12600 = torch.constant.int 1
    %14897 = torch.aten.add.Tensor %14896, %14883, %int1_12600 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12601 = torch.constant.int 1
    %14898 = torch.aten.add.Tensor %14897, %14886, %int1_12601 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12602 = torch.constant.int 1
    %14899 = torch.aten.add.Tensor %14898, %14889, %int1_12602 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12603 = torch.constant.int 1
    %14900 = torch.aten.add.Tensor %14899, %14892, %int1_12603 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12604 = torch.constant.int 1
    %14901 = torch.aten.add.Tensor %14900, %14895, %int1_12604 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12605 = torch.constant.int 1
    %14902 = torch.aten.add.Tensor %14901, %14678, %int1_12605 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
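    // Each device adds its all-reduced result (%14706 ... %14902) to its copy
    // of the block input (%13562 ... %13569); this looks like the residual
    // connection closing out the transformer block, replicated per device.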
    %int1_12606 = torch.constant.int 1
    %14903 = torch.aten.add.Tensor %13562, %14706, %int1_12606 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12607 = torch.constant.int 1
    %14904 = torch.aten.add.Tensor %13563, %14734, %int1_12607 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12608 = torch.constant.int 1
    %14905 = torch.aten.add.Tensor %13564, %14762, %int1_12608 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12609 = torch.constant.int 1
    %14906 = torch.aten.add.Tensor %13565, %14790, %int1_12609 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12610 = torch.constant.int 1
    %14907 = torch.aten.add.Tensor %13566, %14818, %int1_12610 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12611 = torch.constant.int 1
    %14908 = torch.aten.add.Tensor %13567, %14846, %int1_12611 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12612 = torch.constant.int 1
    %14909 = torch.aten.add.Tensor %13568, %14874, %int1_12612 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12613 = torch.constant.int 1
    %14910 = torch.aten.add.Tensor %13569, %14902, %int1_12613 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
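    // The chain below (upcast to f32, square, mean over the last dim, add
    // epsilon, rsqrt, scale, apply a learned [4096] weight, downcast to f16)
    // matches an RMSNorm, computed once per device replica. torch dtype code 6
    // is float32 and 5 is float16.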
    %int6_12614 = torch.constant.int 6
    %14911 = torch.prims.convert_element_type %14903, %int6_12614 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_12615 = torch.constant.int 6
    %14912 = torch.prims.convert_element_type %14904, %int6_12615 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_12616 = torch.constant.int 6
    %14913 = torch.prims.convert_element_type %14905, %int6_12616 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_12617 = torch.constant.int 6
    %14914 = torch.prims.convert_element_type %14906, %int6_12617 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_12618 = torch.constant.int 6
    %14915 = torch.prims.convert_element_type %14907, %int6_12618 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_12619 = torch.constant.int 6
    %14916 = torch.prims.convert_element_type %14908, %int6_12619 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_12620 = torch.constant.int 6
    %14917 = torch.prims.convert_element_type %14909, %int6_12620 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_12621 = torch.constant.int 6
    %14918 = torch.prims.convert_element_type %14910, %int6_12621 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_12622 = torch.constant.int 2
    %14919 = torch.aten.pow.Tensor_Scalar %14911, %int2_12622 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_12623 = torch.constant.int 2
    %14920 = torch.aten.pow.Tensor_Scalar %14912, %int2_12623 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_12624 = torch.constant.int 2
    %14921 = torch.aten.pow.Tensor_Scalar %14913, %int2_12624 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_12625 = torch.constant.int 2
    %14922 = torch.aten.pow.Tensor_Scalar %14914, %int2_12625 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_12626 = torch.constant.int 2
    %14923 = torch.aten.pow.Tensor_Scalar %14915, %int2_12626 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_12627 = torch.constant.int 2
    %14924 = torch.aten.pow.Tensor_Scalar %14916, %int2_12627 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_12628 = torch.constant.int 2
    %14925 = torch.aten.pow.Tensor_Scalar %14917, %int2_12628 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_12629 = torch.constant.int 2
    %14926 = torch.aten.pow.Tensor_Scalar %14918, %int2_12629 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int-1_12630 = torch.constant.int -1
    %14927 = torch.prim.ListConstruct %int-1_12630 : (!torch.int) -> !torch.list<int>
    %true_12631 = torch.constant.bool true
    %none_12632 = torch.constant.none
    %14928 = torch.aten.mean.dim %14919, %14927, %true_12631, %none_12632 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_12633 = torch.constant.int -1
    %14929 = torch.prim.ListConstruct %int-1_12633 : (!torch.int) -> !torch.list<int>
    %true_12634 = torch.constant.bool true
    %none_12635 = torch.constant.none
    %14930 = torch.aten.mean.dim %14920, %14929, %true_12634, %none_12635 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_12636 = torch.constant.int -1
    %14931 = torch.prim.ListConstruct %int-1_12636 : (!torch.int) -> !torch.list<int>
    %true_12637 = torch.constant.bool true
    %none_12638 = torch.constant.none
    %14932 = torch.aten.mean.dim %14921, %14931, %true_12637, %none_12638 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_12639 = torch.constant.int -1
    %14933 = torch.prim.ListConstruct %int-1_12639 : (!torch.int) -> !torch.list<int>
    %true_12640 = torch.constant.bool true
    %none_12641 = torch.constant.none
    %14934 = torch.aten.mean.dim %14922, %14933, %true_12640, %none_12641 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_12642 = torch.constant.int -1
    %14935 = torch.prim.ListConstruct %int-1_12642 : (!torch.int) -> !torch.list<int>
    %true_12643 = torch.constant.bool true
    %none_12644 = torch.constant.none
    %14936 = torch.aten.mean.dim %14923, %14935, %true_12643, %none_12644 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_12645 = torch.constant.int -1
    %14937 = torch.prim.ListConstruct %int-1_12645 : (!torch.int) -> !torch.list<int>
    %true_12646 = torch.constant.bool true
    %none_12647 = torch.constant.none
    %14938 = torch.aten.mean.dim %14924, %14937, %true_12646, %none_12647 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_12648 = torch.constant.int -1
    %14939 = torch.prim.ListConstruct %int-1_12648 : (!torch.int) -> !torch.list<int>
    %true_12649 = torch.constant.bool true
    %none_12650 = torch.constant.none
    %14940 = torch.aten.mean.dim %14925, %14939, %true_12649, %none_12650 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_12651 = torch.constant.int -1
    %14941 = torch.prim.ListConstruct %int-1_12651 : (!torch.int) -> !torch.list<int>
    %true_12652 = torch.constant.bool true
    %none_12653 = torch.constant.none
    %14942 = torch.aten.mean.dim %14926, %14941, %true_12652, %none_12653 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
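    // Epsilon 9.9999997473787516E-6 is the f32 rounding of 1e-5; it is added
    // to each mean of squares before the rsqrt ops that follow.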
    %float9.999990e-06_12654 = torch.constant.float 9.9999997473787516E-6
    %int1_12655 = torch.constant.int 1
    %14943 = torch.aten.add.Scalar %14928, %float9.999990e-06_12654, %int1_12655 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_12656 = torch.constant.float 9.9999997473787516E-6
    %int1_12657 = torch.constant.int 1
    %14944 = torch.aten.add.Scalar %14930, %float9.999990e-06_12656, %int1_12657 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_12658 = torch.constant.float 9.9999997473787516E-6
    %int1_12659 = torch.constant.int 1
    %14945 = torch.aten.add.Scalar %14932, %float9.999990e-06_12658, %int1_12659 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_12660 = torch.constant.float 9.9999997473787516E-6
    %int1_12661 = torch.constant.int 1
    %14946 = torch.aten.add.Scalar %14934, %float9.999990e-06_12660, %int1_12661 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_12662 = torch.constant.float 9.9999997473787516E-6
    %int1_12663 = torch.constant.int 1
    %14947 = torch.aten.add.Scalar %14936, %float9.999990e-06_12662, %int1_12663 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_12664 = torch.constant.float 9.9999997473787516E-6
    %int1_12665 = torch.constant.int 1
    %14948 = torch.aten.add.Scalar %14938, %float9.999990e-06_12664, %int1_12665 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_12666 = torch.constant.float 9.9999997473787516E-6
    %int1_12667 = torch.constant.int 1
    %14949 = torch.aten.add.Scalar %14940, %float9.999990e-06_12666, %int1_12667 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_12668 = torch.constant.float 9.9999997473787516E-6
    %int1_12669 = torch.constant.int 1
    %14950 = torch.aten.add.Scalar %14942, %float9.999990e-06_12668, %int1_12669 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %14951 = torch.aten.rsqrt %14943 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %14952 = torch.aten.rsqrt %14944 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %14953 = torch.aten.rsqrt %14945 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %14954 = torch.aten.rsqrt %14946 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %14955 = torch.aten.rsqrt %14947 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %14956 = torch.aten.rsqrt %14948 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %14957 = torch.aten.rsqrt %14949 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %14958 = torch.aten.rsqrt %14950 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %14958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %14959 = torch.aten.mul.Tensor %14911, %14951 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14960 = torch.aten.mul.Tensor %14912, %14952 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14961 = torch.aten.mul.Tensor %14913, %14953 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14962 = torch.aten.mul.Tensor %14914, %14954 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14963 = torch.aten.mul.Tensor %14915, %14955 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14964 = torch.aten.mul.Tensor %14916, %14956 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14965 = torch.aten.mul.Tensor %14917, %14957 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14966 = torch.aten.mul.Tensor %14918, %14958 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
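    // Per-device learned scale: %480 ... %487 are [4096] f32 weights, likely
    // eight replicas of the same normalization weight, one per device.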
    %14967 = torch.aten.mul.Tensor %480, %14959 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14968 = torch.aten.mul.Tensor %481, %14960 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14969 = torch.aten.mul.Tensor %482, %14961 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14970 = torch.aten.mul.Tensor %483, %14962 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14971 = torch.aten.mul.Tensor %484, %14963 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14972 = torch.aten.mul.Tensor %485, %14964 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14973 = torch.aten.mul.Tensor %486, %14965 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %14974 = torch.aten.mul.Tensor %487, %14966 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %14974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int5_12670 = torch.constant.int 5
    %14975 = torch.prims.convert_element_type %14967, %int5_12670 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_12671 = torch.constant.int 5
    %14976 = torch.prims.convert_element_type %14968, %int5_12671 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_12672 = torch.constant.int 5
    %14977 = torch.prims.convert_element_type %14969, %int5_12672 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_12673 = torch.constant.int 5
    %14978 = torch.prims.convert_element_type %14970, %int5_12673 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_12674 = torch.constant.int 5
    %14979 = torch.prims.convert_element_type %14971, %int5_12674 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_12675 = torch.constant.int 5
    %14980 = torch.prims.convert_element_type %14972, %int5_12675 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_12676 = torch.constant.int 5
    %14981 = torch.prims.convert_element_type %14973, %int5_12676 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_12677 = torch.constant.int 5
    %14982 = torch.prims.convert_element_type %14974, %int5_12677 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %14982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
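    // Sharded projection: each device permutes its [1792,4096] f16 weight
    // shard (%488 ... %495) to [4096,1792], flattens the activation from
    // [4,?,4096] to [?,4096], runs torch.aten.mm, and reshapes back to
    // [4,?,1792]. With eight shards of width 1792 this is consistent with a
    // 14336-wide FFN projection split column-wise across devices.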
    %int1_12678 = torch.constant.int 1
    %int0_12679 = torch.constant.int 0
    %14983 = torch.prim.ListConstruct %int1_12678, %int0_12679 : (!torch.int, !torch.int) -> !torch.list<int>
    %14984 = torch.aten.permute %488, %14983 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12680 = torch.constant.int 1
    %int0_12681 = torch.constant.int 0
    %14985 = torch.prim.ListConstruct %int1_12680, %int0_12681 : (!torch.int, !torch.int) -> !torch.list<int>
    %14986 = torch.aten.permute %489, %14985 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12682 = torch.constant.int 1
    %int0_12683 = torch.constant.int 0
    %14987 = torch.prim.ListConstruct %int1_12682, %int0_12683 : (!torch.int, !torch.int) -> !torch.list<int>
    %14988 = torch.aten.permute %490, %14987 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12684 = torch.constant.int 1
    %int0_12685 = torch.constant.int 0
    %14989 = torch.prim.ListConstruct %int1_12684, %int0_12685 : (!torch.int, !torch.int) -> !torch.list<int>
    %14990 = torch.aten.permute %491, %14989 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12686 = torch.constant.int 1
    %int0_12687 = torch.constant.int 0
    %14991 = torch.prim.ListConstruct %int1_12686, %int0_12687 : (!torch.int, !torch.int) -> !torch.list<int>
    %14992 = torch.aten.permute %492, %14991 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12688 = torch.constant.int 1
    %int0_12689 = torch.constant.int 0
    %14993 = torch.prim.ListConstruct %int1_12688, %int0_12689 : (!torch.int, !torch.int) -> !torch.list<int>
    %14994 = torch.aten.permute %493, %14993 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12690 = torch.constant.int 1
    %int0_12691 = torch.constant.int 0
    %14995 = torch.prim.ListConstruct %int1_12690, %int0_12691 : (!torch.int, !torch.int) -> !torch.list<int>
    %14996 = torch.aten.permute %494, %14995 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12692 = torch.constant.int 1
    %int0_12693 = torch.constant.int 0
    %14997 = torch.prim.ListConstruct %int1_12692, %int0_12693 : (!torch.int, !torch.int) -> !torch.list<int>
    %14998 = torch.aten.permute %495, %14997 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int4_12694 = torch.constant.int 4
    %14999 = torch.aten.mul.int %int4_12694, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12695 = torch.constant.int 4096
    %15000 = torch.prim.ListConstruct %14999, %int4096_12695 : (!torch.int, !torch.int) -> !torch.list<int>
    %15001 = torch.aten.view %14975, %15000 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15001, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15002 = torch.aten.mm %15001, %14984 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15002, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12696 = torch.constant.int 4
    %int1792_12697 = torch.constant.int 1792
    %15003 = torch.prim.ListConstruct %int4_12696, %2482, %int1792_12697 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15004 = torch.aten.view %15002, %15003 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12698 = torch.constant.int 4
    %15005 = torch.aten.mul.int %int4_12698, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12699 = torch.constant.int 4096
    %15006 = torch.prim.ListConstruct %15005, %int4096_12699 : (!torch.int, !torch.int) -> !torch.list<int>
    %15007 = torch.aten.view %14976, %15006 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15007, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15008 = torch.aten.mm %15007, %14986 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15008, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12700 = torch.constant.int 4
    %int1792_12701 = torch.constant.int 1792
    %15009 = torch.prim.ListConstruct %int4_12700, %2482, %int1792_12701 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15010 = torch.aten.view %15008, %15009 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12702 = torch.constant.int 4
    %15011 = torch.aten.mul.int %int4_12702, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12703 = torch.constant.int 4096
    %15012 = torch.prim.ListConstruct %15011, %int4096_12703 : (!torch.int, !torch.int) -> !torch.list<int>
    %15013 = torch.aten.view %14977, %15012 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15013, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15014 = torch.aten.mm %15013, %14988 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15014, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12704 = torch.constant.int 4
    %int1792_12705 = torch.constant.int 1792
    %15015 = torch.prim.ListConstruct %int4_12704, %2482, %int1792_12705 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15016 = torch.aten.view %15014, %15015 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12706 = torch.constant.int 4
    %15017 = torch.aten.mul.int %int4_12706, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12707 = torch.constant.int 4096
    %15018 = torch.prim.ListConstruct %15017, %int4096_12707 : (!torch.int, !torch.int) -> !torch.list<int>
    %15019 = torch.aten.view %14978, %15018 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15019, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15020 = torch.aten.mm %15019, %14990 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15020, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12708 = torch.constant.int 4
    %int1792_12709 = torch.constant.int 1792
    %15021 = torch.prim.ListConstruct %int4_12708, %2482, %int1792_12709 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15022 = torch.aten.view %15020, %15021 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12710 = torch.constant.int 4
    %15023 = torch.aten.mul.int %int4_12710, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12711 = torch.constant.int 4096
    %15024 = torch.prim.ListConstruct %15023, %int4096_12711 : (!torch.int, !torch.int) -> !torch.list<int>
    %15025 = torch.aten.view %14979, %15024 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15025, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15026 = torch.aten.mm %15025, %14992 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15026, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12712 = torch.constant.int 4
    %int1792_12713 = torch.constant.int 1792
    %15027 = torch.prim.ListConstruct %int4_12712, %2482, %int1792_12713 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15028 = torch.aten.view %15026, %15027 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12714 = torch.constant.int 4
    %15029 = torch.aten.mul.int %int4_12714, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12715 = torch.constant.int 4096
    %15030 = torch.prim.ListConstruct %15029, %int4096_12715 : (!torch.int, !torch.int) -> !torch.list<int>
    %15031 = torch.aten.view %14980, %15030 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15031, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15032 = torch.aten.mm %15031, %14994 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15032, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12716 = torch.constant.int 4
    %int1792_12717 = torch.constant.int 1792
    %15033 = torch.prim.ListConstruct %int4_12716, %2482, %int1792_12717 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15034 = torch.aten.view %15032, %15033 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12718 = torch.constant.int 4
    %15035 = torch.aten.mul.int %int4_12718, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12719 = torch.constant.int 4096
    %15036 = torch.prim.ListConstruct %15035, %int4096_12719 : (!torch.int, !torch.int) -> !torch.list<int>
    %15037 = torch.aten.view %14981, %15036 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15037, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15038 = torch.aten.mm %15037, %14996 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15038, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12720 = torch.constant.int 4
    %int1792_12721 = torch.constant.int 1792
    %15039 = torch.prim.ListConstruct %int4_12720, %2482, %int1792_12721 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15040 = torch.aten.view %15038, %15039 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12722 = torch.constant.int 4
    %15041 = torch.aten.mul.int %int4_12722, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12723 = torch.constant.int 4096
    %15042 = torch.prim.ListConstruct %15041, %int4096_12723 : (!torch.int, !torch.int) -> !torch.list<int>
    %15043 = torch.aten.view %14982, %15042 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15043, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15044 = torch.aten.mm %15043, %14998 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15044, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12724 = torch.constant.int 4
    %int1792_12725 = torch.constant.int 1792
    %15045 = torch.prim.ListConstruct %int4_12724, %2482, %int1792_12725 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15046 = torch.aten.view %15044, %15045 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
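    // SiLU applied to each [4,?,1792] projection result, consistent with the
    // gate activation of a SwiGLU-style FFN.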
    %15047 = torch.aten.silu %15004 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15048 = torch.aten.silu %15010 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15049 = torch.aten.silu %15016 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15050 = torch.aten.silu %15022 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15051 = torch.aten.silu %15028 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15052 = torch.aten.silu %15034 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15053 = torch.aten.silu %15040 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15054 = torch.aten.silu %15046 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
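    // %496 ... %503 are likely the eight up-projection weight shards; each
    // [1792,4096] tensor is transposed to [4096,1792] so the flattened
    // activations can be multiplied with torch.aten.mm directly.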
    %int1_12726 = torch.constant.int 1
    %int0_12727 = torch.constant.int 0
    %15055 = torch.prim.ListConstruct %int1_12726, %int0_12727 : (!torch.int, !torch.int) -> !torch.list<int>
    %15056 = torch.aten.permute %496, %15055 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12728 = torch.constant.int 1
    %int0_12729 = torch.constant.int 0
    %15057 = torch.prim.ListConstruct %int1_12728, %int0_12729 : (!torch.int, !torch.int) -> !torch.list<int>
    %15058 = torch.aten.permute %497, %15057 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12730 = torch.constant.int 1
    %int0_12731 = torch.constant.int 0
    %15059 = torch.prim.ListConstruct %int1_12730, %int0_12731 : (!torch.int, !torch.int) -> !torch.list<int>
    %15060 = torch.aten.permute %498, %15059 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12732 = torch.constant.int 1
    %int0_12733 = torch.constant.int 0
    %15061 = torch.prim.ListConstruct %int1_12732, %int0_12733 : (!torch.int, !torch.int) -> !torch.list<int>
    %15062 = torch.aten.permute %499, %15061 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12734 = torch.constant.int 1
    %int0_12735 = torch.constant.int 0
    %15063 = torch.prim.ListConstruct %int1_12734, %int0_12735 : (!torch.int, !torch.int) -> !torch.list<int>
    %15064 = torch.aten.permute %500, %15063 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12736 = torch.constant.int 1
    %int0_12737 = torch.constant.int 0
    %15065 = torch.prim.ListConstruct %int1_12736, %int0_12737 : (!torch.int, !torch.int) -> !torch.list<int>
    %15066 = torch.aten.permute %501, %15065 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12738 = torch.constant.int 1
    %int0_12739 = torch.constant.int 0
    %15067 = torch.prim.ListConstruct %int1_12738, %int0_12739 : (!torch.int, !torch.int) -> !torch.list<int>
    %15068 = torch.aten.permute %502, %15067 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_12740 = torch.constant.int 1
    %int0_12741 = torch.constant.int 0
    %15069 = torch.prim.ListConstruct %int1_12740, %int0_12741 : (!torch.int, !torch.int) -> !torch.list<int>
    %15070 = torch.aten.permute %503, %15069 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
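    // Up projection per shard: collapse [4,?,4096] to [?,4096], matmul with the
    // transposed weight, then restore the [4,?,1792] batched shape. The
    // symbolic-shape bindings record dim 1 as s0 * 16, so the flattened leading
    // dim is 4 * (s0 * 16) = s0 * 64.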
    %int4_12742 = torch.constant.int 4
    %15071 = torch.aten.mul.int %int4_12742, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12743 = torch.constant.int 4096
    %15072 = torch.prim.ListConstruct %15071, %int4096_12743 : (!torch.int, !torch.int) -> !torch.list<int>
    %15073 = torch.aten.view %14975, %15072 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15073, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15074 = torch.aten.mm %15073, %15056 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15074, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12744 = torch.constant.int 4
    %int1792_12745 = torch.constant.int 1792
    %15075 = torch.prim.ListConstruct %int4_12744, %2482, %int1792_12745 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15076 = torch.aten.view %15074, %15075 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12746 = torch.constant.int 4
    %15077 = torch.aten.mul.int %int4_12746, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12747 = torch.constant.int 4096
    %15078 = torch.prim.ListConstruct %15077, %int4096_12747 : (!torch.int, !torch.int) -> !torch.list<int>
    %15079 = torch.aten.view %14976, %15078 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15079, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15080 = torch.aten.mm %15079, %15058 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15080, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12748 = torch.constant.int 4
    %int1792_12749 = torch.constant.int 1792
    %15081 = torch.prim.ListConstruct %int4_12748, %2482, %int1792_12749 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15082 = torch.aten.view %15080, %15081 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12750 = torch.constant.int 4
    %15083 = torch.aten.mul.int %int4_12750, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12751 = torch.constant.int 4096
    %15084 = torch.prim.ListConstruct %15083, %int4096_12751 : (!torch.int, !torch.int) -> !torch.list<int>
    %15085 = torch.aten.view %14977, %15084 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15085, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15086 = torch.aten.mm %15085, %15060 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15086, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12752 = torch.constant.int 4
    %int1792_12753 = torch.constant.int 1792
    %15087 = torch.prim.ListConstruct %int4_12752, %2482, %int1792_12753 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15088 = torch.aten.view %15086, %15087 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12754 = torch.constant.int 4
    %15089 = torch.aten.mul.int %int4_12754, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12755 = torch.constant.int 4096
    %15090 = torch.prim.ListConstruct %15089, %int4096_12755 : (!torch.int, !torch.int) -> !torch.list<int>
    %15091 = torch.aten.view %14978, %15090 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15091, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15092 = torch.aten.mm %15091, %15062 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15092, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12756 = torch.constant.int 4
    %int1792_12757 = torch.constant.int 1792
    %15093 = torch.prim.ListConstruct %int4_12756, %2482, %int1792_12757 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15094 = torch.aten.view %15092, %15093 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12758 = torch.constant.int 4
    %15095 = torch.aten.mul.int %int4_12758, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12759 = torch.constant.int 4096
    %15096 = torch.prim.ListConstruct %15095, %int4096_12759 : (!torch.int, !torch.int) -> !torch.list<int>
    %15097 = torch.aten.view %14979, %15096 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15097, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15098 = torch.aten.mm %15097, %15064 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15098, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12760 = torch.constant.int 4
    %int1792_12761 = torch.constant.int 1792
    %15099 = torch.prim.ListConstruct %int4_12760, %2482, %int1792_12761 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15100 = torch.aten.view %15098, %15099 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12762 = torch.constant.int 4
    %15101 = torch.aten.mul.int %int4_12762, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12763 = torch.constant.int 4096
    %15102 = torch.prim.ListConstruct %15101, %int4096_12763 : (!torch.int, !torch.int) -> !torch.list<int>
    %15103 = torch.aten.view %14980, %15102 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15103, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15104 = torch.aten.mm %15103, %15066 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15104, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12764 = torch.constant.int 4
    %int1792_12765 = torch.constant.int 1792
    %15105 = torch.prim.ListConstruct %int4_12764, %2482, %int1792_12765 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15106 = torch.aten.view %15104, %15105 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12766 = torch.constant.int 4
    %15107 = torch.aten.mul.int %int4_12766, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12767 = torch.constant.int 4096
    %15108 = torch.prim.ListConstruct %15107, %int4096_12767 : (!torch.int, !torch.int) -> !torch.list<int>
    %15109 = torch.aten.view %14981, %15108 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15109, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15110 = torch.aten.mm %15109, %15068 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15110, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12768 = torch.constant.int 4
    %int1792_12769 = torch.constant.int 1792
    %15111 = torch.prim.ListConstruct %int4_12768, %2482, %int1792_12769 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15112 = torch.aten.view %15110, %15111 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_12770 = torch.constant.int 4
    %15113 = torch.aten.mul.int %int4_12770, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_12771 = torch.constant.int 4096
    %15114 = torch.prim.ListConstruct %15113, %int4096_12771 : (!torch.int, !torch.int) -> !torch.list<int>
    %15115 = torch.aten.view %14982, %15114 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15115, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15116 = torch.aten.mm %15115, %15070 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15116, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_12772 = torch.constant.int 4
    %int1792_12773 = torch.constant.int 1792
    %15117 = torch.prim.ListConstruct %int4_12772, %2482, %int1792_12773 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15118 = torch.aten.view %15116, %15117 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
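    // SwiGLU combine, per shard: silu(gate) * up, elementwise on each device's
    // [4,?,1792] slice.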
    %15119 = torch.aten.mul.Tensor %15047, %15076 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15120 = torch.aten.mul.Tensor %15048, %15082 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15121 = torch.aten.mul.Tensor %15049, %15088 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15122 = torch.aten.mul.Tensor %15050, %15094 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15123 = torch.aten.mul.Tensor %15051, %15100 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15124 = torch.aten.mul.Tensor %15052, %15106 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15124, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15125 = torch.aten.mul.Tensor %15053, %15112 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %15126 = torch.aten.mul.Tensor %15054, %15118 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %15126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
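    // %504 ... %511 are presumably the down-projection weight shards; each
    // [4096,1792] tensor is transposed to [1792,4096] for the matmuls below.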
    %int1_12774 = torch.constant.int 1
    %int0_12775 = torch.constant.int 0
    %15127 = torch.prim.ListConstruct %int1_12774, %int0_12775 : (!torch.int, !torch.int) -> !torch.list<int>
    %15128 = torch.aten.permute %504, %15127 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_12776 = torch.constant.int 1
    %int0_12777 = torch.constant.int 0
    %15129 = torch.prim.ListConstruct %int1_12776, %int0_12777 : (!torch.int, !torch.int) -> !torch.list<int>
    %15130 = torch.aten.permute %505, %15129 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_12778 = torch.constant.int 1
    %int0_12779 = torch.constant.int 0
    %15131 = torch.prim.ListConstruct %int1_12778, %int0_12779 : (!torch.int, !torch.int) -> !torch.list<int>
    %15132 = torch.aten.permute %506, %15131 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_12780 = torch.constant.int 1
    %int0_12781 = torch.constant.int 0
    %15133 = torch.prim.ListConstruct %int1_12780, %int0_12781 : (!torch.int, !torch.int) -> !torch.list<int>
    %15134 = torch.aten.permute %507, %15133 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_12782 = torch.constant.int 1
    %int0_12783 = torch.constant.int 0
    %15135 = torch.prim.ListConstruct %int1_12782, %int0_12783 : (!torch.int, !torch.int) -> !torch.list<int>
    %15136 = torch.aten.permute %508, %15135 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_12784 = torch.constant.int 1
    %int0_12785 = torch.constant.int 0
    %15137 = torch.prim.ListConstruct %int1_12784, %int0_12785 : (!torch.int, !torch.int) -> !torch.list<int>
    %15138 = torch.aten.permute %509, %15137 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_12786 = torch.constant.int 1
    %int0_12787 = torch.constant.int 0
    %15139 = torch.prim.ListConstruct %int1_12786, %int0_12787 : (!torch.int, !torch.int) -> !torch.list<int>
    %15140 = torch.aten.permute %510, %15139 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_12788 = torch.constant.int 1
    %int0_12789 = torch.constant.int 0
    %15141 = torch.prim.ListConstruct %int1_12788, %int0_12789 : (!torch.int, !torch.int) -> !torch.list<int>
    %15142 = torch.aten.permute %511, %15141 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
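    // Down projection per shard: flatten to [?,1792], matmul back to [?,4096],
    // and reshape to [4,?,4096]. Each device now holds a partial FFN output
    // that must still be summed across all eight shards.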
    %int1_12790 = torch.constant.int 1
    %15143 = torch.aten.size.int %15004, %int1_12790 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_12791 = torch.constant.int 4
    %15144 = torch.aten.mul.int %int4_12791, %15143 : !torch.int, !torch.int -> !torch.int
    %int1792_12792 = torch.constant.int 1792
    %15145 = torch.prim.ListConstruct %15144, %int1792_12792 : (!torch.int, !torch.int) -> !torch.list<int>
    %15146 = torch.aten.view %15119, %15145 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15146, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %15147 = torch.aten.mm %15146, %15128 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15147, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12793 = torch.constant.int 4
    %int4096_12794 = torch.constant.int 4096
    %15148 = torch.prim.ListConstruct %int4_12793, %15143, %int4096_12794 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15149 = torch.aten.view %15147, %15148 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12795 = torch.constant.int 1
    %15150 = torch.aten.size.int %15010, %int1_12795 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_12796 = torch.constant.int 4
    %15151 = torch.aten.mul.int %int4_12796, %15150 : !torch.int, !torch.int -> !torch.int
    %int1792_12797 = torch.constant.int 1792
    %15152 = torch.prim.ListConstruct %15151, %int1792_12797 : (!torch.int, !torch.int) -> !torch.list<int>
    %15153 = torch.aten.view %15120, %15152 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15153, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %15154 = torch.aten.mm %15153, %15130 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15154, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12798 = torch.constant.int 4
    %int4096_12799 = torch.constant.int 4096
    %15155 = torch.prim.ListConstruct %int4_12798, %15150, %int4096_12799 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15156 = torch.aten.view %15154, %15155 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12800 = torch.constant.int 1
    %15157 = torch.aten.size.int %15016, %int1_12800 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_12801 = torch.constant.int 4
    %15158 = torch.aten.mul.int %int4_12801, %15157 : !torch.int, !torch.int -> !torch.int
    %int1792_12802 = torch.constant.int 1792
    %15159 = torch.prim.ListConstruct %15158, %int1792_12802 : (!torch.int, !torch.int) -> !torch.list<int>
    %15160 = torch.aten.view %15121, %15159 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15160, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %15161 = torch.aten.mm %15160, %15132 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15161, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12803 = torch.constant.int 4
    %int4096_12804 = torch.constant.int 4096
    %15162 = torch.prim.ListConstruct %int4_12803, %15157, %int4096_12804 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15163 = torch.aten.view %15161, %15162 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12805 = torch.constant.int 1
    %15164 = torch.aten.size.int %15022, %int1_12805 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_12806 = torch.constant.int 4
    %15165 = torch.aten.mul.int %int4_12806, %15164 : !torch.int, !torch.int -> !torch.int
    %int1792_12807 = torch.constant.int 1792
    %15166 = torch.prim.ListConstruct %15165, %int1792_12807 : (!torch.int, !torch.int) -> !torch.list<int>
    %15167 = torch.aten.view %15122, %15166 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15167, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %15168 = torch.aten.mm %15167, %15134 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15168, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12808 = torch.constant.int 4
    %int4096_12809 = torch.constant.int 4096
    %15169 = torch.prim.ListConstruct %int4_12808, %15164, %int4096_12809 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15170 = torch.aten.view %15168, %15169 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12810 = torch.constant.int 1
    %15171 = torch.aten.size.int %15028, %int1_12810 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_12811 = torch.constant.int 4
    %15172 = torch.aten.mul.int %int4_12811, %15171 : !torch.int, !torch.int -> !torch.int
    %int1792_12812 = torch.constant.int 1792
    %15173 = torch.prim.ListConstruct %15172, %int1792_12812 : (!torch.int, !torch.int) -> !torch.list<int>
    %15174 = torch.aten.view %15123, %15173 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15174, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %15175 = torch.aten.mm %15174, %15136 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15175, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12813 = torch.constant.int 4
    %int4096_12814 = torch.constant.int 4096
    %15176 = torch.prim.ListConstruct %int4_12813, %15171, %int4096_12814 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15177 = torch.aten.view %15175, %15176 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12815 = torch.constant.int 1
    %15178 = torch.aten.size.int %15034, %int1_12815 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_12816 = torch.constant.int 4
    %15179 = torch.aten.mul.int %int4_12816, %15178 : !torch.int, !torch.int -> !torch.int
    %int1792_12817 = torch.constant.int 1792
    %15180 = torch.prim.ListConstruct %15179, %int1792_12817 : (!torch.int, !torch.int) -> !torch.list<int>
    %15181 = torch.aten.view %15124, %15180 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15181, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %15182 = torch.aten.mm %15181, %15138 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15182, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12818 = torch.constant.int 4
    %int4096_12819 = torch.constant.int 4096
    %15183 = torch.prim.ListConstruct %int4_12818, %15178, %int4096_12819 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15184 = torch.aten.view %15182, %15183 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12820 = torch.constant.int 1
    %15185 = torch.aten.size.int %15040, %int1_12820 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_12821 = torch.constant.int 4
    %15186 = torch.aten.mul.int %int4_12821, %15185 : !torch.int, !torch.int -> !torch.int
    %int1792_12822 = torch.constant.int 1792
    %15187 = torch.prim.ListConstruct %15186, %int1792_12822 : (!torch.int, !torch.int) -> !torch.list<int>
    %15188 = torch.aten.view %15125, %15187 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15188, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %15189 = torch.aten.mm %15188, %15140 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15189, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12823 = torch.constant.int 4
    %int4096_12824 = torch.constant.int 4096
    %15190 = torch.prim.ListConstruct %int4_12823, %15185, %int4096_12824 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15191 = torch.aten.view %15189, %15190 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12825 = torch.constant.int 1
    %15192 = torch.aten.size.int %15046, %int1_12825 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_12826 = torch.constant.int 4
    %15193 = torch.aten.mul.int %int4_12826, %15192 : !torch.int, !torch.int -> !torch.int
    %int1792_12827 = torch.constant.int 1792
    %15194 = torch.prim.ListConstruct %15193, %int1792_12827 : (!torch.int, !torch.int) -> !torch.list<int>
    %15195 = torch.aten.view %15126, %15194 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %15195, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %15196 = torch.aten.mm %15195, %15142 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15196, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_12828 = torch.constant.int 4
    %int4096_12829 = torch.constant.int 4096
    %15197 = torch.prim.ListConstruct %int4_12828, %15192, %int4096_12829 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15198 = torch.aten.view %15196, %15197 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
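    // What follows is an unrolled all-reduce over the eight partial outputs
    // %15149 ... %15198: for each device, the seven remote partials are copied
    // in with flow.tensor.transfer and accumulated through a chain of
    // torch.aten.add.Tensor ops. First, the reduction onto @__device_0.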
    %15199 = torch_c.to_builtin_tensor %15156 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12830 = arith.constant 1 : index
    %dim_12831 = tensor.dim %15199, %c1_12830 : tensor<4x?x4096xf16>
    %15200 = flow.tensor.transfer %15199 : tensor<4x?x4096xf16>{%dim_12831} to #hal.device.promise<@__device_0>
    %15201 = torch_c.from_builtin_tensor %15200 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15202 = torch_c.to_builtin_tensor %15163 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12832 = arith.constant 1 : index
    %dim_12833 = tensor.dim %15202, %c1_12832 : tensor<4x?x4096xf16>
    %15203 = flow.tensor.transfer %15202 : tensor<4x?x4096xf16>{%dim_12833} to #hal.device.promise<@__device_0>
    %15204 = torch_c.from_builtin_tensor %15203 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15205 = torch_c.to_builtin_tensor %15170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12834 = arith.constant 1 : index
    %dim_12835 = tensor.dim %15205, %c1_12834 : tensor<4x?x4096xf16>
    %15206 = flow.tensor.transfer %15205 : tensor<4x?x4096xf16>{%dim_12835} to #hal.device.promise<@__device_0>
    %15207 = torch_c.from_builtin_tensor %15206 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15208 = torch_c.to_builtin_tensor %15177 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12836 = arith.constant 1 : index
    %dim_12837 = tensor.dim %15208, %c1_12836 : tensor<4x?x4096xf16>
    %15209 = flow.tensor.transfer %15208 : tensor<4x?x4096xf16>{%dim_12837} to #hal.device.promise<@__device_0>
    %15210 = torch_c.from_builtin_tensor %15209 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15211 = torch_c.to_builtin_tensor %15184 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12838 = arith.constant 1 : index
    %dim_12839 = tensor.dim %15211, %c1_12838 : tensor<4x?x4096xf16>
    %15212 = flow.tensor.transfer %15211 : tensor<4x?x4096xf16>{%dim_12839} to #hal.device.promise<@__device_0>
    %15213 = torch_c.from_builtin_tensor %15212 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15214 = torch_c.to_builtin_tensor %15191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12840 = arith.constant 1 : index
    %dim_12841 = tensor.dim %15214, %c1_12840 : tensor<4x?x4096xf16>
    %15215 = flow.tensor.transfer %15214 : tensor<4x?x4096xf16>{%dim_12841} to #hal.device.promise<@__device_0>
    %15216 = torch_c.from_builtin_tensor %15215 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15217 = torch_c.to_builtin_tensor %15198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12842 = arith.constant 1 : index
    %dim_12843 = tensor.dim %15217, %c1_12842 : tensor<4x?x4096xf16>
    %15218 = flow.tensor.transfer %15217 : tensor<4x?x4096xf16>{%dim_12843} to #hal.device.promise<@__device_0>
    %15219 = torch_c.from_builtin_tensor %15218 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12844 = torch.constant.int 1
    %15220 = torch.aten.add.Tensor %15149, %15201, %int1_12844 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12845 = torch.constant.int 1
    %15221 = torch.aten.add.Tensor %15220, %15204, %int1_12845 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12846 = torch.constant.int 1
    %15222 = torch.aten.add.Tensor %15221, %15207, %int1_12846 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12847 = torch.constant.int 1
    %15223 = torch.aten.add.Tensor %15222, %15210, %int1_12847 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12848 = torch.constant.int 1
    %15224 = torch.aten.add.Tensor %15223, %15213, %int1_12848 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12849 = torch.constant.int 1
    %15225 = torch.aten.add.Tensor %15224, %15216, %int1_12849 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12850 = torch.constant.int 1
    %15226 = torch.aten.add.Tensor %15225, %15219, %int1_12850 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
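    // Reduction onto @__device_1: same transfer-and-accumulate pattern, with
    // %15156 used directly as the local partial.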
    %15227 = torch_c.to_builtin_tensor %15149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12851 = arith.constant 1 : index
    %dim_12852 = tensor.dim %15227, %c1_12851 : tensor<4x?x4096xf16>
    %15228 = flow.tensor.transfer %15227 : tensor<4x?x4096xf16>{%dim_12852} to #hal.device.promise<@__device_1>
    %15229 = torch_c.from_builtin_tensor %15228 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15230 = torch_c.to_builtin_tensor %15163 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12853 = arith.constant 1 : index
    %dim_12854 = tensor.dim %15230, %c1_12853 : tensor<4x?x4096xf16>
    %15231 = flow.tensor.transfer %15230 : tensor<4x?x4096xf16>{%dim_12854} to #hal.device.promise<@__device_1>
    %15232 = torch_c.from_builtin_tensor %15231 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15233 = torch_c.to_builtin_tensor %15170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12855 = arith.constant 1 : index
    %dim_12856 = tensor.dim %15233, %c1_12855 : tensor<4x?x4096xf16>
    %15234 = flow.tensor.transfer %15233 : tensor<4x?x4096xf16>{%dim_12856} to #hal.device.promise<@__device_1>
    %15235 = torch_c.from_builtin_tensor %15234 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15236 = torch_c.to_builtin_tensor %15177 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12857 = arith.constant 1 : index
    %dim_12858 = tensor.dim %15236, %c1_12857 : tensor<4x?x4096xf16>
    %15237 = flow.tensor.transfer %15236 : tensor<4x?x4096xf16>{%dim_12858} to #hal.device.promise<@__device_1>
    %15238 = torch_c.from_builtin_tensor %15237 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15239 = torch_c.to_builtin_tensor %15184 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12859 = arith.constant 1 : index
    %dim_12860 = tensor.dim %15239, %c1_12859 : tensor<4x?x4096xf16>
    %15240 = flow.tensor.transfer %15239 : tensor<4x?x4096xf16>{%dim_12860} to #hal.device.promise<@__device_1>
    %15241 = torch_c.from_builtin_tensor %15240 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15242 = torch_c.to_builtin_tensor %15191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12861 = arith.constant 1 : index
    %dim_12862 = tensor.dim %15242, %c1_12861 : tensor<4x?x4096xf16>
    %15243 = flow.tensor.transfer %15242 : tensor<4x?x4096xf16>{%dim_12862} to #hal.device.promise<@__device_1>
    %15244 = torch_c.from_builtin_tensor %15243 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15245 = torch_c.to_builtin_tensor %15198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12863 = arith.constant 1 : index
    %dim_12864 = tensor.dim %15245, %c1_12863 : tensor<4x?x4096xf16>
    %15246 = flow.tensor.transfer %15245 : tensor<4x?x4096xf16>{%dim_12864} to #hal.device.promise<@__device_1>
    %15247 = torch_c.from_builtin_tensor %15246 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12865 = torch.constant.int 1
    %15248 = torch.aten.add.Tensor %15229, %15156, %int1_12865 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12866 = torch.constant.int 1
    %15249 = torch.aten.add.Tensor %15248, %15232, %int1_12866 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12867 = torch.constant.int 1
    %15250 = torch.aten.add.Tensor %15249, %15235, %int1_12867 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12868 = torch.constant.int 1
    %15251 = torch.aten.add.Tensor %15250, %15238, %int1_12868 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12869 = torch.constant.int 1
    %15252 = torch.aten.add.Tensor %15251, %15241, %int1_12869 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12870 = torch.constant.int 1
    %15253 = torch.aten.add.Tensor %15252, %15244, %int1_12870 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12871 = torch.constant.int 1
    %15254 = torch.aten.add.Tensor %15253, %15247, %int1_12871 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
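    // Reduction onto @__device_2 (local partial %15163).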
    %15255 = torch_c.to_builtin_tensor %15149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12872 = arith.constant 1 : index
    %dim_12873 = tensor.dim %15255, %c1_12872 : tensor<4x?x4096xf16>
    %15256 = flow.tensor.transfer %15255 : tensor<4x?x4096xf16>{%dim_12873} to #hal.device.promise<@__device_2>
    %15257 = torch_c.from_builtin_tensor %15256 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15258 = torch_c.to_builtin_tensor %15156 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12874 = arith.constant 1 : index
    %dim_12875 = tensor.dim %15258, %c1_12874 : tensor<4x?x4096xf16>
    %15259 = flow.tensor.transfer %15258 : tensor<4x?x4096xf16>{%dim_12875} to #hal.device.promise<@__device_2>
    %15260 = torch_c.from_builtin_tensor %15259 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15261 = torch_c.to_builtin_tensor %15170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12876 = arith.constant 1 : index
    %dim_12877 = tensor.dim %15261, %c1_12876 : tensor<4x?x4096xf16>
    %15262 = flow.tensor.transfer %15261 : tensor<4x?x4096xf16>{%dim_12877} to #hal.device.promise<@__device_2>
    %15263 = torch_c.from_builtin_tensor %15262 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15263, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15264 = torch_c.to_builtin_tensor %15177 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12878 = arith.constant 1 : index
    %dim_12879 = tensor.dim %15264, %c1_12878 : tensor<4x?x4096xf16>
    %15265 = flow.tensor.transfer %15264 : tensor<4x?x4096xf16>{%dim_12879} to #hal.device.promise<@__device_2>
    %15266 = torch_c.from_builtin_tensor %15265 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15267 = torch_c.to_builtin_tensor %15184 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12880 = arith.constant 1 : index
    %dim_12881 = tensor.dim %15267, %c1_12880 : tensor<4x?x4096xf16>
    %15268 = flow.tensor.transfer %15267 : tensor<4x?x4096xf16>{%dim_12881} to #hal.device.promise<@__device_2>
    %15269 = torch_c.from_builtin_tensor %15268 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15270 = torch_c.to_builtin_tensor %15191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12882 = arith.constant 1 : index
    %dim_12883 = tensor.dim %15270, %c1_12882 : tensor<4x?x4096xf16>
    %15271 = flow.tensor.transfer %15270 : tensor<4x?x4096xf16>{%dim_12883} to #hal.device.promise<@__device_2>
    %15272 = torch_c.from_builtin_tensor %15271 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15273 = torch_c.to_builtin_tensor %15198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12884 = arith.constant 1 : index
    %dim_12885 = tensor.dim %15273, %c1_12884 : tensor<4x?x4096xf16>
    %15274 = flow.tensor.transfer %15273 : tensor<4x?x4096xf16>{%dim_12885} to #hal.device.promise<@__device_2>
    %15275 = torch_c.from_builtin_tensor %15274 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12886 = torch.constant.int 1
    %15276 = torch.aten.add.Tensor %15257, %15260, %int1_12886 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12887 = torch.constant.int 1
    %15277 = torch.aten.add.Tensor %15276, %15163, %int1_12887 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12888 = torch.constant.int 1
    %15278 = torch.aten.add.Tensor %15277, %15263, %int1_12888 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12889 = torch.constant.int 1
    %15279 = torch.aten.add.Tensor %15278, %15266, %int1_12889 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12890 = torch.constant.int 1
    %15280 = torch.aten.add.Tensor %15279, %15269, %int1_12890 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12891 = torch.constant.int 1
    %15281 = torch.aten.add.Tensor %15280, %15272, %int1_12891 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12892 = torch.constant.int 1
    %15282 = torch.aten.add.Tensor %15281, %15275, %int1_12892 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
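    // Reduction onto @__device_3 (local partial %15170).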
    %15283 = torch_c.to_builtin_tensor %15149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12893 = arith.constant 1 : index
    %dim_12894 = tensor.dim %15283, %c1_12893 : tensor<4x?x4096xf16>
    %15284 = flow.tensor.transfer %15283 : tensor<4x?x4096xf16>{%dim_12894} to #hal.device.promise<@__device_3>
    %15285 = torch_c.from_builtin_tensor %15284 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15286 = torch_c.to_builtin_tensor %15156 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12895 = arith.constant 1 : index
    %dim_12896 = tensor.dim %15286, %c1_12895 : tensor<4x?x4096xf16>
    %15287 = flow.tensor.transfer %15286 : tensor<4x?x4096xf16>{%dim_12896} to #hal.device.promise<@__device_3>
    %15288 = torch_c.from_builtin_tensor %15287 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15289 = torch_c.to_builtin_tensor %15163 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12897 = arith.constant 1 : index
    %dim_12898 = tensor.dim %15289, %c1_12897 : tensor<4x?x4096xf16>
    %15290 = flow.tensor.transfer %15289 : tensor<4x?x4096xf16>{%dim_12898} to #hal.device.promise<@__device_3>
    %15291 = torch_c.from_builtin_tensor %15290 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15292 = torch_c.to_builtin_tensor %15177 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12899 = arith.constant 1 : index
    %dim_12900 = tensor.dim %15292, %c1_12899 : tensor<4x?x4096xf16>
    %15293 = flow.tensor.transfer %15292 : tensor<4x?x4096xf16>{%dim_12900} to #hal.device.promise<@__device_3>
    %15294 = torch_c.from_builtin_tensor %15293 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15295 = torch_c.to_builtin_tensor %15184 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12901 = arith.constant 1 : index
    %dim_12902 = tensor.dim %15295, %c1_12901 : tensor<4x?x4096xf16>
    %15296 = flow.tensor.transfer %15295 : tensor<4x?x4096xf16>{%dim_12902} to #hal.device.promise<@__device_3>
    %15297 = torch_c.from_builtin_tensor %15296 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15298 = torch_c.to_builtin_tensor %15191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12903 = arith.constant 1 : index
    %dim_12904 = tensor.dim %15298, %c1_12903 : tensor<4x?x4096xf16>
    %15299 = flow.tensor.transfer %15298 : tensor<4x?x4096xf16>{%dim_12904} to #hal.device.promise<@__device_3>
    %15300 = torch_c.from_builtin_tensor %15299 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15301 = torch_c.to_builtin_tensor %15198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12905 = arith.constant 1 : index
    %dim_12906 = tensor.dim %15301, %c1_12905 : tensor<4x?x4096xf16>
    %15302 = flow.tensor.transfer %15301 : tensor<4x?x4096xf16>{%dim_12906} to #hal.device.promise<@__device_3>
    %15303 = torch_c.from_builtin_tensor %15302 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12907 = torch.constant.int 1
    %15304 = torch.aten.add.Tensor %15285, %15288, %int1_12907 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12908 = torch.constant.int 1
    %15305 = torch.aten.add.Tensor %15304, %15291, %int1_12908 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12909 = torch.constant.int 1
    %15306 = torch.aten.add.Tensor %15305, %15170, %int1_12909 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12910 = torch.constant.int 1
    %15307 = torch.aten.add.Tensor %15306, %15294, %int1_12910 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12911 = torch.constant.int 1
    %15308 = torch.aten.add.Tensor %15307, %15297, %int1_12911 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12912 = torch.constant.int 1
    %15309 = torch.aten.add.Tensor %15308, %15300, %int1_12912 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12913 = torch.constant.int 1
    %15310 = torch.aten.add.Tensor %15309, %15303, %int1_12913 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
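    // Reduction onto @__device_4 (local partial %15177).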
    %15311 = torch_c.to_builtin_tensor %15149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12914 = arith.constant 1 : index
    %dim_12915 = tensor.dim %15311, %c1_12914 : tensor<4x?x4096xf16>
    %15312 = flow.tensor.transfer %15311 : tensor<4x?x4096xf16>{%dim_12915} to #hal.device.promise<@__device_4>
    %15313 = torch_c.from_builtin_tensor %15312 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15314 = torch_c.to_builtin_tensor %15156 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12916 = arith.constant 1 : index
    %dim_12917 = tensor.dim %15314, %c1_12916 : tensor<4x?x4096xf16>
    %15315 = flow.tensor.transfer %15314 : tensor<4x?x4096xf16>{%dim_12917} to #hal.device.promise<@__device_4>
    %15316 = torch_c.from_builtin_tensor %15315 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15317 = torch_c.to_builtin_tensor %15163 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12918 = arith.constant 1 : index
    %dim_12919 = tensor.dim %15317, %c1_12918 : tensor<4x?x4096xf16>
    %15318 = flow.tensor.transfer %15317 : tensor<4x?x4096xf16>{%dim_12919} to #hal.device.promise<@__device_4>
    %15319 = torch_c.from_builtin_tensor %15318 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15320 = torch_c.to_builtin_tensor %15170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12920 = arith.constant 1 : index
    %dim_12921 = tensor.dim %15320, %c1_12920 : tensor<4x?x4096xf16>
    %15321 = flow.tensor.transfer %15320 : tensor<4x?x4096xf16>{%dim_12921} to #hal.device.promise<@__device_4>
    %15322 = torch_c.from_builtin_tensor %15321 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15323 = torch_c.to_builtin_tensor %15184 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12922 = arith.constant 1 : index
    %dim_12923 = tensor.dim %15323, %c1_12922 : tensor<4x?x4096xf16>
    %15324 = flow.tensor.transfer %15323 : tensor<4x?x4096xf16>{%dim_12923} to #hal.device.promise<@__device_4>
    %15325 = torch_c.from_builtin_tensor %15324 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15326 = torch_c.to_builtin_tensor %15191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12924 = arith.constant 1 : index
    %dim_12925 = tensor.dim %15326, %c1_12924 : tensor<4x?x4096xf16>
    %15327 = flow.tensor.transfer %15326 : tensor<4x?x4096xf16>{%dim_12925} to #hal.device.promise<@__device_4>
    %15328 = torch_c.from_builtin_tensor %15327 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15329 = torch_c.to_builtin_tensor %15198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12926 = arith.constant 1 : index
    %dim_12927 = tensor.dim %15329, %c1_12926 : tensor<4x?x4096xf16>
    %15330 = flow.tensor.transfer %15329 : tensor<4x?x4096xf16>{%dim_12927} to #hal.device.promise<@__device_4>
    %15331 = torch_c.from_builtin_tensor %15330 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12928 = torch.constant.int 1
    %15332 = torch.aten.add.Tensor %15313, %15316, %int1_12928 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12929 = torch.constant.int 1
    %15333 = torch.aten.add.Tensor %15332, %15319, %int1_12929 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12930 = torch.constant.int 1
    %15334 = torch.aten.add.Tensor %15333, %15322, %int1_12930 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12931 = torch.constant.int 1
    %15335 = torch.aten.add.Tensor %15334, %15177, %int1_12931 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12932 = torch.constant.int 1
    %15336 = torch.aten.add.Tensor %15335, %15325, %int1_12932 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12933 = torch.constant.int 1
    %15337 = torch.aten.add.Tensor %15336, %15328, %int1_12933 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12934 = torch.constant.int 1
    %15338 = torch.aten.add.Tensor %15337, %15331, %int1_12934 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
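    // Same unrolled all-reduce onto @__device_5; here %15184 is the local partial,
    // so the other seven activations are transferred and accumulated.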
    %15339 = torch_c.to_builtin_tensor %15149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12935 = arith.constant 1 : index
    %dim_12936 = tensor.dim %15339, %c1_12935 : tensor<4x?x4096xf16>
    %15340 = flow.tensor.transfer %15339 : tensor<4x?x4096xf16>{%dim_12936} to #hal.device.promise<@__device_5>
    %15341 = torch_c.from_builtin_tensor %15340 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15342 = torch_c.to_builtin_tensor %15156 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12937 = arith.constant 1 : index
    %dim_12938 = tensor.dim %15342, %c1_12937 : tensor<4x?x4096xf16>
    %15343 = flow.tensor.transfer %15342 : tensor<4x?x4096xf16>{%dim_12938} to #hal.device.promise<@__device_5>
    %15344 = torch_c.from_builtin_tensor %15343 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15345 = torch_c.to_builtin_tensor %15163 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12939 = arith.constant 1 : index
    %dim_12940 = tensor.dim %15345, %c1_12939 : tensor<4x?x4096xf16>
    %15346 = flow.tensor.transfer %15345 : tensor<4x?x4096xf16>{%dim_12940} to #hal.device.promise<@__device_5>
    %15347 = torch_c.from_builtin_tensor %15346 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15348 = torch_c.to_builtin_tensor %15170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12941 = arith.constant 1 : index
    %dim_12942 = tensor.dim %15348, %c1_12941 : tensor<4x?x4096xf16>
    %15349 = flow.tensor.transfer %15348 : tensor<4x?x4096xf16>{%dim_12942} to #hal.device.promise<@__device_5>
    %15350 = torch_c.from_builtin_tensor %15349 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15350, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15351 = torch_c.to_builtin_tensor %15177 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12943 = arith.constant 1 : index
    %dim_12944 = tensor.dim %15351, %c1_12943 : tensor<4x?x4096xf16>
    %15352 = flow.tensor.transfer %15351 : tensor<4x?x4096xf16>{%dim_12944} to #hal.device.promise<@__device_5>
    %15353 = torch_c.from_builtin_tensor %15352 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15354 = torch_c.to_builtin_tensor %15191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12945 = arith.constant 1 : index
    %dim_12946 = tensor.dim %15354, %c1_12945 : tensor<4x?x4096xf16>
    %15355 = flow.tensor.transfer %15354 : tensor<4x?x4096xf16>{%dim_12946} to #hal.device.promise<@__device_5>
    %15356 = torch_c.from_builtin_tensor %15355 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15357 = torch_c.to_builtin_tensor %15198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12947 = arith.constant 1 : index
    %dim_12948 = tensor.dim %15357, %c1_12947 : tensor<4x?x4096xf16>
    %15358 = flow.tensor.transfer %15357 : tensor<4x?x4096xf16>{%dim_12948} to #hal.device.promise<@__device_5>
    %15359 = torch_c.from_builtin_tensor %15358 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12949 = torch.constant.int 1
    %15360 = torch.aten.add.Tensor %15341, %15344, %int1_12949 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12950 = torch.constant.int 1
    %15361 = torch.aten.add.Tensor %15360, %15347, %int1_12950 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12951 = torch.constant.int 1
    %15362 = torch.aten.add.Tensor %15361, %15350, %int1_12951 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12952 = torch.constant.int 1
    %15363 = torch.aten.add.Tensor %15362, %15353, %int1_12952 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12953 = torch.constant.int 1
    %15364 = torch.aten.add.Tensor %15363, %15184, %int1_12953 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12954 = torch.constant.int 1
    %15365 = torch.aten.add.Tensor %15364, %15356, %int1_12954 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12955 = torch.constant.int 1
    %15366 = torch.aten.add.Tensor %15365, %15359, %int1_12955 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
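    // Repeat for @__device_6 (%15191 is the resident partial on this device).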
    %15367 = torch_c.to_builtin_tensor %15149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12956 = arith.constant 1 : index
    %dim_12957 = tensor.dim %15367, %c1_12956 : tensor<4x?x4096xf16>
    %15368 = flow.tensor.transfer %15367 : tensor<4x?x4096xf16>{%dim_12957} to #hal.device.promise<@__device_6>
    %15369 = torch_c.from_builtin_tensor %15368 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15370 = torch_c.to_builtin_tensor %15156 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12958 = arith.constant 1 : index
    %dim_12959 = tensor.dim %15370, %c1_12958 : tensor<4x?x4096xf16>
    %15371 = flow.tensor.transfer %15370 : tensor<4x?x4096xf16>{%dim_12959} to #hal.device.promise<@__device_6>
    %15372 = torch_c.from_builtin_tensor %15371 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15373 = torch_c.to_builtin_tensor %15163 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12960 = arith.constant 1 : index
    %dim_12961 = tensor.dim %15373, %c1_12960 : tensor<4x?x4096xf16>
    %15374 = flow.tensor.transfer %15373 : tensor<4x?x4096xf16>{%dim_12961} to #hal.device.promise<@__device_6>
    %15375 = torch_c.from_builtin_tensor %15374 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15376 = torch_c.to_builtin_tensor %15170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12962 = arith.constant 1 : index
    %dim_12963 = tensor.dim %15376, %c1_12962 : tensor<4x?x4096xf16>
    %15377 = flow.tensor.transfer %15376 : tensor<4x?x4096xf16>{%dim_12963} to #hal.device.promise<@__device_6>
    %15378 = torch_c.from_builtin_tensor %15377 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15379 = torch_c.to_builtin_tensor %15177 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12964 = arith.constant 1 : index
    %dim_12965 = tensor.dim %15379, %c1_12964 : tensor<4x?x4096xf16>
    %15380 = flow.tensor.transfer %15379 : tensor<4x?x4096xf16>{%dim_12965} to #hal.device.promise<@__device_6>
    %15381 = torch_c.from_builtin_tensor %15380 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15382 = torch_c.to_builtin_tensor %15184 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12966 = arith.constant 1 : index
    %dim_12967 = tensor.dim %15382, %c1_12966 : tensor<4x?x4096xf16>
    %15383 = flow.tensor.transfer %15382 : tensor<4x?x4096xf16>{%dim_12967} to #hal.device.promise<@__device_6>
    %15384 = torch_c.from_builtin_tensor %15383 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15385 = torch_c.to_builtin_tensor %15198 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12968 = arith.constant 1 : index
    %dim_12969 = tensor.dim %15385, %c1_12968 : tensor<4x?x4096xf16>
    %15386 = flow.tensor.transfer %15385 : tensor<4x?x4096xf16>{%dim_12969} to #hal.device.promise<@__device_6>
    %15387 = torch_c.from_builtin_tensor %15386 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12970 = torch.constant.int 1
    %15388 = torch.aten.add.Tensor %15369, %15372, %int1_12970 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12971 = torch.constant.int 1
    %15389 = torch.aten.add.Tensor %15388, %15375, %int1_12971 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12972 = torch.constant.int 1
    %15390 = torch.aten.add.Tensor %15389, %15378, %int1_12972 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12973 = torch.constant.int 1
    %15391 = torch.aten.add.Tensor %15390, %15381, %int1_12973 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12974 = torch.constant.int 1
    %15392 = torch.aten.add.Tensor %15391, %15384, %int1_12974 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12975 = torch.constant.int 1
    %15393 = torch.aten.add.Tensor %15392, %15191, %int1_12975 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12976 = torch.constant.int 1
    %15394 = torch.aten.add.Tensor %15393, %15387, %int1_12976 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
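    // Repeat for @__device_7 (%15198 is the resident partial), completing the
    // replicated sum: every device now holds the same reduced [4,?,4096] tensor.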
    %15395 = torch_c.to_builtin_tensor %15149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12977 = arith.constant 1 : index
    %dim_12978 = tensor.dim %15395, %c1_12977 : tensor<4x?x4096xf16>
    %15396 = flow.tensor.transfer %15395 : tensor<4x?x4096xf16>{%dim_12978} to #hal.device.promise<@__device_7>
    %15397 = torch_c.from_builtin_tensor %15396 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15398 = torch_c.to_builtin_tensor %15156 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12979 = arith.constant 1 : index
    %dim_12980 = tensor.dim %15398, %c1_12979 : tensor<4x?x4096xf16>
    %15399 = flow.tensor.transfer %15398 : tensor<4x?x4096xf16>{%dim_12980} to #hal.device.promise<@__device_7>
    %15400 = torch_c.from_builtin_tensor %15399 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15401 = torch_c.to_builtin_tensor %15163 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12981 = arith.constant 1 : index
    %dim_12982 = tensor.dim %15401, %c1_12981 : tensor<4x?x4096xf16>
    %15402 = flow.tensor.transfer %15401 : tensor<4x?x4096xf16>{%dim_12982} to #hal.device.promise<@__device_7>
    %15403 = torch_c.from_builtin_tensor %15402 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15404 = torch_c.to_builtin_tensor %15170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12983 = arith.constant 1 : index
    %dim_12984 = tensor.dim %15404, %c1_12983 : tensor<4x?x4096xf16>
    %15405 = flow.tensor.transfer %15404 : tensor<4x?x4096xf16>{%dim_12984} to #hal.device.promise<@__device_7>
    %15406 = torch_c.from_builtin_tensor %15405 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15407 = torch_c.to_builtin_tensor %15177 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12985 = arith.constant 1 : index
    %dim_12986 = tensor.dim %15407, %c1_12985 : tensor<4x?x4096xf16>
    %15408 = flow.tensor.transfer %15407 : tensor<4x?x4096xf16>{%dim_12986} to #hal.device.promise<@__device_7>
    %15409 = torch_c.from_builtin_tensor %15408 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15410 = torch_c.to_builtin_tensor %15184 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12987 = arith.constant 1 : index
    %dim_12988 = tensor.dim %15410, %c1_12987 : tensor<4x?x4096xf16>
    %15411 = flow.tensor.transfer %15410 : tensor<4x?x4096xf16>{%dim_12988} to #hal.device.promise<@__device_7>
    %15412 = torch_c.from_builtin_tensor %15411 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %15413 = torch_c.to_builtin_tensor %15191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_12989 = arith.constant 1 : index
    %dim_12990 = tensor.dim %15413, %c1_12989 : tensor<4x?x4096xf16>
    %15414 = flow.tensor.transfer %15413 : tensor<4x?x4096xf16>{%dim_12990} to #hal.device.promise<@__device_7>
    %15415 = torch_c.from_builtin_tensor %15414 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12991 = torch.constant.int 1
    %15416 = torch.aten.add.Tensor %15397, %15400, %int1_12991 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12992 = torch.constant.int 1
    %15417 = torch.aten.add.Tensor %15416, %15403, %int1_12992 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12993 = torch.constant.int 1
    %15418 = torch.aten.add.Tensor %15417, %15406, %int1_12993 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12994 = torch.constant.int 1
    %15419 = torch.aten.add.Tensor %15418, %15409, %int1_12994 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12995 = torch.constant.int 1
    %15420 = torch.aten.add.Tensor %15419, %15412, %int1_12995 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12996 = torch.constant.int 1
    %15421 = torch.aten.add.Tensor %15420, %15415, %int1_12996 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12997 = torch.constant.int 1
    %15422 = torch.aten.add.Tensor %15421, %15198, %int1_12997 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
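    // Per-device residual connection: add each device's all-reduced output
    // (%15226 ... %15422) into its copy of what is presumably the residual stream
    // from earlier in the block (%14903 ... %14910).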
    %int1_12998 = torch.constant.int 1
    %15423 = torch.aten.add.Tensor %14903, %15226, %int1_12998 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_12999 = torch.constant.int 1
    %15424 = torch.aten.add.Tensor %14904, %15254, %int1_12999 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_13000 = torch.constant.int 1
    %15425 = torch.aten.add.Tensor %14905, %15282, %int1_13000 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_13001 = torch.constant.int 1
    %15426 = torch.aten.add.Tensor %14906, %15310, %int1_13001 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_13002 = torch.constant.int 1
    %15427 = torch.aten.add.Tensor %14907, %15338, %int1_13002 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_13003 = torch.constant.int 1
    %15428 = torch.aten.add.Tensor %14908, %15366, %int1_13003 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_13004 = torch.constant.int 1
    %15429 = torch.aten.add.Tensor %14909, %15394, %int1_13004 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_13005 = torch.constant.int 1
    %15430 = torch.aten.add.Tensor %14910, %15422, %int1_13005 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
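    // RMSNorm, replicated on all eight devices: upcast to f32 (torch dtype 6),
    // square elementwise, take the mean over the hidden (last) dimension with
    // keepdim, add eps (1e-5, printed as its exact f32 value), rsqrt, and rescale.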
    %int6_13006 = torch.constant.int 6
    %15431 = torch.prims.convert_element_type %15423, %int6_13006 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_13007 = torch.constant.int 6
    %15432 = torch.prims.convert_element_type %15424, %int6_13007 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_13008 = torch.constant.int 6
    %15433 = torch.prims.convert_element_type %15425, %int6_13008 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_13009 = torch.constant.int 6
    %15434 = torch.prims.convert_element_type %15426, %int6_13009 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_13010 = torch.constant.int 6
    %15435 = torch.prims.convert_element_type %15427, %int6_13010 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_13011 = torch.constant.int 6
    %15436 = torch.prims.convert_element_type %15428, %int6_13011 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_13012 = torch.constant.int 6
    %15437 = torch.prims.convert_element_type %15429, %int6_13012 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_13013 = torch.constant.int 6
    %15438 = torch.prims.convert_element_type %15430, %int6_13013 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_13014 = torch.constant.int 2
    %15439 = torch.aten.pow.Tensor_Scalar %15431, %int2_13014 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_13015 = torch.constant.int 2
    %15440 = torch.aten.pow.Tensor_Scalar %15432, %int2_13015 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_13016 = torch.constant.int 2
    %15441 = torch.aten.pow.Tensor_Scalar %15433, %int2_13016 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_13017 = torch.constant.int 2
    %15442 = torch.aten.pow.Tensor_Scalar %15434, %int2_13017 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_13018 = torch.constant.int 2
    %15443 = torch.aten.pow.Tensor_Scalar %15435, %int2_13018 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_13019 = torch.constant.int 2
    %15444 = torch.aten.pow.Tensor_Scalar %15436, %int2_13019 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_13020 = torch.constant.int 2
    %15445 = torch.aten.pow.Tensor_Scalar %15437, %int2_13020 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_13021 = torch.constant.int 2
    %15446 = torch.aten.pow.Tensor_Scalar %15438, %int2_13021 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int-1_13022 = torch.constant.int -1
    %15447 = torch.prim.ListConstruct %int-1_13022 : (!torch.int) -> !torch.list<int>
    %true_13023 = torch.constant.bool true
    %none_13024 = torch.constant.none
    %15448 = torch.aten.mean.dim %15439, %15447, %true_13023, %none_13024 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_13025 = torch.constant.int -1
    %15449 = torch.prim.ListConstruct %int-1_13025 : (!torch.int) -> !torch.list<int>
    %true_13026 = torch.constant.bool true
    %none_13027 = torch.constant.none
    %15450 = torch.aten.mean.dim %15440, %15449, %true_13026, %none_13027 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_13028 = torch.constant.int -1
    %15451 = torch.prim.ListConstruct %int-1_13028 : (!torch.int) -> !torch.list<int>
    %true_13029 = torch.constant.bool true
    %none_13030 = torch.constant.none
    %15452 = torch.aten.mean.dim %15441, %15451, %true_13029, %none_13030 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_13031 = torch.constant.int -1
    %15453 = torch.prim.ListConstruct %int-1_13031 : (!torch.int) -> !torch.list<int>
    %true_13032 = torch.constant.bool true
    %none_13033 = torch.constant.none
    %15454 = torch.aten.mean.dim %15442, %15453, %true_13032, %none_13033 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_13034 = torch.constant.int -1
    %15455 = torch.prim.ListConstruct %int-1_13034 : (!torch.int) -> !torch.list<int>
    %true_13035 = torch.constant.bool true
    %none_13036 = torch.constant.none
    %15456 = torch.aten.mean.dim %15443, %15455, %true_13035, %none_13036 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_13037 = torch.constant.int -1
    %15457 = torch.prim.ListConstruct %int-1_13037 : (!torch.int) -> !torch.list<int>
    %true_13038 = torch.constant.bool true
    %none_13039 = torch.constant.none
    %15458 = torch.aten.mean.dim %15444, %15457, %true_13038, %none_13039 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_13040 = torch.constant.int -1
    %15459 = torch.prim.ListConstruct %int-1_13040 : (!torch.int) -> !torch.list<int>
    %true_13041 = torch.constant.bool true
    %none_13042 = torch.constant.none
    %15460 = torch.aten.mean.dim %15445, %15459, %true_13041, %none_13042 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_13043 = torch.constant.int -1
    %15461 = torch.prim.ListConstruct %int-1_13043 : (!torch.int) -> !torch.list<int>
    %true_13044 = torch.constant.bool true
    %none_13045 = torch.constant.none
    %15462 = torch.aten.mean.dim %15446, %15461, %true_13044, %none_13045 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_13046 = torch.constant.float 9.9999997473787516E-6
    %int1_13047 = torch.constant.int 1
    %15463 = torch.aten.add.Scalar %15448, %float9.999990e-06_13046, %int1_13047 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_13048 = torch.constant.float 9.9999997473787516E-6
    %int1_13049 = torch.constant.int 1
    %15464 = torch.aten.add.Scalar %15450, %float9.999990e-06_13048, %int1_13049 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_13050 = torch.constant.float 9.9999997473787516E-6
    %int1_13051 = torch.constant.int 1
    %15465 = torch.aten.add.Scalar %15452, %float9.999990e-06_13050, %int1_13051 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_13052 = torch.constant.float 9.9999997473787516E-6
    %int1_13053 = torch.constant.int 1
    %15466 = torch.aten.add.Scalar %15454, %float9.999990e-06_13052, %int1_13053 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_13054 = torch.constant.float 9.9999997473787516E-6
    %int1_13055 = torch.constant.int 1
    %15467 = torch.aten.add.Scalar %15456, %float9.999990e-06_13054, %int1_13055 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_13056 = torch.constant.float 9.9999997473787516E-6
    %int1_13057 = torch.constant.int 1
    %15468 = torch.aten.add.Scalar %15458, %float9.999990e-06_13056, %int1_13057 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_13058 = torch.constant.float 9.9999997473787516E-6
    %int1_13059 = torch.constant.int 1
    %15469 = torch.aten.add.Scalar %15460, %float9.999990e-06_13058, %int1_13059 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_13060 = torch.constant.float 9.9999997473787516E-6
    %int1_13061 = torch.constant.int 1
    %15470 = torch.aten.add.Scalar %15462, %float9.999990e-06_13060, %int1_13061 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
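    // 1/sqrt(mean-of-squares + eps), then scale each device's f32 activations by it.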
    %15471 = torch.aten.rsqrt %15463 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %15472 = torch.aten.rsqrt %15464 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %15473 = torch.aten.rsqrt %15465 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %15474 = torch.aten.rsqrt %15466 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %15475 = torch.aten.rsqrt %15467 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %15476 = torch.aten.rsqrt %15468 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %15477 = torch.aten.rsqrt %15469 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %15478 = torch.aten.rsqrt %15470 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %15478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %15479 = torch.aten.mul.Tensor %15431, %15471 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15480 = torch.aten.mul.Tensor %15432, %15472 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15481 = torch.aten.mul.Tensor %15433, %15473 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15482 = torch.aten.mul.Tensor %15434, %15474 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15483 = torch.aten.mul.Tensor %15435, %15475 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15484 = torch.aten.mul.Tensor %15436, %15476 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15485 = torch.aten.mul.Tensor %15437, %15477 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15486 = torch.aten.mul.Tensor %15438, %15478 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
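    // Apply the learned RMSNorm gain (%512 ... %519, presumably the per-device
    // replicas of this block's attn_norm.weight) to the normalized activations.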
    %15487 = torch.aten.mul.Tensor %512, %15479 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15488 = torch.aten.mul.Tensor %513, %15480 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15489 = torch.aten.mul.Tensor %514, %15481 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15490 = torch.aten.mul.Tensor %515, %15482 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15491 = torch.aten.mul.Tensor %516, %15483 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15492 = torch.aten.mul.Tensor %517, %15484 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15493 = torch.aten.mul.Tensor %518, %15485 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %15494 = torch.aten.mul.Tensor %519, %15486 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %15494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
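    // Downcast back to f16 (torch dtype 5) for the projection matmuls below.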
    %int5_13062 = torch.constant.int 5
    %15495 = torch.prims.convert_element_type %15487, %int5_13062 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_13063 = torch.constant.int 5
    %15496 = torch.prims.convert_element_type %15488, %int5_13063 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_13064 = torch.constant.int 5
    %15497 = torch.prims.convert_element_type %15489, %int5_13064 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_13065 = torch.constant.int 5
    %15498 = torch.prims.convert_element_type %15490, %int5_13065 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_13066 = torch.constant.int 5
    %15499 = torch.prims.convert_element_type %15491, %int5_13066 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_13067 = torch.constant.int 5
    %15500 = torch.prims.convert_element_type %15492, %int5_13067 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_13068 = torch.constant.int 5
    %15501 = torch.prims.convert_element_type %15493, %int5_13068 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_13069 = torch.constant.int 5
    %15502 = torch.prims.convert_element_type %15494, %int5_13069 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %15502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
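    // Per-device projection: transpose each [512,4096] weight shard to [4096,512],
    // flatten the activations to [4*seq, 4096], matmul to [4*seq, 512], and reshape
    // to [4, seq, 512]. Given the sharding in this module, these look like the
    // column-parallel attention Q shards (8 x 512 = 4096 output rows).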
    %int1_13070 = torch.constant.int 1
    %int0_13071 = torch.constant.int 0
    %15503 = torch.prim.ListConstruct %int1_13070, %int0_13071 : (!torch.int, !torch.int) -> !torch.list<int>
    %15504 = torch.aten.permute %520, %15503 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_13072 = torch.constant.int 1
    %int0_13073 = torch.constant.int 0
    %15505 = torch.prim.ListConstruct %int1_13072, %int0_13073 : (!torch.int, !torch.int) -> !torch.list<int>
    %15506 = torch.aten.permute %521, %15505 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_13074 = torch.constant.int 1
    %int0_13075 = torch.constant.int 0
    %15507 = torch.prim.ListConstruct %int1_13074, %int0_13075 : (!torch.int, !torch.int) -> !torch.list<int>
    %15508 = torch.aten.permute %522, %15507 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_13076 = torch.constant.int 1
    %int0_13077 = torch.constant.int 0
    %15509 = torch.prim.ListConstruct %int1_13076, %int0_13077 : (!torch.int, !torch.int) -> !torch.list<int>
    %15510 = torch.aten.permute %523, %15509 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_13078 = torch.constant.int 1
    %int0_13079 = torch.constant.int 0
    %15511 = torch.prim.ListConstruct %int1_13078, %int0_13079 : (!torch.int, !torch.int) -> !torch.list<int>
    %15512 = torch.aten.permute %524, %15511 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_13080 = torch.constant.int 1
    %int0_13081 = torch.constant.int 0
    %15513 = torch.prim.ListConstruct %int1_13080, %int0_13081 : (!torch.int, !torch.int) -> !torch.list<int>
    %15514 = torch.aten.permute %525, %15513 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_13082 = torch.constant.int 1
    %int0_13083 = torch.constant.int 0
    %15515 = torch.prim.ListConstruct %int1_13082, %int0_13083 : (!torch.int, !torch.int) -> !torch.list<int>
    %15516 = torch.aten.permute %526, %15515 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_13084 = torch.constant.int 1
    %int0_13085 = torch.constant.int 0
    %15517 = torch.prim.ListConstruct %int1_13084, %int0_13085 : (!torch.int, !torch.int) -> !torch.list<int>
    %15518 = torch.aten.permute %527, %15517 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int4_13086 = torch.constant.int 4
    %15519 = torch.aten.mul.int %int4_13086, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13087 = torch.constant.int 4096
    %15520 = torch.prim.ListConstruct %15519, %int4096_13087 : (!torch.int, !torch.int) -> !torch.list<int>
    %15521 = torch.aten.view %15495, %15520 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15521, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15522 = torch.aten.mm %15521, %15504 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %15522, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_13088 = torch.constant.int 4
    %int512_13089 = torch.constant.int 512
    %15523 = torch.prim.ListConstruct %int4_13088, %2482, %int512_13089 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15524 = torch.aten.view %15522, %15523 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %15524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_13090 = torch.constant.int 4
    %15525 = torch.aten.mul.int %int4_13090, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13091 = torch.constant.int 4096
    %15526 = torch.prim.ListConstruct %15525, %int4096_13091 : (!torch.int, !torch.int) -> !torch.list<int>
    %15527 = torch.aten.view %15496, %15526 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15527, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15528 = torch.aten.mm %15527, %15506 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %15528, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_13092 = torch.constant.int 4
    %int512_13093 = torch.constant.int 512
    %15529 = torch.prim.ListConstruct %int4_13092, %2482, %int512_13093 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15530 = torch.aten.view %15528, %15529 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %15530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_13094 = torch.constant.int 4
    %15531 = torch.aten.mul.int %int4_13094, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13095 = torch.constant.int 4096
    %15532 = torch.prim.ListConstruct %15531, %int4096_13095 : (!torch.int, !torch.int) -> !torch.list<int>
    %15533 = torch.aten.view %15497, %15532 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15533, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15534 = torch.aten.mm %15533, %15508 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %15534, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_13096 = torch.constant.int 4
    %int512_13097 = torch.constant.int 512
    %15535 = torch.prim.ListConstruct %int4_13096, %2482, %int512_13097 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15536 = torch.aten.view %15534, %15535 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %15536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_13098 = torch.constant.int 4
    %15537 = torch.aten.mul.int %int4_13098, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13099 = torch.constant.int 4096
    %15538 = torch.prim.ListConstruct %15537, %int4096_13099 : (!torch.int, !torch.int) -> !torch.list<int>
    %15539 = torch.aten.view %15498, %15538 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15539, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15540 = torch.aten.mm %15539, %15510 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %15540, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_13100 = torch.constant.int 4
    %int512_13101 = torch.constant.int 512
    %15541 = torch.prim.ListConstruct %int4_13100, %2482, %int512_13101 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15542 = torch.aten.view %15540, %15541 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %15542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_13102 = torch.constant.int 4
    %15543 = torch.aten.mul.int %int4_13102, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13103 = torch.constant.int 4096
    %15544 = torch.prim.ListConstruct %15543, %int4096_13103 : (!torch.int, !torch.int) -> !torch.list<int>
    %15545 = torch.aten.view %15499, %15544 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15545, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15546 = torch.aten.mm %15545, %15512 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %15546, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_13104 = torch.constant.int 4
    %int512_13105 = torch.constant.int 512
    %15547 = torch.prim.ListConstruct %int4_13104, %2482, %int512_13105 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15548 = torch.aten.view %15546, %15547 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %15548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_13106 = torch.constant.int 4
    %15549 = torch.aten.mul.int %int4_13106, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13107 = torch.constant.int 4096
    %15550 = torch.prim.ListConstruct %15549, %int4096_13107 : (!torch.int, !torch.int) -> !torch.list<int>
    %15551 = torch.aten.view %15500, %15550 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15551, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15552 = torch.aten.mm %15551, %15514 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %15552, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_13108 = torch.constant.int 4
    %int512_13109 = torch.constant.int 512
    %15553 = torch.prim.ListConstruct %int4_13108, %2482, %int512_13109 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15554 = torch.aten.view %15552, %15553 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %15554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_13110 = torch.constant.int 4
    %15555 = torch.aten.mul.int %int4_13110, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13111 = torch.constant.int 4096
    %15556 = torch.prim.ListConstruct %15555, %int4096_13111 : (!torch.int, !torch.int) -> !torch.list<int>
    %15557 = torch.aten.view %15501, %15556 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15557, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15558 = torch.aten.mm %15557, %15516 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %15558, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_13112 = torch.constant.int 4
    %int512_13113 = torch.constant.int 512
    %15559 = torch.prim.ListConstruct %int4_13112, %2482, %int512_13113 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15560 = torch.aten.view %15558, %15559 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %15560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_13114 = torch.constant.int 4
    %15561 = torch.aten.mul.int %int4_13114, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13115 = torch.constant.int 4096
    %15562 = torch.prim.ListConstruct %15561, %int4096_13115 : (!torch.int, !torch.int) -> !torch.list<int>
    %15563 = torch.aten.view %15502, %15562 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15563, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15564 = torch.aten.mm %15563, %15518 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %15564, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_13116 = torch.constant.int 4
    %int512_13117 = torch.constant.int 512
    %15565 = torch.prim.ListConstruct %int4_13116, %2482, %int512_13117 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15566 = torch.aten.view %15564, %15565 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %15566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
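    // The eight [128,4096] weights below (%528-%535) are transposed to
    // [4096,128] for torch.aten.mm. With a single 128-wide head per device,
    // these read as the per-device key-projection shards; this is a hedged
    // interpretation from the shapes, since the IR itself does not name them.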
    %int1_13118 = torch.constant.int 1
    %int0_13119 = torch.constant.int 0
    %15567 = torch.prim.ListConstruct %int1_13118, %int0_13119 : (!torch.int, !torch.int) -> !torch.list<int>
    %15568 = torch.aten.permute %528, %15567 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13120 = torch.constant.int 1
    %int0_13121 = torch.constant.int 0
    %15569 = torch.prim.ListConstruct %int1_13120, %int0_13121 : (!torch.int, !torch.int) -> !torch.list<int>
    %15570 = torch.aten.permute %529, %15569 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13122 = torch.constant.int 1
    %int0_13123 = torch.constant.int 0
    %15571 = torch.prim.ListConstruct %int1_13122, %int0_13123 : (!torch.int, !torch.int) -> !torch.list<int>
    %15572 = torch.aten.permute %530, %15571 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13124 = torch.constant.int 1
    %int0_13125 = torch.constant.int 0
    %15573 = torch.prim.ListConstruct %int1_13124, %int0_13125 : (!torch.int, !torch.int) -> !torch.list<int>
    %15574 = torch.aten.permute %531, %15573 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13126 = torch.constant.int 1
    %int0_13127 = torch.constant.int 0
    %15575 = torch.prim.ListConstruct %int1_13126, %int0_13127 : (!torch.int, !torch.int) -> !torch.list<int>
    %15576 = torch.aten.permute %532, %15575 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13128 = torch.constant.int 1
    %int0_13129 = torch.constant.int 0
    %15577 = torch.prim.ListConstruct %int1_13128, %int0_13129 : (!torch.int, !torch.int) -> !torch.list<int>
    %15578 = torch.aten.permute %533, %15577 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13130 = torch.constant.int 1
    %int0_13131 = torch.constant.int 0
    %15579 = torch.prim.ListConstruct %int1_13130, %int0_13131 : (!torch.int, !torch.int) -> !torch.list<int>
    %15580 = torch.aten.permute %534, %15579 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13132 = torch.constant.int 1
    %int0_13133 = torch.constant.int 0
    %15581 = torch.prim.ListConstruct %int1_13132, %int0_13133 : (!torch.int, !torch.int) -> !torch.list<int>
    %15582 = torch.aten.permute %535, %15581 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
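    // Eight per-device matmuls follow, one per shard: flatten [4,?,4096] to
    // [?,4096], multiply by the transposed [4096,128] weight, then restore
    // the batch dimension to get [4,?,128].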
    %int4_13134 = torch.constant.int 4
    %15583 = torch.aten.mul.int %int4_13134, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13135 = torch.constant.int 4096
    %15584 = torch.prim.ListConstruct %15583, %int4096_13135 : (!torch.int, !torch.int) -> !torch.list<int>
    %15585 = torch.aten.view %15495, %15584 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15585, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15586 = torch.aten.mm %15585, %15568 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15586, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13136 = torch.constant.int 4
    %int128_13137 = torch.constant.int 128
    %15587 = torch.prim.ListConstruct %int4_13136, %2482, %int128_13137 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15588 = torch.aten.view %15586, %15587 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13138 = torch.constant.int 4
    %15589 = torch.aten.mul.int %int4_13138, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13139 = torch.constant.int 4096
    %15590 = torch.prim.ListConstruct %15589, %int4096_13139 : (!torch.int, !torch.int) -> !torch.list<int>
    %15591 = torch.aten.view %15496, %15590 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15591, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15592 = torch.aten.mm %15591, %15570 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15592, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13140 = torch.constant.int 4
    %int128_13141 = torch.constant.int 128
    %15593 = torch.prim.ListConstruct %int4_13140, %2482, %int128_13141 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15594 = torch.aten.view %15592, %15593 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13142 = torch.constant.int 4
    %15595 = torch.aten.mul.int %int4_13142, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13143 = torch.constant.int 4096
    %15596 = torch.prim.ListConstruct %15595, %int4096_13143 : (!torch.int, !torch.int) -> !torch.list<int>
    %15597 = torch.aten.view %15497, %15596 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15597, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15598 = torch.aten.mm %15597, %15572 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15598, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13144 = torch.constant.int 4
    %int128_13145 = torch.constant.int 128
    %15599 = torch.prim.ListConstruct %int4_13144, %2482, %int128_13145 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15600 = torch.aten.view %15598, %15599 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13146 = torch.constant.int 4
    %15601 = torch.aten.mul.int %int4_13146, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13147 = torch.constant.int 4096
    %15602 = torch.prim.ListConstruct %15601, %int4096_13147 : (!torch.int, !torch.int) -> !torch.list<int>
    %15603 = torch.aten.view %15498, %15602 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15603, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15604 = torch.aten.mm %15603, %15574 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15604, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13148 = torch.constant.int 4
    %int128_13149 = torch.constant.int 128
    %15605 = torch.prim.ListConstruct %int4_13148, %2482, %int128_13149 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15606 = torch.aten.view %15604, %15605 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13150 = torch.constant.int 4
    %15607 = torch.aten.mul.int %int4_13150, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13151 = torch.constant.int 4096
    %15608 = torch.prim.ListConstruct %15607, %int4096_13151 : (!torch.int, !torch.int) -> !torch.list<int>
    %15609 = torch.aten.view %15499, %15608 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15609, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15610 = torch.aten.mm %15609, %15576 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15610, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13152 = torch.constant.int 4
    %int128_13153 = torch.constant.int 128
    %15611 = torch.prim.ListConstruct %int4_13152, %2482, %int128_13153 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15612 = torch.aten.view %15610, %15611 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13154 = torch.constant.int 4
    %15613 = torch.aten.mul.int %int4_13154, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13155 = torch.constant.int 4096
    %15614 = torch.prim.ListConstruct %15613, %int4096_13155 : (!torch.int, !torch.int) -> !torch.list<int>
    %15615 = torch.aten.view %15500, %15614 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15615, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15616 = torch.aten.mm %15615, %15578 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15616, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13156 = torch.constant.int 4
    %int128_13157 = torch.constant.int 128
    %15617 = torch.prim.ListConstruct %int4_13156, %2482, %int128_13157 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15618 = torch.aten.view %15616, %15617 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13158 = torch.constant.int 4
    %15619 = torch.aten.mul.int %int4_13158, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13159 = torch.constant.int 4096
    %15620 = torch.prim.ListConstruct %15619, %int4096_13159 : (!torch.int, !torch.int) -> !torch.list<int>
    %15621 = torch.aten.view %15501, %15620 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15621, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15622 = torch.aten.mm %15621, %15580 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15622, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13160 = torch.constant.int 4
    %int128_13161 = torch.constant.int 128
    %15623 = torch.prim.ListConstruct %int4_13160, %2482, %int128_13161 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15624 = torch.aten.view %15622, %15623 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13162 = torch.constant.int 4
    %15625 = torch.aten.mul.int %int4_13162, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13163 = torch.constant.int 4096
    %15626 = torch.prim.ListConstruct %15625, %int4096_13163 : (!torch.int, !torch.int) -> !torch.list<int>
    %15627 = torch.aten.view %15502, %15626 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15627, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15628 = torch.aten.mm %15627, %15582 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15628, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13164 = torch.constant.int 4
    %int128_13165 = torch.constant.int 128
    %15629 = torch.prim.ListConstruct %int4_13164, %2482, %int128_13165 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15630 = torch.aten.view %15628, %15629 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
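    // A second set of eight [128,4096] weights (%536-%543) is transposed the
    // same way; given the conventional Q/K/V ordering of the projections
    // above, these are likely the value-projection shards.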
    %int1_13166 = torch.constant.int 1
    %int0_13167 = torch.constant.int 0
    %15631 = torch.prim.ListConstruct %int1_13166, %int0_13167 : (!torch.int, !torch.int) -> !torch.list<int>
    %15632 = torch.aten.permute %536, %15631 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13168 = torch.constant.int 1
    %int0_13169 = torch.constant.int 0
    %15633 = torch.prim.ListConstruct %int1_13168, %int0_13169 : (!torch.int, !torch.int) -> !torch.list<int>
    %15634 = torch.aten.permute %537, %15633 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13170 = torch.constant.int 1
    %int0_13171 = torch.constant.int 0
    %15635 = torch.prim.ListConstruct %int1_13170, %int0_13171 : (!torch.int, !torch.int) -> !torch.list<int>
    %15636 = torch.aten.permute %538, %15635 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13172 = torch.constant.int 1
    %int0_13173 = torch.constant.int 0
    %15637 = torch.prim.ListConstruct %int1_13172, %int0_13173 : (!torch.int, !torch.int) -> !torch.list<int>
    %15638 = torch.aten.permute %539, %15637 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13174 = torch.constant.int 1
    %int0_13175 = torch.constant.int 0
    %15639 = torch.prim.ListConstruct %int1_13174, %int0_13175 : (!torch.int, !torch.int) -> !torch.list<int>
    %15640 = torch.aten.permute %540, %15639 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13176 = torch.constant.int 1
    %int0_13177 = torch.constant.int 0
    %15641 = torch.prim.ListConstruct %int1_13176, %int0_13177 : (!torch.int, !torch.int) -> !torch.list<int>
    %15642 = torch.aten.permute %541, %15641 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13178 = torch.constant.int 1
    %int0_13179 = torch.constant.int 0
    %15643 = torch.prim.ListConstruct %int1_13178, %int0_13179 : (!torch.int, !torch.int) -> !torch.list<int>
    %15644 = torch.aten.permute %542, %15643 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_13180 = torch.constant.int 1
    %int0_13181 = torch.constant.int 0
    %15645 = torch.prim.ListConstruct %int1_13180, %int0_13181 : (!torch.int, !torch.int) -> !torch.list<int>
    %15646 = torch.aten.permute %543, %15645 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
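    // Same flatten-matmul-restore pattern as before, now against the second
    // weight set, again yielding one [4,?,128] result per device.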
    %int4_13182 = torch.constant.int 4
    %15647 = torch.aten.mul.int %int4_13182, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13183 = torch.constant.int 4096
    %15648 = torch.prim.ListConstruct %15647, %int4096_13183 : (!torch.int, !torch.int) -> !torch.list<int>
    %15649 = torch.aten.view %15495, %15648 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15649, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15650 = torch.aten.mm %15649, %15632 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15650, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13184 = torch.constant.int 4
    %int128_13185 = torch.constant.int 128
    %15651 = torch.prim.ListConstruct %int4_13184, %2482, %int128_13185 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15652 = torch.aten.view %15650, %15651 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13186 = torch.constant.int 4
    %15653 = torch.aten.mul.int %int4_13186, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13187 = torch.constant.int 4096
    %15654 = torch.prim.ListConstruct %15653, %int4096_13187 : (!torch.int, !torch.int) -> !torch.list<int>
    %15655 = torch.aten.view %15496, %15654 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15655, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15656 = torch.aten.mm %15655, %15634 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15656, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13188 = torch.constant.int 4
    %int128_13189 = torch.constant.int 128
    %15657 = torch.prim.ListConstruct %int4_13188, %2482, %int128_13189 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15658 = torch.aten.view %15656, %15657 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13190 = torch.constant.int 4
    %15659 = torch.aten.mul.int %int4_13190, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13191 = torch.constant.int 4096
    %15660 = torch.prim.ListConstruct %15659, %int4096_13191 : (!torch.int, !torch.int) -> !torch.list<int>
    %15661 = torch.aten.view %15497, %15660 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15661, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15662 = torch.aten.mm %15661, %15636 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15662, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13192 = torch.constant.int 4
    %int128_13193 = torch.constant.int 128
    %15663 = torch.prim.ListConstruct %int4_13192, %2482, %int128_13193 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15664 = torch.aten.view %15662, %15663 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13194 = torch.constant.int 4
    %15665 = torch.aten.mul.int %int4_13194, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13195 = torch.constant.int 4096
    %15666 = torch.prim.ListConstruct %15665, %int4096_13195 : (!torch.int, !torch.int) -> !torch.list<int>
    %15667 = torch.aten.view %15498, %15666 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15667, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15668 = torch.aten.mm %15667, %15638 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15668, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13196 = torch.constant.int 4
    %int128_13197 = torch.constant.int 128
    %15669 = torch.prim.ListConstruct %int4_13196, %2482, %int128_13197 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15670 = torch.aten.view %15668, %15669 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13198 = torch.constant.int 4
    %15671 = torch.aten.mul.int %int4_13198, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13199 = torch.constant.int 4096
    %15672 = torch.prim.ListConstruct %15671, %int4096_13199 : (!torch.int, !torch.int) -> !torch.list<int>
    %15673 = torch.aten.view %15499, %15672 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15673, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15674 = torch.aten.mm %15673, %15640 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15674, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13200 = torch.constant.int 4
    %int128_13201 = torch.constant.int 128
    %15675 = torch.prim.ListConstruct %int4_13200, %2482, %int128_13201 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15676 = torch.aten.view %15674, %15675 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13202 = torch.constant.int 4
    %15677 = torch.aten.mul.int %int4_13202, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13203 = torch.constant.int 4096
    %15678 = torch.prim.ListConstruct %15677, %int4096_13203 : (!torch.int, !torch.int) -> !torch.list<int>
    %15679 = torch.aten.view %15500, %15678 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15679, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15680 = torch.aten.mm %15679, %15642 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15680, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13204 = torch.constant.int 4
    %int128_13205 = torch.constant.int 128
    %15681 = torch.prim.ListConstruct %int4_13204, %2482, %int128_13205 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15682 = torch.aten.view %15680, %15681 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13206 = torch.constant.int 4
    %15683 = torch.aten.mul.int %int4_13206, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13207 = torch.constant.int 4096
    %15684 = torch.prim.ListConstruct %15683, %int4096_13207 : (!torch.int, !torch.int) -> !torch.list<int>
    %15685 = torch.aten.view %15501, %15684 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15685, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15686 = torch.aten.mm %15685, %15644 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15686, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13208 = torch.constant.int 4
    %int128_13209 = torch.constant.int 128
    %15687 = torch.prim.ListConstruct %int4_13208, %2482, %int128_13209 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15688 = torch.aten.view %15686, %15687 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_13210 = torch.constant.int 4
    %15689 = torch.aten.mul.int %int4_13210, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_13211 = torch.constant.int 4096
    %15690 = torch.prim.ListConstruct %15689, %int4096_13211 : (!torch.int, !torch.int) -> !torch.list<int>
    %15691 = torch.aten.view %15502, %15690 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %15691, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %15692 = torch.aten.mm %15691, %15646 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %15692, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_13212 = torch.constant.int 4
    %int128_13213 = torch.constant.int 128
    %15693 = torch.prim.ListConstruct %int4_13212, %2482, %int128_13213 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15694 = torch.aten.view %15692, %15693 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %15694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
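    // Each [4,?,512] projection shard is reshaped to [4,?,4,128]: four
    // 128-dim attention heads per device, which with eight devices matches a
    // 32-head query projection (hedged reading of the shapes).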
    %int4_13214 = torch.constant.int 4
    %int4_13215 = torch.constant.int 4
    %int128_13216 = torch.constant.int 128
    %15695 = torch.prim.ListConstruct %int4_13214, %2482, %int4_13215, %int128_13216 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15696 = torch.aten.view %15524, %15695 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_13217 = torch.constant.int 4
    %int4_13218 = torch.constant.int 4
    %int128_13219 = torch.constant.int 128
    %15697 = torch.prim.ListConstruct %int4_13217, %2482, %int4_13218, %int128_13219 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15698 = torch.aten.view %15530, %15697 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_13220 = torch.constant.int 4
    %int4_13221 = torch.constant.int 4
    %int128_13222 = torch.constant.int 128
    %15699 = torch.prim.ListConstruct %int4_13220, %2482, %int4_13221, %int128_13222 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15700 = torch.aten.view %15536, %15699 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_13223 = torch.constant.int 4
    %int4_13224 = torch.constant.int 4
    %int128_13225 = torch.constant.int 128
    %15701 = torch.prim.ListConstruct %int4_13223, %2482, %int4_13224, %int128_13225 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15702 = torch.aten.view %15542, %15701 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_13226 = torch.constant.int 4
    %int4_13227 = torch.constant.int 4
    %int128_13228 = torch.constant.int 128
    %15703 = torch.prim.ListConstruct %int4_13226, %2482, %int4_13227, %int128_13228 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15704 = torch.aten.view %15548, %15703 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_13229 = torch.constant.int 4
    %int4_13230 = torch.constant.int 4
    %int128_13231 = torch.constant.int 128
    %15705 = torch.prim.ListConstruct %int4_13229, %2482, %int4_13230, %int128_13231 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15706 = torch.aten.view %15554, %15705 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_13232 = torch.constant.int 4
    %int4_13233 = torch.constant.int 4
    %int128_13234 = torch.constant.int 128
    %15707 = torch.prim.ListConstruct %int4_13232, %2482, %int4_13233, %int128_13234 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15708 = torch.aten.view %15560, %15707 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_13235 = torch.constant.int 4
    %int4_13236 = torch.constant.int 4
    %int128_13237 = torch.constant.int 128
    %15709 = torch.prim.ListConstruct %int4_13235, %2482, %int4_13236, %int128_13237 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15710 = torch.aten.view %15566, %15709 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
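    // The sixteen [4,?,128] results are reshaped to [4,?,1,128]: a single
    // 128-dim K/V head per device, consistent with grouped-query attention
    // (8 KV heads across 8 devices).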
    %int4_13238 = torch.constant.int 4
    %int1_13239 = torch.constant.int 1
    %int128_13240 = torch.constant.int 128
    %15711 = torch.prim.ListConstruct %int4_13238, %2482, %int1_13239, %int128_13240 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15712 = torch.aten.view %15588, %15711 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13241 = torch.constant.int 4
    %int1_13242 = torch.constant.int 1
    %int128_13243 = torch.constant.int 128
    %15713 = torch.prim.ListConstruct %int4_13241, %2482, %int1_13242, %int128_13243 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15714 = torch.aten.view %15594, %15713 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13244 = torch.constant.int 4
    %int1_13245 = torch.constant.int 1
    %int128_13246 = torch.constant.int 128
    %15715 = torch.prim.ListConstruct %int4_13244, %2482, %int1_13245, %int128_13246 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15716 = torch.aten.view %15600, %15715 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13247 = torch.constant.int 4
    %int1_13248 = torch.constant.int 1
    %int128_13249 = torch.constant.int 128
    %15717 = torch.prim.ListConstruct %int4_13247, %2482, %int1_13248, %int128_13249 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15718 = torch.aten.view %15606, %15717 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13250 = torch.constant.int 4
    %int1_13251 = torch.constant.int 1
    %int128_13252 = torch.constant.int 128
    %15719 = torch.prim.ListConstruct %int4_13250, %2482, %int1_13251, %int128_13252 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15720 = torch.aten.view %15612, %15719 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13253 = torch.constant.int 4
    %int1_13254 = torch.constant.int 1
    %int128_13255 = torch.constant.int 128
    %15721 = torch.prim.ListConstruct %int4_13253, %2482, %int1_13254, %int128_13255 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15722 = torch.aten.view %15618, %15721 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13256 = torch.constant.int 4
    %int1_13257 = torch.constant.int 1
    %int128_13258 = torch.constant.int 128
    %15723 = torch.prim.ListConstruct %int4_13256, %2482, %int1_13257, %int128_13258 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15724 = torch.aten.view %15624, %15723 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13259 = torch.constant.int 4
    %int1_13260 = torch.constant.int 1
    %int128_13261 = torch.constant.int 128
    %15725 = torch.prim.ListConstruct %int4_13259, %2482, %int1_13260, %int128_13261 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15726 = torch.aten.view %15630, %15725 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13262 = torch.constant.int 4
    %int1_13263 = torch.constant.int 1
    %int128_13264 = torch.constant.int 128
    %15727 = torch.prim.ListConstruct %int4_13262, %2482, %int1_13263, %int128_13264 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15728 = torch.aten.view %15652, %15727 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13265 = torch.constant.int 4
    %int1_13266 = torch.constant.int 1
    %int128_13267 = torch.constant.int 128
    %15729 = torch.prim.ListConstruct %int4_13265, %2482, %int1_13266, %int128_13267 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15730 = torch.aten.view %15658, %15729 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13268 = torch.constant.int 4
    %int1_13269 = torch.constant.int 1
    %int128_13270 = torch.constant.int 128
    %15731 = torch.prim.ListConstruct %int4_13268, %2482, %int1_13269, %int128_13270 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15732 = torch.aten.view %15664, %15731 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13271 = torch.constant.int 4
    %int1_13272 = torch.constant.int 1
    %int128_13273 = torch.constant.int 128
    %15733 = torch.prim.ListConstruct %int4_13271, %2482, %int1_13272, %int128_13273 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15734 = torch.aten.view %15670, %15733 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13274 = torch.constant.int 4
    %int1_13275 = torch.constant.int 1
    %int128_13276 = torch.constant.int 128
    %15735 = torch.prim.ListConstruct %int4_13274, %2482, %int1_13275, %int128_13276 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15736 = torch.aten.view %15676, %15735 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13277 = torch.constant.int 4
    %int1_13278 = torch.constant.int 1
    %int128_13279 = torch.constant.int 128
    %15737 = torch.prim.ListConstruct %int4_13277, %2482, %int1_13278, %int128_13279 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15738 = torch.aten.view %15682, %15737 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13280 = torch.constant.int 4
    %int1_13281 = torch.constant.int 1
    %int128_13282 = torch.constant.int 128
    %15739 = torch.prim.ListConstruct %int4_13280, %2482, %int1_13281, %int128_13282 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15740 = torch.aten.view %15688, %15739 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_13283 = torch.constant.int 4
    %int1_13284 = torch.constant.int 1
    %int128_13285 = torch.constant.int 128
    %15741 = torch.prim.ListConstruct %int4_13283, %2482, %int1_13284, %int128_13285 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %15742 = torch.aten.view %15694, %15741 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
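    // Rotary-embedding table: positions arange(131072) are multiplied against
    // inverse frequencies 1 / 500000^(2i/128), i = 0..63, as an outer product
    // of angles; cos/sin of the angles are then packed into a [131072,64]
    // complex<f32> table of unit rotations e^(i*theta).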
    %int131072_13286 = torch.constant.int 131072
    %none_13287 = torch.constant.none
    %none_13288 = torch.constant.none
    %cpu_13289 = torch.constant.device "cpu"
    %false_13290 = torch.constant.bool false
    %15743 = torch.aten.arange %int131072_13286, %none_13287, %none_13288, %cpu_13289, %false_13290 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_13291 = torch.constant.int 0
    %int128_13292 = torch.constant.int 128
    %int2_13293 = torch.constant.int 2
    %none_13294 = torch.constant.none
    %none_13295 = torch.constant.none
    %cpu_13296 = torch.constant.device "cpu"
    %false_13297 = torch.constant.bool false
    %15744 = torch.aten.arange.start_step %int0_13291, %int128_13292, %int2_13293, %none_13294, %none_13295, %cpu_13296, %false_13297 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_13298 = torch.constant.int 0
    %int0_13299 = torch.constant.int 0
    %int64_13300 = torch.constant.int 64
    %int1_13301 = torch.constant.int 1
    %15745 = torch.aten.slice.Tensor %15744, %int0_13298, %int0_13299, %int64_13300, %int1_13301 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_13302 = torch.constant.int 6
    %15746 = torch.prims.convert_element_type %15745, %int6_13302 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_13303 = torch.constant.int 128
    %15747 = torch.aten.div.Scalar %15746, %int128_13303 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_13304 = torch.constant.float 5.000000e+05
    %15748 = torch.aten.pow.Scalar %float5.000000e05_13304, %15747 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %15749 = torch.aten.reciprocal %15748 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_13305 = torch.constant.float 1.000000e+00
    %15750 = torch.aten.mul.Scalar %15749, %float1.000000e00_13305 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_13306 = torch.constant.int 131072
    %int1_13307 = torch.constant.int 1
    %15751 = torch.prim.ListConstruct %int131072_13306, %int1_13307 : (!torch.int, !torch.int) -> !torch.list<int>
    %15752 = torch.aten.view %15743, %15751 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %15753 = torch.aten.mul.Tensor %15752, %15750 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %15754 = torch.aten.cos %15753 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %15755 = torch.aten.sin %15753 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %15756 = torch.aten.complex %15754, %15755 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
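    // The rotation table is built once on the CPU and replicated to all eight
    // devices via flow.tensor.transfer.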
    %15757 = torch_c.to_builtin_tensor %15756 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15758 = flow.tensor.transfer %15757 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %15759 = torch_c.from_builtin_tensor %15758 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15760 = torch_c.to_builtin_tensor %15756 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15761 = flow.tensor.transfer %15760 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %15762 = torch_c.from_builtin_tensor %15761 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15763 = torch_c.to_builtin_tensor %15756 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15764 = flow.tensor.transfer %15763 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %15765 = torch_c.from_builtin_tensor %15764 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15766 = torch_c.to_builtin_tensor %15756 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15767 = flow.tensor.transfer %15766 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %15768 = torch_c.from_builtin_tensor %15767 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15769 = torch_c.to_builtin_tensor %15756 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15770 = flow.tensor.transfer %15769 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %15771 = torch_c.from_builtin_tensor %15770 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15772 = torch_c.to_builtin_tensor %15756 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15773 = flow.tensor.transfer %15772 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %15774 = torch_c.from_builtin_tensor %15773 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15775 = torch_c.to_builtin_tensor %15756 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15776 = flow.tensor.transfer %15775 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %15777 = torch_c.from_builtin_tensor %15776 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15778 = torch_c.to_builtin_tensor %15756 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15779 = flow.tensor.transfer %15778 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %15780 = torch_c.from_builtin_tensor %15779 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
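    // Per-device rotary application: slice the table to the current sequence
    // length, unsqueeze it to [1,?,1,64] so it broadcasts over batch and
    // heads, bitcast the f16 [4,?,4,128] activations to complex<f16>
    // [4,?,4,64] (adjacent element pairs become re/im), multiply by the unit
    // rotations (promoting to complex<f32>), bitcast back to f32 [4,?,4,128],
    // and truncate to f16. The same sequence repeats below for each device.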
    %int1_13308 = torch.constant.int 1
    %15781 = torch.aten.size.int %15524, %int1_13308 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_13309 = torch.constant.int 0
    %15782 = torch.aten.add.int %int0_13309, %15781 : !torch.int, !torch.int -> !torch.int
    %int0_13310 = torch.constant.int 0
    %int0_13311 = torch.constant.int 0
    %int1_13312 = torch.constant.int 1
    %15783 = torch.aten.slice.Tensor %15759, %int0_13310, %int0_13311, %15782, %int1_13312 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15783, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13313 = torch.constant.int 1
    %int0_13314 = torch.constant.int 0
    %int9223372036854775807_13315 = torch.constant.int 9223372036854775807
    %int1_13316 = torch.constant.int 1
    %15784 = torch.aten.slice.Tensor %15783, %int1_13313, %int0_13314, %int9223372036854775807_13315, %int1_13316 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15784, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13317 = torch.constant.int 0
    %15785 = torch.aten.unsqueeze %15784, %int0_13317 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %15785, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13318 = torch.constant.int 2
    %15786 = torch.aten.unsqueeze %15785, %int2_13318 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15786, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13319 = torch.constant.int 3
    %int0_13320 = torch.constant.int 0
    %int9223372036854775807_13321 = torch.constant.int 9223372036854775807
    %int1_13322 = torch.constant.int 1
    %15787 = torch.aten.slice.Tensor %15786, %int3_13319, %int0_13320, %int9223372036854775807_13321, %int1_13322 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15787, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %15788 = torch_c.to_builtin_tensor %15696 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_13323 = arith.constant 1 : index
    %dim_13324 = tensor.dim %15788, %c1_13323 : tensor<4x?x4x128xf16>
    %15789 = flow.tensor.bitcast %15788 : tensor<4x?x4x128xf16>{%dim_13324} -> tensor<4x?x4x64xcomplex<f16>>{%dim_13324}
    %15790 = torch_c.from_builtin_tensor %15789 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %15790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %15791 = torch.aten.mul.Tensor %15790, %15787 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %15791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %15792 = torch_c.to_builtin_tensor %15791 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_13325 = arith.constant 1 : index
    %dim_13326 = tensor.dim %15792, %c1_13325 : tensor<4x?x4x64xcomplex<f32>>
    %15793 = flow.tensor.bitcast %15792 : tensor<4x?x4x64xcomplex<f32>>{%dim_13326} -> tensor<4x?x4x128xf32>{%dim_13326}
    %15794 = torch_c.from_builtin_tensor %15793 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %15794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_13327 = torch.constant.int 5
    %15795 = torch.prims.convert_element_type %15794, %int5_13327 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_13328 = torch.constant.int 1
    %15796 = torch.aten.size.int %15530, %int1_13328 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_13329 = torch.constant.int 0
    %15797 = torch.aten.add.int %int0_13329, %15796 : !torch.int, !torch.int -> !torch.int
    %int0_13330 = torch.constant.int 0
    %int0_13331 = torch.constant.int 0
    %int1_13332 = torch.constant.int 1
    %15798 = torch.aten.slice.Tensor %15762, %int0_13330, %int0_13331, %15797, %int1_13332 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15798, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13333 = torch.constant.int 1
    %int0_13334 = torch.constant.int 0
    %int9223372036854775807_13335 = torch.constant.int 9223372036854775807
    %int1_13336 = torch.constant.int 1
    %15799 = torch.aten.slice.Tensor %15798, %int1_13333, %int0_13334, %int9223372036854775807_13335, %int1_13336 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15799, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13337 = torch.constant.int 0
    %15800 = torch.aten.unsqueeze %15799, %int0_13337 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %15800, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13338 = torch.constant.int 2
    %15801 = torch.aten.unsqueeze %15800, %int2_13338 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15801, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13339 = torch.constant.int 3
    %int0_13340 = torch.constant.int 0
    %int9223372036854775807_13341 = torch.constant.int 9223372036854775807
    %int1_13342 = torch.constant.int 1
    %15802 = torch.aten.slice.Tensor %15801, %int3_13339, %int0_13340, %int9223372036854775807_13341, %int1_13342 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15802, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %15803 = torch_c.to_builtin_tensor %15698 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_13343 = arith.constant 1 : index
    %dim_13344 = tensor.dim %15803, %c1_13343 : tensor<4x?x4x128xf16>
    %15804 = flow.tensor.bitcast %15803 : tensor<4x?x4x128xf16>{%dim_13344} -> tensor<4x?x4x64xcomplex<f16>>{%dim_13344}
    %15805 = torch_c.from_builtin_tensor %15804 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %15805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %15806 = torch.aten.mul.Tensor %15805, %15802 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %15806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %15807 = torch_c.to_builtin_tensor %15806 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_13345 = arith.constant 1 : index
    %dim_13346 = tensor.dim %15807, %c1_13345 : tensor<4x?x4x64xcomplex<f32>>
    %15808 = flow.tensor.bitcast %15807 : tensor<4x?x4x64xcomplex<f32>>{%dim_13346} -> tensor<4x?x4x128xf32>{%dim_13346}
    %15809 = torch_c.from_builtin_tensor %15808 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %15809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_13347 = torch.constant.int 5
    %15810 = torch.prims.convert_element_type %15809, %int5_13347 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_13348 = torch.constant.int 1
    %15811 = torch.aten.size.int %15536, %int1_13348 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_13349 = torch.constant.int 0
    %15812 = torch.aten.add.int %int0_13349, %15811 : !torch.int, !torch.int -> !torch.int
    %int0_13350 = torch.constant.int 0
    %int0_13351 = torch.constant.int 0
    %int1_13352 = torch.constant.int 1
    %15813 = torch.aten.slice.Tensor %15765, %int0_13350, %int0_13351, %15812, %int1_13352 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15813, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13353 = torch.constant.int 1
    %int0_13354 = torch.constant.int 0
    %int9223372036854775807_13355 = torch.constant.int 9223372036854775807
    %int1_13356 = torch.constant.int 1
    %15814 = torch.aten.slice.Tensor %15813, %int1_13353, %int0_13354, %int9223372036854775807_13355, %int1_13356 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15814, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13357 = torch.constant.int 0
    %15815 = torch.aten.unsqueeze %15814, %int0_13357 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %15815, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13358 = torch.constant.int 2
    %15816 = torch.aten.unsqueeze %15815, %int2_13358 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15816, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13359 = torch.constant.int 3
    %int0_13360 = torch.constant.int 0
    %int9223372036854775807_13361 = torch.constant.int 9223372036854775807
    %int1_13362 = torch.constant.int 1
    %15817 = torch.aten.slice.Tensor %15816, %int3_13359, %int0_13360, %int9223372036854775807_13361, %int1_13362 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15817, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %15818 = torch_c.to_builtin_tensor %15700 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_13363 = arith.constant 1 : index
    %dim_13364 = tensor.dim %15818, %c1_13363 : tensor<4x?x4x128xf16>
    %15819 = flow.tensor.bitcast %15818 : tensor<4x?x4x128xf16>{%dim_13364} -> tensor<4x?x4x64xcomplex<f16>>{%dim_13364}
    %15820 = torch_c.from_builtin_tensor %15819 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %15820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %15821 = torch.aten.mul.Tensor %15820, %15817 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %15821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %15822 = torch_c.to_builtin_tensor %15821 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_13365 = arith.constant 1 : index
    %dim_13366 = tensor.dim %15822, %c1_13365 : tensor<4x?x4x64xcomplex<f32>>
    %15823 = flow.tensor.bitcast %15822 : tensor<4x?x4x64xcomplex<f32>>{%dim_13366} -> tensor<4x?x4x128xf32>{%dim_13366}
    %15824 = torch_c.from_builtin_tensor %15823 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %15824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_13367 = torch.constant.int 5
    %15825 = torch.prims.convert_element_type %15824, %int5_13367 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
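    // RoPE application for one device shard: the [4, seq, 4, 128] f16 activation is
    // bitcast to [4, seq, 4, 64] complex<f16>, multiplied by the rotation table
    // (promoting to complex<f32>), bitcast back to f32 pairs, and truncated to f16.
    // In PyTorch-style pseudocode (illustrative names only, not from this IR):
    //   xc  = torch.view_as_complex(x.reshape(4, s, 4, 64, 2))
    //   out = torch.view_as_real(xc * table[:s].view(1, s, 1, 64)).reshape(4, s, 4, 128).half()
    // The same sequence repeats below once per device shard.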
    %int1_13368 = torch.constant.int 1
    %15826 = torch.aten.size.int %15542, %int1_13368 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_13369 = torch.constant.int 0
    %15827 = torch.aten.add.int %int0_13369, %15826 : !torch.int, !torch.int -> !torch.int
    %int0_13370 = torch.constant.int 0
    %int0_13371 = torch.constant.int 0
    %int1_13372 = torch.constant.int 1
    %15828 = torch.aten.slice.Tensor %15768, %int0_13370, %int0_13371, %15827, %int1_13372 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15828, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13373 = torch.constant.int 1
    %int0_13374 = torch.constant.int 0
    %int9223372036854775807_13375 = torch.constant.int 9223372036854775807
    %int1_13376 = torch.constant.int 1
    %15829 = torch.aten.slice.Tensor %15828, %int1_13373, %int0_13374, %int9223372036854775807_13375, %int1_13376 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15829, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13377 = torch.constant.int 0
    %15830 = torch.aten.unsqueeze %15829, %int0_13377 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %15830, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13378 = torch.constant.int 2
    %15831 = torch.aten.unsqueeze %15830, %int2_13378 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15831, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13379 = torch.constant.int 3
    %int0_13380 = torch.constant.int 0
    %int9223372036854775807_13381 = torch.constant.int 9223372036854775807
    %int1_13382 = torch.constant.int 1
    %15832 = torch.aten.slice.Tensor %15831, %int3_13379, %int0_13380, %int9223372036854775807_13381, %int1_13382 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15832, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %15833 = torch_c.to_builtin_tensor %15702 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_13383 = arith.constant 1 : index
    %dim_13384 = tensor.dim %15833, %c1_13383 : tensor<4x?x4x128xf16>
    %15834 = flow.tensor.bitcast %15833 : tensor<4x?x4x128xf16>{%dim_13384} -> tensor<4x?x4x64xcomplex<f16>>{%dim_13384}
    %15835 = torch_c.from_builtin_tensor %15834 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %15835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %15836 = torch.aten.mul.Tensor %15835, %15832 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %15836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %15837 = torch_c.to_builtin_tensor %15836 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_13385 = arith.constant 1 : index
    %dim_13386 = tensor.dim %15837, %c1_13385 : tensor<4x?x4x64xcomplex<f32>>
    %15838 = flow.tensor.bitcast %15837 : tensor<4x?x4x64xcomplex<f32>>{%dim_13386} -> tensor<4x?x4x128xf32>{%dim_13386}
    %15839 = torch_c.from_builtin_tensor %15838 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %15839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_13387 = torch.constant.int 5
    %15840 = torch.prims.convert_element_type %15839, %int5_13387 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_13388 = torch.constant.int 1
    %15841 = torch.aten.size.int %15548, %int1_13388 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_13389 = torch.constant.int 0
    %15842 = torch.aten.add.int %int0_13389, %15841 : !torch.int, !torch.int -> !torch.int
    %int0_13390 = torch.constant.int 0
    %int0_13391 = torch.constant.int 0
    %int1_13392 = torch.constant.int 1
    %15843 = torch.aten.slice.Tensor %15771, %int0_13390, %int0_13391, %15842, %int1_13392 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15843, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13393 = torch.constant.int 1
    %int0_13394 = torch.constant.int 0
    %int9223372036854775807_13395 = torch.constant.int 9223372036854775807
    %int1_13396 = torch.constant.int 1
    %15844 = torch.aten.slice.Tensor %15843, %int1_13393, %int0_13394, %int9223372036854775807_13395, %int1_13396 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15844, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13397 = torch.constant.int 0
    %15845 = torch.aten.unsqueeze %15844, %int0_13397 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %15845, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13398 = torch.constant.int 2
    %15846 = torch.aten.unsqueeze %15845, %int2_13398 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15846, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13399 = torch.constant.int 3
    %int0_13400 = torch.constant.int 0
    %int9223372036854775807_13401 = torch.constant.int 9223372036854775807
    %int1_13402 = torch.constant.int 1
    %15847 = torch.aten.slice.Tensor %15846, %int3_13399, %int0_13400, %int9223372036854775807_13401, %int1_13402 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15847, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %15848 = torch_c.to_builtin_tensor %15704 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_13403 = arith.constant 1 : index
    %dim_13404 = tensor.dim %15848, %c1_13403 : tensor<4x?x4x128xf16>
    %15849 = flow.tensor.bitcast %15848 : tensor<4x?x4x128xf16>{%dim_13404} -> tensor<4x?x4x64xcomplex<f16>>{%dim_13404}
    %15850 = torch_c.from_builtin_tensor %15849 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %15850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %15851 = torch.aten.mul.Tensor %15850, %15847 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %15851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %15852 = torch_c.to_builtin_tensor %15851 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_13405 = arith.constant 1 : index
    %dim_13406 = tensor.dim %15852, %c1_13405 : tensor<4x?x4x64xcomplex<f32>>
    %15853 = flow.tensor.bitcast %15852 : tensor<4x?x4x64xcomplex<f32>>{%dim_13406} -> tensor<4x?x4x128xf32>{%dim_13406}
    %15854 = torch_c.from_builtin_tensor %15853 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %15854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_13407 = torch.constant.int 5
    %15855 = torch.prims.convert_element_type %15854, %int5_13407 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_13408 = torch.constant.int 1
    %15856 = torch.aten.size.int %15554, %int1_13408 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_13409 = torch.constant.int 0
    %15857 = torch.aten.add.int %int0_13409, %15856 : !torch.int, !torch.int -> !torch.int
    %int0_13410 = torch.constant.int 0
    %int0_13411 = torch.constant.int 0
    %int1_13412 = torch.constant.int 1
    %15858 = torch.aten.slice.Tensor %15774, %int0_13410, %int0_13411, %15857, %int1_13412 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15858, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13413 = torch.constant.int 1
    %int0_13414 = torch.constant.int 0
    %int9223372036854775807_13415 = torch.constant.int 9223372036854775807
    %int1_13416 = torch.constant.int 1
    %15859 = torch.aten.slice.Tensor %15858, %int1_13413, %int0_13414, %int9223372036854775807_13415, %int1_13416 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15859, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13417 = torch.constant.int 0
    %15860 = torch.aten.unsqueeze %15859, %int0_13417 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %15860, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13418 = torch.constant.int 2
    %15861 = torch.aten.unsqueeze %15860, %int2_13418 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15861, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13419 = torch.constant.int 3
    %int0_13420 = torch.constant.int 0
    %int9223372036854775807_13421 = torch.constant.int 9223372036854775807
    %int1_13422 = torch.constant.int 1
    %15862 = torch.aten.slice.Tensor %15861, %int3_13419, %int0_13420, %int9223372036854775807_13421, %int1_13422 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15862, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %15863 = torch_c.to_builtin_tensor %15706 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_13423 = arith.constant 1 : index
    %dim_13424 = tensor.dim %15863, %c1_13423 : tensor<4x?x4x128xf16>
    %15864 = flow.tensor.bitcast %15863 : tensor<4x?x4x128xf16>{%dim_13424} -> tensor<4x?x4x64xcomplex<f16>>{%dim_13424}
    %15865 = torch_c.from_builtin_tensor %15864 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %15865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %15866 = torch.aten.mul.Tensor %15865, %15862 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %15866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %15867 = torch_c.to_builtin_tensor %15866 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_13425 = arith.constant 1 : index
    %dim_13426 = tensor.dim %15867, %c1_13425 : tensor<4x?x4x64xcomplex<f32>>
    %15868 = flow.tensor.bitcast %15867 : tensor<4x?x4x64xcomplex<f32>>{%dim_13426} -> tensor<4x?x4x128xf32>{%dim_13426}
    %15869 = torch_c.from_builtin_tensor %15868 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %15869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_13427 = torch.constant.int 5
    %15870 = torch.prims.convert_element_type %15869, %int5_13427 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_13428 = torch.constant.int 1
    %15871 = torch.aten.size.int %15560, %int1_13428 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_13429 = torch.constant.int 0
    %15872 = torch.aten.add.int %int0_13429, %15871 : !torch.int, !torch.int -> !torch.int
    %int0_13430 = torch.constant.int 0
    %int0_13431 = torch.constant.int 0
    %int1_13432 = torch.constant.int 1
    %15873 = torch.aten.slice.Tensor %15777, %int0_13430, %int0_13431, %15872, %int1_13432 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15873, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13433 = torch.constant.int 1
    %int0_13434 = torch.constant.int 0
    %int9223372036854775807_13435 = torch.constant.int 9223372036854775807
    %int1_13436 = torch.constant.int 1
    %15874 = torch.aten.slice.Tensor %15873, %int1_13433, %int0_13434, %int9223372036854775807_13435, %int1_13436 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15874, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13437 = torch.constant.int 0
    %15875 = torch.aten.unsqueeze %15874, %int0_13437 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %15875, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13438 = torch.constant.int 2
    %15876 = torch.aten.unsqueeze %15875, %int2_13438 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15876, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13439 = torch.constant.int 3
    %int0_13440 = torch.constant.int 0
    %int9223372036854775807_13441 = torch.constant.int 9223372036854775807
    %int1_13442 = torch.constant.int 1
    %15877 = torch.aten.slice.Tensor %15876, %int3_13439, %int0_13440, %int9223372036854775807_13441, %int1_13442 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15877, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %15878 = torch_c.to_builtin_tensor %15708 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_13443 = arith.constant 1 : index
    %dim_13444 = tensor.dim %15878, %c1_13443 : tensor<4x?x4x128xf16>
    %15879 = flow.tensor.bitcast %15878 : tensor<4x?x4x128xf16>{%dim_13444} -> tensor<4x?x4x64xcomplex<f16>>{%dim_13444}
    %15880 = torch_c.from_builtin_tensor %15879 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %15880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %15881 = torch.aten.mul.Tensor %15880, %15877 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %15881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %15882 = torch_c.to_builtin_tensor %15881 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_13445 = arith.constant 1 : index
    %dim_13446 = tensor.dim %15882, %c1_13445 : tensor<4x?x4x64xcomplex<f32>>
    %15883 = flow.tensor.bitcast %15882 : tensor<4x?x4x64xcomplex<f32>>{%dim_13446} -> tensor<4x?x4x128xf32>{%dim_13446}
    %15884 = torch_c.from_builtin_tensor %15883 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %15884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_13447 = torch.constant.int 5
    %15885 = torch.prims.convert_element_type %15884, %int5_13447 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_13448 = torch.constant.int 1
    %15886 = torch.aten.size.int %15566, %int1_13448 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_13449 = torch.constant.int 0
    %15887 = torch.aten.add.int %int0_13449, %15886 : !torch.int, !torch.int -> !torch.int
    %int0_13450 = torch.constant.int 0
    %int0_13451 = torch.constant.int 0
    %int1_13452 = torch.constant.int 1
    %15888 = torch.aten.slice.Tensor %15780, %int0_13450, %int0_13451, %15887, %int1_13452 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15888, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13453 = torch.constant.int 1
    %int0_13454 = torch.constant.int 0
    %int9223372036854775807_13455 = torch.constant.int 9223372036854775807
    %int1_13456 = torch.constant.int 1
    %15889 = torch.aten.slice.Tensor %15888, %int1_13453, %int0_13454, %int9223372036854775807_13455, %int1_13456 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15889, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13457 = torch.constant.int 0
    %15890 = torch.aten.unsqueeze %15889, %int0_13457 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %15890, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13458 = torch.constant.int 2
    %15891 = torch.aten.unsqueeze %15890, %int2_13458 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15891, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13459 = torch.constant.int 3
    %int0_13460 = torch.constant.int 0
    %int9223372036854775807_13461 = torch.constant.int 9223372036854775807
    %int1_13462 = torch.constant.int 1
    %15892 = torch.aten.slice.Tensor %15891, %int3_13459, %int0_13460, %int9223372036854775807_13461, %int1_13462 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15892, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %15893 = torch_c.to_builtin_tensor %15710 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_13463 = arith.constant 1 : index
    %dim_13464 = tensor.dim %15893, %c1_13463 : tensor<4x?x4x128xf16>
    %15894 = flow.tensor.bitcast %15893 : tensor<4x?x4x128xf16>{%dim_13464} -> tensor<4x?x4x64xcomplex<f16>>{%dim_13464}
    %15895 = torch_c.from_builtin_tensor %15894 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %15895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %15896 = torch.aten.mul.Tensor %15895, %15892 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %15896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %15897 = torch_c.to_builtin_tensor %15896 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_13465 = arith.constant 1 : index
    %dim_13466 = tensor.dim %15897, %c1_13465 : tensor<4x?x4x64xcomplex<f32>>
    %15898 = flow.tensor.bitcast %15897 : tensor<4x?x4x64xcomplex<f32>>{%dim_13466} -> tensor<4x?x4x128xf32>{%dim_13466}
    %15899 = torch_c.from_builtin_tensor %15898 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %15899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_13467 = torch.constant.int 5
    %15900 = torch.prims.convert_element_type %15899, %int5_13467 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %15900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
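    // Rebuild the RoPE rotation table (rope_theta = 500000, head_dim = 128,
    // 131072 max positions): inv_freq[i] = 500000^(-2i/128) for i in [0, 64),
    // angle[p, i] = p * inv_freq[i], table[p, i] = cos(angle) + j*sin(angle).
    // The multiply by 1.0 below leaves the frequencies unchanged.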
    %int131072_13468 = torch.constant.int 131072
    %none_13469 = torch.constant.none
    %none_13470 = torch.constant.none
    %cpu_13471 = torch.constant.device "cpu"
    %false_13472 = torch.constant.bool false
    %15901 = torch.aten.arange %int131072_13468, %none_13469, %none_13470, %cpu_13471, %false_13472 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_13473 = torch.constant.int 0
    %int128_13474 = torch.constant.int 128
    %int2_13475 = torch.constant.int 2
    %none_13476 = torch.constant.none
    %none_13477 = torch.constant.none
    %cpu_13478 = torch.constant.device "cpu"
    %false_13479 = torch.constant.bool false
    %15902 = torch.aten.arange.start_step %int0_13473, %int128_13474, %int2_13475, %none_13476, %none_13477, %cpu_13478, %false_13479 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_13480 = torch.constant.int 0
    %int0_13481 = torch.constant.int 0
    %int64_13482 = torch.constant.int 64
    %int1_13483 = torch.constant.int 1
    %15903 = torch.aten.slice.Tensor %15902, %int0_13480, %int0_13481, %int64_13482, %int1_13483 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_13484 = torch.constant.int 6
    %15904 = torch.prims.convert_element_type %15903, %int6_13484 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_13485 = torch.constant.int 128
    %15905 = torch.aten.div.Scalar %15904, %int128_13485 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_13486 = torch.constant.float 5.000000e+05
    %15906 = torch.aten.pow.Scalar %float5.000000e05_13486, %15905 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %15907 = torch.aten.reciprocal %15906 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_13487 = torch.constant.float 1.000000e+00
    %15908 = torch.aten.mul.Scalar %15907, %float1.000000e00_13487 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_13488 = torch.constant.int 131072
    %int1_13489 = torch.constant.int 1
    %15909 = torch.prim.ListConstruct %int131072_13488, %int1_13489 : (!torch.int, !torch.int) -> !torch.list<int>
    %15910 = torch.aten.view %15901, %15909 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %15911 = torch.aten.mul.Tensor %15910, %15908 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %15912 = torch.aten.cos %15911 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %15913 = torch.aten.sin %15911 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %15914 = torch.aten.complex %15912, %15913 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
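    // Replicate the [131072, 64] complex<f32> rotation table to all eight device
    // shards (@__device_0 .. @__device_7); each shard slices its own copy below.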
    %15915 = torch_c.to_builtin_tensor %15914 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15916 = flow.tensor.transfer %15915 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %15917 = torch_c.from_builtin_tensor %15916 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15918 = torch_c.to_builtin_tensor %15914 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15919 = flow.tensor.transfer %15918 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %15920 = torch_c.from_builtin_tensor %15919 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15921 = torch_c.to_builtin_tensor %15914 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15922 = flow.tensor.transfer %15921 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %15923 = torch_c.from_builtin_tensor %15922 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15924 = torch_c.to_builtin_tensor %15914 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15925 = flow.tensor.transfer %15924 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %15926 = torch_c.from_builtin_tensor %15925 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15927 = torch_c.to_builtin_tensor %15914 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15928 = flow.tensor.transfer %15927 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %15929 = torch_c.from_builtin_tensor %15928 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15930 = torch_c.to_builtin_tensor %15914 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15931 = flow.tensor.transfer %15930 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %15932 = torch_c.from_builtin_tensor %15931 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15933 = torch_c.to_builtin_tensor %15914 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15934 = flow.tensor.transfer %15933 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %15935 = torch_c.from_builtin_tensor %15934 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %15936 = torch_c.to_builtin_tensor %15914 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %15937 = flow.tensor.transfer %15936 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %15938 = torch_c.from_builtin_tensor %15937 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
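    // Same rotation, now applied per shard to the single-head [4, seq, 1, 128]
    // tensors (likely the per-device grouped-query K/V head), using the same
    // slice -> bitcast -> complex multiply -> bitcast pattern as above.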
    %int1_13490 = torch.constant.int 1
    %15939 = torch.aten.size.int %15588, %int1_13490 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_13491 = torch.constant.int 0
    %15940 = torch.aten.add.int %int0_13491, %15939 : !torch.int, !torch.int -> !torch.int
    %int0_13492 = torch.constant.int 0
    %int0_13493 = torch.constant.int 0
    %int1_13494 = torch.constant.int 1
    %15941 = torch.aten.slice.Tensor %15917, %int0_13492, %int0_13493, %15940, %int1_13494 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15941, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13495 = torch.constant.int 1
    %int0_13496 = torch.constant.int 0
    %int9223372036854775807_13497 = torch.constant.int 9223372036854775807
    %int1_13498 = torch.constant.int 1
    %15942 = torch.aten.slice.Tensor %15941, %int1_13495, %int0_13496, %int9223372036854775807_13497, %int1_13498 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15942, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13499 = torch.constant.int 0
    %15943 = torch.aten.unsqueeze %15942, %int0_13499 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %15943, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13500 = torch.constant.int 2
    %15944 = torch.aten.unsqueeze %15943, %int2_13500 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15944, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13501 = torch.constant.int 3
    %int0_13502 = torch.constant.int 0
    %int9223372036854775807_13503 = torch.constant.int 9223372036854775807
    %int1_13504 = torch.constant.int 1
    %15945 = torch.aten.slice.Tensor %15944, %int3_13501, %int0_13502, %int9223372036854775807_13503, %int1_13504 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15945, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %15946 = torch_c.to_builtin_tensor %15712 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_13505 = arith.constant 1 : index
    %dim_13506 = tensor.dim %15946, %c1_13505 : tensor<4x?x1x128xf16>
    %15947 = flow.tensor.bitcast %15946 : tensor<4x?x1x128xf16>{%dim_13506} -> tensor<4x?x1x64xcomplex<f16>>{%dim_13506}
    %15948 = torch_c.from_builtin_tensor %15947 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %15948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %15949 = torch.aten.mul.Tensor %15948, %15945 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %15950 = torch_c.to_builtin_tensor %15949 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_13507 = arith.constant 1 : index
    %dim_13508 = tensor.dim %15950, %c1_13507 : tensor<4x?x1x64xcomplex<f32>>
    %15951 = flow.tensor.bitcast %15950 : tensor<4x?x1x64xcomplex<f32>>{%dim_13508} -> tensor<4x?x1x128xf32>{%dim_13508}
    %15952 = torch_c.from_builtin_tensor %15951 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %15952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_13509 = torch.constant.int 5
    %15953 = torch.prims.convert_element_type %15952, %int5_13509 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_13510 = torch.constant.int 1
    %15954 = torch.aten.size.int %15594, %int1_13510 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_13511 = torch.constant.int 0
    %15955 = torch.aten.add.int %int0_13511, %15954 : !torch.int, !torch.int -> !torch.int
    %int0_13512 = torch.constant.int 0
    %int0_13513 = torch.constant.int 0
    %int1_13514 = torch.constant.int 1
    %15956 = torch.aten.slice.Tensor %15920, %int0_13512, %int0_13513, %15955, %int1_13514 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15956, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13515 = torch.constant.int 1
    %int0_13516 = torch.constant.int 0
    %int9223372036854775807_13517 = torch.constant.int 9223372036854775807
    %int1_13518 = torch.constant.int 1
    %15957 = torch.aten.slice.Tensor %15956, %int1_13515, %int0_13516, %int9223372036854775807_13517, %int1_13518 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15957, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13519 = torch.constant.int 0
    %15958 = torch.aten.unsqueeze %15957, %int0_13519 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %15958, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13520 = torch.constant.int 2
    %15959 = torch.aten.unsqueeze %15958, %int2_13520 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15959, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13521 = torch.constant.int 3
    %int0_13522 = torch.constant.int 0
    %int9223372036854775807_13523 = torch.constant.int 9223372036854775807
    %int1_13524 = torch.constant.int 1
    %15960 = torch.aten.slice.Tensor %15959, %int3_13521, %int0_13522, %int9223372036854775807_13523, %int1_13524 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15960, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %15961 = torch_c.to_builtin_tensor %15714 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_13525 = arith.constant 1 : index
    %dim_13526 = tensor.dim %15961, %c1_13525 : tensor<4x?x1x128xf16>
    %15962 = flow.tensor.bitcast %15961 : tensor<4x?x1x128xf16>{%dim_13526} -> tensor<4x?x1x64xcomplex<f16>>{%dim_13526}
    %15963 = torch_c.from_builtin_tensor %15962 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %15963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %15964 = torch.aten.mul.Tensor %15963, %15960 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %15965 = torch_c.to_builtin_tensor %15964 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_13527 = arith.constant 1 : index
    %dim_13528 = tensor.dim %15965, %c1_13527 : tensor<4x?x1x64xcomplex<f32>>
    %15966 = flow.tensor.bitcast %15965 : tensor<4x?x1x64xcomplex<f32>>{%dim_13528} -> tensor<4x?x1x128xf32>{%dim_13528}
    %15967 = torch_c.from_builtin_tensor %15966 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %15967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_13529 = torch.constant.int 5
    %15968 = torch.prims.convert_element_type %15967, %int5_13529 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_13530 = torch.constant.int 1
    %15969 = torch.aten.size.int %15600, %int1_13530 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_13531 = torch.constant.int 0
    %15970 = torch.aten.add.int %int0_13531, %15969 : !torch.int, !torch.int -> !torch.int
    %int0_13532 = torch.constant.int 0
    %int0_13533 = torch.constant.int 0
    %int1_13534 = torch.constant.int 1
    %15971 = torch.aten.slice.Tensor %15923, %int0_13532, %int0_13533, %15970, %int1_13534 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15971, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13535 = torch.constant.int 1
    %int0_13536 = torch.constant.int 0
    %int9223372036854775807_13537 = torch.constant.int 9223372036854775807
    %int1_13538 = torch.constant.int 1
    %15972 = torch.aten.slice.Tensor %15971, %int1_13535, %int0_13536, %int9223372036854775807_13537, %int1_13538 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15972, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13539 = torch.constant.int 0
    %15973 = torch.aten.unsqueeze %15972, %int0_13539 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %15973, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13540 = torch.constant.int 2
    %15974 = torch.aten.unsqueeze %15973, %int2_13540 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15974, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13541 = torch.constant.int 3
    %int0_13542 = torch.constant.int 0
    %int9223372036854775807_13543 = torch.constant.int 9223372036854775807
    %int1_13544 = torch.constant.int 1
    %15975 = torch.aten.slice.Tensor %15974, %int3_13541, %int0_13542, %int9223372036854775807_13543, %int1_13544 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15975, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %15976 = torch_c.to_builtin_tensor %15716 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_13545 = arith.constant 1 : index
    %dim_13546 = tensor.dim %15976, %c1_13545 : tensor<4x?x1x128xf16>
    %15977 = flow.tensor.bitcast %15976 : tensor<4x?x1x128xf16>{%dim_13546} -> tensor<4x?x1x64xcomplex<f16>>{%dim_13546}
    %15978 = torch_c.from_builtin_tensor %15977 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %15978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %15979 = torch.aten.mul.Tensor %15978, %15975 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %15980 = torch_c.to_builtin_tensor %15979 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_13547 = arith.constant 1 : index
    %dim_13548 = tensor.dim %15980, %c1_13547 : tensor<4x?x1x64xcomplex<f32>>
    %15981 = flow.tensor.bitcast %15980 : tensor<4x?x1x64xcomplex<f32>>{%dim_13548} -> tensor<4x?x1x128xf32>{%dim_13548}
    %15982 = torch_c.from_builtin_tensor %15981 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %15982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_13549 = torch.constant.int 5
    %15983 = torch.prims.convert_element_type %15982, %int5_13549 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_13550 = torch.constant.int 1
    %15984 = torch.aten.size.int %15606, %int1_13550 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_13551 = torch.constant.int 0
    %15985 = torch.aten.add.int %int0_13551, %15984 : !torch.int, !torch.int -> !torch.int
    %int0_13552 = torch.constant.int 0
    %int0_13553 = torch.constant.int 0
    %int1_13554 = torch.constant.int 1
    %15986 = torch.aten.slice.Tensor %15926, %int0_13552, %int0_13553, %15985, %int1_13554 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15986, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13555 = torch.constant.int 1
    %int0_13556 = torch.constant.int 0
    %int9223372036854775807_13557 = torch.constant.int 9223372036854775807
    %int1_13558 = torch.constant.int 1
    %15987 = torch.aten.slice.Tensor %15986, %int1_13555, %int0_13556, %int9223372036854775807_13557, %int1_13558 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %15987, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13559 = torch.constant.int 0
    %15988 = torch.aten.unsqueeze %15987, %int0_13559 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %15988, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13560 = torch.constant.int 2
    %15989 = torch.aten.unsqueeze %15988, %int2_13560 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15989, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13561 = torch.constant.int 3
    %int0_13562 = torch.constant.int 0
    %int9223372036854775807_13563 = torch.constant.int 9223372036854775807
    %int1_13564 = torch.constant.int 1
    %15990 = torch.aten.slice.Tensor %15989, %int3_13561, %int0_13562, %int9223372036854775807_13563, %int1_13564 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15990, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %15991 = torch_c.to_builtin_tensor %15718 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_13565 = arith.constant 1 : index
    %dim_13566 = tensor.dim %15991, %c1_13565 : tensor<4x?x1x128xf16>
    %15992 = flow.tensor.bitcast %15991 : tensor<4x?x1x128xf16>{%dim_13566} -> tensor<4x?x1x64xcomplex<f16>>{%dim_13566}
    %15993 = torch_c.from_builtin_tensor %15992 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %15993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %15994 = torch.aten.mul.Tensor %15993, %15990 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %15994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %15995 = torch_c.to_builtin_tensor %15994 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_13567 = arith.constant 1 : index
    %dim_13568 = tensor.dim %15995, %c1_13567 : tensor<4x?x1x64xcomplex<f32>>
    %15996 = flow.tensor.bitcast %15995 : tensor<4x?x1x64xcomplex<f32>>{%dim_13568} -> tensor<4x?x1x128xf32>{%dim_13568}
    %15997 = torch_c.from_builtin_tensor %15996 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %15997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_13569 = torch.constant.int 5
    %15998 = torch.prims.convert_element_type %15997, %int5_13569 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %15998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_13570 = torch.constant.int 1
    %15999 = torch.aten.size.int %15612, %int1_13570 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_13571 = torch.constant.int 0
    %16000 = torch.aten.add.int %int0_13571, %15999 : !torch.int, !torch.int -> !torch.int
    %int0_13572 = torch.constant.int 0
    %int0_13573 = torch.constant.int 0
    %int1_13574 = torch.constant.int 1
    %16001 = torch.aten.slice.Tensor %15929, %int0_13572, %int0_13573, %16000, %int1_13574 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %16001, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13575 = torch.constant.int 1
    %int0_13576 = torch.constant.int 0
    %int9223372036854775807_13577 = torch.constant.int 9223372036854775807
    %int1_13578 = torch.constant.int 1
    %16002 = torch.aten.slice.Tensor %16001, %int1_13575, %int0_13576, %int9223372036854775807_13577, %int1_13578 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %16002, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13579 = torch.constant.int 0
    %16003 = torch.aten.unsqueeze %16002, %int0_13579 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %16003, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13580 = torch.constant.int 2
    %16004 = torch.aten.unsqueeze %16003, %int2_13580 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %16004, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13581 = torch.constant.int 3
    %int0_13582 = torch.constant.int 0
    %int9223372036854775807_13583 = torch.constant.int 9223372036854775807
    %int1_13584 = torch.constant.int 1
    %16005 = torch.aten.slice.Tensor %16004, %int3_13581, %int0_13582, %int9223372036854775807_13583, %int1_13584 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %16005, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %16006 = torch_c.to_builtin_tensor %15720 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_13585 = arith.constant 1 : index
    %dim_13586 = tensor.dim %16006, %c1_13585 : tensor<4x?x1x128xf16>
    %16007 = flow.tensor.bitcast %16006 : tensor<4x?x1x128xf16>{%dim_13586} -> tensor<4x?x1x64xcomplex<f16>>{%dim_13586}
    %16008 = torch_c.from_builtin_tensor %16007 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %16008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %16009 = torch.aten.mul.Tensor %16008, %16005 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %16009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %16010 = torch_c.to_builtin_tensor %16009 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_13587 = arith.constant 1 : index
    %dim_13588 = tensor.dim %16010, %c1_13587 : tensor<4x?x1x64xcomplex<f32>>
    %16011 = flow.tensor.bitcast %16010 : tensor<4x?x1x64xcomplex<f32>>{%dim_13588} -> tensor<4x?x1x128xf32>{%dim_13588}
    %16012 = torch_c.from_builtin_tensor %16011 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %16012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_13589 = torch.constant.int 5
    %16013 = torch.prims.convert_element_type %16012, %int5_13589 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %16013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_13590 = torch.constant.int 1
    %16014 = torch.aten.size.int %15618, %int1_13590 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_13591 = torch.constant.int 0
    %16015 = torch.aten.add.int %int0_13591, %16014 : !torch.int, !torch.int -> !torch.int
    %int0_13592 = torch.constant.int 0
    %int0_13593 = torch.constant.int 0
    %int1_13594 = torch.constant.int 1
    %16016 = torch.aten.slice.Tensor %15932, %int0_13592, %int0_13593, %16015, %int1_13594 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %16016, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13595 = torch.constant.int 1
    %int0_13596 = torch.constant.int 0
    %int9223372036854775807_13597 = torch.constant.int 9223372036854775807
    %int1_13598 = torch.constant.int 1
    %16017 = torch.aten.slice.Tensor %16016, %int1_13595, %int0_13596, %int9223372036854775807_13597, %int1_13598 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %16017, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13599 = torch.constant.int 0
    %16018 = torch.aten.unsqueeze %16017, %int0_13599 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %16018, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13600 = torch.constant.int 2
    %16019 = torch.aten.unsqueeze %16018, %int2_13600 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %16019, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13601 = torch.constant.int 3
    %int0_13602 = torch.constant.int 0
    %int9223372036854775807_13603 = torch.constant.int 9223372036854775807
    %int1_13604 = torch.constant.int 1
    %16020 = torch.aten.slice.Tensor %16019, %int3_13601, %int0_13602, %int9223372036854775807_13603, %int1_13604 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %16020, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %16021 = torch_c.to_builtin_tensor %15722 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_13605 = arith.constant 1 : index
    %dim_13606 = tensor.dim %16021, %c1_13605 : tensor<4x?x1x128xf16>
    %16022 = flow.tensor.bitcast %16021 : tensor<4x?x1x128xf16>{%dim_13606} -> tensor<4x?x1x64xcomplex<f16>>{%dim_13606}
    %16023 = torch_c.from_builtin_tensor %16022 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %16023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %16024 = torch.aten.mul.Tensor %16023, %16020 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %16024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %16025 = torch_c.to_builtin_tensor %16024 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_13607 = arith.constant 1 : index
    %dim_13608 = tensor.dim %16025, %c1_13607 : tensor<4x?x1x64xcomplex<f32>>
    %16026 = flow.tensor.bitcast %16025 : tensor<4x?x1x64xcomplex<f32>>{%dim_13608} -> tensor<4x?x1x128xf32>{%dim_13608}
    %16027 = torch_c.from_builtin_tensor %16026 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %16027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_13609 = torch.constant.int 5
    %16028 = torch.prims.convert_element_type %16027, %int5_13609 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %16028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_13610 = torch.constant.int 1
    %16029 = torch.aten.size.int %15624, %int1_13610 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_13611 = torch.constant.int 0
    %16030 = torch.aten.add.int %int0_13611, %16029 : !torch.int, !torch.int -> !torch.int
    %int0_13612 = torch.constant.int 0
    %int0_13613 = torch.constant.int 0
    %int1_13614 = torch.constant.int 1
    %16031 = torch.aten.slice.Tensor %15935, %int0_13612, %int0_13613, %16030, %int1_13614 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %16031, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13615 = torch.constant.int 1
    %int0_13616 = torch.constant.int 0
    %int9223372036854775807_13617 = torch.constant.int 9223372036854775807
    %int1_13618 = torch.constant.int 1
    %16032 = torch.aten.slice.Tensor %16031, %int1_13615, %int0_13616, %int9223372036854775807_13617, %int1_13618 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %16032, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13619 = torch.constant.int 0
    %16033 = torch.aten.unsqueeze %16032, %int0_13619 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %16033, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13620 = torch.constant.int 2
    %16034 = torch.aten.unsqueeze %16033, %int2_13620 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %16034, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13621 = torch.constant.int 3
    %int0_13622 = torch.constant.int 0
    %int9223372036854775807_13623 = torch.constant.int 9223372036854775807
    %int1_13624 = torch.constant.int 1
    %16035 = torch.aten.slice.Tensor %16034, %int3_13621, %int0_13622, %int9223372036854775807_13623, %int1_13624 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %16035, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %16036 = torch_c.to_builtin_tensor %15724 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_13625 = arith.constant 1 : index
    %dim_13626 = tensor.dim %16036, %c1_13625 : tensor<4x?x1x128xf16>
    %16037 = flow.tensor.bitcast %16036 : tensor<4x?x1x128xf16>{%dim_13626} -> tensor<4x?x1x64xcomplex<f16>>{%dim_13626}
    %16038 = torch_c.from_builtin_tensor %16037 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %16038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %16039 = torch.aten.mul.Tensor %16038, %16035 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %16039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %16040 = torch_c.to_builtin_tensor %16039 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_13627 = arith.constant 1 : index
    %dim_13628 = tensor.dim %16040, %c1_13627 : tensor<4x?x1x64xcomplex<f32>>
    %16041 = flow.tensor.bitcast %16040 : tensor<4x?x1x64xcomplex<f32>>{%dim_13628} -> tensor<4x?x1x128xf32>{%dim_13628}
    %16042 = torch_c.from_builtin_tensor %16041 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %16042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_13629 = torch.constant.int 5
    %16043 = torch.prims.convert_element_type %16042, %int5_13629 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %16043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_13630 = torch.constant.int 1
    %16044 = torch.aten.size.int %15630, %int1_13630 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_13631 = torch.constant.int 0
    %16045 = torch.aten.add.int %int0_13631, %16044 : !torch.int, !torch.int -> !torch.int
    %int0_13632 = torch.constant.int 0
    %int0_13633 = torch.constant.int 0
    %int1_13634 = torch.constant.int 1
    %16046 = torch.aten.slice.Tensor %15938, %int0_13632, %int0_13633, %16045, %int1_13634 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %16046, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_13635 = torch.constant.int 1
    %int0_13636 = torch.constant.int 0
    %int9223372036854775807_13637 = torch.constant.int 9223372036854775807
    %int1_13638 = torch.constant.int 1
    %16047 = torch.aten.slice.Tensor %16046, %int1_13635, %int0_13636, %int9223372036854775807_13637, %int1_13638 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %16047, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_13639 = torch.constant.int 0
    %16048 = torch.aten.unsqueeze %16047, %int0_13639 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %16048, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_13640 = torch.constant.int 2
    %16049 = torch.aten.unsqueeze %16048, %int2_13640 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %16049, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_13641 = torch.constant.int 3
    %int0_13642 = torch.constant.int 0
    %int9223372036854775807_13643 = torch.constant.int 9223372036854775807
    %int1_13644 = torch.constant.int 1
    %16050 = torch.aten.slice.Tensor %16049, %int3_13641, %int0_13642, %int9223372036854775807_13643, %int1_13644 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %16050, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %16051 = torch_c.to_builtin_tensor %15726 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_13645 = arith.constant 1 : index
    %dim_13646 = tensor.dim %16051, %c1_13645 : tensor<4x?x1x128xf16>
    %16052 = flow.tensor.bitcast %16051 : tensor<4x?x1x128xf16>{%dim_13646} -> tensor<4x?x1x64xcomplex<f16>>{%dim_13646}
    %16053 = torch_c.from_builtin_tensor %16052 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %16053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %16054 = torch.aten.mul.Tensor %16053, %16050 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %16054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %16055 = torch_c.to_builtin_tensor %16054 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_13647 = arith.constant 1 : index
    %dim_13648 = tensor.dim %16055, %c1_13647 : tensor<4x?x1x64xcomplex<f32>>
    %16056 = flow.tensor.bitcast %16055 : tensor<4x?x1x64xcomplex<f32>>{%dim_13648} -> tensor<4x?x1x128xf32>{%dim_13648}
    %16057 = torch_c.from_builtin_tensor %16056 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %16057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_13649 = torch.constant.int 5
    %16058 = torch.prims.convert_element_type %16057, %int5_13649 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %16058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
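    // Compute per-page base slot indices for each device's page-id tensor
    // (%2364..%2385): page_id * 64. Given the cache view further below,
    // 64 likely corresponds to 32 transformer blocks x 2 (key/value)
    // sub-slots per cache page; inferred from shapes, not stated in the IR.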
    %int64_13650 = torch.constant.int 64
    %16059 = torch.aten.mul.Scalar %2364, %int64_13650 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16059, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_13651 = torch.constant.int 64
    %16060 = torch.aten.mul.Scalar %2367, %int64_13651 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16060, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_13652 = torch.constant.int 64
    %16061 = torch.aten.mul.Scalar %2370, %int64_13652 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16061, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_13653 = torch.constant.int 64
    %16062 = torch.aten.mul.Scalar %2373, %int64_13653 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16062, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_13654 = torch.constant.int 64
    %16063 = torch.aten.mul.Scalar %2376, %int64_13654 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16063, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_13655 = torch.constant.int 64
    %16064 = torch.aten.mul.Scalar %2379, %int64_13655 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16064, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_13656 = torch.constant.int 64
    %16065 = torch.aten.mul.Scalar %2382, %int64_13656 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16065, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_13657 = torch.constant.int 64
    %16066 = torch.aten.mul.Scalar %2385, %int64_13657 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16066, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
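    // Add the constant 14 to every base index. Under the layer-major
    // [block, k/v] layout assumed above, 14 = 7 * 2 would select the key
    // sub-slot of transformer block 7 (the value sub-slot is addressed later
    // via +1). This reading is a plausible inference only.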
    %int14 = torch.constant.int 14
    %int1_13658 = torch.constant.int 1
    %16067 = torch.aten.add.Scalar %16059, %int14, %int1_13658 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16067, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int14_13659 = torch.constant.int 14
    %int1_13660 = torch.constant.int 1
    %16068 = torch.aten.add.Scalar %16060, %int14_13659, %int1_13660 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16068, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int14_13661 = torch.constant.int 14
    %int1_13662 = torch.constant.int 1
    %16069 = torch.aten.add.Scalar %16061, %int14_13661, %int1_13662 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16069, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int14_13663 = torch.constant.int 14
    %int1_13664 = torch.constant.int 1
    %16070 = torch.aten.add.Scalar %16062, %int14_13663, %int1_13664 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16070, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int14_13665 = torch.constant.int 14
    %int1_13666 = torch.constant.int 1
    %16071 = torch.aten.add.Scalar %16063, %int14_13665, %int1_13666 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16071, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int14_13667 = torch.constant.int 14
    %int1_13668 = torch.constant.int 1
    %16072 = torch.aten.add.Scalar %16064, %int14_13667, %int1_13668 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16072, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int14_13669 = torch.constant.int 14
    %int1_13670 = torch.constant.int 1
    %16073 = torch.aten.add.Scalar %16065, %int14_13669, %int1_13670 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16073, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int14_13671 = torch.constant.int 14
    %int1_13672 = torch.constant.int 1
    %16074 = torch.aten.add.Scalar %16066, %int14_13671, %int1_13672 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16074, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
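    // Reshape each RoPE'd key shard from [4, seq, 1, 128] to the paged
    // layout [4, pages, 16, 1, 128] (%3095 = pages, seq = pages * 16):
    // 16 tokens per page, 1 kv head per shard, head dim 128.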
    %int4_13673 = torch.constant.int 4
    %int16_13674 = torch.constant.int 16
    %int1_13675 = torch.constant.int 1
    %int128_13676 = torch.constant.int 128
    %16075 = torch.prim.ListConstruct %int4_13673, %3095, %int16_13674, %int1_13675, %int128_13676 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16076 = torch.aten.view %15953, %16075 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16076, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13677 = torch.constant.int 4
    %int16_13678 = torch.constant.int 16
    %int1_13679 = torch.constant.int 1
    %int128_13680 = torch.constant.int 128
    %16077 = torch.prim.ListConstruct %int4_13677, %3095, %int16_13678, %int1_13679, %int128_13680 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16078 = torch.aten.view %15968, %16077 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16078, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13681 = torch.constant.int 4
    %int16_13682 = torch.constant.int 16
    %int1_13683 = torch.constant.int 1
    %int128_13684 = torch.constant.int 128
    %16079 = torch.prim.ListConstruct %int4_13681, %3095, %int16_13682, %int1_13683, %int128_13684 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16080 = torch.aten.view %15983, %16079 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16080, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13685 = torch.constant.int 4
    %int16_13686 = torch.constant.int 16
    %int1_13687 = torch.constant.int 1
    %int128_13688 = torch.constant.int 128
    %16081 = torch.prim.ListConstruct %int4_13685, %3095, %int16_13686, %int1_13687, %int128_13688 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16082 = torch.aten.view %15998, %16081 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16082, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13689 = torch.constant.int 4
    %int16_13690 = torch.constant.int 16
    %int1_13691 = torch.constant.int 1
    %int128_13692 = torch.constant.int 128
    %16083 = torch.prim.ListConstruct %int4_13689, %3095, %int16_13690, %int1_13691, %int128_13692 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16084 = torch.aten.view %16013, %16083 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16084, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13693 = torch.constant.int 4
    %int16_13694 = torch.constant.int 16
    %int1_13695 = torch.constant.int 1
    %int128_13696 = torch.constant.int 128
    %16085 = torch.prim.ListConstruct %int4_13693, %3095, %int16_13694, %int1_13695, %int128_13696 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16086 = torch.aten.view %16028, %16085 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16086, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13697 = torch.constant.int 4
    %int16_13698 = torch.constant.int 16
    %int1_13699 = torch.constant.int 1
    %int128_13700 = torch.constant.int 128
    %16087 = torch.prim.ListConstruct %int4_13697, %3095, %int16_13698, %int1_13699, %int128_13700 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16088 = torch.aten.view %16043, %16087 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16088, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13701 = torch.constant.int 4
    %int16_13702 = torch.constant.int 16
    %int1_13703 = torch.constant.int 1
    %int128_13704 = torch.constant.int 128
    %16089 = torch.prim.ListConstruct %int4_13701, %3095, %int16_13702, %int1_13703, %int128_13704 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16090 = torch.aten.view %16058, %16089 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16090, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
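    // Collapse the batch and page dims: [4, pages, 16, 1, 128] ->
    // [4*pages, 16, 1, 128], so each leading row addresses one
    // (batch, page) cache slot.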
    %int4_13705 = torch.constant.int 4
    %16091 = torch.aten.mul.int %int4_13705, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13706 = torch.constant.int 16
    %int1_13707 = torch.constant.int 1
    %int128_13708 = torch.constant.int 128
    %16092 = torch.prim.ListConstruct %16091, %int16_13706, %int1_13707, %int128_13708 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16093 = torch.aten.view %16076, %16092 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16093, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13709 = torch.constant.int 4
    %16094 = torch.aten.mul.int %int4_13709, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13710 = torch.constant.int 16
    %int1_13711 = torch.constant.int 1
    %int128_13712 = torch.constant.int 128
    %16095 = torch.prim.ListConstruct %16094, %int16_13710, %int1_13711, %int128_13712 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16096 = torch.aten.view %16078, %16095 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16096, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13713 = torch.constant.int 4
    %16097 = torch.aten.mul.int %int4_13713, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13714 = torch.constant.int 16
    %int1_13715 = torch.constant.int 1
    %int128_13716 = torch.constant.int 128
    %16098 = torch.prim.ListConstruct %16097, %int16_13714, %int1_13715, %int128_13716 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16099 = torch.aten.view %16080, %16098 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16099, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13717 = torch.constant.int 4
    %16100 = torch.aten.mul.int %int4_13717, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13718 = torch.constant.int 16
    %int1_13719 = torch.constant.int 1
    %int128_13720 = torch.constant.int 128
    %16101 = torch.prim.ListConstruct %16100, %int16_13718, %int1_13719, %int128_13720 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16102 = torch.aten.view %16082, %16101 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16102, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13721 = torch.constant.int 4
    %16103 = torch.aten.mul.int %int4_13721, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13722 = torch.constant.int 16
    %int1_13723 = torch.constant.int 1
    %int128_13724 = torch.constant.int 128
    %16104 = torch.prim.ListConstruct %16103, %int16_13722, %int1_13723, %int128_13724 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16105 = torch.aten.view %16084, %16104 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16105, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13725 = torch.constant.int 4
    %16106 = torch.aten.mul.int %int4_13725, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13726 = torch.constant.int 16
    %int1_13727 = torch.constant.int 1
    %int128_13728 = torch.constant.int 128
    %16107 = torch.prim.ListConstruct %16106, %int16_13726, %int1_13727, %int128_13728 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16108 = torch.aten.view %16086, %16107 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16108, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13729 = torch.constant.int 4
    %16109 = torch.aten.mul.int %int4_13729, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13730 = torch.constant.int 16
    %int1_13731 = torch.constant.int 1
    %int128_13732 = torch.constant.int 128
    %16110 = torch.prim.ListConstruct %16109, %int16_13730, %int1_13731, %int128_13732 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16111 = torch.aten.view %16088, %16110 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16111, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13733 = torch.constant.int 4
    %16112 = torch.aten.mul.int %int4_13733, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13734 = torch.constant.int 16
    %int1_13735 = torch.constant.int 1
    %int128_13736 = torch.constant.int 128
    %16113 = torch.prim.ListConstruct %16112, %int16_13734, %int1_13735, %int128_13736 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16114 = torch.aten.view %16090, %16113 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16114, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
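    // Flatten the [4, pages] key slot-index tensors to [4*pages], lining
    // them up one-to-one with the flattened key payload rows above.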
    %int4_13737 = torch.constant.int 4
    %16115 = torch.aten.mul.int %int4_13737, %3095 : !torch.int, !torch.int -> !torch.int
    %16116 = torch.prim.ListConstruct %16115 : (!torch.int) -> !torch.list<int>
    %16117 = torch.aten.view %16067, %16116 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16117, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13738 = torch.constant.int 4
    %16118 = torch.aten.mul.int %int4_13738, %3095 : !torch.int, !torch.int -> !torch.int
    %16119 = torch.prim.ListConstruct %16118 : (!torch.int) -> !torch.list<int>
    %16120 = torch.aten.view %16068, %16119 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16120, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13739 = torch.constant.int 4
    %16121 = torch.aten.mul.int %int4_13739, %3095 : !torch.int, !torch.int -> !torch.int
    %16122 = torch.prim.ListConstruct %16121 : (!torch.int) -> !torch.list<int>
    %16123 = torch.aten.view %16069, %16122 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16123, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13740 = torch.constant.int 4
    %16124 = torch.aten.mul.int %int4_13740, %3095 : !torch.int, !torch.int -> !torch.int
    %16125 = torch.prim.ListConstruct %16124 : (!torch.int) -> !torch.list<int>
    %16126 = torch.aten.view %16070, %16125 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16126, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13741 = torch.constant.int 4
    %16127 = torch.aten.mul.int %int4_13741, %3095 : !torch.int, !torch.int -> !torch.int
    %16128 = torch.prim.ListConstruct %16127 : (!torch.int) -> !torch.list<int>
    %16129 = torch.aten.view %16071, %16128 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16129, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13742 = torch.constant.int 4
    %16130 = torch.aten.mul.int %int4_13742, %3095 : !torch.int, !torch.int -> !torch.int
    %16131 = torch.prim.ListConstruct %16130 : (!torch.int) -> !torch.list<int>
    %16132 = torch.aten.view %16072, %16131 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16132, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13743 = torch.constant.int 4
    %16133 = torch.aten.mul.int %int4_13743, %3095 : !torch.int, !torch.int -> !torch.int
    %16134 = torch.prim.ListConstruct %16133 : (!torch.int) -> !torch.list<int>
    %16135 = torch.aten.view %16073, %16134 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16135, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13744 = torch.constant.int 4
    %16136 = torch.aten.mul.int %int4_13744, %3095 : !torch.int, !torch.int -> !torch.int
    %16137 = torch.prim.ListConstruct %16136 : (!torch.int) -> !torch.list<int>
    %16138 = torch.aten.view %16074, %16137 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16138, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
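    // The same paged reshape for the other shard set (%15728..%15742),
    // presumably the value projections: unlike the keys these did not pass
    // through the RoPE blocks, consistent with rotary embeddings being
    // applied to queries and keys only.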
    %int4_13745 = torch.constant.int 4
    %int16_13746 = torch.constant.int 16
    %int1_13747 = torch.constant.int 1
    %int128_13748 = torch.constant.int 128
    %16139 = torch.prim.ListConstruct %int4_13745, %3095, %int16_13746, %int1_13747, %int128_13748 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16140 = torch.aten.view %15728, %16139 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16140, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13749 = torch.constant.int 4
    %int16_13750 = torch.constant.int 16
    %int1_13751 = torch.constant.int 1
    %int128_13752 = torch.constant.int 128
    %16141 = torch.prim.ListConstruct %int4_13749, %3095, %int16_13750, %int1_13751, %int128_13752 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16142 = torch.aten.view %15730, %16141 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16142, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13753 = torch.constant.int 4
    %int16_13754 = torch.constant.int 16
    %int1_13755 = torch.constant.int 1
    %int128_13756 = torch.constant.int 128
    %16143 = torch.prim.ListConstruct %int4_13753, %3095, %int16_13754, %int1_13755, %int128_13756 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16144 = torch.aten.view %15732, %16143 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16144, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13757 = torch.constant.int 4
    %int16_13758 = torch.constant.int 16
    %int1_13759 = torch.constant.int 1
    %int128_13760 = torch.constant.int 128
    %16145 = torch.prim.ListConstruct %int4_13757, %3095, %int16_13758, %int1_13759, %int128_13760 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16146 = torch.aten.view %15734, %16145 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16146, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13761 = torch.constant.int 4
    %int16_13762 = torch.constant.int 16
    %int1_13763 = torch.constant.int 1
    %int128_13764 = torch.constant.int 128
    %16147 = torch.prim.ListConstruct %int4_13761, %3095, %int16_13762, %int1_13763, %int128_13764 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16148 = torch.aten.view %15736, %16147 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16148, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13765 = torch.constant.int 4
    %int16_13766 = torch.constant.int 16
    %int1_13767 = torch.constant.int 1
    %int128_13768 = torch.constant.int 128
    %16149 = torch.prim.ListConstruct %int4_13765, %3095, %int16_13766, %int1_13767, %int128_13768 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16150 = torch.aten.view %15738, %16149 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16150, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13769 = torch.constant.int 4
    %int16_13770 = torch.constant.int 16
    %int1_13771 = torch.constant.int 1
    %int128_13772 = torch.constant.int 128
    %16151 = torch.prim.ListConstruct %int4_13769, %3095, %int16_13770, %int1_13771, %int128_13772 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16152 = torch.aten.view %15740, %16151 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16152, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_13773 = torch.constant.int 4
    %int16_13774 = torch.constant.int 16
    %int1_13775 = torch.constant.int 1
    %int128_13776 = torch.constant.int 128
    %16153 = torch.prim.ListConstruct %int4_13773, %3095, %int16_13774, %int1_13775, %int128_13776 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16154 = torch.aten.view %15742, %16153 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %16154, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
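    // Collapse batch and page dims of the value shards, mirroring the keys:
    // [4, pages, 16, 1, 128] -> [4*pages, 16, 1, 128].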
    %int4_13777 = torch.constant.int 4
    %16155 = torch.aten.mul.int %int4_13777, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13778 = torch.constant.int 16
    %int1_13779 = torch.constant.int 1
    %int128_13780 = torch.constant.int 128
    %16156 = torch.prim.ListConstruct %16155, %int16_13778, %int1_13779, %int128_13780 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16157 = torch.aten.view %16140, %16156 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16157, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13781 = torch.constant.int 4
    %16158 = torch.aten.mul.int %int4_13781, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13782 = torch.constant.int 16
    %int1_13783 = torch.constant.int 1
    %int128_13784 = torch.constant.int 128
    %16159 = torch.prim.ListConstruct %16158, %int16_13782, %int1_13783, %int128_13784 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16160 = torch.aten.view %16142, %16159 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16160, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13785 = torch.constant.int 4
    %16161 = torch.aten.mul.int %int4_13785, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13786 = torch.constant.int 16
    %int1_13787 = torch.constant.int 1
    %int128_13788 = torch.constant.int 128
    %16162 = torch.prim.ListConstruct %16161, %int16_13786, %int1_13787, %int128_13788 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16163 = torch.aten.view %16144, %16162 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16163, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13789 = torch.constant.int 4
    %16164 = torch.aten.mul.int %int4_13789, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13790 = torch.constant.int 16
    %int1_13791 = torch.constant.int 1
    %int128_13792 = torch.constant.int 128
    %16165 = torch.prim.ListConstruct %16164, %int16_13790, %int1_13791, %int128_13792 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16166 = torch.aten.view %16146, %16165 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16166, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13793 = torch.constant.int 4
    %16167 = torch.aten.mul.int %int4_13793, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13794 = torch.constant.int 16
    %int1_13795 = torch.constant.int 1
    %int128_13796 = torch.constant.int 128
    %16168 = torch.prim.ListConstruct %16167, %int16_13794, %int1_13795, %int128_13796 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16169 = torch.aten.view %16148, %16168 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16169, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13797 = torch.constant.int 4
    %16170 = torch.aten.mul.int %int4_13797, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13798 = torch.constant.int 16
    %int1_13799 = torch.constant.int 1
    %int128_13800 = torch.constant.int 128
    %16171 = torch.prim.ListConstruct %16170, %int16_13798, %int1_13799, %int128_13800 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16172 = torch.aten.view %16150, %16171 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16172, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13801 = torch.constant.int 4
    %16173 = torch.aten.mul.int %int4_13801, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13802 = torch.constant.int 16
    %int1_13803 = torch.constant.int 1
    %int128_13804 = torch.constant.int 128
    %16174 = torch.prim.ListConstruct %16173, %int16_13802, %int1_13803, %int128_13804 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16175 = torch.aten.view %16152, %16174 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16175, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_13805 = torch.constant.int 4
    %16176 = torch.aten.mul.int %int4_13805, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_13806 = torch.constant.int 16
    %int1_13807 = torch.constant.int 1
    %int128_13808 = torch.constant.int 128
    %16177 = torch.prim.ListConstruct %16176, %int16_13806, %int1_13807, %int128_13808 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16178 = torch.aten.view %16154, %16177 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16178, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
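    // Value slot indices = key slot indices + 1, i.e. the sub-slot
    // immediately after each key sub-slot in the per-page layout assumed
    // above.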
    %int1_13809 = torch.constant.int 1
    %int1_13810 = torch.constant.int 1
    %16179 = torch.aten.add.Scalar %16067, %int1_13809, %int1_13810 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16179, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_13811 = torch.constant.int 1
    %int1_13812 = torch.constant.int 1
    %16180 = torch.aten.add.Scalar %16068, %int1_13811, %int1_13812 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16180, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_13813 = torch.constant.int 1
    %int1_13814 = torch.constant.int 1
    %16181 = torch.aten.add.Scalar %16069, %int1_13813, %int1_13814 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16181, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_13815 = torch.constant.int 1
    %int1_13816 = torch.constant.int 1
    %16182 = torch.aten.add.Scalar %16070, %int1_13815, %int1_13816 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16182, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_13817 = torch.constant.int 1
    %int1_13818 = torch.constant.int 1
    %16183 = torch.aten.add.Scalar %16071, %int1_13817, %int1_13818 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16183, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_13819 = torch.constant.int 1
    %int1_13820 = torch.constant.int 1
    %16184 = torch.aten.add.Scalar %16072, %int1_13819, %int1_13820 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16184, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_13821 = torch.constant.int 1
    %int1_13822 = torch.constant.int 1
    %16185 = torch.aten.add.Scalar %16073, %int1_13821, %int1_13822 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16185, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_13823 = torch.constant.int 1
    %int1_13824 = torch.constant.int 1
    %16186 = torch.aten.add.Scalar %16074, %int1_13823, %int1_13824 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %16186, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
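    // Flatten the [4, pages] value slot-index tensors to [4*pages].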
    %int4_13825 = torch.constant.int 4
    %16187 = torch.aten.mul.int %int4_13825, %3095 : !torch.int, !torch.int -> !torch.int
    %16188 = torch.prim.ListConstruct %16187 : (!torch.int) -> !torch.list<int>
    %16189 = torch.aten.view %16179, %16188 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16189, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13826 = torch.constant.int 4
    %16190 = torch.aten.mul.int %int4_13826, %3095 : !torch.int, !torch.int -> !torch.int
    %16191 = torch.prim.ListConstruct %16190 : (!torch.int) -> !torch.list<int>
    %16192 = torch.aten.view %16180, %16191 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16192, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13827 = torch.constant.int 4
    %16193 = torch.aten.mul.int %int4_13827, %3095 : !torch.int, !torch.int -> !torch.int
    %16194 = torch.prim.ListConstruct %16193 : (!torch.int) -> !torch.list<int>
    %16195 = torch.aten.view %16181, %16194 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16195, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13828 = torch.constant.int 4
    %16196 = torch.aten.mul.int %int4_13828, %3095 : !torch.int, !torch.int -> !torch.int
    %16197 = torch.prim.ListConstruct %16196 : (!torch.int) -> !torch.list<int>
    %16198 = torch.aten.view %16182, %16197 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16198, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13829 = torch.constant.int 4
    %16199 = torch.aten.mul.int %int4_13829, %3095 : !torch.int, !torch.int -> !torch.int
    %16200 = torch.prim.ListConstruct %16199 : (!torch.int) -> !torch.list<int>
    %16201 = torch.aten.view %16183, %16200 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16201, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13830 = torch.constant.int 4
    %16202 = torch.aten.mul.int %int4_13830, %3095 : !torch.int, !torch.int -> !torch.int
    %16203 = torch.prim.ListConstruct %16202 : (!torch.int) -> !torch.list<int>
    %16204 = torch.aten.view %16184, %16203 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16204, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13831 = torch.constant.int 4
    %16205 = torch.aten.mul.int %int4_13831, %3095 : !torch.int, !torch.int -> !torch.int
    %16206 = torch.prim.ListConstruct %16205 : (!torch.int) -> !torch.list<int>
    %16207 = torch.aten.view %16185, %16206 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16207, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_13832 = torch.constant.int 4
    %16208 = torch.aten.mul.int %int4_13832, %3095 : !torch.int, !torch.int -> !torch.int
    %16209 = torch.prim.ListConstruct %16208 : (!torch.int) -> !torch.list<int>
    %16210 = torch.aten.view %16186, %16209 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16210, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
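    // Concatenate key and value slot indices per device into a single
    // [8*pages] index vector, so one scatter can update both the key and
    // value halves of the cache at once.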
    %16211 = torch.prim.ListConstruct %16117, %16189 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_13833 = torch.constant.int 0
    %16212 = torch.aten.cat %16211, %int0_13833 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16212, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %16213 = torch.prim.ListConstruct %16120, %16192 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_13834 = torch.constant.int 0
    %16214 = torch.aten.cat %16213, %int0_13834 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16214, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %16215 = torch.prim.ListConstruct %16123, %16195 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_13835 = torch.constant.int 0
    %16216 = torch.aten.cat %16215, %int0_13835 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16216, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %16217 = torch.prim.ListConstruct %16126, %16198 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_13836 = torch.constant.int 0
    %16218 = torch.aten.cat %16217, %int0_13836 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16218, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %16219 = torch.prim.ListConstruct %16129, %16201 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_13837 = torch.constant.int 0
    %16220 = torch.aten.cat %16219, %int0_13837 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16220, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %16221 = torch.prim.ListConstruct %16132, %16204 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_13838 = torch.constant.int 0
    %16222 = torch.aten.cat %16221, %int0_13838 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16222, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %16223 = torch.prim.ListConstruct %16135, %16207 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_13839 = torch.constant.int 0
    %16224 = torch.aten.cat %16223, %int0_13839 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16224, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %16225 = torch.prim.ListConstruct %16138, %16210 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_13840 = torch.constant.int 0
    %16226 = torch.aten.cat %16225, %int0_13840 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %16226, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
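    // Concatenate the matching key and value payloads per device into
    // [8*pages, 16, 1, 128], aligned row-for-row with the combined indices.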
    %16227 = torch.prim.ListConstruct %16093, %16157 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_13841 = torch.constant.int 0
    %16228 = torch.aten.cat %16227, %int0_13841 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16228, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16229 = torch.prim.ListConstruct %16096, %16160 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_13842 = torch.constant.int 0
    %16230 = torch.aten.cat %16229, %int0_13842 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16230, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16231 = torch.prim.ListConstruct %16099, %16163 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_13843 = torch.constant.int 0
    %16232 = torch.aten.cat %16231, %int0_13843 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16232, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16233 = torch.prim.ListConstruct %16102, %16166 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_13844 = torch.constant.int 0
    %16234 = torch.aten.cat %16233, %int0_13844 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16234, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16235 = torch.prim.ListConstruct %16105, %16169 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_13845 = torch.constant.int 0
    %16236 = torch.aten.cat %16235, %int0_13845 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16236, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16237 = torch.prim.ListConstruct %16108, %16172 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_13846 = torch.constant.int 0
    %16238 = torch.aten.cat %16237, %int0_13846 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16238, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16239 = torch.prim.ListConstruct %16111, %16175 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_13847 = torch.constant.int 0
    %16240 = torch.aten.cat %16239, %int0_13847 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16240, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16241 = torch.prim.ListConstruct %16114, %16178 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_13848 = torch.constant.int 0
    %16242 = torch.aten.cat %16241, %int0_13848 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16242, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
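    // Scatter into the paged KV cache, repeated once per device buffer:
    // view the flat [?, 131072] cache as [pages, 32, 2, 16, 1, 128]
    // (131072 = 32 * 2 * 16 * 1 * 128, matching the layout inferred above),
    // flatten to [pages*64, 16, 1, 128], index_put the combined k/v rows at
    // the computed slot indices, then view back to the flat cache shape.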
    %int32_13849 = torch.constant.int 32
    %int2_13850 = torch.constant.int 2
    %int16_13851 = torch.constant.int 16
    %int1_13852 = torch.constant.int 1
    %int128_13853 = torch.constant.int 128
    %16243 = torch.prim.ListConstruct %3023, %int32_13849, %int2_13850, %int16_13851, %int1_13852, %int128_13853 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16244 = torch.aten.view %14393, %16243 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16244, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_13854 = torch.constant.int 32
    %16245 = torch.aten.mul.int %3023, %int32_13854 : !torch.int, !torch.int -> !torch.int
    %int2_13855 = torch.constant.int 2
    %16246 = torch.aten.mul.int %16245, %int2_13855 : !torch.int, !torch.int -> !torch.int
    %int16_13856 = torch.constant.int 16
    %int1_13857 = torch.constant.int 1
    %int128_13858 = torch.constant.int 128
    %16247 = torch.prim.ListConstruct %16246, %int16_13856, %int1_13857, %int128_13858 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16248 = torch.aten.view %16244, %16247 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16248, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16249 = torch.prim.ListConstruct %16212 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_13859 = torch.constant.bool false
    %16250 = torch.aten.index_put %16248, %16249, %16228, %false_13859 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16250, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_13860 = torch.constant.int 32
    %int2_13861 = torch.constant.int 2
    %int16_13862 = torch.constant.int 16
    %int1_13863 = torch.constant.int 1
    %int128_13864 = torch.constant.int 128
    %16251 = torch.prim.ListConstruct %3023, %int32_13860, %int2_13861, %int16_13862, %int1_13863, %int128_13864 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16252 = torch.aten.view %16250, %16251 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16252, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_13865 = torch.constant.int 131072
    %16253 = torch.prim.ListConstruct %3023, %int131072_13865 : (!torch.int, !torch.int) -> !torch.list<int>
    %16254 = torch.aten.view %16252, %16253 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %16254, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_13866 = torch.constant.int 32
    %int2_13867 = torch.constant.int 2
    %int16_13868 = torch.constant.int 16
    %int1_13869 = torch.constant.int 1
    %int128_13870 = torch.constant.int 128
    %16255 = torch.prim.ListConstruct %3026, %int32_13866, %int2_13867, %int16_13868, %int1_13869, %int128_13870 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16256 = torch.aten.view %14405, %16255 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16256, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_13871 = torch.constant.int 32
    %16257 = torch.aten.mul.int %3026, %int32_13871 : !torch.int, !torch.int -> !torch.int
    %int2_13872 = torch.constant.int 2
    %16258 = torch.aten.mul.int %16257, %int2_13872 : !torch.int, !torch.int -> !torch.int
    %int16_13873 = torch.constant.int 16
    %int1_13874 = torch.constant.int 1
    %int128_13875 = torch.constant.int 128
    %16259 = torch.prim.ListConstruct %16258, %int16_13873, %int1_13874, %int128_13875 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16260 = torch.aten.view %16256, %16259 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16260, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16261 = torch.prim.ListConstruct %16214 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_13876 = torch.constant.bool false
    %16262 = torch.aten.index_put %16260, %16261, %16230, %false_13876 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16262, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_13877 = torch.constant.int 32
    %int2_13878 = torch.constant.int 2
    %int16_13879 = torch.constant.int 16
    %int1_13880 = torch.constant.int 1
    %int128_13881 = torch.constant.int 128
    %16263 = torch.prim.ListConstruct %3026, %int32_13877, %int2_13878, %int16_13879, %int1_13880, %int128_13881 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16264 = torch.aten.view %16262, %16263 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16264, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_13882 = torch.constant.int 131072
    %16265 = torch.prim.ListConstruct %3026, %int131072_13882 : (!torch.int, !torch.int) -> !torch.list<int>
    %16266 = torch.aten.view %16264, %16265 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %16266, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_13883 = torch.constant.int 32
    %int2_13884 = torch.constant.int 2
    %int16_13885 = torch.constant.int 16
    %int1_13886 = torch.constant.int 1
    %int128_13887 = torch.constant.int 128
    %16267 = torch.prim.ListConstruct %3029, %int32_13883, %int2_13884, %int16_13885, %int1_13886, %int128_13887 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16268 = torch.aten.view %14417, %16267 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16268, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_13888 = torch.constant.int 32
    %16269 = torch.aten.mul.int %3029, %int32_13888 : !torch.int, !torch.int -> !torch.int
    %int2_13889 = torch.constant.int 2
    %16270 = torch.aten.mul.int %16269, %int2_13889 : !torch.int, !torch.int -> !torch.int
    %int16_13890 = torch.constant.int 16
    %int1_13891 = torch.constant.int 1
    %int128_13892 = torch.constant.int 128
    %16271 = torch.prim.ListConstruct %16270, %int16_13890, %int1_13891, %int128_13892 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16272 = torch.aten.view %16268, %16271 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16272, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16273 = torch.prim.ListConstruct %16216 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_13893 = torch.constant.bool false
    %16274 = torch.aten.index_put %16272, %16273, %16232, %false_13893 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16274, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_13894 = torch.constant.int 32
    %int2_13895 = torch.constant.int 2
    %int16_13896 = torch.constant.int 16
    %int1_13897 = torch.constant.int 1
    %int128_13898 = torch.constant.int 128
    %16275 = torch.prim.ListConstruct %3029, %int32_13894, %int2_13895, %int16_13896, %int1_13897, %int128_13898 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16276 = torch.aten.view %16274, %16275 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16276, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_13899 = torch.constant.int 131072
    %16277 = torch.prim.ListConstruct %3029, %int131072_13899 : (!torch.int, !torch.int) -> !torch.list<int>
    %16278 = torch.aten.view %16276, %16277 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %16278, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_13900 = torch.constant.int 32
    %int2_13901 = torch.constant.int 2
    %int16_13902 = torch.constant.int 16
    %int1_13903 = torch.constant.int 1
    %int128_13904 = torch.constant.int 128
    %16279 = torch.prim.ListConstruct %3032, %int32_13900, %int2_13901, %int16_13902, %int1_13903, %int128_13904 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16280 = torch.aten.view %14429, %16279 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16280, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_13905 = torch.constant.int 32
    %16281 = torch.aten.mul.int %3032, %int32_13905 : !torch.int, !torch.int -> !torch.int
    %int2_13906 = torch.constant.int 2
    %16282 = torch.aten.mul.int %16281, %int2_13906 : !torch.int, !torch.int -> !torch.int
    %int16_13907 = torch.constant.int 16
    %int1_13908 = torch.constant.int 1
    %int128_13909 = torch.constant.int 128
    %16283 = torch.prim.ListConstruct %16282, %int16_13907, %int1_13908, %int128_13909 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16284 = torch.aten.view %16280, %16283 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16284, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16285 = torch.prim.ListConstruct %16218 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_13910 = torch.constant.bool false
    %16286 = torch.aten.index_put %16284, %16285, %16234, %false_13910 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16286, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_13911 = torch.constant.int 32
    %int2_13912 = torch.constant.int 2
    %int16_13913 = torch.constant.int 16
    %int1_13914 = torch.constant.int 1
    %int128_13915 = torch.constant.int 128
    %16287 = torch.prim.ListConstruct %3032, %int32_13911, %int2_13912, %int16_13913, %int1_13914, %int128_13915 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16288 = torch.aten.view %16286, %16287 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16288, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_13916 = torch.constant.int 131072
    %16289 = torch.prim.ListConstruct %3032, %int131072_13916 : (!torch.int, !torch.int) -> !torch.list<int>
    %16290 = torch.aten.view %16288, %16289 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %16290, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
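    // The same view / index_put / view-back update now repeats for each of
    // the remaining device shards, differing only in the page count, index
    // vector, and value tensor.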
    %int32_13917 = torch.constant.int 32
    %int2_13918 = torch.constant.int 2
    %int16_13919 = torch.constant.int 16
    %int1_13920 = torch.constant.int 1
    %int128_13921 = torch.constant.int 128
    %16291 = torch.prim.ListConstruct %3035, %int32_13917, %int2_13918, %int16_13919, %int1_13920, %int128_13921 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16292 = torch.aten.view %14441, %16291 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16292, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_13922 = torch.constant.int 32
    %16293 = torch.aten.mul.int %3035, %int32_13922 : !torch.int, !torch.int -> !torch.int
    %int2_13923 = torch.constant.int 2
    %16294 = torch.aten.mul.int %16293, %int2_13923 : !torch.int, !torch.int -> !torch.int
    %int16_13924 = torch.constant.int 16
    %int1_13925 = torch.constant.int 1
    %int128_13926 = torch.constant.int 128
    %16295 = torch.prim.ListConstruct %16294, %int16_13924, %int1_13925, %int128_13926 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16296 = torch.aten.view %16292, %16295 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16296, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16297 = torch.prim.ListConstruct %16220 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_13927 = torch.constant.bool false
    %16298 = torch.aten.index_put %16296, %16297, %16236, %false_13927 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16298, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_13928 = torch.constant.int 32
    %int2_13929 = torch.constant.int 2
    %int16_13930 = torch.constant.int 16
    %int1_13931 = torch.constant.int 1
    %int128_13932 = torch.constant.int 128
    %16299 = torch.prim.ListConstruct %3035, %int32_13928, %int2_13929, %int16_13930, %int1_13931, %int128_13932 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16300 = torch.aten.view %16298, %16299 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16300, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_13933 = torch.constant.int 131072
    %16301 = torch.prim.ListConstruct %3035, %int131072_13933 : (!torch.int, !torch.int) -> !torch.list<int>
    %16302 = torch.aten.view %16300, %16301 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %16302, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_13934 = torch.constant.int 32
    %int2_13935 = torch.constant.int 2
    %int16_13936 = torch.constant.int 16
    %int1_13937 = torch.constant.int 1
    %int128_13938 = torch.constant.int 128
    %16303 = torch.prim.ListConstruct %3038, %int32_13934, %int2_13935, %int16_13936, %int1_13937, %int128_13938 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16304 = torch.aten.view %14453, %16303 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16304, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_13939 = torch.constant.int 32
    %16305 = torch.aten.mul.int %3038, %int32_13939 : !torch.int, !torch.int -> !torch.int
    %int2_13940 = torch.constant.int 2
    %16306 = torch.aten.mul.int %16305, %int2_13940 : !torch.int, !torch.int -> !torch.int
    %int16_13941 = torch.constant.int 16
    %int1_13942 = torch.constant.int 1
    %int128_13943 = torch.constant.int 128
    %16307 = torch.prim.ListConstruct %16306, %int16_13941, %int1_13942, %int128_13943 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16308 = torch.aten.view %16304, %16307 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16308, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16309 = torch.prim.ListConstruct %16222 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_13944 = torch.constant.bool false
    %16310 = torch.aten.index_put %16308, %16309, %16238, %false_13944 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16310, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_13945 = torch.constant.int 32
    %int2_13946 = torch.constant.int 2
    %int16_13947 = torch.constant.int 16
    %int1_13948 = torch.constant.int 1
    %int128_13949 = torch.constant.int 128
    %16311 = torch.prim.ListConstruct %3038, %int32_13945, %int2_13946, %int16_13947, %int1_13948, %int128_13949 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16312 = torch.aten.view %16310, %16311 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16312, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_13950 = torch.constant.int 131072
    %16313 = torch.prim.ListConstruct %3038, %int131072_13950 : (!torch.int, !torch.int) -> !torch.list<int>
    %16314 = torch.aten.view %16312, %16313 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %16314, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_13951 = torch.constant.int 32
    %int2_13952 = torch.constant.int 2
    %int16_13953 = torch.constant.int 16
    %int1_13954 = torch.constant.int 1
    %int128_13955 = torch.constant.int 128
    %16315 = torch.prim.ListConstruct %3041, %int32_13951, %int2_13952, %int16_13953, %int1_13954, %int128_13955 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16316 = torch.aten.view %14465, %16315 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16316, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_13956 = torch.constant.int 32
    %16317 = torch.aten.mul.int %3041, %int32_13956 : !torch.int, !torch.int -> !torch.int
    %int2_13957 = torch.constant.int 2
    %16318 = torch.aten.mul.int %16317, %int2_13957 : !torch.int, !torch.int -> !torch.int
    %int16_13958 = torch.constant.int 16
    %int1_13959 = torch.constant.int 1
    %int128_13960 = torch.constant.int 128
    %16319 = torch.prim.ListConstruct %16318, %int16_13958, %int1_13959, %int128_13960 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16320 = torch.aten.view %16316, %16319 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16320, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16321 = torch.prim.ListConstruct %16224 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_13961 = torch.constant.bool false
    %16322 = torch.aten.index_put %16320, %16321, %16240, %false_13961 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16322, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_13962 = torch.constant.int 32
    %int2_13963 = torch.constant.int 2
    %int16_13964 = torch.constant.int 16
    %int1_13965 = torch.constant.int 1
    %int128_13966 = torch.constant.int 128
    %16323 = torch.prim.ListConstruct %3041, %int32_13962, %int2_13963, %int16_13964, %int1_13965, %int128_13966 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16324 = torch.aten.view %16322, %16323 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16324, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_13967 = torch.constant.int 131072
    %16325 = torch.prim.ListConstruct %3041, %int131072_13967 : (!torch.int, !torch.int) -> !torch.list<int>
    %16326 = torch.aten.view %16324, %16325 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %16326, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_13968 = torch.constant.int 32
    %int2_13969 = torch.constant.int 2
    %int16_13970 = torch.constant.int 16
    %int1_13971 = torch.constant.int 1
    %int128_13972 = torch.constant.int 128
    %16327 = torch.prim.ListConstruct %3044, %int32_13968, %int2_13969, %int16_13970, %int1_13971, %int128_13972 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16328 = torch.aten.view %14477, %16327 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16328, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_13973 = torch.constant.int 32
    %16329 = torch.aten.mul.int %3044, %int32_13973 : !torch.int, !torch.int -> !torch.int
    %int2_13974 = torch.constant.int 2
    %16330 = torch.aten.mul.int %16329, %int2_13974 : !torch.int, !torch.int -> !torch.int
    %int16_13975 = torch.constant.int 16
    %int1_13976 = torch.constant.int 1
    %int128_13977 = torch.constant.int 128
    %16331 = torch.prim.ListConstruct %16330, %int16_13975, %int1_13976, %int128_13977 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16332 = torch.aten.view %16328, %16331 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16332, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %16333 = torch.prim.ListConstruct %16226 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_13978 = torch.constant.bool false
    %16334 = torch.aten.index_put %16332, %16333, %16242, %false_13978 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %16334, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_13979 = torch.constant.int 32
    %int2_13980 = torch.constant.int 2
    %int16_13981 = torch.constant.int 16
    %int1_13982 = torch.constant.int 1
    %int128_13983 = torch.constant.int 128
    %16335 = torch.prim.ListConstruct %3044, %int32_13979, %int2_13980, %int16_13981, %int1_13982, %int128_13983 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16336 = torch.aten.view %16334, %16335 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %16336, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_13984 = torch.constant.int 131072
    %16337 = torch.prim.ListConstruct %3044, %int131072_13984 : (!torch.int, !torch.int) -> !torch.list<int>
    %16338 = torch.aten.view %16336, %16337 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %16338, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
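    // All shard caches are written. Next, eight per-shard current tensors
    // ([4, seq, 1, 128]; most likely the keys, one kv head per shard) are
    // unsqueezed at dim -2 in preparation for grouped-query replication.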
    %int-2_13985 = torch.constant.int -2
    %16339 = torch.aten.unsqueeze %15953, %int-2_13985 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_13986 = torch.constant.int -2
    %16340 = torch.aten.unsqueeze %15968, %int-2_13986 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_13987 = torch.constant.int -2
    %16341 = torch.aten.unsqueeze %15983, %int-2_13987 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_13988 = torch.constant.int -2
    %16342 = torch.aten.unsqueeze %15998, %int-2_13988 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_13989 = torch.constant.int -2
    %16343 = torch.aten.unsqueeze %16013, %int-2_13989 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_13990 = torch.constant.int -2
    %16344 = torch.aten.unsqueeze %16028, %int-2_13990 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_13991 = torch.constant.int -2
    %16345 = torch.aten.unsqueeze %16043, %int-2_13991 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_13992 = torch.constant.int -2
    %16346 = torch.aten.unsqueeze %16058, %int-2_13992 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
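    // Each [4, seq, 1, 1, 128] tensor is broadcast across 4 query heads per
    // kv head (expand to [4, seq, 1, 4, 128], then view to [4, seq, 4, 128]),
    // i.e. grouped-query-attention head replication with n_rep = 4. A hedged
    // PyTorch-style sketch of one shard (s is the dynamic sequence length):
    //   k = k.unsqueeze(-2).expand(4, s, 1, 4, 128).reshape(4, s, 4, 128)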
    %int4_13993 = torch.constant.int 4
    %int1_13994 = torch.constant.int 1
    %int4_13995 = torch.constant.int 4
    %int128_13996 = torch.constant.int 128
    %16347 = torch.prim.ListConstruct %int4_13993, %15939, %int1_13994, %int4_13995, %int128_13996 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_13997 = torch.constant.bool false
    %16348 = torch.aten.expand %16339, %16347, %false_13997 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_13998 = torch.constant.int 4
    %int1_13999 = torch.constant.int 1
    %int4_14000 = torch.constant.int 4
    %int128_14001 = torch.constant.int 128
    %16349 = torch.prim.ListConstruct %int4_13998, %15939, %int1_13999, %int4_14000, %int128_14001 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14002 = torch.constant.bool false
    %16350 = torch.aten.expand %16340, %16349, %false_14002 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16350, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14003 = torch.constant.int 4
    %int1_14004 = torch.constant.int 1
    %int4_14005 = torch.constant.int 4
    %int128_14006 = torch.constant.int 128
    %16351 = torch.prim.ListConstruct %int4_14003, %15939, %int1_14004, %int4_14005, %int128_14006 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14007 = torch.constant.bool false
    %16352 = torch.aten.expand %16341, %16351, %false_14007 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14008 = torch.constant.int 4
    %int1_14009 = torch.constant.int 1
    %int4_14010 = torch.constant.int 4
    %int128_14011 = torch.constant.int 128
    %16353 = torch.prim.ListConstruct %int4_14008, %15939, %int1_14009, %int4_14010, %int128_14011 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14012 = torch.constant.bool false
    %16354 = torch.aten.expand %16342, %16353, %false_14012 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14013 = torch.constant.int 4
    %int1_14014 = torch.constant.int 1
    %int4_14015 = torch.constant.int 4
    %int128_14016 = torch.constant.int 128
    %16355 = torch.prim.ListConstruct %int4_14013, %15939, %int1_14014, %int4_14015, %int128_14016 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14017 = torch.constant.bool false
    %16356 = torch.aten.expand %16343, %16355, %false_14017 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14018 = torch.constant.int 4
    %int1_14019 = torch.constant.int 1
    %int4_14020 = torch.constant.int 4
    %int128_14021 = torch.constant.int 128
    %16357 = torch.prim.ListConstruct %int4_14018, %15939, %int1_14019, %int4_14020, %int128_14021 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14022 = torch.constant.bool false
    %16358 = torch.aten.expand %16344, %16357, %false_14022 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14023 = torch.constant.int 4
    %int1_14024 = torch.constant.int 1
    %int4_14025 = torch.constant.int 4
    %int128_14026 = torch.constant.int 128
    %16359 = torch.prim.ListConstruct %int4_14023, %15939, %int1_14024, %int4_14025, %int128_14026 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14027 = torch.constant.bool false
    %16360 = torch.aten.expand %16345, %16359, %false_14027 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14028 = torch.constant.int 4
    %int1_14029 = torch.constant.int 1
    %int4_14030 = torch.constant.int 4
    %int128_14031 = torch.constant.int 128
    %16361 = torch.prim.ListConstruct %int4_14028, %15939, %int1_14029, %int4_14030, %int128_14031 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14032 = torch.constant.bool false
    %16362 = torch.aten.expand %16346, %16361, %false_14032 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14033 = torch.constant.int 4
    %int4_14034 = torch.constant.int 4
    %int128_14035 = torch.constant.int 128
    %16363 = torch.prim.ListConstruct %int4_14033, %15939, %int4_14034, %int128_14035 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16364 = torch.aten.view %16348, %16363 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14036 = torch.constant.int 4
    %int4_14037 = torch.constant.int 4
    %int128_14038 = torch.constant.int 128
    %16365 = torch.prim.ListConstruct %int4_14036, %15939, %int4_14037, %int128_14038 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16366 = torch.aten.view %16350, %16365 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14039 = torch.constant.int 4
    %int4_14040 = torch.constant.int 4
    %int128_14041 = torch.constant.int 128
    %16367 = torch.prim.ListConstruct %int4_14039, %15939, %int4_14040, %int128_14041 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16368 = torch.aten.view %16352, %16367 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14042 = torch.constant.int 4
    %int4_14043 = torch.constant.int 4
    %int128_14044 = torch.constant.int 128
    %16369 = torch.prim.ListConstruct %int4_14042, %15939, %int4_14043, %int128_14044 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16370 = torch.aten.view %16354, %16369 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14045 = torch.constant.int 4
    %int4_14046 = torch.constant.int 4
    %int128_14047 = torch.constant.int 128
    %16371 = torch.prim.ListConstruct %int4_14045, %15939, %int4_14046, %int128_14047 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16372 = torch.aten.view %16356, %16371 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14048 = torch.constant.int 4
    %int4_14049 = torch.constant.int 4
    %int128_14050 = torch.constant.int 128
    %16373 = torch.prim.ListConstruct %int4_14048, %15939, %int4_14049, %int128_14050 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16374 = torch.aten.view %16358, %16373 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14051 = torch.constant.int 4
    %int4_14052 = torch.constant.int 4
    %int128_14053 = torch.constant.int 128
    %16375 = torch.prim.ListConstruct %int4_14051, %15939, %int4_14052, %int128_14053 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16376 = torch.aten.view %16360, %16375 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14054 = torch.constant.int 4
    %int4_14055 = torch.constant.int 4
    %int128_14056 = torch.constant.int 128
    %16377 = torch.prim.ListConstruct %int4_14054, %15939, %int4_14055, %int128_14056 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16378 = torch.aten.view %16362, %16377 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
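    // The same unsqueeze / expand / view replication is applied to the second
    // set of per-shard tensors (most likely the values); the dynamic sequence
    // length is re-read below via aten.size.int.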
    %int-2_14057 = torch.constant.int -2
    %16379 = torch.aten.unsqueeze %15728, %int-2_14057 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_14058 = torch.constant.int -2
    %16380 = torch.aten.unsqueeze %15730, %int-2_14058 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_14059 = torch.constant.int -2
    %16381 = torch.aten.unsqueeze %15732, %int-2_14059 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_14060 = torch.constant.int -2
    %16382 = torch.aten.unsqueeze %15734, %int-2_14060 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_14061 = torch.constant.int -2
    %16383 = torch.aten.unsqueeze %15736, %int-2_14061 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_14062 = torch.constant.int -2
    %16384 = torch.aten.unsqueeze %15738, %int-2_14062 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_14063 = torch.constant.int -2
    %16385 = torch.aten.unsqueeze %15740, %int-2_14063 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_14064 = torch.constant.int -2
    %16386 = torch.aten.unsqueeze %15742, %int-2_14064 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %16386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_14065 = torch.constant.int 1
    %16387 = torch.aten.size.int %15652, %int1_14065 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_14066 = torch.constant.int 4
    %int1_14067 = torch.constant.int 1
    %int4_14068 = torch.constant.int 4
    %int128_14069 = torch.constant.int 128
    %16388 = torch.prim.ListConstruct %int4_14066, %16387, %int1_14067, %int4_14068, %int128_14069 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14070 = torch.constant.bool false
    %16389 = torch.aten.expand %16379, %16388, %false_14070 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14071 = torch.constant.int 4
    %int1_14072 = torch.constant.int 1
    %int4_14073 = torch.constant.int 4
    %int128_14074 = torch.constant.int 128
    %16390 = torch.prim.ListConstruct %int4_14071, %16387, %int1_14072, %int4_14073, %int128_14074 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14075 = torch.constant.bool false
    %16391 = torch.aten.expand %16380, %16390, %false_14075 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14076 = torch.constant.int 4
    %int1_14077 = torch.constant.int 1
    %int4_14078 = torch.constant.int 4
    %int128_14079 = torch.constant.int 128
    %16392 = torch.prim.ListConstruct %int4_14076, %16387, %int1_14077, %int4_14078, %int128_14079 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14080 = torch.constant.bool false
    %16393 = torch.aten.expand %16381, %16392, %false_14080 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14081 = torch.constant.int 4
    %int1_14082 = torch.constant.int 1
    %int4_14083 = torch.constant.int 4
    %int128_14084 = torch.constant.int 128
    %16394 = torch.prim.ListConstruct %int4_14081, %16387, %int1_14082, %int4_14083, %int128_14084 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14085 = torch.constant.bool false
    %16395 = torch.aten.expand %16382, %16394, %false_14085 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14086 = torch.constant.int 4
    %int1_14087 = torch.constant.int 1
    %int4_14088 = torch.constant.int 4
    %int128_14089 = torch.constant.int 128
    %16396 = torch.prim.ListConstruct %int4_14086, %16387, %int1_14087, %int4_14088, %int128_14089 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14090 = torch.constant.bool false
    %16397 = torch.aten.expand %16383, %16396, %false_14090 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14091 = torch.constant.int 4
    %int1_14092 = torch.constant.int 1
    %int4_14093 = torch.constant.int 4
    %int128_14094 = torch.constant.int 128
    %16398 = torch.prim.ListConstruct %int4_14091, %16387, %int1_14092, %int4_14093, %int128_14094 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14095 = torch.constant.bool false
    %16399 = torch.aten.expand %16384, %16398, %false_14095 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14096 = torch.constant.int 4
    %int1_14097 = torch.constant.int 1
    %int4_14098 = torch.constant.int 4
    %int128_14099 = torch.constant.int 128
    %16400 = torch.prim.ListConstruct %int4_14096, %16387, %int1_14097, %int4_14098, %int128_14099 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14100 = torch.constant.bool false
    %16401 = torch.aten.expand %16385, %16400, %false_14100 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14101 = torch.constant.int 4
    %int1_14102 = torch.constant.int 1
    %int4_14103 = torch.constant.int 4
    %int128_14104 = torch.constant.int 128
    %16402 = torch.prim.ListConstruct %int4_14101, %16387, %int1_14102, %int4_14103, %int128_14104 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_14105 = torch.constant.bool false
    %16403 = torch.aten.expand %16386, %16402, %false_14105 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %16403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_14106 = torch.constant.int 4
    %int4_14107 = torch.constant.int 4
    %int128_14108 = torch.constant.int 128
    %16404 = torch.prim.ListConstruct %int4_14106, %16387, %int4_14107, %int128_14108 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16405 = torch.aten.view %16389, %16404 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14109 = torch.constant.int 4
    %int4_14110 = torch.constant.int 4
    %int128_14111 = torch.constant.int 128
    %16406 = torch.prim.ListConstruct %int4_14109, %16387, %int4_14110, %int128_14111 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16407 = torch.aten.view %16391, %16406 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14112 = torch.constant.int 4
    %int4_14113 = torch.constant.int 4
    %int128_14114 = torch.constant.int 128
    %16408 = torch.prim.ListConstruct %int4_14112, %16387, %int4_14113, %int128_14114 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16409 = torch.aten.view %16393, %16408 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14115 = torch.constant.int 4
    %int4_14116 = torch.constant.int 4
    %int128_14117 = torch.constant.int 128
    %16410 = torch.prim.ListConstruct %int4_14115, %16387, %int4_14116, %int128_14117 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16411 = torch.aten.view %16395, %16410 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14118 = torch.constant.int 4
    %int4_14119 = torch.constant.int 4
    %int128_14120 = torch.constant.int 128
    %16412 = torch.prim.ListConstruct %int4_14118, %16387, %int4_14119, %int128_14120 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16413 = torch.aten.view %16397, %16412 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14121 = torch.constant.int 4
    %int4_14122 = torch.constant.int 4
    %int128_14123 = torch.constant.int 128
    %16414 = torch.prim.ListConstruct %int4_14121, %16387, %int4_14122, %int128_14123 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16415 = torch.aten.view %16399, %16414 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14124 = torch.constant.int 4
    %int4_14125 = torch.constant.int 4
    %int128_14126 = torch.constant.int 128
    %16416 = torch.prim.ListConstruct %int4_14124, %16387, %int4_14125, %int128_14126 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16417 = torch.aten.view %16401, %16416 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_14127 = torch.constant.int 4
    %int4_14128 = torch.constant.int 4
    %int128_14129 = torch.constant.int 128
    %16418 = torch.prim.ListConstruct %int4_14127, %16387, %int4_14128, %int128_14129 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16419 = torch.aten.view %16403, %16418 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
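    // Q, K, and V (eight device shards each) are transposed from
    // [4, seq, 4, 128] to [4, 4, seq, 128], the batch x heads x seq x
    // head-dim layout expected by the attention calls below.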
    %int1_14130 = torch.constant.int 1
    %int2_14131 = torch.constant.int 2
    %16420 = torch.aten.transpose.int %15795, %int1_14130, %int2_14131 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16420, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14132 = torch.constant.int 1
    %int2_14133 = torch.constant.int 2
    %16421 = torch.aten.transpose.int %15810, %int1_14132, %int2_14133 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16421, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14134 = torch.constant.int 1
    %int2_14135 = torch.constant.int 2
    %16422 = torch.aten.transpose.int %15825, %int1_14134, %int2_14135 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16422, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14136 = torch.constant.int 1
    %int2_14137 = torch.constant.int 2
    %16423 = torch.aten.transpose.int %15840, %int1_14136, %int2_14137 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16423, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14138 = torch.constant.int 1
    %int2_14139 = torch.constant.int 2
    %16424 = torch.aten.transpose.int %15855, %int1_14138, %int2_14139 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16424, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14140 = torch.constant.int 1
    %int2_14141 = torch.constant.int 2
    %16425 = torch.aten.transpose.int %15870, %int1_14140, %int2_14141 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16425, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14142 = torch.constant.int 1
    %int2_14143 = torch.constant.int 2
    %16426 = torch.aten.transpose.int %15885, %int1_14142, %int2_14143 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16426, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14144 = torch.constant.int 1
    %int2_14145 = torch.constant.int 2
    %16427 = torch.aten.transpose.int %15900, %int1_14144, %int2_14145 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16427, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14146 = torch.constant.int 1
    %int2_14147 = torch.constant.int 2
    %16428 = torch.aten.transpose.int %16364, %int1_14146, %int2_14147 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16428, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14148 = torch.constant.int 1
    %int2_14149 = torch.constant.int 2
    %16429 = torch.aten.transpose.int %16366, %int1_14148, %int2_14149 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16429, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14150 = torch.constant.int 1
    %int2_14151 = torch.constant.int 2
    %16430 = torch.aten.transpose.int %16368, %int1_14150, %int2_14151 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16430, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14152 = torch.constant.int 1
    %int2_14153 = torch.constant.int 2
    %16431 = torch.aten.transpose.int %16370, %int1_14152, %int2_14153 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16431, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14154 = torch.constant.int 1
    %int2_14155 = torch.constant.int 2
    %16432 = torch.aten.transpose.int %16372, %int1_14154, %int2_14155 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16432, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14156 = torch.constant.int 1
    %int2_14157 = torch.constant.int 2
    %16433 = torch.aten.transpose.int %16374, %int1_14156, %int2_14157 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16433, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14158 = torch.constant.int 1
    %int2_14159 = torch.constant.int 2
    %16434 = torch.aten.transpose.int %16376, %int1_14158, %int2_14159 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16434, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14160 = torch.constant.int 1
    %int2_14161 = torch.constant.int 2
    %16435 = torch.aten.transpose.int %16378, %int1_14160, %int2_14161 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16435, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14162 = torch.constant.int 1
    %int2_14163 = torch.constant.int 2
    %16436 = torch.aten.transpose.int %16405, %int1_14162, %int2_14163 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16436, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14164 = torch.constant.int 1
    %int2_14165 = torch.constant.int 2
    %16437 = torch.aten.transpose.int %16407, %int1_14164, %int2_14165 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16437, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14166 = torch.constant.int 1
    %int2_14167 = torch.constant.int 2
    %16438 = torch.aten.transpose.int %16409, %int1_14166, %int2_14167 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16438, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14168 = torch.constant.int 1
    %int2_14169 = torch.constant.int 2
    %16439 = torch.aten.transpose.int %16411, %int1_14168, %int2_14169 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16439, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14170 = torch.constant.int 1
    %int2_14171 = torch.constant.int 2
    %16440 = torch.aten.transpose.int %16413, %int1_14170, %int2_14171 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16440, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14172 = torch.constant.int 1
    %int2_14173 = torch.constant.int 2
    %16441 = torch.aten.transpose.int %16415, %int1_14172, %int2_14173 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16441, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14174 = torch.constant.int 1
    %int2_14175 = torch.constant.int 2
    %16442 = torch.aten.transpose.int %16417, %int1_14174, %int2_14175 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16442, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_14176 = torch.constant.int 1
    %int2_14177 = torch.constant.int 2
    %16443 = torch.aten.transpose.int %16419, %int1_14176, %int2_14177 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %16443, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
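    // One fused attention call per device shard. Positionally the operands
    // are (query, key, value, dropout_p = 0.0, is_causal = true,
    // attn_mask = none, scale = none), matching the PyTorch signature of
    // _scaled_dot_product_flash_attention_for_cpu; each call yields the
    // attention output plus an f32 logsumexp tensor. Sketch:
    //   out, lse = torch.ops.aten._scaled_dot_product_flash_attention_for_cpu(
    //       q, k, v, dropout_p=0.0, is_causal=True)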
    %float0.000000e00_14178 = torch.constant.float 0.000000e+00
    %true_14179 = torch.constant.bool true
    %none_14180 = torch.constant.none
    %none_14181 = torch.constant.none
    %16444:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%16420, %16428, %16436, %float0.000000e00_14178, %true_14179, %none_14180, %none_14181) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %16444#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_14182 = torch.constant.float 0.000000e+00
    %true_14183 = torch.constant.bool true
    %none_14184 = torch.constant.none
    %none_14185 = torch.constant.none
    %16445:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%16421, %16429, %16437, %float0.000000e00_14182, %true_14183, %none_14184, %none_14185) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %16445#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_14186 = torch.constant.float 0.000000e+00
    %true_14187 = torch.constant.bool true
    %none_14188 = torch.constant.none
    %none_14189 = torch.constant.none
    %16446:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%16422, %16430, %16438, %float0.000000e00_14186, %true_14187, %none_14188, %none_14189) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %16446#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_14190 = torch.constant.float 0.000000e+00
    %true_14191 = torch.constant.bool true
    %none_14192 = torch.constant.none
    %none_14193 = torch.constant.none
    %16447:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%16423, %16431, %16439, %float0.000000e00_14190, %true_14191, %none_14192, %none_14193) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %16447#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_14194 = torch.constant.float 0.000000e+00
    %true_14195 = torch.constant.bool true
    %none_14196 = torch.constant.none
    %none_14197 = torch.constant.none
    %16448:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%16424, %16432, %16440, %float0.000000e00_14194, %true_14195, %none_14196, %none_14197) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %16448#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_14198 = torch.constant.float 0.000000e+00
    %true_14199 = torch.constant.bool true
    %none_14200 = torch.constant.none
    %none_14201 = torch.constant.none
    %16449:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%16425, %16433, %16441, %float0.000000e00_14198, %true_14199, %none_14200, %none_14201) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %16449#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_14202 = torch.constant.float 0.000000e+00
    %true_14203 = torch.constant.bool true
    %none_14204 = torch.constant.none
    %none_14205 = torch.constant.none
    %16450:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%16426, %16434, %16442, %float0.000000e00_14202, %true_14203, %none_14204, %none_14205) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %16450#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_14206 = torch.constant.float 0.000000e+00
    %true_14207 = torch.constant.bool true
    %none_14208 = torch.constant.none
    %none_14209 = torch.constant.none
    %16451:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%16427, %16435, %16443, %float0.000000e00_14206, %true_14207, %none_14208, %none_14209) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %16451#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
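    // The attention outputs are transposed back to [4, seq, 4, 128]
    // (batch x seq x heads x head-dim) ahead of head concatenation.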
    %int1_14210 = torch.constant.int 1
    %int2_14211 = torch.constant.int 2
    %16452 = torch.aten.transpose.int %16444#0, %int1_14210, %int2_14211 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_14212 = torch.constant.int 1
    %int2_14213 = torch.constant.int 2
    %16453 = torch.aten.transpose.int %16445#0, %int1_14212, %int2_14213 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_14214 = torch.constant.int 1
    %int2_14215 = torch.constant.int 2
    %16454 = torch.aten.transpose.int %16446#0, %int1_14214, %int2_14215 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_14216 = torch.constant.int 1
    %int2_14217 = torch.constant.int 2
    %16455 = torch.aten.transpose.int %16447#0, %int1_14216, %int2_14217 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_14218 = torch.constant.int 1
    %int2_14219 = torch.constant.int 2
    %16456 = torch.aten.transpose.int %16448#0, %int1_14218, %int2_14219 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_14220 = torch.constant.int 1
    %int2_14221 = torch.constant.int 2
    %16457 = torch.aten.transpose.int %16449#0, %int1_14220, %int2_14221 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_14222 = torch.constant.int 1
    %int2_14223 = torch.constant.int 2
    %16458 = torch.aten.transpose.int %16450#0, %int1_14222, %int2_14223 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_14224 = torch.constant.int 1
    %int2_14225 = torch.constant.int 2
    %16459 = torch.aten.transpose.int %16451#0, %int1_14224, %int2_14225 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %16459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
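    // Head merge: each [4, seq, 4, 128] output is viewed as [4, seq, 512],
    // folding the 4 heads into the 4 * 128 = 512 columns this shard
    // contributes to the attention output projection.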
    %int4_14226 = torch.constant.int 4
    %int512_14227 = torch.constant.int 512
    %16460 = torch.prim.ListConstruct %int4_14226, %15781, %int512_14227 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16461 = torch.aten.view %16452, %16460 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %16461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14228 = torch.constant.int 4
    %int512_14229 = torch.constant.int 512
    %16462 = torch.prim.ListConstruct %int4_14228, %15796, %int512_14229 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16463 = torch.aten.view %16453, %16462 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %16463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14230 = torch.constant.int 4
    %int512_14231 = torch.constant.int 512
    %16464 = torch.prim.ListConstruct %int4_14230, %15811, %int512_14231 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16465 = torch.aten.view %16454, %16464 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %16465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14232 = torch.constant.int 4
    %int512_14233 = torch.constant.int 512
    %16466 = torch.prim.ListConstruct %int4_14232, %15826, %int512_14233 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16467 = torch.aten.view %16455, %16466 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %16467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14234 = torch.constant.int 4
    %int512_14235 = torch.constant.int 512
    %16468 = torch.prim.ListConstruct %int4_14234, %15841, %int512_14235 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16469 = torch.aten.view %16456, %16468 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %16469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14236 = torch.constant.int 4
    %int512_14237 = torch.constant.int 512
    %16470 = torch.prim.ListConstruct %int4_14236, %15856, %int512_14237 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16471 = torch.aten.view %16457, %16470 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %16471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14238 = torch.constant.int 4
    %int512_14239 = torch.constant.int 512
    %16472 = torch.prim.ListConstruct %int4_14238, %15871, %int512_14239 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16473 = torch.aten.view %16458, %16472 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %16473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14240 = torch.constant.int 4
    %int512_14241 = torch.constant.int 512
    %16474 = torch.prim.ListConstruct %int4_14240, %15886, %int512_14241 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16475 = torch.aten.view %16459, %16474 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %16475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
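    // %544..%551 are eight 4096x512 weight shards, one per device; by shape and
    // usage they are consistent with a row-parallel attention output projection.
    // Each is permuted to [512, 4096] to serve as the right-hand side of the
    // flattened matmuls below.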
    %int1_14242 = torch.constant.int 1
    %int0_14243 = torch.constant.int 0
    %16476 = torch.prim.ListConstruct %int1_14242, %int0_14243 : (!torch.int, !torch.int) -> !torch.list<int>
    %16477 = torch.aten.permute %544, %16476 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_14244 = torch.constant.int 1
    %int0_14245 = torch.constant.int 0
    %16478 = torch.prim.ListConstruct %int1_14244, %int0_14245 : (!torch.int, !torch.int) -> !torch.list<int>
    %16479 = torch.aten.permute %545, %16478 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_14246 = torch.constant.int 1
    %int0_14247 = torch.constant.int 0
    %16480 = torch.prim.ListConstruct %int1_14246, %int0_14247 : (!torch.int, !torch.int) -> !torch.list<int>
    %16481 = torch.aten.permute %546, %16480 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_14248 = torch.constant.int 1
    %int0_14249 = torch.constant.int 0
    %16482 = torch.prim.ListConstruct %int1_14248, %int0_14249 : (!torch.int, !torch.int) -> !torch.list<int>
    %16483 = torch.aten.permute %547, %16482 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_14250 = torch.constant.int 1
    %int0_14251 = torch.constant.int 0
    %16484 = torch.prim.ListConstruct %int1_14250, %int0_14251 : (!torch.int, !torch.int) -> !torch.list<int>
    %16485 = torch.aten.permute %548, %16484 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_14252 = torch.constant.int 1
    %int0_14253 = torch.constant.int 0
    %16486 = torch.prim.ListConstruct %int1_14252, %int0_14253 : (!torch.int, !torch.int) -> !torch.list<int>
    %16487 = torch.aten.permute %549, %16486 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_14254 = torch.constant.int 1
    %int0_14255 = torch.constant.int 0
    %16488 = torch.prim.ListConstruct %int1_14254, %int0_14255 : (!torch.int, !torch.int) -> !torch.list<int>
    %16489 = torch.aten.permute %550, %16488 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_14256 = torch.constant.int 1
    %int0_14257 = torch.constant.int 0
    %16490 = torch.prim.ListConstruct %int1_14256, %int0_14257 : (!torch.int, !torch.int) -> !torch.list<int>
    %16491 = torch.aten.permute %551, %16490 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
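    // Per-shard projection: each [4, ?, 512] activation is flattened to
    // [4*seq, 512], multiplied by its [512, 4096] weight shard, and reshaped back
    // to [4, ?, 4096]. The eight results are partial sums that still have to be
    // reduced across devices.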
    %int4_14258 = torch.constant.int 4
    %16492 = torch.aten.mul.int %int4_14258, %15781 : !torch.int, !torch.int -> !torch.int
    %int512_14259 = torch.constant.int 512
    %16493 = torch.prim.ListConstruct %16492, %int512_14259 : (!torch.int, !torch.int) -> !torch.list<int>
    %16494 = torch.aten.view %16461, %16493 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %16494, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %16495 = torch.aten.mm %16494, %16477 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16495, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14260 = torch.constant.int 4
    %int4096_14261 = torch.constant.int 4096
    %16496 = torch.prim.ListConstruct %int4_14260, %15781, %int4096_14261 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16497 = torch.aten.view %16495, %16496 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_14262 = torch.constant.int 4
    %16498 = torch.aten.mul.int %int4_14262, %15796 : !torch.int, !torch.int -> !torch.int
    %int512_14263 = torch.constant.int 512
    %16499 = torch.prim.ListConstruct %16498, %int512_14263 : (!torch.int, !torch.int) -> !torch.list<int>
    %16500 = torch.aten.view %16463, %16499 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %16500, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %16501 = torch.aten.mm %16500, %16479 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16501, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14264 = torch.constant.int 4
    %int4096_14265 = torch.constant.int 4096
    %16502 = torch.prim.ListConstruct %int4_14264, %15796, %int4096_14265 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16503 = torch.aten.view %16501, %16502 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_14266 = torch.constant.int 4
    %16504 = torch.aten.mul.int %int4_14266, %15811 : !torch.int, !torch.int -> !torch.int
    %int512_14267 = torch.constant.int 512
    %16505 = torch.prim.ListConstruct %16504, %int512_14267 : (!torch.int, !torch.int) -> !torch.list<int>
    %16506 = torch.aten.view %16465, %16505 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %16506, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %16507 = torch.aten.mm %16506, %16481 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16507, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14268 = torch.constant.int 4
    %int4096_14269 = torch.constant.int 4096
    %16508 = torch.prim.ListConstruct %int4_14268, %15811, %int4096_14269 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16509 = torch.aten.view %16507, %16508 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_14270 = torch.constant.int 4
    %16510 = torch.aten.mul.int %int4_14270, %15826 : !torch.int, !torch.int -> !torch.int
    %int512_14271 = torch.constant.int 512
    %16511 = torch.prim.ListConstruct %16510, %int512_14271 : (!torch.int, !torch.int) -> !torch.list<int>
    %16512 = torch.aten.view %16467, %16511 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %16512, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %16513 = torch.aten.mm %16512, %16483 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16513, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14272 = torch.constant.int 4
    %int4096_14273 = torch.constant.int 4096
    %16514 = torch.prim.ListConstruct %int4_14272, %15826, %int4096_14273 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16515 = torch.aten.view %16513, %16514 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_14274 = torch.constant.int 4
    %16516 = torch.aten.mul.int %int4_14274, %15841 : !torch.int, !torch.int -> !torch.int
    %int512_14275 = torch.constant.int 512
    %16517 = torch.prim.ListConstruct %16516, %int512_14275 : (!torch.int, !torch.int) -> !torch.list<int>
    %16518 = torch.aten.view %16469, %16517 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %16518, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %16519 = torch.aten.mm %16518, %16485 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16519, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14276 = torch.constant.int 4
    %int4096_14277 = torch.constant.int 4096
    %16520 = torch.prim.ListConstruct %int4_14276, %15841, %int4096_14277 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16521 = torch.aten.view %16519, %16520 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_14278 = torch.constant.int 4
    %16522 = torch.aten.mul.int %int4_14278, %15856 : !torch.int, !torch.int -> !torch.int
    %int512_14279 = torch.constant.int 512
    %16523 = torch.prim.ListConstruct %16522, %int512_14279 : (!torch.int, !torch.int) -> !torch.list<int>
    %16524 = torch.aten.view %16471, %16523 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %16524, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %16525 = torch.aten.mm %16524, %16487 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16525, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14280 = torch.constant.int 4
    %int4096_14281 = torch.constant.int 4096
    %16526 = torch.prim.ListConstruct %int4_14280, %15856, %int4096_14281 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16527 = torch.aten.view %16525, %16526 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_14282 = torch.constant.int 4
    %16528 = torch.aten.mul.int %int4_14282, %15871 : !torch.int, !torch.int -> !torch.int
    %int512_14283 = torch.constant.int 512
    %16529 = torch.prim.ListConstruct %16528, %int512_14283 : (!torch.int, !torch.int) -> !torch.list<int>
    %16530 = torch.aten.view %16473, %16529 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %16530, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %16531 = torch.aten.mm %16530, %16489 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16531, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14284 = torch.constant.int 4
    %int4096_14285 = torch.constant.int 4096
    %16532 = torch.prim.ListConstruct %int4_14284, %15871, %int4096_14285 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16533 = torch.aten.view %16531, %16532 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_14286 = torch.constant.int 4
    %16534 = torch.aten.mul.int %int4_14286, %15886 : !torch.int, !torch.int -> !torch.int
    %int512_14287 = torch.constant.int 512
    %16535 = torch.prim.ListConstruct %16534, %int512_14287 : (!torch.int, !torch.int) -> !torch.list<int>
    %16536 = torch.aten.view %16475, %16535 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %16536, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %16537 = torch.aten.mm %16536, %16491 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16537, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14288 = torch.constant.int 4
    %int4096_14289 = torch.constant.int 4096
    %16538 = torch.prim.ListConstruct %int4_14288, %15886, %int4096_14289 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16539 = torch.aten.view %16537, %16538 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
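    // Cross-device reduction, @__device_0: the seven remote partials are copied in
    // with flow.tensor.transfer and chain-added to the local partial %16497.
    // Operands are taken in shard order (0..7) on every device, so each replica
    // computes the identical sum; in effect this is an all-reduce materialized as
    // point-to-point transfers plus adds, repeated once per device below.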
    %16540 = torch_c.to_builtin_tensor %16503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14290 = arith.constant 1 : index
    %dim_14291 = tensor.dim %16540, %c1_14290 : tensor<4x?x4096xf16>
    %16541 = flow.tensor.transfer %16540 : tensor<4x?x4096xf16>{%dim_14291} to #hal.device.promise<@__device_0>
    %16542 = torch_c.from_builtin_tensor %16541 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16543 = torch_c.to_builtin_tensor %16509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14292 = arith.constant 1 : index
    %dim_14293 = tensor.dim %16543, %c1_14292 : tensor<4x?x4096xf16>
    %16544 = flow.tensor.transfer %16543 : tensor<4x?x4096xf16>{%dim_14293} to #hal.device.promise<@__device_0>
    %16545 = torch_c.from_builtin_tensor %16544 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16546 = torch_c.to_builtin_tensor %16515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14294 = arith.constant 1 : index
    %dim_14295 = tensor.dim %16546, %c1_14294 : tensor<4x?x4096xf16>
    %16547 = flow.tensor.transfer %16546 : tensor<4x?x4096xf16>{%dim_14295} to #hal.device.promise<@__device_0>
    %16548 = torch_c.from_builtin_tensor %16547 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16549 = torch_c.to_builtin_tensor %16521 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14296 = arith.constant 1 : index
    %dim_14297 = tensor.dim %16549, %c1_14296 : tensor<4x?x4096xf16>
    %16550 = flow.tensor.transfer %16549 : tensor<4x?x4096xf16>{%dim_14297} to #hal.device.promise<@__device_0>
    %16551 = torch_c.from_builtin_tensor %16550 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16552 = torch_c.to_builtin_tensor %16527 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14298 = arith.constant 1 : index
    %dim_14299 = tensor.dim %16552, %c1_14298 : tensor<4x?x4096xf16>
    %16553 = flow.tensor.transfer %16552 : tensor<4x?x4096xf16>{%dim_14299} to #hal.device.promise<@__device_0>
    %16554 = torch_c.from_builtin_tensor %16553 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16555 = torch_c.to_builtin_tensor %16533 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14300 = arith.constant 1 : index
    %dim_14301 = tensor.dim %16555, %c1_14300 : tensor<4x?x4096xf16>
    %16556 = flow.tensor.transfer %16555 : tensor<4x?x4096xf16>{%dim_14301} to #hal.device.promise<@__device_0>
    %16557 = torch_c.from_builtin_tensor %16556 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16558 = torch_c.to_builtin_tensor %16539 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14302 = arith.constant 1 : index
    %dim_14303 = tensor.dim %16558, %c1_14302 : tensor<4x?x4096xf16>
    %16559 = flow.tensor.transfer %16558 : tensor<4x?x4096xf16>{%dim_14303} to #hal.device.promise<@__device_0>
    %16560 = torch_c.from_builtin_tensor %16559 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14304 = torch.constant.int 1
    %16561 = torch.aten.add.Tensor %16497, %16542, %int1_14304 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14305 = torch.constant.int 1
    %16562 = torch.aten.add.Tensor %16561, %16545, %int1_14305 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14306 = torch.constant.int 1
    %16563 = torch.aten.add.Tensor %16562, %16548, %int1_14306 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14307 = torch.constant.int 1
    %16564 = torch.aten.add.Tensor %16563, %16551, %int1_14307 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14308 = torch.constant.int 1
    %16565 = torch.aten.add.Tensor %16564, %16554, %int1_14308 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14309 = torch.constant.int 1
    %16566 = torch.aten.add.Tensor %16565, %16557, %int1_14309 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14310 = torch.constant.int 1
    %16567 = torch.aten.add.Tensor %16566, %16560, %int1_14310 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
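    // Same transfer-and-sum pattern for @__device_1 (local partial %16503, used
    // directly without a transfer).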
    %16568 = torch_c.to_builtin_tensor %16497 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14311 = arith.constant 1 : index
    %dim_14312 = tensor.dim %16568, %c1_14311 : tensor<4x?x4096xf16>
    %16569 = flow.tensor.transfer %16568 : tensor<4x?x4096xf16>{%dim_14312} to #hal.device.promise<@__device_1>
    %16570 = torch_c.from_builtin_tensor %16569 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16571 = torch_c.to_builtin_tensor %16509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14313 = arith.constant 1 : index
    %dim_14314 = tensor.dim %16571, %c1_14313 : tensor<4x?x4096xf16>
    %16572 = flow.tensor.transfer %16571 : tensor<4x?x4096xf16>{%dim_14314} to #hal.device.promise<@__device_1>
    %16573 = torch_c.from_builtin_tensor %16572 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16574 = torch_c.to_builtin_tensor %16515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14315 = arith.constant 1 : index
    %dim_14316 = tensor.dim %16574, %c1_14315 : tensor<4x?x4096xf16>
    %16575 = flow.tensor.transfer %16574 : tensor<4x?x4096xf16>{%dim_14316} to #hal.device.promise<@__device_1>
    %16576 = torch_c.from_builtin_tensor %16575 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16577 = torch_c.to_builtin_tensor %16521 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14317 = arith.constant 1 : index
    %dim_14318 = tensor.dim %16577, %c1_14317 : tensor<4x?x4096xf16>
    %16578 = flow.tensor.transfer %16577 : tensor<4x?x4096xf16>{%dim_14318} to #hal.device.promise<@__device_1>
    %16579 = torch_c.from_builtin_tensor %16578 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16580 = torch_c.to_builtin_tensor %16527 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14319 = arith.constant 1 : index
    %dim_14320 = tensor.dim %16580, %c1_14319 : tensor<4x?x4096xf16>
    %16581 = flow.tensor.transfer %16580 : tensor<4x?x4096xf16>{%dim_14320} to #hal.device.promise<@__device_1>
    %16582 = torch_c.from_builtin_tensor %16581 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16583 = torch_c.to_builtin_tensor %16533 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14321 = arith.constant 1 : index
    %dim_14322 = tensor.dim %16583, %c1_14321 : tensor<4x?x4096xf16>
    %16584 = flow.tensor.transfer %16583 : tensor<4x?x4096xf16>{%dim_14322} to #hal.device.promise<@__device_1>
    %16585 = torch_c.from_builtin_tensor %16584 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16586 = torch_c.to_builtin_tensor %16539 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14323 = arith.constant 1 : index
    %dim_14324 = tensor.dim %16586, %c1_14323 : tensor<4x?x4096xf16>
    %16587 = flow.tensor.transfer %16586 : tensor<4x?x4096xf16>{%dim_14324} to #hal.device.promise<@__device_1>
    %16588 = torch_c.from_builtin_tensor %16587 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14325 = torch.constant.int 1
    %16589 = torch.aten.add.Tensor %16570, %16503, %int1_14325 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14326 = torch.constant.int 1
    %16590 = torch.aten.add.Tensor %16589, %16573, %int1_14326 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14327 = torch.constant.int 1
    %16591 = torch.aten.add.Tensor %16590, %16576, %int1_14327 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14328 = torch.constant.int 1
    %16592 = torch.aten.add.Tensor %16591, %16579, %int1_14328 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14329 = torch.constant.int 1
    %16593 = torch.aten.add.Tensor %16592, %16582, %int1_14329 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14330 = torch.constant.int 1
    %16594 = torch.aten.add.Tensor %16593, %16585, %int1_14330 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14331 = torch.constant.int 1
    %16595 = torch.aten.add.Tensor %16594, %16588, %int1_14331 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
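    // Reduction replica for @__device_2 (local partial %16509).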
    %16596 = torch_c.to_builtin_tensor %16497 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14332 = arith.constant 1 : index
    %dim_14333 = tensor.dim %16596, %c1_14332 : tensor<4x?x4096xf16>
    %16597 = flow.tensor.transfer %16596 : tensor<4x?x4096xf16>{%dim_14333} to #hal.device.promise<@__device_2>
    %16598 = torch_c.from_builtin_tensor %16597 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16599 = torch_c.to_builtin_tensor %16503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14334 = arith.constant 1 : index
    %dim_14335 = tensor.dim %16599, %c1_14334 : tensor<4x?x4096xf16>
    %16600 = flow.tensor.transfer %16599 : tensor<4x?x4096xf16>{%dim_14335} to #hal.device.promise<@__device_2>
    %16601 = torch_c.from_builtin_tensor %16600 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16602 = torch_c.to_builtin_tensor %16515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14336 = arith.constant 1 : index
    %dim_14337 = tensor.dim %16602, %c1_14336 : tensor<4x?x4096xf16>
    %16603 = flow.tensor.transfer %16602 : tensor<4x?x4096xf16>{%dim_14337} to #hal.device.promise<@__device_2>
    %16604 = torch_c.from_builtin_tensor %16603 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16605 = torch_c.to_builtin_tensor %16521 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14338 = arith.constant 1 : index
    %dim_14339 = tensor.dim %16605, %c1_14338 : tensor<4x?x4096xf16>
    %16606 = flow.tensor.transfer %16605 : tensor<4x?x4096xf16>{%dim_14339} to #hal.device.promise<@__device_2>
    %16607 = torch_c.from_builtin_tensor %16606 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16608 = torch_c.to_builtin_tensor %16527 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14340 = arith.constant 1 : index
    %dim_14341 = tensor.dim %16608, %c1_14340 : tensor<4x?x4096xf16>
    %16609 = flow.tensor.transfer %16608 : tensor<4x?x4096xf16>{%dim_14341} to #hal.device.promise<@__device_2>
    %16610 = torch_c.from_builtin_tensor %16609 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16611 = torch_c.to_builtin_tensor %16533 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14342 = arith.constant 1 : index
    %dim_14343 = tensor.dim %16611, %c1_14342 : tensor<4x?x4096xf16>
    %16612 = flow.tensor.transfer %16611 : tensor<4x?x4096xf16>{%dim_14343} to #hal.device.promise<@__device_2>
    %16613 = torch_c.from_builtin_tensor %16612 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16614 = torch_c.to_builtin_tensor %16539 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14344 = arith.constant 1 : index
    %dim_14345 = tensor.dim %16614, %c1_14344 : tensor<4x?x4096xf16>
    %16615 = flow.tensor.transfer %16614 : tensor<4x?x4096xf16>{%dim_14345} to #hal.device.promise<@__device_2>
    %16616 = torch_c.from_builtin_tensor %16615 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14346 = torch.constant.int 1
    %16617 = torch.aten.add.Tensor %16598, %16601, %int1_14346 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14347 = torch.constant.int 1
    %16618 = torch.aten.add.Tensor %16617, %16509, %int1_14347 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14348 = torch.constant.int 1
    %16619 = torch.aten.add.Tensor %16618, %16604, %int1_14348 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14349 = torch.constant.int 1
    %16620 = torch.aten.add.Tensor %16619, %16607, %int1_14349 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14350 = torch.constant.int 1
    %16621 = torch.aten.add.Tensor %16620, %16610, %int1_14350 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14351 = torch.constant.int 1
    %16622 = torch.aten.add.Tensor %16621, %16613, %int1_14351 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14352 = torch.constant.int 1
    %16623 = torch.aten.add.Tensor %16622, %16616, %int1_14352 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
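    // Reduction replica for @__device_3 (local partial %16515).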
    %16624 = torch_c.to_builtin_tensor %16497 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14353 = arith.constant 1 : index
    %dim_14354 = tensor.dim %16624, %c1_14353 : tensor<4x?x4096xf16>
    %16625 = flow.tensor.transfer %16624 : tensor<4x?x4096xf16>{%dim_14354} to #hal.device.promise<@__device_3>
    %16626 = torch_c.from_builtin_tensor %16625 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16627 = torch_c.to_builtin_tensor %16503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14355 = arith.constant 1 : index
    %dim_14356 = tensor.dim %16627, %c1_14355 : tensor<4x?x4096xf16>
    %16628 = flow.tensor.transfer %16627 : tensor<4x?x4096xf16>{%dim_14356} to #hal.device.promise<@__device_3>
    %16629 = torch_c.from_builtin_tensor %16628 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16630 = torch_c.to_builtin_tensor %16509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14357 = arith.constant 1 : index
    %dim_14358 = tensor.dim %16630, %c1_14357 : tensor<4x?x4096xf16>
    %16631 = flow.tensor.transfer %16630 : tensor<4x?x4096xf16>{%dim_14358} to #hal.device.promise<@__device_3>
    %16632 = torch_c.from_builtin_tensor %16631 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16633 = torch_c.to_builtin_tensor %16521 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14359 = arith.constant 1 : index
    %dim_14360 = tensor.dim %16633, %c1_14359 : tensor<4x?x4096xf16>
    %16634 = flow.tensor.transfer %16633 : tensor<4x?x4096xf16>{%dim_14360} to #hal.device.promise<@__device_3>
    %16635 = torch_c.from_builtin_tensor %16634 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16636 = torch_c.to_builtin_tensor %16527 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14361 = arith.constant 1 : index
    %dim_14362 = tensor.dim %16636, %c1_14361 : tensor<4x?x4096xf16>
    %16637 = flow.tensor.transfer %16636 : tensor<4x?x4096xf16>{%dim_14362} to #hal.device.promise<@__device_3>
    %16638 = torch_c.from_builtin_tensor %16637 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16639 = torch_c.to_builtin_tensor %16533 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14363 = arith.constant 1 : index
    %dim_14364 = tensor.dim %16639, %c1_14363 : tensor<4x?x4096xf16>
    %16640 = flow.tensor.transfer %16639 : tensor<4x?x4096xf16>{%dim_14364} to #hal.device.promise<@__device_3>
    %16641 = torch_c.from_builtin_tensor %16640 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16642 = torch_c.to_builtin_tensor %16539 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14365 = arith.constant 1 : index
    %dim_14366 = tensor.dim %16642, %c1_14365 : tensor<4x?x4096xf16>
    %16643 = flow.tensor.transfer %16642 : tensor<4x?x4096xf16>{%dim_14366} to #hal.device.promise<@__device_3>
    %16644 = torch_c.from_builtin_tensor %16643 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14367 = torch.constant.int 1
    %16645 = torch.aten.add.Tensor %16626, %16629, %int1_14367 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14368 = torch.constant.int 1
    %16646 = torch.aten.add.Tensor %16645, %16632, %int1_14368 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14369 = torch.constant.int 1
    %16647 = torch.aten.add.Tensor %16646, %16515, %int1_14369 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14370 = torch.constant.int 1
    %16648 = torch.aten.add.Tensor %16647, %16635, %int1_14370 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14371 = torch.constant.int 1
    %16649 = torch.aten.add.Tensor %16648, %16638, %int1_14371 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14372 = torch.constant.int 1
    %16650 = torch.aten.add.Tensor %16649, %16641, %int1_14372 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14373 = torch.constant.int 1
    %16651 = torch.aten.add.Tensor %16650, %16644, %int1_14373 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
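    // Reduction replica for @__device_4 (local partial %16521).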
    %16652 = torch_c.to_builtin_tensor %16497 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14374 = arith.constant 1 : index
    %dim_14375 = tensor.dim %16652, %c1_14374 : tensor<4x?x4096xf16>
    %16653 = flow.tensor.transfer %16652 : tensor<4x?x4096xf16>{%dim_14375} to #hal.device.promise<@__device_4>
    %16654 = torch_c.from_builtin_tensor %16653 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16655 = torch_c.to_builtin_tensor %16503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14376 = arith.constant 1 : index
    %dim_14377 = tensor.dim %16655, %c1_14376 : tensor<4x?x4096xf16>
    %16656 = flow.tensor.transfer %16655 : tensor<4x?x4096xf16>{%dim_14377} to #hal.device.promise<@__device_4>
    %16657 = torch_c.from_builtin_tensor %16656 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16658 = torch_c.to_builtin_tensor %16509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14378 = arith.constant 1 : index
    %dim_14379 = tensor.dim %16658, %c1_14378 : tensor<4x?x4096xf16>
    %16659 = flow.tensor.transfer %16658 : tensor<4x?x4096xf16>{%dim_14379} to #hal.device.promise<@__device_4>
    %16660 = torch_c.from_builtin_tensor %16659 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16661 = torch_c.to_builtin_tensor %16515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14380 = arith.constant 1 : index
    %dim_14381 = tensor.dim %16661, %c1_14380 : tensor<4x?x4096xf16>
    %16662 = flow.tensor.transfer %16661 : tensor<4x?x4096xf16>{%dim_14381} to #hal.device.promise<@__device_4>
    %16663 = torch_c.from_builtin_tensor %16662 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16664 = torch_c.to_builtin_tensor %16527 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14382 = arith.constant 1 : index
    %dim_14383 = tensor.dim %16664, %c1_14382 : tensor<4x?x4096xf16>
    %16665 = flow.tensor.transfer %16664 : tensor<4x?x4096xf16>{%dim_14383} to #hal.device.promise<@__device_4>
    %16666 = torch_c.from_builtin_tensor %16665 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16667 = torch_c.to_builtin_tensor %16533 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14384 = arith.constant 1 : index
    %dim_14385 = tensor.dim %16667, %c1_14384 : tensor<4x?x4096xf16>
    %16668 = flow.tensor.transfer %16667 : tensor<4x?x4096xf16>{%dim_14385} to #hal.device.promise<@__device_4>
    %16669 = torch_c.from_builtin_tensor %16668 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16670 = torch_c.to_builtin_tensor %16539 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14386 = arith.constant 1 : index
    %dim_14387 = tensor.dim %16670, %c1_14386 : tensor<4x?x4096xf16>
    %16671 = flow.tensor.transfer %16670 : tensor<4x?x4096xf16>{%dim_14387} to #hal.device.promise<@__device_4>
    %16672 = torch_c.from_builtin_tensor %16671 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14388 = torch.constant.int 1
    %16673 = torch.aten.add.Tensor %16654, %16657, %int1_14388 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14389 = torch.constant.int 1
    %16674 = torch.aten.add.Tensor %16673, %16660, %int1_14389 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14390 = torch.constant.int 1
    %16675 = torch.aten.add.Tensor %16674, %16663, %int1_14390 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14391 = torch.constant.int 1
    %16676 = torch.aten.add.Tensor %16675, %16521, %int1_14391 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14392 = torch.constant.int 1
    %16677 = torch.aten.add.Tensor %16676, %16666, %int1_14392 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14393 = torch.constant.int 1
    %16678 = torch.aten.add.Tensor %16677, %16669, %int1_14393 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14394 = torch.constant.int 1
    %16679 = torch.aten.add.Tensor %16678, %16672, %int1_14394 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
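    // Reduction replica for @__device_5 (local partial %16527).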
    %16680 = torch_c.to_builtin_tensor %16497 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14395 = arith.constant 1 : index
    %dim_14396 = tensor.dim %16680, %c1_14395 : tensor<4x?x4096xf16>
    %16681 = flow.tensor.transfer %16680 : tensor<4x?x4096xf16>{%dim_14396} to #hal.device.promise<@__device_5>
    %16682 = torch_c.from_builtin_tensor %16681 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16683 = torch_c.to_builtin_tensor %16503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14397 = arith.constant 1 : index
    %dim_14398 = tensor.dim %16683, %c1_14397 : tensor<4x?x4096xf16>
    %16684 = flow.tensor.transfer %16683 : tensor<4x?x4096xf16>{%dim_14398} to #hal.device.promise<@__device_5>
    %16685 = torch_c.from_builtin_tensor %16684 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16686 = torch_c.to_builtin_tensor %16509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14399 = arith.constant 1 : index
    %dim_14400 = tensor.dim %16686, %c1_14399 : tensor<4x?x4096xf16>
    %16687 = flow.tensor.transfer %16686 : tensor<4x?x4096xf16>{%dim_14400} to #hal.device.promise<@__device_5>
    %16688 = torch_c.from_builtin_tensor %16687 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16689 = torch_c.to_builtin_tensor %16515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14401 = arith.constant 1 : index
    %dim_14402 = tensor.dim %16689, %c1_14401 : tensor<4x?x4096xf16>
    %16690 = flow.tensor.transfer %16689 : tensor<4x?x4096xf16>{%dim_14402} to #hal.device.promise<@__device_5>
    %16691 = torch_c.from_builtin_tensor %16690 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16692 = torch_c.to_builtin_tensor %16521 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14403 = arith.constant 1 : index
    %dim_14404 = tensor.dim %16692, %c1_14403 : tensor<4x?x4096xf16>
    %16693 = flow.tensor.transfer %16692 : tensor<4x?x4096xf16>{%dim_14404} to #hal.device.promise<@__device_5>
    %16694 = torch_c.from_builtin_tensor %16693 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16695 = torch_c.to_builtin_tensor %16533 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14405 = arith.constant 1 : index
    %dim_14406 = tensor.dim %16695, %c1_14405 : tensor<4x?x4096xf16>
    %16696 = flow.tensor.transfer %16695 : tensor<4x?x4096xf16>{%dim_14406} to #hal.device.promise<@__device_5>
    %16697 = torch_c.from_builtin_tensor %16696 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16698 = torch_c.to_builtin_tensor %16539 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14407 = arith.constant 1 : index
    %dim_14408 = tensor.dim %16698, %c1_14407 : tensor<4x?x4096xf16>
    %16699 = flow.tensor.transfer %16698 : tensor<4x?x4096xf16>{%dim_14408} to #hal.device.promise<@__device_5>
    %16700 = torch_c.from_builtin_tensor %16699 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14409 = torch.constant.int 1
    %16701 = torch.aten.add.Tensor %16682, %16685, %int1_14409 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14410 = torch.constant.int 1
    %16702 = torch.aten.add.Tensor %16701, %16688, %int1_14410 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14411 = torch.constant.int 1
    %16703 = torch.aten.add.Tensor %16702, %16691, %int1_14411 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14412 = torch.constant.int 1
    %16704 = torch.aten.add.Tensor %16703, %16694, %int1_14412 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14413 = torch.constant.int 1
    %16705 = torch.aten.add.Tensor %16704, %16527, %int1_14413 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14414 = torch.constant.int 1
    %16706 = torch.aten.add.Tensor %16705, %16697, %int1_14414 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14415 = torch.constant.int 1
    %16707 = torch.aten.add.Tensor %16706, %16700, %int1_14415 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
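    // Reduction replica for @__device_6 (local partial %16533).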
    %16708 = torch_c.to_builtin_tensor %16497 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14416 = arith.constant 1 : index
    %dim_14417 = tensor.dim %16708, %c1_14416 : tensor<4x?x4096xf16>
    %16709 = flow.tensor.transfer %16708 : tensor<4x?x4096xf16>{%dim_14417} to #hal.device.promise<@__device_6>
    %16710 = torch_c.from_builtin_tensor %16709 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16711 = torch_c.to_builtin_tensor %16503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14418 = arith.constant 1 : index
    %dim_14419 = tensor.dim %16711, %c1_14418 : tensor<4x?x4096xf16>
    %16712 = flow.tensor.transfer %16711 : tensor<4x?x4096xf16>{%dim_14419} to #hal.device.promise<@__device_6>
    %16713 = torch_c.from_builtin_tensor %16712 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16714 = torch_c.to_builtin_tensor %16509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14420 = arith.constant 1 : index
    %dim_14421 = tensor.dim %16714, %c1_14420 : tensor<4x?x4096xf16>
    %16715 = flow.tensor.transfer %16714 : tensor<4x?x4096xf16>{%dim_14421} to #hal.device.promise<@__device_6>
    %16716 = torch_c.from_builtin_tensor %16715 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16717 = torch_c.to_builtin_tensor %16515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14422 = arith.constant 1 : index
    %dim_14423 = tensor.dim %16717, %c1_14422 : tensor<4x?x4096xf16>
    %16718 = flow.tensor.transfer %16717 : tensor<4x?x4096xf16>{%dim_14423} to #hal.device.promise<@__device_6>
    %16719 = torch_c.from_builtin_tensor %16718 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16720 = torch_c.to_builtin_tensor %16521 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14424 = arith.constant 1 : index
    %dim_14425 = tensor.dim %16720, %c1_14424 : tensor<4x?x4096xf16>
    %16721 = flow.tensor.transfer %16720 : tensor<4x?x4096xf16>{%dim_14425} to #hal.device.promise<@__device_6>
    %16722 = torch_c.from_builtin_tensor %16721 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16723 = torch_c.to_builtin_tensor %16527 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14426 = arith.constant 1 : index
    %dim_14427 = tensor.dim %16723, %c1_14426 : tensor<4x?x4096xf16>
    %16724 = flow.tensor.transfer %16723 : tensor<4x?x4096xf16>{%dim_14427} to #hal.device.promise<@__device_6>
    %16725 = torch_c.from_builtin_tensor %16724 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16726 = torch_c.to_builtin_tensor %16539 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14428 = arith.constant 1 : index
    %dim_14429 = tensor.dim %16726, %c1_14428 : tensor<4x?x4096xf16>
    %16727 = flow.tensor.transfer %16726 : tensor<4x?x4096xf16>{%dim_14429} to #hal.device.promise<@__device_6>
    %16728 = torch_c.from_builtin_tensor %16727 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14430 = torch.constant.int 1
    %16729 = torch.aten.add.Tensor %16710, %16713, %int1_14430 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14431 = torch.constant.int 1
    %16730 = torch.aten.add.Tensor %16729, %16716, %int1_14431 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14432 = torch.constant.int 1
    %16731 = torch.aten.add.Tensor %16730, %16719, %int1_14432 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14433 = torch.constant.int 1
    %16732 = torch.aten.add.Tensor %16731, %16722, %int1_14433 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14434 = torch.constant.int 1
    %16733 = torch.aten.add.Tensor %16732, %16725, %int1_14434 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14435 = torch.constant.int 1
    %16734 = torch.aten.add.Tensor %16733, %16533, %int1_14435 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14436 = torch.constant.int 1
    %16735 = torch.aten.add.Tensor %16734, %16728, %int1_14436 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
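    // Same gather for @__device_7: transfer the partial results over, then sum them below.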
    %16736 = torch_c.to_builtin_tensor %16497 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14437 = arith.constant 1 : index
    %dim_14438 = tensor.dim %16736, %c1_14437 : tensor<4x?x4096xf16>
    %16737 = flow.tensor.transfer %16736 : tensor<4x?x4096xf16>{%dim_14438} to #hal.device.promise<@__device_7>
    %16738 = torch_c.from_builtin_tensor %16737 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16739 = torch_c.to_builtin_tensor %16503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14439 = arith.constant 1 : index
    %dim_14440 = tensor.dim %16739, %c1_14439 : tensor<4x?x4096xf16>
    %16740 = flow.tensor.transfer %16739 : tensor<4x?x4096xf16>{%dim_14440} to #hal.device.promise<@__device_7>
    %16741 = torch_c.from_builtin_tensor %16740 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16742 = torch_c.to_builtin_tensor %16509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14441 = arith.constant 1 : index
    %dim_14442 = tensor.dim %16742, %c1_14441 : tensor<4x?x4096xf16>
    %16743 = flow.tensor.transfer %16742 : tensor<4x?x4096xf16>{%dim_14442} to #hal.device.promise<@__device_7>
    %16744 = torch_c.from_builtin_tensor %16743 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16745 = torch_c.to_builtin_tensor %16515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14443 = arith.constant 1 : index
    %dim_14444 = tensor.dim %16745, %c1_14443 : tensor<4x?x4096xf16>
    %16746 = flow.tensor.transfer %16745 : tensor<4x?x4096xf16>{%dim_14444} to #hal.device.promise<@__device_7>
    %16747 = torch_c.from_builtin_tensor %16746 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16748 = torch_c.to_builtin_tensor %16521 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14445 = arith.constant 1 : index
    %dim_14446 = tensor.dim %16748, %c1_14445 : tensor<4x?x4096xf16>
    %16749 = flow.tensor.transfer %16748 : tensor<4x?x4096xf16>{%dim_14446} to #hal.device.promise<@__device_7>
    %16750 = torch_c.from_builtin_tensor %16749 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16751 = torch_c.to_builtin_tensor %16527 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14447 = arith.constant 1 : index
    %dim_14448 = tensor.dim %16751, %c1_14447 : tensor<4x?x4096xf16>
    %16752 = flow.tensor.transfer %16751 : tensor<4x?x4096xf16>{%dim_14448} to #hal.device.promise<@__device_7>
    %16753 = torch_c.from_builtin_tensor %16752 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %16754 = torch_c.to_builtin_tensor %16533 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14449 = arith.constant 1 : index
    %dim_14450 = tensor.dim %16754, %c1_14449 : tensor<4x?x4096xf16>
    %16755 = flow.tensor.transfer %16754 : tensor<4x?x4096xf16>{%dim_14450} to #hal.device.promise<@__device_7>
    %16756 = torch_c.from_builtin_tensor %16755 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14451 = torch.constant.int 1
    %16757 = torch.aten.add.Tensor %16738, %16741, %int1_14451 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14452 = torch.constant.int 1
    %16758 = torch.aten.add.Tensor %16757, %16744, %int1_14452 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14453 = torch.constant.int 1
    %16759 = torch.aten.add.Tensor %16758, %16747, %int1_14453 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14454 = torch.constant.int 1
    %16760 = torch.aten.add.Tensor %16759, %16750, %int1_14454 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14455 = torch.constant.int 1
    %16761 = torch.aten.add.Tensor %16760, %16753, %int1_14455 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14456 = torch.constant.int 1
    %16762 = torch.aten.add.Tensor %16761, %16756, %int1_14456 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14457 = torch.constant.int 1
    %16763 = torch.aten.add.Tensor %16762, %16539, %int1_14457 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
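    // Every device now holds the same reduced sum. Next: per-device residual adds; %15423..%15430 look like the incoming hidden states and %16567..%16763 the freshly reduced outputs.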
    %int1_14458 = torch.constant.int 1
    %16764 = torch.aten.add.Tensor %15423, %16567, %int1_14458 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14459 = torch.constant.int 1
    %16765 = torch.aten.add.Tensor %15424, %16595, %int1_14459 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14460 = torch.constant.int 1
    %16766 = torch.aten.add.Tensor %15425, %16623, %int1_14460 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14461 = torch.constant.int 1
    %16767 = torch.aten.add.Tensor %15426, %16651, %int1_14461 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14462 = torch.constant.int 1
    %16768 = torch.aten.add.Tensor %15427, %16679, %int1_14462 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14463 = torch.constant.int 1
    %16769 = torch.aten.add.Tensor %15428, %16707, %int1_14463 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14464 = torch.constant.int 1
    %16770 = torch.aten.add.Tensor %15429, %16735, %int1_14464 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14465 = torch.constant.int 1
    %16771 = torch.aten.add.Tensor %15430, %16763, %int1_14465 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
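    // Upcast each residual sum to f32 (torch dtype code 6) before computing the norm statistics.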
    %int6_14466 = torch.constant.int 6
    %16772 = torch.prims.convert_element_type %16764, %int6_14466 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14467 = torch.constant.int 6
    %16773 = torch.prims.convert_element_type %16765, %int6_14467 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14468 = torch.constant.int 6
    %16774 = torch.prims.convert_element_type %16766, %int6_14468 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14469 = torch.constant.int 6
    %16775 = torch.prims.convert_element_type %16767, %int6_14469 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14470 = torch.constant.int 6
    %16776 = torch.prims.convert_element_type %16768, %int6_14470 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14471 = torch.constant.int 6
    %16777 = torch.prims.convert_element_type %16769, %int6_14471 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14472 = torch.constant.int 6
    %16778 = torch.prims.convert_element_type %16770, %int6_14472 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14473 = torch.constant.int 6
    %16779 = torch.prims.convert_element_type %16771, %int6_14473 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
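    // Square elementwise: x^2.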
    %int2_14474 = torch.constant.int 2
    %16780 = torch.aten.pow.Tensor_Scalar %16772, %int2_14474 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14475 = torch.constant.int 2
    %16781 = torch.aten.pow.Tensor_Scalar %16773, %int2_14475 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14476 = torch.constant.int 2
    %16782 = torch.aten.pow.Tensor_Scalar %16774, %int2_14476 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14477 = torch.constant.int 2
    %16783 = torch.aten.pow.Tensor_Scalar %16775, %int2_14477 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14478 = torch.constant.int 2
    %16784 = torch.aten.pow.Tensor_Scalar %16776, %int2_14478 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14479 = torch.constant.int 2
    %16785 = torch.aten.pow.Tensor_Scalar %16777, %int2_14479 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14480 = torch.constant.int 2
    %16786 = torch.aten.pow.Tensor_Scalar %16778, %int2_14480 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14481 = torch.constant.int 2
    %16787 = torch.aten.pow.Tensor_Scalar %16779, %int2_14481 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
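    // Reduce: mean of x^2 over the trailing 4096 dim with keepdim=true, yielding [4,?,1].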
    %int-1_14482 = torch.constant.int -1
    %16788 = torch.prim.ListConstruct %int-1_14482 : (!torch.int) -> !torch.list<int>
    %true_14483 = torch.constant.bool true
    %none_14484 = torch.constant.none
    %16789 = torch.aten.mean.dim %16780, %16788, %true_14483, %none_14484 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14485 = torch.constant.int -1
    %16790 = torch.prim.ListConstruct %int-1_14485 : (!torch.int) -> !torch.list<int>
    %true_14486 = torch.constant.bool true
    %none_14487 = torch.constant.none
    %16791 = torch.aten.mean.dim %16781, %16790, %true_14486, %none_14487 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14488 = torch.constant.int -1
    %16792 = torch.prim.ListConstruct %int-1_14488 : (!torch.int) -> !torch.list<int>
    %true_14489 = torch.constant.bool true
    %none_14490 = torch.constant.none
    %16793 = torch.aten.mean.dim %16782, %16792, %true_14489, %none_14490 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14491 = torch.constant.int -1
    %16794 = torch.prim.ListConstruct %int-1_14491 : (!torch.int) -> !torch.list<int>
    %true_14492 = torch.constant.bool true
    %none_14493 = torch.constant.none
    %16795 = torch.aten.mean.dim %16783, %16794, %true_14492, %none_14493 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14494 = torch.constant.int -1
    %16796 = torch.prim.ListConstruct %int-1_14494 : (!torch.int) -> !torch.list<int>
    %true_14495 = torch.constant.bool true
    %none_14496 = torch.constant.none
    %16797 = torch.aten.mean.dim %16784, %16796, %true_14495, %none_14496 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14497 = torch.constant.int -1
    %16798 = torch.prim.ListConstruct %int-1_14497 : (!torch.int) -> !torch.list<int>
    %true_14498 = torch.constant.bool true
    %none_14499 = torch.constant.none
    %16799 = torch.aten.mean.dim %16785, %16798, %true_14498, %none_14499 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14500 = torch.constant.int -1
    %16800 = torch.prim.ListConstruct %int-1_14500 : (!torch.int) -> !torch.list<int>
    %true_14501 = torch.constant.bool true
    %none_14502 = torch.constant.none
    %16801 = torch.aten.mean.dim %16786, %16800, %true_14501, %none_14502 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14503 = torch.constant.int -1
    %16802 = torch.prim.ListConstruct %int-1_14503 : (!torch.int) -> !torch.list<int>
    %true_14504 = torch.constant.bool true
    %none_14505 = torch.constant.none
    %16803 = torch.aten.mean.dim %16787, %16802, %true_14504, %none_14505 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
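    // Add epsilon (the f32 rounding of 1e-5).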
    %float9.999990e-06_14506 = torch.constant.float 9.9999997473787516E-6
    %int1_14507 = torch.constant.int 1
    %16804 = torch.aten.add.Scalar %16789, %float9.999990e-06_14506, %int1_14507 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14508 = torch.constant.float 9.9999997473787516E-6
    %int1_14509 = torch.constant.int 1
    %16805 = torch.aten.add.Scalar %16791, %float9.999990e-06_14508, %int1_14509 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14510 = torch.constant.float 9.9999997473787516E-6
    %int1_14511 = torch.constant.int 1
    %16806 = torch.aten.add.Scalar %16793, %float9.999990e-06_14510, %int1_14511 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14512 = torch.constant.float 9.9999997473787516E-6
    %int1_14513 = torch.constant.int 1
    %16807 = torch.aten.add.Scalar %16795, %float9.999990e-06_14512, %int1_14513 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14514 = torch.constant.float 9.9999997473787516E-6
    %int1_14515 = torch.constant.int 1
    %16808 = torch.aten.add.Scalar %16797, %float9.999990e-06_14514, %int1_14515 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14516 = torch.constant.float 9.9999997473787516E-6
    %int1_14517 = torch.constant.int 1
    %16809 = torch.aten.add.Scalar %16799, %float9.999990e-06_14516, %int1_14517 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14518 = torch.constant.float 9.9999997473787516E-6
    %int1_14519 = torch.constant.int 1
    %16810 = torch.aten.add.Scalar %16801, %float9.999990e-06_14518, %int1_14519 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14520 = torch.constant.float 9.9999997473787516E-6
    %int1_14521 = torch.constant.int 1
    %16811 = torch.aten.add.Scalar %16803, %float9.999990e-06_14520, %int1_14521 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
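    // rsqrt gives 1 / sqrt(mean(x^2) + eps), i.e. the reciprocal RMS.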
    %16812 = torch.aten.rsqrt %16804 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %16813 = torch.aten.rsqrt %16805 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %16814 = torch.aten.rsqrt %16806 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %16815 = torch.aten.rsqrt %16807 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %16816 = torch.aten.rsqrt %16808 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %16817 = torch.aten.rsqrt %16809 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %16818 = torch.aten.rsqrt %16810 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %16819 = torch.aten.rsqrt %16811 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %16819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
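    // Normalize: multiply each f32 activation by its reciprocal RMS.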
    %16820 = torch.aten.mul.Tensor %16772, %16812 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16821 = torch.aten.mul.Tensor %16773, %16813 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16822 = torch.aten.mul.Tensor %16774, %16814 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16823 = torch.aten.mul.Tensor %16775, %16815 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16824 = torch.aten.mul.Tensor %16776, %16816 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16825 = torch.aten.mul.Tensor %16777, %16817 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16826 = torch.aten.mul.Tensor %16778, %16818 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16827 = torch.aten.mul.Tensor %16779, %16819 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
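    // Scale by the replicated [4096] norm weight (%552..%559), completing the RMSNorm; given the FFN matmuls below, this is presumably the block's ffn_norm weight.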
    %16828 = torch.aten.mul.Tensor %552, %16820 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16829 = torch.aten.mul.Tensor %553, %16821 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16830 = torch.aten.mul.Tensor %554, %16822 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16831 = torch.aten.mul.Tensor %555, %16823 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16832 = torch.aten.mul.Tensor %556, %16824 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16833 = torch.aten.mul.Tensor %557, %16825 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16834 = torch.aten.mul.Tensor %558, %16826 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %16835 = torch.aten.mul.Tensor %559, %16827 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %16835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
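    // Cast the normalized activations back to f16 (torch dtype code 5).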
    %int5_14522 = torch.constant.int 5
    %16836 = torch.prims.convert_element_type %16828, %int5_14522 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14523 = torch.constant.int 5
    %16837 = torch.prims.convert_element_type %16829, %int5_14523 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14524 = torch.constant.int 5
    %16838 = torch.prims.convert_element_type %16830, %int5_14524 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14525 = torch.constant.int 5
    %16839 = torch.prims.convert_element_type %16831, %int5_14525 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14526 = torch.constant.int 5
    %16840 = torch.prims.convert_element_type %16832, %int5_14526 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14527 = torch.constant.int 5
    %16841 = torch.prims.convert_element_type %16833, %int5_14527 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14528 = torch.constant.int 5
    %16842 = torch.prims.convert_element_type %16834, %int5_14528 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14529 = torch.constant.int 5
    %16843 = torch.prims.convert_element_type %16835, %int5_14529 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %16843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
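    // Transpose the eight [1792,4096] weight shards to [4096,1792]; 1792 = 14336/8, consistent with an 8-way column-sharded FFN gate projection.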
    %int1_14530 = torch.constant.int 1
    %int0_14531 = torch.constant.int 0
    %16844 = torch.prim.ListConstruct %int1_14530, %int0_14531 : (!torch.int, !torch.int) -> !torch.list<int>
    %16845 = torch.aten.permute %560, %16844 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14532 = torch.constant.int 1
    %int0_14533 = torch.constant.int 0
    %16846 = torch.prim.ListConstruct %int1_14532, %int0_14533 : (!torch.int, !torch.int) -> !torch.list<int>
    %16847 = torch.aten.permute %561, %16846 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14534 = torch.constant.int 1
    %int0_14535 = torch.constant.int 0
    %16848 = torch.prim.ListConstruct %int1_14534, %int0_14535 : (!torch.int, !torch.int) -> !torch.list<int>
    %16849 = torch.aten.permute %562, %16848 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14536 = torch.constant.int 1
    %int0_14537 = torch.constant.int 0
    %16850 = torch.prim.ListConstruct %int1_14536, %int0_14537 : (!torch.int, !torch.int) -> !torch.list<int>
    %16851 = torch.aten.permute %563, %16850 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14538 = torch.constant.int 1
    %int0_14539 = torch.constant.int 0
    %16852 = torch.prim.ListConstruct %int1_14538, %int0_14539 : (!torch.int, !torch.int) -> !torch.list<int>
    %16853 = torch.aten.permute %564, %16852 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14540 = torch.constant.int 1
    %int0_14541 = torch.constant.int 0
    %16854 = torch.prim.ListConstruct %int1_14540, %int0_14541 : (!torch.int, !torch.int) -> !torch.list<int>
    %16855 = torch.aten.permute %565, %16854 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14542 = torch.constant.int 1
    %int0_14543 = torch.constant.int 0
    %16856 = torch.prim.ListConstruct %int1_14542, %int0_14543 : (!torch.int, !torch.int) -> !torch.list<int>
    %16857 = torch.aten.permute %566, %16856 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14544 = torch.constant.int 1
    %int0_14545 = torch.constant.int 0
    %16858 = torch.prim.ListConstruct %int1_14544, %int0_14545 : (!torch.int, !torch.int) -> !torch.list<int>
    %16859 = torch.aten.permute %567, %16858 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
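    // Per-device gate matmul: flatten [4,?,4096] to [4*?,4096], mm against the [4096,1792] shard, then reshape back to [4,?,1792]. The same three-op pattern repeats once per device below.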
    %int4_14546 = torch.constant.int 4
    %16860 = torch.aten.mul.int %int4_14546, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14547 = torch.constant.int 4096
    %16861 = torch.prim.ListConstruct %16860, %int4096_14547 : (!torch.int, !torch.int) -> !torch.list<int>
    %16862 = torch.aten.view %16836, %16861 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16862, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16863 = torch.aten.mm %16862, %16845 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16863, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14548 = torch.constant.int 4
    %int1792_14549 = torch.constant.int 1792
    %16864 = torch.prim.ListConstruct %int4_14548, %2482, %int1792_14549 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16865 = torch.aten.view %16863, %16864 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14550 = torch.constant.int 4
    %16866 = torch.aten.mul.int %int4_14550, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14551 = torch.constant.int 4096
    %16867 = torch.prim.ListConstruct %16866, %int4096_14551 : (!torch.int, !torch.int) -> !torch.list<int>
    %16868 = torch.aten.view %16837, %16867 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16868, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16869 = torch.aten.mm %16868, %16847 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16869, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14552 = torch.constant.int 4
    %int1792_14553 = torch.constant.int 1792
    %16870 = torch.prim.ListConstruct %int4_14552, %2482, %int1792_14553 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16871 = torch.aten.view %16869, %16870 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14554 = torch.constant.int 4
    %16872 = torch.aten.mul.int %int4_14554, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14555 = torch.constant.int 4096
    %16873 = torch.prim.ListConstruct %16872, %int4096_14555 : (!torch.int, !torch.int) -> !torch.list<int>
    %16874 = torch.aten.view %16838, %16873 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16874, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16875 = torch.aten.mm %16874, %16849 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16875, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14556 = torch.constant.int 4
    %int1792_14557 = torch.constant.int 1792
    %16876 = torch.prim.ListConstruct %int4_14556, %2482, %int1792_14557 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16877 = torch.aten.view %16875, %16876 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14558 = torch.constant.int 4
    %16878 = torch.aten.mul.int %int4_14558, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14559 = torch.constant.int 4096
    %16879 = torch.prim.ListConstruct %16878, %int4096_14559 : (!torch.int, !torch.int) -> !torch.list<int>
    %16880 = torch.aten.view %16839, %16879 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16880, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16881 = torch.aten.mm %16880, %16851 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16881, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14560 = torch.constant.int 4
    %int1792_14561 = torch.constant.int 1792
    %16882 = torch.prim.ListConstruct %int4_14560, %2482, %int1792_14561 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16883 = torch.aten.view %16881, %16882 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14562 = torch.constant.int 4
    %16884 = torch.aten.mul.int %int4_14562, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14563 = torch.constant.int 4096
    %16885 = torch.prim.ListConstruct %16884, %int4096_14563 : (!torch.int, !torch.int) -> !torch.list<int>
    %16886 = torch.aten.view %16840, %16885 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16886, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16887 = torch.aten.mm %16886, %16853 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16887, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14564 = torch.constant.int 4
    %int1792_14565 = torch.constant.int 1792
    %16888 = torch.prim.ListConstruct %int4_14564, %2482, %int1792_14565 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16889 = torch.aten.view %16887, %16888 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14566 = torch.constant.int 4
    %16890 = torch.aten.mul.int %int4_14566, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14567 = torch.constant.int 4096
    %16891 = torch.prim.ListConstruct %16890, %int4096_14567 : (!torch.int, !torch.int) -> !torch.list<int>
    %16892 = torch.aten.view %16841, %16891 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16892, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16893 = torch.aten.mm %16892, %16855 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16893, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14568 = torch.constant.int 4
    %int1792_14569 = torch.constant.int 1792
    %16894 = torch.prim.ListConstruct %int4_14568, %2482, %int1792_14569 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16895 = torch.aten.view %16893, %16894 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14570 = torch.constant.int 4
    %16896 = torch.aten.mul.int %int4_14570, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14571 = torch.constant.int 4096
    %16897 = torch.prim.ListConstruct %16896, %int4096_14571 : (!torch.int, !torch.int) -> !torch.list<int>
    %16898 = torch.aten.view %16842, %16897 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16898, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16899 = torch.aten.mm %16898, %16857 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16899, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14572 = torch.constant.int 4
    %int1792_14573 = torch.constant.int 1792
    %16900 = torch.prim.ListConstruct %int4_14572, %2482, %int1792_14573 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16901 = torch.aten.view %16899, %16900 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14574 = torch.constant.int 4
    %16902 = torch.aten.mul.int %int4_14574, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14575 = torch.constant.int 4096
    %16903 = torch.prim.ListConstruct %16902, %int4096_14575 : (!torch.int, !torch.int) -> !torch.list<int>
    %16904 = torch.aten.view %16843, %16903 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16904, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16905 = torch.aten.mm %16904, %16859 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16905, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14576 = torch.constant.int 4
    %int1792_14577 = torch.constant.int 1792
    %16906 = torch.prim.ListConstruct %int4_14576, %2482, %int1792_14577 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16907 = torch.aten.view %16905, %16906 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
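    // SiLU activation on each device's gate projection.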
    %16908 = torch.aten.silu %16865 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16909 = torch.aten.silu %16871 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16910 = torch.aten.silu %16877 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16911 = torch.aten.silu %16883 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16912 = torch.aten.silu %16889 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16913 = torch.aten.silu %16895 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16914 = torch.aten.silu %16901 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16915 = torch.aten.silu %16907 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
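    // Transpose the second set of [1792,4096] shards (%568..%575), presumably the FFN up projection.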
    %int1_14578 = torch.constant.int 1
    %int0_14579 = torch.constant.int 0
    %16916 = torch.prim.ListConstruct %int1_14578, %int0_14579 : (!torch.int, !torch.int) -> !torch.list<int>
    %16917 = torch.aten.permute %568, %16916 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14580 = torch.constant.int 1
    %int0_14581 = torch.constant.int 0
    %16918 = torch.prim.ListConstruct %int1_14580, %int0_14581 : (!torch.int, !torch.int) -> !torch.list<int>
    %16919 = torch.aten.permute %569, %16918 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14582 = torch.constant.int 1
    %int0_14583 = torch.constant.int 0
    %16920 = torch.prim.ListConstruct %int1_14582, %int0_14583 : (!torch.int, !torch.int) -> !torch.list<int>
    %16921 = torch.aten.permute %570, %16920 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14584 = torch.constant.int 1
    %int0_14585 = torch.constant.int 0
    %16922 = torch.prim.ListConstruct %int1_14584, %int0_14585 : (!torch.int, !torch.int) -> !torch.list<int>
    %16923 = torch.aten.permute %571, %16922 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14586 = torch.constant.int 1
    %int0_14587 = torch.constant.int 0
    %16924 = torch.prim.ListConstruct %int1_14586, %int0_14587 : (!torch.int, !torch.int) -> !torch.list<int>
    %16925 = torch.aten.permute %572, %16924 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14588 = torch.constant.int 1
    %int0_14589 = torch.constant.int 0
    %16926 = torch.prim.ListConstruct %int1_14588, %int0_14589 : (!torch.int, !torch.int) -> !torch.list<int>
    %16927 = torch.aten.permute %573, %16926 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14590 = torch.constant.int 1
    %int0_14591 = torch.constant.int 0
    %16928 = torch.prim.ListConstruct %int1_14590, %int0_14591 : (!torch.int, !torch.int) -> !torch.list<int>
    %16929 = torch.aten.permute %574, %16928 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_14592 = torch.constant.int 1
    %int0_14593 = torch.constant.int 0
    %16930 = torch.prim.ListConstruct %int1_14592, %int0_14593 : (!torch.int, !torch.int) -> !torch.list<int>
    %16931 = torch.aten.permute %575, %16930 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
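    // Per-device up matmul, reusing the flatten/mm/reshape pattern on the same normalized inputs.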
    %int4_14594 = torch.constant.int 4
    %16932 = torch.aten.mul.int %int4_14594, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14595 = torch.constant.int 4096
    %16933 = torch.prim.ListConstruct %16932, %int4096_14595 : (!torch.int, !torch.int) -> !torch.list<int>
    %16934 = torch.aten.view %16836, %16933 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16934, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16935 = torch.aten.mm %16934, %16917 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16935, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14596 = torch.constant.int 4
    %int1792_14597 = torch.constant.int 1792
    %16936 = torch.prim.ListConstruct %int4_14596, %2482, %int1792_14597 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16937 = torch.aten.view %16935, %16936 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14598 = torch.constant.int 4
    %16938 = torch.aten.mul.int %int4_14598, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14599 = torch.constant.int 4096
    %16939 = torch.prim.ListConstruct %16938, %int4096_14599 : (!torch.int, !torch.int) -> !torch.list<int>
    %16940 = torch.aten.view %16837, %16939 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16940, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16941 = torch.aten.mm %16940, %16919 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16941, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14600 = torch.constant.int 4
    %int1792_14601 = torch.constant.int 1792
    %16942 = torch.prim.ListConstruct %int4_14600, %2482, %int1792_14601 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16943 = torch.aten.view %16941, %16942 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14602 = torch.constant.int 4
    %16944 = torch.aten.mul.int %int4_14602, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14603 = torch.constant.int 4096
    %16945 = torch.prim.ListConstruct %16944, %int4096_14603 : (!torch.int, !torch.int) -> !torch.list<int>
    %16946 = torch.aten.view %16838, %16945 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16946, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16947 = torch.aten.mm %16946, %16921 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16947, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14604 = torch.constant.int 4
    %int1792_14605 = torch.constant.int 1792
    %16948 = torch.prim.ListConstruct %int4_14604, %2482, %int1792_14605 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16949 = torch.aten.view %16947, %16948 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14606 = torch.constant.int 4
    %16950 = torch.aten.mul.int %int4_14606, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14607 = torch.constant.int 4096
    %16951 = torch.prim.ListConstruct %16950, %int4096_14607 : (!torch.int, !torch.int) -> !torch.list<int>
    %16952 = torch.aten.view %16839, %16951 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16952, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16953 = torch.aten.mm %16952, %16923 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16953, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14608 = torch.constant.int 4
    %int1792_14609 = torch.constant.int 1792
    %16954 = torch.prim.ListConstruct %int4_14608, %2482, %int1792_14609 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16955 = torch.aten.view %16953, %16954 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14610 = torch.constant.int 4
    %16956 = torch.aten.mul.int %int4_14610, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14611 = torch.constant.int 4096
    %16957 = torch.prim.ListConstruct %16956, %int4096_14611 : (!torch.int, !torch.int) -> !torch.list<int>
    %16958 = torch.aten.view %16840, %16957 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16958, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16959 = torch.aten.mm %16958, %16925 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16959, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14612 = torch.constant.int 4
    %int1792_14613 = torch.constant.int 1792
    %16960 = torch.prim.ListConstruct %int4_14612, %2482, %int1792_14613 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16961 = torch.aten.view %16959, %16960 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14614 = torch.constant.int 4
    %16962 = torch.aten.mul.int %int4_14614, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14615 = torch.constant.int 4096
    %16963 = torch.prim.ListConstruct %16962, %int4096_14615 : (!torch.int, !torch.int) -> !torch.list<int>
    %16964 = torch.aten.view %16841, %16963 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16964, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16965 = torch.aten.mm %16964, %16927 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16965, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14616 = torch.constant.int 4
    %int1792_14617 = torch.constant.int 1792
    %16966 = torch.prim.ListConstruct %int4_14616, %2482, %int1792_14617 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16967 = torch.aten.view %16965, %16966 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14618 = torch.constant.int 4
    %16968 = torch.aten.mul.int %int4_14618, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14619 = torch.constant.int 4096
    %16969 = torch.prim.ListConstruct %16968, %int4096_14619 : (!torch.int, !torch.int) -> !torch.list<int>
    %16970 = torch.aten.view %16842, %16969 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16970, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16971 = torch.aten.mm %16970, %16929 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16971, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14620 = torch.constant.int 4
    %int1792_14621 = torch.constant.int 1792
    %16972 = torch.prim.ListConstruct %int4_14620, %2482, %int1792_14621 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16973 = torch.aten.view %16971, %16972 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_14622 = torch.constant.int 4
    %16974 = torch.aten.mul.int %int4_14622, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14623 = torch.constant.int 4096
    %16975 = torch.prim.ListConstruct %16974, %int4096_14623 : (!torch.int, !torch.int) -> !torch.list<int>
    %16976 = torch.aten.view %16843, %16975 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %16976, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %16977 = torch.aten.mm %16976, %16931 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %16977, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_14624 = torch.constant.int 4
    %int1792_14625 = torch.constant.int 1792
    %16978 = torch.prim.ListConstruct %int4_14624, %2482, %int1792_14625 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %16979 = torch.aten.view %16977, %16978 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
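    // Elementwise combine of the two FFN branches on each of the 8 shards:
    // %16908-%16915 (computed above) are multiplied with the per-shard
    // projection outputs %16937...%16979. In a LLaMA-style MLP this is likely
    // silu(gate) * up; the activation itself is applied before this section.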
    %16980 = torch.aten.mul.Tensor %16908, %16937 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16981 = torch.aten.mul.Tensor %16909, %16943 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16982 = torch.aten.mul.Tensor %16910, %16949 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16983 = torch.aten.mul.Tensor %16911, %16955 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16984 = torch.aten.mul.Tensor %16912, %16961 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16985 = torch.aten.mul.Tensor %16913, %16967 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16985, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16986 = torch.aten.mul.Tensor %16914, %16973 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %16987 = torch.aten.mul.Tensor %16915, %16979 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %16987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
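    // Transpose each down-projection weight shard %576-%583 from [4096,1792]
    // to [1792,4096] so it can serve as the RHS of torch.aten.mm below.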
    %int1_14626 = torch.constant.int 1
    %int0_14627 = torch.constant.int 0
    %16988 = torch.prim.ListConstruct %int1_14626, %int0_14627 : (!torch.int, !torch.int) -> !torch.list<int>
    %16989 = torch.aten.permute %576, %16988 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_14628 = torch.constant.int 1
    %int0_14629 = torch.constant.int 0
    %16990 = torch.prim.ListConstruct %int1_14628, %int0_14629 : (!torch.int, !torch.int) -> !torch.list<int>
    %16991 = torch.aten.permute %577, %16990 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_14630 = torch.constant.int 1
    %int0_14631 = torch.constant.int 0
    %16992 = torch.prim.ListConstruct %int1_14630, %int0_14631 : (!torch.int, !torch.int) -> !torch.list<int>
    %16993 = torch.aten.permute %578, %16992 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_14632 = torch.constant.int 1
    %int0_14633 = torch.constant.int 0
    %16994 = torch.prim.ListConstruct %int1_14632, %int0_14633 : (!torch.int, !torch.int) -> !torch.list<int>
    %16995 = torch.aten.permute %579, %16994 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_14634 = torch.constant.int 1
    %int0_14635 = torch.constant.int 0
    %16996 = torch.prim.ListConstruct %int1_14634, %int0_14635 : (!torch.int, !torch.int) -> !torch.list<int>
    %16997 = torch.aten.permute %580, %16996 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_14636 = torch.constant.int 1
    %int0_14637 = torch.constant.int 0
    %16998 = torch.prim.ListConstruct %int1_14636, %int0_14637 : (!torch.int, !torch.int) -> !torch.list<int>
    %16999 = torch.aten.permute %581, %16998 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_14638 = torch.constant.int 1
    %int0_14639 = torch.constant.int 0
    %17000 = torch.prim.ListConstruct %int1_14638, %int0_14639 : (!torch.int, !torch.int) -> !torch.list<int>
    %17001 = torch.aten.permute %582, %17000 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_14640 = torch.constant.int 1
    %int0_14641 = torch.constant.int 0
    %17002 = torch.prim.ListConstruct %int1_14640, %int0_14641 : (!torch.int, !torch.int) -> !torch.list<int>
    %17003 = torch.aten.permute %583, %17002 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
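    // Per-shard down projection: flatten [4,?,1792] -> [?,1792], matmul with
    // the transposed [1792,4096] shard, then reshape back to [4,?,4096].
    // Each shard yields a partial result of the full FFN output
    // (%17010, %17017, ..., %17059) that the blocks below reduce across devices.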
    %int1_14642 = torch.constant.int 1
    %17004 = torch.aten.size.int %16865, %int1_14642 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_14643 = torch.constant.int 4
    %17005 = torch.aten.mul.int %int4_14643, %17004 : !torch.int, !torch.int -> !torch.int
    %int1792_14644 = torch.constant.int 1792
    %17006 = torch.prim.ListConstruct %17005, %int1792_14644 : (!torch.int, !torch.int) -> !torch.list<int>
    %17007 = torch.aten.view %16980, %17006 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %17007, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %17008 = torch.aten.mm %17007, %16989 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17008, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14645 = torch.constant.int 4
    %int4096_14646 = torch.constant.int 4096
    %17009 = torch.prim.ListConstruct %int4_14645, %17004, %int4096_14646 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17010 = torch.aten.view %17008, %17009 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14647 = torch.constant.int 1
    %17011 = torch.aten.size.int %16871, %int1_14647 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_14648 = torch.constant.int 4
    %17012 = torch.aten.mul.int %int4_14648, %17011 : !torch.int, !torch.int -> !torch.int
    %int1792_14649 = torch.constant.int 1792
    %17013 = torch.prim.ListConstruct %17012, %int1792_14649 : (!torch.int, !torch.int) -> !torch.list<int>
    %17014 = torch.aten.view %16981, %17013 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %17014, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %17015 = torch.aten.mm %17014, %16991 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17015, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14650 = torch.constant.int 4
    %int4096_14651 = torch.constant.int 4096
    %17016 = torch.prim.ListConstruct %int4_14650, %17011, %int4096_14651 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17017 = torch.aten.view %17015, %17016 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14652 = torch.constant.int 1
    %17018 = torch.aten.size.int %16877, %int1_14652 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_14653 = torch.constant.int 4
    %17019 = torch.aten.mul.int %int4_14653, %17018 : !torch.int, !torch.int -> !torch.int
    %int1792_14654 = torch.constant.int 1792
    %17020 = torch.prim.ListConstruct %17019, %int1792_14654 : (!torch.int, !torch.int) -> !torch.list<int>
    %17021 = torch.aten.view %16982, %17020 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %17021, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %17022 = torch.aten.mm %17021, %16993 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17022, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14655 = torch.constant.int 4
    %int4096_14656 = torch.constant.int 4096
    %17023 = torch.prim.ListConstruct %int4_14655, %17018, %int4096_14656 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17024 = torch.aten.view %17022, %17023 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14657 = torch.constant.int 1
    %17025 = torch.aten.size.int %16883, %int1_14657 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_14658 = torch.constant.int 4
    %17026 = torch.aten.mul.int %int4_14658, %17025 : !torch.int, !torch.int -> !torch.int
    %int1792_14659 = torch.constant.int 1792
    %17027 = torch.prim.ListConstruct %17026, %int1792_14659 : (!torch.int, !torch.int) -> !torch.list<int>
    %17028 = torch.aten.view %16983, %17027 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %17028, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %17029 = torch.aten.mm %17028, %16995 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17029, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14660 = torch.constant.int 4
    %int4096_14661 = torch.constant.int 4096
    %17030 = torch.prim.ListConstruct %int4_14660, %17025, %int4096_14661 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17031 = torch.aten.view %17029, %17030 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14662 = torch.constant.int 1
    %17032 = torch.aten.size.int %16889, %int1_14662 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_14663 = torch.constant.int 4
    %17033 = torch.aten.mul.int %int4_14663, %17032 : !torch.int, !torch.int -> !torch.int
    %int1792_14664 = torch.constant.int 1792
    %17034 = torch.prim.ListConstruct %17033, %int1792_14664 : (!torch.int, !torch.int) -> !torch.list<int>
    %17035 = torch.aten.view %16984, %17034 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %17035, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %17036 = torch.aten.mm %17035, %16997 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17036, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14665 = torch.constant.int 4
    %int4096_14666 = torch.constant.int 4096
    %17037 = torch.prim.ListConstruct %int4_14665, %17032, %int4096_14666 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17038 = torch.aten.view %17036, %17037 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14667 = torch.constant.int 1
    %17039 = torch.aten.size.int %16895, %int1_14667 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_14668 = torch.constant.int 4
    %17040 = torch.aten.mul.int %int4_14668, %17039 : !torch.int, !torch.int -> !torch.int
    %int1792_14669 = torch.constant.int 1792
    %17041 = torch.prim.ListConstruct %17040, %int1792_14669 : (!torch.int, !torch.int) -> !torch.list<int>
    %17042 = torch.aten.view %16985, %17041 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %17042, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %17043 = torch.aten.mm %17042, %16999 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17043, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14670 = torch.constant.int 4
    %int4096_14671 = torch.constant.int 4096
    %17044 = torch.prim.ListConstruct %int4_14670, %17039, %int4096_14671 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17045 = torch.aten.view %17043, %17044 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14672 = torch.constant.int 1
    %17046 = torch.aten.size.int %16901, %int1_14672 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_14673 = torch.constant.int 4
    %17047 = torch.aten.mul.int %int4_14673, %17046 : !torch.int, !torch.int -> !torch.int
    %int1792_14674 = torch.constant.int 1792
    %17048 = torch.prim.ListConstruct %17047, %int1792_14674 : (!torch.int, !torch.int) -> !torch.list<int>
    %17049 = torch.aten.view %16986, %17048 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %17049, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %17050 = torch.aten.mm %17049, %17001 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17050, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14675 = torch.constant.int 4
    %int4096_14676 = torch.constant.int 4096
    %17051 = torch.prim.ListConstruct %int4_14675, %17046, %int4096_14676 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17052 = torch.aten.view %17050, %17051 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14677 = torch.constant.int 1
    %17053 = torch.aten.size.int %16907, %int1_14677 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_14678 = torch.constant.int 4
    %17054 = torch.aten.mul.int %int4_14678, %17053 : !torch.int, !torch.int -> !torch.int
    %int1792_14679 = torch.constant.int 1792
    %17055 = torch.prim.ListConstruct %17054, %int1792_14679 : (!torch.int, !torch.int) -> !torch.list<int>
    %17056 = torch.aten.view %16987, %17055 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %17056, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %17057 = torch.aten.mm %17056, %17003 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17057, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_14680 = torch.constant.int 4
    %int4096_14681 = torch.constant.int 4096
    %17058 = torch.prim.ListConstruct %int4_14680, %17053, %int4096_14681 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17059 = torch.aten.view %17057, %17058 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
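    // Reduction for @__device_0: the seven remote partials are brought in via
    // flow.tensor.transfer and accumulated with the local %17010. Together with
    // the mirrored per-device blocks that follow, this appears to materialize an
    // all-reduce over the 8 devices as explicit transfer + add chains.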
    %17060 = torch_c.to_builtin_tensor %17017 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14682 = arith.constant 1 : index
    %dim_14683 = tensor.dim %17060, %c1_14682 : tensor<4x?x4096xf16>
    %17061 = flow.tensor.transfer %17060 : tensor<4x?x4096xf16>{%dim_14683} to #hal.device.promise<@__device_0>
    %17062 = torch_c.from_builtin_tensor %17061 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17063 = torch_c.to_builtin_tensor %17024 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14684 = arith.constant 1 : index
    %dim_14685 = tensor.dim %17063, %c1_14684 : tensor<4x?x4096xf16>
    %17064 = flow.tensor.transfer %17063 : tensor<4x?x4096xf16>{%dim_14685} to #hal.device.promise<@__device_0>
    %17065 = torch_c.from_builtin_tensor %17064 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17066 = torch_c.to_builtin_tensor %17031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14686 = arith.constant 1 : index
    %dim_14687 = tensor.dim %17066, %c1_14686 : tensor<4x?x4096xf16>
    %17067 = flow.tensor.transfer %17066 : tensor<4x?x4096xf16>{%dim_14687} to #hal.device.promise<@__device_0>
    %17068 = torch_c.from_builtin_tensor %17067 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17069 = torch_c.to_builtin_tensor %17038 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14688 = arith.constant 1 : index
    %dim_14689 = tensor.dim %17069, %c1_14688 : tensor<4x?x4096xf16>
    %17070 = flow.tensor.transfer %17069 : tensor<4x?x4096xf16>{%dim_14689} to #hal.device.promise<@__device_0>
    %17071 = torch_c.from_builtin_tensor %17070 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17072 = torch_c.to_builtin_tensor %17045 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14690 = arith.constant 1 : index
    %dim_14691 = tensor.dim %17072, %c1_14690 : tensor<4x?x4096xf16>
    %17073 = flow.tensor.transfer %17072 : tensor<4x?x4096xf16>{%dim_14691} to #hal.device.promise<@__device_0>
    %17074 = torch_c.from_builtin_tensor %17073 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17075 = torch_c.to_builtin_tensor %17052 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14692 = arith.constant 1 : index
    %dim_14693 = tensor.dim %17075, %c1_14692 : tensor<4x?x4096xf16>
    %17076 = flow.tensor.transfer %17075 : tensor<4x?x4096xf16>{%dim_14693} to #hal.device.promise<@__device_0>
    %17077 = torch_c.from_builtin_tensor %17076 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17078 = torch_c.to_builtin_tensor %17059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14694 = arith.constant 1 : index
    %dim_14695 = tensor.dim %17078, %c1_14694 : tensor<4x?x4096xf16>
    %17079 = flow.tensor.transfer %17078 : tensor<4x?x4096xf16>{%dim_14695} to #hal.device.promise<@__device_0>
    %17080 = torch_c.from_builtin_tensor %17079 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14696 = torch.constant.int 1
    %17081 = torch.aten.add.Tensor %17010, %17062, %int1_14696 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14697 = torch.constant.int 1
    %17082 = torch.aten.add.Tensor %17081, %17065, %int1_14697 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14698 = torch.constant.int 1
    %17083 = torch.aten.add.Tensor %17082, %17068, %int1_14698 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14699 = torch.constant.int 1
    %17084 = torch.aten.add.Tensor %17083, %17071, %int1_14699 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14700 = torch.constant.int 1
    %17085 = torch.aten.add.Tensor %17084, %17074, %int1_14700 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14701 = torch.constant.int 1
    %17086 = torch.aten.add.Tensor %17085, %17077, %int1_14701 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14702 = torch.constant.int 1
    %17087 = torch.aten.add.Tensor %17086, %17080, %int1_14702 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
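    // Reduction for @__device_1: local partial %17017 plus transfers of the other seven.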
    %17088 = torch_c.to_builtin_tensor %17010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14703 = arith.constant 1 : index
    %dim_14704 = tensor.dim %17088, %c1_14703 : tensor<4x?x4096xf16>
    %17089 = flow.tensor.transfer %17088 : tensor<4x?x4096xf16>{%dim_14704} to #hal.device.promise<@__device_1>
    %17090 = torch_c.from_builtin_tensor %17089 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17091 = torch_c.to_builtin_tensor %17024 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14705 = arith.constant 1 : index
    %dim_14706 = tensor.dim %17091, %c1_14705 : tensor<4x?x4096xf16>
    %17092 = flow.tensor.transfer %17091 : tensor<4x?x4096xf16>{%dim_14706} to #hal.device.promise<@__device_1>
    %17093 = torch_c.from_builtin_tensor %17092 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17094 = torch_c.to_builtin_tensor %17031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14707 = arith.constant 1 : index
    %dim_14708 = tensor.dim %17094, %c1_14707 : tensor<4x?x4096xf16>
    %17095 = flow.tensor.transfer %17094 : tensor<4x?x4096xf16>{%dim_14708} to #hal.device.promise<@__device_1>
    %17096 = torch_c.from_builtin_tensor %17095 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17097 = torch_c.to_builtin_tensor %17038 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14709 = arith.constant 1 : index
    %dim_14710 = tensor.dim %17097, %c1_14709 : tensor<4x?x4096xf16>
    %17098 = flow.tensor.transfer %17097 : tensor<4x?x4096xf16>{%dim_14710} to #hal.device.promise<@__device_1>
    %17099 = torch_c.from_builtin_tensor %17098 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17100 = torch_c.to_builtin_tensor %17045 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14711 = arith.constant 1 : index
    %dim_14712 = tensor.dim %17100, %c1_14711 : tensor<4x?x4096xf16>
    %17101 = flow.tensor.transfer %17100 : tensor<4x?x4096xf16>{%dim_14712} to #hal.device.promise<@__device_1>
    %17102 = torch_c.from_builtin_tensor %17101 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17103 = torch_c.to_builtin_tensor %17052 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14713 = arith.constant 1 : index
    %dim_14714 = tensor.dim %17103, %c1_14713 : tensor<4x?x4096xf16>
    %17104 = flow.tensor.transfer %17103 : tensor<4x?x4096xf16>{%dim_14714} to #hal.device.promise<@__device_1>
    %17105 = torch_c.from_builtin_tensor %17104 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17106 = torch_c.to_builtin_tensor %17059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14715 = arith.constant 1 : index
    %dim_14716 = tensor.dim %17106, %c1_14715 : tensor<4x?x4096xf16>
    %17107 = flow.tensor.transfer %17106 : tensor<4x?x4096xf16>{%dim_14716} to #hal.device.promise<@__device_1>
    %17108 = torch_c.from_builtin_tensor %17107 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14717 = torch.constant.int 1
    %17109 = torch.aten.add.Tensor %17090, %17017, %int1_14717 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14718 = torch.constant.int 1
    %17110 = torch.aten.add.Tensor %17109, %17093, %int1_14718 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14719 = torch.constant.int 1
    %17111 = torch.aten.add.Tensor %17110, %17096, %int1_14719 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14720 = torch.constant.int 1
    %17112 = torch.aten.add.Tensor %17111, %17099, %int1_14720 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14721 = torch.constant.int 1
    %17113 = torch.aten.add.Tensor %17112, %17102, %int1_14721 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14722 = torch.constant.int 1
    %17114 = torch.aten.add.Tensor %17113, %17105, %int1_14722 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14723 = torch.constant.int 1
    %17115 = torch.aten.add.Tensor %17114, %17108, %int1_14723 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
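    // Reduction for @__device_2: local partial %17024 plus transfers of the other seven.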
    %17116 = torch_c.to_builtin_tensor %17010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14724 = arith.constant 1 : index
    %dim_14725 = tensor.dim %17116, %c1_14724 : tensor<4x?x4096xf16>
    %17117 = flow.tensor.transfer %17116 : tensor<4x?x4096xf16>{%dim_14725} to #hal.device.promise<@__device_2>
    %17118 = torch_c.from_builtin_tensor %17117 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17119 = torch_c.to_builtin_tensor %17017 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14726 = arith.constant 1 : index
    %dim_14727 = tensor.dim %17119, %c1_14726 : tensor<4x?x4096xf16>
    %17120 = flow.tensor.transfer %17119 : tensor<4x?x4096xf16>{%dim_14727} to #hal.device.promise<@__device_2>
    %17121 = torch_c.from_builtin_tensor %17120 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17122 = torch_c.to_builtin_tensor %17031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14728 = arith.constant 1 : index
    %dim_14729 = tensor.dim %17122, %c1_14728 : tensor<4x?x4096xf16>
    %17123 = flow.tensor.transfer %17122 : tensor<4x?x4096xf16>{%dim_14729} to #hal.device.promise<@__device_2>
    %17124 = torch_c.from_builtin_tensor %17123 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17124, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17125 = torch_c.to_builtin_tensor %17038 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14730 = arith.constant 1 : index
    %dim_14731 = tensor.dim %17125, %c1_14730 : tensor<4x?x4096xf16>
    %17126 = flow.tensor.transfer %17125 : tensor<4x?x4096xf16>{%dim_14731} to #hal.device.promise<@__device_2>
    %17127 = torch_c.from_builtin_tensor %17126 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17128 = torch_c.to_builtin_tensor %17045 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14732 = arith.constant 1 : index
    %dim_14733 = tensor.dim %17128, %c1_14732 : tensor<4x?x4096xf16>
    %17129 = flow.tensor.transfer %17128 : tensor<4x?x4096xf16>{%dim_14733} to #hal.device.promise<@__device_2>
    %17130 = torch_c.from_builtin_tensor %17129 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17131 = torch_c.to_builtin_tensor %17052 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14734 = arith.constant 1 : index
    %dim_14735 = tensor.dim %17131, %c1_14734 : tensor<4x?x4096xf16>
    %17132 = flow.tensor.transfer %17131 : tensor<4x?x4096xf16>{%dim_14735} to #hal.device.promise<@__device_2>
    %17133 = torch_c.from_builtin_tensor %17132 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17134 = torch_c.to_builtin_tensor %17059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14736 = arith.constant 1 : index
    %dim_14737 = tensor.dim %17134, %c1_14736 : tensor<4x?x4096xf16>
    %17135 = flow.tensor.transfer %17134 : tensor<4x?x4096xf16>{%dim_14737} to #hal.device.promise<@__device_2>
    %17136 = torch_c.from_builtin_tensor %17135 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14738 = torch.constant.int 1
    %17137 = torch.aten.add.Tensor %17118, %17121, %int1_14738 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14739 = torch.constant.int 1
    %17138 = torch.aten.add.Tensor %17137, %17024, %int1_14739 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14740 = torch.constant.int 1
    %17139 = torch.aten.add.Tensor %17138, %17124, %int1_14740 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14741 = torch.constant.int 1
    %17140 = torch.aten.add.Tensor %17139, %17127, %int1_14741 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14742 = torch.constant.int 1
    %17141 = torch.aten.add.Tensor %17140, %17130, %int1_14742 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14743 = torch.constant.int 1
    %17142 = torch.aten.add.Tensor %17141, %17133, %int1_14743 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14744 = torch.constant.int 1
    %17143 = torch.aten.add.Tensor %17142, %17136, %int1_14744 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
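    // Reduction for @__device_3: local partial %17031 plus transfers of the other seven.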
    %17144 = torch_c.to_builtin_tensor %17010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14745 = arith.constant 1 : index
    %dim_14746 = tensor.dim %17144, %c1_14745 : tensor<4x?x4096xf16>
    %17145 = flow.tensor.transfer %17144 : tensor<4x?x4096xf16>{%dim_14746} to #hal.device.promise<@__device_3>
    %17146 = torch_c.from_builtin_tensor %17145 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17147 = torch_c.to_builtin_tensor %17017 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14747 = arith.constant 1 : index
    %dim_14748 = tensor.dim %17147, %c1_14747 : tensor<4x?x4096xf16>
    %17148 = flow.tensor.transfer %17147 : tensor<4x?x4096xf16>{%dim_14748} to #hal.device.promise<@__device_3>
    %17149 = torch_c.from_builtin_tensor %17148 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17150 = torch_c.to_builtin_tensor %17024 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14749 = arith.constant 1 : index
    %dim_14750 = tensor.dim %17150, %c1_14749 : tensor<4x?x4096xf16>
    %17151 = flow.tensor.transfer %17150 : tensor<4x?x4096xf16>{%dim_14750} to #hal.device.promise<@__device_3>
    %17152 = torch_c.from_builtin_tensor %17151 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17153 = torch_c.to_builtin_tensor %17038 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14751 = arith.constant 1 : index
    %dim_14752 = tensor.dim %17153, %c1_14751 : tensor<4x?x4096xf16>
    %17154 = flow.tensor.transfer %17153 : tensor<4x?x4096xf16>{%dim_14752} to #hal.device.promise<@__device_3>
    %17155 = torch_c.from_builtin_tensor %17154 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17156 = torch_c.to_builtin_tensor %17045 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14753 = arith.constant 1 : index
    %dim_14754 = tensor.dim %17156, %c1_14753 : tensor<4x?x4096xf16>
    %17157 = flow.tensor.transfer %17156 : tensor<4x?x4096xf16>{%dim_14754} to #hal.device.promise<@__device_3>
    %17158 = torch_c.from_builtin_tensor %17157 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17159 = torch_c.to_builtin_tensor %17052 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14755 = arith.constant 1 : index
    %dim_14756 = tensor.dim %17159, %c1_14755 : tensor<4x?x4096xf16>
    %17160 = flow.tensor.transfer %17159 : tensor<4x?x4096xf16>{%dim_14756} to #hal.device.promise<@__device_3>
    %17161 = torch_c.from_builtin_tensor %17160 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17162 = torch_c.to_builtin_tensor %17059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14757 = arith.constant 1 : index
    %dim_14758 = tensor.dim %17162, %c1_14757 : tensor<4x?x4096xf16>
    %17163 = flow.tensor.transfer %17162 : tensor<4x?x4096xf16>{%dim_14758} to #hal.device.promise<@__device_3>
    %17164 = torch_c.from_builtin_tensor %17163 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14759 = torch.constant.int 1
    %17165 = torch.aten.add.Tensor %17146, %17149, %int1_14759 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14760 = torch.constant.int 1
    %17166 = torch.aten.add.Tensor %17165, %17152, %int1_14760 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14761 = torch.constant.int 1
    %17167 = torch.aten.add.Tensor %17166, %17031, %int1_14761 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14762 = torch.constant.int 1
    %17168 = torch.aten.add.Tensor %17167, %17155, %int1_14762 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14763 = torch.constant.int 1
    %17169 = torch.aten.add.Tensor %17168, %17158, %int1_14763 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14764 = torch.constant.int 1
    %17170 = torch.aten.add.Tensor %17169, %17161, %int1_14764 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14765 = torch.constant.int 1
    %17171 = torch.aten.add.Tensor %17170, %17164, %int1_14765 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
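    // Reduction for @__device_4: local partial %17038 plus transfers of the other seven.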
    %17172 = torch_c.to_builtin_tensor %17010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14766 = arith.constant 1 : index
    %dim_14767 = tensor.dim %17172, %c1_14766 : tensor<4x?x4096xf16>
    %17173 = flow.tensor.transfer %17172 : tensor<4x?x4096xf16>{%dim_14767} to #hal.device.promise<@__device_4>
    %17174 = torch_c.from_builtin_tensor %17173 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17175 = torch_c.to_builtin_tensor %17017 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14768 = arith.constant 1 : index
    %dim_14769 = tensor.dim %17175, %c1_14768 : tensor<4x?x4096xf16>
    %17176 = flow.tensor.transfer %17175 : tensor<4x?x4096xf16>{%dim_14769} to #hal.device.promise<@__device_4>
    %17177 = torch_c.from_builtin_tensor %17176 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17178 = torch_c.to_builtin_tensor %17024 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14770 = arith.constant 1 : index
    %dim_14771 = tensor.dim %17178, %c1_14770 : tensor<4x?x4096xf16>
    %17179 = flow.tensor.transfer %17178 : tensor<4x?x4096xf16>{%dim_14771} to #hal.device.promise<@__device_4>
    %17180 = torch_c.from_builtin_tensor %17179 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17181 = torch_c.to_builtin_tensor %17031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14772 = arith.constant 1 : index
    %dim_14773 = tensor.dim %17181, %c1_14772 : tensor<4x?x4096xf16>
    %17182 = flow.tensor.transfer %17181 : tensor<4x?x4096xf16>{%dim_14773} to #hal.device.promise<@__device_4>
    %17183 = torch_c.from_builtin_tensor %17182 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17184 = torch_c.to_builtin_tensor %17045 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14774 = arith.constant 1 : index
    %dim_14775 = tensor.dim %17184, %c1_14774 : tensor<4x?x4096xf16>
    %17185 = flow.tensor.transfer %17184 : tensor<4x?x4096xf16>{%dim_14775} to #hal.device.promise<@__device_4>
    %17186 = torch_c.from_builtin_tensor %17185 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17187 = torch_c.to_builtin_tensor %17052 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14776 = arith.constant 1 : index
    %dim_14777 = tensor.dim %17187, %c1_14776 : tensor<4x?x4096xf16>
    %17188 = flow.tensor.transfer %17187 : tensor<4x?x4096xf16>{%dim_14777} to #hal.device.promise<@__device_4>
    %17189 = torch_c.from_builtin_tensor %17188 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17190 = torch_c.to_builtin_tensor %17059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14778 = arith.constant 1 : index
    %dim_14779 = tensor.dim %17190, %c1_14778 : tensor<4x?x4096xf16>
    %17191 = flow.tensor.transfer %17190 : tensor<4x?x4096xf16>{%dim_14779} to #hal.device.promise<@__device_4>
    %17192 = torch_c.from_builtin_tensor %17191 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14780 = torch.constant.int 1
    %17193 = torch.aten.add.Tensor %17174, %17177, %int1_14780 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14781 = torch.constant.int 1
    %17194 = torch.aten.add.Tensor %17193, %17180, %int1_14781 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14782 = torch.constant.int 1
    %17195 = torch.aten.add.Tensor %17194, %17183, %int1_14782 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14783 = torch.constant.int 1
    %17196 = torch.aten.add.Tensor %17195, %17038, %int1_14783 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14784 = torch.constant.int 1
    %17197 = torch.aten.add.Tensor %17196, %17186, %int1_14784 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14785 = torch.constant.int 1
    %17198 = torch.aten.add.Tensor %17197, %17189, %int1_14785 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14786 = torch.constant.int 1
    %17199 = torch.aten.add.Tensor %17198, %17192, %int1_14786 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
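    // Reduction for @__device_5: local partial %17045 plus transfers of the other seven.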
    %17200 = torch_c.to_builtin_tensor %17010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14787 = arith.constant 1 : index
    %dim_14788 = tensor.dim %17200, %c1_14787 : tensor<4x?x4096xf16>
    %17201 = flow.tensor.transfer %17200 : tensor<4x?x4096xf16>{%dim_14788} to #hal.device.promise<@__device_5>
    %17202 = torch_c.from_builtin_tensor %17201 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17203 = torch_c.to_builtin_tensor %17017 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14789 = arith.constant 1 : index
    %dim_14790 = tensor.dim %17203, %c1_14789 : tensor<4x?x4096xf16>
    %17204 = flow.tensor.transfer %17203 : tensor<4x?x4096xf16>{%dim_14790} to #hal.device.promise<@__device_5>
    %17205 = torch_c.from_builtin_tensor %17204 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17206 = torch_c.to_builtin_tensor %17024 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14791 = arith.constant 1 : index
    %dim_14792 = tensor.dim %17206, %c1_14791 : tensor<4x?x4096xf16>
    %17207 = flow.tensor.transfer %17206 : tensor<4x?x4096xf16>{%dim_14792} to #hal.device.promise<@__device_5>
    %17208 = torch_c.from_builtin_tensor %17207 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17209 = torch_c.to_builtin_tensor %17031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14793 = arith.constant 1 : index
    %dim_14794 = tensor.dim %17209, %c1_14793 : tensor<4x?x4096xf16>
    %17210 = flow.tensor.transfer %17209 : tensor<4x?x4096xf16>{%dim_14794} to #hal.device.promise<@__device_5>
    %17211 = torch_c.from_builtin_tensor %17210 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17211, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17212 = torch_c.to_builtin_tensor %17038 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14795 = arith.constant 1 : index
    %dim_14796 = tensor.dim %17212, %c1_14795 : tensor<4x?x4096xf16>
    %17213 = flow.tensor.transfer %17212 : tensor<4x?x4096xf16>{%dim_14796} to #hal.device.promise<@__device_5>
    %17214 = torch_c.from_builtin_tensor %17213 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17215 = torch_c.to_builtin_tensor %17052 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14797 = arith.constant 1 : index
    %dim_14798 = tensor.dim %17215, %c1_14797 : tensor<4x?x4096xf16>
    %17216 = flow.tensor.transfer %17215 : tensor<4x?x4096xf16>{%dim_14798} to #hal.device.promise<@__device_5>
    %17217 = torch_c.from_builtin_tensor %17216 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17218 = torch_c.to_builtin_tensor %17059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14799 = arith.constant 1 : index
    %dim_14800 = tensor.dim %17218, %c1_14799 : tensor<4x?x4096xf16>
    %17219 = flow.tensor.transfer %17218 : tensor<4x?x4096xf16>{%dim_14800} to #hal.device.promise<@__device_5>
    %17220 = torch_c.from_builtin_tensor %17219 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14801 = torch.constant.int 1
    %17221 = torch.aten.add.Tensor %17202, %17205, %int1_14801 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14802 = torch.constant.int 1
    %17222 = torch.aten.add.Tensor %17221, %17208, %int1_14802 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14803 = torch.constant.int 1
    %17223 = torch.aten.add.Tensor %17222, %17211, %int1_14803 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14804 = torch.constant.int 1
    %17224 = torch.aten.add.Tensor %17223, %17214, %int1_14804 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14805 = torch.constant.int 1
    %17225 = torch.aten.add.Tensor %17224, %17045, %int1_14805 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14806 = torch.constant.int 1
    %17226 = torch.aten.add.Tensor %17225, %17217, %int1_14806 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14807 = torch.constant.int 1
    %17227 = torch.aten.add.Tensor %17226, %17220, %int1_14807 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
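    // Reduction for @__device_6: local partial %17052 plus transfers of the other seven.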
    %17228 = torch_c.to_builtin_tensor %17010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14808 = arith.constant 1 : index
    %dim_14809 = tensor.dim %17228, %c1_14808 : tensor<4x?x4096xf16>
    %17229 = flow.tensor.transfer %17228 : tensor<4x?x4096xf16>{%dim_14809} to #hal.device.promise<@__device_6>
    %17230 = torch_c.from_builtin_tensor %17229 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17231 = torch_c.to_builtin_tensor %17017 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14810 = arith.constant 1 : index
    %dim_14811 = tensor.dim %17231, %c1_14810 : tensor<4x?x4096xf16>
    %17232 = flow.tensor.transfer %17231 : tensor<4x?x4096xf16>{%dim_14811} to #hal.device.promise<@__device_6>
    %17233 = torch_c.from_builtin_tensor %17232 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17234 = torch_c.to_builtin_tensor %17024 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14812 = arith.constant 1 : index
    %dim_14813 = tensor.dim %17234, %c1_14812 : tensor<4x?x4096xf16>
    %17235 = flow.tensor.transfer %17234 : tensor<4x?x4096xf16>{%dim_14813} to #hal.device.promise<@__device_6>
    %17236 = torch_c.from_builtin_tensor %17235 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17237 = torch_c.to_builtin_tensor %17031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14814 = arith.constant 1 : index
    %dim_14815 = tensor.dim %17237, %c1_14814 : tensor<4x?x4096xf16>
    %17238 = flow.tensor.transfer %17237 : tensor<4x?x4096xf16>{%dim_14815} to #hal.device.promise<@__device_6>
    %17239 = torch_c.from_builtin_tensor %17238 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17240 = torch_c.to_builtin_tensor %17038 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14816 = arith.constant 1 : index
    %dim_14817 = tensor.dim %17240, %c1_14816 : tensor<4x?x4096xf16>
    %17241 = flow.tensor.transfer %17240 : tensor<4x?x4096xf16>{%dim_14817} to #hal.device.promise<@__device_6>
    %17242 = torch_c.from_builtin_tensor %17241 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17243 = torch_c.to_builtin_tensor %17045 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14818 = arith.constant 1 : index
    %dim_14819 = tensor.dim %17243, %c1_14818 : tensor<4x?x4096xf16>
    %17244 = flow.tensor.transfer %17243 : tensor<4x?x4096xf16>{%dim_14819} to #hal.device.promise<@__device_6>
    %17245 = torch_c.from_builtin_tensor %17244 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17246 = torch_c.to_builtin_tensor %17059 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14820 = arith.constant 1 : index
    %dim_14821 = tensor.dim %17246, %c1_14820 : tensor<4x?x4096xf16>
    %17247 = flow.tensor.transfer %17246 : tensor<4x?x4096xf16>{%dim_14821} to #hal.device.promise<@__device_6>
    %17248 = torch_c.from_builtin_tensor %17247 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14822 = torch.constant.int 1
    %17249 = torch.aten.add.Tensor %17230, %17233, %int1_14822 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14823 = torch.constant.int 1
    %17250 = torch.aten.add.Tensor %17249, %17236, %int1_14823 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14824 = torch.constant.int 1
    %17251 = torch.aten.add.Tensor %17250, %17239, %int1_14824 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14825 = torch.constant.int 1
    %17252 = torch.aten.add.Tensor %17251, %17242, %int1_14825 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14826 = torch.constant.int 1
    %17253 = torch.aten.add.Tensor %17252, %17245, %int1_14826 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14827 = torch.constant.int 1
    %17254 = torch.aten.add.Tensor %17253, %17052, %int1_14827 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14828 = torch.constant.int 1
    %17255 = torch.aten.add.Tensor %17254, %17248, %int1_14828 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
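    // Same pattern for @__device_7: here %17052 is transferred along with the
    // other six remote partials, and the locally resident %17059 is added
    // last, yielding the reduced result %17283.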
    %17256 = torch_c.to_builtin_tensor %17010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14829 = arith.constant 1 : index
    %dim_14830 = tensor.dim %17256, %c1_14829 : tensor<4x?x4096xf16>
    %17257 = flow.tensor.transfer %17256 : tensor<4x?x4096xf16>{%dim_14830} to #hal.device.promise<@__device_7>
    %17258 = torch_c.from_builtin_tensor %17257 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17259 = torch_c.to_builtin_tensor %17017 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14831 = arith.constant 1 : index
    %dim_14832 = tensor.dim %17259, %c1_14831 : tensor<4x?x4096xf16>
    %17260 = flow.tensor.transfer %17259 : tensor<4x?x4096xf16>{%dim_14832} to #hal.device.promise<@__device_7>
    %17261 = torch_c.from_builtin_tensor %17260 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17262 = torch_c.to_builtin_tensor %17024 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14833 = arith.constant 1 : index
    %dim_14834 = tensor.dim %17262, %c1_14833 : tensor<4x?x4096xf16>
    %17263 = flow.tensor.transfer %17262 : tensor<4x?x4096xf16>{%dim_14834} to #hal.device.promise<@__device_7>
    %17264 = torch_c.from_builtin_tensor %17263 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17265 = torch_c.to_builtin_tensor %17031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14835 = arith.constant 1 : index
    %dim_14836 = tensor.dim %17265, %c1_14835 : tensor<4x?x4096xf16>
    %17266 = flow.tensor.transfer %17265 : tensor<4x?x4096xf16>{%dim_14836} to #hal.device.promise<@__device_7>
    %17267 = torch_c.from_builtin_tensor %17266 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17268 = torch_c.to_builtin_tensor %17038 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14837 = arith.constant 1 : index
    %dim_14838 = tensor.dim %17268, %c1_14837 : tensor<4x?x4096xf16>
    %17269 = flow.tensor.transfer %17268 : tensor<4x?x4096xf16>{%dim_14838} to #hal.device.promise<@__device_7>
    %17270 = torch_c.from_builtin_tensor %17269 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17271 = torch_c.to_builtin_tensor %17045 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14839 = arith.constant 1 : index
    %dim_14840 = tensor.dim %17271, %c1_14839 : tensor<4x?x4096xf16>
    %17272 = flow.tensor.transfer %17271 : tensor<4x?x4096xf16>{%dim_14840} to #hal.device.promise<@__device_7>
    %17273 = torch_c.from_builtin_tensor %17272 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %17274 = torch_c.to_builtin_tensor %17052 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_14841 = arith.constant 1 : index
    %dim_14842 = tensor.dim %17274, %c1_14841 : tensor<4x?x4096xf16>
    %17275 = flow.tensor.transfer %17274 : tensor<4x?x4096xf16>{%dim_14842} to #hal.device.promise<@__device_7>
    %17276 = torch_c.from_builtin_tensor %17275 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14843 = torch.constant.int 1
    %17277 = torch.aten.add.Tensor %17258, %17261, %int1_14843 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14844 = torch.constant.int 1
    %17278 = torch.aten.add.Tensor %17277, %17264, %int1_14844 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14845 = torch.constant.int 1
    %17279 = torch.aten.add.Tensor %17278, %17267, %int1_14845 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14846 = torch.constant.int 1
    %17280 = torch.aten.add.Tensor %17279, %17270, %int1_14846 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14847 = torch.constant.int 1
    %17281 = torch.aten.add.Tensor %17280, %17273, %int1_14847 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14848 = torch.constant.int 1
    %17282 = torch.aten.add.Tensor %17281, %17276, %int1_14848 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14849 = torch.constant.int 1
    %17283 = torch.aten.add.Tensor %17282, %17059, %int1_14849 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
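    // Per-device residual connection: each replica of the previous hidden
    // state (%16764 ... %16771) is added to that device's fully reduced
    // output (%17087, %17115, ..., %17283) -- most likely the output of the
    // preceding block, given the attention projections that follow.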
    %int1_14850 = torch.constant.int 1
    %17284 = torch.aten.add.Tensor %16764, %17087, %int1_14850 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14851 = torch.constant.int 1
    %17285 = torch.aten.add.Tensor %16765, %17115, %int1_14851 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14852 = torch.constant.int 1
    %17286 = torch.aten.add.Tensor %16766, %17143, %int1_14852 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14853 = torch.constant.int 1
    %17287 = torch.aten.add.Tensor %16767, %17171, %int1_14853 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14854 = torch.constant.int 1
    %17288 = torch.aten.add.Tensor %16768, %17199, %int1_14854 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14855 = torch.constant.int 1
    %17289 = torch.aten.add.Tensor %16769, %17227, %int1_14855 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14856 = torch.constant.int 1
    %17290 = torch.aten.add.Tensor %16770, %17255, %int1_14856 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_14857 = torch.constant.int 1
    %17291 = torch.aten.add.Tensor %16771, %17283, %int1_14857 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
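    // RMSNorm over the residual stream begins here: upcast every per-device
    // activation from f16 to f32 (torch dtype code 6) so the reduction below
    // runs at higher precision.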
    %int6_14858 = torch.constant.int 6
    %17292 = torch.prims.convert_element_type %17284, %int6_14858 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14859 = torch.constant.int 6
    %17293 = torch.prims.convert_element_type %17285, %int6_14859 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14860 = torch.constant.int 6
    %17294 = torch.prims.convert_element_type %17286, %int6_14860 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14861 = torch.constant.int 6
    %17295 = torch.prims.convert_element_type %17287, %int6_14861 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14862 = torch.constant.int 6
    %17296 = torch.prims.convert_element_type %17288, %int6_14862 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14863 = torch.constant.int 6
    %17297 = torch.prims.convert_element_type %17289, %int6_14863 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14864 = torch.constant.int 6
    %17298 = torch.prims.convert_element_type %17290, %int6_14864 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_14865 = torch.constant.int 6
    %17299 = torch.prims.convert_element_type %17291, %int6_14865 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
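    // Elementwise square of the upcast activations: the x^2 term of
    // mean(x^2) in RMSNorm.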
    %int2_14866 = torch.constant.int 2
    %17300 = torch.aten.pow.Tensor_Scalar %17292, %int2_14866 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14867 = torch.constant.int 2
    %17301 = torch.aten.pow.Tensor_Scalar %17293, %int2_14867 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14868 = torch.constant.int 2
    %17302 = torch.aten.pow.Tensor_Scalar %17294, %int2_14868 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14869 = torch.constant.int 2
    %17303 = torch.aten.pow.Tensor_Scalar %17295, %int2_14869 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14870 = torch.constant.int 2
    %17304 = torch.aten.pow.Tensor_Scalar %17296, %int2_14870 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14871 = torch.constant.int 2
    %17305 = torch.aten.pow.Tensor_Scalar %17297, %int2_14871 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14872 = torch.constant.int 2
    %17306 = torch.aten.pow.Tensor_Scalar %17298, %int2_14872 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_14873 = torch.constant.int 2
    %17307 = torch.aten.pow.Tensor_Scalar %17299, %int2_14873 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
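    // Mean of x^2 over the hidden dimension (dim -1, keepdim = true),
    // reducing [4,?,4096] to [4,?,1] on each device.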
    %int-1_14874 = torch.constant.int -1
    %17308 = torch.prim.ListConstruct %int-1_14874 : (!torch.int) -> !torch.list<int>
    %true_14875 = torch.constant.bool true
    %none_14876 = torch.constant.none
    %17309 = torch.aten.mean.dim %17300, %17308, %true_14875, %none_14876 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14877 = torch.constant.int -1
    %17310 = torch.prim.ListConstruct %int-1_14877 : (!torch.int) -> !torch.list<int>
    %true_14878 = torch.constant.bool true
    %none_14879 = torch.constant.none
    %17311 = torch.aten.mean.dim %17301, %17310, %true_14878, %none_14879 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14880 = torch.constant.int -1
    %17312 = torch.prim.ListConstruct %int-1_14880 : (!torch.int) -> !torch.list<int>
    %true_14881 = torch.constant.bool true
    %none_14882 = torch.constant.none
    %17313 = torch.aten.mean.dim %17302, %17312, %true_14881, %none_14882 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14883 = torch.constant.int -1
    %17314 = torch.prim.ListConstruct %int-1_14883 : (!torch.int) -> !torch.list<int>
    %true_14884 = torch.constant.bool true
    %none_14885 = torch.constant.none
    %17315 = torch.aten.mean.dim %17303, %17314, %true_14884, %none_14885 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14886 = torch.constant.int -1
    %17316 = torch.prim.ListConstruct %int-1_14886 : (!torch.int) -> !torch.list<int>
    %true_14887 = torch.constant.bool true
    %none_14888 = torch.constant.none
    %17317 = torch.aten.mean.dim %17304, %17316, %true_14887, %none_14888 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14889 = torch.constant.int -1
    %17318 = torch.prim.ListConstruct %int-1_14889 : (!torch.int) -> !torch.list<int>
    %true_14890 = torch.constant.bool true
    %none_14891 = torch.constant.none
    %17319 = torch.aten.mean.dim %17305, %17318, %true_14890, %none_14891 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14892 = torch.constant.int -1
    %17320 = torch.prim.ListConstruct %int-1_14892 : (!torch.int) -> !torch.list<int>
    %true_14893 = torch.constant.bool true
    %none_14894 = torch.constant.none
    %17321 = torch.aten.mean.dim %17306, %17320, %true_14893, %none_14894 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_14895 = torch.constant.int -1
    %17322 = torch.prim.ListConstruct %int-1_14895 : (!torch.int) -> !torch.list<int>
    %true_14896 = torch.constant.bool true
    %none_14897 = torch.constant.none
    %17323 = torch.aten.mean.dim %17307, %17322, %true_14896, %none_14897 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
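    // Add the RMSNorm epsilon (~1.0e-5) to the mean of squares before the
    // reciprocal square root.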
    %float9.999990e-06_14898 = torch.constant.float 9.9999997473787516E-6
    %int1_14899 = torch.constant.int 1
    %17324 = torch.aten.add.Scalar %17309, %float9.999990e-06_14898, %int1_14899 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14900 = torch.constant.float 9.9999997473787516E-6
    %int1_14901 = torch.constant.int 1
    %17325 = torch.aten.add.Scalar %17311, %float9.999990e-06_14900, %int1_14901 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14902 = torch.constant.float 9.9999997473787516E-6
    %int1_14903 = torch.constant.int 1
    %17326 = torch.aten.add.Scalar %17313, %float9.999990e-06_14902, %int1_14903 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14904 = torch.constant.float 9.9999997473787516E-6
    %int1_14905 = torch.constant.int 1
    %17327 = torch.aten.add.Scalar %17315, %float9.999990e-06_14904, %int1_14905 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14906 = torch.constant.float 9.9999997473787516E-6
    %int1_14907 = torch.constant.int 1
    %17328 = torch.aten.add.Scalar %17317, %float9.999990e-06_14906, %int1_14907 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14908 = torch.constant.float 9.9999997473787516E-6
    %int1_14909 = torch.constant.int 1
    %17329 = torch.aten.add.Scalar %17319, %float9.999990e-06_14908, %int1_14909 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14910 = torch.constant.float 9.9999997473787516E-6
    %int1_14911 = torch.constant.int 1
    %17330 = torch.aten.add.Scalar %17321, %float9.999990e-06_14910, %int1_14911 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_14912 = torch.constant.float 9.9999997473787516E-6
    %int1_14913 = torch.constant.int 1
    %17331 = torch.aten.add.Scalar %17323, %float9.999990e-06_14912, %int1_14913 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
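    // rsqrt(mean(x^2) + eps): the reciprocal RMS used as the normalizer.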
    %17332 = torch.aten.rsqrt %17324 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %17333 = torch.aten.rsqrt %17325 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %17334 = torch.aten.rsqrt %17326 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %17335 = torch.aten.rsqrt %17327 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %17336 = torch.aten.rsqrt %17328 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %17337 = torch.aten.rsqrt %17329 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %17338 = torch.aten.rsqrt %17330 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %17339 = torch.aten.rsqrt %17331 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %17339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
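    // Normalize: x * rsqrt(mean(x^2) + eps), still in f32.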
    %17340 = torch.aten.mul.Tensor %17292, %17332 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17341 = torch.aten.mul.Tensor %17293, %17333 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17342 = torch.aten.mul.Tensor %17294, %17334 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17343 = torch.aten.mul.Tensor %17295, %17335 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17344 = torch.aten.mul.Tensor %17296, %17336 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17345 = torch.aten.mul.Tensor %17297, %17337 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17346 = torch.aten.mul.Tensor %17298, %17338 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17347 = torch.aten.mul.Tensor %17299, %17339 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
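    // Scale by the per-device replicas of the norm weight (%584 ... %591,
    // each tensor<[4096],f32>), completing the RMSNorm computation.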
    %17348 = torch.aten.mul.Tensor %584, %17340 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17349 = torch.aten.mul.Tensor %585, %17341 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17350 = torch.aten.mul.Tensor %586, %17342 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17350, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17351 = torch.aten.mul.Tensor %587, %17343 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17352 = torch.aten.mul.Tensor %588, %17344 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17353 = torch.aten.mul.Tensor %589, %17345 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17354 = torch.aten.mul.Tensor %590, %17346 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %17355 = torch.aten.mul.Tensor %591, %17347 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %17355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
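    // Downcast the normalized activations back to f16 (torch dtype code 5)
    // for the matmuls below.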
    %int5_14914 = torch.constant.int 5
    %17356 = torch.prims.convert_element_type %17348, %int5_14914 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14915 = torch.constant.int 5
    %17357 = torch.prims.convert_element_type %17349, %int5_14915 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14916 = torch.constant.int 5
    %17358 = torch.prims.convert_element_type %17350, %int5_14916 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14917 = torch.constant.int 5
    %17359 = torch.prims.convert_element_type %17351, %int5_14917 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14918 = torch.constant.int 5
    %17360 = torch.prims.convert_element_type %17352, %int5_14918 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14919 = torch.constant.int 5
    %17361 = torch.prims.convert_element_type %17353, %int5_14919 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14920 = torch.constant.int 5
    %17362 = torch.prims.convert_element_type %17354, %int5_14920 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_14921 = torch.constant.int 5
    %17363 = torch.prims.convert_element_type %17355, %int5_14921 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %17363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
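    // Q projection: each device transposes its [512,4096] query-weight shard
    // (%592 ... %599) to [4096,512] so the activations can be
    // right-multiplied against it.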
    %int1_14922 = torch.constant.int 1
    %int0_14923 = torch.constant.int 0
    %17364 = torch.prim.ListConstruct %int1_14922, %int0_14923 : (!torch.int, !torch.int) -> !torch.list<int>
    %17365 = torch.aten.permute %592, %17364 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_14924 = torch.constant.int 1
    %int0_14925 = torch.constant.int 0
    %17366 = torch.prim.ListConstruct %int1_14924, %int0_14925 : (!torch.int, !torch.int) -> !torch.list<int>
    %17367 = torch.aten.permute %593, %17366 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_14926 = torch.constant.int 1
    %int0_14927 = torch.constant.int 0
    %17368 = torch.prim.ListConstruct %int1_14926, %int0_14927 : (!torch.int, !torch.int) -> !torch.list<int>
    %17369 = torch.aten.permute %594, %17368 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_14928 = torch.constant.int 1
    %int0_14929 = torch.constant.int 0
    %17370 = torch.prim.ListConstruct %int1_14928, %int0_14929 : (!torch.int, !torch.int) -> !torch.list<int>
    %17371 = torch.aten.permute %595, %17370 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_14930 = torch.constant.int 1
    %int0_14931 = torch.constant.int 0
    %17372 = torch.prim.ListConstruct %int1_14930, %int0_14931 : (!torch.int, !torch.int) -> !torch.list<int>
    %17373 = torch.aten.permute %596, %17372 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_14932 = torch.constant.int 1
    %int0_14933 = torch.constant.int 0
    %17374 = torch.prim.ListConstruct %int1_14932, %int0_14933 : (!torch.int, !torch.int) -> !torch.list<int>
    %17375 = torch.aten.permute %597, %17374 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_14934 = torch.constant.int 1
    %int0_14935 = torch.constant.int 0
    %17376 = torch.prim.ListConstruct %int1_14934, %int0_14935 : (!torch.int, !torch.int) -> !torch.list<int>
    %17377 = torch.aten.permute %598, %17376 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_14936 = torch.constant.int 1
    %int0_14937 = torch.constant.int 0
    %17378 = torch.prim.ListConstruct %int1_14936, %int0_14937 : (!torch.int, !torch.int) -> !torch.list<int>
    %17379 = torch.aten.permute %599, %17378 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
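    // Per shard: flatten [4,?,4096] -> [4*?,4096], matmul with the transposed
    // shard, then view back to [4,?,512] -- 512 query channels per device,
    // presumably 4 heads of dim 128 under the 8-way split.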
    %int4_14938 = torch.constant.int 4
    %17380 = torch.aten.mul.int %int4_14938, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14939 = torch.constant.int 4096
    %17381 = torch.prim.ListConstruct %17380, %int4096_14939 : (!torch.int, !torch.int) -> !torch.list<int>
    %17382 = torch.aten.view %17356, %17381 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17382, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17383 = torch.aten.mm %17382, %17365 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %17383, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_14940 = torch.constant.int 4
    %int512_14941 = torch.constant.int 512
    %17384 = torch.prim.ListConstruct %int4_14940, %2482, %int512_14941 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17385 = torch.aten.view %17383, %17384 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %17385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14942 = torch.constant.int 4
    %17386 = torch.aten.mul.int %int4_14942, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14943 = torch.constant.int 4096
    %17387 = torch.prim.ListConstruct %17386, %int4096_14943 : (!torch.int, !torch.int) -> !torch.list<int>
    %17388 = torch.aten.view %17357, %17387 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17388, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17389 = torch.aten.mm %17388, %17367 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %17389, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_14944 = torch.constant.int 4
    %int512_14945 = torch.constant.int 512
    %17390 = torch.prim.ListConstruct %int4_14944, %2482, %int512_14945 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17391 = torch.aten.view %17389, %17390 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %17391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14946 = torch.constant.int 4
    %17392 = torch.aten.mul.int %int4_14946, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14947 = torch.constant.int 4096
    %17393 = torch.prim.ListConstruct %17392, %int4096_14947 : (!torch.int, !torch.int) -> !torch.list<int>
    %17394 = torch.aten.view %17358, %17393 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17394, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17395 = torch.aten.mm %17394, %17369 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %17395, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_14948 = torch.constant.int 4
    %int512_14949 = torch.constant.int 512
    %17396 = torch.prim.ListConstruct %int4_14948, %2482, %int512_14949 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17397 = torch.aten.view %17395, %17396 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %17397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14950 = torch.constant.int 4
    %17398 = torch.aten.mul.int %int4_14950, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14951 = torch.constant.int 4096
    %17399 = torch.prim.ListConstruct %17398, %int4096_14951 : (!torch.int, !torch.int) -> !torch.list<int>
    %17400 = torch.aten.view %17359, %17399 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17400, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17401 = torch.aten.mm %17400, %17371 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %17401, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_14952 = torch.constant.int 4
    %int512_14953 = torch.constant.int 512
    %17402 = torch.prim.ListConstruct %int4_14952, %2482, %int512_14953 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17403 = torch.aten.view %17401, %17402 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %17403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14954 = torch.constant.int 4
    %17404 = torch.aten.mul.int %int4_14954, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14955 = torch.constant.int 4096
    %17405 = torch.prim.ListConstruct %17404, %int4096_14955 : (!torch.int, !torch.int) -> !torch.list<int>
    %17406 = torch.aten.view %17360, %17405 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17406, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17407 = torch.aten.mm %17406, %17373 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %17407, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_14956 = torch.constant.int 4
    %int512_14957 = torch.constant.int 512
    %17408 = torch.prim.ListConstruct %int4_14956, %2482, %int512_14957 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17409 = torch.aten.view %17407, %17408 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %17409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14958 = torch.constant.int 4
    %17410 = torch.aten.mul.int %int4_14958, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14959 = torch.constant.int 4096
    %17411 = torch.prim.ListConstruct %17410, %int4096_14959 : (!torch.int, !torch.int) -> !torch.list<int>
    %17412 = torch.aten.view %17361, %17411 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17412, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17413 = torch.aten.mm %17412, %17375 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %17413, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_14960 = torch.constant.int 4
    %int512_14961 = torch.constant.int 512
    %17414 = torch.prim.ListConstruct %int4_14960, %2482, %int512_14961 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17415 = torch.aten.view %17413, %17414 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %17415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14962 = torch.constant.int 4
    %17416 = torch.aten.mul.int %int4_14962, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14963 = torch.constant.int 4096
    %17417 = torch.prim.ListConstruct %17416, %int4096_14963 : (!torch.int, !torch.int) -> !torch.list<int>
    %17418 = torch.aten.view %17362, %17417 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17418, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17419 = torch.aten.mm %17418, %17377 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %17419, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_14964 = torch.constant.int 4
    %int512_14965 = torch.constant.int 512
    %17420 = torch.prim.ListConstruct %int4_14964, %2482, %int512_14965 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17421 = torch.aten.view %17419, %17420 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %17421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_14966 = torch.constant.int 4
    %17422 = torch.aten.mul.int %int4_14966, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14967 = torch.constant.int 4096
    %17423 = torch.prim.ListConstruct %17422, %int4096_14967 : (!torch.int, !torch.int) -> !torch.list<int>
    %17424 = torch.aten.view %17363, %17423 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17424, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17425 = torch.aten.mm %17424, %17379 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %17425, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_14968 = torch.constant.int 4
    %int512_14969 = torch.constant.int 512
    %17426 = torch.prim.ListConstruct %int4_14968, %2482, %int512_14969 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17427 = torch.aten.view %17425, %17426 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %17427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
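    // K projection follows the same permute + flatten + matmul + view pattern
    // with [128,4096] shards (%600 ... %607): a single 128-wide slice of key
    // channels per device, consistent with one KV head per device under
    // 8-way tensor parallelism.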
    %int1_14970 = torch.constant.int 1
    %int0_14971 = torch.constant.int 0
    %17428 = torch.prim.ListConstruct %int1_14970, %int0_14971 : (!torch.int, !torch.int) -> !torch.list<int>
    %17429 = torch.aten.permute %600, %17428 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_14972 = torch.constant.int 1
    %int0_14973 = torch.constant.int 0
    %17430 = torch.prim.ListConstruct %int1_14972, %int0_14973 : (!torch.int, !torch.int) -> !torch.list<int>
    %17431 = torch.aten.permute %601, %17430 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_14974 = torch.constant.int 1
    %int0_14975 = torch.constant.int 0
    %17432 = torch.prim.ListConstruct %int1_14974, %int0_14975 : (!torch.int, !torch.int) -> !torch.list<int>
    %17433 = torch.aten.permute %602, %17432 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_14976 = torch.constant.int 1
    %int0_14977 = torch.constant.int 0
    %17434 = torch.prim.ListConstruct %int1_14976, %int0_14977 : (!torch.int, !torch.int) -> !torch.list<int>
    %17435 = torch.aten.permute %603, %17434 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_14978 = torch.constant.int 1
    %int0_14979 = torch.constant.int 0
    %17436 = torch.prim.ListConstruct %int1_14978, %int0_14979 : (!torch.int, !torch.int) -> !torch.list<int>
    %17437 = torch.aten.permute %604, %17436 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_14980 = torch.constant.int 1
    %int0_14981 = torch.constant.int 0
    %17438 = torch.prim.ListConstruct %int1_14980, %int0_14981 : (!torch.int, !torch.int) -> !torch.list<int>
    %17439 = torch.aten.permute %605, %17438 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_14982 = torch.constant.int 1
    %int0_14983 = torch.constant.int 0
    %17440 = torch.prim.ListConstruct %int1_14982, %int0_14983 : (!torch.int, !torch.int) -> !torch.list<int>
    %17441 = torch.aten.permute %606, %17440 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_14984 = torch.constant.int 1
    %int0_14985 = torch.constant.int 0
    %17442 = torch.prim.ListConstruct %int1_14984, %int0_14985 : (!torch.int, !torch.int) -> !torch.list<int>
    %17443 = torch.aten.permute %607, %17442 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_14986 = torch.constant.int 4
    %17444 = torch.aten.mul.int %int4_14986, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14987 = torch.constant.int 4096
    %17445 = torch.prim.ListConstruct %17444, %int4096_14987 : (!torch.int, !torch.int) -> !torch.list<int>
    %17446 = torch.aten.view %17356, %17445 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17446, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17447 = torch.aten.mm %17446, %17429 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17447, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_14988 = torch.constant.int 4
    %int128_14989 = torch.constant.int 128
    %17448 = torch.prim.ListConstruct %int4_14988, %2482, %int128_14989 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17449 = torch.aten.view %17447, %17448 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_14990 = torch.constant.int 4
    %17450 = torch.aten.mul.int %int4_14990, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14991 = torch.constant.int 4096
    %17451 = torch.prim.ListConstruct %17450, %int4096_14991 : (!torch.int, !torch.int) -> !torch.list<int>
    %17452 = torch.aten.view %17357, %17451 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17452, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17453 = torch.aten.mm %17452, %17431 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17453, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_14992 = torch.constant.int 4
    %int128_14993 = torch.constant.int 128
    %17454 = torch.prim.ListConstruct %int4_14992, %2482, %int128_14993 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17455 = torch.aten.view %17453, %17454 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_14994 = torch.constant.int 4
    %17456 = torch.aten.mul.int %int4_14994, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14995 = torch.constant.int 4096
    %17457 = torch.prim.ListConstruct %17456, %int4096_14995 : (!torch.int, !torch.int) -> !torch.list<int>
    %17458 = torch.aten.view %17358, %17457 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17458, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17459 = torch.aten.mm %17458, %17433 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17459, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_14996 = torch.constant.int 4
    %int128_14997 = torch.constant.int 128
    %17460 = torch.prim.ListConstruct %int4_14996, %2482, %int128_14997 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17461 = torch.aten.view %17459, %17460 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_14998 = torch.constant.int 4
    %17462 = torch.aten.mul.int %int4_14998, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_14999 = torch.constant.int 4096
    %17463 = torch.prim.ListConstruct %17462, %int4096_14999 : (!torch.int, !torch.int) -> !torch.list<int>
    %17464 = torch.aten.view %17359, %17463 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17464, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17465 = torch.aten.mm %17464, %17435 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17465, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15000 = torch.constant.int 4
    %int128_15001 = torch.constant.int 128
    %17466 = torch.prim.ListConstruct %int4_15000, %2482, %int128_15001 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17467 = torch.aten.view %17465, %17466 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_15002 = torch.constant.int 4
    %17468 = torch.aten.mul.int %int4_15002, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_15003 = torch.constant.int 4096
    %17469 = torch.prim.ListConstruct %17468, %int4096_15003 : (!torch.int, !torch.int) -> !torch.list<int>
    %17470 = torch.aten.view %17360, %17469 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17470, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17471 = torch.aten.mm %17470, %17437 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17471, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15004 = torch.constant.int 4
    %int128_15005 = torch.constant.int 128
    %17472 = torch.prim.ListConstruct %int4_15004, %2482, %int128_15005 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17473 = torch.aten.view %17471, %17472 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_15006 = torch.constant.int 4
    %17474 = torch.aten.mul.int %int4_15006, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_15007 = torch.constant.int 4096
    %17475 = torch.prim.ListConstruct %17474, %int4096_15007 : (!torch.int, !torch.int) -> !torch.list<int>
    %17476 = torch.aten.view %17361, %17475 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17476, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17477 = torch.aten.mm %17476, %17439 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17477, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15008 = torch.constant.int 4
    %int128_15009 = torch.constant.int 128
    %17478 = torch.prim.ListConstruct %int4_15008, %2482, %int128_15009 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17479 = torch.aten.view %17477, %17478 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_15010 = torch.constant.int 4
    %17480 = torch.aten.mul.int %int4_15010, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_15011 = torch.constant.int 4096
    %17481 = torch.prim.ListConstruct %17480, %int4096_15011 : (!torch.int, !torch.int) -> !torch.list<int>
    %17482 = torch.aten.view %17362, %17481 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17482, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17483 = torch.aten.mm %17482, %17441 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17483, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15012 = torch.constant.int 4
    %int128_15013 = torch.constant.int 128
    %17484 = torch.prim.ListConstruct %int4_15012, %2482, %int128_15013 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17485 = torch.aten.view %17483, %17484 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_15014 = torch.constant.int 4
    %17486 = torch.aten.mul.int %int4_15014, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_15015 = torch.constant.int 4096
    %17487 = torch.prim.ListConstruct %17486, %int4096_15015 : (!torch.int, !torch.int) -> !torch.list<int>
    %17488 = torch.aten.view %17363, %17487 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17488, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17489 = torch.aten.mm %17488, %17443 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17489, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15016 = torch.constant.int 4
    %int128_15017 = torch.constant.int 128
    %17490 = torch.prim.ListConstruct %int4_15016, %2482, %int128_15017 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17491 = torch.aten.view %17489, %17490 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
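    // Annotation: %608-%615 below appear to be the eight per-device [128,4096] projection
    // weight shards (likely the attn_v weights of this block, by analogy with the sharded
    // globals declared at module scope); each is permuted to [4096,128] so the flattened
    // activations can be multiplied on the right.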
    %int1_15018 = torch.constant.int 1
    %int0_15019 = torch.constant.int 0
    %17492 = torch.prim.ListConstruct %int1_15018, %int0_15019 : (!torch.int, !torch.int) -> !torch.list<int>
    %17493 = torch.aten.permute %608, %17492 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_15020 = torch.constant.int 1
    %int0_15021 = torch.constant.int 0
    %17494 = torch.prim.ListConstruct %int1_15020, %int0_15021 : (!torch.int, !torch.int) -> !torch.list<int>
    %17495 = torch.aten.permute %609, %17494 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_15022 = torch.constant.int 1
    %int0_15023 = torch.constant.int 0
    %17496 = torch.prim.ListConstruct %int1_15022, %int0_15023 : (!torch.int, !torch.int) -> !torch.list<int>
    %17497 = torch.aten.permute %610, %17496 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_15024 = torch.constant.int 1
    %int0_15025 = torch.constant.int 0
    %17498 = torch.prim.ListConstruct %int1_15024, %int0_15025 : (!torch.int, !torch.int) -> !torch.list<int>
    %17499 = torch.aten.permute %611, %17498 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_15026 = torch.constant.int 1
    %int0_15027 = torch.constant.int 0
    %17500 = torch.prim.ListConstruct %int1_15026, %int0_15027 : (!torch.int, !torch.int) -> !torch.list<int>
    %17501 = torch.aten.permute %612, %17500 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_15028 = torch.constant.int 1
    %int0_15029 = torch.constant.int 0
    %17502 = torch.prim.ListConstruct %int1_15028, %int0_15029 : (!torch.int, !torch.int) -> !torch.list<int>
    %17503 = torch.aten.permute %613, %17502 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_15030 = torch.constant.int 1
    %int0_15031 = torch.constant.int 0
    %17504 = torch.prim.ListConstruct %int1_15030, %int0_15031 : (!torch.int, !torch.int) -> !torch.list<int>
    %17505 = torch.aten.permute %614, %17504 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_15032 = torch.constant.int 1
    %int0_15033 = torch.constant.int 0
    %17506 = torch.prim.ListConstruct %int1_15032, %int0_15033 : (!torch.int, !torch.int) -> !torch.list<int>
    %17507 = torch.aten.permute %615, %17506 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
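    // Annotation: with the transposed weights in hand, the same flatten -> mm -> view pattern
    // runs once per device shard (%17356-%17363 are the eight [4,?,4096] hidden-state
    // replicas), yielding one [4,?,128] projected head per device.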
    %int4_15034 = torch.constant.int 4
    %17508 = torch.aten.mul.int %int4_15034, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_15035 = torch.constant.int 4096
    %17509 = torch.prim.ListConstruct %17508, %int4096_15035 : (!torch.int, !torch.int) -> !torch.list<int>
    %17510 = torch.aten.view %17356, %17509 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17510, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17511 = torch.aten.mm %17510, %17493 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17511, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15036 = torch.constant.int 4
    %int128_15037 = torch.constant.int 128
    %17512 = torch.prim.ListConstruct %int4_15036, %2482, %int128_15037 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17513 = torch.aten.view %17511, %17512 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_15038 = torch.constant.int 4
    %17514 = torch.aten.mul.int %int4_15038, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_15039 = torch.constant.int 4096
    %17515 = torch.prim.ListConstruct %17514, %int4096_15039 : (!torch.int, !torch.int) -> !torch.list<int>
    %17516 = torch.aten.view %17357, %17515 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17516, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17517 = torch.aten.mm %17516, %17495 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17517, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15040 = torch.constant.int 4
    %int128_15041 = torch.constant.int 128
    %17518 = torch.prim.ListConstruct %int4_15040, %2482, %int128_15041 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17519 = torch.aten.view %17517, %17518 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_15042 = torch.constant.int 4
    %17520 = torch.aten.mul.int %int4_15042, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_15043 = torch.constant.int 4096
    %17521 = torch.prim.ListConstruct %17520, %int4096_15043 : (!torch.int, !torch.int) -> !torch.list<int>
    %17522 = torch.aten.view %17358, %17521 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17522, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17523 = torch.aten.mm %17522, %17497 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17523, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15044 = torch.constant.int 4
    %int128_15045 = torch.constant.int 128
    %17524 = torch.prim.ListConstruct %int4_15044, %2482, %int128_15045 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17525 = torch.aten.view %17523, %17524 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_15046 = torch.constant.int 4
    %17526 = torch.aten.mul.int %int4_15046, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_15047 = torch.constant.int 4096
    %17527 = torch.prim.ListConstruct %17526, %int4096_15047 : (!torch.int, !torch.int) -> !torch.list<int>
    %17528 = torch.aten.view %17359, %17527 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17528, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17529 = torch.aten.mm %17528, %17499 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17529, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15048 = torch.constant.int 4
    %int128_15049 = torch.constant.int 128
    %17530 = torch.prim.ListConstruct %int4_15048, %2482, %int128_15049 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17531 = torch.aten.view %17529, %17530 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_15050 = torch.constant.int 4
    %17532 = torch.aten.mul.int %int4_15050, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_15051 = torch.constant.int 4096
    %17533 = torch.prim.ListConstruct %17532, %int4096_15051 : (!torch.int, !torch.int) -> !torch.list<int>
    %17534 = torch.aten.view %17360, %17533 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17534, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17535 = torch.aten.mm %17534, %17501 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17535, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15052 = torch.constant.int 4
    %int128_15053 = torch.constant.int 128
    %17536 = torch.prim.ListConstruct %int4_15052, %2482, %int128_15053 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17537 = torch.aten.view %17535, %17536 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_15054 = torch.constant.int 4
    %17538 = torch.aten.mul.int %int4_15054, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_15055 = torch.constant.int 4096
    %17539 = torch.prim.ListConstruct %17538, %int4096_15055 : (!torch.int, !torch.int) -> !torch.list<int>
    %17540 = torch.aten.view %17361, %17539 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17540, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17541 = torch.aten.mm %17540, %17503 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17541, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15056 = torch.constant.int 4
    %int128_15057 = torch.constant.int 128
    %17542 = torch.prim.ListConstruct %int4_15056, %2482, %int128_15057 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17543 = torch.aten.view %17541, %17542 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_15058 = torch.constant.int 4
    %17544 = torch.aten.mul.int %int4_15058, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_15059 = torch.constant.int 4096
    %17545 = torch.prim.ListConstruct %17544, %int4096_15059 : (!torch.int, !torch.int) -> !torch.list<int>
    %17546 = torch.aten.view %17362, %17545 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17546, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17547 = torch.aten.mm %17546, %17505 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17547, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15060 = torch.constant.int 4
    %int128_15061 = torch.constant.int 128
    %17548 = torch.prim.ListConstruct %int4_15060, %2482, %int128_15061 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17549 = torch.aten.view %17547, %17548 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_15062 = torch.constant.int 4
    %17550 = torch.aten.mul.int %int4_15062, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_15063 = torch.constant.int 4096
    %17551 = torch.prim.ListConstruct %17550, %int4096_15063 : (!torch.int, !torch.int) -> !torch.list<int>
    %17552 = torch.aten.view %17363, %17551 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %17552, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %17553 = torch.aten.mm %17552, %17507 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %17553, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_15064 = torch.constant.int 4
    %int128_15065 = torch.constant.int 128
    %17554 = torch.prim.ListConstruct %int4_15064, %2482, %int128_15065 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17555 = torch.aten.view %17553, %17554 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %17555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
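    // Annotation: the eight [4,?,512] query projections (%17385 ... %17427) are now re-viewed
    // as [4,?,4,128], i.e. four 128-dim query heads per device shard.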
    %int4_15066 = torch.constant.int 4
    %int4_15067 = torch.constant.int 4
    %int128_15068 = torch.constant.int 128
    %17556 = torch.prim.ListConstruct %int4_15066, %2482, %int4_15067, %int128_15068 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17557 = torch.aten.view %17385, %17556 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15069 = torch.constant.int 4
    %int4_15070 = torch.constant.int 4
    %int128_15071 = torch.constant.int 128
    %17558 = torch.prim.ListConstruct %int4_15069, %2482, %int4_15070, %int128_15071 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17559 = torch.aten.view %17391, %17558 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15072 = torch.constant.int 4
    %int4_15073 = torch.constant.int 4
    %int128_15074 = torch.constant.int 128
    %17560 = torch.prim.ListConstruct %int4_15072, %2482, %int4_15073, %int128_15074 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17561 = torch.aten.view %17397, %17560 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15075 = torch.constant.int 4
    %int4_15076 = torch.constant.int 4
    %int128_15077 = torch.constant.int 128
    %17562 = torch.prim.ListConstruct %int4_15075, %2482, %int4_15076, %int128_15077 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17563 = torch.aten.view %17403, %17562 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15078 = torch.constant.int 4
    %int4_15079 = torch.constant.int 4
    %int128_15080 = torch.constant.int 128
    %17564 = torch.prim.ListConstruct %int4_15078, %2482, %int4_15079, %int128_15080 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17565 = torch.aten.view %17409, %17564 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15081 = torch.constant.int 4
    %int4_15082 = torch.constant.int 4
    %int128_15083 = torch.constant.int 128
    %17566 = torch.prim.ListConstruct %int4_15081, %2482, %int4_15082, %int128_15083 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17567 = torch.aten.view %17415, %17566 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15084 = torch.constant.int 4
    %int4_15085 = torch.constant.int 4
    %int128_15086 = torch.constant.int 128
    %17568 = torch.prim.ListConstruct %int4_15084, %2482, %int4_15085, %int128_15086 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17569 = torch.aten.view %17421, %17568 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15087 = torch.constant.int 4
    %int4_15088 = torch.constant.int 4
    %int128_15089 = torch.constant.int 128
    %17570 = torch.prim.ListConstruct %int4_15087, %2482, %int4_15088, %int128_15089 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17571 = torch.aten.view %17427, %17570 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
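    // Annotation: the [4,?,128] key and value projections are likewise re-viewed as
    // [4,?,1,128] -- a single KV head per device, consistent with a grouped-query-attention
    // layout (4 query heads sharing 1 KV head on each of the 8 devices).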
    %int4_15090 = torch.constant.int 4
    %int1_15091 = torch.constant.int 1
    %int128_15092 = torch.constant.int 128
    %17572 = torch.prim.ListConstruct %int4_15090, %2482, %int1_15091, %int128_15092 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17573 = torch.aten.view %17449, %17572 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15093 = torch.constant.int 4
    %int1_15094 = torch.constant.int 1
    %int128_15095 = torch.constant.int 128
    %17574 = torch.prim.ListConstruct %int4_15093, %2482, %int1_15094, %int128_15095 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17575 = torch.aten.view %17455, %17574 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15096 = torch.constant.int 4
    %int1_15097 = torch.constant.int 1
    %int128_15098 = torch.constant.int 128
    %17576 = torch.prim.ListConstruct %int4_15096, %2482, %int1_15097, %int128_15098 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17577 = torch.aten.view %17461, %17576 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15099 = torch.constant.int 4
    %int1_15100 = torch.constant.int 1
    %int128_15101 = torch.constant.int 128
    %17578 = torch.prim.ListConstruct %int4_15099, %2482, %int1_15100, %int128_15101 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17579 = torch.aten.view %17467, %17578 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15102 = torch.constant.int 4
    %int1_15103 = torch.constant.int 1
    %int128_15104 = torch.constant.int 128
    %17580 = torch.prim.ListConstruct %int4_15102, %2482, %int1_15103, %int128_15104 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17581 = torch.aten.view %17473, %17580 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15105 = torch.constant.int 4
    %int1_15106 = torch.constant.int 1
    %int128_15107 = torch.constant.int 128
    %17582 = torch.prim.ListConstruct %int4_15105, %2482, %int1_15106, %int128_15107 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17583 = torch.aten.view %17479, %17582 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15108 = torch.constant.int 4
    %int1_15109 = torch.constant.int 1
    %int128_15110 = torch.constant.int 128
    %17584 = torch.prim.ListConstruct %int4_15108, %2482, %int1_15109, %int128_15110 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17585 = torch.aten.view %17485, %17584 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15111 = torch.constant.int 4
    %int1_15112 = torch.constant.int 1
    %int128_15113 = torch.constant.int 128
    %17586 = torch.prim.ListConstruct %int4_15111, %2482, %int1_15112, %int128_15113 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17587 = torch.aten.view %17491, %17586 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15114 = torch.constant.int 4
    %int1_15115 = torch.constant.int 1
    %int128_15116 = torch.constant.int 128
    %17588 = torch.prim.ListConstruct %int4_15114, %2482, %int1_15115, %int128_15116 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17589 = torch.aten.view %17513, %17588 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15117 = torch.constant.int 4
    %int1_15118 = torch.constant.int 1
    %int128_15119 = torch.constant.int 128
    %17590 = torch.prim.ListConstruct %int4_15117, %2482, %int1_15118, %int128_15119 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17591 = torch.aten.view %17519, %17590 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15120 = torch.constant.int 4
    %int1_15121 = torch.constant.int 1
    %int128_15122 = torch.constant.int 128
    %17592 = torch.prim.ListConstruct %int4_15120, %2482, %int1_15121, %int128_15122 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17593 = torch.aten.view %17525, %17592 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15123 = torch.constant.int 4
    %int1_15124 = torch.constant.int 1
    %int128_15125 = torch.constant.int 128
    %17594 = torch.prim.ListConstruct %int4_15123, %2482, %int1_15124, %int128_15125 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17595 = torch.aten.view %17531, %17594 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15126 = torch.constant.int 4
    %int1_15127 = torch.constant.int 1
    %int128_15128 = torch.constant.int 128
    %17596 = torch.prim.ListConstruct %int4_15126, %2482, %int1_15127, %int128_15128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17597 = torch.aten.view %17537, %17596 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15129 = torch.constant.int 4
    %int1_15130 = torch.constant.int 1
    %int128_15131 = torch.constant.int 128
    %17598 = torch.prim.ListConstruct %int4_15129, %2482, %int1_15130, %int128_15131 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17599 = torch.aten.view %17543, %17598 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15132 = torch.constant.int 4
    %int1_15133 = torch.constant.int 1
    %int128_15134 = torch.constant.int 128
    %17600 = torch.prim.ListConstruct %int4_15132, %2482, %int1_15133, %int128_15134 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17601 = torch.aten.view %17549, %17600 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_15135 = torch.constant.int 4
    %int1_15136 = torch.constant.int 1
    %int128_15137 = torch.constant.int 128
    %17602 = torch.prim.ListConstruct %int4_15135, %2482, %int1_15136, %int128_15137 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17603 = torch.aten.view %17555, %17602 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
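    // Annotation: the following ops materialize the rotary-embedding (RoPE) table on the host:
    // positions = arange(131072); inv_freq = 1 / 500000^(i/128) for the 64 even channel
    // indices i (arange(0, 128, 2), cast to f32, divided by 128, raised under base 5.0e+05,
    // then reciprocated); angles = outer(positions, inv_freq); finally cos(angles) and
    // sin(angles) are packed into a [131072,64] complex<f32> tensor.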
    %int131072_15138 = torch.constant.int 131072
    %none_15139 = torch.constant.none
    %none_15140 = torch.constant.none
    %cpu_15141 = torch.constant.device "cpu"
    %false_15142 = torch.constant.bool false
    %17604 = torch.aten.arange %int131072_15138, %none_15139, %none_15140, %cpu_15141, %false_15142 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_15143 = torch.constant.int 0
    %int128_15144 = torch.constant.int 128
    %int2_15145 = torch.constant.int 2
    %none_15146 = torch.constant.none
    %none_15147 = torch.constant.none
    %cpu_15148 = torch.constant.device "cpu"
    %false_15149 = torch.constant.bool false
    %17605 = torch.aten.arange.start_step %int0_15143, %int128_15144, %int2_15145, %none_15146, %none_15147, %cpu_15148, %false_15149 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_15150 = torch.constant.int 0
    %int0_15151 = torch.constant.int 0
    %int64_15152 = torch.constant.int 64
    %int1_15153 = torch.constant.int 1
    %17606 = torch.aten.slice.Tensor %17605, %int0_15150, %int0_15151, %int64_15152, %int1_15153 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_15154 = torch.constant.int 6
    %17607 = torch.prims.convert_element_type %17606, %int6_15154 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_15155 = torch.constant.int 128
    %17608 = torch.aten.div.Scalar %17607, %int128_15155 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_15156 = torch.constant.float 5.000000e+05
    %17609 = torch.aten.pow.Scalar %float5.000000e05_15156, %17608 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %17610 = torch.aten.reciprocal %17609 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_15157 = torch.constant.float 1.000000e+00
    %17611 = torch.aten.mul.Scalar %17610, %float1.000000e00_15157 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_15158 = torch.constant.int 131072
    %int1_15159 = torch.constant.int 1
    %17612 = torch.prim.ListConstruct %int131072_15158, %int1_15159 : (!torch.int, !torch.int) -> !torch.list<int>
    %17613 = torch.aten.view %17604, %17612 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %17614 = torch.aten.mul.Tensor %17613, %17611 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %17615 = torch.aten.cos %17614 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %17616 = torch.aten.sin %17614 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %17617 = torch.aten.complex %17615, %17616 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
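    // Annotation: the host-side table is replicated to all eight device affinities via
    // flow.tensor.transfer; each device receives its own copy of the [131072,64]
    // complex<f32> RoPE table.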
    %17618 = torch_c.to_builtin_tensor %17617 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17619 = flow.tensor.transfer %17618 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %17620 = torch_c.from_builtin_tensor %17619 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17621 = torch_c.to_builtin_tensor %17617 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17622 = flow.tensor.transfer %17621 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %17623 = torch_c.from_builtin_tensor %17622 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17624 = torch_c.to_builtin_tensor %17617 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17625 = flow.tensor.transfer %17624 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %17626 = torch_c.from_builtin_tensor %17625 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17627 = torch_c.to_builtin_tensor %17617 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17628 = flow.tensor.transfer %17627 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %17629 = torch_c.from_builtin_tensor %17628 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17630 = torch_c.to_builtin_tensor %17617 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17631 = flow.tensor.transfer %17630 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %17632 = torch_c.from_builtin_tensor %17631 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17633 = torch_c.to_builtin_tensor %17617 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17634 = flow.tensor.transfer %17633 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %17635 = torch_c.from_builtin_tensor %17634 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17636 = torch_c.to_builtin_tensor %17617 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17637 = flow.tensor.transfer %17636 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %17638 = torch_c.from_builtin_tensor %17637 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17639 = torch_c.to_builtin_tensor %17617 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17640 = flow.tensor.transfer %17639 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %17641 = torch_c.from_builtin_tensor %17640 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
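    // Annotation: per device, the table is sliced to the live sequence length (read off dim 1
    // of that device's [4,?,512] query tensor), unsqueezed to a broadcastable [1,?,1,64]
    // shape, and applied to the [4,?,4,128] queries by bitcasting adjacent f16 pairs to
    // complex<f16>, multiplying by the table (complex rotation), bitcasting the complex<f32>
    // result back to f32 pairs, and truncating to f16. Device 0 comes first.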
    %int1_15160 = torch.constant.int 1
    %17642 = torch.aten.size.int %17385, %int1_15160 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_15161 = torch.constant.int 0
    %17643 = torch.aten.add.int %int0_15161, %17642 : !torch.int, !torch.int -> !torch.int
    %int0_15162 = torch.constant.int 0
    %int0_15163 = torch.constant.int 0
    %int1_15164 = torch.constant.int 1
    %17644 = torch.aten.slice.Tensor %17620, %int0_15162, %int0_15163, %17643, %int1_15164 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17644, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15165 = torch.constant.int 1
    %int0_15166 = torch.constant.int 0
    %int9223372036854775807_15167 = torch.constant.int 9223372036854775807
    %int1_15168 = torch.constant.int 1
    %17645 = torch.aten.slice.Tensor %17644, %int1_15165, %int0_15166, %int9223372036854775807_15167, %int1_15168 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17645, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15169 = torch.constant.int 0
    %17646 = torch.aten.unsqueeze %17645, %int0_15169 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17646, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15170 = torch.constant.int 2
    %17647 = torch.aten.unsqueeze %17646, %int2_15170 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17647, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15171 = torch.constant.int 3
    %int0_15172 = torch.constant.int 0
    %int9223372036854775807_15173 = torch.constant.int 9223372036854775807
    %int1_15174 = torch.constant.int 1
    %17648 = torch.aten.slice.Tensor %17647, %int3_15171, %int0_15172, %int9223372036854775807_15173, %int1_15174 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17648, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17649 = torch_c.to_builtin_tensor %17557 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_15175 = arith.constant 1 : index
    %dim_15176 = tensor.dim %17649, %c1_15175 : tensor<4x?x4x128xf16>
    %17650 = flow.tensor.bitcast %17649 : tensor<4x?x4x128xf16>{%dim_15176} -> tensor<4x?x4x64xcomplex<f16>>{%dim_15176}
    %17651 = torch_c.from_builtin_tensor %17650 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %17651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %17652 = torch.aten.mul.Tensor %17651, %17648 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %17652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %17653 = torch_c.to_builtin_tensor %17652 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_15177 = arith.constant 1 : index
    %dim_15178 = tensor.dim %17653, %c1_15177 : tensor<4x?x4x64xcomplex<f32>>
    %17654 = flow.tensor.bitcast %17653 : tensor<4x?x4x64xcomplex<f32>>{%dim_15178} -> tensor<4x?x4x128xf32>{%dim_15178}
    %17655 = torch_c.from_builtin_tensor %17654 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %17655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_15179 = torch.constant.int 5
    %17656 = torch.prims.convert_element_type %17655, %int5_15179 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
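    // Annotation: the identical slice / broadcast / complex-multiply sequence now repeats
    // verbatim for the remaining device shards (%17559, %17561, ...), one block per device.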
    %int1_15180 = torch.constant.int 1
    %17657 = torch.aten.size.int %17391, %int1_15180 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_15181 = torch.constant.int 0
    %17658 = torch.aten.add.int %int0_15181, %17657 : !torch.int, !torch.int -> !torch.int
    %int0_15182 = torch.constant.int 0
    %int0_15183 = torch.constant.int 0
    %int1_15184 = torch.constant.int 1
    %17659 = torch.aten.slice.Tensor %17623, %int0_15182, %int0_15183, %17658, %int1_15184 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17659, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15185 = torch.constant.int 1
    %int0_15186 = torch.constant.int 0
    %int9223372036854775807_15187 = torch.constant.int 9223372036854775807
    %int1_15188 = torch.constant.int 1
    %17660 = torch.aten.slice.Tensor %17659, %int1_15185, %int0_15186, %int9223372036854775807_15187, %int1_15188 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17660, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15189 = torch.constant.int 0
    %17661 = torch.aten.unsqueeze %17660, %int0_15189 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17661, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15190 = torch.constant.int 2
    %17662 = torch.aten.unsqueeze %17661, %int2_15190 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17662, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15191 = torch.constant.int 3
    %int0_15192 = torch.constant.int 0
    %int9223372036854775807_15193 = torch.constant.int 9223372036854775807
    %int1_15194 = torch.constant.int 1
    %17663 = torch.aten.slice.Tensor %17662, %int3_15191, %int0_15192, %int9223372036854775807_15193, %int1_15194 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17663, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17664 = torch_c.to_builtin_tensor %17559 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_15195 = arith.constant 1 : index
    %dim_15196 = tensor.dim %17664, %c1_15195 : tensor<4x?x4x128xf16>
    %17665 = flow.tensor.bitcast %17664 : tensor<4x?x4x128xf16>{%dim_15196} -> tensor<4x?x4x64xcomplex<f16>>{%dim_15196}
    %17666 = torch_c.from_builtin_tensor %17665 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %17666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %17667 = torch.aten.mul.Tensor %17666, %17663 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %17667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %17668 = torch_c.to_builtin_tensor %17667 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_15197 = arith.constant 1 : index
    %dim_15198 = tensor.dim %17668, %c1_15197 : tensor<4x?x4x64xcomplex<f32>>
    %17669 = flow.tensor.bitcast %17668 : tensor<4x?x4x64xcomplex<f32>>{%dim_15198} -> tensor<4x?x4x128xf32>{%dim_15198}
    %17670 = torch_c.from_builtin_tensor %17669 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %17670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_15199 = torch.constant.int 5
    %17671 = torch.prims.convert_element_type %17670, %int5_15199 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_15200 = torch.constant.int 1
    %17672 = torch.aten.size.int %17397, %int1_15200 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_15201 = torch.constant.int 0
    %17673 = torch.aten.add.int %int0_15201, %17672 : !torch.int, !torch.int -> !torch.int
    %int0_15202 = torch.constant.int 0
    %int0_15203 = torch.constant.int 0
    %int1_15204 = torch.constant.int 1
    %17674 = torch.aten.slice.Tensor %17626, %int0_15202, %int0_15203, %17673, %int1_15204 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17674, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15205 = torch.constant.int 1
    %int0_15206 = torch.constant.int 0
    %int9223372036854775807_15207 = torch.constant.int 9223372036854775807
    %int1_15208 = torch.constant.int 1
    %17675 = torch.aten.slice.Tensor %17674, %int1_15205, %int0_15206, %int9223372036854775807_15207, %int1_15208 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17675, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15209 = torch.constant.int 0
    %17676 = torch.aten.unsqueeze %17675, %int0_15209 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17676, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15210 = torch.constant.int 2
    %17677 = torch.aten.unsqueeze %17676, %int2_15210 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17677, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15211 = torch.constant.int 3
    %int0_15212 = torch.constant.int 0
    %int9223372036854775807_15213 = torch.constant.int 9223372036854775807
    %int1_15214 = torch.constant.int 1
    %17678 = torch.aten.slice.Tensor %17677, %int3_15211, %int0_15212, %int9223372036854775807_15213, %int1_15214 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17678, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17679 = torch_c.to_builtin_tensor %17561 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_15215 = arith.constant 1 : index
    %dim_15216 = tensor.dim %17679, %c1_15215 : tensor<4x?x4x128xf16>
    %17680 = flow.tensor.bitcast %17679 : tensor<4x?x4x128xf16>{%dim_15216} -> tensor<4x?x4x64xcomplex<f16>>{%dim_15216}
    %17681 = torch_c.from_builtin_tensor %17680 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %17681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %17682 = torch.aten.mul.Tensor %17681, %17678 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %17682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %17683 = torch_c.to_builtin_tensor %17682 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_15217 = arith.constant 1 : index
    %dim_15218 = tensor.dim %17683, %c1_15217 : tensor<4x?x4x64xcomplex<f32>>
    %17684 = flow.tensor.bitcast %17683 : tensor<4x?x4x64xcomplex<f32>>{%dim_15218} -> tensor<4x?x4x128xf32>{%dim_15218}
    %17685 = torch_c.from_builtin_tensor %17684 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %17685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_15219 = torch.constant.int 5
    %17686 = torch.prims.convert_element_type %17685, %int5_15219 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_15220 = torch.constant.int 1
    %17687 = torch.aten.size.int %17403, %int1_15220 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_15221 = torch.constant.int 0
    %17688 = torch.aten.add.int %int0_15221, %17687 : !torch.int, !torch.int -> !torch.int
    %int0_15222 = torch.constant.int 0
    %int0_15223 = torch.constant.int 0
    %int1_15224 = torch.constant.int 1
    %17689 = torch.aten.slice.Tensor %17629, %int0_15222, %int0_15223, %17688, %int1_15224 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17689, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15225 = torch.constant.int 1
    %int0_15226 = torch.constant.int 0
    %int9223372036854775807_15227 = torch.constant.int 9223372036854775807
    %int1_15228 = torch.constant.int 1
    %17690 = torch.aten.slice.Tensor %17689, %int1_15225, %int0_15226, %int9223372036854775807_15227, %int1_15228 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17690, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15229 = torch.constant.int 0
    %17691 = torch.aten.unsqueeze %17690, %int0_15229 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17691, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15230 = torch.constant.int 2
    %17692 = torch.aten.unsqueeze %17691, %int2_15230 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17692, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15231 = torch.constant.int 3
    %int0_15232 = torch.constant.int 0
    %int9223372036854775807_15233 = torch.constant.int 9223372036854775807
    %int1_15234 = torch.constant.int 1
    %17693 = torch.aten.slice.Tensor %17692, %int3_15231, %int0_15232, %int9223372036854775807_15233, %int1_15234 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17693, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17694 = torch_c.to_builtin_tensor %17563 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_15235 = arith.constant 1 : index
    %dim_15236 = tensor.dim %17694, %c1_15235 : tensor<4x?x4x128xf16>
    %17695 = flow.tensor.bitcast %17694 : tensor<4x?x4x128xf16>{%dim_15236} -> tensor<4x?x4x64xcomplex<f16>>{%dim_15236}
    %17696 = torch_c.from_builtin_tensor %17695 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %17696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %17697 = torch.aten.mul.Tensor %17696, %17693 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %17697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %17698 = torch_c.to_builtin_tensor %17697 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_15237 = arith.constant 1 : index
    %dim_15238 = tensor.dim %17698, %c1_15237 : tensor<4x?x4x64xcomplex<f32>>
    %17699 = flow.tensor.bitcast %17698 : tensor<4x?x4x64xcomplex<f32>>{%dim_15238} -> tensor<4x?x4x128xf32>{%dim_15238}
    %17700 = torch_c.from_builtin_tensor %17699 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %17700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_15239 = torch.constant.int 5
    %17701 = torch.prims.convert_element_type %17700, %int5_15239 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_15240 = torch.constant.int 1
    %17702 = torch.aten.size.int %17409, %int1_15240 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_15241 = torch.constant.int 0
    %17703 = torch.aten.add.int %int0_15241, %17702 : !torch.int, !torch.int -> !torch.int
    %int0_15242 = torch.constant.int 0
    %int0_15243 = torch.constant.int 0
    %int1_15244 = torch.constant.int 1
    %17704 = torch.aten.slice.Tensor %17632, %int0_15242, %int0_15243, %17703, %int1_15244 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17704, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15245 = torch.constant.int 1
    %int0_15246 = torch.constant.int 0
    %int9223372036854775807_15247 = torch.constant.int 9223372036854775807
    %int1_15248 = torch.constant.int 1
    %17705 = torch.aten.slice.Tensor %17704, %int1_15245, %int0_15246, %int9223372036854775807_15247, %int1_15248 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17705, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15249 = torch.constant.int 0
    %17706 = torch.aten.unsqueeze %17705, %int0_15249 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17706, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15250 = torch.constant.int 2
    %17707 = torch.aten.unsqueeze %17706, %int2_15250 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17707, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15251 = torch.constant.int 3
    %int0_15252 = torch.constant.int 0
    %int9223372036854775807_15253 = torch.constant.int 9223372036854775807
    %int1_15254 = torch.constant.int 1
    %17708 = torch.aten.slice.Tensor %17707, %int3_15251, %int0_15252, %int9223372036854775807_15253, %int1_15254 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17708, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17709 = torch_c.to_builtin_tensor %17565 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_15255 = arith.constant 1 : index
    %dim_15256 = tensor.dim %17709, %c1_15255 : tensor<4x?x4x128xf16>
    %17710 = flow.tensor.bitcast %17709 : tensor<4x?x4x128xf16>{%dim_15256} -> tensor<4x?x4x64xcomplex<f16>>{%dim_15256}
    %17711 = torch_c.from_builtin_tensor %17710 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %17711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %17712 = torch.aten.mul.Tensor %17711, %17708 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %17712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %17713 = torch_c.to_builtin_tensor %17712 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_15257 = arith.constant 1 : index
    %dim_15258 = tensor.dim %17713, %c1_15257 : tensor<4x?x4x64xcomplex<f32>>
    %17714 = flow.tensor.bitcast %17713 : tensor<4x?x4x64xcomplex<f32>>{%dim_15258} -> tensor<4x?x4x128xf32>{%dim_15258}
    %17715 = torch_c.from_builtin_tensor %17714 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %17715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_15259 = torch.constant.int 5
    %17716 = torch.prims.convert_element_type %17715, %int5_15259 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_15260 = torch.constant.int 1
    %17717 = torch.aten.size.int %17415, %int1_15260 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_15261 = torch.constant.int 0
    %17718 = torch.aten.add.int %int0_15261, %17717 : !torch.int, !torch.int -> !torch.int
    %int0_15262 = torch.constant.int 0
    %int0_15263 = torch.constant.int 0
    %int1_15264 = torch.constant.int 1
    %17719 = torch.aten.slice.Tensor %17635, %int0_15262, %int0_15263, %17718, %int1_15264 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17719, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15265 = torch.constant.int 1
    %int0_15266 = torch.constant.int 0
    %int9223372036854775807_15267 = torch.constant.int 9223372036854775807
    %int1_15268 = torch.constant.int 1
    %17720 = torch.aten.slice.Tensor %17719, %int1_15265, %int0_15266, %int9223372036854775807_15267, %int1_15268 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17720, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15269 = torch.constant.int 0
    %17721 = torch.aten.unsqueeze %17720, %int0_15269 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17721, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15270 = torch.constant.int 2
    %17722 = torch.aten.unsqueeze %17721, %int2_15270 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17722, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15271 = torch.constant.int 3
    %int0_15272 = torch.constant.int 0
    %int9223372036854775807_15273 = torch.constant.int 9223372036854775807
    %int1_15274 = torch.constant.int 1
    %17723 = torch.aten.slice.Tensor %17722, %int3_15271, %int0_15272, %int9223372036854775807_15273, %int1_15274 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17723, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17724 = torch_c.to_builtin_tensor %17567 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_15275 = arith.constant 1 : index
    %dim_15276 = tensor.dim %17724, %c1_15275 : tensor<4x?x4x128xf16>
    %17725 = flow.tensor.bitcast %17724 : tensor<4x?x4x128xf16>{%dim_15276} -> tensor<4x?x4x64xcomplex<f16>>{%dim_15276}
    %17726 = torch_c.from_builtin_tensor %17725 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %17726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %17727 = torch.aten.mul.Tensor %17726, %17723 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %17727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %17728 = torch_c.to_builtin_tensor %17727 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_15277 = arith.constant 1 : index
    %dim_15278 = tensor.dim %17728, %c1_15277 : tensor<4x?x4x64xcomplex<f32>>
    %17729 = flow.tensor.bitcast %17728 : tensor<4x?x4x64xcomplex<f32>>{%dim_15278} -> tensor<4x?x4x128xf32>{%dim_15278}
    %17730 = torch_c.from_builtin_tensor %17729 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %17730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_15279 = torch.constant.int 5
    %17731 = torch.prims.convert_element_type %17730, %int5_15279 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
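    // The same rotary-embedding pattern now repeats for the remaining per-device query
    // shards: slice the precomputed complex table to the runtime sequence length,
    // reshape it to [1,?,1,64], bitcast the [4,?,4,128] f16 activations to
    // [4,?,4,64] complex<f16>, rotate by complex multiply, and bitcast/convert back to f16.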
    %int1_15280 = torch.constant.int 1
    %17732 = torch.aten.size.int %17421, %int1_15280 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_15281 = torch.constant.int 0
    %17733 = torch.aten.add.int %int0_15281, %17732 : !torch.int, !torch.int -> !torch.int
    %int0_15282 = torch.constant.int 0
    %int0_15283 = torch.constant.int 0
    %int1_15284 = torch.constant.int 1
    %17734 = torch.aten.slice.Tensor %17638, %int0_15282, %int0_15283, %17733, %int1_15284 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17734, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15285 = torch.constant.int 1
    %int0_15286 = torch.constant.int 0
    %int9223372036854775807_15287 = torch.constant.int 9223372036854775807
    %int1_15288 = torch.constant.int 1
    %17735 = torch.aten.slice.Tensor %17734, %int1_15285, %int0_15286, %int9223372036854775807_15287, %int1_15288 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17735, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15289 = torch.constant.int 0
    %17736 = torch.aten.unsqueeze %17735, %int0_15289 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17736, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15290 = torch.constant.int 2
    %17737 = torch.aten.unsqueeze %17736, %int2_15290 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17737, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15291 = torch.constant.int 3
    %int0_15292 = torch.constant.int 0
    %int9223372036854775807_15293 = torch.constant.int 9223372036854775807
    %int1_15294 = torch.constant.int 1
    %17738 = torch.aten.slice.Tensor %17737, %int3_15291, %int0_15292, %int9223372036854775807_15293, %int1_15294 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17738, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17739 = torch_c.to_builtin_tensor %17569 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_15295 = arith.constant 1 : index
    %dim_15296 = tensor.dim %17739, %c1_15295 : tensor<4x?x4x128xf16>
    %17740 = flow.tensor.bitcast %17739 : tensor<4x?x4x128xf16>{%dim_15296} -> tensor<4x?x4x64xcomplex<f16>>{%dim_15296}
    %17741 = torch_c.from_builtin_tensor %17740 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %17741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %17742 = torch.aten.mul.Tensor %17741, %17738 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %17742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %17743 = torch_c.to_builtin_tensor %17742 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_15297 = arith.constant 1 : index
    %dim_15298 = tensor.dim %17743, %c1_15297 : tensor<4x?x4x64xcomplex<f32>>
    %17744 = flow.tensor.bitcast %17743 : tensor<4x?x4x64xcomplex<f32>>{%dim_15298} -> tensor<4x?x4x128xf32>{%dim_15298}
    %17745 = torch_c.from_builtin_tensor %17744 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %17745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_15299 = torch.constant.int 5
    %17746 = torch.prims.convert_element_type %17745, %int5_15299 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_15300 = torch.constant.int 1
    %17747 = torch.aten.size.int %17427, %int1_15300 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_15301 = torch.constant.int 0
    %17748 = torch.aten.add.int %int0_15301, %17747 : !torch.int, !torch.int -> !torch.int
    %int0_15302 = torch.constant.int 0
    %int0_15303 = torch.constant.int 0
    %int1_15304 = torch.constant.int 1
    %17749 = torch.aten.slice.Tensor %17641, %int0_15302, %int0_15303, %17748, %int1_15304 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17749, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15305 = torch.constant.int 1
    %int0_15306 = torch.constant.int 0
    %int9223372036854775807_15307 = torch.constant.int 9223372036854775807
    %int1_15308 = torch.constant.int 1
    %17750 = torch.aten.slice.Tensor %17749, %int1_15305, %int0_15306, %int9223372036854775807_15307, %int1_15308 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17750, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15309 = torch.constant.int 0
    %17751 = torch.aten.unsqueeze %17750, %int0_15309 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17751, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15310 = torch.constant.int 2
    %17752 = torch.aten.unsqueeze %17751, %int2_15310 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17752, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15311 = torch.constant.int 3
    %int0_15312 = torch.constant.int 0
    %int9223372036854775807_15313 = torch.constant.int 9223372036854775807
    %int1_15314 = torch.constant.int 1
    %17753 = torch.aten.slice.Tensor %17752, %int3_15311, %int0_15312, %int9223372036854775807_15313, %int1_15314 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17753, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17754 = torch_c.to_builtin_tensor %17571 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_15315 = arith.constant 1 : index
    %dim_15316 = tensor.dim %17754, %c1_15315 : tensor<4x?x4x128xf16>
    %17755 = flow.tensor.bitcast %17754 : tensor<4x?x4x128xf16>{%dim_15316} -> tensor<4x?x4x64xcomplex<f16>>{%dim_15316}
    %17756 = torch_c.from_builtin_tensor %17755 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %17756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %17757 = torch.aten.mul.Tensor %17756, %17753 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %17757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %17758 = torch_c.to_builtin_tensor %17757 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_15317 = arith.constant 1 : index
    %dim_15318 = tensor.dim %17758, %c1_15317 : tensor<4x?x4x64xcomplex<f32>>
    %17759 = flow.tensor.bitcast %17758 : tensor<4x?x4x64xcomplex<f32>>{%dim_15318} -> tensor<4x?x4x128xf32>{%dim_15318}
    %17760 = torch_c.from_builtin_tensor %17759 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %17760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_15319 = torch.constant.int 5
    %17761 = torch.prims.convert_element_type %17760, %int5_15319 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %17761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
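    // Rebuild the rotary frequency table on the host. With positions p = 0..131071 and
    // even channels 2j (j = 0..63), the ops below compute
    //   inv_freq[j] = 1 / 500000^(2j/128)   (rope theta 500000, scale factor 1.0)
    //   table[p][j] = cos(p * inv_freq[j]) + i * sin(p * inv_freq[j])
    // yielding a [131072,64] complex<f32> tensor.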
    %int131072_15320 = torch.constant.int 131072
    %none_15321 = torch.constant.none
    %none_15322 = torch.constant.none
    %cpu_15323 = torch.constant.device "cpu"
    %false_15324 = torch.constant.bool false
    %17762 = torch.aten.arange %int131072_15320, %none_15321, %none_15322, %cpu_15323, %false_15324 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_15325 = torch.constant.int 0
    %int128_15326 = torch.constant.int 128
    %int2_15327 = torch.constant.int 2
    %none_15328 = torch.constant.none
    %none_15329 = torch.constant.none
    %cpu_15330 = torch.constant.device "cpu"
    %false_15331 = torch.constant.bool false
    %17763 = torch.aten.arange.start_step %int0_15325, %int128_15326, %int2_15327, %none_15328, %none_15329, %cpu_15330, %false_15331 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_15332 = torch.constant.int 0
    %int0_15333 = torch.constant.int 0
    %int64_15334 = torch.constant.int 64
    %int1_15335 = torch.constant.int 1
    %17764 = torch.aten.slice.Tensor %17763, %int0_15332, %int0_15333, %int64_15334, %int1_15335 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_15336 = torch.constant.int 6
    %17765 = torch.prims.convert_element_type %17764, %int6_15336 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_15337 = torch.constant.int 128
    %17766 = torch.aten.div.Scalar %17765, %int128_15337 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_15338 = torch.constant.float 5.000000e+05
    %17767 = torch.aten.pow.Scalar %float5.000000e05_15338, %17766 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %17768 = torch.aten.reciprocal %17767 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_15339 = torch.constant.float 1.000000e+00
    %17769 = torch.aten.mul.Scalar %17768, %float1.000000e00_15339 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_15340 = torch.constant.int 131072
    %int1_15341 = torch.constant.int 1
    %17770 = torch.prim.ListConstruct %int131072_15340, %int1_15341 : (!torch.int, !torch.int) -> !torch.list<int>
    %17771 = torch.aten.view %17762, %17770 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %17772 = torch.aten.mul.Tensor %17771, %17769 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %17773 = torch.aten.cos %17772 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %17774 = torch.aten.sin %17772 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %17775 = torch.aten.complex %17773, %17774 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
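    // Replicate the [131072,64] complex rotary table onto all eight devices
    // (@__device_0 .. @__device_7) so each shard can apply it locally.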
    %17776 = torch_c.to_builtin_tensor %17775 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17777 = flow.tensor.transfer %17776 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %17778 = torch_c.from_builtin_tensor %17777 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17779 = torch_c.to_builtin_tensor %17775 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17780 = flow.tensor.transfer %17779 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %17781 = torch_c.from_builtin_tensor %17780 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17782 = torch_c.to_builtin_tensor %17775 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17783 = flow.tensor.transfer %17782 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %17784 = torch_c.from_builtin_tensor %17783 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17785 = torch_c.to_builtin_tensor %17775 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17786 = flow.tensor.transfer %17785 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %17787 = torch_c.from_builtin_tensor %17786 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17788 = torch_c.to_builtin_tensor %17775 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17789 = flow.tensor.transfer %17788 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %17790 = torch_c.from_builtin_tensor %17789 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17791 = torch_c.to_builtin_tensor %17775 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17792 = flow.tensor.transfer %17791 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %17793 = torch_c.from_builtin_tensor %17792 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17794 = torch_c.to_builtin_tensor %17775 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17795 = flow.tensor.transfer %17794 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %17796 = torch_c.from_builtin_tensor %17795 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %17797 = torch_c.to_builtin_tensor %17775 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %17798 = flow.tensor.transfer %17797 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %17799 = torch_c.from_builtin_tensor %17798 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
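    // Apply the same rotary embedding to the per-device key shards, once per device.
    // Each shard carries a single KV head ([4,?,1,128]; 128 = head dim): the local
    // table copy is sliced to the current sequence length, the keys are bitcast to
    // [4,?,1,64] complex<f16>, rotated by complex multiply, and converted back to f16.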
    %int1_15342 = torch.constant.int 1
    %17800 = torch.aten.size.int %17449, %int1_15342 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_15343 = torch.constant.int 0
    %17801 = torch.aten.add.int %int0_15343, %17800 : !torch.int, !torch.int -> !torch.int
    %int0_15344 = torch.constant.int 0
    %int0_15345 = torch.constant.int 0
    %int1_15346 = torch.constant.int 1
    %17802 = torch.aten.slice.Tensor %17778, %int0_15344, %int0_15345, %17801, %int1_15346 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17802, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15347 = torch.constant.int 1
    %int0_15348 = torch.constant.int 0
    %int9223372036854775807_15349 = torch.constant.int 9223372036854775807
    %int1_15350 = torch.constant.int 1
    %17803 = torch.aten.slice.Tensor %17802, %int1_15347, %int0_15348, %int9223372036854775807_15349, %int1_15350 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17803, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15351 = torch.constant.int 0
    %17804 = torch.aten.unsqueeze %17803, %int0_15351 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17804, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15352 = torch.constant.int 2
    %17805 = torch.aten.unsqueeze %17804, %int2_15352 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17805, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15353 = torch.constant.int 3
    %int0_15354 = torch.constant.int 0
    %int9223372036854775807_15355 = torch.constant.int 9223372036854775807
    %int1_15356 = torch.constant.int 1
    %17806 = torch.aten.slice.Tensor %17805, %int3_15353, %int0_15354, %int9223372036854775807_15355, %int1_15356 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17806, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17807 = torch_c.to_builtin_tensor %17573 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_15357 = arith.constant 1 : index
    %dim_15358 = tensor.dim %17807, %c1_15357 : tensor<4x?x1x128xf16>
    %17808 = flow.tensor.bitcast %17807 : tensor<4x?x1x128xf16>{%dim_15358} -> tensor<4x?x1x64xcomplex<f16>>{%dim_15358}
    %17809 = torch_c.from_builtin_tensor %17808 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %17809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %17810 = torch.aten.mul.Tensor %17809, %17806 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %17811 = torch_c.to_builtin_tensor %17810 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_15359 = arith.constant 1 : index
    %dim_15360 = tensor.dim %17811, %c1_15359 : tensor<4x?x1x64xcomplex<f32>>
    %17812 = flow.tensor.bitcast %17811 : tensor<4x?x1x64xcomplex<f32>>{%dim_15360} -> tensor<4x?x1x128xf32>{%dim_15360}
    %17813 = torch_c.from_builtin_tensor %17812 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %17813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_15361 = torch.constant.int 5
    %17814 = torch.prims.convert_element_type %17813, %int5_15361 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_15362 = torch.constant.int 1
    %17815 = torch.aten.size.int %17455, %int1_15362 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_15363 = torch.constant.int 0
    %17816 = torch.aten.add.int %int0_15363, %17815 : !torch.int, !torch.int -> !torch.int
    %int0_15364 = torch.constant.int 0
    %int0_15365 = torch.constant.int 0
    %int1_15366 = torch.constant.int 1
    %17817 = torch.aten.slice.Tensor %17781, %int0_15364, %int0_15365, %17816, %int1_15366 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17817, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15367 = torch.constant.int 1
    %int0_15368 = torch.constant.int 0
    %int9223372036854775807_15369 = torch.constant.int 9223372036854775807
    %int1_15370 = torch.constant.int 1
    %17818 = torch.aten.slice.Tensor %17817, %int1_15367, %int0_15368, %int9223372036854775807_15369, %int1_15370 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17818, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15371 = torch.constant.int 0
    %17819 = torch.aten.unsqueeze %17818, %int0_15371 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17819, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15372 = torch.constant.int 2
    %17820 = torch.aten.unsqueeze %17819, %int2_15372 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17820, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15373 = torch.constant.int 3
    %int0_15374 = torch.constant.int 0
    %int9223372036854775807_15375 = torch.constant.int 9223372036854775807
    %int1_15376 = torch.constant.int 1
    %17821 = torch.aten.slice.Tensor %17820, %int3_15373, %int0_15374, %int9223372036854775807_15375, %int1_15376 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17821, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17822 = torch_c.to_builtin_tensor %17575 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_15377 = arith.constant 1 : index
    %dim_15378 = tensor.dim %17822, %c1_15377 : tensor<4x?x1x128xf16>
    %17823 = flow.tensor.bitcast %17822 : tensor<4x?x1x128xf16>{%dim_15378} -> tensor<4x?x1x64xcomplex<f16>>{%dim_15378}
    %17824 = torch_c.from_builtin_tensor %17823 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %17824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %17825 = torch.aten.mul.Tensor %17824, %17821 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %17826 = torch_c.to_builtin_tensor %17825 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_15379 = arith.constant 1 : index
    %dim_15380 = tensor.dim %17826, %c1_15379 : tensor<4x?x1x64xcomplex<f32>>
    %17827 = flow.tensor.bitcast %17826 : tensor<4x?x1x64xcomplex<f32>>{%dim_15380} -> tensor<4x?x1x128xf32>{%dim_15380}
    %17828 = torch_c.from_builtin_tensor %17827 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %17828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_15381 = torch.constant.int 5
    %17829 = torch.prims.convert_element_type %17828, %int5_15381 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_15382 = torch.constant.int 1
    %17830 = torch.aten.size.int %17461, %int1_15382 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_15383 = torch.constant.int 0
    %17831 = torch.aten.add.int %int0_15383, %17830 : !torch.int, !torch.int -> !torch.int
    %int0_15384 = torch.constant.int 0
    %int0_15385 = torch.constant.int 0
    %int1_15386 = torch.constant.int 1
    %17832 = torch.aten.slice.Tensor %17784, %int0_15384, %int0_15385, %17831, %int1_15386 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17832, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15387 = torch.constant.int 1
    %int0_15388 = torch.constant.int 0
    %int9223372036854775807_15389 = torch.constant.int 9223372036854775807
    %int1_15390 = torch.constant.int 1
    %17833 = torch.aten.slice.Tensor %17832, %int1_15387, %int0_15388, %int9223372036854775807_15389, %int1_15390 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17833, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15391 = torch.constant.int 0
    %17834 = torch.aten.unsqueeze %17833, %int0_15391 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17834, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15392 = torch.constant.int 2
    %17835 = torch.aten.unsqueeze %17834, %int2_15392 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17835, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15393 = torch.constant.int 3
    %int0_15394 = torch.constant.int 0
    %int9223372036854775807_15395 = torch.constant.int 9223372036854775807
    %int1_15396 = torch.constant.int 1
    %17836 = torch.aten.slice.Tensor %17835, %int3_15393, %int0_15394, %int9223372036854775807_15395, %int1_15396 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17836, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17837 = torch_c.to_builtin_tensor %17577 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_15397 = arith.constant 1 : index
    %dim_15398 = tensor.dim %17837, %c1_15397 : tensor<4x?x1x128xf16>
    %17838 = flow.tensor.bitcast %17837 : tensor<4x?x1x128xf16>{%dim_15398} -> tensor<4x?x1x64xcomplex<f16>>{%dim_15398}
    %17839 = torch_c.from_builtin_tensor %17838 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %17839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %17840 = torch.aten.mul.Tensor %17839, %17836 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %17841 = torch_c.to_builtin_tensor %17840 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_15399 = arith.constant 1 : index
    %dim_15400 = tensor.dim %17841, %c1_15399 : tensor<4x?x1x64xcomplex<f32>>
    %17842 = flow.tensor.bitcast %17841 : tensor<4x?x1x64xcomplex<f32>>{%dim_15400} -> tensor<4x?x1x128xf32>{%dim_15400}
    %17843 = torch_c.from_builtin_tensor %17842 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %17843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_15401 = torch.constant.int 5
    %17844 = torch.prims.convert_element_type %17843, %int5_15401 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_15402 = torch.constant.int 1
    %17845 = torch.aten.size.int %17467, %int1_15402 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_15403 = torch.constant.int 0
    %17846 = torch.aten.add.int %int0_15403, %17845 : !torch.int, !torch.int -> !torch.int
    %int0_15404 = torch.constant.int 0
    %int0_15405 = torch.constant.int 0
    %int1_15406 = torch.constant.int 1
    %17847 = torch.aten.slice.Tensor %17787, %int0_15404, %int0_15405, %17846, %int1_15406 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17847, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15407 = torch.constant.int 1
    %int0_15408 = torch.constant.int 0
    %int9223372036854775807_15409 = torch.constant.int 9223372036854775807
    %int1_15410 = torch.constant.int 1
    %17848 = torch.aten.slice.Tensor %17847, %int1_15407, %int0_15408, %int9223372036854775807_15409, %int1_15410 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17848, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15411 = torch.constant.int 0
    %17849 = torch.aten.unsqueeze %17848, %int0_15411 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17849, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15412 = torch.constant.int 2
    %17850 = torch.aten.unsqueeze %17849, %int2_15412 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17850, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15413 = torch.constant.int 3
    %int0_15414 = torch.constant.int 0
    %int9223372036854775807_15415 = torch.constant.int 9223372036854775807
    %int1_15416 = torch.constant.int 1
    %17851 = torch.aten.slice.Tensor %17850, %int3_15413, %int0_15414, %int9223372036854775807_15415, %int1_15416 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17851, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17852 = torch_c.to_builtin_tensor %17579 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_15417 = arith.constant 1 : index
    %dim_15418 = tensor.dim %17852, %c1_15417 : tensor<4x?x1x128xf16>
    %17853 = flow.tensor.bitcast %17852 : tensor<4x?x1x128xf16>{%dim_15418} -> tensor<4x?x1x64xcomplex<f16>>{%dim_15418}
    %17854 = torch_c.from_builtin_tensor %17853 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %17854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %17855 = torch.aten.mul.Tensor %17854, %17851 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %17856 = torch_c.to_builtin_tensor %17855 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_15419 = arith.constant 1 : index
    %dim_15420 = tensor.dim %17856, %c1_15419 : tensor<4x?x1x64xcomplex<f32>>
    %17857 = flow.tensor.bitcast %17856 : tensor<4x?x1x64xcomplex<f32>>{%dim_15420} -> tensor<4x?x1x128xf32>{%dim_15420}
    %17858 = torch_c.from_builtin_tensor %17857 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %17858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_15421 = torch.constant.int 5
    %17859 = torch.prims.convert_element_type %17858, %int5_15421 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_15422 = torch.constant.int 1
    %17860 = torch.aten.size.int %17473, %int1_15422 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_15423 = torch.constant.int 0
    %17861 = torch.aten.add.int %int0_15423, %17860 : !torch.int, !torch.int -> !torch.int
    %int0_15424 = torch.constant.int 0
    %int0_15425 = torch.constant.int 0
    %int1_15426 = torch.constant.int 1
    %17862 = torch.aten.slice.Tensor %17790, %int0_15424, %int0_15425, %17861, %int1_15426 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17862, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15427 = torch.constant.int 1
    %int0_15428 = torch.constant.int 0
    %int9223372036854775807_15429 = torch.constant.int 9223372036854775807
    %int1_15430 = torch.constant.int 1
    %17863 = torch.aten.slice.Tensor %17862, %int1_15427, %int0_15428, %int9223372036854775807_15429, %int1_15430 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17863, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15431 = torch.constant.int 0
    %17864 = torch.aten.unsqueeze %17863, %int0_15431 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17864, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15432 = torch.constant.int 2
    %17865 = torch.aten.unsqueeze %17864, %int2_15432 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17865, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15433 = torch.constant.int 3
    %int0_15434 = torch.constant.int 0
    %int9223372036854775807_15435 = torch.constant.int 9223372036854775807
    %int1_15436 = torch.constant.int 1
    %17866 = torch.aten.slice.Tensor %17865, %int3_15433, %int0_15434, %int9223372036854775807_15435, %int1_15436 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17866, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17867 = torch_c.to_builtin_tensor %17581 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_15437 = arith.constant 1 : index
    %dim_15438 = tensor.dim %17867, %c1_15437 : tensor<4x?x1x128xf16>
    %17868 = flow.tensor.bitcast %17867 : tensor<4x?x1x128xf16>{%dim_15438} -> tensor<4x?x1x64xcomplex<f16>>{%dim_15438}
    %17869 = torch_c.from_builtin_tensor %17868 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %17869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %17870 = torch.aten.mul.Tensor %17869, %17866 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %17871 = torch_c.to_builtin_tensor %17870 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_15439 = arith.constant 1 : index
    %dim_15440 = tensor.dim %17871, %c1_15439 : tensor<4x?x1x64xcomplex<f32>>
    %17872 = flow.tensor.bitcast %17871 : tensor<4x?x1x64xcomplex<f32>>{%dim_15440} -> tensor<4x?x1x128xf32>{%dim_15440}
    %17873 = torch_c.from_builtin_tensor %17872 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %17873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_15441 = torch.constant.int 5
    %17874 = torch.prims.convert_element_type %17873, %int5_15441 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_15442 = torch.constant.int 1
    %17875 = torch.aten.size.int %17479, %int1_15442 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_15443 = torch.constant.int 0
    %17876 = torch.aten.add.int %int0_15443, %17875 : !torch.int, !torch.int -> !torch.int
    %int0_15444 = torch.constant.int 0
    %int0_15445 = torch.constant.int 0
    %int1_15446 = torch.constant.int 1
    %17877 = torch.aten.slice.Tensor %17793, %int0_15444, %int0_15445, %17876, %int1_15446 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17877, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15447 = torch.constant.int 1
    %int0_15448 = torch.constant.int 0
    %int9223372036854775807_15449 = torch.constant.int 9223372036854775807
    %int1_15450 = torch.constant.int 1
    %17878 = torch.aten.slice.Tensor %17877, %int1_15447, %int0_15448, %int9223372036854775807_15449, %int1_15450 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17878, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15451 = torch.constant.int 0
    %17879 = torch.aten.unsqueeze %17878, %int0_15451 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17879, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15452 = torch.constant.int 2
    %17880 = torch.aten.unsqueeze %17879, %int2_15452 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17880, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15453 = torch.constant.int 3
    %int0_15454 = torch.constant.int 0
    %int9223372036854775807_15455 = torch.constant.int 9223372036854775807
    %int1_15456 = torch.constant.int 1
    %17881 = torch.aten.slice.Tensor %17880, %int3_15453, %int0_15454, %int9223372036854775807_15455, %int1_15456 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17881, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17882 = torch_c.to_builtin_tensor %17583 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_15457 = arith.constant 1 : index
    %dim_15458 = tensor.dim %17882, %c1_15457 : tensor<4x?x1x128xf16>
    %17883 = flow.tensor.bitcast %17882 : tensor<4x?x1x128xf16>{%dim_15458} -> tensor<4x?x1x64xcomplex<f16>>{%dim_15458}
    %17884 = torch_c.from_builtin_tensor %17883 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %17884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %17885 = torch.aten.mul.Tensor %17884, %17881 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %17886 = torch_c.to_builtin_tensor %17885 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_15459 = arith.constant 1 : index
    %dim_15460 = tensor.dim %17886, %c1_15459 : tensor<4x?x1x64xcomplex<f32>>
    %17887 = flow.tensor.bitcast %17886 : tensor<4x?x1x64xcomplex<f32>>{%dim_15460} -> tensor<4x?x1x128xf32>{%dim_15460}
    %17888 = torch_c.from_builtin_tensor %17887 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %17888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_15461 = torch.constant.int 5
    %17889 = torch.prims.convert_element_type %17888, %int5_15461 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_15462 = torch.constant.int 1
    %17890 = torch.aten.size.int %17485, %int1_15462 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_15463 = torch.constant.int 0
    %17891 = torch.aten.add.int %int0_15463, %17890 : !torch.int, !torch.int -> !torch.int
    %int0_15464 = torch.constant.int 0
    %int0_15465 = torch.constant.int 0
    %int1_15466 = torch.constant.int 1
    %17892 = torch.aten.slice.Tensor %17796, %int0_15464, %int0_15465, %17891, %int1_15466 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17892, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15467 = torch.constant.int 1
    %int0_15468 = torch.constant.int 0
    %int9223372036854775807_15469 = torch.constant.int 9223372036854775807
    %int1_15470 = torch.constant.int 1
    %17893 = torch.aten.slice.Tensor %17892, %int1_15467, %int0_15468, %int9223372036854775807_15469, %int1_15470 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17893, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15471 = torch.constant.int 0
    %17894 = torch.aten.unsqueeze %17893, %int0_15471 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17894, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15472 = torch.constant.int 2
    %17895 = torch.aten.unsqueeze %17894, %int2_15472 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17895, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15473 = torch.constant.int 3
    %int0_15474 = torch.constant.int 0
    %int9223372036854775807_15475 = torch.constant.int 9223372036854775807
    %int1_15476 = torch.constant.int 1
    %17896 = torch.aten.slice.Tensor %17895, %int3_15473, %int0_15474, %int9223372036854775807_15475, %int1_15476 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17896, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17897 = torch_c.to_builtin_tensor %17585 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_15477 = arith.constant 1 : index
    %dim_15478 = tensor.dim %17897, %c1_15477 : tensor<4x?x1x128xf16>
    %17898 = flow.tensor.bitcast %17897 : tensor<4x?x1x128xf16>{%dim_15478} -> tensor<4x?x1x64xcomplex<f16>>{%dim_15478}
    %17899 = torch_c.from_builtin_tensor %17898 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %17899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %17900 = torch.aten.mul.Tensor %17899, %17896 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %17901 = torch_c.to_builtin_tensor %17900 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_15479 = arith.constant 1 : index
    %dim_15480 = tensor.dim %17901, %c1_15479 : tensor<4x?x1x64xcomplex<f32>>
    %17902 = flow.tensor.bitcast %17901 : tensor<4x?x1x64xcomplex<f32>>{%dim_15480} -> tensor<4x?x1x128xf32>{%dim_15480}
    %17903 = torch_c.from_builtin_tensor %17902 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %17903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_15481 = torch.constant.int 5
    %17904 = torch.prims.convert_element_type %17903, %int5_15481 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_15482 = torch.constant.int 1
    %17905 = torch.aten.size.int %17491, %int1_15482 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_15483 = torch.constant.int 0
    %17906 = torch.aten.add.int %int0_15483, %17905 : !torch.int, !torch.int -> !torch.int
    %int0_15484 = torch.constant.int 0
    %int0_15485 = torch.constant.int 0
    %int1_15486 = torch.constant.int 1
    %17907 = torch.aten.slice.Tensor %17799, %int0_15484, %int0_15485, %17906, %int1_15486 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17907, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_15487 = torch.constant.int 1
    %int0_15488 = torch.constant.int 0
    %int9223372036854775807_15489 = torch.constant.int 9223372036854775807
    %int1_15490 = torch.constant.int 1
    %17908 = torch.aten.slice.Tensor %17907, %int1_15487, %int0_15488, %int9223372036854775807_15489, %int1_15490 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %17908, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_15491 = torch.constant.int 0
    %17909 = torch.aten.unsqueeze %17908, %int0_15491 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %17909, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_15492 = torch.constant.int 2
    %17910 = torch.aten.unsqueeze %17909, %int2_15492 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17910, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_15493 = torch.constant.int 3
    %int0_15494 = torch.constant.int 0
    %int9223372036854775807_15495 = torch.constant.int 9223372036854775807
    %int1_15496 = torch.constant.int 1
    %17911 = torch.aten.slice.Tensor %17910, %int3_15493, %int0_15494, %int9223372036854775807_15495, %int1_15496 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17911, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %17912 = torch_c.to_builtin_tensor %17587 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_15497 = arith.constant 1 : index
    %dim_15498 = tensor.dim %17912, %c1_15497 : tensor<4x?x1x128xf16>
    %17913 = flow.tensor.bitcast %17912 : tensor<4x?x1x128xf16>{%dim_15498} -> tensor<4x?x1x64xcomplex<f16>>{%dim_15498}
    %17914 = torch_c.from_builtin_tensor %17913 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %17914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %17915 = torch.aten.mul.Tensor %17914, %17911 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %17915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %17916 = torch_c.to_builtin_tensor %17915 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_15499 = arith.constant 1 : index
    %dim_15500 = tensor.dim %17916, %c1_15499 : tensor<4x?x1x64xcomplex<f32>>
    %17917 = flow.tensor.bitcast %17916 : tensor<4x?x1x64xcomplex<f32>>{%dim_15500} -> tensor<4x?x1x128xf32>{%dim_15500}
    %17918 = torch_c.from_builtin_tensor %17917 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %17918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_15501 = torch.constant.int 5
    %17919 = torch.prims.convert_element_type %17918, %int5_15501 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %17919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
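    // Compute destination indices for what appears to be a paged KV cache: each
    // device's page-id tensor is scaled by 64, likely the number of cache slots per
    // page (consistent with 32 transformer blocks x 2 partitions for K and V).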
    %int64_15502 = torch.constant.int 64
    %17920 = torch.aten.mul.Scalar %2364, %int64_15502 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17920, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_15503 = torch.constant.int 64
    %17921 = torch.aten.mul.Scalar %2367, %int64_15503 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17921, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_15504 = torch.constant.int 64
    %17922 = torch.aten.mul.Scalar %2370, %int64_15504 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17922, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_15505 = torch.constant.int 64
    %17923 = torch.aten.mul.Scalar %2373, %int64_15505 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17923, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_15506 = torch.constant.int 64
    %17924 = torch.aten.mul.Scalar %2376, %int64_15506 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17924, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_15507 = torch.constant.int 64
    %17925 = torch.aten.mul.Scalar %2379, %int64_15507 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17925, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_15508 = torch.constant.int 64
    %17926 = torch.aten.mul.Scalar %2382, %int64_15508 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17926, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_15509 = torch.constant.int 64
    %17927 = torch.aten.mul.Scalar %2385, %int64_15509 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17927, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
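    // Offset the scaled page ids by 16, presumably selecting this block's key
    // partition within each page (16 = 2 slots per block x block index 8, assuming the
    // 32x2 slot layout noted above).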
    %int16_15510 = torch.constant.int 16
    %int1_15511 = torch.constant.int 1
    %17928 = torch.aten.add.Scalar %17920, %int16_15510, %int1_15511 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17928, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int16_15512 = torch.constant.int 16
    %int1_15513 = torch.constant.int 1
    %17929 = torch.aten.add.Scalar %17921, %int16_15512, %int1_15513 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17929, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int16_15514 = torch.constant.int 16
    %int1_15515 = torch.constant.int 1
    %17930 = torch.aten.add.Scalar %17922, %int16_15514, %int1_15515 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17930, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int16_15516 = torch.constant.int 16
    %int1_15517 = torch.constant.int 1
    %17931 = torch.aten.add.Scalar %17923, %int16_15516, %int1_15517 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17931, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int16_15518 = torch.constant.int 16
    %int1_15519 = torch.constant.int 1
    %17932 = torch.aten.add.Scalar %17924, %int16_15518, %int1_15519 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17932, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int16_15520 = torch.constant.int 16
    %int1_15521 = torch.constant.int 1
    %17933 = torch.aten.add.Scalar %17925, %int16_15520, %int1_15521 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17933, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int16_15522 = torch.constant.int 16
    %int1_15523 = torch.constant.int 1
    %17934 = torch.aten.add.Scalar %17926, %int16_15522, %int1_15523 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17934, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int16_15524 = torch.constant.int 16
    %int1_15525 = torch.constant.int 1
    %17935 = torch.aten.add.Scalar %17927, %int16_15524, %int1_15525 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %17935, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
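    // Reshape each rotated key shard from [4,?,1,128] to [4,?,16,1,128]: the sequence
    // dimension (s0 * 16 tokens) splits into s0 pages of 16 tokens each, matching the
    // cache's page size of 16.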
    %int4_15526 = torch.constant.int 4
    %int16_15527 = torch.constant.int 16
    %int1_15528 = torch.constant.int 1
    %int128_15529 = torch.constant.int 128
    %17936 = torch.prim.ListConstruct %int4_15526, %3095, %int16_15527, %int1_15528, %int128_15529 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17937 = torch.aten.view %17814, %17936 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %17937, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15530 = torch.constant.int 4
    %int16_15531 = torch.constant.int 16
    %int1_15532 = torch.constant.int 1
    %int128_15533 = torch.constant.int 128
    %17938 = torch.prim.ListConstruct %int4_15530, %3095, %int16_15531, %int1_15532, %int128_15533 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17939 = torch.aten.view %17829, %17938 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %17939, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15534 = torch.constant.int 4
    %int16_15535 = torch.constant.int 16
    %int1_15536 = torch.constant.int 1
    %int128_15537 = torch.constant.int 128
    %17940 = torch.prim.ListConstruct %int4_15534, %3095, %int16_15535, %int1_15536, %int128_15537 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17941 = torch.aten.view %17844, %17940 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %17941, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15538 = torch.constant.int 4
    %int16_15539 = torch.constant.int 16
    %int1_15540 = torch.constant.int 1
    %int128_15541 = torch.constant.int 128
    %17942 = torch.prim.ListConstruct %int4_15538, %3095, %int16_15539, %int1_15540, %int128_15541 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17943 = torch.aten.view %17859, %17942 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %17943, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15542 = torch.constant.int 4
    %int16_15543 = torch.constant.int 16
    %int1_15544 = torch.constant.int 1
    %int128_15545 = torch.constant.int 128
    %17944 = torch.prim.ListConstruct %int4_15542, %3095, %int16_15543, %int1_15544, %int128_15545 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17945 = torch.aten.view %17874, %17944 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %17945, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15546 = torch.constant.int 4
    %int16_15547 = torch.constant.int 16
    %int1_15548 = torch.constant.int 1
    %int128_15549 = torch.constant.int 128
    %17946 = torch.prim.ListConstruct %int4_15546, %3095, %int16_15547, %int1_15548, %int128_15549 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17947 = torch.aten.view %17889, %17946 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %17947, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15550 = torch.constant.int 4
    %int16_15551 = torch.constant.int 16
    %int1_15552 = torch.constant.int 1
    %int128_15553 = torch.constant.int 128
    %17948 = torch.prim.ListConstruct %int4_15550, %3095, %int16_15551, %int1_15552, %int128_15553 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17949 = torch.aten.view %17904, %17948 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %17949, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15554 = torch.constant.int 4
    %int16_15555 = torch.constant.int 16
    %int1_15556 = torch.constant.int 1
    %int128_15557 = torch.constant.int 128
    %17950 = torch.prim.ListConstruct %int4_15554, %3095, %int16_15555, %int1_15556, %int128_15557 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17951 = torch.aten.view %17919, %17950 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %17951, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
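    // Collapse batch (4) and page count into one leading dim so that each row of
    // the resulting [?, 16, 1, 128] tensors lines up with a row of the flattened
    // per-shard cache written below.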
    %int4_15558 = torch.constant.int 4
    %17952 = torch.aten.mul.int %int4_15558, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15559 = torch.constant.int 16
    %int1_15560 = torch.constant.int 1
    %int128_15561 = torch.constant.int 128
    %17953 = torch.prim.ListConstruct %17952, %int16_15559, %int1_15560, %int128_15561 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17954 = torch.aten.view %17937, %17953 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %17954, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15562 = torch.constant.int 4
    %17955 = torch.aten.mul.int %int4_15562, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15563 = torch.constant.int 16
    %int1_15564 = torch.constant.int 1
    %int128_15565 = torch.constant.int 128
    %17956 = torch.prim.ListConstruct %17955, %int16_15563, %int1_15564, %int128_15565 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17957 = torch.aten.view %17939, %17956 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %17957, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15566 = torch.constant.int 4
    %17958 = torch.aten.mul.int %int4_15566, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15567 = torch.constant.int 16
    %int1_15568 = torch.constant.int 1
    %int128_15569 = torch.constant.int 128
    %17959 = torch.prim.ListConstruct %17958, %int16_15567, %int1_15568, %int128_15569 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17960 = torch.aten.view %17941, %17959 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %17960, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15570 = torch.constant.int 4
    %17961 = torch.aten.mul.int %int4_15570, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15571 = torch.constant.int 16
    %int1_15572 = torch.constant.int 1
    %int128_15573 = torch.constant.int 128
    %17962 = torch.prim.ListConstruct %17961, %int16_15571, %int1_15572, %int128_15573 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17963 = torch.aten.view %17943, %17962 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %17963, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15574 = torch.constant.int 4
    %17964 = torch.aten.mul.int %int4_15574, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15575 = torch.constant.int 16
    %int1_15576 = torch.constant.int 1
    %int128_15577 = torch.constant.int 128
    %17965 = torch.prim.ListConstruct %17964, %int16_15575, %int1_15576, %int128_15577 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17966 = torch.aten.view %17945, %17965 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %17966, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15578 = torch.constant.int 4
    %17967 = torch.aten.mul.int %int4_15578, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15579 = torch.constant.int 16
    %int1_15580 = torch.constant.int 1
    %int128_15581 = torch.constant.int 128
    %17968 = torch.prim.ListConstruct %17967, %int16_15579, %int1_15580, %int128_15581 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17969 = torch.aten.view %17947, %17968 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %17969, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15582 = torch.constant.int 4
    %17970 = torch.aten.mul.int %int4_15582, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15583 = torch.constant.int 16
    %int1_15584 = torch.constant.int 1
    %int128_15585 = torch.constant.int 128
    %17971 = torch.prim.ListConstruct %17970, %int16_15583, %int1_15584, %int128_15585 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17972 = torch.aten.view %17949, %17971 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %17972, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15586 = torch.constant.int 4
    %17973 = torch.aten.mul.int %int4_15586, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15587 = torch.constant.int 16
    %int1_15588 = torch.constant.int 1
    %int128_15589 = torch.constant.int 128
    %17974 = torch.prim.ListConstruct %17973, %int16_15587, %int1_15588, %int128_15589 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %17975 = torch.aten.view %17951, %17974 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %17975, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
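    // Flatten the matching [4, ?] slot-index tensors (%17928 ... %17935) to rank-1
    // so they can drive the aten.index_put scatter below.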
    %int4_15590 = torch.constant.int 4
    %17976 = torch.aten.mul.int %int4_15590, %3095 : !torch.int, !torch.int -> !torch.int
    %17977 = torch.prim.ListConstruct %17976 : (!torch.int) -> !torch.list<int>
    %17978 = torch.aten.view %17928, %17977 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %17978, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15591 = torch.constant.int 4
    %17979 = torch.aten.mul.int %int4_15591, %3095 : !torch.int, !torch.int -> !torch.int
    %17980 = torch.prim.ListConstruct %17979 : (!torch.int) -> !torch.list<int>
    %17981 = torch.aten.view %17929, %17980 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %17981, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15592 = torch.constant.int 4
    %17982 = torch.aten.mul.int %int4_15592, %3095 : !torch.int, !torch.int -> !torch.int
    %17983 = torch.prim.ListConstruct %17982 : (!torch.int) -> !torch.list<int>
    %17984 = torch.aten.view %17930, %17983 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %17984, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15593 = torch.constant.int 4
    %17985 = torch.aten.mul.int %int4_15593, %3095 : !torch.int, !torch.int -> !torch.int
    %17986 = torch.prim.ListConstruct %17985 : (!torch.int) -> !torch.list<int>
    %17987 = torch.aten.view %17931, %17986 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %17987, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15594 = torch.constant.int 4
    %17988 = torch.aten.mul.int %int4_15594, %3095 : !torch.int, !torch.int -> !torch.int
    %17989 = torch.prim.ListConstruct %17988 : (!torch.int) -> !torch.list<int>
    %17990 = torch.aten.view %17932, %17989 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %17990, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15595 = torch.constant.int 4
    %17991 = torch.aten.mul.int %int4_15595, %3095 : !torch.int, !torch.int -> !torch.int
    %17992 = torch.prim.ListConstruct %17991 : (!torch.int) -> !torch.list<int>
    %17993 = torch.aten.view %17933, %17992 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %17993, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15596 = torch.constant.int 4
    %17994 = torch.aten.mul.int %int4_15596, %3095 : !torch.int, !torch.int -> !torch.int
    %17995 = torch.prim.ListConstruct %17994 : (!torch.int) -> !torch.list<int>
    %17996 = torch.aten.view %17934, %17995 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %17996, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15597 = torch.constant.int 4
    %17997 = torch.aten.mul.int %int4_15597, %3095 : !torch.int, !torch.int -> !torch.int
    %17998 = torch.prim.ListConstruct %17997 : (!torch.int) -> !torch.list<int>
    %17999 = torch.aten.view %17935, %17998 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %17999, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
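    // Same (pages, 16) unflattening for a second group of per-shard tensors
    // (%17589 ... %17603) -- likely the value projections paired with the keys
    // reshaped above (an inference from the index pairing, not stated in the IR).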
    %int4_15598 = torch.constant.int 4
    %int16_15599 = torch.constant.int 16
    %int1_15600 = torch.constant.int 1
    %int128_15601 = torch.constant.int 128
    %18000 = torch.prim.ListConstruct %int4_15598, %3095, %int16_15599, %int1_15600, %int128_15601 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18001 = torch.aten.view %17589, %18000 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %18001, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15602 = torch.constant.int 4
    %int16_15603 = torch.constant.int 16
    %int1_15604 = torch.constant.int 1
    %int128_15605 = torch.constant.int 128
    %18002 = torch.prim.ListConstruct %int4_15602, %3095, %int16_15603, %int1_15604, %int128_15605 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18003 = torch.aten.view %17591, %18002 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %18003, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15606 = torch.constant.int 4
    %int16_15607 = torch.constant.int 16
    %int1_15608 = torch.constant.int 1
    %int128_15609 = torch.constant.int 128
    %18004 = torch.prim.ListConstruct %int4_15606, %3095, %int16_15607, %int1_15608, %int128_15609 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18005 = torch.aten.view %17593, %18004 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %18005, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15610 = torch.constant.int 4
    %int16_15611 = torch.constant.int 16
    %int1_15612 = torch.constant.int 1
    %int128_15613 = torch.constant.int 128
    %18006 = torch.prim.ListConstruct %int4_15610, %3095, %int16_15611, %int1_15612, %int128_15613 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18007 = torch.aten.view %17595, %18006 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %18007, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15614 = torch.constant.int 4
    %int16_15615 = torch.constant.int 16
    %int1_15616 = torch.constant.int 1
    %int128_15617 = torch.constant.int 128
    %18008 = torch.prim.ListConstruct %int4_15614, %3095, %int16_15615, %int1_15616, %int128_15617 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18009 = torch.aten.view %17597, %18008 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %18009, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15618 = torch.constant.int 4
    %int16_15619 = torch.constant.int 16
    %int1_15620 = torch.constant.int 1
    %int128_15621 = torch.constant.int 128
    %18010 = torch.prim.ListConstruct %int4_15618, %3095, %int16_15619, %int1_15620, %int128_15621 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18011 = torch.aten.view %17599, %18010 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %18011, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15622 = torch.constant.int 4
    %int16_15623 = torch.constant.int 16
    %int1_15624 = torch.constant.int 1
    %int128_15625 = torch.constant.int 128
    %18012 = torch.prim.ListConstruct %int4_15622, %3095, %int16_15623, %int1_15624, %int128_15625 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18013 = torch.aten.view %17601, %18012 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %18013, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_15626 = torch.constant.int 4
    %int16_15627 = torch.constant.int 16
    %int1_15628 = torch.constant.int 1
    %int128_15629 = torch.constant.int 128
    %18014 = torch.prim.ListConstruct %int4_15626, %3095, %int16_15627, %int1_15628, %int128_15629 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18015 = torch.aten.view %17603, %18014 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %18015, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
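    // And the matching batch x pages collapse for that second group.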
    %int4_15630 = torch.constant.int 4
    %18016 = torch.aten.mul.int %int4_15630, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15631 = torch.constant.int 16
    %int1_15632 = torch.constant.int 1
    %int128_15633 = torch.constant.int 128
    %18017 = torch.prim.ListConstruct %18016, %int16_15631, %int1_15632, %int128_15633 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18018 = torch.aten.view %18001, %18017 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18018, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15634 = torch.constant.int 4
    %18019 = torch.aten.mul.int %int4_15634, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15635 = torch.constant.int 16
    %int1_15636 = torch.constant.int 1
    %int128_15637 = torch.constant.int 128
    %18020 = torch.prim.ListConstruct %18019, %int16_15635, %int1_15636, %int128_15637 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18021 = torch.aten.view %18003, %18020 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18021, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15638 = torch.constant.int 4
    %18022 = torch.aten.mul.int %int4_15638, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15639 = torch.constant.int 16
    %int1_15640 = torch.constant.int 1
    %int128_15641 = torch.constant.int 128
    %18023 = torch.prim.ListConstruct %18022, %int16_15639, %int1_15640, %int128_15641 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18024 = torch.aten.view %18005, %18023 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18024, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15642 = torch.constant.int 4
    %18025 = torch.aten.mul.int %int4_15642, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15643 = torch.constant.int 16
    %int1_15644 = torch.constant.int 1
    %int128_15645 = torch.constant.int 128
    %18026 = torch.prim.ListConstruct %18025, %int16_15643, %int1_15644, %int128_15645 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18027 = torch.aten.view %18007, %18026 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18027, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15646 = torch.constant.int 4
    %18028 = torch.aten.mul.int %int4_15646, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15647 = torch.constant.int 16
    %int1_15648 = torch.constant.int 1
    %int128_15649 = torch.constant.int 128
    %18029 = torch.prim.ListConstruct %18028, %int16_15647, %int1_15648, %int128_15649 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18030 = torch.aten.view %18009, %18029 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18030, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15650 = torch.constant.int 4
    %18031 = torch.aten.mul.int %int4_15650, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15651 = torch.constant.int 16
    %int1_15652 = torch.constant.int 1
    %int128_15653 = torch.constant.int 128
    %18032 = torch.prim.ListConstruct %18031, %int16_15651, %int1_15652, %int128_15653 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18033 = torch.aten.view %18011, %18032 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18033, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15654 = torch.constant.int 4
    %18034 = torch.aten.mul.int %int4_15654, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15655 = torch.constant.int 16
    %int1_15656 = torch.constant.int 1
    %int128_15657 = torch.constant.int 128
    %18035 = torch.prim.ListConstruct %18034, %int16_15655, %int1_15656, %int128_15657 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18036 = torch.aten.view %18013, %18035 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18036, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_15658 = torch.constant.int 4
    %18037 = torch.aten.mul.int %int4_15658, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_15659 = torch.constant.int 16
    %int1_15660 = torch.constant.int 1
    %int128_15661 = torch.constant.int 128
    %18038 = torch.prim.ListConstruct %18037, %int16_15659, %int1_15660, %int128_15661 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18039 = torch.aten.view %18015, %18038 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18039, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
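    // Offset each slot index by +1: with the two halves of a (K, V) pair stored in
    // adjacent slots of a page, base+0 and base+1 plausibly address the K and V
    // slots respectively.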
    %int1_15662 = torch.constant.int 1
    %int1_15663 = torch.constant.int 1
    %18040 = torch.aten.add.Scalar %17928, %int1_15662, %int1_15663 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %18040, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_15664 = torch.constant.int 1
    %int1_15665 = torch.constant.int 1
    %18041 = torch.aten.add.Scalar %17929, %int1_15664, %int1_15665 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %18041, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_15666 = torch.constant.int 1
    %int1_15667 = torch.constant.int 1
    %18042 = torch.aten.add.Scalar %17930, %int1_15666, %int1_15667 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %18042, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_15668 = torch.constant.int 1
    %int1_15669 = torch.constant.int 1
    %18043 = torch.aten.add.Scalar %17931, %int1_15668, %int1_15669 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %18043, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_15670 = torch.constant.int 1
    %int1_15671 = torch.constant.int 1
    %18044 = torch.aten.add.Scalar %17932, %int1_15670, %int1_15671 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %18044, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_15672 = torch.constant.int 1
    %int1_15673 = torch.constant.int 1
    %18045 = torch.aten.add.Scalar %17933, %int1_15672, %int1_15673 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %18045, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_15674 = torch.constant.int 1
    %int1_15675 = torch.constant.int 1
    %18046 = torch.aten.add.Scalar %17934, %int1_15674, %int1_15675 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %18046, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_15676 = torch.constant.int 1
    %int1_15677 = torch.constant.int 1
    %18047 = torch.aten.add.Scalar %17935, %int1_15676, %int1_15677 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %18047, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_15678 = torch.constant.int 4
    %18048 = torch.aten.mul.int %int4_15678, %3095 : !torch.int, !torch.int -> !torch.int
    %18049 = torch.prim.ListConstruct %18048 : (!torch.int) -> !torch.list<int>
    %18050 = torch.aten.view %18040, %18049 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18050, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15679 = torch.constant.int 4
    %18051 = torch.aten.mul.int %int4_15679, %3095 : !torch.int, !torch.int -> !torch.int
    %18052 = torch.prim.ListConstruct %18051 : (!torch.int) -> !torch.list<int>
    %18053 = torch.aten.view %18041, %18052 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18053, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15680 = torch.constant.int 4
    %18054 = torch.aten.mul.int %int4_15680, %3095 : !torch.int, !torch.int -> !torch.int
    %18055 = torch.prim.ListConstruct %18054 : (!torch.int) -> !torch.list<int>
    %18056 = torch.aten.view %18042, %18055 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18056, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15681 = torch.constant.int 4
    %18057 = torch.aten.mul.int %int4_15681, %3095 : !torch.int, !torch.int -> !torch.int
    %18058 = torch.prim.ListConstruct %18057 : (!torch.int) -> !torch.list<int>
    %18059 = torch.aten.view %18043, %18058 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18059, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15682 = torch.constant.int 4
    %18060 = torch.aten.mul.int %int4_15682, %3095 : !torch.int, !torch.int -> !torch.int
    %18061 = torch.prim.ListConstruct %18060 : (!torch.int) -> !torch.list<int>
    %18062 = torch.aten.view %18044, %18061 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18062, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15683 = torch.constant.int 4
    %18063 = torch.aten.mul.int %int4_15683, %3095 : !torch.int, !torch.int -> !torch.int
    %18064 = torch.prim.ListConstruct %18063 : (!torch.int) -> !torch.list<int>
    %18065 = torch.aten.view %18045, %18064 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18065, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15684 = torch.constant.int 4
    %18066 = torch.aten.mul.int %int4_15684, %3095 : !torch.int, !torch.int -> !torch.int
    %18067 = torch.prim.ListConstruct %18066 : (!torch.int) -> !torch.list<int>
    %18068 = torch.aten.view %18046, %18067 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18068, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_15685 = torch.constant.int 4
    %18069 = torch.aten.mul.int %int4_15685, %3095 : !torch.int, !torch.int -> !torch.int
    %18070 = torch.prim.ListConstruct %18069 : (!torch.int) -> !torch.list<int>
    %18071 = torch.aten.view %18047, %18070 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18071, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
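    // Concatenate the two index vectors (base and base+1) into one [s0 * 8] gather
    // list per shard, so a single index_put can write both halves at once.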
    %18072 = torch.prim.ListConstruct %17978, %18050 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_15686 = torch.constant.int 0
    %18073 = torch.aten.cat %18072, %int0_15686 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18073, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %18074 = torch.prim.ListConstruct %17981, %18053 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_15687 = torch.constant.int 0
    %18075 = torch.aten.cat %18074, %int0_15687 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18075, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %18076 = torch.prim.ListConstruct %17984, %18056 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_15688 = torch.constant.int 0
    %18077 = torch.aten.cat %18076, %int0_15688 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18077, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %18078 = torch.prim.ListConstruct %17987, %18059 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_15689 = torch.constant.int 0
    %18079 = torch.aten.cat %18078, %int0_15689 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18079, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %18080 = torch.prim.ListConstruct %17990, %18062 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_15690 = torch.constant.int 0
    %18081 = torch.aten.cat %18080, %int0_15690 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18081, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %18082 = torch.prim.ListConstruct %17993, %18065 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_15691 = torch.constant.int 0
    %18083 = torch.aten.cat %18082, %int0_15691 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18083, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %18084 = torch.prim.ListConstruct %17996, %18068 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_15692 = torch.constant.int 0
    %18085 = torch.aten.cat %18084, %int0_15692 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18085, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %18086 = torch.prim.ListConstruct %17999, %18071 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_15693 = torch.constant.int 0
    %18087 = torch.aten.cat %18086, %int0_15693 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %18087, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
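    // Concatenate the corresponding data rows per shard, in the same order as the
    // combined index vectors above.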
    %18088 = torch.prim.ListConstruct %17954, %18018 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_15694 = torch.constant.int 0
    %18089 = torch.aten.cat %18088, %int0_15694 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18089, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18090 = torch.prim.ListConstruct %17957, %18021 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_15695 = torch.constant.int 0
    %18091 = torch.aten.cat %18090, %int0_15695 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18091, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18092 = torch.prim.ListConstruct %17960, %18024 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_15696 = torch.constant.int 0
    %18093 = torch.aten.cat %18092, %int0_15696 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18093, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18094 = torch.prim.ListConstruct %17963, %18027 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_15697 = torch.constant.int 0
    %18095 = torch.aten.cat %18094, %int0_15697 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18095, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18096 = torch.prim.ListConstruct %17966, %18030 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_15698 = torch.constant.int 0
    %18097 = torch.aten.cat %18096, %int0_15698 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18097, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18098 = torch.prim.ListConstruct %17969, %18033 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_15699 = torch.constant.int 0
    %18099 = torch.aten.cat %18098, %int0_15699 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18099, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18100 = torch.prim.ListConstruct %17972, %18036 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_15700 = torch.constant.int 0
    %18101 = torch.aten.cat %18100, %int0_15700 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18101, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18102 = torch.prim.ListConstruct %17975, %18039 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_15701 = torch.constant.int 0
    %18103 = torch.aten.cat %18102, %int0_15701 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18103, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
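    // Per-shard cache update, repeated once per device (8 shards, matching the
    // sharded globals declared at module scope). Each shard's cache is a
    // [?, 131072] buffer viewed as [pages, 32, 2, 16, 1, 128], flattened to
    // [pages * 64, 16, 1, 128], scatter-updated in place with index_put
    // (accumulate = false), then viewed back to its original [?, 131072] shape.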
    %int32_15702 = torch.constant.int 32
    %int2_15703 = torch.constant.int 2
    %int16_15704 = torch.constant.int 16
    %int1_15705 = torch.constant.int 1
    %int128_15706 = torch.constant.int 128
    %18104 = torch.prim.ListConstruct %3023, %int32_15702, %int2_15703, %int16_15704, %int1_15705, %int128_15706 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18105 = torch.aten.view %16254, %18104 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18105, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_15707 = torch.constant.int 32
    %18106 = torch.aten.mul.int %3023, %int32_15707 : !torch.int, !torch.int -> !torch.int
    %int2_15708 = torch.constant.int 2
    %18107 = torch.aten.mul.int %18106, %int2_15708 : !torch.int, !torch.int -> !torch.int
    %int16_15709 = torch.constant.int 16
    %int1_15710 = torch.constant.int 1
    %int128_15711 = torch.constant.int 128
    %18108 = torch.prim.ListConstruct %18107, %int16_15709, %int1_15710, %int128_15711 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18109 = torch.aten.view %18105, %18108 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18109, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18110 = torch.prim.ListConstruct %18073 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_15712 = torch.constant.bool false
    %18111 = torch.aten.index_put %18109, %18110, %18089, %false_15712 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18111, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_15713 = torch.constant.int 32
    %int2_15714 = torch.constant.int 2
    %int16_15715 = torch.constant.int 16
    %int1_15716 = torch.constant.int 1
    %int128_15717 = torch.constant.int 128
    %18112 = torch.prim.ListConstruct %3023, %int32_15713, %int2_15714, %int16_15715, %int1_15716, %int128_15717 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18113 = torch.aten.view %18111, %18112 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18113, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_15718 = torch.constant.int 131072
    %18114 = torch.prim.ListConstruct %3023, %int131072_15718 : (!torch.int, !torch.int) -> !torch.list<int>
    %18115 = torch.aten.view %18113, %18114 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %18115, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
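    // Same update for the next shard's cache buffer (%16266).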
    %int32_15719 = torch.constant.int 32
    %int2_15720 = torch.constant.int 2
    %int16_15721 = torch.constant.int 16
    %int1_15722 = torch.constant.int 1
    %int128_15723 = torch.constant.int 128
    %18116 = torch.prim.ListConstruct %3026, %int32_15719, %int2_15720, %int16_15721, %int1_15722, %int128_15723 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18117 = torch.aten.view %16266, %18116 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18117, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_15724 = torch.constant.int 32
    %18118 = torch.aten.mul.int %3026, %int32_15724 : !torch.int, !torch.int -> !torch.int
    %int2_15725 = torch.constant.int 2
    %18119 = torch.aten.mul.int %18118, %int2_15725 : !torch.int, !torch.int -> !torch.int
    %int16_15726 = torch.constant.int 16
    %int1_15727 = torch.constant.int 1
    %int128_15728 = torch.constant.int 128
    %18120 = torch.prim.ListConstruct %18119, %int16_15726, %int1_15727, %int128_15728 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18121 = torch.aten.view %18117, %18120 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18121, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18122 = torch.prim.ListConstruct %18075 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_15729 = torch.constant.bool false
    %18123 = torch.aten.index_put %18121, %18122, %18091, %false_15729 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18123, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_15730 = torch.constant.int 32
    %int2_15731 = torch.constant.int 2
    %int16_15732 = torch.constant.int 16
    %int1_15733 = torch.constant.int 1
    %int128_15734 = torch.constant.int 128
    %18124 = torch.prim.ListConstruct %3026, %int32_15730, %int2_15731, %int16_15732, %int1_15733, %int128_15734 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18125 = torch.aten.view %18123, %18124 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18125, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_15735 = torch.constant.int 131072
    %18126 = torch.prim.ListConstruct %3026, %int131072_15735 : (!torch.int, !torch.int) -> !torch.list<int>
    %18127 = torch.aten.view %18125, %18126 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %18127, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
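    // Same update for the shard cache %16278.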
    %int32_15736 = torch.constant.int 32
    %int2_15737 = torch.constant.int 2
    %int16_15738 = torch.constant.int 16
    %int1_15739 = torch.constant.int 1
    %int128_15740 = torch.constant.int 128
    %18128 = torch.prim.ListConstruct %3029, %int32_15736, %int2_15737, %int16_15738, %int1_15739, %int128_15740 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18129 = torch.aten.view %16278, %18128 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18129, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_15741 = torch.constant.int 32
    %18130 = torch.aten.mul.int %3029, %int32_15741 : !torch.int, !torch.int -> !torch.int
    %int2_15742 = torch.constant.int 2
    %18131 = torch.aten.mul.int %18130, %int2_15742 : !torch.int, !torch.int -> !torch.int
    %int16_15743 = torch.constant.int 16
    %int1_15744 = torch.constant.int 1
    %int128_15745 = torch.constant.int 128
    %18132 = torch.prim.ListConstruct %18131, %int16_15743, %int1_15744, %int128_15745 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18133 = torch.aten.view %18129, %18132 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18133, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18134 = torch.prim.ListConstruct %18077 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_15746 = torch.constant.bool false
    %18135 = torch.aten.index_put %18133, %18134, %18093, %false_15746 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18135, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_15747 = torch.constant.int 32
    %int2_15748 = torch.constant.int 2
    %int16_15749 = torch.constant.int 16
    %int1_15750 = torch.constant.int 1
    %int128_15751 = torch.constant.int 128
    %18136 = torch.prim.ListConstruct %3029, %int32_15747, %int2_15748, %int16_15749, %int1_15750, %int128_15751 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18137 = torch.aten.view %18135, %18136 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18137, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_15752 = torch.constant.int 131072
    %18138 = torch.prim.ListConstruct %3029, %int131072_15752 : (!torch.int, !torch.int) -> !torch.list<int>
    %18139 = torch.aten.view %18137, %18138 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %18139, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
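    // Same update for the shard cache %16290.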
    %int32_15753 = torch.constant.int 32
    %int2_15754 = torch.constant.int 2
    %int16_15755 = torch.constant.int 16
    %int1_15756 = torch.constant.int 1
    %int128_15757 = torch.constant.int 128
    %18140 = torch.prim.ListConstruct %3032, %int32_15753, %int2_15754, %int16_15755, %int1_15756, %int128_15757 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18141 = torch.aten.view %16290, %18140 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18141, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_15758 = torch.constant.int 32
    %18142 = torch.aten.mul.int %3032, %int32_15758 : !torch.int, !torch.int -> !torch.int
    %int2_15759 = torch.constant.int 2
    %18143 = torch.aten.mul.int %18142, %int2_15759 : !torch.int, !torch.int -> !torch.int
    %int16_15760 = torch.constant.int 16
    %int1_15761 = torch.constant.int 1
    %int128_15762 = torch.constant.int 128
    %18144 = torch.prim.ListConstruct %18143, %int16_15760, %int1_15761, %int128_15762 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18145 = torch.aten.view %18141, %18144 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18145, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18146 = torch.prim.ListConstruct %18079 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_15763 = torch.constant.bool false
    %18147 = torch.aten.index_put %18145, %18146, %18095, %false_15763 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18147, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_15764 = torch.constant.int 32
    %int2_15765 = torch.constant.int 2
    %int16_15766 = torch.constant.int 16
    %int1_15767 = torch.constant.int 1
    %int128_15768 = torch.constant.int 128
    %18148 = torch.prim.ListConstruct %3032, %int32_15764, %int2_15765, %int16_15766, %int1_15767, %int128_15768 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18149 = torch.aten.view %18147, %18148 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18149, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_15769 = torch.constant.int 131072
    %18150 = torch.prim.ListConstruct %3032, %int131072_15769 : (!torch.int, !torch.int) -> !torch.list<int>
    %18151 = torch.aten.view %18149, %18150 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %18151, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
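    // Same update for the shard cache %16302.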
    %int32_15770 = torch.constant.int 32
    %int2_15771 = torch.constant.int 2
    %int16_15772 = torch.constant.int 16
    %int1_15773 = torch.constant.int 1
    %int128_15774 = torch.constant.int 128
    %18152 = torch.prim.ListConstruct %3035, %int32_15770, %int2_15771, %int16_15772, %int1_15773, %int128_15774 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18153 = torch.aten.view %16302, %18152 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18153, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_15775 = torch.constant.int 32
    %18154 = torch.aten.mul.int %3035, %int32_15775 : !torch.int, !torch.int -> !torch.int
    %int2_15776 = torch.constant.int 2
    %18155 = torch.aten.mul.int %18154, %int2_15776 : !torch.int, !torch.int -> !torch.int
    %int16_15777 = torch.constant.int 16
    %int1_15778 = torch.constant.int 1
    %int128_15779 = torch.constant.int 128
    %18156 = torch.prim.ListConstruct %18155, %int16_15777, %int1_15778, %int128_15779 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18157 = torch.aten.view %18153, %18156 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18157, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18158 = torch.prim.ListConstruct %18081 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_15780 = torch.constant.bool false
    %18159 = torch.aten.index_put %18157, %18158, %18097, %false_15780 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18159, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_15781 = torch.constant.int 32
    %int2_15782 = torch.constant.int 2
    %int16_15783 = torch.constant.int 16
    %int1_15784 = torch.constant.int 1
    %int128_15785 = torch.constant.int 128
    %18160 = torch.prim.ListConstruct %3035, %int32_15781, %int2_15782, %int16_15783, %int1_15784, %int128_15785 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18161 = torch.aten.view %18159, %18160 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18161, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_15786 = torch.constant.int 131072
    %18162 = torch.prim.ListConstruct %3035, %int131072_15786 : (!torch.int, !torch.int) -> !torch.list<int>
    %18163 = torch.aten.view %18161, %18162 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %18163, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
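    // Same update for the shard cache %16314.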
    %int32_15787 = torch.constant.int 32
    %int2_15788 = torch.constant.int 2
    %int16_15789 = torch.constant.int 16
    %int1_15790 = torch.constant.int 1
    %int128_15791 = torch.constant.int 128
    %18164 = torch.prim.ListConstruct %3038, %int32_15787, %int2_15788, %int16_15789, %int1_15790, %int128_15791 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18165 = torch.aten.view %16314, %18164 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18165, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_15792 = torch.constant.int 32
    %18166 = torch.aten.mul.int %3038, %int32_15792 : !torch.int, !torch.int -> !torch.int
    %int2_15793 = torch.constant.int 2
    %18167 = torch.aten.mul.int %18166, %int2_15793 : !torch.int, !torch.int -> !torch.int
    %int16_15794 = torch.constant.int 16
    %int1_15795 = torch.constant.int 1
    %int128_15796 = torch.constant.int 128
    %18168 = torch.prim.ListConstruct %18167, %int16_15794, %int1_15795, %int128_15796 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18169 = torch.aten.view %18165, %18168 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18169, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18170 = torch.prim.ListConstruct %18083 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_15797 = torch.constant.bool false
    %18171 = torch.aten.index_put %18169, %18170, %18099, %false_15797 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18171, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_15798 = torch.constant.int 32
    %int2_15799 = torch.constant.int 2
    %int16_15800 = torch.constant.int 16
    %int1_15801 = torch.constant.int 1
    %int128_15802 = torch.constant.int 128
    %18172 = torch.prim.ListConstruct %3038, %int32_15798, %int2_15799, %int16_15800, %int1_15801, %int128_15802 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18173 = torch.aten.view %18171, %18172 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18173, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_15803 = torch.constant.int 131072
    %18174 = torch.prim.ListConstruct %3038, %int131072_15803 : (!torch.int, !torch.int) -> !torch.list<int>
    %18175 = torch.aten.view %18173, %18174 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %18175, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
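    // Same update for the shard cache %16326.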
    %int32_15804 = torch.constant.int 32
    %int2_15805 = torch.constant.int 2
    %int16_15806 = torch.constant.int 16
    %int1_15807 = torch.constant.int 1
    %int128_15808 = torch.constant.int 128
    %18176 = torch.prim.ListConstruct %3041, %int32_15804, %int2_15805, %int16_15806, %int1_15807, %int128_15808 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18177 = torch.aten.view %16326, %18176 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18177, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_15809 = torch.constant.int 32
    %18178 = torch.aten.mul.int %3041, %int32_15809 : !torch.int, !torch.int -> !torch.int
    %int2_15810 = torch.constant.int 2
    %18179 = torch.aten.mul.int %18178, %int2_15810 : !torch.int, !torch.int -> !torch.int
    %int16_15811 = torch.constant.int 16
    %int1_15812 = torch.constant.int 1
    %int128_15813 = torch.constant.int 128
    %18180 = torch.prim.ListConstruct %18179, %int16_15811, %int1_15812, %int128_15813 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18181 = torch.aten.view %18177, %18180 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18181, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18182 = torch.prim.ListConstruct %18085 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_15814 = torch.constant.bool false
    %18183 = torch.aten.index_put %18181, %18182, %18101, %false_15814 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18183, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_15815 = torch.constant.int 32
    %int2_15816 = torch.constant.int 2
    %int16_15817 = torch.constant.int 16
    %int1_15818 = torch.constant.int 1
    %int128_15819 = torch.constant.int 128
    %18184 = torch.prim.ListConstruct %3041, %int32_15815, %int2_15816, %int16_15817, %int1_15818, %int128_15819 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18185 = torch.aten.view %18183, %18184 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18185, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_15820 = torch.constant.int 131072
    %18186 = torch.prim.ListConstruct %3041, %int131072_15820 : (!torch.int, !torch.int) -> !torch.list<int>
    %18187 = torch.aten.view %18185, %18186 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %18187, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
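    // Same update for the last shard's cache (%16338), completing the writes for
    // all 8 devices.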
    %int32_15821 = torch.constant.int 32
    %int2_15822 = torch.constant.int 2
    %int16_15823 = torch.constant.int 16
    %int1_15824 = torch.constant.int 1
    %int128_15825 = torch.constant.int 128
    %18188 = torch.prim.ListConstruct %3044, %int32_15821, %int2_15822, %int16_15823, %int1_15824, %int128_15825 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18189 = torch.aten.view %16338, %18188 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18189, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_15826 = torch.constant.int 32
    %18190 = torch.aten.mul.int %3044, %int32_15826 : !torch.int, !torch.int -> !torch.int
    %int2_15827 = torch.constant.int 2
    %18191 = torch.aten.mul.int %18190, %int2_15827 : !torch.int, !torch.int -> !torch.int
    %int16_15828 = torch.constant.int 16
    %int1_15829 = torch.constant.int 1
    %int128_15830 = torch.constant.int 128
    %18192 = torch.prim.ListConstruct %18191, %int16_15828, %int1_15829, %int128_15830 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18193 = torch.aten.view %18189, %18192 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18193, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %18194 = torch.prim.ListConstruct %18087 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_15831 = torch.constant.bool false
    %18195 = torch.aten.index_put %18193, %18194, %18103, %false_15831 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %18195, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_15832 = torch.constant.int 32
    %int2_15833 = torch.constant.int 2
    %int16_15834 = torch.constant.int 16
    %int1_15835 = torch.constant.int 1
    %int128_15836 = torch.constant.int 128
    %18196 = torch.prim.ListConstruct %3044, %int32_15832, %int2_15833, %int16_15834, %int1_15835, %int128_15836 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18197 = torch.aten.view %18195, %18196 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %18197, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_15837 = torch.constant.int 131072
    %18198 = torch.prim.ListConstruct %3044, %int131072_15837 : (!torch.int, !torch.int) -> !torch.list<int>
    %18199 = torch.aten.view %18197, %18198 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %18199, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
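    // Grouped-query attention head replication: each device shard holds a single KV
    // head, which is broadcast across its 4 query heads. For each shard
    // (%17814 ... %17919): unsqueeze to [4, seq, 1, 1, 128], expand to
    // [4, seq, 1, 4, 128], then collapse to [4, seq, 4, 128].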
    %int-2_15838 = torch.constant.int -2
    %18200 = torch.aten.unsqueeze %17814, %int-2_15838 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15839 = torch.constant.int -2
    %18201 = torch.aten.unsqueeze %17829, %int-2_15839 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15840 = torch.constant.int -2
    %18202 = torch.aten.unsqueeze %17844, %int-2_15840 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15841 = torch.constant.int -2
    %18203 = torch.aten.unsqueeze %17859, %int-2_15841 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15842 = torch.constant.int -2
    %18204 = torch.aten.unsqueeze %17874, %int-2_15842 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15843 = torch.constant.int -2
    %18205 = torch.aten.unsqueeze %17889, %int-2_15843 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15844 = torch.constant.int -2
    %18206 = torch.aten.unsqueeze %17904, %int-2_15844 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15845 = torch.constant.int -2
    %18207 = torch.aten.unsqueeze %17919, %int-2_15845 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int4_15846 = torch.constant.int 4
    %int1_15847 = torch.constant.int 1
    %int4_15848 = torch.constant.int 4
    %int128_15849 = torch.constant.int 128
    %18208 = torch.prim.ListConstruct %int4_15846, %17800, %int1_15847, %int4_15848, %int128_15849 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15850 = torch.constant.bool false
    %18209 = torch.aten.expand %18200, %18208, %false_15850 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15851 = torch.constant.int 4
    %int1_15852 = torch.constant.int 1
    %int4_15853 = torch.constant.int 4
    %int128_15854 = torch.constant.int 128
    %18210 = torch.prim.ListConstruct %int4_15851, %17800, %int1_15852, %int4_15853, %int128_15854 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15855 = torch.constant.bool false
    %18211 = torch.aten.expand %18201, %18210, %false_15855 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18211, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15856 = torch.constant.int 4
    %int1_15857 = torch.constant.int 1
    %int4_15858 = torch.constant.int 4
    %int128_15859 = torch.constant.int 128
    %18212 = torch.prim.ListConstruct %int4_15856, %17800, %int1_15857, %int4_15858, %int128_15859 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15860 = torch.constant.bool false
    %18213 = torch.aten.expand %18202, %18212, %false_15860 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15861 = torch.constant.int 4
    %int1_15862 = torch.constant.int 1
    %int4_15863 = torch.constant.int 4
    %int128_15864 = torch.constant.int 128
    %18214 = torch.prim.ListConstruct %int4_15861, %17800, %int1_15862, %int4_15863, %int128_15864 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15865 = torch.constant.bool false
    %18215 = torch.aten.expand %18203, %18214, %false_15865 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15866 = torch.constant.int 4
    %int1_15867 = torch.constant.int 1
    %int4_15868 = torch.constant.int 4
    %int128_15869 = torch.constant.int 128
    %18216 = torch.prim.ListConstruct %int4_15866, %17800, %int1_15867, %int4_15868, %int128_15869 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15870 = torch.constant.bool false
    %18217 = torch.aten.expand %18204, %18216, %false_15870 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15871 = torch.constant.int 4
    %int1_15872 = torch.constant.int 1
    %int4_15873 = torch.constant.int 4
    %int128_15874 = torch.constant.int 128
    %18218 = torch.prim.ListConstruct %int4_15871, %17800, %int1_15872, %int4_15873, %int128_15874 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15875 = torch.constant.bool false
    %18219 = torch.aten.expand %18205, %18218, %false_15875 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15876 = torch.constant.int 4
    %int1_15877 = torch.constant.int 1
    %int4_15878 = torch.constant.int 4
    %int128_15879 = torch.constant.int 128
    %18220 = torch.prim.ListConstruct %int4_15876, %17800, %int1_15877, %int4_15878, %int128_15879 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15880 = torch.constant.bool false
    %18221 = torch.aten.expand %18206, %18220, %false_15880 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15881 = torch.constant.int 4
    %int1_15882 = torch.constant.int 1
    %int4_15883 = torch.constant.int 4
    %int128_15884 = torch.constant.int 128
    %18222 = torch.prim.ListConstruct %int4_15881, %17800, %int1_15882, %int4_15883, %int128_15884 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15885 = torch.constant.bool false
    %18223 = torch.aten.expand %18207, %18222, %false_15885 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15886 = torch.constant.int 4
    %int4_15887 = torch.constant.int 4
    %int128_15888 = torch.constant.int 128
    %18224 = torch.prim.ListConstruct %int4_15886, %17800, %int4_15887, %int128_15888 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18225 = torch.aten.view %18209, %18224 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15889 = torch.constant.int 4
    %int4_15890 = torch.constant.int 4
    %int128_15891 = torch.constant.int 128
    %18226 = torch.prim.ListConstruct %int4_15889, %17800, %int4_15890, %int128_15891 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18227 = torch.aten.view %18211, %18226 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15892 = torch.constant.int 4
    %int4_15893 = torch.constant.int 4
    %int128_15894 = torch.constant.int 128
    %18228 = torch.prim.ListConstruct %int4_15892, %17800, %int4_15893, %int128_15894 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18229 = torch.aten.view %18213, %18228 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15895 = torch.constant.int 4
    %int4_15896 = torch.constant.int 4
    %int128_15897 = torch.constant.int 128
    %18230 = torch.prim.ListConstruct %int4_15895, %17800, %int4_15896, %int128_15897 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18231 = torch.aten.view %18215, %18230 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15898 = torch.constant.int 4
    %int4_15899 = torch.constant.int 4
    %int128_15900 = torch.constant.int 128
    %18232 = torch.prim.ListConstruct %int4_15898, %17800, %int4_15899, %int128_15900 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18233 = torch.aten.view %18217, %18232 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15901 = torch.constant.int 4
    %int4_15902 = torch.constant.int 4
    %int128_15903 = torch.constant.int 128
    %18234 = torch.prim.ListConstruct %int4_15901, %17800, %int4_15902, %int128_15903 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18235 = torch.aten.view %18219, %18234 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15904 = torch.constant.int 4
    %int4_15905 = torch.constant.int 4
    %int128_15906 = torch.constant.int 128
    %18236 = torch.prim.ListConstruct %int4_15904, %17800, %int4_15905, %int128_15906 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18237 = torch.aten.view %18221, %18236 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15907 = torch.constant.int 4
    %int4_15908 = torch.constant.int 4
    %int128_15909 = torch.constant.int 128
    %18238 = torch.prim.ListConstruct %int4_15907, %17800, %int4_15908, %int128_15909 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18239 = torch.aten.view %18223, %18238 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
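    // The same unsqueeze/expand/collapse replication, applied to the second set of
    // per-shard tensors (%17589 ... %17603), presumably the V halves read from the
    // cache.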
    %int-2_15910 = torch.constant.int -2
    %18240 = torch.aten.unsqueeze %17589, %int-2_15910 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15911 = torch.constant.int -2
    %18241 = torch.aten.unsqueeze %17591, %int-2_15911 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15912 = torch.constant.int -2
    %18242 = torch.aten.unsqueeze %17593, %int-2_15912 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15913 = torch.constant.int -2
    %18243 = torch.aten.unsqueeze %17595, %int-2_15913 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15914 = torch.constant.int -2
    %18244 = torch.aten.unsqueeze %17597, %int-2_15914 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15915 = torch.constant.int -2
    %18245 = torch.aten.unsqueeze %17599, %int-2_15915 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15916 = torch.constant.int -2
    %18246 = torch.aten.unsqueeze %17601, %int-2_15916 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_15917 = torch.constant.int -2
    %18247 = torch.aten.unsqueeze %17603, %int-2_15917 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %18247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_15918 = torch.constant.int 1
    %18248 = torch.aten.size.int %17513, %int1_15918 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_15919 = torch.constant.int 4
    %int1_15920 = torch.constant.int 1
    %int4_15921 = torch.constant.int 4
    %int128_15922 = torch.constant.int 128
    %18249 = torch.prim.ListConstruct %int4_15919, %18248, %int1_15920, %int4_15921, %int128_15922 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15923 = torch.constant.bool false
    %18250 = torch.aten.expand %18240, %18249, %false_15923 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15924 = torch.constant.int 4
    %int1_15925 = torch.constant.int 1
    %int4_15926 = torch.constant.int 4
    %int128_15927 = torch.constant.int 128
    %18251 = torch.prim.ListConstruct %int4_15924, %18248, %int1_15925, %int4_15926, %int128_15927 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15928 = torch.constant.bool false
    %18252 = torch.aten.expand %18241, %18251, %false_15928 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15929 = torch.constant.int 4
    %int1_15930 = torch.constant.int 1
    %int4_15931 = torch.constant.int 4
    %int128_15932 = torch.constant.int 128
    %18253 = torch.prim.ListConstruct %int4_15929, %18248, %int1_15930, %int4_15931, %int128_15932 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15933 = torch.constant.bool false
    %18254 = torch.aten.expand %18242, %18253, %false_15933 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15934 = torch.constant.int 4
    %int1_15935 = torch.constant.int 1
    %int4_15936 = torch.constant.int 4
    %int128_15937 = torch.constant.int 128
    %18255 = torch.prim.ListConstruct %int4_15934, %18248, %int1_15935, %int4_15936, %int128_15937 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15938 = torch.constant.bool false
    %18256 = torch.aten.expand %18243, %18255, %false_15938 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15939 = torch.constant.int 4
    %int1_15940 = torch.constant.int 1
    %int4_15941 = torch.constant.int 4
    %int128_15942 = torch.constant.int 128
    %18257 = torch.prim.ListConstruct %int4_15939, %18248, %int1_15940, %int4_15941, %int128_15942 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15943 = torch.constant.bool false
    %18258 = torch.aten.expand %18244, %18257, %false_15943 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15944 = torch.constant.int 4
    %int1_15945 = torch.constant.int 1
    %int4_15946 = torch.constant.int 4
    %int128_15947 = torch.constant.int 128
    %18259 = torch.prim.ListConstruct %int4_15944, %18248, %int1_15945, %int4_15946, %int128_15947 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15948 = torch.constant.bool false
    %18260 = torch.aten.expand %18245, %18259, %false_15948 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15949 = torch.constant.int 4
    %int1_15950 = torch.constant.int 1
    %int4_15951 = torch.constant.int 4
    %int128_15952 = torch.constant.int 128
    %18261 = torch.prim.ListConstruct %int4_15949, %18248, %int1_15950, %int4_15951, %int128_15952 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15953 = torch.constant.bool false
    %18262 = torch.aten.expand %18246, %18261, %false_15953 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15954 = torch.constant.int 4
    %int1_15955 = torch.constant.int 1
    %int4_15956 = torch.constant.int 4
    %int128_15957 = torch.constant.int 128
    %18263 = torch.prim.ListConstruct %int4_15954, %18248, %int1_15955, %int4_15956, %int128_15957 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_15958 = torch.constant.bool false
    %18264 = torch.aten.expand %18247, %18263, %false_15958 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %18264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_15959 = torch.constant.int 4
    %int4_15960 = torch.constant.int 4
    %int128_15961 = torch.constant.int 128
    %18265 = torch.prim.ListConstruct %int4_15959, %18248, %int4_15960, %int128_15961 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18266 = torch.aten.view %18250, %18265 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15962 = torch.constant.int 4
    %int4_15963 = torch.constant.int 4
    %int128_15964 = torch.constant.int 128
    %18267 = torch.prim.ListConstruct %int4_15962, %18248, %int4_15963, %int128_15964 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18268 = torch.aten.view %18252, %18267 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15965 = torch.constant.int 4
    %int4_15966 = torch.constant.int 4
    %int128_15967 = torch.constant.int 128
    %18269 = torch.prim.ListConstruct %int4_15965, %18248, %int4_15966, %int128_15967 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18270 = torch.aten.view %18254, %18269 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15968 = torch.constant.int 4
    %int4_15969 = torch.constant.int 4
    %int128_15970 = torch.constant.int 128
    %18271 = torch.prim.ListConstruct %int4_15968, %18248, %int4_15969, %int128_15970 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18272 = torch.aten.view %18256, %18271 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15971 = torch.constant.int 4
    %int4_15972 = torch.constant.int 4
    %int128_15973 = torch.constant.int 128
    %18273 = torch.prim.ListConstruct %int4_15971, %18248, %int4_15972, %int128_15973 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18274 = torch.aten.view %18258, %18273 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15974 = torch.constant.int 4
    %int4_15975 = torch.constant.int 4
    %int128_15976 = torch.constant.int 128
    %18275 = torch.prim.ListConstruct %int4_15974, %18248, %int4_15975, %int128_15976 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18276 = torch.aten.view %18260, %18275 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15977 = torch.constant.int 4
    %int4_15978 = torch.constant.int 4
    %int128_15979 = torch.constant.int 128
    %18277 = torch.prim.ListConstruct %int4_15977, %18248, %int4_15978, %int128_15979 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18278 = torch.aten.view %18262, %18277 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_15980 = torch.constant.int 4
    %int4_15981 = torch.constant.int 4
    %int128_15982 = torch.constant.int 128
    %18279 = torch.prim.ListConstruct %int4_15980, %18248, %int4_15981, %int128_15982 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18280 = torch.aten.view %18264, %18279 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
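    // Transpose Q (%17656 ...), replicated K (%18225 ...), and replicated V
    // (%18266 ...) from [4, seq, heads, 128] to [4, heads, seq, 128] ahead of the
    // attention calls.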
    %int1_15983 = torch.constant.int 1
    %int2_15984 = torch.constant.int 2
    %18281 = torch.aten.transpose.int %17656, %int1_15983, %int2_15984 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18281, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_15985 = torch.constant.int 1
    %int2_15986 = torch.constant.int 2
    %18282 = torch.aten.transpose.int %17671, %int1_15985, %int2_15986 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18282, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_15987 = torch.constant.int 1
    %int2_15988 = torch.constant.int 2
    %18283 = torch.aten.transpose.int %17686, %int1_15987, %int2_15988 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18283, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_15989 = torch.constant.int 1
    %int2_15990 = torch.constant.int 2
    %18284 = torch.aten.transpose.int %17701, %int1_15989, %int2_15990 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18284, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_15991 = torch.constant.int 1
    %int2_15992 = torch.constant.int 2
    %18285 = torch.aten.transpose.int %17716, %int1_15991, %int2_15992 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18285, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_15993 = torch.constant.int 1
    %int2_15994 = torch.constant.int 2
    %18286 = torch.aten.transpose.int %17731, %int1_15993, %int2_15994 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18286, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_15995 = torch.constant.int 1
    %int2_15996 = torch.constant.int 2
    %18287 = torch.aten.transpose.int %17746, %int1_15995, %int2_15996 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18287, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_15997 = torch.constant.int 1
    %int2_15998 = torch.constant.int 2
    %18288 = torch.aten.transpose.int %17761, %int1_15997, %int2_15998 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18288, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_15999 = torch.constant.int 1
    %int2_16000 = torch.constant.int 2
    %18289 = torch.aten.transpose.int %18225, %int1_15999, %int2_16000 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18289, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16001 = torch.constant.int 1
    %int2_16002 = torch.constant.int 2
    %18290 = torch.aten.transpose.int %18227, %int1_16001, %int2_16002 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18290, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16003 = torch.constant.int 1
    %int2_16004 = torch.constant.int 2
    %18291 = torch.aten.transpose.int %18229, %int1_16003, %int2_16004 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18291, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16005 = torch.constant.int 1
    %int2_16006 = torch.constant.int 2
    %18292 = torch.aten.transpose.int %18231, %int1_16005, %int2_16006 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18292, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16007 = torch.constant.int 1
    %int2_16008 = torch.constant.int 2
    %18293 = torch.aten.transpose.int %18233, %int1_16007, %int2_16008 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18293, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16009 = torch.constant.int 1
    %int2_16010 = torch.constant.int 2
    %18294 = torch.aten.transpose.int %18235, %int1_16009, %int2_16010 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18294, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16011 = torch.constant.int 1
    %int2_16012 = torch.constant.int 2
    %18295 = torch.aten.transpose.int %18237, %int1_16011, %int2_16012 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18295, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16013 = torch.constant.int 1
    %int2_16014 = torch.constant.int 2
    %18296 = torch.aten.transpose.int %18239, %int1_16013, %int2_16014 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18296, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16015 = torch.constant.int 1
    %int2_16016 = torch.constant.int 2
    %18297 = torch.aten.transpose.int %18266, %int1_16015, %int2_16016 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18297, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16017 = torch.constant.int 1
    %int2_16018 = torch.constant.int 2
    %18298 = torch.aten.transpose.int %18268, %int1_16017, %int2_16018 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18298, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16019 = torch.constant.int 1
    %int2_16020 = torch.constant.int 2
    %18299 = torch.aten.transpose.int %18270, %int1_16019, %int2_16020 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18299, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16021 = torch.constant.int 1
    %int2_16022 = torch.constant.int 2
    %18300 = torch.aten.transpose.int %18272, %int1_16021, %int2_16022 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18300, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16023 = torch.constant.int 1
    %int2_16024 = torch.constant.int 2
    %18301 = torch.aten.transpose.int %18274, %int1_16023, %int2_16024 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18301, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16025 = torch.constant.int 1
    %int2_16026 = torch.constant.int 2
    %18302 = torch.aten.transpose.int %18276, %int1_16025, %int2_16026 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18302, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16027 = torch.constant.int 1
    %int2_16028 = torch.constant.int 2
    %18303 = torch.aten.transpose.int %18278, %int1_16027, %int2_16028 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18303, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_16029 = torch.constant.int 1
    %int2_16030 = torch.constant.int 2
    %18304 = torch.aten.transpose.int %18280, %int1_16029, %int2_16030 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %18304, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
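    // Causal flash attention, run independently on each of the eight device shards
    // (dropout probability 0.0, is_causal = true); result #0 is the attention
    // output, result #1 the logsumexp statistics.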
    %float0.000000e00_16031 = torch.constant.float 0.000000e+00
    %true_16032 = torch.constant.bool true
    %none_16033 = torch.constant.none
    %none_16034 = torch.constant.none
    %18305:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%18281, %18289, %18297, %float0.000000e00_16031, %true_16032, %none_16033, %none_16034) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %18305#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_16035 = torch.constant.float 0.000000e+00
    %true_16036 = torch.constant.bool true
    %none_16037 = torch.constant.none
    %none_16038 = torch.constant.none
    %18306:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%18282, %18290, %18298, %float0.000000e00_16035, %true_16036, %none_16037, %none_16038) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %18306#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_16039 = torch.constant.float 0.000000e+00
    %true_16040 = torch.constant.bool true
    %none_16041 = torch.constant.none
    %none_16042 = torch.constant.none
    %18307:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%18283, %18291, %18299, %float0.000000e00_16039, %true_16040, %none_16041, %none_16042) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %18307#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_16043 = torch.constant.float 0.000000e+00
    %true_16044 = torch.constant.bool true
    %none_16045 = torch.constant.none
    %none_16046 = torch.constant.none
    %18308:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%18284, %18292, %18300, %float0.000000e00_16043, %true_16044, %none_16045, %none_16046) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %18308#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_16047 = torch.constant.float 0.000000e+00
    %true_16048 = torch.constant.bool true
    %none_16049 = torch.constant.none
    %none_16050 = torch.constant.none
    %18309:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%18285, %18293, %18301, %float0.000000e00_16047, %true_16048, %none_16049, %none_16050) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %18309#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_16051 = torch.constant.float 0.000000e+00
    %true_16052 = torch.constant.bool true
    %none_16053 = torch.constant.none
    %none_16054 = torch.constant.none
    %18310:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%18286, %18294, %18302, %float0.000000e00_16051, %true_16052, %none_16053, %none_16054) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %18310#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_16055 = torch.constant.float 0.000000e+00
    %true_16056 = torch.constant.bool true
    %none_16057 = torch.constant.none
    %none_16058 = torch.constant.none
    %18311:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%18287, %18295, %18303, %float0.000000e00_16055, %true_16056, %none_16057, %none_16058) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %18311#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_16059 = torch.constant.float 0.000000e+00
    %true_16060 = torch.constant.bool true
    %none_16061 = torch.constant.none
    %none_16062 = torch.constant.none
    %18312:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%18288, %18296, %18304, %float0.000000e00_16059, %true_16060, %none_16061, %none_16062) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %18312#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
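    // Transpose each shard's attention output back to [4, seq, 4, 128] ...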
    %int1_16063 = torch.constant.int 1
    %int2_16064 = torch.constant.int 2
    %18313 = torch.aten.transpose.int %18305#0, %int1_16063, %int2_16064 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_16065 = torch.constant.int 1
    %int2_16066 = torch.constant.int 2
    %18314 = torch.aten.transpose.int %18306#0, %int1_16065, %int2_16066 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_16067 = torch.constant.int 1
    %int2_16068 = torch.constant.int 2
    %18315 = torch.aten.transpose.int %18307#0, %int1_16067, %int2_16068 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_16069 = torch.constant.int 1
    %int2_16070 = torch.constant.int 2
    %18316 = torch.aten.transpose.int %18308#0, %int1_16069, %int2_16070 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_16071 = torch.constant.int 1
    %int2_16072 = torch.constant.int 2
    %18317 = torch.aten.transpose.int %18309#0, %int1_16071, %int2_16072 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_16073 = torch.constant.int 1
    %int2_16074 = torch.constant.int 2
    %18318 = torch.aten.transpose.int %18310#0, %int1_16073, %int2_16074 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_16075 = torch.constant.int 1
    %int2_16076 = torch.constant.int 2
    %18319 = torch.aten.transpose.int %18311#0, %int1_16075, %int2_16076 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_16077 = torch.constant.int 1
    %int2_16078 = torch.constant.int 2
    %18320 = torch.aten.transpose.int %18312#0, %int1_16077, %int2_16078 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %18320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
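    // ... then merge the 4 heads x 128 head-dim into a single 512 feature dim per
    // shard.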
    %int4_16079 = torch.constant.int 4
    %int512_16080 = torch.constant.int 512
    %18321 = torch.prim.ListConstruct %int4_16079, %17642, %int512_16080 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18322 = torch.aten.view %18313, %18321 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %18322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16081 = torch.constant.int 4
    %int512_16082 = torch.constant.int 512
    %18323 = torch.prim.ListConstruct %int4_16081, %17657, %int512_16082 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18324 = torch.aten.view %18314, %18323 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %18324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16083 = torch.constant.int 4
    %int512_16084 = torch.constant.int 512
    %18325 = torch.prim.ListConstruct %int4_16083, %17672, %int512_16084 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18326 = torch.aten.view %18315, %18325 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %18326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16085 = torch.constant.int 4
    %int512_16086 = torch.constant.int 512
    %18327 = torch.prim.ListConstruct %int4_16085, %17687, %int512_16086 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18328 = torch.aten.view %18316, %18327 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %18328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16087 = torch.constant.int 4
    %int512_16088 = torch.constant.int 512
    %18329 = torch.prim.ListConstruct %int4_16087, %17702, %int512_16088 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18330 = torch.aten.view %18317, %18329 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %18330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16089 = torch.constant.int 4
    %int512_16090 = torch.constant.int 512
    %18331 = torch.prim.ListConstruct %int4_16089, %17717, %int512_16090 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18332 = torch.aten.view %18318, %18331 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %18332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16091 = torch.constant.int 4
    %int512_16092 = torch.constant.int 512
    %18333 = torch.prim.ListConstruct %int4_16091, %17732, %int512_16092 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18334 = torch.aten.view %18319, %18333 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %18334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16093 = torch.constant.int 4
    %int512_16094 = torch.constant.int 512
    %18335 = torch.prim.ListConstruct %int4_16093, %17747, %int512_16094 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18336 = torch.aten.view %18320, %18335 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %18336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
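    // Transpose each shard's output-projection weight (%616 ... %623) from
    // [4096, 512] to [512, 4096] for the matmuls below.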
    %int1_16095 = torch.constant.int 1
    %int0_16096 = torch.constant.int 0
    %18337 = torch.prim.ListConstruct %int1_16095, %int0_16096 : (!torch.int, !torch.int) -> !torch.list<int>
    %18338 = torch.aten.permute %616, %18337 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_16097 = torch.constant.int 1
    %int0_16098 = torch.constant.int 0
    %18339 = torch.prim.ListConstruct %int1_16097, %int0_16098 : (!torch.int, !torch.int) -> !torch.list<int>
    %18340 = torch.aten.permute %617, %18339 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_16099 = torch.constant.int 1
    %int0_16100 = torch.constant.int 0
    %18341 = torch.prim.ListConstruct %int1_16099, %int0_16100 : (!torch.int, !torch.int) -> !torch.list<int>
    %18342 = torch.aten.permute %618, %18341 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_16101 = torch.constant.int 1
    %int0_16102 = torch.constant.int 0
    %18343 = torch.prim.ListConstruct %int1_16101, %int0_16102 : (!torch.int, !torch.int) -> !torch.list<int>
    %18344 = torch.aten.permute %619, %18343 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_16103 = torch.constant.int 1
    %int0_16104 = torch.constant.int 0
    %18345 = torch.prim.ListConstruct %int1_16103, %int0_16104 : (!torch.int, !torch.int) -> !torch.list<int>
    %18346 = torch.aten.permute %620, %18345 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_16105 = torch.constant.int 1
    %int0_16106 = torch.constant.int 0
    %18347 = torch.prim.ListConstruct %int1_16105, %int0_16106 : (!torch.int, !torch.int) -> !torch.list<int>
    %18348 = torch.aten.permute %621, %18347 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_16107 = torch.constant.int 1
    %int0_16108 = torch.constant.int 0
    %18349 = torch.prim.ListConstruct %int1_16107, %int0_16108 : (!torch.int, !torch.int) -> !torch.list<int>
    %18350 = torch.aten.permute %622, %18349 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_16109 = torch.constant.int 1
    %int0_16110 = torch.constant.int 0
    %18351 = torch.prim.ListConstruct %int1_16109, %int0_16110 : (!torch.int, !torch.int) -> !torch.list<int>
    %18352 = torch.aten.permute %623, %18351 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
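    // Per-shard output projection: flatten tokens to [4*seq, 512], matmul with the
    // transposed weight shard, and restore [4, seq, 4096]. With the 512 input dim
    // split across devices, each shard appears to produce a partial sum of the full
    // projection (row-parallel linear).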
    %int4_16111 = torch.constant.int 4
    %18353 = torch.aten.mul.int %int4_16111, %17642 : !torch.int, !torch.int -> !torch.int
    %int512_16112 = torch.constant.int 512
    %18354 = torch.prim.ListConstruct %18353, %int512_16112 : (!torch.int, !torch.int) -> !torch.list<int>
    %18355 = torch.aten.view %18322, %18354 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %18355, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %18356 = torch.aten.mm %18355, %18338 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18356, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16113 = torch.constant.int 4
    %int4096_16114 = torch.constant.int 4096
    %18357 = torch.prim.ListConstruct %int4_16113, %17642, %int4096_16114 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18358 = torch.aten.view %18356, %18357 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_16115 = torch.constant.int 4
    %18359 = torch.aten.mul.int %int4_16115, %17657 : !torch.int, !torch.int -> !torch.int
    %int512_16116 = torch.constant.int 512
    %18360 = torch.prim.ListConstruct %18359, %int512_16116 : (!torch.int, !torch.int) -> !torch.list<int>
    %18361 = torch.aten.view %18324, %18360 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %18361, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %18362 = torch.aten.mm %18361, %18340 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18362, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16117 = torch.constant.int 4
    %int4096_16118 = torch.constant.int 4096
    %18363 = torch.prim.ListConstruct %int4_16117, %17657, %int4096_16118 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18364 = torch.aten.view %18362, %18363 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_16119 = torch.constant.int 4
    %18365 = torch.aten.mul.int %int4_16119, %17672 : !torch.int, !torch.int -> !torch.int
    %int512_16120 = torch.constant.int 512
    %18366 = torch.prim.ListConstruct %18365, %int512_16120 : (!torch.int, !torch.int) -> !torch.list<int>
    %18367 = torch.aten.view %18326, %18366 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %18367, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %18368 = torch.aten.mm %18367, %18342 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18368, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16121 = torch.constant.int 4
    %int4096_16122 = torch.constant.int 4096
    %18369 = torch.prim.ListConstruct %int4_16121, %17672, %int4096_16122 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18370 = torch.aten.view %18368, %18369 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_16123 = torch.constant.int 4
    %18371 = torch.aten.mul.int %int4_16123, %17687 : !torch.int, !torch.int -> !torch.int
    %int512_16124 = torch.constant.int 512
    %18372 = torch.prim.ListConstruct %18371, %int512_16124 : (!torch.int, !torch.int) -> !torch.list<int>
    %18373 = torch.aten.view %18328, %18372 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %18373, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %18374 = torch.aten.mm %18373, %18344 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18374, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16125 = torch.constant.int 4
    %int4096_16126 = torch.constant.int 4096
    %18375 = torch.prim.ListConstruct %int4_16125, %17687, %int4096_16126 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18376 = torch.aten.view %18374, %18375 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_16127 = torch.constant.int 4
    %18377 = torch.aten.mul.int %int4_16127, %17702 : !torch.int, !torch.int -> !torch.int
    %int512_16128 = torch.constant.int 512
    %18378 = torch.prim.ListConstruct %18377, %int512_16128 : (!torch.int, !torch.int) -> !torch.list<int>
    %18379 = torch.aten.view %18330, %18378 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %18379, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %18380 = torch.aten.mm %18379, %18346 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18380, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16129 = torch.constant.int 4
    %int4096_16130 = torch.constant.int 4096
    %18381 = torch.prim.ListConstruct %int4_16129, %17702, %int4096_16130 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18382 = torch.aten.view %18380, %18381 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_16131 = torch.constant.int 4
    %18383 = torch.aten.mul.int %int4_16131, %17717 : !torch.int, !torch.int -> !torch.int
    %int512_16132 = torch.constant.int 512
    %18384 = torch.prim.ListConstruct %18383, %int512_16132 : (!torch.int, !torch.int) -> !torch.list<int>
    %18385 = torch.aten.view %18332, %18384 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %18385, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %18386 = torch.aten.mm %18385, %18348 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18386, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16133 = torch.constant.int 4
    %int4096_16134 = torch.constant.int 4096
    %18387 = torch.prim.ListConstruct %int4_16133, %17717, %int4096_16134 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18388 = torch.aten.view %18386, %18387 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_16135 = torch.constant.int 4
    %18389 = torch.aten.mul.int %int4_16135, %17732 : !torch.int, !torch.int -> !torch.int
    %int512_16136 = torch.constant.int 512
    %18390 = torch.prim.ListConstruct %18389, %int512_16136 : (!torch.int, !torch.int) -> !torch.list<int>
    %18391 = torch.aten.view %18334, %18390 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %18391, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %18392 = torch.aten.mm %18391, %18350 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18392, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16137 = torch.constant.int 4
    %int4096_16138 = torch.constant.int 4096
    %18393 = torch.prim.ListConstruct %int4_16137, %17732, %int4096_16138 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18394 = torch.aten.view %18392, %18393 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_16139 = torch.constant.int 4
    %18395 = torch.aten.mul.int %int4_16139, %17747 : !torch.int, !torch.int -> !torch.int
    %int512_16140 = torch.constant.int 512
    %18396 = torch.prim.ListConstruct %18395, %int512_16140 : (!torch.int, !torch.int) -> !torch.list<int>
    %18397 = torch.aten.view %18336, %18396 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %18397, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %18398 = torch.aten.mm %18397, %18352 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18398, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16141 = torch.constant.int 4
    %int4096_16142 = torch.constant.int 4096
    %18399 = torch.prim.ListConstruct %int4_16141, %17747, %int4096_16142 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18400 = torch.aten.view %18398, %18399 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
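    // --- Sharded projection: per-device partial results ---
    // Each of the eight devices has just reshaped its [4,?,512] activation
    // slice to [?,512], multiplied it by a local 512x4096 weight shard, and
    // reshaped back to [4,?,4096]; this looks like the row-parallel output
    // projection of an 8-way tensor-parallel attention block. The eight
    // partials are %18358, %18364, %18370, %18376, %18382, %18388, %18394
    // and %18400. The blocks below combine them with an unrolled all-reduce:
    // for each destination device, transfer the seven remote partials there
    // and sum all eight, i.e. (informal notation, not part of the IR)
    //   reduced = partial_0 + partial_1 + ... + partial_7
    // First destination: @__device_0, producing %18428.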
    %18401 = torch_c.to_builtin_tensor %18364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16143 = arith.constant 1 : index
    %dim_16144 = tensor.dim %18401, %c1_16143 : tensor<4x?x4096xf16>
    %18402 = flow.tensor.transfer %18401 : tensor<4x?x4096xf16>{%dim_16144} to #hal.device.promise<@__device_0>
    %18403 = torch_c.from_builtin_tensor %18402 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18404 = torch_c.to_builtin_tensor %18370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16145 = arith.constant 1 : index
    %dim_16146 = tensor.dim %18404, %c1_16145 : tensor<4x?x4096xf16>
    %18405 = flow.tensor.transfer %18404 : tensor<4x?x4096xf16>{%dim_16146} to #hal.device.promise<@__device_0>
    %18406 = torch_c.from_builtin_tensor %18405 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18407 = torch_c.to_builtin_tensor %18376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16147 = arith.constant 1 : index
    %dim_16148 = tensor.dim %18407, %c1_16147 : tensor<4x?x4096xf16>
    %18408 = flow.tensor.transfer %18407 : tensor<4x?x4096xf16>{%dim_16148} to #hal.device.promise<@__device_0>
    %18409 = torch_c.from_builtin_tensor %18408 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18410 = torch_c.to_builtin_tensor %18382 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16149 = arith.constant 1 : index
    %dim_16150 = tensor.dim %18410, %c1_16149 : tensor<4x?x4096xf16>
    %18411 = flow.tensor.transfer %18410 : tensor<4x?x4096xf16>{%dim_16150} to #hal.device.promise<@__device_0>
    %18412 = torch_c.from_builtin_tensor %18411 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18413 = torch_c.to_builtin_tensor %18388 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16151 = arith.constant 1 : index
    %dim_16152 = tensor.dim %18413, %c1_16151 : tensor<4x?x4096xf16>
    %18414 = flow.tensor.transfer %18413 : tensor<4x?x4096xf16>{%dim_16152} to #hal.device.promise<@__device_0>
    %18415 = torch_c.from_builtin_tensor %18414 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18416 = torch_c.to_builtin_tensor %18394 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16153 = arith.constant 1 : index
    %dim_16154 = tensor.dim %18416, %c1_16153 : tensor<4x?x4096xf16>
    %18417 = flow.tensor.transfer %18416 : tensor<4x?x4096xf16>{%dim_16154} to #hal.device.promise<@__device_0>
    %18418 = torch_c.from_builtin_tensor %18417 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18419 = torch_c.to_builtin_tensor %18400 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16155 = arith.constant 1 : index
    %dim_16156 = tensor.dim %18419, %c1_16155 : tensor<4x?x4096xf16>
    %18420 = flow.tensor.transfer %18419 : tensor<4x?x4096xf16>{%dim_16156} to #hal.device.promise<@__device_0>
    %18421 = torch_c.from_builtin_tensor %18420 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16157 = torch.constant.int 1
    %18422 = torch.aten.add.Tensor %18358, %18403, %int1_16157 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16158 = torch.constant.int 1
    %18423 = torch.aten.add.Tensor %18422, %18406, %int1_16158 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16159 = torch.constant.int 1
    %18424 = torch.aten.add.Tensor %18423, %18409, %int1_16159 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16160 = torch.constant.int 1
    %18425 = torch.aten.add.Tensor %18424, %18412, %int1_16160 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16161 = torch.constant.int 1
    %18426 = torch.aten.add.Tensor %18425, %18415, %int1_16161 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16162 = torch.constant.int 1
    %18427 = torch.aten.add.Tensor %18426, %18418, %int1_16162 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16163 = torch.constant.int 1
    %18428 = torch.aten.add.Tensor %18427, %18421, %int1_16163 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
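    // Same gather-and-sum, destination @__device_1; the partial already
    // resident there (%18364) is added directly, without a transfer.
    // Result: %18456.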
    %18429 = torch_c.to_builtin_tensor %18358 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16164 = arith.constant 1 : index
    %dim_16165 = tensor.dim %18429, %c1_16164 : tensor<4x?x4096xf16>
    %18430 = flow.tensor.transfer %18429 : tensor<4x?x4096xf16>{%dim_16165} to #hal.device.promise<@__device_1>
    %18431 = torch_c.from_builtin_tensor %18430 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18432 = torch_c.to_builtin_tensor %18370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16166 = arith.constant 1 : index
    %dim_16167 = tensor.dim %18432, %c1_16166 : tensor<4x?x4096xf16>
    %18433 = flow.tensor.transfer %18432 : tensor<4x?x4096xf16>{%dim_16167} to #hal.device.promise<@__device_1>
    %18434 = torch_c.from_builtin_tensor %18433 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18435 = torch_c.to_builtin_tensor %18376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16168 = arith.constant 1 : index
    %dim_16169 = tensor.dim %18435, %c1_16168 : tensor<4x?x4096xf16>
    %18436 = flow.tensor.transfer %18435 : tensor<4x?x4096xf16>{%dim_16169} to #hal.device.promise<@__device_1>
    %18437 = torch_c.from_builtin_tensor %18436 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18438 = torch_c.to_builtin_tensor %18382 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16170 = arith.constant 1 : index
    %dim_16171 = tensor.dim %18438, %c1_16170 : tensor<4x?x4096xf16>
    %18439 = flow.tensor.transfer %18438 : tensor<4x?x4096xf16>{%dim_16171} to #hal.device.promise<@__device_1>
    %18440 = torch_c.from_builtin_tensor %18439 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18441 = torch_c.to_builtin_tensor %18388 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16172 = arith.constant 1 : index
    %dim_16173 = tensor.dim %18441, %c1_16172 : tensor<4x?x4096xf16>
    %18442 = flow.tensor.transfer %18441 : tensor<4x?x4096xf16>{%dim_16173} to #hal.device.promise<@__device_1>
    %18443 = torch_c.from_builtin_tensor %18442 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18444 = torch_c.to_builtin_tensor %18394 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16174 = arith.constant 1 : index
    %dim_16175 = tensor.dim %18444, %c1_16174 : tensor<4x?x4096xf16>
    %18445 = flow.tensor.transfer %18444 : tensor<4x?x4096xf16>{%dim_16175} to #hal.device.promise<@__device_1>
    %18446 = torch_c.from_builtin_tensor %18445 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18447 = torch_c.to_builtin_tensor %18400 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16176 = arith.constant 1 : index
    %dim_16177 = tensor.dim %18447, %c1_16176 : tensor<4x?x4096xf16>
    %18448 = flow.tensor.transfer %18447 : tensor<4x?x4096xf16>{%dim_16177} to #hal.device.promise<@__device_1>
    %18449 = torch_c.from_builtin_tensor %18448 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16178 = torch.constant.int 1
    %18450 = torch.aten.add.Tensor %18431, %18364, %int1_16178 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16179 = torch.constant.int 1
    %18451 = torch.aten.add.Tensor %18450, %18434, %int1_16179 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16180 = torch.constant.int 1
    %18452 = torch.aten.add.Tensor %18451, %18437, %int1_16180 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16181 = torch.constant.int 1
    %18453 = torch.aten.add.Tensor %18452, %18440, %int1_16181 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16182 = torch.constant.int 1
    %18454 = torch.aten.add.Tensor %18453, %18443, %int1_16182 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16183 = torch.constant.int 1
    %18455 = torch.aten.add.Tensor %18454, %18446, %int1_16183 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16184 = torch.constant.int 1
    %18456 = torch.aten.add.Tensor %18455, %18449, %int1_16184 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
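    // Gather-and-sum, destination @__device_2 (local partial %18370, no
    // transfer). Result: %18484.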
    %18457 = torch_c.to_builtin_tensor %18358 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16185 = arith.constant 1 : index
    %dim_16186 = tensor.dim %18457, %c1_16185 : tensor<4x?x4096xf16>
    %18458 = flow.tensor.transfer %18457 : tensor<4x?x4096xf16>{%dim_16186} to #hal.device.promise<@__device_2>
    %18459 = torch_c.from_builtin_tensor %18458 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18460 = torch_c.to_builtin_tensor %18364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16187 = arith.constant 1 : index
    %dim_16188 = tensor.dim %18460, %c1_16187 : tensor<4x?x4096xf16>
    %18461 = flow.tensor.transfer %18460 : tensor<4x?x4096xf16>{%dim_16188} to #hal.device.promise<@__device_2>
    %18462 = torch_c.from_builtin_tensor %18461 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18463 = torch_c.to_builtin_tensor %18376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16189 = arith.constant 1 : index
    %dim_16190 = tensor.dim %18463, %c1_16189 : tensor<4x?x4096xf16>
    %18464 = flow.tensor.transfer %18463 : tensor<4x?x4096xf16>{%dim_16190} to #hal.device.promise<@__device_2>
    %18465 = torch_c.from_builtin_tensor %18464 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18466 = torch_c.to_builtin_tensor %18382 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16191 = arith.constant 1 : index
    %dim_16192 = tensor.dim %18466, %c1_16191 : tensor<4x?x4096xf16>
    %18467 = flow.tensor.transfer %18466 : tensor<4x?x4096xf16>{%dim_16192} to #hal.device.promise<@__device_2>
    %18468 = torch_c.from_builtin_tensor %18467 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18469 = torch_c.to_builtin_tensor %18388 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16193 = arith.constant 1 : index
    %dim_16194 = tensor.dim %18469, %c1_16193 : tensor<4x?x4096xf16>
    %18470 = flow.tensor.transfer %18469 : tensor<4x?x4096xf16>{%dim_16194} to #hal.device.promise<@__device_2>
    %18471 = torch_c.from_builtin_tensor %18470 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18472 = torch_c.to_builtin_tensor %18394 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16195 = arith.constant 1 : index
    %dim_16196 = tensor.dim %18472, %c1_16195 : tensor<4x?x4096xf16>
    %18473 = flow.tensor.transfer %18472 : tensor<4x?x4096xf16>{%dim_16196} to #hal.device.promise<@__device_2>
    %18474 = torch_c.from_builtin_tensor %18473 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18475 = torch_c.to_builtin_tensor %18400 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16197 = arith.constant 1 : index
    %dim_16198 = tensor.dim %18475, %c1_16197 : tensor<4x?x4096xf16>
    %18476 = flow.tensor.transfer %18475 : tensor<4x?x4096xf16>{%dim_16198} to #hal.device.promise<@__device_2>
    %18477 = torch_c.from_builtin_tensor %18476 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16199 = torch.constant.int 1
    %18478 = torch.aten.add.Tensor %18459, %18462, %int1_16199 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16200 = torch.constant.int 1
    %18479 = torch.aten.add.Tensor %18478, %18370, %int1_16200 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16201 = torch.constant.int 1
    %18480 = torch.aten.add.Tensor %18479, %18465, %int1_16201 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16202 = torch.constant.int 1
    %18481 = torch.aten.add.Tensor %18480, %18468, %int1_16202 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16203 = torch.constant.int 1
    %18482 = torch.aten.add.Tensor %18481, %18471, %int1_16203 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16204 = torch.constant.int 1
    %18483 = torch.aten.add.Tensor %18482, %18474, %int1_16204 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16205 = torch.constant.int 1
    %18484 = torch.aten.add.Tensor %18483, %18477, %int1_16205 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
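    // Gather-and-sum, destination @__device_3 (local partial %18376, no
    // transfer). Result: %18512.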
    %18485 = torch_c.to_builtin_tensor %18358 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16206 = arith.constant 1 : index
    %dim_16207 = tensor.dim %18485, %c1_16206 : tensor<4x?x4096xf16>
    %18486 = flow.tensor.transfer %18485 : tensor<4x?x4096xf16>{%dim_16207} to #hal.device.promise<@__device_3>
    %18487 = torch_c.from_builtin_tensor %18486 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18488 = torch_c.to_builtin_tensor %18364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16208 = arith.constant 1 : index
    %dim_16209 = tensor.dim %18488, %c1_16208 : tensor<4x?x4096xf16>
    %18489 = flow.tensor.transfer %18488 : tensor<4x?x4096xf16>{%dim_16209} to #hal.device.promise<@__device_3>
    %18490 = torch_c.from_builtin_tensor %18489 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18491 = torch_c.to_builtin_tensor %18370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16210 = arith.constant 1 : index
    %dim_16211 = tensor.dim %18491, %c1_16210 : tensor<4x?x4096xf16>
    %18492 = flow.tensor.transfer %18491 : tensor<4x?x4096xf16>{%dim_16211} to #hal.device.promise<@__device_3>
    %18493 = torch_c.from_builtin_tensor %18492 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18494 = torch_c.to_builtin_tensor %18382 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16212 = arith.constant 1 : index
    %dim_16213 = tensor.dim %18494, %c1_16212 : tensor<4x?x4096xf16>
    %18495 = flow.tensor.transfer %18494 : tensor<4x?x4096xf16>{%dim_16213} to #hal.device.promise<@__device_3>
    %18496 = torch_c.from_builtin_tensor %18495 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18497 = torch_c.to_builtin_tensor %18388 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16214 = arith.constant 1 : index
    %dim_16215 = tensor.dim %18497, %c1_16214 : tensor<4x?x4096xf16>
    %18498 = flow.tensor.transfer %18497 : tensor<4x?x4096xf16>{%dim_16215} to #hal.device.promise<@__device_3>
    %18499 = torch_c.from_builtin_tensor %18498 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18500 = torch_c.to_builtin_tensor %18394 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16216 = arith.constant 1 : index
    %dim_16217 = tensor.dim %18500, %c1_16216 : tensor<4x?x4096xf16>
    %18501 = flow.tensor.transfer %18500 : tensor<4x?x4096xf16>{%dim_16217} to #hal.device.promise<@__device_3>
    %18502 = torch_c.from_builtin_tensor %18501 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18503 = torch_c.to_builtin_tensor %18400 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16218 = arith.constant 1 : index
    %dim_16219 = tensor.dim %18503, %c1_16218 : tensor<4x?x4096xf16>
    %18504 = flow.tensor.transfer %18503 : tensor<4x?x4096xf16>{%dim_16219} to #hal.device.promise<@__device_3>
    %18505 = torch_c.from_builtin_tensor %18504 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16220 = torch.constant.int 1
    %18506 = torch.aten.add.Tensor %18487, %18490, %int1_16220 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16221 = torch.constant.int 1
    %18507 = torch.aten.add.Tensor %18506, %18493, %int1_16221 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16222 = torch.constant.int 1
    %18508 = torch.aten.add.Tensor %18507, %18376, %int1_16222 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16223 = torch.constant.int 1
    %18509 = torch.aten.add.Tensor %18508, %18496, %int1_16223 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16224 = torch.constant.int 1
    %18510 = torch.aten.add.Tensor %18509, %18499, %int1_16224 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16225 = torch.constant.int 1
    %18511 = torch.aten.add.Tensor %18510, %18502, %int1_16225 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16226 = torch.constant.int 1
    %18512 = torch.aten.add.Tensor %18511, %18505, %int1_16226 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
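    // Gather-and-sum, destination @__device_4 (local partial %18382, no
    // transfer). Result: %18540.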
    %18513 = torch_c.to_builtin_tensor %18358 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16227 = arith.constant 1 : index
    %dim_16228 = tensor.dim %18513, %c1_16227 : tensor<4x?x4096xf16>
    %18514 = flow.tensor.transfer %18513 : tensor<4x?x4096xf16>{%dim_16228} to #hal.device.promise<@__device_4>
    %18515 = torch_c.from_builtin_tensor %18514 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18516 = torch_c.to_builtin_tensor %18364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16229 = arith.constant 1 : index
    %dim_16230 = tensor.dim %18516, %c1_16229 : tensor<4x?x4096xf16>
    %18517 = flow.tensor.transfer %18516 : tensor<4x?x4096xf16>{%dim_16230} to #hal.device.promise<@__device_4>
    %18518 = torch_c.from_builtin_tensor %18517 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18519 = torch_c.to_builtin_tensor %18370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16231 = arith.constant 1 : index
    %dim_16232 = tensor.dim %18519, %c1_16231 : tensor<4x?x4096xf16>
    %18520 = flow.tensor.transfer %18519 : tensor<4x?x4096xf16>{%dim_16232} to #hal.device.promise<@__device_4>
    %18521 = torch_c.from_builtin_tensor %18520 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18522 = torch_c.to_builtin_tensor %18376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16233 = arith.constant 1 : index
    %dim_16234 = tensor.dim %18522, %c1_16233 : tensor<4x?x4096xf16>
    %18523 = flow.tensor.transfer %18522 : tensor<4x?x4096xf16>{%dim_16234} to #hal.device.promise<@__device_4>
    %18524 = torch_c.from_builtin_tensor %18523 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18525 = torch_c.to_builtin_tensor %18388 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16235 = arith.constant 1 : index
    %dim_16236 = tensor.dim %18525, %c1_16235 : tensor<4x?x4096xf16>
    %18526 = flow.tensor.transfer %18525 : tensor<4x?x4096xf16>{%dim_16236} to #hal.device.promise<@__device_4>
    %18527 = torch_c.from_builtin_tensor %18526 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18528 = torch_c.to_builtin_tensor %18394 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16237 = arith.constant 1 : index
    %dim_16238 = tensor.dim %18528, %c1_16237 : tensor<4x?x4096xf16>
    %18529 = flow.tensor.transfer %18528 : tensor<4x?x4096xf16>{%dim_16238} to #hal.device.promise<@__device_4>
    %18530 = torch_c.from_builtin_tensor %18529 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18531 = torch_c.to_builtin_tensor %18400 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16239 = arith.constant 1 : index
    %dim_16240 = tensor.dim %18531, %c1_16239 : tensor<4x?x4096xf16>
    %18532 = flow.tensor.transfer %18531 : tensor<4x?x4096xf16>{%dim_16240} to #hal.device.promise<@__device_4>
    %18533 = torch_c.from_builtin_tensor %18532 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16241 = torch.constant.int 1
    %18534 = torch.aten.add.Tensor %18515, %18518, %int1_16241 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16242 = torch.constant.int 1
    %18535 = torch.aten.add.Tensor %18534, %18521, %int1_16242 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16243 = torch.constant.int 1
    %18536 = torch.aten.add.Tensor %18535, %18524, %int1_16243 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16244 = torch.constant.int 1
    %18537 = torch.aten.add.Tensor %18536, %18382, %int1_16244 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16245 = torch.constant.int 1
    %18538 = torch.aten.add.Tensor %18537, %18527, %int1_16245 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16246 = torch.constant.int 1
    %18539 = torch.aten.add.Tensor %18538, %18530, %int1_16246 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16247 = torch.constant.int 1
    %18540 = torch.aten.add.Tensor %18539, %18533, %int1_16247 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
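    // Gather-and-sum, destination @__device_5 (local partial %18388, no
    // transfer). Result: %18568.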
    %18541 = torch_c.to_builtin_tensor %18358 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16248 = arith.constant 1 : index
    %dim_16249 = tensor.dim %18541, %c1_16248 : tensor<4x?x4096xf16>
    %18542 = flow.tensor.transfer %18541 : tensor<4x?x4096xf16>{%dim_16249} to #hal.device.promise<@__device_5>
    %18543 = torch_c.from_builtin_tensor %18542 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18544 = torch_c.to_builtin_tensor %18364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16250 = arith.constant 1 : index
    %dim_16251 = tensor.dim %18544, %c1_16250 : tensor<4x?x4096xf16>
    %18545 = flow.tensor.transfer %18544 : tensor<4x?x4096xf16>{%dim_16251} to #hal.device.promise<@__device_5>
    %18546 = torch_c.from_builtin_tensor %18545 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18547 = torch_c.to_builtin_tensor %18370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16252 = arith.constant 1 : index
    %dim_16253 = tensor.dim %18547, %c1_16252 : tensor<4x?x4096xf16>
    %18548 = flow.tensor.transfer %18547 : tensor<4x?x4096xf16>{%dim_16253} to #hal.device.promise<@__device_5>
    %18549 = torch_c.from_builtin_tensor %18548 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18550 = torch_c.to_builtin_tensor %18376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16254 = arith.constant 1 : index
    %dim_16255 = tensor.dim %18550, %c1_16254 : tensor<4x?x4096xf16>
    %18551 = flow.tensor.transfer %18550 : tensor<4x?x4096xf16>{%dim_16255} to #hal.device.promise<@__device_5>
    %18552 = torch_c.from_builtin_tensor %18551 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18553 = torch_c.to_builtin_tensor %18382 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16256 = arith.constant 1 : index
    %dim_16257 = tensor.dim %18553, %c1_16256 : tensor<4x?x4096xf16>
    %18554 = flow.tensor.transfer %18553 : tensor<4x?x4096xf16>{%dim_16257} to #hal.device.promise<@__device_5>
    %18555 = torch_c.from_builtin_tensor %18554 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18556 = torch_c.to_builtin_tensor %18394 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16258 = arith.constant 1 : index
    %dim_16259 = tensor.dim %18556, %c1_16258 : tensor<4x?x4096xf16>
    %18557 = flow.tensor.transfer %18556 : tensor<4x?x4096xf16>{%dim_16259} to #hal.device.promise<@__device_5>
    %18558 = torch_c.from_builtin_tensor %18557 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18559 = torch_c.to_builtin_tensor %18400 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16260 = arith.constant 1 : index
    %dim_16261 = tensor.dim %18559, %c1_16260 : tensor<4x?x4096xf16>
    %18560 = flow.tensor.transfer %18559 : tensor<4x?x4096xf16>{%dim_16261} to #hal.device.promise<@__device_5>
    %18561 = torch_c.from_builtin_tensor %18560 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16262 = torch.constant.int 1
    %18562 = torch.aten.add.Tensor %18543, %18546, %int1_16262 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16263 = torch.constant.int 1
    %18563 = torch.aten.add.Tensor %18562, %18549, %int1_16263 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16264 = torch.constant.int 1
    %18564 = torch.aten.add.Tensor %18563, %18552, %int1_16264 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16265 = torch.constant.int 1
    %18565 = torch.aten.add.Tensor %18564, %18555, %int1_16265 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16266 = torch.constant.int 1
    %18566 = torch.aten.add.Tensor %18565, %18388, %int1_16266 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16267 = torch.constant.int 1
    %18567 = torch.aten.add.Tensor %18566, %18558, %int1_16267 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16268 = torch.constant.int 1
    %18568 = torch.aten.add.Tensor %18567, %18561, %int1_16268 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
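    // Gather-and-sum, destination @__device_6 (local partial %18394, no
    // transfer). Result: %18596.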
    %18569 = torch_c.to_builtin_tensor %18358 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16269 = arith.constant 1 : index
    %dim_16270 = tensor.dim %18569, %c1_16269 : tensor<4x?x4096xf16>
    %18570 = flow.tensor.transfer %18569 : tensor<4x?x4096xf16>{%dim_16270} to #hal.device.promise<@__device_6>
    %18571 = torch_c.from_builtin_tensor %18570 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18572 = torch_c.to_builtin_tensor %18364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16271 = arith.constant 1 : index
    %dim_16272 = tensor.dim %18572, %c1_16271 : tensor<4x?x4096xf16>
    %18573 = flow.tensor.transfer %18572 : tensor<4x?x4096xf16>{%dim_16272} to #hal.device.promise<@__device_6>
    %18574 = torch_c.from_builtin_tensor %18573 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18575 = torch_c.to_builtin_tensor %18370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16273 = arith.constant 1 : index
    %dim_16274 = tensor.dim %18575, %c1_16273 : tensor<4x?x4096xf16>
    %18576 = flow.tensor.transfer %18575 : tensor<4x?x4096xf16>{%dim_16274} to #hal.device.promise<@__device_6>
    %18577 = torch_c.from_builtin_tensor %18576 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18578 = torch_c.to_builtin_tensor %18376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16275 = arith.constant 1 : index
    %dim_16276 = tensor.dim %18578, %c1_16275 : tensor<4x?x4096xf16>
    %18579 = flow.tensor.transfer %18578 : tensor<4x?x4096xf16>{%dim_16276} to #hal.device.promise<@__device_6>
    %18580 = torch_c.from_builtin_tensor %18579 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18581 = torch_c.to_builtin_tensor %18382 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16277 = arith.constant 1 : index
    %dim_16278 = tensor.dim %18581, %c1_16277 : tensor<4x?x4096xf16>
    %18582 = flow.tensor.transfer %18581 : tensor<4x?x4096xf16>{%dim_16278} to #hal.device.promise<@__device_6>
    %18583 = torch_c.from_builtin_tensor %18582 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18584 = torch_c.to_builtin_tensor %18388 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16279 = arith.constant 1 : index
    %dim_16280 = tensor.dim %18584, %c1_16279 : tensor<4x?x4096xf16>
    %18585 = flow.tensor.transfer %18584 : tensor<4x?x4096xf16>{%dim_16280} to #hal.device.promise<@__device_6>
    %18586 = torch_c.from_builtin_tensor %18585 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18587 = torch_c.to_builtin_tensor %18400 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16281 = arith.constant 1 : index
    %dim_16282 = tensor.dim %18587, %c1_16281 : tensor<4x?x4096xf16>
    %18588 = flow.tensor.transfer %18587 : tensor<4x?x4096xf16>{%dim_16282} to #hal.device.promise<@__device_6>
    %18589 = torch_c.from_builtin_tensor %18588 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16283 = torch.constant.int 1
    %18590 = torch.aten.add.Tensor %18571, %18574, %int1_16283 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16284 = torch.constant.int 1
    %18591 = torch.aten.add.Tensor %18590, %18577, %int1_16284 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16285 = torch.constant.int 1
    %18592 = torch.aten.add.Tensor %18591, %18580, %int1_16285 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16286 = torch.constant.int 1
    %18593 = torch.aten.add.Tensor %18592, %18583, %int1_16286 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16287 = torch.constant.int 1
    %18594 = torch.aten.add.Tensor %18593, %18586, %int1_16287 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16288 = torch.constant.int 1
    %18595 = torch.aten.add.Tensor %18594, %18394, %int1_16288 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16289 = torch.constant.int 1
    %18596 = torch.aten.add.Tensor %18595, %18589, %int1_16289 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
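    // Gather-and-sum, destination @__device_7 (local partial %18400, no
    // transfer). Result: %18624. All eight devices now hold the same
    // reduced sum.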
    %18597 = torch_c.to_builtin_tensor %18358 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16290 = arith.constant 1 : index
    %dim_16291 = tensor.dim %18597, %c1_16290 : tensor<4x?x4096xf16>
    %18598 = flow.tensor.transfer %18597 : tensor<4x?x4096xf16>{%dim_16291} to #hal.device.promise<@__device_7>
    %18599 = torch_c.from_builtin_tensor %18598 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18600 = torch_c.to_builtin_tensor %18364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16292 = arith.constant 1 : index
    %dim_16293 = tensor.dim %18600, %c1_16292 : tensor<4x?x4096xf16>
    %18601 = flow.tensor.transfer %18600 : tensor<4x?x4096xf16>{%dim_16293} to #hal.device.promise<@__device_7>
    %18602 = torch_c.from_builtin_tensor %18601 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18603 = torch_c.to_builtin_tensor %18370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16294 = arith.constant 1 : index
    %dim_16295 = tensor.dim %18603, %c1_16294 : tensor<4x?x4096xf16>
    %18604 = flow.tensor.transfer %18603 : tensor<4x?x4096xf16>{%dim_16295} to #hal.device.promise<@__device_7>
    %18605 = torch_c.from_builtin_tensor %18604 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18606 = torch_c.to_builtin_tensor %18376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16296 = arith.constant 1 : index
    %dim_16297 = tensor.dim %18606, %c1_16296 : tensor<4x?x4096xf16>
    %18607 = flow.tensor.transfer %18606 : tensor<4x?x4096xf16>{%dim_16297} to #hal.device.promise<@__device_7>
    %18608 = torch_c.from_builtin_tensor %18607 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18609 = torch_c.to_builtin_tensor %18382 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16298 = arith.constant 1 : index
    %dim_16299 = tensor.dim %18609, %c1_16298 : tensor<4x?x4096xf16>
    %18610 = flow.tensor.transfer %18609 : tensor<4x?x4096xf16>{%dim_16299} to #hal.device.promise<@__device_7>
    %18611 = torch_c.from_builtin_tensor %18610 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18612 = torch_c.to_builtin_tensor %18388 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16300 = arith.constant 1 : index
    %dim_16301 = tensor.dim %18612, %c1_16300 : tensor<4x?x4096xf16>
    %18613 = flow.tensor.transfer %18612 : tensor<4x?x4096xf16>{%dim_16301} to #hal.device.promise<@__device_7>
    %18614 = torch_c.from_builtin_tensor %18613 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18615 = torch_c.to_builtin_tensor %18394 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16302 = arith.constant 1 : index
    %dim_16303 = tensor.dim %18615, %c1_16302 : tensor<4x?x4096xf16>
    %18616 = flow.tensor.transfer %18615 : tensor<4x?x4096xf16>{%dim_16303} to #hal.device.promise<@__device_7>
    %18617 = torch_c.from_builtin_tensor %18616 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16304 = torch.constant.int 1
    %18618 = torch.aten.add.Tensor %18599, %18602, %int1_16304 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16305 = torch.constant.int 1
    %18619 = torch.aten.add.Tensor %18618, %18605, %int1_16305 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16306 = torch.constant.int 1
    %18620 = torch.aten.add.Tensor %18619, %18608, %int1_16306 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16307 = torch.constant.int 1
    %18621 = torch.aten.add.Tensor %18620, %18611, %int1_16307 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16308 = torch.constant.int 1
    %18622 = torch.aten.add.Tensor %18621, %18614, %int1_16308 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16309 = torch.constant.int 1
    %18623 = torch.aten.add.Tensor %18622, %18617, %int1_16309 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16310 = torch.constant.int 1
    %18624 = torch.aten.add.Tensor %18623, %18400, %int1_16310 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
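    // --- Residual connection ---
    // With the reduced sum replicated on every device (%18428, %18456,
    // %18484, %18512, %18540, %18568, %18596, %18624), each device adds it
    // to its copy of the layer input (%17284 ... %17291), i.e.
    //   h = h + projected_output
    // The %1728x operands are defined earlier in the function and are
    // assumed here to be the per-device residual-stream tensors.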
    %int1_16311 = torch.constant.int 1
    %18625 = torch.aten.add.Tensor %17284, %18428, %int1_16311 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16312 = torch.constant.int 1
    %18626 = torch.aten.add.Tensor %17285, %18456, %int1_16312 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16313 = torch.constant.int 1
    %18627 = torch.aten.add.Tensor %17286, %18484, %int1_16313 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16314 = torch.constant.int 1
    %18628 = torch.aten.add.Tensor %17287, %18512, %int1_16314 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16315 = torch.constant.int 1
    %18629 = torch.aten.add.Tensor %17288, %18540, %int1_16315 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16316 = torch.constant.int 1
    %18630 = torch.aten.add.Tensor %17289, %18568, %int1_16316 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16317 = torch.constant.int 1
    %18631 = torch.aten.add.Tensor %17290, %18596, %int1_16317 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16318 = torch.constant.int 1
    %18632 = torch.aten.add.Tensor %17291, %18624, %int1_16318 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
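    // --- Normalization prologue (once per device) ---
    // Upcast the f16 residual to f32 (torch dtype code 6 = float32), square
    // it elementwise, then average over the hidden dimension. Informally:
    //   variance = mean(h.to(f32) ** 2, dim=-1, keepdim=True)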
    %int6_16319 = torch.constant.int 6
    %18633 = torch.prims.convert_element_type %18625, %int6_16319 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16320 = torch.constant.int 6
    %18634 = torch.prims.convert_element_type %18626, %int6_16320 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16321 = torch.constant.int 6
    %18635 = torch.prims.convert_element_type %18627, %int6_16321 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16322 = torch.constant.int 6
    %18636 = torch.prims.convert_element_type %18628, %int6_16322 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16323 = torch.constant.int 6
    %18637 = torch.prims.convert_element_type %18629, %int6_16323 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16324 = torch.constant.int 6
    %18638 = torch.prims.convert_element_type %18630, %int6_16324 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16325 = torch.constant.int 6
    %18639 = torch.prims.convert_element_type %18631, %int6_16325 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16326 = torch.constant.int 6
    %18640 = torch.prims.convert_element_type %18632, %int6_16326 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
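    // Square each element (x^2); the mean of the squares is computed next.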
    %int2_16327 = torch.constant.int 2
    %18641 = torch.aten.pow.Tensor_Scalar %18633, %int2_16327 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16328 = torch.constant.int 2
    %18642 = torch.aten.pow.Tensor_Scalar %18634, %int2_16328 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16329 = torch.constant.int 2
    %18643 = torch.aten.pow.Tensor_Scalar %18635, %int2_16329 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16330 = torch.constant.int 2
    %18644 = torch.aten.pow.Tensor_Scalar %18636, %int2_16330 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16331 = torch.constant.int 2
    %18645 = torch.aten.pow.Tensor_Scalar %18637, %int2_16331 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16332 = torch.constant.int 2
    %18646 = torch.aten.pow.Tensor_Scalar %18638, %int2_16332 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16333 = torch.constant.int 2
    %18647 = torch.aten.pow.Tensor_Scalar %18639, %int2_16333 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16334 = torch.constant.int 2
    %18648 = torch.aten.pow.Tensor_Scalar %18640, %int2_16334 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
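    // Mean of the squares over the last (hidden) dim with keepdim=true, giving shape (4, seq, 1) per shard.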
    %int-1_16335 = torch.constant.int -1
    %18649 = torch.prim.ListConstruct %int-1_16335 : (!torch.int) -> !torch.list<int>
    %true_16336 = torch.constant.bool true
    %none_16337 = torch.constant.none
    %18650 = torch.aten.mean.dim %18641, %18649, %true_16336, %none_16337 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16338 = torch.constant.int -1
    %18651 = torch.prim.ListConstruct %int-1_16338 : (!torch.int) -> !torch.list<int>
    %true_16339 = torch.constant.bool true
    %none_16340 = torch.constant.none
    %18652 = torch.aten.mean.dim %18642, %18651, %true_16339, %none_16340 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16341 = torch.constant.int -1
    %18653 = torch.prim.ListConstruct %int-1_16341 : (!torch.int) -> !torch.list<int>
    %true_16342 = torch.constant.bool true
    %none_16343 = torch.constant.none
    %18654 = torch.aten.mean.dim %18643, %18653, %true_16342, %none_16343 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16344 = torch.constant.int -1
    %18655 = torch.prim.ListConstruct %int-1_16344 : (!torch.int) -> !torch.list<int>
    %true_16345 = torch.constant.bool true
    %none_16346 = torch.constant.none
    %18656 = torch.aten.mean.dim %18644, %18655, %true_16345, %none_16346 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16347 = torch.constant.int -1
    %18657 = torch.prim.ListConstruct %int-1_16347 : (!torch.int) -> !torch.list<int>
    %true_16348 = torch.constant.bool true
    %none_16349 = torch.constant.none
    %18658 = torch.aten.mean.dim %18645, %18657, %true_16348, %none_16349 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16350 = torch.constant.int -1
    %18659 = torch.prim.ListConstruct %int-1_16350 : (!torch.int) -> !torch.list<int>
    %true_16351 = torch.constant.bool true
    %none_16352 = torch.constant.none
    %18660 = torch.aten.mean.dim %18646, %18659, %true_16351, %none_16352 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16353 = torch.constant.int -1
    %18661 = torch.prim.ListConstruct %int-1_16353 : (!torch.int) -> !torch.list<int>
    %true_16354 = torch.constant.bool true
    %none_16355 = torch.constant.none
    %18662 = torch.aten.mean.dim %18647, %18661, %true_16354, %none_16355 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16356 = torch.constant.int -1
    %18663 = torch.prim.ListConstruct %int-1_16356 : (!torch.int) -> !torch.list<int>
    %true_16357 = torch.constant.bool true
    %none_16358 = torch.constant.none
    %18664 = torch.aten.mean.dim %18648, %18663, %true_16357, %none_16358 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
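    // Add the norm epsilon (~1.0e-5; the printed literal is the f64 rendering of the nearest f32 value).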
    %float9.999990e-06_16359 = torch.constant.float 9.9999997473787516E-6
    %int1_16360 = torch.constant.int 1
    %18665 = torch.aten.add.Scalar %18650, %float9.999990e-06_16359, %int1_16360 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16361 = torch.constant.float 9.9999997473787516E-6
    %int1_16362 = torch.constant.int 1
    %18666 = torch.aten.add.Scalar %18652, %float9.999990e-06_16361, %int1_16362 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16363 = torch.constant.float 9.9999997473787516E-6
    %int1_16364 = torch.constant.int 1
    %18667 = torch.aten.add.Scalar %18654, %float9.999990e-06_16363, %int1_16364 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16365 = torch.constant.float 9.9999997473787516E-6
    %int1_16366 = torch.constant.int 1
    %18668 = torch.aten.add.Scalar %18656, %float9.999990e-06_16365, %int1_16366 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16367 = torch.constant.float 9.9999997473787516E-6
    %int1_16368 = torch.constant.int 1
    %18669 = torch.aten.add.Scalar %18658, %float9.999990e-06_16367, %int1_16368 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16369 = torch.constant.float 9.9999997473787516E-6
    %int1_16370 = torch.constant.int 1
    %18670 = torch.aten.add.Scalar %18660, %float9.999990e-06_16369, %int1_16370 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16371 = torch.constant.float 9.9999997473787516E-6
    %int1_16372 = torch.constant.int 1
    %18671 = torch.aten.add.Scalar %18662, %float9.999990e-06_16371, %int1_16372 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16373 = torch.constant.float 9.9999997473787516E-6
    %int1_16374 = torch.constant.int 1
    %18672 = torch.aten.add.Scalar %18664, %float9.999990e-06_16373, %int1_16374 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
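    // rsqrt of (mean(x^2) + eps) on each shard.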
    %18673 = torch.aten.rsqrt %18665 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %18674 = torch.aten.rsqrt %18666 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %18675 = torch.aten.rsqrt %18667 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %18676 = torch.aten.rsqrt %18668 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %18677 = torch.aten.rsqrt %18669 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %18678 = torch.aten.rsqrt %18670 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %18679 = torch.aten.rsqrt %18671 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %18680 = torch.aten.rsqrt %18672 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %18680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
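    // Normalize: multiply the f32 activations by the rsqrt factor, broadcast over the hidden dim.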
    %18681 = torch.aten.mul.Tensor %18633, %18673 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18682 = torch.aten.mul.Tensor %18634, %18674 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18683 = torch.aten.mul.Tensor %18635, %18675 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18684 = torch.aten.mul.Tensor %18636, %18676 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18685 = torch.aten.mul.Tensor %18637, %18677 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18686 = torch.aten.mul.Tensor %18638, %18678 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18687 = torch.aten.mul.Tensor %18639, %18679 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18688 = torch.aten.mul.Tensor %18640, %18680 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
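    // Scale by the learned norm weight (%624-%631: one tensor<4096xf32> copy per device,
    // matching the replicated *_norm.weight globals declared at module scope).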
    %18689 = torch.aten.mul.Tensor %624, %18681 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18690 = torch.aten.mul.Tensor %625, %18682 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18691 = torch.aten.mul.Tensor %626, %18683 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18692 = torch.aten.mul.Tensor %627, %18684 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18693 = torch.aten.mul.Tensor %628, %18685 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18694 = torch.aten.mul.Tensor %629, %18686 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18695 = torch.aten.mul.Tensor %630, %18687 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %18696 = torch.aten.mul.Tensor %631, %18688 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %18696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
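    // Downcast the normalized activations back to f16 (scalar type 5) for the matmuls below.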
    %int5_16375 = torch.constant.int 5
    %18697 = torch.prims.convert_element_type %18689, %int5_16375 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16376 = torch.constant.int 5
    %18698 = torch.prims.convert_element_type %18690, %int5_16376 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16377 = torch.constant.int 5
    %18699 = torch.prims.convert_element_type %18691, %int5_16377 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16378 = torch.constant.int 5
    %18700 = torch.prims.convert_element_type %18692, %int5_16378 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16379 = torch.constant.int 5
    %18701 = torch.prims.convert_element_type %18693, %int5_16379 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16380 = torch.constant.int 5
    %18702 = torch.prims.convert_element_type %18694, %int5_16380 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16381 = torch.constant.int 5
    %18703 = torch.prims.convert_element_type %18695, %int5_16381 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16382 = torch.constant.int 5
    %18704 = torch.prims.convert_element_type %18696, %int5_16382 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
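    // FFN, first projection: transpose each shard's (1792, 4096) weight to (4096, 1792).
    // 1792 is presumably the FFN intermediate dim split column-wise 8 ways (14336 / 8);
    // %632-%639 would then be the per-device ffn_gate shards.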
    %int1_16383 = torch.constant.int 1
    %int0_16384 = torch.constant.int 0
    %18705 = torch.prim.ListConstruct %int1_16383, %int0_16384 : (!torch.int, !torch.int) -> !torch.list<int>
    %18706 = torch.aten.permute %632, %18705 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16385 = torch.constant.int 1
    %int0_16386 = torch.constant.int 0
    %18707 = torch.prim.ListConstruct %int1_16385, %int0_16386 : (!torch.int, !torch.int) -> !torch.list<int>
    %18708 = torch.aten.permute %633, %18707 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16387 = torch.constant.int 1
    %int0_16388 = torch.constant.int 0
    %18709 = torch.prim.ListConstruct %int1_16387, %int0_16388 : (!torch.int, !torch.int) -> !torch.list<int>
    %18710 = torch.aten.permute %634, %18709 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16389 = torch.constant.int 1
    %int0_16390 = torch.constant.int 0
    %18711 = torch.prim.ListConstruct %int1_16389, %int0_16390 : (!torch.int, !torch.int) -> !torch.list<int>
    %18712 = torch.aten.permute %635, %18711 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16391 = torch.constant.int 1
    %int0_16392 = torch.constant.int 0
    %18713 = torch.prim.ListConstruct %int1_16391, %int0_16392 : (!torch.int, !torch.int) -> !torch.list<int>
    %18714 = torch.aten.permute %636, %18713 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16393 = torch.constant.int 1
    %int0_16394 = torch.constant.int 0
    %18715 = torch.prim.ListConstruct %int1_16393, %int0_16394 : (!torch.int, !torch.int) -> !torch.list<int>
    %18716 = torch.aten.permute %637, %18715 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16395 = torch.constant.int 1
    %int0_16396 = torch.constant.int 0
    %18717 = torch.prim.ListConstruct %int1_16395, %int0_16396 : (!torch.int, !torch.int) -> !torch.list<int>
    %18718 = torch.aten.permute %638, %18717 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16397 = torch.constant.int 1
    %int0_16398 = torch.constant.int 0
    %18719 = torch.prim.ListConstruct %int1_16397, %int0_16398 : (!torch.int, !torch.int) -> !torch.list<int>
    %18720 = torch.aten.permute %639, %18719 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
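    // Gate matmul on each shard: flatten (4, seq, 4096) -> (4*seq, 4096), torch.aten.mm against
    // the transposed (4096, 1792) weight, then view back to (4, seq, 1792).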
    %int4_16399 = torch.constant.int 4
    %18721 = torch.aten.mul.int %int4_16399, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16400 = torch.constant.int 4096
    %18722 = torch.prim.ListConstruct %18721, %int4096_16400 : (!torch.int, !torch.int) -> !torch.list<int>
    %18723 = torch.aten.view %18697, %18722 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18723, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18724 = torch.aten.mm %18723, %18706 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18724, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16401 = torch.constant.int 4
    %int1792_16402 = torch.constant.int 1792
    %18725 = torch.prim.ListConstruct %int4_16401, %2482, %int1792_16402 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18726 = torch.aten.view %18724, %18725 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16403 = torch.constant.int 4
    %18727 = torch.aten.mul.int %int4_16403, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16404 = torch.constant.int 4096
    %18728 = torch.prim.ListConstruct %18727, %int4096_16404 : (!torch.int, !torch.int) -> !torch.list<int>
    %18729 = torch.aten.view %18698, %18728 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18729, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18730 = torch.aten.mm %18729, %18708 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18730, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16405 = torch.constant.int 4
    %int1792_16406 = torch.constant.int 1792
    %18731 = torch.prim.ListConstruct %int4_16405, %2482, %int1792_16406 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18732 = torch.aten.view %18730, %18731 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16407 = torch.constant.int 4
    %18733 = torch.aten.mul.int %int4_16407, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16408 = torch.constant.int 4096
    %18734 = torch.prim.ListConstruct %18733, %int4096_16408 : (!torch.int, !torch.int) -> !torch.list<int>
    %18735 = torch.aten.view %18699, %18734 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18735, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18736 = torch.aten.mm %18735, %18710 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18736, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16409 = torch.constant.int 4
    %int1792_16410 = torch.constant.int 1792
    %18737 = torch.prim.ListConstruct %int4_16409, %2482, %int1792_16410 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18738 = torch.aten.view %18736, %18737 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16411 = torch.constant.int 4
    %18739 = torch.aten.mul.int %int4_16411, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16412 = torch.constant.int 4096
    %18740 = torch.prim.ListConstruct %18739, %int4096_16412 : (!torch.int, !torch.int) -> !torch.list<int>
    %18741 = torch.aten.view %18700, %18740 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18741, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18742 = torch.aten.mm %18741, %18712 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18742, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16413 = torch.constant.int 4
    %int1792_16414 = torch.constant.int 1792
    %18743 = torch.prim.ListConstruct %int4_16413, %2482, %int1792_16414 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18744 = torch.aten.view %18742, %18743 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16415 = torch.constant.int 4
    %18745 = torch.aten.mul.int %int4_16415, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16416 = torch.constant.int 4096
    %18746 = torch.prim.ListConstruct %18745, %int4096_16416 : (!torch.int, !torch.int) -> !torch.list<int>
    %18747 = torch.aten.view %18701, %18746 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18747, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18748 = torch.aten.mm %18747, %18714 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18748, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16417 = torch.constant.int 4
    %int1792_16418 = torch.constant.int 1792
    %18749 = torch.prim.ListConstruct %int4_16417, %2482, %int1792_16418 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18750 = torch.aten.view %18748, %18749 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16419 = torch.constant.int 4
    %18751 = torch.aten.mul.int %int4_16419, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16420 = torch.constant.int 4096
    %18752 = torch.prim.ListConstruct %18751, %int4096_16420 : (!torch.int, !torch.int) -> !torch.list<int>
    %18753 = torch.aten.view %18702, %18752 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18753, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18754 = torch.aten.mm %18753, %18716 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18754, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16421 = torch.constant.int 4
    %int1792_16422 = torch.constant.int 1792
    %18755 = torch.prim.ListConstruct %int4_16421, %2482, %int1792_16422 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18756 = torch.aten.view %18754, %18755 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16423 = torch.constant.int 4
    %18757 = torch.aten.mul.int %int4_16423, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16424 = torch.constant.int 4096
    %18758 = torch.prim.ListConstruct %18757, %int4096_16424 : (!torch.int, !torch.int) -> !torch.list<int>
    %18759 = torch.aten.view %18703, %18758 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18759, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18760 = torch.aten.mm %18759, %18718 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18760, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16425 = torch.constant.int 4
    %int1792_16426 = torch.constant.int 1792
    %18761 = torch.prim.ListConstruct %int4_16425, %2482, %int1792_16426 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18762 = torch.aten.view %18760, %18761 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16427 = torch.constant.int 4
    %18763 = torch.aten.mul.int %int4_16427, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16428 = torch.constant.int 4096
    %18764 = torch.prim.ListConstruct %18763, %int4096_16428 : (!torch.int, !torch.int) -> !torch.list<int>
    %18765 = torch.aten.view %18704, %18764 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18765, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18766 = torch.aten.mm %18765, %18720 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18766, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16429 = torch.constant.int 4
    %int1792_16430 = torch.constant.int 1792
    %18767 = torch.prim.ListConstruct %int4_16429, %2482, %int1792_16430 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18768 = torch.aten.view %18766, %18767 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
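    // SiLU activation on each shard's gate output.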
    %18769 = torch.aten.silu %18726 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18770 = torch.aten.silu %18732 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18771 = torch.aten.silu %18738 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18772 = torch.aten.silu %18744 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18773 = torch.aten.silu %18750 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18774 = torch.aten.silu %18756 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18775 = torch.aten.silu %18762 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18776 = torch.aten.silu %18768 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
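    // Second projection weights (%640-%647), transposed the same way; presumably the ffn_up shards.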
    %int1_16431 = torch.constant.int 1
    %int0_16432 = torch.constant.int 0
    %18777 = torch.prim.ListConstruct %int1_16431, %int0_16432 : (!torch.int, !torch.int) -> !torch.list<int>
    %18778 = torch.aten.permute %640, %18777 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16433 = torch.constant.int 1
    %int0_16434 = torch.constant.int 0
    %18779 = torch.prim.ListConstruct %int1_16433, %int0_16434 : (!torch.int, !torch.int) -> !torch.list<int>
    %18780 = torch.aten.permute %641, %18779 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16435 = torch.constant.int 1
    %int0_16436 = torch.constant.int 0
    %18781 = torch.prim.ListConstruct %int1_16435, %int0_16436 : (!torch.int, !torch.int) -> !torch.list<int>
    %18782 = torch.aten.permute %642, %18781 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16437 = torch.constant.int 1
    %int0_16438 = torch.constant.int 0
    %18783 = torch.prim.ListConstruct %int1_16437, %int0_16438 : (!torch.int, !torch.int) -> !torch.list<int>
    %18784 = torch.aten.permute %643, %18783 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16439 = torch.constant.int 1
    %int0_16440 = torch.constant.int 0
    %18785 = torch.prim.ListConstruct %int1_16439, %int0_16440 : (!torch.int, !torch.int) -> !torch.list<int>
    %18786 = torch.aten.permute %644, %18785 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16441 = torch.constant.int 1
    %int0_16442 = torch.constant.int 0
    %18787 = torch.prim.ListConstruct %int1_16441, %int0_16442 : (!torch.int, !torch.int) -> !torch.list<int>
    %18788 = torch.aten.permute %645, %18787 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16443 = torch.constant.int 1
    %int0_16444 = torch.constant.int 0
    %18789 = torch.prim.ListConstruct %int1_16443, %int0_16444 : (!torch.int, !torch.int) -> !torch.list<int>
    %18790 = torch.aten.permute %646, %18789 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_16445 = torch.constant.int 1
    %int0_16446 = torch.constant.int 0
    %18791 = torch.prim.ListConstruct %int1_16445, %int0_16446 : (!torch.int, !torch.int) -> !torch.list<int>
    %18792 = torch.aten.permute %647, %18791 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
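    // Up matmul on each shard, reusing the normalized f16 activations (%18697-%18704)
    // with the same flatten/mm/view pattern as the gate projection.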
    %int4_16447 = torch.constant.int 4
    %18793 = torch.aten.mul.int %int4_16447, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16448 = torch.constant.int 4096
    %18794 = torch.prim.ListConstruct %18793, %int4096_16448 : (!torch.int, !torch.int) -> !torch.list<int>
    %18795 = torch.aten.view %18697, %18794 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18795, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18796 = torch.aten.mm %18795, %18778 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18796, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16449 = torch.constant.int 4
    %int1792_16450 = torch.constant.int 1792
    %18797 = torch.prim.ListConstruct %int4_16449, %2482, %int1792_16450 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18798 = torch.aten.view %18796, %18797 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16451 = torch.constant.int 4
    %18799 = torch.aten.mul.int %int4_16451, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16452 = torch.constant.int 4096
    %18800 = torch.prim.ListConstruct %18799, %int4096_16452 : (!torch.int, !torch.int) -> !torch.list<int>
    %18801 = torch.aten.view %18698, %18800 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18801, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18802 = torch.aten.mm %18801, %18780 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18802, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16453 = torch.constant.int 4
    %int1792_16454 = torch.constant.int 1792
    %18803 = torch.prim.ListConstruct %int4_16453, %2482, %int1792_16454 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18804 = torch.aten.view %18802, %18803 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16455 = torch.constant.int 4
    %18805 = torch.aten.mul.int %int4_16455, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16456 = torch.constant.int 4096
    %18806 = torch.prim.ListConstruct %18805, %int4096_16456 : (!torch.int, !torch.int) -> !torch.list<int>
    %18807 = torch.aten.view %18699, %18806 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18807, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18808 = torch.aten.mm %18807, %18782 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18808, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16457 = torch.constant.int 4
    %int1792_16458 = torch.constant.int 1792
    %18809 = torch.prim.ListConstruct %int4_16457, %2482, %int1792_16458 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18810 = torch.aten.view %18808, %18809 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16459 = torch.constant.int 4
    %18811 = torch.aten.mul.int %int4_16459, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16460 = torch.constant.int 4096
    %18812 = torch.prim.ListConstruct %18811, %int4096_16460 : (!torch.int, !torch.int) -> !torch.list<int>
    %18813 = torch.aten.view %18700, %18812 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18813, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18814 = torch.aten.mm %18813, %18784 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18814, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16461 = torch.constant.int 4
    %int1792_16462 = torch.constant.int 1792
    %18815 = torch.prim.ListConstruct %int4_16461, %2482, %int1792_16462 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18816 = torch.aten.view %18814, %18815 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16463 = torch.constant.int 4
    %18817 = torch.aten.mul.int %int4_16463, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16464 = torch.constant.int 4096
    %18818 = torch.prim.ListConstruct %18817, %int4096_16464 : (!torch.int, !torch.int) -> !torch.list<int>
    %18819 = torch.aten.view %18701, %18818 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18819, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18820 = torch.aten.mm %18819, %18786 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18820, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16465 = torch.constant.int 4
    %int1792_16466 = torch.constant.int 1792
    %18821 = torch.prim.ListConstruct %int4_16465, %2482, %int1792_16466 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18822 = torch.aten.view %18820, %18821 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16467 = torch.constant.int 4
    %18823 = torch.aten.mul.int %int4_16467, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16468 = torch.constant.int 4096
    %18824 = torch.prim.ListConstruct %18823, %int4096_16468 : (!torch.int, !torch.int) -> !torch.list<int>
    %18825 = torch.aten.view %18702, %18824 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18825, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18826 = torch.aten.mm %18825, %18788 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18826, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16469 = torch.constant.int 4
    %int1792_16470 = torch.constant.int 1792
    %18827 = torch.prim.ListConstruct %int4_16469, %2482, %int1792_16470 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18828 = torch.aten.view %18826, %18827 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16471 = torch.constant.int 4
    %18829 = torch.aten.mul.int %int4_16471, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16472 = torch.constant.int 4096
    %18830 = torch.prim.ListConstruct %18829, %int4096_16472 : (!torch.int, !torch.int) -> !torch.list<int>
    %18831 = torch.aten.view %18703, %18830 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18831, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18832 = torch.aten.mm %18831, %18790 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18832, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16473 = torch.constant.int 4
    %int1792_16474 = torch.constant.int 1792
    %18833 = torch.prim.ListConstruct %int4_16473, %2482, %int1792_16474 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18834 = torch.aten.view %18832, %18833 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_16475 = torch.constant.int 4
    %18835 = torch.aten.mul.int %int4_16475, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16476 = torch.constant.int 4096
    %18836 = torch.prim.ListConstruct %18835, %int4096_16476 : (!torch.int, !torch.int) -> !torch.list<int>
    %18837 = torch.aten.view %18704, %18836 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18837, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %18838 = torch.aten.mm %18837, %18792 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18838, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_16477 = torch.constant.int 4
    %int1792_16478 = torch.constant.int 1792
    %18839 = torch.prim.ListConstruct %int4_16477, %2482, %int1792_16478 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18840 = torch.aten.view %18838, %18839 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
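    // SwiGLU gating: elementwise silu(gate) * up, per shard.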
    %18841 = torch.aten.mul.Tensor %18769, %18798 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18842 = torch.aten.mul.Tensor %18770, %18804 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18843 = torch.aten.mul.Tensor %18771, %18810 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18844 = torch.aten.mul.Tensor %18772, %18816 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18845 = torch.aten.mul.Tensor %18773, %18822 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18846 = torch.aten.mul.Tensor %18774, %18828 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18846, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18847 = torch.aten.mul.Tensor %18775, %18834 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %18848 = torch.aten.mul.Tensor %18776, %18840 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %18848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
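    // Down projection weights (%648-%655): transpose (4096, 1792) -> (1792, 4096). These are
    // row-sharded, so each device produces a partial (4, seq, 4096) result that presumably
    // gets summed across devices further down.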
    %int1_16479 = torch.constant.int 1
    %int0_16480 = torch.constant.int 0
    %18849 = torch.prim.ListConstruct %int1_16479, %int0_16480 : (!torch.int, !torch.int) -> !torch.list<int>
    %18850 = torch.aten.permute %648, %18849 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_16481 = torch.constant.int 1
    %int0_16482 = torch.constant.int 0
    %18851 = torch.prim.ListConstruct %int1_16481, %int0_16482 : (!torch.int, !torch.int) -> !torch.list<int>
    %18852 = torch.aten.permute %649, %18851 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_16483 = torch.constant.int 1
    %int0_16484 = torch.constant.int 0
    %18853 = torch.prim.ListConstruct %int1_16483, %int0_16484 : (!torch.int, !torch.int) -> !torch.list<int>
    %18854 = torch.aten.permute %650, %18853 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_16485 = torch.constant.int 1
    %int0_16486 = torch.constant.int 0
    %18855 = torch.prim.ListConstruct %int1_16485, %int0_16486 : (!torch.int, !torch.int) -> !torch.list<int>
    %18856 = torch.aten.permute %651, %18855 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_16487 = torch.constant.int 1
    %int0_16488 = torch.constant.int 0
    %18857 = torch.prim.ListConstruct %int1_16487, %int0_16488 : (!torch.int, !torch.int) -> !torch.list<int>
    %18858 = torch.aten.permute %652, %18857 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_16489 = torch.constant.int 1
    %int0_16490 = torch.constant.int 0
    %18859 = torch.prim.ListConstruct %int1_16489, %int0_16490 : (!torch.int, !torch.int) -> !torch.list<int>
    %18860 = torch.aten.permute %653, %18859 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_16491 = torch.constant.int 1
    %int0_16492 = torch.constant.int 0
    %18861 = torch.prim.ListConstruct %int1_16491, %int0_16492 : (!torch.int, !torch.int) -> !torch.list<int>
    %18862 = torch.aten.permute %654, %18861 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_16493 = torch.constant.int 1
    %int0_16494 = torch.constant.int 0
    %18863 = torch.prim.ListConstruct %int1_16493, %int0_16494 : (!torch.int, !torch.int) -> !torch.list<int>
    %18864 = torch.aten.permute %655, %18863 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
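    // Down matmul per shard; the dynamic seq length is re-read via torch.aten.size.int
    // before the usual flatten/mm/view sequence.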
    %int1_16495 = torch.constant.int 1
    %18865 = torch.aten.size.int %18726, %int1_16495 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_16496 = torch.constant.int 4
    %18866 = torch.aten.mul.int %int4_16496, %18865 : !torch.int, !torch.int -> !torch.int
    %int1792_16497 = torch.constant.int 1792
    %18867 = torch.prim.ListConstruct %18866, %int1792_16497 : (!torch.int, !torch.int) -> !torch.list<int>
    %18868 = torch.aten.view %18841, %18867 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18868, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %18869 = torch.aten.mm %18868, %18850 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18869, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16498 = torch.constant.int 4
    %int4096_16499 = torch.constant.int 4096
    %18870 = torch.prim.ListConstruct %int4_16498, %18865, %int4096_16499 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18871 = torch.aten.view %18869, %18870 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16500 = torch.constant.int 1
    %18872 = torch.aten.size.int %18732, %int1_16500 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_16501 = torch.constant.int 4
    %18873 = torch.aten.mul.int %int4_16501, %18872 : !torch.int, !torch.int -> !torch.int
    %int1792_16502 = torch.constant.int 1792
    %18874 = torch.prim.ListConstruct %18873, %int1792_16502 : (!torch.int, !torch.int) -> !torch.list<int>
    %18875 = torch.aten.view %18842, %18874 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18875, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %18876 = torch.aten.mm %18875, %18852 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18876, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16503 = torch.constant.int 4
    %int4096_16504 = torch.constant.int 4096
    %18877 = torch.prim.ListConstruct %int4_16503, %18872, %int4096_16504 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18878 = torch.aten.view %18876, %18877 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16505 = torch.constant.int 1
    %18879 = torch.aten.size.int %18738, %int1_16505 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_16506 = torch.constant.int 4
    %18880 = torch.aten.mul.int %int4_16506, %18879 : !torch.int, !torch.int -> !torch.int
    %int1792_16507 = torch.constant.int 1792
    %18881 = torch.prim.ListConstruct %18880, %int1792_16507 : (!torch.int, !torch.int) -> !torch.list<int>
    %18882 = torch.aten.view %18843, %18881 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18882, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %18883 = torch.aten.mm %18882, %18854 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18883, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16508 = torch.constant.int 4
    %int4096_16509 = torch.constant.int 4096
    %18884 = torch.prim.ListConstruct %int4_16508, %18879, %int4096_16509 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18885 = torch.aten.view %18883, %18884 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16510 = torch.constant.int 1
    %18886 = torch.aten.size.int %18744, %int1_16510 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_16511 = torch.constant.int 4
    %18887 = torch.aten.mul.int %int4_16511, %18886 : !torch.int, !torch.int -> !torch.int
    %int1792_16512 = torch.constant.int 1792
    %18888 = torch.prim.ListConstruct %18887, %int1792_16512 : (!torch.int, !torch.int) -> !torch.list<int>
    %18889 = torch.aten.view %18844, %18888 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18889, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %18890 = torch.aten.mm %18889, %18856 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18890, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16513 = torch.constant.int 4
    %int4096_16514 = torch.constant.int 4096
    %18891 = torch.prim.ListConstruct %int4_16513, %18886, %int4096_16514 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18892 = torch.aten.view %18890, %18891 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16515 = torch.constant.int 1
    %18893 = torch.aten.size.int %18750, %int1_16515 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_16516 = torch.constant.int 4
    %18894 = torch.aten.mul.int %int4_16516, %18893 : !torch.int, !torch.int -> !torch.int
    %int1792_16517 = torch.constant.int 1792
    %18895 = torch.prim.ListConstruct %18894, %int1792_16517 : (!torch.int, !torch.int) -> !torch.list<int>
    %18896 = torch.aten.view %18845, %18895 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18896, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %18897 = torch.aten.mm %18896, %18858 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18897, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16518 = torch.constant.int 4
    %int4096_16519 = torch.constant.int 4096
    %18898 = torch.prim.ListConstruct %int4_16518, %18893, %int4096_16519 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18899 = torch.aten.view %18897, %18898 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16520 = torch.constant.int 1
    %18900 = torch.aten.size.int %18756, %int1_16520 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_16521 = torch.constant.int 4
    %18901 = torch.aten.mul.int %int4_16521, %18900 : !torch.int, !torch.int -> !torch.int
    %int1792_16522 = torch.constant.int 1792
    %18902 = torch.prim.ListConstruct %18901, %int1792_16522 : (!torch.int, !torch.int) -> !torch.list<int>
    %18903 = torch.aten.view %18846, %18902 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18903, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %18904 = torch.aten.mm %18903, %18860 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18904, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16523 = torch.constant.int 4
    %int4096_16524 = torch.constant.int 4096
    %18905 = torch.prim.ListConstruct %int4_16523, %18900, %int4096_16524 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18906 = torch.aten.view %18904, %18905 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16525 = torch.constant.int 1
    %18907 = torch.aten.size.int %18762, %int1_16525 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_16526 = torch.constant.int 4
    %18908 = torch.aten.mul.int %int4_16526, %18907 : !torch.int, !torch.int -> !torch.int
    %int1792_16527 = torch.constant.int 1792
    %18909 = torch.prim.ListConstruct %18908, %int1792_16527 : (!torch.int, !torch.int) -> !torch.list<int>
    %18910 = torch.aten.view %18847, %18909 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18910, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %18911 = torch.aten.mm %18910, %18862 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18911, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16528 = torch.constant.int 4
    %int4096_16529 = torch.constant.int 4096
    %18912 = torch.prim.ListConstruct %int4_16528, %18907, %int4096_16529 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18913 = torch.aten.view %18911, %18912 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16530 = torch.constant.int 1
    %18914 = torch.aten.size.int %18768, %int1_16530 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_16531 = torch.constant.int 4
    %18915 = torch.aten.mul.int %int4_16531, %18914 : !torch.int, !torch.int -> !torch.int
    %int1792_16532 = torch.constant.int 1792
    %18916 = torch.prim.ListConstruct %18915, %int1792_16532 : (!torch.int, !torch.int) -> !torch.list<int>
    %18917 = torch.aten.view %18848, %18916 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %18917, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %18918 = torch.aten.mm %18917, %18864 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %18918, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_16533 = torch.constant.int 4
    %int4096_16534 = torch.constant.int 4096
    %18919 = torch.prim.ListConstruct %int4_16533, %18914, %int4096_16534 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %18920 = torch.aten.view %18918, %18919 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
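    // Emulated all-reduce, round 1 of 8: the seven per-device partial results %18878, %18885, %18892, %18899, %18906, %18913, and %18920 are transferred to @__device_0 and accumulated with the local partial %18871 into %18948.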
    %18921 = torch_c.to_builtin_tensor %18878 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16535 = arith.constant 1 : index
    %dim_16536 = tensor.dim %18921, %c1_16535 : tensor<4x?x4096xf16>
    %18922 = flow.tensor.transfer %18921 : tensor<4x?x4096xf16>{%dim_16536} to #hal.device.promise<@__device_0>
    %18923 = torch_c.from_builtin_tensor %18922 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18924 = torch_c.to_builtin_tensor %18885 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16537 = arith.constant 1 : index
    %dim_16538 = tensor.dim %18924, %c1_16537 : tensor<4x?x4096xf16>
    %18925 = flow.tensor.transfer %18924 : tensor<4x?x4096xf16>{%dim_16538} to #hal.device.promise<@__device_0>
    %18926 = torch_c.from_builtin_tensor %18925 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18927 = torch_c.to_builtin_tensor %18892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16539 = arith.constant 1 : index
    %dim_16540 = tensor.dim %18927, %c1_16539 : tensor<4x?x4096xf16>
    %18928 = flow.tensor.transfer %18927 : tensor<4x?x4096xf16>{%dim_16540} to #hal.device.promise<@__device_0>
    %18929 = torch_c.from_builtin_tensor %18928 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18930 = torch_c.to_builtin_tensor %18899 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16541 = arith.constant 1 : index
    %dim_16542 = tensor.dim %18930, %c1_16541 : tensor<4x?x4096xf16>
    %18931 = flow.tensor.transfer %18930 : tensor<4x?x4096xf16>{%dim_16542} to #hal.device.promise<@__device_0>
    %18932 = torch_c.from_builtin_tensor %18931 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18933 = torch_c.to_builtin_tensor %18906 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16543 = arith.constant 1 : index
    %dim_16544 = tensor.dim %18933, %c1_16543 : tensor<4x?x4096xf16>
    %18934 = flow.tensor.transfer %18933 : tensor<4x?x4096xf16>{%dim_16544} to #hal.device.promise<@__device_0>
    %18935 = torch_c.from_builtin_tensor %18934 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18936 = torch_c.to_builtin_tensor %18913 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16545 = arith.constant 1 : index
    %dim_16546 = tensor.dim %18936, %c1_16545 : tensor<4x?x4096xf16>
    %18937 = flow.tensor.transfer %18936 : tensor<4x?x4096xf16>{%dim_16546} to #hal.device.promise<@__device_0>
    %18938 = torch_c.from_builtin_tensor %18937 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18939 = torch_c.to_builtin_tensor %18920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16547 = arith.constant 1 : index
    %dim_16548 = tensor.dim %18939, %c1_16547 : tensor<4x?x4096xf16>
    %18940 = flow.tensor.transfer %18939 : tensor<4x?x4096xf16>{%dim_16548} to #hal.device.promise<@__device_0>
    %18941 = torch_c.from_builtin_tensor %18940 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16549 = torch.constant.int 1
    %18942 = torch.aten.add.Tensor %18871, %18923, %int1_16549 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16550 = torch.constant.int 1
    %18943 = torch.aten.add.Tensor %18942, %18926, %int1_16550 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16551 = torch.constant.int 1
    %18944 = torch.aten.add.Tensor %18943, %18929, %int1_16551 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16552 = torch.constant.int 1
    %18945 = torch.aten.add.Tensor %18944, %18932, %int1_16552 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16553 = torch.constant.int 1
    %18946 = torch.aten.add.Tensor %18945, %18935, %int1_16553 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16554 = torch.constant.int 1
    %18947 = torch.aten.add.Tensor %18946, %18938, %int1_16554 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16555 = torch.constant.int 1
    %18948 = torch.aten.add.Tensor %18947, %18941, %int1_16555 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
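    // Round 2: the same gather-and-sum replicated on @__device_1; %18878 already lives there and is added without a transfer (result %18976).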
    %18949 = torch_c.to_builtin_tensor %18871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16556 = arith.constant 1 : index
    %dim_16557 = tensor.dim %18949, %c1_16556 : tensor<4x?x4096xf16>
    %18950 = flow.tensor.transfer %18949 : tensor<4x?x4096xf16>{%dim_16557} to #hal.device.promise<@__device_1>
    %18951 = torch_c.from_builtin_tensor %18950 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18952 = torch_c.to_builtin_tensor %18885 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16558 = arith.constant 1 : index
    %dim_16559 = tensor.dim %18952, %c1_16558 : tensor<4x?x4096xf16>
    %18953 = flow.tensor.transfer %18952 : tensor<4x?x4096xf16>{%dim_16559} to #hal.device.promise<@__device_1>
    %18954 = torch_c.from_builtin_tensor %18953 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18955 = torch_c.to_builtin_tensor %18892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16560 = arith.constant 1 : index
    %dim_16561 = tensor.dim %18955, %c1_16560 : tensor<4x?x4096xf16>
    %18956 = flow.tensor.transfer %18955 : tensor<4x?x4096xf16>{%dim_16561} to #hal.device.promise<@__device_1>
    %18957 = torch_c.from_builtin_tensor %18956 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18958 = torch_c.to_builtin_tensor %18899 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16562 = arith.constant 1 : index
    %dim_16563 = tensor.dim %18958, %c1_16562 : tensor<4x?x4096xf16>
    %18959 = flow.tensor.transfer %18958 : tensor<4x?x4096xf16>{%dim_16563} to #hal.device.promise<@__device_1>
    %18960 = torch_c.from_builtin_tensor %18959 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18961 = torch_c.to_builtin_tensor %18906 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16564 = arith.constant 1 : index
    %dim_16565 = tensor.dim %18961, %c1_16564 : tensor<4x?x4096xf16>
    %18962 = flow.tensor.transfer %18961 : tensor<4x?x4096xf16>{%dim_16565} to #hal.device.promise<@__device_1>
    %18963 = torch_c.from_builtin_tensor %18962 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18964 = torch_c.to_builtin_tensor %18913 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16566 = arith.constant 1 : index
    %dim_16567 = tensor.dim %18964, %c1_16566 : tensor<4x?x4096xf16>
    %18965 = flow.tensor.transfer %18964 : tensor<4x?x4096xf16>{%dim_16567} to #hal.device.promise<@__device_1>
    %18966 = torch_c.from_builtin_tensor %18965 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18967 = torch_c.to_builtin_tensor %18920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16568 = arith.constant 1 : index
    %dim_16569 = tensor.dim %18967, %c1_16568 : tensor<4x?x4096xf16>
    %18968 = flow.tensor.transfer %18967 : tensor<4x?x4096xf16>{%dim_16569} to #hal.device.promise<@__device_1>
    %18969 = torch_c.from_builtin_tensor %18968 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16570 = torch.constant.int 1
    %18970 = torch.aten.add.Tensor %18951, %18878, %int1_16570 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16571 = torch.constant.int 1
    %18971 = torch.aten.add.Tensor %18970, %18954, %int1_16571 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16572 = torch.constant.int 1
    %18972 = torch.aten.add.Tensor %18971, %18957, %int1_16572 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16573 = torch.constant.int 1
    %18973 = torch.aten.add.Tensor %18972, %18960, %int1_16573 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16574 = torch.constant.int 1
    %18974 = torch.aten.add.Tensor %18973, %18963, %int1_16574 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16575 = torch.constant.int 1
    %18975 = torch.aten.add.Tensor %18974, %18966, %int1_16575 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16576 = torch.constant.int 1
    %18976 = torch.aten.add.Tensor %18975, %18969, %int1_16576 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
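    // Round 3: reduction on @__device_2 (local shard %18885, result %19004).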
    %18977 = torch_c.to_builtin_tensor %18871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16577 = arith.constant 1 : index
    %dim_16578 = tensor.dim %18977, %c1_16577 : tensor<4x?x4096xf16>
    %18978 = flow.tensor.transfer %18977 : tensor<4x?x4096xf16>{%dim_16578} to #hal.device.promise<@__device_2>
    %18979 = torch_c.from_builtin_tensor %18978 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18980 = torch_c.to_builtin_tensor %18878 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16579 = arith.constant 1 : index
    %dim_16580 = tensor.dim %18980, %c1_16579 : tensor<4x?x4096xf16>
    %18981 = flow.tensor.transfer %18980 : tensor<4x?x4096xf16>{%dim_16580} to #hal.device.promise<@__device_2>
    %18982 = torch_c.from_builtin_tensor %18981 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18983 = torch_c.to_builtin_tensor %18892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16581 = arith.constant 1 : index
    %dim_16582 = tensor.dim %18983, %c1_16581 : tensor<4x?x4096xf16>
    %18984 = flow.tensor.transfer %18983 : tensor<4x?x4096xf16>{%dim_16582} to #hal.device.promise<@__device_2>
    %18985 = torch_c.from_builtin_tensor %18984 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18985, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18986 = torch_c.to_builtin_tensor %18899 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16583 = arith.constant 1 : index
    %dim_16584 = tensor.dim %18986, %c1_16583 : tensor<4x?x4096xf16>
    %18987 = flow.tensor.transfer %18986 : tensor<4x?x4096xf16>{%dim_16584} to #hal.device.promise<@__device_2>
    %18988 = torch_c.from_builtin_tensor %18987 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18989 = torch_c.to_builtin_tensor %18906 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16585 = arith.constant 1 : index
    %dim_16586 = tensor.dim %18989, %c1_16585 : tensor<4x?x4096xf16>
    %18990 = flow.tensor.transfer %18989 : tensor<4x?x4096xf16>{%dim_16586} to #hal.device.promise<@__device_2>
    %18991 = torch_c.from_builtin_tensor %18990 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18992 = torch_c.to_builtin_tensor %18913 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16587 = arith.constant 1 : index
    %dim_16588 = tensor.dim %18992, %c1_16587 : tensor<4x?x4096xf16>
    %18993 = flow.tensor.transfer %18992 : tensor<4x?x4096xf16>{%dim_16588} to #hal.device.promise<@__device_2>
    %18994 = torch_c.from_builtin_tensor %18993 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %18995 = torch_c.to_builtin_tensor %18920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16589 = arith.constant 1 : index
    %dim_16590 = tensor.dim %18995, %c1_16589 : tensor<4x?x4096xf16>
    %18996 = flow.tensor.transfer %18995 : tensor<4x?x4096xf16>{%dim_16590} to #hal.device.promise<@__device_2>
    %18997 = torch_c.from_builtin_tensor %18996 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16591 = torch.constant.int 1
    %18998 = torch.aten.add.Tensor %18979, %18982, %int1_16591 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16592 = torch.constant.int 1
    %18999 = torch.aten.add.Tensor %18998, %18885, %int1_16592 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %18999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16593 = torch.constant.int 1
    %19000 = torch.aten.add.Tensor %18999, %18985, %int1_16593 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16594 = torch.constant.int 1
    %19001 = torch.aten.add.Tensor %19000, %18988, %int1_16594 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16595 = torch.constant.int 1
    %19002 = torch.aten.add.Tensor %19001, %18991, %int1_16595 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16596 = torch.constant.int 1
    %19003 = torch.aten.add.Tensor %19002, %18994, %int1_16596 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16597 = torch.constant.int 1
    %19004 = torch.aten.add.Tensor %19003, %18997, %int1_16597 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
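    // Round 4: reduction on @__device_3 (local shard %18892, result %19032).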
    %19005 = torch_c.to_builtin_tensor %18871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16598 = arith.constant 1 : index
    %dim_16599 = tensor.dim %19005, %c1_16598 : tensor<4x?x4096xf16>
    %19006 = flow.tensor.transfer %19005 : tensor<4x?x4096xf16>{%dim_16599} to #hal.device.promise<@__device_3>
    %19007 = torch_c.from_builtin_tensor %19006 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19008 = torch_c.to_builtin_tensor %18878 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16600 = arith.constant 1 : index
    %dim_16601 = tensor.dim %19008, %c1_16600 : tensor<4x?x4096xf16>
    %19009 = flow.tensor.transfer %19008 : tensor<4x?x4096xf16>{%dim_16601} to #hal.device.promise<@__device_3>
    %19010 = torch_c.from_builtin_tensor %19009 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19011 = torch_c.to_builtin_tensor %18885 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16602 = arith.constant 1 : index
    %dim_16603 = tensor.dim %19011, %c1_16602 : tensor<4x?x4096xf16>
    %19012 = flow.tensor.transfer %19011 : tensor<4x?x4096xf16>{%dim_16603} to #hal.device.promise<@__device_3>
    %19013 = torch_c.from_builtin_tensor %19012 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19014 = torch_c.to_builtin_tensor %18899 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16604 = arith.constant 1 : index
    %dim_16605 = tensor.dim %19014, %c1_16604 : tensor<4x?x4096xf16>
    %19015 = flow.tensor.transfer %19014 : tensor<4x?x4096xf16>{%dim_16605} to #hal.device.promise<@__device_3>
    %19016 = torch_c.from_builtin_tensor %19015 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19017 = torch_c.to_builtin_tensor %18906 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16606 = arith.constant 1 : index
    %dim_16607 = tensor.dim %19017, %c1_16606 : tensor<4x?x4096xf16>
    %19018 = flow.tensor.transfer %19017 : tensor<4x?x4096xf16>{%dim_16607} to #hal.device.promise<@__device_3>
    %19019 = torch_c.from_builtin_tensor %19018 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19020 = torch_c.to_builtin_tensor %18913 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16608 = arith.constant 1 : index
    %dim_16609 = tensor.dim %19020, %c1_16608 : tensor<4x?x4096xf16>
    %19021 = flow.tensor.transfer %19020 : tensor<4x?x4096xf16>{%dim_16609} to #hal.device.promise<@__device_3>
    %19022 = torch_c.from_builtin_tensor %19021 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19023 = torch_c.to_builtin_tensor %18920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16610 = arith.constant 1 : index
    %dim_16611 = tensor.dim %19023, %c1_16610 : tensor<4x?x4096xf16>
    %19024 = flow.tensor.transfer %19023 : tensor<4x?x4096xf16>{%dim_16611} to #hal.device.promise<@__device_3>
    %19025 = torch_c.from_builtin_tensor %19024 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16612 = torch.constant.int 1
    %19026 = torch.aten.add.Tensor %19007, %19010, %int1_16612 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16613 = torch.constant.int 1
    %19027 = torch.aten.add.Tensor %19026, %19013, %int1_16613 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16614 = torch.constant.int 1
    %19028 = torch.aten.add.Tensor %19027, %18892, %int1_16614 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16615 = torch.constant.int 1
    %19029 = torch.aten.add.Tensor %19028, %19016, %int1_16615 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16616 = torch.constant.int 1
    %19030 = torch.aten.add.Tensor %19029, %19019, %int1_16616 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16617 = torch.constant.int 1
    %19031 = torch.aten.add.Tensor %19030, %19022, %int1_16617 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16618 = torch.constant.int 1
    %19032 = torch.aten.add.Tensor %19031, %19025, %int1_16618 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
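    // Round 5: reduction on @__device_4 (local shard %18899, result %19060).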
    %19033 = torch_c.to_builtin_tensor %18871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16619 = arith.constant 1 : index
    %dim_16620 = tensor.dim %19033, %c1_16619 : tensor<4x?x4096xf16>
    %19034 = flow.tensor.transfer %19033 : tensor<4x?x4096xf16>{%dim_16620} to #hal.device.promise<@__device_4>
    %19035 = torch_c.from_builtin_tensor %19034 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19036 = torch_c.to_builtin_tensor %18878 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16621 = arith.constant 1 : index
    %dim_16622 = tensor.dim %19036, %c1_16621 : tensor<4x?x4096xf16>
    %19037 = flow.tensor.transfer %19036 : tensor<4x?x4096xf16>{%dim_16622} to #hal.device.promise<@__device_4>
    %19038 = torch_c.from_builtin_tensor %19037 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19039 = torch_c.to_builtin_tensor %18885 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16623 = arith.constant 1 : index
    %dim_16624 = tensor.dim %19039, %c1_16623 : tensor<4x?x4096xf16>
    %19040 = flow.tensor.transfer %19039 : tensor<4x?x4096xf16>{%dim_16624} to #hal.device.promise<@__device_4>
    %19041 = torch_c.from_builtin_tensor %19040 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19042 = torch_c.to_builtin_tensor %18892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16625 = arith.constant 1 : index
    %dim_16626 = tensor.dim %19042, %c1_16625 : tensor<4x?x4096xf16>
    %19043 = flow.tensor.transfer %19042 : tensor<4x?x4096xf16>{%dim_16626} to #hal.device.promise<@__device_4>
    %19044 = torch_c.from_builtin_tensor %19043 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19045 = torch_c.to_builtin_tensor %18906 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16627 = arith.constant 1 : index
    %dim_16628 = tensor.dim %19045, %c1_16627 : tensor<4x?x4096xf16>
    %19046 = flow.tensor.transfer %19045 : tensor<4x?x4096xf16>{%dim_16628} to #hal.device.promise<@__device_4>
    %19047 = torch_c.from_builtin_tensor %19046 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19048 = torch_c.to_builtin_tensor %18913 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16629 = arith.constant 1 : index
    %dim_16630 = tensor.dim %19048, %c1_16629 : tensor<4x?x4096xf16>
    %19049 = flow.tensor.transfer %19048 : tensor<4x?x4096xf16>{%dim_16630} to #hal.device.promise<@__device_4>
    %19050 = torch_c.from_builtin_tensor %19049 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19051 = torch_c.to_builtin_tensor %18920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16631 = arith.constant 1 : index
    %dim_16632 = tensor.dim %19051, %c1_16631 : tensor<4x?x4096xf16>
    %19052 = flow.tensor.transfer %19051 : tensor<4x?x4096xf16>{%dim_16632} to #hal.device.promise<@__device_4>
    %19053 = torch_c.from_builtin_tensor %19052 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16633 = torch.constant.int 1
    %19054 = torch.aten.add.Tensor %19035, %19038, %int1_16633 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16634 = torch.constant.int 1
    %19055 = torch.aten.add.Tensor %19054, %19041, %int1_16634 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16635 = torch.constant.int 1
    %19056 = torch.aten.add.Tensor %19055, %19044, %int1_16635 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16636 = torch.constant.int 1
    %19057 = torch.aten.add.Tensor %19056, %18899, %int1_16636 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16637 = torch.constant.int 1
    %19058 = torch.aten.add.Tensor %19057, %19047, %int1_16637 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16638 = torch.constant.int 1
    %19059 = torch.aten.add.Tensor %19058, %19050, %int1_16638 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16639 = torch.constant.int 1
    %19060 = torch.aten.add.Tensor %19059, %19053, %int1_16639 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
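    // Round 6: reduction on @__device_5 (local shard %18906, result %19088).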
    %19061 = torch_c.to_builtin_tensor %18871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16640 = arith.constant 1 : index
    %dim_16641 = tensor.dim %19061, %c1_16640 : tensor<4x?x4096xf16>
    %19062 = flow.tensor.transfer %19061 : tensor<4x?x4096xf16>{%dim_16641} to #hal.device.promise<@__device_5>
    %19063 = torch_c.from_builtin_tensor %19062 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19064 = torch_c.to_builtin_tensor %18878 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16642 = arith.constant 1 : index
    %dim_16643 = tensor.dim %19064, %c1_16642 : tensor<4x?x4096xf16>
    %19065 = flow.tensor.transfer %19064 : tensor<4x?x4096xf16>{%dim_16643} to #hal.device.promise<@__device_5>
    %19066 = torch_c.from_builtin_tensor %19065 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19067 = torch_c.to_builtin_tensor %18885 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16644 = arith.constant 1 : index
    %dim_16645 = tensor.dim %19067, %c1_16644 : tensor<4x?x4096xf16>
    %19068 = flow.tensor.transfer %19067 : tensor<4x?x4096xf16>{%dim_16645} to #hal.device.promise<@__device_5>
    %19069 = torch_c.from_builtin_tensor %19068 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19070 = torch_c.to_builtin_tensor %18892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16646 = arith.constant 1 : index
    %dim_16647 = tensor.dim %19070, %c1_16646 : tensor<4x?x4096xf16>
    %19071 = flow.tensor.transfer %19070 : tensor<4x?x4096xf16>{%dim_16647} to #hal.device.promise<@__device_5>
    %19072 = torch_c.from_builtin_tensor %19071 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19073 = torch_c.to_builtin_tensor %18899 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16648 = arith.constant 1 : index
    %dim_16649 = tensor.dim %19073, %c1_16648 : tensor<4x?x4096xf16>
    %19074 = flow.tensor.transfer %19073 : tensor<4x?x4096xf16>{%dim_16649} to #hal.device.promise<@__device_5>
    %19075 = torch_c.from_builtin_tensor %19074 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19076 = torch_c.to_builtin_tensor %18913 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16650 = arith.constant 1 : index
    %dim_16651 = tensor.dim %19076, %c1_16650 : tensor<4x?x4096xf16>
    %19077 = flow.tensor.transfer %19076 : tensor<4x?x4096xf16>{%dim_16651} to #hal.device.promise<@__device_5>
    %19078 = torch_c.from_builtin_tensor %19077 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19079 = torch_c.to_builtin_tensor %18920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16652 = arith.constant 1 : index
    %dim_16653 = tensor.dim %19079, %c1_16652 : tensor<4x?x4096xf16>
    %19080 = flow.tensor.transfer %19079 : tensor<4x?x4096xf16>{%dim_16653} to #hal.device.promise<@__device_5>
    %19081 = torch_c.from_builtin_tensor %19080 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16654 = torch.constant.int 1
    %19082 = torch.aten.add.Tensor %19063, %19066, %int1_16654 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16655 = torch.constant.int 1
    %19083 = torch.aten.add.Tensor %19082, %19069, %int1_16655 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16656 = torch.constant.int 1
    %19084 = torch.aten.add.Tensor %19083, %19072, %int1_16656 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16657 = torch.constant.int 1
    %19085 = torch.aten.add.Tensor %19084, %19075, %int1_16657 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16658 = torch.constant.int 1
    %19086 = torch.aten.add.Tensor %19085, %18906, %int1_16658 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16659 = torch.constant.int 1
    %19087 = torch.aten.add.Tensor %19086, %19078, %int1_16659 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16660 = torch.constant.int 1
    %19088 = torch.aten.add.Tensor %19087, %19081, %int1_16660 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
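    // Round 7: reduction on @__device_6 (local shard %18913, result %19116).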
    %19089 = torch_c.to_builtin_tensor %18871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16661 = arith.constant 1 : index
    %dim_16662 = tensor.dim %19089, %c1_16661 : tensor<4x?x4096xf16>
    %19090 = flow.tensor.transfer %19089 : tensor<4x?x4096xf16>{%dim_16662} to #hal.device.promise<@__device_6>
    %19091 = torch_c.from_builtin_tensor %19090 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19092 = torch_c.to_builtin_tensor %18878 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16663 = arith.constant 1 : index
    %dim_16664 = tensor.dim %19092, %c1_16663 : tensor<4x?x4096xf16>
    %19093 = flow.tensor.transfer %19092 : tensor<4x?x4096xf16>{%dim_16664} to #hal.device.promise<@__device_6>
    %19094 = torch_c.from_builtin_tensor %19093 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19095 = torch_c.to_builtin_tensor %18885 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16665 = arith.constant 1 : index
    %dim_16666 = tensor.dim %19095, %c1_16665 : tensor<4x?x4096xf16>
    %19096 = flow.tensor.transfer %19095 : tensor<4x?x4096xf16>{%dim_16666} to #hal.device.promise<@__device_6>
    %19097 = torch_c.from_builtin_tensor %19096 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19098 = torch_c.to_builtin_tensor %18892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16667 = arith.constant 1 : index
    %dim_16668 = tensor.dim %19098, %c1_16667 : tensor<4x?x4096xf16>
    %19099 = flow.tensor.transfer %19098 : tensor<4x?x4096xf16>{%dim_16668} to #hal.device.promise<@__device_6>
    %19100 = torch_c.from_builtin_tensor %19099 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19101 = torch_c.to_builtin_tensor %18899 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16669 = arith.constant 1 : index
    %dim_16670 = tensor.dim %19101, %c1_16669 : tensor<4x?x4096xf16>
    %19102 = flow.tensor.transfer %19101 : tensor<4x?x4096xf16>{%dim_16670} to #hal.device.promise<@__device_6>
    %19103 = torch_c.from_builtin_tensor %19102 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19104 = torch_c.to_builtin_tensor %18906 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16671 = arith.constant 1 : index
    %dim_16672 = tensor.dim %19104, %c1_16671 : tensor<4x?x4096xf16>
    %19105 = flow.tensor.transfer %19104 : tensor<4x?x4096xf16>{%dim_16672} to #hal.device.promise<@__device_6>
    %19106 = torch_c.from_builtin_tensor %19105 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19107 = torch_c.to_builtin_tensor %18920 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16673 = arith.constant 1 : index
    %dim_16674 = tensor.dim %19107, %c1_16673 : tensor<4x?x4096xf16>
    %19108 = flow.tensor.transfer %19107 : tensor<4x?x4096xf16>{%dim_16674} to #hal.device.promise<@__device_6>
    %19109 = torch_c.from_builtin_tensor %19108 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16675 = torch.constant.int 1
    %19110 = torch.aten.add.Tensor %19091, %19094, %int1_16675 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16676 = torch.constant.int 1
    %19111 = torch.aten.add.Tensor %19110, %19097, %int1_16676 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16677 = torch.constant.int 1
    %19112 = torch.aten.add.Tensor %19111, %19100, %int1_16677 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16678 = torch.constant.int 1
    %19113 = torch.aten.add.Tensor %19112, %19103, %int1_16678 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16679 = torch.constant.int 1
    %19114 = torch.aten.add.Tensor %19113, %19106, %int1_16679 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16680 = torch.constant.int 1
    %19115 = torch.aten.add.Tensor %19114, %18913, %int1_16680 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16681 = torch.constant.int 1
    %19116 = torch.aten.add.Tensor %19115, %19109, %int1_16681 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
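    // Round 8: reduction on @__device_7 (local shard %18920, result %19144).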
    %19117 = torch_c.to_builtin_tensor %18871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16682 = arith.constant 1 : index
    %dim_16683 = tensor.dim %19117, %c1_16682 : tensor<4x?x4096xf16>
    %19118 = flow.tensor.transfer %19117 : tensor<4x?x4096xf16>{%dim_16683} to #hal.device.promise<@__device_7>
    %19119 = torch_c.from_builtin_tensor %19118 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19120 = torch_c.to_builtin_tensor %18878 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16684 = arith.constant 1 : index
    %dim_16685 = tensor.dim %19120, %c1_16684 : tensor<4x?x4096xf16>
    %19121 = flow.tensor.transfer %19120 : tensor<4x?x4096xf16>{%dim_16685} to #hal.device.promise<@__device_7>
    %19122 = torch_c.from_builtin_tensor %19121 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19123 = torch_c.to_builtin_tensor %18885 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16686 = arith.constant 1 : index
    %dim_16687 = tensor.dim %19123, %c1_16686 : tensor<4x?x4096xf16>
    %19124 = flow.tensor.transfer %19123 : tensor<4x?x4096xf16>{%dim_16687} to #hal.device.promise<@__device_7>
    %19125 = torch_c.from_builtin_tensor %19124 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19126 = torch_c.to_builtin_tensor %18892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16688 = arith.constant 1 : index
    %dim_16689 = tensor.dim %19126, %c1_16688 : tensor<4x?x4096xf16>
    %19127 = flow.tensor.transfer %19126 : tensor<4x?x4096xf16>{%dim_16689} to #hal.device.promise<@__device_7>
    %19128 = torch_c.from_builtin_tensor %19127 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19129 = torch_c.to_builtin_tensor %18899 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16690 = arith.constant 1 : index
    %dim_16691 = tensor.dim %19129, %c1_16690 : tensor<4x?x4096xf16>
    %19130 = flow.tensor.transfer %19129 : tensor<4x?x4096xf16>{%dim_16691} to #hal.device.promise<@__device_7>
    %19131 = torch_c.from_builtin_tensor %19130 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19132 = torch_c.to_builtin_tensor %18906 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16692 = arith.constant 1 : index
    %dim_16693 = tensor.dim %19132, %c1_16692 : tensor<4x?x4096xf16>
    %19133 = flow.tensor.transfer %19132 : tensor<4x?x4096xf16>{%dim_16693} to #hal.device.promise<@__device_7>
    %19134 = torch_c.from_builtin_tensor %19133 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %19135 = torch_c.to_builtin_tensor %18913 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_16694 = arith.constant 1 : index
    %dim_16695 = tensor.dim %19135, %c1_16694 : tensor<4x?x4096xf16>
    %19136 = flow.tensor.transfer %19135 : tensor<4x?x4096xf16>{%dim_16695} to #hal.device.promise<@__device_7>
    %19137 = torch_c.from_builtin_tensor %19136 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16696 = torch.constant.int 1
    %19138 = torch.aten.add.Tensor %19119, %19122, %int1_16696 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16697 = torch.constant.int 1
    %19139 = torch.aten.add.Tensor %19138, %19125, %int1_16697 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16698 = torch.constant.int 1
    %19140 = torch.aten.add.Tensor %19139, %19128, %int1_16698 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16699 = torch.constant.int 1
    %19141 = torch.aten.add.Tensor %19140, %19131, %int1_16699 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16700 = torch.constant.int 1
    %19142 = torch.aten.add.Tensor %19141, %19134, %int1_16700 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16701 = torch.constant.int 1
    %19143 = torch.aten.add.Tensor %19142, %19137, %int1_16701 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16702 = torch.constant.int 1
    %19144 = torch.aten.add.Tensor %19143, %18920, %int1_16702 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
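    // Every device now holds the same reduced output (%18948 .. %19144). It is added residual-style to the per-device hidden states %18625..%18632, yielding %19145..%19152.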
    %int1_16703 = torch.constant.int 1
    %19145 = torch.aten.add.Tensor %18625, %18948, %int1_16703 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16704 = torch.constant.int 1
    %19146 = torch.aten.add.Tensor %18626, %18976, %int1_16704 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16705 = torch.constant.int 1
    %19147 = torch.aten.add.Tensor %18627, %19004, %int1_16705 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16706 = torch.constant.int 1
    %19148 = torch.aten.add.Tensor %18628, %19032, %int1_16706 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16707 = torch.constant.int 1
    %19149 = torch.aten.add.Tensor %18629, %19060, %int1_16707 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16708 = torch.constant.int 1
    %19150 = torch.aten.add.Tensor %18630, %19088, %int1_16708 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16709 = torch.constant.int 1
    %19151 = torch.aten.add.Tensor %18631, %19116, %int1_16709 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_16710 = torch.constant.int 1
    %19152 = torch.aten.add.Tensor %18632, %19144, %int1_16710 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
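    // Upcast each per-device residual stream from f16 to f32 (torch dtype constant 6) ahead of the elementwise statistics computed below.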
    %int6_16711 = torch.constant.int 6
    %19153 = torch.prims.convert_element_type %19145, %int6_16711 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16712 = torch.constant.int 6
    %19154 = torch.prims.convert_element_type %19146, %int6_16712 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16713 = torch.constant.int 6
    %19155 = torch.prims.convert_element_type %19147, %int6_16713 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16714 = torch.constant.int 6
    %19156 = torch.prims.convert_element_type %19148, %int6_16714 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16715 = torch.constant.int 6
    %19157 = torch.prims.convert_element_type %19149, %int6_16715 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16716 = torch.constant.int 6
    %19158 = torch.prims.convert_element_type %19150, %int6_16716 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16717 = torch.constant.int 6
    %19159 = torch.prims.convert_element_type %19151, %int6_16717 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_16718 = torch.constant.int 6
    %19160 = torch.prims.convert_element_type %19152, %int6_16718 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
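    // Element-wise square of each upcast copy.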
    %int2_16719 = torch.constant.int 2
    %19161 = torch.aten.pow.Tensor_Scalar %19153, %int2_16719 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16720 = torch.constant.int 2
    %19162 = torch.aten.pow.Tensor_Scalar %19154, %int2_16720 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16721 = torch.constant.int 2
    %19163 = torch.aten.pow.Tensor_Scalar %19155, %int2_16721 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16722 = torch.constant.int 2
    %19164 = torch.aten.pow.Tensor_Scalar %19156, %int2_16722 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16723 = torch.constant.int 2
    %19165 = torch.aten.pow.Tensor_Scalar %19157, %int2_16723 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16724 = torch.constant.int 2
    %19166 = torch.aten.pow.Tensor_Scalar %19158, %int2_16724 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16725 = torch.constant.int 2
    %19167 = torch.aten.pow.Tensor_Scalar %19159, %int2_16725 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_16726 = torch.constant.int 2
    %19168 = torch.aten.pow.Tensor_Scalar %19160, %int2_16726 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
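    // Mean of the squares over the hidden dimension (dim -1, keepdim=true),
    // giving a [4,?,1] per-token variance estimate for each copy.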
    %int-1_16727 = torch.constant.int -1
    %19169 = torch.prim.ListConstruct %int-1_16727 : (!torch.int) -> !torch.list<int>
    %true_16728 = torch.constant.bool true
    %none_16729 = torch.constant.none
    %19170 = torch.aten.mean.dim %19161, %19169, %true_16728, %none_16729 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16730 = torch.constant.int -1
    %19171 = torch.prim.ListConstruct %int-1_16730 : (!torch.int) -> !torch.list<int>
    %true_16731 = torch.constant.bool true
    %none_16732 = torch.constant.none
    %19172 = torch.aten.mean.dim %19162, %19171, %true_16731, %none_16732 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16733 = torch.constant.int -1
    %19173 = torch.prim.ListConstruct %int-1_16733 : (!torch.int) -> !torch.list<int>
    %true_16734 = torch.constant.bool true
    %none_16735 = torch.constant.none
    %19174 = torch.aten.mean.dim %19163, %19173, %true_16734, %none_16735 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16736 = torch.constant.int -1
    %19175 = torch.prim.ListConstruct %int-1_16736 : (!torch.int) -> !torch.list<int>
    %true_16737 = torch.constant.bool true
    %none_16738 = torch.constant.none
    %19176 = torch.aten.mean.dim %19164, %19175, %true_16737, %none_16738 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16739 = torch.constant.int -1
    %19177 = torch.prim.ListConstruct %int-1_16739 : (!torch.int) -> !torch.list<int>
    %true_16740 = torch.constant.bool true
    %none_16741 = torch.constant.none
    %19178 = torch.aten.mean.dim %19165, %19177, %true_16740, %none_16741 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16742 = torch.constant.int -1
    %19179 = torch.prim.ListConstruct %int-1_16742 : (!torch.int) -> !torch.list<int>
    %true_16743 = torch.constant.bool true
    %none_16744 = torch.constant.none
    %19180 = torch.aten.mean.dim %19166, %19179, %true_16743, %none_16744 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16745 = torch.constant.int -1
    %19181 = torch.prim.ListConstruct %int-1_16745 : (!torch.int) -> !torch.list<int>
    %true_16746 = torch.constant.bool true
    %none_16747 = torch.constant.none
    %19182 = torch.aten.mean.dim %19167, %19181, %true_16746, %none_16747 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_16748 = torch.constant.int -1
    %19183 = torch.prim.ListConstruct %int-1_16748 : (!torch.int) -> !torch.list<int>
    %true_16749 = torch.constant.bool true
    %none_16750 = torch.constant.none
    %19184 = torch.aten.mean.dim %19168, %19183, %true_16749, %none_16750 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
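    // Add the RMSNorm epsilon (the printed f32 value is the nearest float to 1.0e-5).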
    %float9.999990e-06_16751 = torch.constant.float 9.9999997473787516E-6
    %int1_16752 = torch.constant.int 1
    %19185 = torch.aten.add.Scalar %19170, %float9.999990e-06_16751, %int1_16752 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16753 = torch.constant.float 9.9999997473787516E-6
    %int1_16754 = torch.constant.int 1
    %19186 = torch.aten.add.Scalar %19172, %float9.999990e-06_16753, %int1_16754 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16755 = torch.constant.float 9.9999997473787516E-6
    %int1_16756 = torch.constant.int 1
    %19187 = torch.aten.add.Scalar %19174, %float9.999990e-06_16755, %int1_16756 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16757 = torch.constant.float 9.9999997473787516E-6
    %int1_16758 = torch.constant.int 1
    %19188 = torch.aten.add.Scalar %19176, %float9.999990e-06_16757, %int1_16758 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16759 = torch.constant.float 9.9999997473787516E-6
    %int1_16760 = torch.constant.int 1
    %19189 = torch.aten.add.Scalar %19178, %float9.999990e-06_16759, %int1_16760 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16761 = torch.constant.float 9.9999997473787516E-6
    %int1_16762 = torch.constant.int 1
    %19190 = torch.aten.add.Scalar %19180, %float9.999990e-06_16761, %int1_16762 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16763 = torch.constant.float 9.9999997473787516E-6
    %int1_16764 = torch.constant.int 1
    %19191 = torch.aten.add.Scalar %19182, %float9.999990e-06_16763, %int1_16764 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_16765 = torch.constant.float 9.9999997473787516E-6
    %int1_16766 = torch.constant.int 1
    %19192 = torch.aten.add.Scalar %19184, %float9.999990e-06_16765, %int1_16766 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
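    // Reciprocal square root of (mean(x^2) + eps).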
    %19193 = torch.aten.rsqrt %19185 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %19194 = torch.aten.rsqrt %19186 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %19195 = torch.aten.rsqrt %19187 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %19196 = torch.aten.rsqrt %19188 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %19197 = torch.aten.rsqrt %19189 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %19198 = torch.aten.rsqrt %19190 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %19199 = torch.aten.rsqrt %19191 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %19200 = torch.aten.rsqrt %19192 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %19200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
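    // Normalize: multiply each f32 copy by its rsqrt factor, broadcast over the hidden dim.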
    %19201 = torch.aten.mul.Tensor %19153, %19193 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19202 = torch.aten.mul.Tensor %19154, %19194 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19203 = torch.aten.mul.Tensor %19155, %19195 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19204 = torch.aten.mul.Tensor %19156, %19196 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19205 = torch.aten.mul.Tensor %19157, %19197 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19206 = torch.aten.mul.Tensor %19158, %19198 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19207 = torch.aten.mul.Tensor %19159, %19199 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19208 = torch.aten.mul.Tensor %19160, %19200 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
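    // Scale by the norm weight vectors %656-%663 ([4096] f32); given the
    // per-device weight globals in this module, these are presumably the
    // attn_norm weight replicas for this block.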
    %19209 = torch.aten.mul.Tensor %656, %19201 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19210 = torch.aten.mul.Tensor %657, %19202 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19211 = torch.aten.mul.Tensor %658, %19203 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19211, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19212 = torch.aten.mul.Tensor %659, %19204 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19213 = torch.aten.mul.Tensor %660, %19205 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19214 = torch.aten.mul.Tensor %661, %19206 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19215 = torch.aten.mul.Tensor %662, %19207 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %19216 = torch.aten.mul.Tensor %663, %19208 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %19216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
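    // Downcast the normalized activations back to f16 (torch dtype code 5 == float16).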
    %int5_16767 = torch.constant.int 5
    %19217 = torch.prims.convert_element_type %19209, %int5_16767 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16768 = torch.constant.int 5
    %19218 = torch.prims.convert_element_type %19210, %int5_16768 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16769 = torch.constant.int 5
    %19219 = torch.prims.convert_element_type %19211, %int5_16769 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16770 = torch.constant.int 5
    %19220 = torch.prims.convert_element_type %19212, %int5_16770 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16771 = torch.constant.int 5
    %19221 = torch.prims.convert_element_type %19213, %int5_16771 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16772 = torch.constant.int 5
    %19222 = torch.prims.convert_element_type %19214, %int5_16772 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16773 = torch.constant.int 5
    %19223 = torch.prims.convert_element_type %19215, %int5_16773 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_16774 = torch.constant.int 5
    %19224 = torch.prims.convert_element_type %19216, %int5_16774 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %19224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
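    // Transpose the eight [512,4096] f16 weight shards to [4096,512] for matmul.
    // The 512-row sharding matches the attn_q.weight.shard.* globals, so these
    // are presumably the Q projection weights, one shard per device.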
    %int1_16775 = torch.constant.int 1
    %int0_16776 = torch.constant.int 0
    %19225 = torch.prim.ListConstruct %int1_16775, %int0_16776 : (!torch.int, !torch.int) -> !torch.list<int>
    %19226 = torch.aten.permute %664, %19225 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_16777 = torch.constant.int 1
    %int0_16778 = torch.constant.int 0
    %19227 = torch.prim.ListConstruct %int1_16777, %int0_16778 : (!torch.int, !torch.int) -> !torch.list<int>
    %19228 = torch.aten.permute %665, %19227 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_16779 = torch.constant.int 1
    %int0_16780 = torch.constant.int 0
    %19229 = torch.prim.ListConstruct %int1_16779, %int0_16780 : (!torch.int, !torch.int) -> !torch.list<int>
    %19230 = torch.aten.permute %666, %19229 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_16781 = torch.constant.int 1
    %int0_16782 = torch.constant.int 0
    %19231 = torch.prim.ListConstruct %int1_16781, %int0_16782 : (!torch.int, !torch.int) -> !torch.list<int>
    %19232 = torch.aten.permute %667, %19231 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_16783 = torch.constant.int 1
    %int0_16784 = torch.constant.int 0
    %19233 = torch.prim.ListConstruct %int1_16783, %int0_16784 : (!torch.int, !torch.int) -> !torch.list<int>
    %19234 = torch.aten.permute %668, %19233 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_16785 = torch.constant.int 1
    %int0_16786 = torch.constant.int 0
    %19235 = torch.prim.ListConstruct %int1_16785, %int0_16786 : (!torch.int, !torch.int) -> !torch.list<int>
    %19236 = torch.aten.permute %669, %19235 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_16787 = torch.constant.int 1
    %int0_16788 = torch.constant.int 0
    %19237 = torch.prim.ListConstruct %int1_16787, %int0_16788 : (!torch.int, !torch.int) -> !torch.list<int>
    %19238 = torch.aten.permute %670, %19237 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_16789 = torch.constant.int 1
    %int0_16790 = torch.constant.int 0
    %19239 = torch.prim.ListConstruct %int1_16789, %int0_16790 : (!torch.int, !torch.int) -> !torch.list<int>
    %19240 = torch.aten.permute %671, %19239 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
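    // Q projection, repeated per shard: flatten [4,?,4096] to [4*seq,4096],
    // matmul against the transposed [4096,512] shard, then reshape the result
    // back to [4,?,512].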
    %int4_16791 = torch.constant.int 4
    %19241 = torch.aten.mul.int %int4_16791, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16792 = torch.constant.int 4096
    %19242 = torch.prim.ListConstruct %19241, %int4096_16792 : (!torch.int, !torch.int) -> !torch.list<int>
    %19243 = torch.aten.view %19217, %19242 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19243, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19244 = torch.aten.mm %19243, %19226 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %19244, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_16793 = torch.constant.int 4
    %int512_16794 = torch.constant.int 512
    %19245 = torch.prim.ListConstruct %int4_16793, %2482, %int512_16794 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19246 = torch.aten.view %19244, %19245 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %19246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16795 = torch.constant.int 4
    %19247 = torch.aten.mul.int %int4_16795, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16796 = torch.constant.int 4096
    %19248 = torch.prim.ListConstruct %19247, %int4096_16796 : (!torch.int, !torch.int) -> !torch.list<int>
    %19249 = torch.aten.view %19218, %19248 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19249, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19250 = torch.aten.mm %19249, %19228 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %19250, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_16797 = torch.constant.int 4
    %int512_16798 = torch.constant.int 512
    %19251 = torch.prim.ListConstruct %int4_16797, %2482, %int512_16798 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19252 = torch.aten.view %19250, %19251 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %19252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16799 = torch.constant.int 4
    %19253 = torch.aten.mul.int %int4_16799, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16800 = torch.constant.int 4096
    %19254 = torch.prim.ListConstruct %19253, %int4096_16800 : (!torch.int, !torch.int) -> !torch.list<int>
    %19255 = torch.aten.view %19219, %19254 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19255, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19256 = torch.aten.mm %19255, %19230 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %19256, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_16801 = torch.constant.int 4
    %int512_16802 = torch.constant.int 512
    %19257 = torch.prim.ListConstruct %int4_16801, %2482, %int512_16802 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19258 = torch.aten.view %19256, %19257 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %19258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16803 = torch.constant.int 4
    %19259 = torch.aten.mul.int %int4_16803, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16804 = torch.constant.int 4096
    %19260 = torch.prim.ListConstruct %19259, %int4096_16804 : (!torch.int, !torch.int) -> !torch.list<int>
    %19261 = torch.aten.view %19220, %19260 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19261, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19262 = torch.aten.mm %19261, %19232 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %19262, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_16805 = torch.constant.int 4
    %int512_16806 = torch.constant.int 512
    %19263 = torch.prim.ListConstruct %int4_16805, %2482, %int512_16806 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19264 = torch.aten.view %19262, %19263 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %19264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16807 = torch.constant.int 4
    %19265 = torch.aten.mul.int %int4_16807, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16808 = torch.constant.int 4096
    %19266 = torch.prim.ListConstruct %19265, %int4096_16808 : (!torch.int, !torch.int) -> !torch.list<int>
    %19267 = torch.aten.view %19221, %19266 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19267, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19268 = torch.aten.mm %19267, %19234 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %19268, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_16809 = torch.constant.int 4
    %int512_16810 = torch.constant.int 512
    %19269 = torch.prim.ListConstruct %int4_16809, %2482, %int512_16810 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19270 = torch.aten.view %19268, %19269 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %19270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16811 = torch.constant.int 4
    %19271 = torch.aten.mul.int %int4_16811, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16812 = torch.constant.int 4096
    %19272 = torch.prim.ListConstruct %19271, %int4096_16812 : (!torch.int, !torch.int) -> !torch.list<int>
    %19273 = torch.aten.view %19222, %19272 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19273, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19274 = torch.aten.mm %19273, %19236 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %19274, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_16813 = torch.constant.int 4
    %int512_16814 = torch.constant.int 512
    %19275 = torch.prim.ListConstruct %int4_16813, %2482, %int512_16814 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19276 = torch.aten.view %19274, %19275 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %19276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16815 = torch.constant.int 4
    %19277 = torch.aten.mul.int %int4_16815, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16816 = torch.constant.int 4096
    %19278 = torch.prim.ListConstruct %19277, %int4096_16816 : (!torch.int, !torch.int) -> !torch.list<int>
    %19279 = torch.aten.view %19223, %19278 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19279, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19280 = torch.aten.mm %19279, %19238 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %19280, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_16817 = torch.constant.int 4
    %int512_16818 = torch.constant.int 512
    %19281 = torch.prim.ListConstruct %int4_16817, %2482, %int512_16818 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19282 = torch.aten.view %19280, %19281 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %19282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_16819 = torch.constant.int 4
    %19283 = torch.aten.mul.int %int4_16819, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16820 = torch.constant.int 4096
    %19284 = torch.prim.ListConstruct %19283, %int4096_16820 : (!torch.int, !torch.int) -> !torch.list<int>
    %19285 = torch.aten.view %19224, %19284 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19285, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19286 = torch.aten.mm %19285, %19240 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %19286, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_16821 = torch.constant.int 4
    %int512_16822 = torch.constant.int 512
    %19287 = torch.prim.ListConstruct %int4_16821, %2482, %int512_16822 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19288 = torch.aten.view %19286, %19287 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %19288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
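    // Transpose eight [128,4096] weight shards to [4096,128]. The 128-row
    // sharding (a single 128-dim head per device) suggests these are the K
    // projection weights under grouped-query attention.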
    %int1_16823 = torch.constant.int 1
    %int0_16824 = torch.constant.int 0
    %19289 = torch.prim.ListConstruct %int1_16823, %int0_16824 : (!torch.int, !torch.int) -> !torch.list<int>
    %19290 = torch.aten.permute %672, %19289 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16825 = torch.constant.int 1
    %int0_16826 = torch.constant.int 0
    %19291 = torch.prim.ListConstruct %int1_16825, %int0_16826 : (!torch.int, !torch.int) -> !torch.list<int>
    %19292 = torch.aten.permute %673, %19291 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16827 = torch.constant.int 1
    %int0_16828 = torch.constant.int 0
    %19293 = torch.prim.ListConstruct %int1_16827, %int0_16828 : (!torch.int, !torch.int) -> !torch.list<int>
    %19294 = torch.aten.permute %674, %19293 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16829 = torch.constant.int 1
    %int0_16830 = torch.constant.int 0
    %19295 = torch.prim.ListConstruct %int1_16829, %int0_16830 : (!torch.int, !torch.int) -> !torch.list<int>
    %19296 = torch.aten.permute %675, %19295 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16831 = torch.constant.int 1
    %int0_16832 = torch.constant.int 0
    %19297 = torch.prim.ListConstruct %int1_16831, %int0_16832 : (!torch.int, !torch.int) -> !torch.list<int>
    %19298 = torch.aten.permute %676, %19297 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16833 = torch.constant.int 1
    %int0_16834 = torch.constant.int 0
    %19299 = torch.prim.ListConstruct %int1_16833, %int0_16834 : (!torch.int, !torch.int) -> !torch.list<int>
    %19300 = torch.aten.permute %677, %19299 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16835 = torch.constant.int 1
    %int0_16836 = torch.constant.int 0
    %19301 = torch.prim.ListConstruct %int1_16835, %int0_16836 : (!torch.int, !torch.int) -> !torch.list<int>
    %19302 = torch.aten.permute %678, %19301 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16837 = torch.constant.int 1
    %int0_16838 = torch.constant.int 0
    %19303 = torch.prim.ListConstruct %int1_16837, %int0_16838 : (!torch.int, !torch.int) -> !torch.list<int>
    %19304 = torch.aten.permute %679, %19303 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
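    // K projection per shard: the same flatten/matmul/reshape pattern,
    // yielding one [4,?,128] tensor per device.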
    %int4_16839 = torch.constant.int 4
    %19305 = torch.aten.mul.int %int4_16839, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16840 = torch.constant.int 4096
    %19306 = torch.prim.ListConstruct %19305, %int4096_16840 : (!torch.int, !torch.int) -> !torch.list<int>
    %19307 = torch.aten.view %19217, %19306 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19307, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19308 = torch.aten.mm %19307, %19290 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19308, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16841 = torch.constant.int 4
    %int128_16842 = torch.constant.int 128
    %19309 = torch.prim.ListConstruct %int4_16841, %2482, %int128_16842 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19310 = torch.aten.view %19308, %19309 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16843 = torch.constant.int 4
    %19311 = torch.aten.mul.int %int4_16843, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16844 = torch.constant.int 4096
    %19312 = torch.prim.ListConstruct %19311, %int4096_16844 : (!torch.int, !torch.int) -> !torch.list<int>
    %19313 = torch.aten.view %19218, %19312 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19313, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19314 = torch.aten.mm %19313, %19292 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19314, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16845 = torch.constant.int 4
    %int128_16846 = torch.constant.int 128
    %19315 = torch.prim.ListConstruct %int4_16845, %2482, %int128_16846 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19316 = torch.aten.view %19314, %19315 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16847 = torch.constant.int 4
    %19317 = torch.aten.mul.int %int4_16847, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16848 = torch.constant.int 4096
    %19318 = torch.prim.ListConstruct %19317, %int4096_16848 : (!torch.int, !torch.int) -> !torch.list<int>
    %19319 = torch.aten.view %19219, %19318 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19319, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19320 = torch.aten.mm %19319, %19294 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19320, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16849 = torch.constant.int 4
    %int128_16850 = torch.constant.int 128
    %19321 = torch.prim.ListConstruct %int4_16849, %2482, %int128_16850 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19322 = torch.aten.view %19320, %19321 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16851 = torch.constant.int 4
    %19323 = torch.aten.mul.int %int4_16851, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16852 = torch.constant.int 4096
    %19324 = torch.prim.ListConstruct %19323, %int4096_16852 : (!torch.int, !torch.int) -> !torch.list<int>
    %19325 = torch.aten.view %19220, %19324 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19325, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19326 = torch.aten.mm %19325, %19296 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19326, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16853 = torch.constant.int 4
    %int128_16854 = torch.constant.int 128
    %19327 = torch.prim.ListConstruct %int4_16853, %2482, %int128_16854 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19328 = torch.aten.view %19326, %19327 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16855 = torch.constant.int 4
    %19329 = torch.aten.mul.int %int4_16855, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16856 = torch.constant.int 4096
    %19330 = torch.prim.ListConstruct %19329, %int4096_16856 : (!torch.int, !torch.int) -> !torch.list<int>
    %19331 = torch.aten.view %19221, %19330 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19331, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19332 = torch.aten.mm %19331, %19298 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19332, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16857 = torch.constant.int 4
    %int128_16858 = torch.constant.int 128
    %19333 = torch.prim.ListConstruct %int4_16857, %2482, %int128_16858 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19334 = torch.aten.view %19332, %19333 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16859 = torch.constant.int 4
    %19335 = torch.aten.mul.int %int4_16859, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16860 = torch.constant.int 4096
    %19336 = torch.prim.ListConstruct %19335, %int4096_16860 : (!torch.int, !torch.int) -> !torch.list<int>
    %19337 = torch.aten.view %19222, %19336 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19337, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19338 = torch.aten.mm %19337, %19300 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19338, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16861 = torch.constant.int 4
    %int128_16862 = torch.constant.int 128
    %19339 = torch.prim.ListConstruct %int4_16861, %2482, %int128_16862 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19340 = torch.aten.view %19338, %19339 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16863 = torch.constant.int 4
    %19341 = torch.aten.mul.int %int4_16863, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16864 = torch.constant.int 4096
    %19342 = torch.prim.ListConstruct %19341, %int4096_16864 : (!torch.int, !torch.int) -> !torch.list<int>
    %19343 = torch.aten.view %19223, %19342 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19343, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19344 = torch.aten.mm %19343, %19302 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19344, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16865 = torch.constant.int 4
    %int128_16866 = torch.constant.int 128
    %19345 = torch.prim.ListConstruct %int4_16865, %2482, %int128_16866 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19346 = torch.aten.view %19344, %19345 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16867 = torch.constant.int 4
    %19347 = torch.aten.mul.int %int4_16867, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16868 = torch.constant.int 4096
    %19348 = torch.prim.ListConstruct %19347, %int4096_16868 : (!torch.int, !torch.int) -> !torch.list<int>
    %19349 = torch.aten.view %19224, %19348 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19349, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19350 = torch.aten.mm %19349, %19304 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19350, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16869 = torch.constant.int 4
    %int128_16870 = torch.constant.int 128
    %19351 = torch.prim.ListConstruct %int4_16869, %2482, %int128_16870 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19352 = torch.aten.view %19350, %19351 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
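    // A second set of [128,4096] shards is transposed the same way; by
    // position in the Q/K/V sequence these are presumably the V projection weights.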
    %int1_16871 = torch.constant.int 1
    %int0_16872 = torch.constant.int 0
    %19353 = torch.prim.ListConstruct %int1_16871, %int0_16872 : (!torch.int, !torch.int) -> !torch.list<int>
    %19354 = torch.aten.permute %680, %19353 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16873 = torch.constant.int 1
    %int0_16874 = torch.constant.int 0
    %19355 = torch.prim.ListConstruct %int1_16873, %int0_16874 : (!torch.int, !torch.int) -> !torch.list<int>
    %19356 = torch.aten.permute %681, %19355 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16875 = torch.constant.int 1
    %int0_16876 = torch.constant.int 0
    %19357 = torch.prim.ListConstruct %int1_16875, %int0_16876 : (!torch.int, !torch.int) -> !torch.list<int>
    %19358 = torch.aten.permute %682, %19357 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16877 = torch.constant.int 1
    %int0_16878 = torch.constant.int 0
    %19359 = torch.prim.ListConstruct %int1_16877, %int0_16878 : (!torch.int, !torch.int) -> !torch.list<int>
    %19360 = torch.aten.permute %683, %19359 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16879 = torch.constant.int 1
    %int0_16880 = torch.constant.int 0
    %19361 = torch.prim.ListConstruct %int1_16879, %int0_16880 : (!torch.int, !torch.int) -> !torch.list<int>
    %19362 = torch.aten.permute %684, %19361 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16881 = torch.constant.int 1
    %int0_16882 = torch.constant.int 0
    %19363 = torch.prim.ListConstruct %int1_16881, %int0_16882 : (!torch.int, !torch.int) -> !torch.list<int>
    %19364 = torch.aten.permute %685, %19363 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16883 = torch.constant.int 1
    %int0_16884 = torch.constant.int 0
    %19365 = torch.prim.ListConstruct %int1_16883, %int0_16884 : (!torch.int, !torch.int) -> !torch.list<int>
    %19366 = torch.aten.permute %686, %19365 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_16885 = torch.constant.int 1
    %int0_16886 = torch.constant.int 0
    %19367 = torch.prim.ListConstruct %int1_16885, %int0_16886 : (!torch.int, !torch.int) -> !torch.list<int>
    %19368 = torch.aten.permute %687, %19367 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
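    // V projection per shard, again producing [4,?,128] per device.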
    %int4_16887 = torch.constant.int 4
    %19369 = torch.aten.mul.int %int4_16887, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16888 = torch.constant.int 4096
    %19370 = torch.prim.ListConstruct %19369, %int4096_16888 : (!torch.int, !torch.int) -> !torch.list<int>
    %19371 = torch.aten.view %19217, %19370 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19371, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19372 = torch.aten.mm %19371, %19354 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19372, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16889 = torch.constant.int 4
    %int128_16890 = torch.constant.int 128
    %19373 = torch.prim.ListConstruct %int4_16889, %2482, %int128_16890 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19374 = torch.aten.view %19372, %19373 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16891 = torch.constant.int 4
    %19375 = torch.aten.mul.int %int4_16891, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16892 = torch.constant.int 4096
    %19376 = torch.prim.ListConstruct %19375, %int4096_16892 : (!torch.int, !torch.int) -> !torch.list<int>
    %19377 = torch.aten.view %19218, %19376 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19377, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19378 = torch.aten.mm %19377, %19356 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19378, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16893 = torch.constant.int 4
    %int128_16894 = torch.constant.int 128
    %19379 = torch.prim.ListConstruct %int4_16893, %2482, %int128_16894 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19380 = torch.aten.view %19378, %19379 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16895 = torch.constant.int 4
    %19381 = torch.aten.mul.int %int4_16895, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16896 = torch.constant.int 4096
    %19382 = torch.prim.ListConstruct %19381, %int4096_16896 : (!torch.int, !torch.int) -> !torch.list<int>
    %19383 = torch.aten.view %19219, %19382 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19383, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19384 = torch.aten.mm %19383, %19358 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19384, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16897 = torch.constant.int 4
    %int128_16898 = torch.constant.int 128
    %19385 = torch.prim.ListConstruct %int4_16897, %2482, %int128_16898 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19386 = torch.aten.view %19384, %19385 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16899 = torch.constant.int 4
    %19387 = torch.aten.mul.int %int4_16899, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16900 = torch.constant.int 4096
    %19388 = torch.prim.ListConstruct %19387, %int4096_16900 : (!torch.int, !torch.int) -> !torch.list<int>
    %19389 = torch.aten.view %19220, %19388 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19389, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19390 = torch.aten.mm %19389, %19360 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19390, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16901 = torch.constant.int 4
    %int128_16902 = torch.constant.int 128
    %19391 = torch.prim.ListConstruct %int4_16901, %2482, %int128_16902 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19392 = torch.aten.view %19390, %19391 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16903 = torch.constant.int 4
    %19393 = torch.aten.mul.int %int4_16903, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16904 = torch.constant.int 4096
    %19394 = torch.prim.ListConstruct %19393, %int4096_16904 : (!torch.int, !torch.int) -> !torch.list<int>
    %19395 = torch.aten.view %19221, %19394 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19395, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19396 = torch.aten.mm %19395, %19362 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19396, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16905 = torch.constant.int 4
    %int128_16906 = torch.constant.int 128
    %19397 = torch.prim.ListConstruct %int4_16905, %2482, %int128_16906 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19398 = torch.aten.view %19396, %19397 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16907 = torch.constant.int 4
    %19399 = torch.aten.mul.int %int4_16907, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16908 = torch.constant.int 4096
    %19400 = torch.prim.ListConstruct %19399, %int4096_16908 : (!torch.int, !torch.int) -> !torch.list<int>
    %19401 = torch.aten.view %19222, %19400 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19401, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19402 = torch.aten.mm %19401, %19364 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19402, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16909 = torch.constant.int 4
    %int128_16910 = torch.constant.int 128
    %19403 = torch.prim.ListConstruct %int4_16909, %2482, %int128_16910 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19404 = torch.aten.view %19402, %19403 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16911 = torch.constant.int 4
    %19405 = torch.aten.mul.int %int4_16911, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16912 = torch.constant.int 4096
    %19406 = torch.prim.ListConstruct %19405, %int4096_16912 : (!torch.int, !torch.int) -> !torch.list<int>
    %19407 = torch.aten.view %19223, %19406 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19407, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19408 = torch.aten.mm %19407, %19366 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19408, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16913 = torch.constant.int 4
    %int128_16914 = torch.constant.int 128
    %19409 = torch.prim.ListConstruct %int4_16913, %2482, %int128_16914 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19410 = torch.aten.view %19408, %19409 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_16915 = torch.constant.int 4
    %19411 = torch.aten.mul.int %int4_16915, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_16916 = torch.constant.int 4096
    %19412 = torch.prim.ListConstruct %19411, %int4096_16916 : (!torch.int, !torch.int) -> !torch.list<int>
    %19413 = torch.aten.view %19224, %19412 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %19413, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %19414 = torch.aten.mm %19413, %19368 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %19414, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_16917 = torch.constant.int 4
    %int128_16918 = torch.constant.int 128
    %19415 = torch.prim.ListConstruct %int4_16917, %2482, %int128_16918 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19416 = torch.aten.view %19414, %19415 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %19416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
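    // Split Q into heads: each [4,?,512] Q shard is viewed as [4,?,4,128],
    // i.e. 4 query heads of dimension 128 per device (32 heads across 8 shards).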
    %int4_16919 = torch.constant.int 4
    %int4_16920 = torch.constant.int 4
    %int128_16921 = torch.constant.int 128
    %19417 = torch.prim.ListConstruct %int4_16919, %2482, %int4_16920, %int128_16921 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19418 = torch.aten.view %19246, %19417 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_16922 = torch.constant.int 4
    %int4_16923 = torch.constant.int 4
    %int128_16924 = torch.constant.int 128
    %19419 = torch.prim.ListConstruct %int4_16922, %2482, %int4_16923, %int128_16924 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19420 = torch.aten.view %19252, %19419 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_16925 = torch.constant.int 4
    %int4_16926 = torch.constant.int 4
    %int128_16927 = torch.constant.int 128
    %19421 = torch.prim.ListConstruct %int4_16925, %2482, %int4_16926, %int128_16927 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19422 = torch.aten.view %19258, %19421 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_16928 = torch.constant.int 4
    %int4_16929 = torch.constant.int 4
    %int128_16930 = torch.constant.int 128
    %19423 = torch.prim.ListConstruct %int4_16928, %2482, %int4_16929, %int128_16930 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19424 = torch.aten.view %19264, %19423 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_16931 = torch.constant.int 4
    %int4_16932 = torch.constant.int 4
    %int128_16933 = torch.constant.int 128
    %19425 = torch.prim.ListConstruct %int4_16931, %2482, %int4_16932, %int128_16933 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19426 = torch.aten.view %19270, %19425 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_16934 = torch.constant.int 4
    %int4_16935 = torch.constant.int 4
    %int128_16936 = torch.constant.int 128
    %19427 = torch.prim.ListConstruct %int4_16934, %2482, %int4_16935, %int128_16936 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19428 = torch.aten.view %19276, %19427 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_16937 = torch.constant.int 4
    %int4_16938 = torch.constant.int 4
    %int128_16939 = torch.constant.int 128
    %19429 = torch.prim.ListConstruct %int4_16937, %2482, %int4_16938, %int128_16939 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19430 = torch.aten.view %19282, %19429 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_16940 = torch.constant.int 4
    %int4_16941 = torch.constant.int 4
    %int128_16942 = torch.constant.int 128
    %19431 = torch.prim.ListConstruct %int4_16940, %2482, %int4_16941, %int128_16942 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19432 = torch.aten.view %19288, %19431 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
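    // Sixteen [4,?,128] -> [4,?,1,128] views follow: for each of the 8 devices,
    // one key tensor and one value tensor gain an explicit single-KV-head
    // dimension, which looks like grouped-query attention with 1 KV head per shard.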
    %int4_16943 = torch.constant.int 4
    %int1_16944 = torch.constant.int 1
    %int128_16945 = torch.constant.int 128
    %19433 = torch.prim.ListConstruct %int4_16943, %2482, %int1_16944, %int128_16945 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19434 = torch.aten.view %19310, %19433 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16946 = torch.constant.int 4
    %int1_16947 = torch.constant.int 1
    %int128_16948 = torch.constant.int 128
    %19435 = torch.prim.ListConstruct %int4_16946, %2482, %int1_16947, %int128_16948 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19436 = torch.aten.view %19316, %19435 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16949 = torch.constant.int 4
    %int1_16950 = torch.constant.int 1
    %int128_16951 = torch.constant.int 128
    %19437 = torch.prim.ListConstruct %int4_16949, %2482, %int1_16950, %int128_16951 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19438 = torch.aten.view %19322, %19437 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16952 = torch.constant.int 4
    %int1_16953 = torch.constant.int 1
    %int128_16954 = torch.constant.int 128
    %19439 = torch.prim.ListConstruct %int4_16952, %2482, %int1_16953, %int128_16954 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19440 = torch.aten.view %19328, %19439 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16955 = torch.constant.int 4
    %int1_16956 = torch.constant.int 1
    %int128_16957 = torch.constant.int 128
    %19441 = torch.prim.ListConstruct %int4_16955, %2482, %int1_16956, %int128_16957 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19442 = torch.aten.view %19334, %19441 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16958 = torch.constant.int 4
    %int1_16959 = torch.constant.int 1
    %int128_16960 = torch.constant.int 128
    %19443 = torch.prim.ListConstruct %int4_16958, %2482, %int1_16959, %int128_16960 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19444 = torch.aten.view %19340, %19443 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16961 = torch.constant.int 4
    %int1_16962 = torch.constant.int 1
    %int128_16963 = torch.constant.int 128
    %19445 = torch.prim.ListConstruct %int4_16961, %2482, %int1_16962, %int128_16963 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19446 = torch.aten.view %19346, %19445 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16964 = torch.constant.int 4
    %int1_16965 = torch.constant.int 1
    %int128_16966 = torch.constant.int 128
    %19447 = torch.prim.ListConstruct %int4_16964, %2482, %int1_16965, %int128_16966 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19448 = torch.aten.view %19352, %19447 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16967 = torch.constant.int 4
    %int1_16968 = torch.constant.int 1
    %int128_16969 = torch.constant.int 128
    %19449 = torch.prim.ListConstruct %int4_16967, %2482, %int1_16968, %int128_16969 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19450 = torch.aten.view %19374, %19449 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16970 = torch.constant.int 4
    %int1_16971 = torch.constant.int 1
    %int128_16972 = torch.constant.int 128
    %19451 = torch.prim.ListConstruct %int4_16970, %2482, %int1_16971, %int128_16972 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19452 = torch.aten.view %19380, %19451 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16973 = torch.constant.int 4
    %int1_16974 = torch.constant.int 1
    %int128_16975 = torch.constant.int 128
    %19453 = torch.prim.ListConstruct %int4_16973, %2482, %int1_16974, %int128_16975 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19454 = torch.aten.view %19386, %19453 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16976 = torch.constant.int 4
    %int1_16977 = torch.constant.int 1
    %int128_16978 = torch.constant.int 128
    %19455 = torch.prim.ListConstruct %int4_16976, %2482, %int1_16977, %int128_16978 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19456 = torch.aten.view %19392, %19455 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16979 = torch.constant.int 4
    %int1_16980 = torch.constant.int 1
    %int128_16981 = torch.constant.int 128
    %19457 = torch.prim.ListConstruct %int4_16979, %2482, %int1_16980, %int128_16981 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19458 = torch.aten.view %19398, %19457 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16982 = torch.constant.int 4
    %int1_16983 = torch.constant.int 1
    %int128_16984 = torch.constant.int 128
    %19459 = torch.prim.ListConstruct %int4_16982, %2482, %int1_16983, %int128_16984 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19460 = torch.aten.view %19404, %19459 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16985 = torch.constant.int 4
    %int1_16986 = torch.constant.int 1
    %int128_16987 = torch.constant.int 128
    %19461 = torch.prim.ListConstruct %int4_16985, %2482, %int1_16986, %int128_16987 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19462 = torch.aten.view %19410, %19461 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_16988 = torch.constant.int 4
    %int1_16989 = torch.constant.int 1
    %int128_16990 = torch.constant.int 128
    %19463 = torch.prim.ListConstruct %int4_16988, %2482, %int1_16989, %int128_16990 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19464 = torch.aten.view %19416, %19463 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
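    // Rotary-embedding frequency table: positions = arange(131072) (max context
    // length), inv_freq = 1 / 500000^(arange(0, 128, 2)/128) (rope_theta = 5.0e5),
    // freqs = positions (outer-product) inv_freq, then cis = cos(freqs) + i*sin(freqs),
    // yielding a [131072, 64] complex<f32> table. Built on device "cpu".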
    %int131072_16991 = torch.constant.int 131072
    %none_16992 = torch.constant.none
    %none_16993 = torch.constant.none
    %cpu_16994 = torch.constant.device "cpu"
    %false_16995 = torch.constant.bool false
    %19465 = torch.aten.arange %int131072_16991, %none_16992, %none_16993, %cpu_16994, %false_16995 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_16996 = torch.constant.int 0
    %int128_16997 = torch.constant.int 128
    %int2_16998 = torch.constant.int 2
    %none_16999 = torch.constant.none
    %none_17000 = torch.constant.none
    %cpu_17001 = torch.constant.device "cpu"
    %false_17002 = torch.constant.bool false
    %19466 = torch.aten.arange.start_step %int0_16996, %int128_16997, %int2_16998, %none_16999, %none_17000, %cpu_17001, %false_17002 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_17003 = torch.constant.int 0
    %int0_17004 = torch.constant.int 0
    %int64_17005 = torch.constant.int 64
    %int1_17006 = torch.constant.int 1
    %19467 = torch.aten.slice.Tensor %19466, %int0_17003, %int0_17004, %int64_17005, %int1_17006 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_17007 = torch.constant.int 6
    %19468 = torch.prims.convert_element_type %19467, %int6_17007 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_17008 = torch.constant.int 128
    %19469 = torch.aten.div.Scalar %19468, %int128_17008 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_17009 = torch.constant.float 5.000000e+05
    %19470 = torch.aten.pow.Scalar %float5.000000e05_17009, %19469 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %19471 = torch.aten.reciprocal %19470 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_17010 = torch.constant.float 1.000000e+00
    %19472 = torch.aten.mul.Scalar %19471, %float1.000000e00_17010 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_17011 = torch.constant.int 131072
    %int1_17012 = torch.constant.int 1
    %19473 = torch.prim.ListConstruct %int131072_17011, %int1_17012 : (!torch.int, !torch.int) -> !torch.list<int>
    %19474 = torch.aten.view %19465, %19473 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %19475 = torch.aten.mul.Tensor %19474, %19472 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %19476 = torch.aten.cos %19475 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %19477 = torch.aten.sin %19475 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %19478 = torch.aten.complex %19476, %19477 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
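    // The freqs-cis table is computed once and replicated to all eight devices
    // via flow.tensor.transfer (%19481, %19484, ..., %19502, one per device).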
    %19479 = torch_c.to_builtin_tensor %19478 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19480 = flow.tensor.transfer %19479 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %19481 = torch_c.from_builtin_tensor %19480 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19482 = torch_c.to_builtin_tensor %19478 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19483 = flow.tensor.transfer %19482 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %19484 = torch_c.from_builtin_tensor %19483 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19485 = torch_c.to_builtin_tensor %19478 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19486 = flow.tensor.transfer %19485 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %19487 = torch_c.from_builtin_tensor %19486 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19488 = torch_c.to_builtin_tensor %19478 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19489 = flow.tensor.transfer %19488 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %19490 = torch_c.from_builtin_tensor %19489 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19491 = torch_c.to_builtin_tensor %19478 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19492 = flow.tensor.transfer %19491 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %19493 = torch_c.from_builtin_tensor %19492 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19494 = torch_c.to_builtin_tensor %19478 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19495 = flow.tensor.transfer %19494 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %19496 = torch_c.from_builtin_tensor %19495 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19497 = torch_c.to_builtin_tensor %19478 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19498 = flow.tensor.transfer %19497 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %19499 = torch_c.from_builtin_tensor %19498 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19500 = torch_c.to_builtin_tensor %19478 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19501 = flow.tensor.transfer %19500 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %19502 = torch_c.from_builtin_tensor %19501 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
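    // Per-device rotary application, starting with device 0: slice the table to
    // the current sequence length, unsqueeze to [1,?,1,64] for broadcasting,
    // bitcast the [4,?,4,128]f16 query shard to [4,?,4,64]complex<f16>, multiply
    // by the table, bitcast back to interleaved f32 pairs, and truncate to f16.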
    %int1_17013 = torch.constant.int 1
    %19503 = torch.aten.size.int %19246, %int1_17013 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_17014 = torch.constant.int 0
    %19504 = torch.aten.add.int %int0_17014, %19503 : !torch.int, !torch.int -> !torch.int
    %int0_17015 = torch.constant.int 0
    %int0_17016 = torch.constant.int 0
    %int1_17017 = torch.constant.int 1
    %19505 = torch.aten.slice.Tensor %19481, %int0_17015, %int0_17016, %19504, %int1_17017 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19505, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17018 = torch.constant.int 1
    %int0_17019 = torch.constant.int 0
    %int9223372036854775807_17020 = torch.constant.int 9223372036854775807
    %int1_17021 = torch.constant.int 1
    %19506 = torch.aten.slice.Tensor %19505, %int1_17018, %int0_17019, %int9223372036854775807_17020, %int1_17021 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19506, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17022 = torch.constant.int 0
    %19507 = torch.aten.unsqueeze %19506, %int0_17022 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19507, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17023 = torch.constant.int 2
    %19508 = torch.aten.unsqueeze %19507, %int2_17023 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19508, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17024 = torch.constant.int 3
    %int0_17025 = torch.constant.int 0
    %int9223372036854775807_17026 = torch.constant.int 9223372036854775807
    %int1_17027 = torch.constant.int 1
    %19509 = torch.aten.slice.Tensor %19508, %int3_17024, %int0_17025, %int9223372036854775807_17026, %int1_17027 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19509, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19510 = torch_c.to_builtin_tensor %19418 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_17028 = arith.constant 1 : index
    %dim_17029 = tensor.dim %19510, %c1_17028 : tensor<4x?x4x128xf16>
    %19511 = flow.tensor.bitcast %19510 : tensor<4x?x4x128xf16>{%dim_17029} -> tensor<4x?x4x64xcomplex<f16>>{%dim_17029}
    %19512 = torch_c.from_builtin_tensor %19511 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %19512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %19513 = torch.aten.mul.Tensor %19512, %19509 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %19513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %19514 = torch_c.to_builtin_tensor %19513 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_17030 = arith.constant 1 : index
    %dim_17031 = tensor.dim %19514, %c1_17030 : tensor<4x?x4x64xcomplex<f32>>
    %19515 = flow.tensor.bitcast %19514 : tensor<4x?x4x64xcomplex<f32>>{%dim_17031} -> tensor<4x?x4x128xf32>{%dim_17031}
    %19516 = torch_c.from_builtin_tensor %19515 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %19516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_17032 = torch.constant.int 5
    %19517 = torch.prims.convert_element_type %19516, %int5_17032 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
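    // The same slice / unsqueeze / bitcast / complex-multiply sequence repeats
    // verbatim for devices 1 through 7 below.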
    %int1_17033 = torch.constant.int 1
    %19518 = torch.aten.size.int %19252, %int1_17033 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_17034 = torch.constant.int 0
    %19519 = torch.aten.add.int %int0_17034, %19518 : !torch.int, !torch.int -> !torch.int
    %int0_17035 = torch.constant.int 0
    %int0_17036 = torch.constant.int 0
    %int1_17037 = torch.constant.int 1
    %19520 = torch.aten.slice.Tensor %19484, %int0_17035, %int0_17036, %19519, %int1_17037 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19520, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17038 = torch.constant.int 1
    %int0_17039 = torch.constant.int 0
    %int9223372036854775807_17040 = torch.constant.int 9223372036854775807
    %int1_17041 = torch.constant.int 1
    %19521 = torch.aten.slice.Tensor %19520, %int1_17038, %int0_17039, %int9223372036854775807_17040, %int1_17041 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19521, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17042 = torch.constant.int 0
    %19522 = torch.aten.unsqueeze %19521, %int0_17042 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19522, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17043 = torch.constant.int 2
    %19523 = torch.aten.unsqueeze %19522, %int2_17043 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19523, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17044 = torch.constant.int 3
    %int0_17045 = torch.constant.int 0
    %int9223372036854775807_17046 = torch.constant.int 9223372036854775807
    %int1_17047 = torch.constant.int 1
    %19524 = torch.aten.slice.Tensor %19523, %int3_17044, %int0_17045, %int9223372036854775807_17046, %int1_17047 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19524, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19525 = torch_c.to_builtin_tensor %19420 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_17048 = arith.constant 1 : index
    %dim_17049 = tensor.dim %19525, %c1_17048 : tensor<4x?x4x128xf16>
    %19526 = flow.tensor.bitcast %19525 : tensor<4x?x4x128xf16>{%dim_17049} -> tensor<4x?x4x64xcomplex<f16>>{%dim_17049}
    %19527 = torch_c.from_builtin_tensor %19526 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %19527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %19528 = torch.aten.mul.Tensor %19527, %19524 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %19528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %19529 = torch_c.to_builtin_tensor %19528 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_17050 = arith.constant 1 : index
    %dim_17051 = tensor.dim %19529, %c1_17050 : tensor<4x?x4x64xcomplex<f32>>
    %19530 = flow.tensor.bitcast %19529 : tensor<4x?x4x64xcomplex<f32>>{%dim_17051} -> tensor<4x?x4x128xf32>{%dim_17051}
    %19531 = torch_c.from_builtin_tensor %19530 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %19531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_17052 = torch.constant.int 5
    %19532 = torch.prims.convert_element_type %19531, %int5_17052 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17053 = torch.constant.int 1
    %19533 = torch.aten.size.int %19258, %int1_17053 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_17054 = torch.constant.int 0
    %19534 = torch.aten.add.int %int0_17054, %19533 : !torch.int, !torch.int -> !torch.int
    %int0_17055 = torch.constant.int 0
    %int0_17056 = torch.constant.int 0
    %int1_17057 = torch.constant.int 1
    %19535 = torch.aten.slice.Tensor %19487, %int0_17055, %int0_17056, %19534, %int1_17057 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19535, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17058 = torch.constant.int 1
    %int0_17059 = torch.constant.int 0
    %int9223372036854775807_17060 = torch.constant.int 9223372036854775807
    %int1_17061 = torch.constant.int 1
    %19536 = torch.aten.slice.Tensor %19535, %int1_17058, %int0_17059, %int9223372036854775807_17060, %int1_17061 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19536, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17062 = torch.constant.int 0
    %19537 = torch.aten.unsqueeze %19536, %int0_17062 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19537, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17063 = torch.constant.int 2
    %19538 = torch.aten.unsqueeze %19537, %int2_17063 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19538, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17064 = torch.constant.int 3
    %int0_17065 = torch.constant.int 0
    %int9223372036854775807_17066 = torch.constant.int 9223372036854775807
    %int1_17067 = torch.constant.int 1
    %19539 = torch.aten.slice.Tensor %19538, %int3_17064, %int0_17065, %int9223372036854775807_17066, %int1_17067 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19539, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19540 = torch_c.to_builtin_tensor %19422 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_17068 = arith.constant 1 : index
    %dim_17069 = tensor.dim %19540, %c1_17068 : tensor<4x?x4x128xf16>
    %19541 = flow.tensor.bitcast %19540 : tensor<4x?x4x128xf16>{%dim_17069} -> tensor<4x?x4x64xcomplex<f16>>{%dim_17069}
    %19542 = torch_c.from_builtin_tensor %19541 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %19542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %19543 = torch.aten.mul.Tensor %19542, %19539 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %19543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %19544 = torch_c.to_builtin_tensor %19543 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_17070 = arith.constant 1 : index
    %dim_17071 = tensor.dim %19544, %c1_17070 : tensor<4x?x4x64xcomplex<f32>>
    %19545 = flow.tensor.bitcast %19544 : tensor<4x?x4x64xcomplex<f32>>{%dim_17071} -> tensor<4x?x4x128xf32>{%dim_17071}
    %19546 = torch_c.from_builtin_tensor %19545 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %19546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_17072 = torch.constant.int 5
    %19547 = torch.prims.convert_element_type %19546, %int5_17072 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17073 = torch.constant.int 1
    %19548 = torch.aten.size.int %19264, %int1_17073 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_17074 = torch.constant.int 0
    %19549 = torch.aten.add.int %int0_17074, %19548 : !torch.int, !torch.int -> !torch.int
    %int0_17075 = torch.constant.int 0
    %int0_17076 = torch.constant.int 0
    %int1_17077 = torch.constant.int 1
    %19550 = torch.aten.slice.Tensor %19490, %int0_17075, %int0_17076, %19549, %int1_17077 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19550, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17078 = torch.constant.int 1
    %int0_17079 = torch.constant.int 0
    %int9223372036854775807_17080 = torch.constant.int 9223372036854775807
    %int1_17081 = torch.constant.int 1
    %19551 = torch.aten.slice.Tensor %19550, %int1_17078, %int0_17079, %int9223372036854775807_17080, %int1_17081 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19551, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17082 = torch.constant.int 0
    %19552 = torch.aten.unsqueeze %19551, %int0_17082 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19552, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17083 = torch.constant.int 2
    %19553 = torch.aten.unsqueeze %19552, %int2_17083 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19553, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17084 = torch.constant.int 3
    %int0_17085 = torch.constant.int 0
    %int9223372036854775807_17086 = torch.constant.int 9223372036854775807
    %int1_17087 = torch.constant.int 1
    %19554 = torch.aten.slice.Tensor %19553, %int3_17084, %int0_17085, %int9223372036854775807_17086, %int1_17087 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19554, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19555 = torch_c.to_builtin_tensor %19424 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_17088 = arith.constant 1 : index
    %dim_17089 = tensor.dim %19555, %c1_17088 : tensor<4x?x4x128xf16>
    %19556 = flow.tensor.bitcast %19555 : tensor<4x?x4x128xf16>{%dim_17089} -> tensor<4x?x4x64xcomplex<f16>>{%dim_17089}
    %19557 = torch_c.from_builtin_tensor %19556 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %19557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %19558 = torch.aten.mul.Tensor %19557, %19554 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %19558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %19559 = torch_c.to_builtin_tensor %19558 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_17090 = arith.constant 1 : index
    %dim_17091 = tensor.dim %19559, %c1_17090 : tensor<4x?x4x64xcomplex<f32>>
    %19560 = flow.tensor.bitcast %19559 : tensor<4x?x4x64xcomplex<f32>>{%dim_17091} -> tensor<4x?x4x128xf32>{%dim_17091}
    %19561 = torch_c.from_builtin_tensor %19560 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %19561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_17092 = torch.constant.int 5
    %19562 = torch.prims.convert_element_type %19561, %int5_17092 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17093 = torch.constant.int 1
    %19563 = torch.aten.size.int %19270, %int1_17093 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_17094 = torch.constant.int 0
    %19564 = torch.aten.add.int %int0_17094, %19563 : !torch.int, !torch.int -> !torch.int
    %int0_17095 = torch.constant.int 0
    %int0_17096 = torch.constant.int 0
    %int1_17097 = torch.constant.int 1
    %19565 = torch.aten.slice.Tensor %19493, %int0_17095, %int0_17096, %19564, %int1_17097 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19565, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17098 = torch.constant.int 1
    %int0_17099 = torch.constant.int 0
    %int9223372036854775807_17100 = torch.constant.int 9223372036854775807
    %int1_17101 = torch.constant.int 1
    %19566 = torch.aten.slice.Tensor %19565, %int1_17098, %int0_17099, %int9223372036854775807_17100, %int1_17101 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19566, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17102 = torch.constant.int 0
    %19567 = torch.aten.unsqueeze %19566, %int0_17102 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19567, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17103 = torch.constant.int 2
    %19568 = torch.aten.unsqueeze %19567, %int2_17103 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19568, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17104 = torch.constant.int 3
    %int0_17105 = torch.constant.int 0
    %int9223372036854775807_17106 = torch.constant.int 9223372036854775807
    %int1_17107 = torch.constant.int 1
    %19569 = torch.aten.slice.Tensor %19568, %int3_17104, %int0_17105, %int9223372036854775807_17106, %int1_17107 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19569, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19570 = torch_c.to_builtin_tensor %19426 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_17108 = arith.constant 1 : index
    %dim_17109 = tensor.dim %19570, %c1_17108 : tensor<4x?x4x128xf16>
    %19571 = flow.tensor.bitcast %19570 : tensor<4x?x4x128xf16>{%dim_17109} -> tensor<4x?x4x64xcomplex<f16>>{%dim_17109}
    %19572 = torch_c.from_builtin_tensor %19571 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %19572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %19573 = torch.aten.mul.Tensor %19572, %19569 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %19573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %19574 = torch_c.to_builtin_tensor %19573 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_17110 = arith.constant 1 : index
    %dim_17111 = tensor.dim %19574, %c1_17110 : tensor<4x?x4x64xcomplex<f32>>
    %19575 = flow.tensor.bitcast %19574 : tensor<4x?x4x64xcomplex<f32>>{%dim_17111} -> tensor<4x?x4x128xf32>{%dim_17111}
    %19576 = torch_c.from_builtin_tensor %19575 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %19576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_17112 = torch.constant.int 5
    %19577 = torch.prims.convert_element_type %19576, %int5_17112 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17113 = torch.constant.int 1
    %19578 = torch.aten.size.int %19276, %int1_17113 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_17114 = torch.constant.int 0
    %19579 = torch.aten.add.int %int0_17114, %19578 : !torch.int, !torch.int -> !torch.int
    %int0_17115 = torch.constant.int 0
    %int0_17116 = torch.constant.int 0
    %int1_17117 = torch.constant.int 1
    %19580 = torch.aten.slice.Tensor %19496, %int0_17115, %int0_17116, %19579, %int1_17117 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19580, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17118 = torch.constant.int 1
    %int0_17119 = torch.constant.int 0
    %int9223372036854775807_17120 = torch.constant.int 9223372036854775807
    %int1_17121 = torch.constant.int 1
    %19581 = torch.aten.slice.Tensor %19580, %int1_17118, %int0_17119, %int9223372036854775807_17120, %int1_17121 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19581, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17122 = torch.constant.int 0
    %19582 = torch.aten.unsqueeze %19581, %int0_17122 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19582, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17123 = torch.constant.int 2
    %19583 = torch.aten.unsqueeze %19582, %int2_17123 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19583, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17124 = torch.constant.int 3
    %int0_17125 = torch.constant.int 0
    %int9223372036854775807_17126 = torch.constant.int 9223372036854775807
    %int1_17127 = torch.constant.int 1
    %19584 = torch.aten.slice.Tensor %19583, %int3_17124, %int0_17125, %int9223372036854775807_17126, %int1_17127 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19584, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19585 = torch_c.to_builtin_tensor %19428 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_17128 = arith.constant 1 : index
    %dim_17129 = tensor.dim %19585, %c1_17128 : tensor<4x?x4x128xf16>
    %19586 = flow.tensor.bitcast %19585 : tensor<4x?x4x128xf16>{%dim_17129} -> tensor<4x?x4x64xcomplex<f16>>{%dim_17129}
    %19587 = torch_c.from_builtin_tensor %19586 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %19587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %19588 = torch.aten.mul.Tensor %19587, %19584 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %19588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %19589 = torch_c.to_builtin_tensor %19588 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_17130 = arith.constant 1 : index
    %dim_17131 = tensor.dim %19589, %c1_17130 : tensor<4x?x4x64xcomplex<f32>>
    %19590 = flow.tensor.bitcast %19589 : tensor<4x?x4x64xcomplex<f32>>{%dim_17131} -> tensor<4x?x4x128xf32>{%dim_17131}
    %19591 = torch_c.from_builtin_tensor %19590 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %19591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_17132 = torch.constant.int 5
    %19592 = torch.prims.convert_element_type %19591, %int5_17132 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17133 = torch.constant.int 1
    %19593 = torch.aten.size.int %19282, %int1_17133 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_17134 = torch.constant.int 0
    %19594 = torch.aten.add.int %int0_17134, %19593 : !torch.int, !torch.int -> !torch.int
    %int0_17135 = torch.constant.int 0
    %int0_17136 = torch.constant.int 0
    %int1_17137 = torch.constant.int 1
    %19595 = torch.aten.slice.Tensor %19499, %int0_17135, %int0_17136, %19594, %int1_17137 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19595, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17138 = torch.constant.int 1
    %int0_17139 = torch.constant.int 0
    %int9223372036854775807_17140 = torch.constant.int 9223372036854775807
    %int1_17141 = torch.constant.int 1
    %19596 = torch.aten.slice.Tensor %19595, %int1_17138, %int0_17139, %int9223372036854775807_17140, %int1_17141 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19596, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17142 = torch.constant.int 0
    %19597 = torch.aten.unsqueeze %19596, %int0_17142 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19597, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17143 = torch.constant.int 2
    %19598 = torch.aten.unsqueeze %19597, %int2_17143 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19598, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17144 = torch.constant.int 3
    %int0_17145 = torch.constant.int 0
    %int9223372036854775807_17146 = torch.constant.int 9223372036854775807
    %int1_17147 = torch.constant.int 1
    %19599 = torch.aten.slice.Tensor %19598, %int3_17144, %int0_17145, %int9223372036854775807_17146, %int1_17147 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19599, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19600 = torch_c.to_builtin_tensor %19430 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_17148 = arith.constant 1 : index
    %dim_17149 = tensor.dim %19600, %c1_17148 : tensor<4x?x4x128xf16>
    %19601 = flow.tensor.bitcast %19600 : tensor<4x?x4x128xf16>{%dim_17149} -> tensor<4x?x4x64xcomplex<f16>>{%dim_17149}
    %19602 = torch_c.from_builtin_tensor %19601 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %19602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %19603 = torch.aten.mul.Tensor %19602, %19599 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %19603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %19604 = torch_c.to_builtin_tensor %19603 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_17150 = arith.constant 1 : index
    %dim_17151 = tensor.dim %19604, %c1_17150 : tensor<4x?x4x64xcomplex<f32>>
    %19605 = flow.tensor.bitcast %19604 : tensor<4x?x4x64xcomplex<f32>>{%dim_17151} -> tensor<4x?x4x128xf32>{%dim_17151}
    %19606 = torch_c.from_builtin_tensor %19605 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %19606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_17152 = torch.constant.int 5
    %19607 = torch.prims.convert_element_type %19606, %int5_17152 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17153 = torch.constant.int 1
    %19608 = torch.aten.size.int %19288, %int1_17153 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_17154 = torch.constant.int 0
    %19609 = torch.aten.add.int %int0_17154, %19608 : !torch.int, !torch.int -> !torch.int
    %int0_17155 = torch.constant.int 0
    %int0_17156 = torch.constant.int 0
    %int1_17157 = torch.constant.int 1
    %19610 = torch.aten.slice.Tensor %19502, %int0_17155, %int0_17156, %19609, %int1_17157 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19610, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17158 = torch.constant.int 1
    %int0_17159 = torch.constant.int 0
    %int9223372036854775807_17160 = torch.constant.int 9223372036854775807
    %int1_17161 = torch.constant.int 1
    %19611 = torch.aten.slice.Tensor %19610, %int1_17158, %int0_17159, %int9223372036854775807_17160, %int1_17161 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19611, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17162 = torch.constant.int 0
    %19612 = torch.aten.unsqueeze %19611, %int0_17162 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19612, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17163 = torch.constant.int 2
    %19613 = torch.aten.unsqueeze %19612, %int2_17163 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19613, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17164 = torch.constant.int 3
    %int0_17165 = torch.constant.int 0
    %int9223372036854775807_17166 = torch.constant.int 9223372036854775807
    %int1_17167 = torch.constant.int 1
    %19614 = torch.aten.slice.Tensor %19613, %int3_17164, %int0_17165, %int9223372036854775807_17166, %int1_17167 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19614, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19615 = torch_c.to_builtin_tensor %19432 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_17168 = arith.constant 1 : index
    %dim_17169 = tensor.dim %19615, %c1_17168 : tensor<4x?x4x128xf16>
    %19616 = flow.tensor.bitcast %19615 : tensor<4x?x4x128xf16>{%dim_17169} -> tensor<4x?x4x64xcomplex<f16>>{%dim_17169}
    %19617 = torch_c.from_builtin_tensor %19616 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %19617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %19618 = torch.aten.mul.Tensor %19617, %19614 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %19618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %19619 = torch_c.to_builtin_tensor %19618 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_17170 = arith.constant 1 : index
    %dim_17171 = tensor.dim %19619, %c1_17170 : tensor<4x?x4x64xcomplex<f32>>
    %19620 = flow.tensor.bitcast %19619 : tensor<4x?x4x64xcomplex<f32>>{%dim_17171} -> tensor<4x?x4x128xf32>{%dim_17171}
    %19621 = torch_c.from_builtin_tensor %19620 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %19621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_17172 = torch.constant.int 5
    %19622 = torch.prims.convert_element_type %19621, %int5_17172 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %19622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
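    // A second freqs-cis table, identical to %19478 (same rope_theta and 131072
    // positions), is rebuilt and re-broadcast here instead of reusing the
    // earlier per-device copies; the redundancy is presumably left for a later
    // CSE/dedup pass to fold.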
    %int131072_17173 = torch.constant.int 131072
    %none_17174 = torch.constant.none
    %none_17175 = torch.constant.none
    %cpu_17176 = torch.constant.device "cpu"
    %false_17177 = torch.constant.bool false
    %19623 = torch.aten.arange %int131072_17173, %none_17174, %none_17175, %cpu_17176, %false_17177 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_17178 = torch.constant.int 0
    %int128_17179 = torch.constant.int 128
    %int2_17180 = torch.constant.int 2
    %none_17181 = torch.constant.none
    %none_17182 = torch.constant.none
    %cpu_17183 = torch.constant.device "cpu"
    %false_17184 = torch.constant.bool false
    %19624 = torch.aten.arange.start_step %int0_17178, %int128_17179, %int2_17180, %none_17181, %none_17182, %cpu_17183, %false_17184 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_17185 = torch.constant.int 0
    %int0_17186 = torch.constant.int 0
    %int64_17187 = torch.constant.int 64
    %int1_17188 = torch.constant.int 1
    %19625 = torch.aten.slice.Tensor %19624, %int0_17185, %int0_17186, %int64_17187, %int1_17188 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_17189 = torch.constant.int 6
    %19626 = torch.prims.convert_element_type %19625, %int6_17189 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_17190 = torch.constant.int 128
    %19627 = torch.aten.div.Scalar %19626, %int128_17190 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_17191 = torch.constant.float 5.000000e+05
    %19628 = torch.aten.pow.Scalar %float5.000000e05_17191, %19627 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %19629 = torch.aten.reciprocal %19628 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_17192 = torch.constant.float 1.000000e+00
    %19630 = torch.aten.mul.Scalar %19629, %float1.000000e00_17192 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_17193 = torch.constant.int 131072
    %int1_17194 = torch.constant.int 1
    %19631 = torch.prim.ListConstruct %int131072_17193, %int1_17194 : (!torch.int, !torch.int) -> !torch.list<int>
    %19632 = torch.aten.view %19623, %19631 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %19633 = torch.aten.mul.Tensor %19632, %19630 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %19634 = torch.aten.cos %19633 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %19635 = torch.aten.sin %19633 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %19636 = torch.aten.complex %19634, %19635 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
    %19637 = torch_c.to_builtin_tensor %19636 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19638 = flow.tensor.transfer %19637 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %19639 = torch_c.from_builtin_tensor %19638 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19640 = torch_c.to_builtin_tensor %19636 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19641 = flow.tensor.transfer %19640 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %19642 = torch_c.from_builtin_tensor %19641 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19643 = torch_c.to_builtin_tensor %19636 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19644 = flow.tensor.transfer %19643 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %19645 = torch_c.from_builtin_tensor %19644 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19646 = torch_c.to_builtin_tensor %19636 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19647 = flow.tensor.transfer %19646 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %19648 = torch_c.from_builtin_tensor %19647 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19649 = torch_c.to_builtin_tensor %19636 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19650 = flow.tensor.transfer %19649 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %19651 = torch_c.from_builtin_tensor %19650 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19652 = torch_c.to_builtin_tensor %19636 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19653 = flow.tensor.transfer %19652 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %19654 = torch_c.from_builtin_tensor %19653 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19655 = torch_c.to_builtin_tensor %19636 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19656 = flow.tensor.transfer %19655 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %19657 = torch_c.from_builtin_tensor %19656 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %19658 = torch_c.to_builtin_tensor %19636 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %19659 = flow.tensor.transfer %19658 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %19660 = torch_c.from_builtin_tensor %19659 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
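    // %19637..%19660: the rotation table %19636 is replicated to all eight
    // devices via flow.tensor.transfer; %19639, %19642, %19645, %19648,
    // %19651, %19654, %19657, %19660 are the copies on @__device_0..7 used
    // below.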
    %int1_17195 = torch.constant.int 1
    %19661 = torch.aten.size.int %19310, %int1_17195 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_17196 = torch.constant.int 0
    %19662 = torch.aten.add.int %int0_17196, %19661 : !torch.int, !torch.int -> !torch.int
    %int0_17197 = torch.constant.int 0
    %int0_17198 = torch.constant.int 0
    %int1_17199 = torch.constant.int 1
    %19663 = torch.aten.slice.Tensor %19639, %int0_17197, %int0_17198, %19662, %int1_17199 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19663, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17200 = torch.constant.int 1
    %int0_17201 = torch.constant.int 0
    %int9223372036854775807_17202 = torch.constant.int 9223372036854775807
    %int1_17203 = torch.constant.int 1
    %19664 = torch.aten.slice.Tensor %19663, %int1_17200, %int0_17201, %int9223372036854775807_17202, %int1_17203 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19664, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17204 = torch.constant.int 0
    %19665 = torch.aten.unsqueeze %19664, %int0_17204 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19665, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17205 = torch.constant.int 2
    %19666 = torch.aten.unsqueeze %19665, %int2_17205 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19666, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17206 = torch.constant.int 3
    %int0_17207 = torch.constant.int 0
    %int9223372036854775807_17208 = torch.constant.int 9223372036854775807
    %int1_17209 = torch.constant.int 1
    %19667 = torch.aten.slice.Tensor %19666, %int3_17206, %int0_17207, %int9223372036854775807_17208, %int1_17209 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19667, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19668 = torch_c.to_builtin_tensor %19434 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_17210 = arith.constant 1 : index
    %dim_17211 = tensor.dim %19668, %c1_17210 : tensor<4x?x1x128xf16>
    %19669 = flow.tensor.bitcast %19668 : tensor<4x?x1x128xf16>{%dim_17211} -> tensor<4x?x1x64xcomplex<f16>>{%dim_17211}
    %19670 = torch_c.from_builtin_tensor %19669 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %19670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %19671 = torch.aten.mul.Tensor %19670, %19667 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %19672 = torch_c.to_builtin_tensor %19671 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_17212 = arith.constant 1 : index
    %dim_17213 = tensor.dim %19672, %c1_17212 : tensor<4x?x1x64xcomplex<f32>>
    %19673 = flow.tensor.bitcast %19672 : tensor<4x?x1x64xcomplex<f32>>{%dim_17213} -> tensor<4x?x1x128xf32>{%dim_17213}
    %19674 = torch_c.from_builtin_tensor %19673 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %19674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_17214 = torch.constant.int 5
    %19675 = torch.prims.convert_element_type %19674, %int5_17214 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
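    // Rotary application on @__device_0: the table copy %19639 is sliced to
    // the current sequence length (%19661, dim 1 of %19310) and reshaped to
    // [1, seq, 1, 64]; the f16 key states %19434 ([4, seq, 1, 128]) are
    // bitcast to [4, seq, 1, 64] complex<f16>, rotated by complex multiply,
    // bitcast back to [4, seq, 1, 128] f32, and truncated to f16 (%19675).
    // The single head dimension suggests one KV head per shard. The same
    // pattern repeats verbatim for devices 1..7 below.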
    %int1_17215 = torch.constant.int 1
    %19676 = torch.aten.size.int %19316, %int1_17215 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_17216 = torch.constant.int 0
    %19677 = torch.aten.add.int %int0_17216, %19676 : !torch.int, !torch.int -> !torch.int
    %int0_17217 = torch.constant.int 0
    %int0_17218 = torch.constant.int 0
    %int1_17219 = torch.constant.int 1
    %19678 = torch.aten.slice.Tensor %19642, %int0_17217, %int0_17218, %19677, %int1_17219 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19678, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17220 = torch.constant.int 1
    %int0_17221 = torch.constant.int 0
    %int9223372036854775807_17222 = torch.constant.int 9223372036854775807
    %int1_17223 = torch.constant.int 1
    %19679 = torch.aten.slice.Tensor %19678, %int1_17220, %int0_17221, %int9223372036854775807_17222, %int1_17223 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19679, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17224 = torch.constant.int 0
    %19680 = torch.aten.unsqueeze %19679, %int0_17224 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19680, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17225 = torch.constant.int 2
    %19681 = torch.aten.unsqueeze %19680, %int2_17225 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19681, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17226 = torch.constant.int 3
    %int0_17227 = torch.constant.int 0
    %int9223372036854775807_17228 = torch.constant.int 9223372036854775807
    %int1_17229 = torch.constant.int 1
    %19682 = torch.aten.slice.Tensor %19681, %int3_17226, %int0_17227, %int9223372036854775807_17228, %int1_17229 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19682, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19683 = torch_c.to_builtin_tensor %19436 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_17230 = arith.constant 1 : index
    %dim_17231 = tensor.dim %19683, %c1_17230 : tensor<4x?x1x128xf16>
    %19684 = flow.tensor.bitcast %19683 : tensor<4x?x1x128xf16>{%dim_17231} -> tensor<4x?x1x64xcomplex<f16>>{%dim_17231}
    %19685 = torch_c.from_builtin_tensor %19684 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %19685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %19686 = torch.aten.mul.Tensor %19685, %19682 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %19687 = torch_c.to_builtin_tensor %19686 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_17232 = arith.constant 1 : index
    %dim_17233 = tensor.dim %19687, %c1_17232 : tensor<4x?x1x64xcomplex<f32>>
    %19688 = flow.tensor.bitcast %19687 : tensor<4x?x1x64xcomplex<f32>>{%dim_17233} -> tensor<4x?x1x128xf32>{%dim_17233}
    %19689 = torch_c.from_builtin_tensor %19688 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %19689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_17234 = torch.constant.int 5
    %19690 = torch.prims.convert_element_type %19689, %int5_17234 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_17235 = torch.constant.int 1
    %19691 = torch.aten.size.int %19322, %int1_17235 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_17236 = torch.constant.int 0
    %19692 = torch.aten.add.int %int0_17236, %19691 : !torch.int, !torch.int -> !torch.int
    %int0_17237 = torch.constant.int 0
    %int0_17238 = torch.constant.int 0
    %int1_17239 = torch.constant.int 1
    %19693 = torch.aten.slice.Tensor %19645, %int0_17237, %int0_17238, %19692, %int1_17239 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19693, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17240 = torch.constant.int 1
    %int0_17241 = torch.constant.int 0
    %int9223372036854775807_17242 = torch.constant.int 9223372036854775807
    %int1_17243 = torch.constant.int 1
    %19694 = torch.aten.slice.Tensor %19693, %int1_17240, %int0_17241, %int9223372036854775807_17242, %int1_17243 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19694, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17244 = torch.constant.int 0
    %19695 = torch.aten.unsqueeze %19694, %int0_17244 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19695, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17245 = torch.constant.int 2
    %19696 = torch.aten.unsqueeze %19695, %int2_17245 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19696, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17246 = torch.constant.int 3
    %int0_17247 = torch.constant.int 0
    %int9223372036854775807_17248 = torch.constant.int 9223372036854775807
    %int1_17249 = torch.constant.int 1
    %19697 = torch.aten.slice.Tensor %19696, %int3_17246, %int0_17247, %int9223372036854775807_17248, %int1_17249 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19697, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19698 = torch_c.to_builtin_tensor %19438 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_17250 = arith.constant 1 : index
    %dim_17251 = tensor.dim %19698, %c1_17250 : tensor<4x?x1x128xf16>
    %19699 = flow.tensor.bitcast %19698 : tensor<4x?x1x128xf16>{%dim_17251} -> tensor<4x?x1x64xcomplex<f16>>{%dim_17251}
    %19700 = torch_c.from_builtin_tensor %19699 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %19700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %19701 = torch.aten.mul.Tensor %19700, %19697 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %19702 = torch_c.to_builtin_tensor %19701 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_17252 = arith.constant 1 : index
    %dim_17253 = tensor.dim %19702, %c1_17252 : tensor<4x?x1x64xcomplex<f32>>
    %19703 = flow.tensor.bitcast %19702 : tensor<4x?x1x64xcomplex<f32>>{%dim_17253} -> tensor<4x?x1x128xf32>{%dim_17253}
    %19704 = torch_c.from_builtin_tensor %19703 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %19704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_17254 = torch.constant.int 5
    %19705 = torch.prims.convert_element_type %19704, %int5_17254 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_17255 = torch.constant.int 1
    %19706 = torch.aten.size.int %19328, %int1_17255 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_17256 = torch.constant.int 0
    %19707 = torch.aten.add.int %int0_17256, %19706 : !torch.int, !torch.int -> !torch.int
    %int0_17257 = torch.constant.int 0
    %int0_17258 = torch.constant.int 0
    %int1_17259 = torch.constant.int 1
    %19708 = torch.aten.slice.Tensor %19648, %int0_17257, %int0_17258, %19707, %int1_17259 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19708, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17260 = torch.constant.int 1
    %int0_17261 = torch.constant.int 0
    %int9223372036854775807_17262 = torch.constant.int 9223372036854775807
    %int1_17263 = torch.constant.int 1
    %19709 = torch.aten.slice.Tensor %19708, %int1_17260, %int0_17261, %int9223372036854775807_17262, %int1_17263 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19709, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17264 = torch.constant.int 0
    %19710 = torch.aten.unsqueeze %19709, %int0_17264 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19710, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17265 = torch.constant.int 2
    %19711 = torch.aten.unsqueeze %19710, %int2_17265 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19711, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17266 = torch.constant.int 3
    %int0_17267 = torch.constant.int 0
    %int9223372036854775807_17268 = torch.constant.int 9223372036854775807
    %int1_17269 = torch.constant.int 1
    %19712 = torch.aten.slice.Tensor %19711, %int3_17266, %int0_17267, %int9223372036854775807_17268, %int1_17269 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19712, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19713 = torch_c.to_builtin_tensor %19440 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_17270 = arith.constant 1 : index
    %dim_17271 = tensor.dim %19713, %c1_17270 : tensor<4x?x1x128xf16>
    %19714 = flow.tensor.bitcast %19713 : tensor<4x?x1x128xf16>{%dim_17271} -> tensor<4x?x1x64xcomplex<f16>>{%dim_17271}
    %19715 = torch_c.from_builtin_tensor %19714 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %19715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %19716 = torch.aten.mul.Tensor %19715, %19712 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %19717 = torch_c.to_builtin_tensor %19716 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_17272 = arith.constant 1 : index
    %dim_17273 = tensor.dim %19717, %c1_17272 : tensor<4x?x1x64xcomplex<f32>>
    %19718 = flow.tensor.bitcast %19717 : tensor<4x?x1x64xcomplex<f32>>{%dim_17273} -> tensor<4x?x1x128xf32>{%dim_17273}
    %19719 = torch_c.from_builtin_tensor %19718 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %19719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_17274 = torch.constant.int 5
    %19720 = torch.prims.convert_element_type %19719, %int5_17274 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_17275 = torch.constant.int 1
    %19721 = torch.aten.size.int %19334, %int1_17275 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_17276 = torch.constant.int 0
    %19722 = torch.aten.add.int %int0_17276, %19721 : !torch.int, !torch.int -> !torch.int
    %int0_17277 = torch.constant.int 0
    %int0_17278 = torch.constant.int 0
    %int1_17279 = torch.constant.int 1
    %19723 = torch.aten.slice.Tensor %19651, %int0_17277, %int0_17278, %19722, %int1_17279 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19723, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17280 = torch.constant.int 1
    %int0_17281 = torch.constant.int 0
    %int9223372036854775807_17282 = torch.constant.int 9223372036854775807
    %int1_17283 = torch.constant.int 1
    %19724 = torch.aten.slice.Tensor %19723, %int1_17280, %int0_17281, %int9223372036854775807_17282, %int1_17283 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19724, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17284 = torch.constant.int 0
    %19725 = torch.aten.unsqueeze %19724, %int0_17284 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19725, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17285 = torch.constant.int 2
    %19726 = torch.aten.unsqueeze %19725, %int2_17285 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19726, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17286 = torch.constant.int 3
    %int0_17287 = torch.constant.int 0
    %int9223372036854775807_17288 = torch.constant.int 9223372036854775807
    %int1_17289 = torch.constant.int 1
    %19727 = torch.aten.slice.Tensor %19726, %int3_17286, %int0_17287, %int9223372036854775807_17288, %int1_17289 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19727, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19728 = torch_c.to_builtin_tensor %19442 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_17290 = arith.constant 1 : index
    %dim_17291 = tensor.dim %19728, %c1_17290 : tensor<4x?x1x128xf16>
    %19729 = flow.tensor.bitcast %19728 : tensor<4x?x1x128xf16>{%dim_17291} -> tensor<4x?x1x64xcomplex<f16>>{%dim_17291}
    %19730 = torch_c.from_builtin_tensor %19729 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %19730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %19731 = torch.aten.mul.Tensor %19730, %19727 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %19732 = torch_c.to_builtin_tensor %19731 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_17292 = arith.constant 1 : index
    %dim_17293 = tensor.dim %19732, %c1_17292 : tensor<4x?x1x64xcomplex<f32>>
    %19733 = flow.tensor.bitcast %19732 : tensor<4x?x1x64xcomplex<f32>>{%dim_17293} -> tensor<4x?x1x128xf32>{%dim_17293}
    %19734 = torch_c.from_builtin_tensor %19733 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %19734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_17294 = torch.constant.int 5
    %19735 = torch.prims.convert_element_type %19734, %int5_17294 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_17295 = torch.constant.int 1
    %19736 = torch.aten.size.int %19340, %int1_17295 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_17296 = torch.constant.int 0
    %19737 = torch.aten.add.int %int0_17296, %19736 : !torch.int, !torch.int -> !torch.int
    %int0_17297 = torch.constant.int 0
    %int0_17298 = torch.constant.int 0
    %int1_17299 = torch.constant.int 1
    %19738 = torch.aten.slice.Tensor %19654, %int0_17297, %int0_17298, %19737, %int1_17299 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19738, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17300 = torch.constant.int 1
    %int0_17301 = torch.constant.int 0
    %int9223372036854775807_17302 = torch.constant.int 9223372036854775807
    %int1_17303 = torch.constant.int 1
    %19739 = torch.aten.slice.Tensor %19738, %int1_17300, %int0_17301, %int9223372036854775807_17302, %int1_17303 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19739, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17304 = torch.constant.int 0
    %19740 = torch.aten.unsqueeze %19739, %int0_17304 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19740, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17305 = torch.constant.int 2
    %19741 = torch.aten.unsqueeze %19740, %int2_17305 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19741, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17306 = torch.constant.int 3
    %int0_17307 = torch.constant.int 0
    %int9223372036854775807_17308 = torch.constant.int 9223372036854775807
    %int1_17309 = torch.constant.int 1
    %19742 = torch.aten.slice.Tensor %19741, %int3_17306, %int0_17307, %int9223372036854775807_17308, %int1_17309 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19742, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19743 = torch_c.to_builtin_tensor %19444 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_17310 = arith.constant 1 : index
    %dim_17311 = tensor.dim %19743, %c1_17310 : tensor<4x?x1x128xf16>
    %19744 = flow.tensor.bitcast %19743 : tensor<4x?x1x128xf16>{%dim_17311} -> tensor<4x?x1x64xcomplex<f16>>{%dim_17311}
    %19745 = torch_c.from_builtin_tensor %19744 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %19745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %19746 = torch.aten.mul.Tensor %19745, %19742 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %19747 = torch_c.to_builtin_tensor %19746 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_17312 = arith.constant 1 : index
    %dim_17313 = tensor.dim %19747, %c1_17312 : tensor<4x?x1x64xcomplex<f32>>
    %19748 = flow.tensor.bitcast %19747 : tensor<4x?x1x64xcomplex<f32>>{%dim_17313} -> tensor<4x?x1x128xf32>{%dim_17313}
    %19749 = torch_c.from_builtin_tensor %19748 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %19749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_17314 = torch.constant.int 5
    %19750 = torch.prims.convert_element_type %19749, %int5_17314 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_17315 = torch.constant.int 1
    %19751 = torch.aten.size.int %19346, %int1_17315 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_17316 = torch.constant.int 0
    %19752 = torch.aten.add.int %int0_17316, %19751 : !torch.int, !torch.int -> !torch.int
    %int0_17317 = torch.constant.int 0
    %int0_17318 = torch.constant.int 0
    %int1_17319 = torch.constant.int 1
    %19753 = torch.aten.slice.Tensor %19657, %int0_17317, %int0_17318, %19752, %int1_17319 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19753, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17320 = torch.constant.int 1
    %int0_17321 = torch.constant.int 0
    %int9223372036854775807_17322 = torch.constant.int 9223372036854775807
    %int1_17323 = torch.constant.int 1
    %19754 = torch.aten.slice.Tensor %19753, %int1_17320, %int0_17321, %int9223372036854775807_17322, %int1_17323 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19754, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17324 = torch.constant.int 0
    %19755 = torch.aten.unsqueeze %19754, %int0_17324 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19755, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17325 = torch.constant.int 2
    %19756 = torch.aten.unsqueeze %19755, %int2_17325 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19756, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17326 = torch.constant.int 3
    %int0_17327 = torch.constant.int 0
    %int9223372036854775807_17328 = torch.constant.int 9223372036854775807
    %int1_17329 = torch.constant.int 1
    %19757 = torch.aten.slice.Tensor %19756, %int3_17326, %int0_17327, %int9223372036854775807_17328, %int1_17329 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19757, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19758 = torch_c.to_builtin_tensor %19446 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_17330 = arith.constant 1 : index
    %dim_17331 = tensor.dim %19758, %c1_17330 : tensor<4x?x1x128xf16>
    %19759 = flow.tensor.bitcast %19758 : tensor<4x?x1x128xf16>{%dim_17331} -> tensor<4x?x1x64xcomplex<f16>>{%dim_17331}
    %19760 = torch_c.from_builtin_tensor %19759 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %19760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %19761 = torch.aten.mul.Tensor %19760, %19757 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %19762 = torch_c.to_builtin_tensor %19761 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_17332 = arith.constant 1 : index
    %dim_17333 = tensor.dim %19762, %c1_17332 : tensor<4x?x1x64xcomplex<f32>>
    %19763 = flow.tensor.bitcast %19762 : tensor<4x?x1x64xcomplex<f32>>{%dim_17333} -> tensor<4x?x1x128xf32>{%dim_17333}
    %19764 = torch_c.from_builtin_tensor %19763 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %19764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_17334 = torch.constant.int 5
    %19765 = torch.prims.convert_element_type %19764, %int5_17334 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_17335 = torch.constant.int 1
    %19766 = torch.aten.size.int %19352, %int1_17335 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_17336 = torch.constant.int 0
    %19767 = torch.aten.add.int %int0_17336, %19766 : !torch.int, !torch.int -> !torch.int
    %int0_17337 = torch.constant.int 0
    %int0_17338 = torch.constant.int 0
    %int1_17339 = torch.constant.int 1
    %19768 = torch.aten.slice.Tensor %19660, %int0_17337, %int0_17338, %19767, %int1_17339 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19768, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_17340 = torch.constant.int 1
    %int0_17341 = torch.constant.int 0
    %int9223372036854775807_17342 = torch.constant.int 9223372036854775807
    %int1_17343 = torch.constant.int 1
    %19769 = torch.aten.slice.Tensor %19768, %int1_17340, %int0_17341, %int9223372036854775807_17342, %int1_17343 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %19769, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_17344 = torch.constant.int 0
    %19770 = torch.aten.unsqueeze %19769, %int0_17344 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %19770, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_17345 = torch.constant.int 2
    %19771 = torch.aten.unsqueeze %19770, %int2_17345 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19771, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_17346 = torch.constant.int 3
    %int0_17347 = torch.constant.int 0
    %int9223372036854775807_17348 = torch.constant.int 9223372036854775807
    %int1_17349 = torch.constant.int 1
    %19772 = torch.aten.slice.Tensor %19771, %int3_17346, %int0_17347, %int9223372036854775807_17348, %int1_17349 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19772, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %19773 = torch_c.to_builtin_tensor %19448 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_17350 = arith.constant 1 : index
    %dim_17351 = tensor.dim %19773, %c1_17350 : tensor<4x?x1x128xf16>
    %19774 = flow.tensor.bitcast %19773 : tensor<4x?x1x128xf16>{%dim_17351} -> tensor<4x?x1x64xcomplex<f16>>{%dim_17351}
    %19775 = torch_c.from_builtin_tensor %19774 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %19775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %19776 = torch.aten.mul.Tensor %19775, %19772 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %19776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %19777 = torch_c.to_builtin_tensor %19776 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_17352 = arith.constant 1 : index
    %dim_17353 = tensor.dim %19777, %c1_17352 : tensor<4x?x1x64xcomplex<f32>>
    %19778 = flow.tensor.bitcast %19777 : tensor<4x?x1x64xcomplex<f32>>{%dim_17353} -> tensor<4x?x1x128xf32>{%dim_17353}
    %19779 = torch_c.from_builtin_tensor %19778 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %19779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_17354 = torch.constant.int 5
    %19780 = torch.prims.convert_element_type %19779, %int5_17354 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %19780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
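    // End of the rotary section: %19675, %19690, %19705, %19720, %19735,
    // %19750, %19765, %19780 hold the rotary-embedded K states for devices
    // 0..7 respectively.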
    %int64_17355 = torch.constant.int 64
    %19781 = torch.aten.mul.Scalar %2364, %int64_17355 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19781, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_17356 = torch.constant.int 64
    %19782 = torch.aten.mul.Scalar %2367, %int64_17356 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19782, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_17357 = torch.constant.int 64
    %19783 = torch.aten.mul.Scalar %2370, %int64_17357 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19783, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_17358 = torch.constant.int 64
    %19784 = torch.aten.mul.Scalar %2373, %int64_17358 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19784, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_17359 = torch.constant.int 64
    %19785 = torch.aten.mul.Scalar %2376, %int64_17359 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19785, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_17360 = torch.constant.int 64
    %19786 = torch.aten.mul.Scalar %2379, %int64_17360 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19786, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_17361 = torch.constant.int 64
    %19787 = torch.aten.mul.Scalar %2382, %int64_17361 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19787, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_17362 = torch.constant.int 64
    %19788 = torch.aten.mul.Scalar %2385, %int64_17362 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19788, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
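    // %19781..%19788: per-device page-id tensors (%2364, %2367, ..., %2385)
    // scaled by 64 -- likely the per-page sub-block stride of a paged KV
    // cache (e.g. 32 transformer blocks x {K, V} = 64 entries per page).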
    %int18 = torch.constant.int 18
    %int1_17363 = torch.constant.int 1
    %19789 = torch.aten.add.Scalar %19781, %int18, %int1_17363 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19789, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int18_17364 = torch.constant.int 18
    %int1_17365 = torch.constant.int 1
    %19790 = torch.aten.add.Scalar %19782, %int18_17364, %int1_17365 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19790, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int18_17366 = torch.constant.int 18
    %int1_17367 = torch.constant.int 1
    %19791 = torch.aten.add.Scalar %19783, %int18_17366, %int1_17367 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19791, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int18_17368 = torch.constant.int 18
    %int1_17369 = torch.constant.int 1
    %19792 = torch.aten.add.Scalar %19784, %int18_17368, %int1_17369 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19792, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int18_17370 = torch.constant.int 18
    %int1_17371 = torch.constant.int 1
    %19793 = torch.aten.add.Scalar %19785, %int18_17370, %int1_17371 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19793, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int18_17372 = torch.constant.int 18
    %int1_17373 = torch.constant.int 1
    %19794 = torch.aten.add.Scalar %19786, %int18_17372, %int1_17373 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19794, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int18_17374 = torch.constant.int 18
    %int1_17375 = torch.constant.int 1
    %19795 = torch.aten.add.Scalar %19787, %int18_17374, %int1_17375 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19795, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int18_17376 = torch.constant.int 18
    %int1_17377 = torch.constant.int 1
    %19796 = torch.aten.add.Scalar %19788, %int18_17376, %int1_17377 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19796, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
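    // %19789..%19796: constant offset 18 added to each scaled page id --
    // presumably selecting this transformer block's K sub-block within the
    // page (offset 18 would correspond to block 9's K entry under the
    // 2-entries-per-block layout assumed above).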
    %int4_17378 = torch.constant.int 4
    %int16_17379 = torch.constant.int 16
    %int1_17380 = torch.constant.int 1
    %int128_17381 = torch.constant.int 128
    %19797 = torch.prim.ListConstruct %int4_17378, %3095, %int16_17379, %int1_17380, %int128_17381 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19798 = torch.aten.view %19675, %19797 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19798, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17382 = torch.constant.int 4
    %int16_17383 = torch.constant.int 16
    %int1_17384 = torch.constant.int 1
    %int128_17385 = torch.constant.int 128
    %19799 = torch.prim.ListConstruct %int4_17382, %3095, %int16_17383, %int1_17384, %int128_17385 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19800 = torch.aten.view %19690, %19799 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19800, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17386 = torch.constant.int 4
    %int16_17387 = torch.constant.int 16
    %int1_17388 = torch.constant.int 1
    %int128_17389 = torch.constant.int 128
    %19801 = torch.prim.ListConstruct %int4_17386, %3095, %int16_17387, %int1_17388, %int128_17389 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19802 = torch.aten.view %19705, %19801 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19802, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17390 = torch.constant.int 4
    %int16_17391 = torch.constant.int 16
    %int1_17392 = torch.constant.int 1
    %int128_17393 = torch.constant.int 128
    %19803 = torch.prim.ListConstruct %int4_17390, %3095, %int16_17391, %int1_17392, %int128_17393 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19804 = torch.aten.view %19720, %19803 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19804, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17394 = torch.constant.int 4
    %int16_17395 = torch.constant.int 16
    %int1_17396 = torch.constant.int 1
    %int128_17397 = torch.constant.int 128
    %19805 = torch.prim.ListConstruct %int4_17394, %3095, %int16_17395, %int1_17396, %int128_17397 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19806 = torch.aten.view %19735, %19805 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19806, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17398 = torch.constant.int 4
    %int16_17399 = torch.constant.int 16
    %int1_17400 = torch.constant.int 1
    %int128_17401 = torch.constant.int 128
    %19807 = torch.prim.ListConstruct %int4_17398, %3095, %int16_17399, %int1_17400, %int128_17401 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19808 = torch.aten.view %19750, %19807 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19808, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17402 = torch.constant.int 4
    %int16_17403 = torch.constant.int 16
    %int1_17404 = torch.constant.int 1
    %int128_17405 = torch.constant.int 128
    %19809 = torch.prim.ListConstruct %int4_17402, %3095, %int16_17403, %int1_17404, %int128_17405 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19810 = torch.aten.view %19765, %19809 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19810, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17406 = torch.constant.int 4
    %int16_17407 = torch.constant.int 16
    %int1_17408 = torch.constant.int 1
    %int128_17409 = torch.constant.int 128
    %19811 = torch.prim.ListConstruct %int4_17406, %3095, %int16_17407, %int1_17408, %int128_17409 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19812 = torch.aten.view %19780, %19811 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19812, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
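    // %19798..%19812: each rotary-embedded K tensor [4, seq, 1, 128] is
    // viewed as [4, pages, 16, 1, 128], splitting the sequence into pages of
    // 16 tokens (consistent with the s0 * 16 symbolic sequence shapes above).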
    %int4_17410 = torch.constant.int 4
    %19813 = torch.aten.mul.int %int4_17410, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17411 = torch.constant.int 16
    %int1_17412 = torch.constant.int 1
    %int128_17413 = torch.constant.int 128
    %19814 = torch.prim.ListConstruct %19813, %int16_17411, %int1_17412, %int128_17413 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19815 = torch.aten.view %19798, %19814 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19815, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17414 = torch.constant.int 4
    %19816 = torch.aten.mul.int %int4_17414, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17415 = torch.constant.int 16
    %int1_17416 = torch.constant.int 1
    %int128_17417 = torch.constant.int 128
    %19817 = torch.prim.ListConstruct %19816, %int16_17415, %int1_17416, %int128_17417 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19818 = torch.aten.view %19800, %19817 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19818, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17418 = torch.constant.int 4
    %19819 = torch.aten.mul.int %int4_17418, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17419 = torch.constant.int 16
    %int1_17420 = torch.constant.int 1
    %int128_17421 = torch.constant.int 128
    %19820 = torch.prim.ListConstruct %19819, %int16_17419, %int1_17420, %int128_17421 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19821 = torch.aten.view %19802, %19820 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19821, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17422 = torch.constant.int 4
    %19822 = torch.aten.mul.int %int4_17422, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17423 = torch.constant.int 16
    %int1_17424 = torch.constant.int 1
    %int128_17425 = torch.constant.int 128
    %19823 = torch.prim.ListConstruct %19822, %int16_17423, %int1_17424, %int128_17425 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19824 = torch.aten.view %19804, %19823 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19824, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17426 = torch.constant.int 4
    %19825 = torch.aten.mul.int %int4_17426, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17427 = torch.constant.int 16
    %int1_17428 = torch.constant.int 1
    %int128_17429 = torch.constant.int 128
    %19826 = torch.prim.ListConstruct %19825, %int16_17427, %int1_17428, %int128_17429 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19827 = torch.aten.view %19806, %19826 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19827, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17430 = torch.constant.int 4
    %19828 = torch.aten.mul.int %int4_17430, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17431 = torch.constant.int 16
    %int1_17432 = torch.constant.int 1
    %int128_17433 = torch.constant.int 128
    %19829 = torch.prim.ListConstruct %19828, %int16_17431, %int1_17432, %int128_17433 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19830 = torch.aten.view %19808, %19829 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19830, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17434 = torch.constant.int 4
    %19831 = torch.aten.mul.int %int4_17434, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17435 = torch.constant.int 16
    %int1_17436 = torch.constant.int 1
    %int128_17437 = torch.constant.int 128
    %19832 = torch.prim.ListConstruct %19831, %int16_17435, %int1_17436, %int128_17437 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19833 = torch.aten.view %19810, %19832 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19833, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17438 = torch.constant.int 4
    %19834 = torch.aten.mul.int %int4_17438, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17439 = torch.constant.int 16
    %int1_17440 = torch.constant.int 1
    %int128_17441 = torch.constant.int 128
    %19835 = torch.prim.ListConstruct %19834, %int16_17439, %int1_17440, %int128_17441 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19836 = torch.aten.view %19812, %19835 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19836, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
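    // %19815..%19836: the batch and page dims are folded together, giving
    // [4*pages, 16, 1, 128] per device, presumably in preparation for a
    // scatter into the flattened page table.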
    %int4_17442 = torch.constant.int 4
    %19837 = torch.aten.mul.int %int4_17442, %3095 : !torch.int, !torch.int -> !torch.int
    %19838 = torch.prim.ListConstruct %19837 : (!torch.int) -> !torch.list<int>
    %19839 = torch.aten.view %19789, %19838 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19839, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17443 = torch.constant.int 4
    %19840 = torch.aten.mul.int %int4_17443, %3095 : !torch.int, !torch.int -> !torch.int
    %19841 = torch.prim.ListConstruct %19840 : (!torch.int) -> !torch.list<int>
    %19842 = torch.aten.view %19790, %19841 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19842, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17444 = torch.constant.int 4
    %19843 = torch.aten.mul.int %int4_17444, %3095 : !torch.int, !torch.int -> !torch.int
    %19844 = torch.prim.ListConstruct %19843 : (!torch.int) -> !torch.list<int>
    %19845 = torch.aten.view %19791, %19844 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19845, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17445 = torch.constant.int 4
    %19846 = torch.aten.mul.int %int4_17445, %3095 : !torch.int, !torch.int -> !torch.int
    %19847 = torch.prim.ListConstruct %19846 : (!torch.int) -> !torch.list<int>
    %19848 = torch.aten.view %19792, %19847 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19848, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17446 = torch.constant.int 4
    %19849 = torch.aten.mul.int %int4_17446, %3095 : !torch.int, !torch.int -> !torch.int
    %19850 = torch.prim.ListConstruct %19849 : (!torch.int) -> !torch.list<int>
    %19851 = torch.aten.view %19793, %19850 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19851, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17447 = torch.constant.int 4
    %19852 = torch.aten.mul.int %int4_17447, %3095 : !torch.int, !torch.int -> !torch.int
    %19853 = torch.prim.ListConstruct %19852 : (!torch.int) -> !torch.list<int>
    %19854 = torch.aten.view %19794, %19853 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19854, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17448 = torch.constant.int 4
    %19855 = torch.aten.mul.int %int4_17448, %3095 : !torch.int, !torch.int -> !torch.int
    %19856 = torch.prim.ListConstruct %19855 : (!torch.int) -> !torch.list<int>
    %19857 = torch.aten.view %19795, %19856 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19857, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17449 = torch.constant.int 4
    %19858 = torch.aten.mul.int %int4_17449, %3095 : !torch.int, !torch.int -> !torch.int
    %19859 = torch.prim.ListConstruct %19858 : (!torch.int) -> !torch.list<int>
    %19860 = torch.aten.view %19796, %19859 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19860, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
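    // %19839..%19860: the page-slot index tensors are flattened to [4*pages]
    // to match, one linear cache index per (batch, page) entry.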
    %int4_17450 = torch.constant.int 4
    %int16_17451 = torch.constant.int 16
    %int1_17452 = torch.constant.int 1
    %int128_17453 = torch.constant.int 128
    %19861 = torch.prim.ListConstruct %int4_17450, %3095, %int16_17451, %int1_17452, %int128_17453 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19862 = torch.aten.view %19450, %19861 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19862, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17454 = torch.constant.int 4
    %int16_17455 = torch.constant.int 16
    %int1_17456 = torch.constant.int 1
    %int128_17457 = torch.constant.int 128
    %19863 = torch.prim.ListConstruct %int4_17454, %3095, %int16_17455, %int1_17456, %int128_17457 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19864 = torch.aten.view %19452, %19863 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19864, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17458 = torch.constant.int 4
    %int16_17459 = torch.constant.int 16
    %int1_17460 = torch.constant.int 1
    %int128_17461 = torch.constant.int 128
    %19865 = torch.prim.ListConstruct %int4_17458, %3095, %int16_17459, %int1_17460, %int128_17461 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19866 = torch.aten.view %19454, %19865 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19866, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17462 = torch.constant.int 4
    %int16_17463 = torch.constant.int 16
    %int1_17464 = torch.constant.int 1
    %int128_17465 = torch.constant.int 128
    %19867 = torch.prim.ListConstruct %int4_17462, %3095, %int16_17463, %int1_17464, %int128_17465 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19868 = torch.aten.view %19456, %19867 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19868, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17466 = torch.constant.int 4
    %int16_17467 = torch.constant.int 16
    %int1_17468 = torch.constant.int 1
    %int128_17469 = torch.constant.int 128
    %19869 = torch.prim.ListConstruct %int4_17466, %3095, %int16_17467, %int1_17468, %int128_17469 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19870 = torch.aten.view %19458, %19869 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19870, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17470 = torch.constant.int 4
    %int16_17471 = torch.constant.int 16
    %int1_17472 = torch.constant.int 1
    %int128_17473 = torch.constant.int 128
    %19871 = torch.prim.ListConstruct %int4_17470, %3095, %int16_17471, %int1_17472, %int128_17473 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19872 = torch.aten.view %19460, %19871 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19872, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17474 = torch.constant.int 4
    %int16_17475 = torch.constant.int 16
    %int1_17476 = torch.constant.int 1
    %int128_17477 = torch.constant.int 128
    %19873 = torch.prim.ListConstruct %int4_17474, %3095, %int16_17475, %int1_17476, %int128_17477 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19874 = torch.aten.view %19462, %19873 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19874, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_17478 = torch.constant.int 4
    %int16_17479 = torch.constant.int 16
    %int1_17480 = torch.constant.int 1
    %int128_17481 = torch.constant.int 128
    %19875 = torch.prim.ListConstruct %int4_17478, %3095, %int16_17479, %int1_17480, %int128_17481 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19876 = torch.aten.view %19464, %19875 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %19876, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
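    // Annotation: the batch and block axes are now folded together,
    // [4, ?, 16, 1, 128] -> [4*?, 16, 1, 128] (symbolically s0 * 4 rows), so each
    // leading row addresses one cache page, matching the flattened page-index
    // tensors built above.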
    %int4_17482 = torch.constant.int 4
    %19877 = torch.aten.mul.int %int4_17482, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17483 = torch.constant.int 16
    %int1_17484 = torch.constant.int 1
    %int128_17485 = torch.constant.int 128
    %19878 = torch.prim.ListConstruct %19877, %int16_17483, %int1_17484, %int128_17485 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19879 = torch.aten.view %19862, %19878 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19879, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17486 = torch.constant.int 4
    %19880 = torch.aten.mul.int %int4_17486, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17487 = torch.constant.int 16
    %int1_17488 = torch.constant.int 1
    %int128_17489 = torch.constant.int 128
    %19881 = torch.prim.ListConstruct %19880, %int16_17487, %int1_17488, %int128_17489 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19882 = torch.aten.view %19864, %19881 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19882, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17490 = torch.constant.int 4
    %19883 = torch.aten.mul.int %int4_17490, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17491 = torch.constant.int 16
    %int1_17492 = torch.constant.int 1
    %int128_17493 = torch.constant.int 128
    %19884 = torch.prim.ListConstruct %19883, %int16_17491, %int1_17492, %int128_17493 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19885 = torch.aten.view %19866, %19884 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19885, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17494 = torch.constant.int 4
    %19886 = torch.aten.mul.int %int4_17494, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17495 = torch.constant.int 16
    %int1_17496 = torch.constant.int 1
    %int128_17497 = torch.constant.int 128
    %19887 = torch.prim.ListConstruct %19886, %int16_17495, %int1_17496, %int128_17497 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19888 = torch.aten.view %19868, %19887 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19888, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17498 = torch.constant.int 4
    %19889 = torch.aten.mul.int %int4_17498, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17499 = torch.constant.int 16
    %int1_17500 = torch.constant.int 1
    %int128_17501 = torch.constant.int 128
    %19890 = torch.prim.ListConstruct %19889, %int16_17499, %int1_17500, %int128_17501 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19891 = torch.aten.view %19870, %19890 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19891, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17502 = torch.constant.int 4
    %19892 = torch.aten.mul.int %int4_17502, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17503 = torch.constant.int 16
    %int1_17504 = torch.constant.int 1
    %int128_17505 = torch.constant.int 128
    %19893 = torch.prim.ListConstruct %19892, %int16_17503, %int1_17504, %int128_17505 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19894 = torch.aten.view %19872, %19893 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19894, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17506 = torch.constant.int 4
    %19895 = torch.aten.mul.int %int4_17506, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17507 = torch.constant.int 16
    %int1_17508 = torch.constant.int 1
    %int128_17509 = torch.constant.int 128
    %19896 = torch.prim.ListConstruct %19895, %int16_17507, %int1_17508, %int128_17509 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19897 = torch.aten.view %19874, %19896 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19897, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_17510 = torch.constant.int 4
    %19898 = torch.aten.mul.int %int4_17510, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_17511 = torch.constant.int 16
    %int1_17512 = torch.constant.int 1
    %int128_17513 = torch.constant.int 128
    %19899 = torch.prim.ListConstruct %19898, %int16_17511, %int1_17512, %int128_17513 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19900 = torch.aten.view %19876, %19899 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19900, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
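    // Annotation: adding 1 to each page index appears to select the cache
    // partition adjacent to the key slot (the K/V partition axis is innermost
    // in the flattened page layout reconstructed further down), i.e. these are
    // the destination slots for the value rows.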
    %int1_17514 = torch.constant.int 1
    %int1_17515 = torch.constant.int 1
    %19901 = torch.aten.add.Scalar %19789, %int1_17514, %int1_17515 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19901, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_17516 = torch.constant.int 1
    %int1_17517 = torch.constant.int 1
    %19902 = torch.aten.add.Scalar %19790, %int1_17516, %int1_17517 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19902, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_17518 = torch.constant.int 1
    %int1_17519 = torch.constant.int 1
    %19903 = torch.aten.add.Scalar %19791, %int1_17518, %int1_17519 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19903, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_17520 = torch.constant.int 1
    %int1_17521 = torch.constant.int 1
    %19904 = torch.aten.add.Scalar %19792, %int1_17520, %int1_17521 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19904, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_17522 = torch.constant.int 1
    %int1_17523 = torch.constant.int 1
    %19905 = torch.aten.add.Scalar %19793, %int1_17522, %int1_17523 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19905, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_17524 = torch.constant.int 1
    %int1_17525 = torch.constant.int 1
    %19906 = torch.aten.add.Scalar %19794, %int1_17524, %int1_17525 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19906, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_17526 = torch.constant.int 1
    %int1_17527 = torch.constant.int 1
    %19907 = torch.aten.add.Scalar %19795, %int1_17526, %int1_17527 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19907, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_17528 = torch.constant.int 1
    %int1_17529 = torch.constant.int 1
    %19908 = torch.aten.add.Scalar %19796, %int1_17528, %int1_17529 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %19908, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
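    // Annotation: the shifted indices are flattened to 1-D exactly like the
    // key indices earlier, so that keys and values can be written with a
    // single index_put per device below.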
    %int4_17530 = torch.constant.int 4
    %19909 = torch.aten.mul.int %int4_17530, %3095 : !torch.int, !torch.int -> !torch.int
    %19910 = torch.prim.ListConstruct %19909 : (!torch.int) -> !torch.list<int>
    %19911 = torch.aten.view %19901, %19910 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19911, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17531 = torch.constant.int 4
    %19912 = torch.aten.mul.int %int4_17531, %3095 : !torch.int, !torch.int -> !torch.int
    %19913 = torch.prim.ListConstruct %19912 : (!torch.int) -> !torch.list<int>
    %19914 = torch.aten.view %19902, %19913 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19914, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17532 = torch.constant.int 4
    %19915 = torch.aten.mul.int %int4_17532, %3095 : !torch.int, !torch.int -> !torch.int
    %19916 = torch.prim.ListConstruct %19915 : (!torch.int) -> !torch.list<int>
    %19917 = torch.aten.view %19903, %19916 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19917, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17533 = torch.constant.int 4
    %19918 = torch.aten.mul.int %int4_17533, %3095 : !torch.int, !torch.int -> !torch.int
    %19919 = torch.prim.ListConstruct %19918 : (!torch.int) -> !torch.list<int>
    %19920 = torch.aten.view %19904, %19919 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19920, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17534 = torch.constant.int 4
    %19921 = torch.aten.mul.int %int4_17534, %3095 : !torch.int, !torch.int -> !torch.int
    %19922 = torch.prim.ListConstruct %19921 : (!torch.int) -> !torch.list<int>
    %19923 = torch.aten.view %19905, %19922 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19923, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17535 = torch.constant.int 4
    %19924 = torch.aten.mul.int %int4_17535, %3095 : !torch.int, !torch.int -> !torch.int
    %19925 = torch.prim.ListConstruct %19924 : (!torch.int) -> !torch.list<int>
    %19926 = torch.aten.view %19906, %19925 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19926, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17536 = torch.constant.int 4
    %19927 = torch.aten.mul.int %int4_17536, %3095 : !torch.int, !torch.int -> !torch.int
    %19928 = torch.prim.ListConstruct %19927 : (!torch.int) -> !torch.list<int>
    %19929 = torch.aten.view %19907, %19928 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19929, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_17537 = torch.constant.int 4
    %19930 = torch.aten.mul.int %int4_17537, %3095 : !torch.int, !torch.int -> !torch.int
    %19931 = torch.prim.ListConstruct %19930 : (!torch.int) -> !torch.list<int>
    %19932 = torch.aten.view %19908, %19931 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19932, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
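    // Annotation: per device, the key-slot and value-slot index vectors are
    // concatenated along dim 0 (s0 * 4 + s0 * 4 = s0 * 8 entries), forming the
    // combined scatter index for that shard's cache buffer.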
    %19933 = torch.prim.ListConstruct %19839, %19911 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_17538 = torch.constant.int 0
    %19934 = torch.aten.cat %19933, %int0_17538 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19934, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %19935 = torch.prim.ListConstruct %19842, %19914 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_17539 = torch.constant.int 0
    %19936 = torch.aten.cat %19935, %int0_17539 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19936, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %19937 = torch.prim.ListConstruct %19845, %19917 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_17540 = torch.constant.int 0
    %19938 = torch.aten.cat %19937, %int0_17540 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19938, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %19939 = torch.prim.ListConstruct %19848, %19920 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_17541 = torch.constant.int 0
    %19940 = torch.aten.cat %19939, %int0_17541 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19940, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %19941 = torch.prim.ListConstruct %19851, %19923 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_17542 = torch.constant.int 0
    %19942 = torch.aten.cat %19941, %int0_17542 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19942, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %19943 = torch.prim.ListConstruct %19854, %19926 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_17543 = torch.constant.int 0
    %19944 = torch.aten.cat %19943, %int0_17543 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19944, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %19945 = torch.prim.ListConstruct %19857, %19929 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_17544 = torch.constant.int 0
    %19946 = torch.aten.cat %19945, %int0_17544 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19946, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %19947 = torch.prim.ListConstruct %19860, %19932 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_17545 = torch.constant.int 0
    %19948 = torch.aten.cat %19947, %int0_17545 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %19948, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
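    // Annotation: the corresponding key and value update rows are concatenated
    // in the same order as the indices above, giving the combined scatter
    // source of shape [s0 * 8, 16, 1, 128] per device.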
    %19949 = torch.prim.ListConstruct %19815, %19879 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_17546 = torch.constant.int 0
    %19950 = torch.aten.cat %19949, %int0_17546 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19950, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %19951 = torch.prim.ListConstruct %19818, %19882 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_17547 = torch.constant.int 0
    %19952 = torch.aten.cat %19951, %int0_17547 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19952, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %19953 = torch.prim.ListConstruct %19821, %19885 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_17548 = torch.constant.int 0
    %19954 = torch.aten.cat %19953, %int0_17548 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19954, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %19955 = torch.prim.ListConstruct %19824, %19888 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_17549 = torch.constant.int 0
    %19956 = torch.aten.cat %19955, %int0_17549 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19956, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %19957 = torch.prim.ListConstruct %19827, %19891 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_17550 = torch.constant.int 0
    %19958 = torch.aten.cat %19957, %int0_17550 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19958, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %19959 = torch.prim.ListConstruct %19830, %19894 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_17551 = torch.constant.int 0
    %19960 = torch.aten.cat %19959, %int0_17551 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19960, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %19961 = torch.prim.ListConstruct %19833, %19897 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_17552 = torch.constant.int 0
    %19962 = torch.aten.cat %19961, %int0_17552 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19962, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %19963 = torch.prim.ListConstruct %19836, %19900 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_17553 = torch.constant.int 0
    %19964 = torch.aten.cat %19963, %int0_17553 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19964, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
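    // Annotation: the next eight blocks (one per device shard) update the paged
    // KV cache in place. Each cache buffer [?, 131072] is viewed as
    // [pages, 32, 2, 16, 1, 128]; the page stride checks out as
    // 131072 = 32 (transformer blocks) x 2 (K/V partitions) x 16 (positions)
    // x 1 (head per shard) x 128 (head_dim). It is then flattened to
    // [pages * 64, 16, 1, 128], scattered into via index_put with the combined
    // indices/sources built above, and viewed back to [?, 131072].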
    %int32_17554 = torch.constant.int 32
    %int2_17555 = torch.constant.int 2
    %int16_17556 = torch.constant.int 16
    %int1_17557 = torch.constant.int 1
    %int128_17558 = torch.constant.int 128
    %19965 = torch.prim.ListConstruct %3023, %int32_17554, %int2_17555, %int16_17556, %int1_17557, %int128_17558 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19966 = torch.aten.view %18115, %19965 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %19966, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_17559 = torch.constant.int 32
    %19967 = torch.aten.mul.int %3023, %int32_17559 : !torch.int, !torch.int -> !torch.int
    %int2_17560 = torch.constant.int 2
    %19968 = torch.aten.mul.int %19967, %int2_17560 : !torch.int, !torch.int -> !torch.int
    %int16_17561 = torch.constant.int 16
    %int1_17562 = torch.constant.int 1
    %int128_17563 = torch.constant.int 128
    %19969 = torch.prim.ListConstruct %19968, %int16_17561, %int1_17562, %int128_17563 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19970 = torch.aten.view %19966, %19969 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19970, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %19971 = torch.prim.ListConstruct %19934 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_17564 = torch.constant.bool false
    %19972 = torch.aten.index_put %19970, %19971, %19950, %false_17564 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19972, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_17565 = torch.constant.int 32
    %int2_17566 = torch.constant.int 2
    %int16_17567 = torch.constant.int 16
    %int1_17568 = torch.constant.int 1
    %int128_17569 = torch.constant.int 128
    %19973 = torch.prim.ListConstruct %3023, %int32_17565, %int2_17566, %int16_17567, %int1_17568, %int128_17569 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19974 = torch.aten.view %19972, %19973 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %19974, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_17570 = torch.constant.int 131072
    %19975 = torch.prim.ListConstruct %3023, %int131072_17570 : (!torch.int, !torch.int) -> !torch.list<int>
    %19976 = torch.aten.view %19974, %19975 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %19976, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_17571 = torch.constant.int 32
    %int2_17572 = torch.constant.int 2
    %int16_17573 = torch.constant.int 16
    %int1_17574 = torch.constant.int 1
    %int128_17575 = torch.constant.int 128
    %19977 = torch.prim.ListConstruct %3026, %int32_17571, %int2_17572, %int16_17573, %int1_17574, %int128_17575 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19978 = torch.aten.view %18127, %19977 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %19978, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_17576 = torch.constant.int 32
    %19979 = torch.aten.mul.int %3026, %int32_17576 : !torch.int, !torch.int -> !torch.int
    %int2_17577 = torch.constant.int 2
    %19980 = torch.aten.mul.int %19979, %int2_17577 : !torch.int, !torch.int -> !torch.int
    %int16_17578 = torch.constant.int 16
    %int1_17579 = torch.constant.int 1
    %int128_17580 = torch.constant.int 128
    %19981 = torch.prim.ListConstruct %19980, %int16_17578, %int1_17579, %int128_17580 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19982 = torch.aten.view %19978, %19981 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19982, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %19983 = torch.prim.ListConstruct %19936 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_17581 = torch.constant.bool false
    %19984 = torch.aten.index_put %19982, %19983, %19952, %false_17581 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19984, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_17582 = torch.constant.int 32
    %int2_17583 = torch.constant.int 2
    %int16_17584 = torch.constant.int 16
    %int1_17585 = torch.constant.int 1
    %int128_17586 = torch.constant.int 128
    %19985 = torch.prim.ListConstruct %3026, %int32_17582, %int2_17583, %int16_17584, %int1_17585, %int128_17586 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19986 = torch.aten.view %19984, %19985 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %19986, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_17587 = torch.constant.int 131072
    %19987 = torch.prim.ListConstruct %3026, %int131072_17587 : (!torch.int, !torch.int) -> !torch.list<int>
    %19988 = torch.aten.view %19986, %19987 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %19988, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_17588 = torch.constant.int 32
    %int2_17589 = torch.constant.int 2
    %int16_17590 = torch.constant.int 16
    %int1_17591 = torch.constant.int 1
    %int128_17592 = torch.constant.int 128
    %19989 = torch.prim.ListConstruct %3029, %int32_17588, %int2_17589, %int16_17590, %int1_17591, %int128_17592 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19990 = torch.aten.view %18139, %19989 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %19990, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_17593 = torch.constant.int 32
    %19991 = torch.aten.mul.int %3029, %int32_17593 : !torch.int, !torch.int -> !torch.int
    %int2_17594 = torch.constant.int 2
    %19992 = torch.aten.mul.int %19991, %int2_17594 : !torch.int, !torch.int -> !torch.int
    %int16_17595 = torch.constant.int 16
    %int1_17596 = torch.constant.int 1
    %int128_17597 = torch.constant.int 128
    %19993 = torch.prim.ListConstruct %19992, %int16_17595, %int1_17596, %int128_17597 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19994 = torch.aten.view %19990, %19993 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19994, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %19995 = torch.prim.ListConstruct %19938 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_17598 = torch.constant.bool false
    %19996 = torch.aten.index_put %19994, %19995, %19954, %false_17598 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %19996, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_17599 = torch.constant.int 32
    %int2_17600 = torch.constant.int 2
    %int16_17601 = torch.constant.int 16
    %int1_17602 = torch.constant.int 1
    %int128_17603 = torch.constant.int 128
    %19997 = torch.prim.ListConstruct %3029, %int32_17599, %int2_17600, %int16_17601, %int1_17602, %int128_17603 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %19998 = torch.aten.view %19996, %19997 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %19998, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_17604 = torch.constant.int 131072
    %19999 = torch.prim.ListConstruct %3029, %int131072_17604 : (!torch.int, !torch.int) -> !torch.list<int>
    %20000 = torch.aten.view %19998, %19999 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %20000, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_17605 = torch.constant.int 32
    %int2_17606 = torch.constant.int 2
    %int16_17607 = torch.constant.int 16
    %int1_17608 = torch.constant.int 1
    %int128_17609 = torch.constant.int 128
    %20001 = torch.prim.ListConstruct %3032, %int32_17605, %int2_17606, %int16_17607, %int1_17608, %int128_17609 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20002 = torch.aten.view %18151, %20001 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %20002, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_17610 = torch.constant.int 32
    %20003 = torch.aten.mul.int %3032, %int32_17610 : !torch.int, !torch.int -> !torch.int
    %int2_17611 = torch.constant.int 2
    %20004 = torch.aten.mul.int %20003, %int2_17611 : !torch.int, !torch.int -> !torch.int
    %int16_17612 = torch.constant.int 16
    %int1_17613 = torch.constant.int 1
    %int128_17614 = torch.constant.int 128
    %20005 = torch.prim.ListConstruct %20004, %int16_17612, %int1_17613, %int128_17614 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20006 = torch.aten.view %20002, %20005 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %20006, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %20007 = torch.prim.ListConstruct %19940 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_17615 = torch.constant.bool false
    %20008 = torch.aten.index_put %20006, %20007, %19956, %false_17615 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %20008, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_17616 = torch.constant.int 32
    %int2_17617 = torch.constant.int 2
    %int16_17618 = torch.constant.int 16
    %int1_17619 = torch.constant.int 1
    %int128_17620 = torch.constant.int 128
    %20009 = torch.prim.ListConstruct %3032, %int32_17616, %int2_17617, %int16_17618, %int1_17619, %int128_17620 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20010 = torch.aten.view %20008, %20009 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %20010, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_17621 = torch.constant.int 131072
    %20011 = torch.prim.ListConstruct %3032, %int131072_17621 : (!torch.int, !torch.int) -> !torch.list<int>
    %20012 = torch.aten.view %20010, %20011 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %20012, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_17622 = torch.constant.int 32
    %int2_17623 = torch.constant.int 2
    %int16_17624 = torch.constant.int 16
    %int1_17625 = torch.constant.int 1
    %int128_17626 = torch.constant.int 128
    %20013 = torch.prim.ListConstruct %3035, %int32_17622, %int2_17623, %int16_17624, %int1_17625, %int128_17626 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20014 = torch.aten.view %18163, %20013 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %20014, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_17627 = torch.constant.int 32
    %20015 = torch.aten.mul.int %3035, %int32_17627 : !torch.int, !torch.int -> !torch.int
    %int2_17628 = torch.constant.int 2
    %20016 = torch.aten.mul.int %20015, %int2_17628 : !torch.int, !torch.int -> !torch.int
    %int16_17629 = torch.constant.int 16
    %int1_17630 = torch.constant.int 1
    %int128_17631 = torch.constant.int 128
    %20017 = torch.prim.ListConstruct %20016, %int16_17629, %int1_17630, %int128_17631 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20018 = torch.aten.view %20014, %20017 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %20018, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %20019 = torch.prim.ListConstruct %19942 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_17632 = torch.constant.bool false
    %20020 = torch.aten.index_put %20018, %20019, %19958, %false_17632 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %20020, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_17633 = torch.constant.int 32
    %int2_17634 = torch.constant.int 2
    %int16_17635 = torch.constant.int 16
    %int1_17636 = torch.constant.int 1
    %int128_17637 = torch.constant.int 128
    %20021 = torch.prim.ListConstruct %3035, %int32_17633, %int2_17634, %int16_17635, %int1_17636, %int128_17637 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20022 = torch.aten.view %20020, %20021 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %20022, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_17638 = torch.constant.int 131072
    %20023 = torch.prim.ListConstruct %3035, %int131072_17638 : (!torch.int, !torch.int) -> !torch.list<int>
    %20024 = torch.aten.view %20022, %20023 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %20024, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_17639 = torch.constant.int 32
    %int2_17640 = torch.constant.int 2
    %int16_17641 = torch.constant.int 16
    %int1_17642 = torch.constant.int 1
    %int128_17643 = torch.constant.int 128
    %20025 = torch.prim.ListConstruct %3038, %int32_17639, %int2_17640, %int16_17641, %int1_17642, %int128_17643 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20026 = torch.aten.view %18175, %20025 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %20026, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_17644 = torch.constant.int 32
    %20027 = torch.aten.mul.int %3038, %int32_17644 : !torch.int, !torch.int -> !torch.int
    %int2_17645 = torch.constant.int 2
    %20028 = torch.aten.mul.int %20027, %int2_17645 : !torch.int, !torch.int -> !torch.int
    %int16_17646 = torch.constant.int 16
    %int1_17647 = torch.constant.int 1
    %int128_17648 = torch.constant.int 128
    %20029 = torch.prim.ListConstruct %20028, %int16_17646, %int1_17647, %int128_17648 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20030 = torch.aten.view %20026, %20029 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %20030, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %20031 = torch.prim.ListConstruct %19944 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_17649 = torch.constant.bool false
    %20032 = torch.aten.index_put %20030, %20031, %19960, %false_17649 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %20032, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_17650 = torch.constant.int 32
    %int2_17651 = torch.constant.int 2
    %int16_17652 = torch.constant.int 16
    %int1_17653 = torch.constant.int 1
    %int128_17654 = torch.constant.int 128
    %20033 = torch.prim.ListConstruct %3038, %int32_17650, %int2_17651, %int16_17652, %int1_17653, %int128_17654 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20034 = torch.aten.view %20032, %20033 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %20034, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_17655 = torch.constant.int 131072
    %20035 = torch.prim.ListConstruct %3038, %int131072_17655 : (!torch.int, !torch.int) -> !torch.list<int>
    %20036 = torch.aten.view %20034, %20035 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %20036, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_17656 = torch.constant.int 32
    %int2_17657 = torch.constant.int 2
    %int16_17658 = torch.constant.int 16
    %int1_17659 = torch.constant.int 1
    %int128_17660 = torch.constant.int 128
    %20037 = torch.prim.ListConstruct %3041, %int32_17656, %int2_17657, %int16_17658, %int1_17659, %int128_17660 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20038 = torch.aten.view %18187, %20037 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %20038, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_17661 = torch.constant.int 32
    %20039 = torch.aten.mul.int %3041, %int32_17661 : !torch.int, !torch.int -> !torch.int
    %int2_17662 = torch.constant.int 2
    %20040 = torch.aten.mul.int %20039, %int2_17662 : !torch.int, !torch.int -> !torch.int
    %int16_17663 = torch.constant.int 16
    %int1_17664 = torch.constant.int 1
    %int128_17665 = torch.constant.int 128
    %20041 = torch.prim.ListConstruct %20040, %int16_17663, %int1_17664, %int128_17665 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20042 = torch.aten.view %20038, %20041 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %20042, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %20043 = torch.prim.ListConstruct %19946 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_17666 = torch.constant.bool false
    %20044 = torch.aten.index_put %20042, %20043, %19962, %false_17666 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %20044, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_17667 = torch.constant.int 32
    %int2_17668 = torch.constant.int 2
    %int16_17669 = torch.constant.int 16
    %int1_17670 = torch.constant.int 1
    %int128_17671 = torch.constant.int 128
    %20045 = torch.prim.ListConstruct %3041, %int32_17667, %int2_17668, %int16_17669, %int1_17670, %int128_17671 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20046 = torch.aten.view %20044, %20045 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %20046, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_17672 = torch.constant.int 131072
    %20047 = torch.prim.ListConstruct %3041, %int131072_17672 : (!torch.int, !torch.int) -> !torch.list<int>
    %20048 = torch.aten.view %20046, %20047 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %20048, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_17673 = torch.constant.int 32
    %int2_17674 = torch.constant.int 2
    %int16_17675 = torch.constant.int 16
    %int1_17676 = torch.constant.int 1
    %int128_17677 = torch.constant.int 128
    %20049 = torch.prim.ListConstruct %3044, %int32_17673, %int2_17674, %int16_17675, %int1_17676, %int128_17677 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20050 = torch.aten.view %18199, %20049 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %20050, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_17678 = torch.constant.int 32
    %20051 = torch.aten.mul.int %3044, %int32_17678 : !torch.int, !torch.int -> !torch.int
    %int2_17679 = torch.constant.int 2
    %20052 = torch.aten.mul.int %20051, %int2_17679 : !torch.int, !torch.int -> !torch.int
    %int16_17680 = torch.constant.int 16
    %int1_17681 = torch.constant.int 1
    %int128_17682 = torch.constant.int 128
    %20053 = torch.prim.ListConstruct %20052, %int16_17680, %int1_17681, %int128_17682 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20054 = torch.aten.view %20050, %20053 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %20054, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %20055 = torch.prim.ListConstruct %19948 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_17683 = torch.constant.bool false
    %20056 = torch.aten.index_put %20054, %20055, %19964, %false_17683 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %20056, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_17684 = torch.constant.int 32
    %int2_17685 = torch.constant.int 2
    %int16_17686 = torch.constant.int 16
    %int1_17687 = torch.constant.int 1
    %int128_17688 = torch.constant.int 128
    %20057 = torch.prim.ListConstruct %3044, %int32_17684, %int2_17685, %int16_17686, %int1_17687, %int128_17688 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20058 = torch.aten.view %20056, %20057 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %20058, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_17689 = torch.constant.int 131072
    %20059 = torch.prim.ListConstruct %3044, %int131072_17689 : (!torch.int, !torch.int) -> !torch.list<int>
    %20060 = torch.aten.view %20058, %20059 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %20060, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
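    // Annotation: attention preparation starts here. One group of per-device
    // KV tensors [4, seq, 1, 128] (%19675...%19780, likely the rotary-embedded
    // keys) gains a broadcast axis at dim -2 ahead of the grouped-query
    // expansion below.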
    %int-2_17690 = torch.constant.int -2
    %20061 = torch.aten.unsqueeze %19675, %int-2_17690 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17691 = torch.constant.int -2
    %20062 = torch.aten.unsqueeze %19690, %int-2_17691 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17692 = torch.constant.int -2
    %20063 = torch.aten.unsqueeze %19705, %int-2_17692 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17693 = torch.constant.int -2
    %20064 = torch.aten.unsqueeze %19720, %int-2_17693 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17694 = torch.constant.int -2
    %20065 = torch.aten.unsqueeze %19735, %int-2_17694 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17695 = torch.constant.int -2
    %20066 = torch.aten.unsqueeze %19750, %int-2_17695 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17696 = torch.constant.int -2
    %20067 = torch.aten.unsqueeze %19765, %int-2_17696 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17697 = torch.constant.int -2
    %20068 = torch.aten.unsqueeze %19780, %int-2_17697 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
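    // Annotation: expand broadcasts the singleton axis to 4, replicating the
    // single KV head per shard across that shard's 4 query heads -- consistent
    // with grouped-query attention (32 Q heads / 8 KV heads spread over 8
    // devices; cf. the 512x4096 attn_q weight shards).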
    %int4_17698 = torch.constant.int 4
    %int1_17699 = torch.constant.int 1
    %int4_17700 = torch.constant.int 4
    %int128_17701 = torch.constant.int 128
    %20069 = torch.prim.ListConstruct %int4_17698, %19661, %int1_17699, %int4_17700, %int128_17701 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17702 = torch.constant.bool false
    %20070 = torch.aten.expand %20061, %20069, %false_17702 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17703 = torch.constant.int 4
    %int1_17704 = torch.constant.int 1
    %int4_17705 = torch.constant.int 4
    %int128_17706 = torch.constant.int 128
    %20071 = torch.prim.ListConstruct %int4_17703, %19661, %int1_17704, %int4_17705, %int128_17706 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17707 = torch.constant.bool false
    %20072 = torch.aten.expand %20062, %20071, %false_17707 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17708 = torch.constant.int 4
    %int1_17709 = torch.constant.int 1
    %int4_17710 = torch.constant.int 4
    %int128_17711 = torch.constant.int 128
    %20073 = torch.prim.ListConstruct %int4_17708, %19661, %int1_17709, %int4_17710, %int128_17711 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17712 = torch.constant.bool false
    %20074 = torch.aten.expand %20063, %20073, %false_17712 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17713 = torch.constant.int 4
    %int1_17714 = torch.constant.int 1
    %int4_17715 = torch.constant.int 4
    %int128_17716 = torch.constant.int 128
    %20075 = torch.prim.ListConstruct %int4_17713, %19661, %int1_17714, %int4_17715, %int128_17716 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17717 = torch.constant.bool false
    %20076 = torch.aten.expand %20064, %20075, %false_17717 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17718 = torch.constant.int 4
    %int1_17719 = torch.constant.int 1
    %int4_17720 = torch.constant.int 4
    %int128_17721 = torch.constant.int 128
    %20077 = torch.prim.ListConstruct %int4_17718, %19661, %int1_17719, %int4_17720, %int128_17721 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17722 = torch.constant.bool false
    %20078 = torch.aten.expand %20065, %20077, %false_17722 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17723 = torch.constant.int 4
    %int1_17724 = torch.constant.int 1
    %int4_17725 = torch.constant.int 4
    %int128_17726 = torch.constant.int 128
    %20079 = torch.prim.ListConstruct %int4_17723, %19661, %int1_17724, %int4_17725, %int128_17726 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17727 = torch.constant.bool false
    %20080 = torch.aten.expand %20066, %20079, %false_17727 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17728 = torch.constant.int 4
    %int1_17729 = torch.constant.int 1
    %int4_17730 = torch.constant.int 4
    %int128_17731 = torch.constant.int 128
    %20081 = torch.prim.ListConstruct %int4_17728, %19661, %int1_17729, %int4_17730, %int128_17731 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17732 = torch.constant.bool false
    %20082 = torch.aten.expand %20067, %20081, %false_17732 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17733 = torch.constant.int 4
    %int1_17734 = torch.constant.int 1
    %int4_17735 = torch.constant.int 4
    %int128_17736 = torch.constant.int 128
    %20083 = torch.prim.ListConstruct %int4_17733, %19661, %int1_17734, %int4_17735, %int128_17736 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17737 = torch.constant.bool false
    %20084 = torch.aten.expand %20068, %20083, %false_17737 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
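    // Annotation: the broadcast copies are then folded into the head axis,
    // [4, seq, 1, 4, 128] -> [4, seq, 4, 128], so the keys carry the same
    // per-shard head count as the queries.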
    %int4_17738 = torch.constant.int 4
    %int4_17739 = torch.constant.int 4
    %int128_17740 = torch.constant.int 128
    %20085 = torch.prim.ListConstruct %int4_17738, %19661, %int4_17739, %int128_17740 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20086 = torch.aten.view %20070, %20085 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17741 = torch.constant.int 4
    %int4_17742 = torch.constant.int 4
    %int128_17743 = torch.constant.int 128
    %20087 = torch.prim.ListConstruct %int4_17741, %19661, %int4_17742, %int128_17743 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20088 = torch.aten.view %20072, %20087 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17744 = torch.constant.int 4
    %int4_17745 = torch.constant.int 4
    %int128_17746 = torch.constant.int 128
    %20089 = torch.prim.ListConstruct %int4_17744, %19661, %int4_17745, %int128_17746 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20090 = torch.aten.view %20074, %20089 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17747 = torch.constant.int 4
    %int4_17748 = torch.constant.int 4
    %int128_17749 = torch.constant.int 128
    %20091 = torch.prim.ListConstruct %int4_17747, %19661, %int4_17748, %int128_17749 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20092 = torch.aten.view %20076, %20091 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17750 = torch.constant.int 4
    %int4_17751 = torch.constant.int 4
    %int128_17752 = torch.constant.int 128
    %20093 = torch.prim.ListConstruct %int4_17750, %19661, %int4_17751, %int128_17752 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20094 = torch.aten.view %20078, %20093 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17753 = torch.constant.int 4
    %int4_17754 = torch.constant.int 4
    %int128_17755 = torch.constant.int 128
    %20095 = torch.prim.ListConstruct %int4_17753, %19661, %int4_17754, %int128_17755 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20096 = torch.aten.view %20080, %20095 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17756 = torch.constant.int 4
    %int4_17757 = torch.constant.int 4
    %int128_17758 = torch.constant.int 128
    %20097 = torch.prim.ListConstruct %int4_17756, %19661, %int4_17757, %int128_17758 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20098 = torch.aten.view %20082, %20097 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17759 = torch.constant.int 4
    %int4_17760 = torch.constant.int 4
    %int128_17761 = torch.constant.int 128
    %20099 = torch.prim.ListConstruct %int4_17759, %19661, %int4_17760, %int128_17761 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20100 = torch.aten.view %20084, %20099 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
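    // The views above collapse [4,?,1,4,128] -> [4,?,4,128] for the eight shards
    // (%20086..%20100); these become the key operands of the attention calls below.
    // Next, each of the eight [4,?,1,128] value-side tensors gains a singleton axis
    // at dim -2, yielding [4,?,1,1,128] in preparation for the same head broadcast.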
    %int-2_17762 = torch.constant.int -2
    %20101 = torch.aten.unsqueeze %19450, %int-2_17762 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17763 = torch.constant.int -2
    %20102 = torch.aten.unsqueeze %19452, %int-2_17763 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17764 = torch.constant.int -2
    %20103 = torch.aten.unsqueeze %19454, %int-2_17764 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17765 = torch.constant.int -2
    %20104 = torch.aten.unsqueeze %19456, %int-2_17765 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17766 = torch.constant.int -2
    %20105 = torch.aten.unsqueeze %19458, %int-2_17766 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17767 = torch.constant.int -2
    %20106 = torch.aten.unsqueeze %19460, %int-2_17767 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17768 = torch.constant.int -2
    %20107 = torch.aten.unsqueeze %19462, %int-2_17768 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_17769 = torch.constant.int -2
    %20108 = torch.aten.unsqueeze %19464, %int-2_17769 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %20108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
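    // Read the dynamic sequence length from dim 1, then expand the singleton head
    // axis to 4: [4,?,1,1,128] -> [4,?,1,4,128] per shard. This replicates one KV
    // head across the 4 query heads, the usual grouped-query-attention broadcast.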
    %int1_17770 = torch.constant.int 1
    %20109 = torch.aten.size.int %19374, %int1_17770 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_17771 = torch.constant.int 4
    %int1_17772 = torch.constant.int 1
    %int4_17773 = torch.constant.int 4
    %int128_17774 = torch.constant.int 128
    %20110 = torch.prim.ListConstruct %int4_17771, %20109, %int1_17772, %int4_17773, %int128_17774 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17775 = torch.constant.bool false
    %20111 = torch.aten.expand %20101, %20110, %false_17775 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17776 = torch.constant.int 4
    %int1_17777 = torch.constant.int 1
    %int4_17778 = torch.constant.int 4
    %int128_17779 = torch.constant.int 128
    %20112 = torch.prim.ListConstruct %int4_17776, %20109, %int1_17777, %int4_17778, %int128_17779 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17780 = torch.constant.bool false
    %20113 = torch.aten.expand %20102, %20112, %false_17780 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17781 = torch.constant.int 4
    %int1_17782 = torch.constant.int 1
    %int4_17783 = torch.constant.int 4
    %int128_17784 = torch.constant.int 128
    %20114 = torch.prim.ListConstruct %int4_17781, %20109, %int1_17782, %int4_17783, %int128_17784 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17785 = torch.constant.bool false
    %20115 = torch.aten.expand %20103, %20114, %false_17785 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17786 = torch.constant.int 4
    %int1_17787 = torch.constant.int 1
    %int4_17788 = torch.constant.int 4
    %int128_17789 = torch.constant.int 128
    %20116 = torch.prim.ListConstruct %int4_17786, %20109, %int1_17787, %int4_17788, %int128_17789 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17790 = torch.constant.bool false
    %20117 = torch.aten.expand %20104, %20116, %false_17790 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17791 = torch.constant.int 4
    %int1_17792 = torch.constant.int 1
    %int4_17793 = torch.constant.int 4
    %int128_17794 = torch.constant.int 128
    %20118 = torch.prim.ListConstruct %int4_17791, %20109, %int1_17792, %int4_17793, %int128_17794 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17795 = torch.constant.bool false
    %20119 = torch.aten.expand %20105, %20118, %false_17795 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17796 = torch.constant.int 4
    %int1_17797 = torch.constant.int 1
    %int4_17798 = torch.constant.int 4
    %int128_17799 = torch.constant.int 128
    %20120 = torch.prim.ListConstruct %int4_17796, %20109, %int1_17797, %int4_17798, %int128_17799 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17800 = torch.constant.bool false
    %20121 = torch.aten.expand %20106, %20120, %false_17800 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17801 = torch.constant.int 4
    %int1_17802 = torch.constant.int 1
    %int4_17803 = torch.constant.int 4
    %int128_17804 = torch.constant.int 128
    %20122 = torch.prim.ListConstruct %int4_17801, %20109, %int1_17802, %int4_17803, %int128_17804 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17805 = torch.constant.bool false
    %20123 = torch.aten.expand %20107, %20122, %false_17805 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_17806 = torch.constant.int 4
    %int1_17807 = torch.constant.int 1
    %int4_17808 = torch.constant.int 4
    %int128_17809 = torch.constant.int 128
    %20124 = torch.prim.ListConstruct %int4_17806, %20109, %int1_17807, %int4_17808, %int128_17809 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_17810 = torch.constant.bool false
    %20125 = torch.aten.expand %20108, %20124, %false_17810 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %20125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
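    // Collapse the broadcast axis: view [4,?,1,4,128] -> [4,?,4,128] so the
    // expanded value tensors match the key and query layouts.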
    %int4_17811 = torch.constant.int 4
    %int4_17812 = torch.constant.int 4
    %int128_17813 = torch.constant.int 128
    %20126 = torch.prim.ListConstruct %int4_17811, %20109, %int4_17812, %int128_17813 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20127 = torch.aten.view %20111, %20126 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17814 = torch.constant.int 4
    %int4_17815 = torch.constant.int 4
    %int128_17816 = torch.constant.int 128
    %20128 = torch.prim.ListConstruct %int4_17814, %20109, %int4_17815, %int128_17816 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20129 = torch.aten.view %20113, %20128 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17817 = torch.constant.int 4
    %int4_17818 = torch.constant.int 4
    %int128_17819 = torch.constant.int 128
    %20130 = torch.prim.ListConstruct %int4_17817, %20109, %int4_17818, %int128_17819 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20131 = torch.aten.view %20115, %20130 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17820 = torch.constant.int 4
    %int4_17821 = torch.constant.int 4
    %int128_17822 = torch.constant.int 128
    %20132 = torch.prim.ListConstruct %int4_17820, %20109, %int4_17821, %int128_17822 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20133 = torch.aten.view %20117, %20132 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17823 = torch.constant.int 4
    %int4_17824 = torch.constant.int 4
    %int128_17825 = torch.constant.int 128
    %20134 = torch.prim.ListConstruct %int4_17823, %20109, %int4_17824, %int128_17825 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20135 = torch.aten.view %20119, %20134 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17826 = torch.constant.int 4
    %int4_17827 = torch.constant.int 4
    %int128_17828 = torch.constant.int 128
    %20136 = torch.prim.ListConstruct %int4_17826, %20109, %int4_17827, %int128_17828 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20137 = torch.aten.view %20121, %20136 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17829 = torch.constant.int 4
    %int4_17830 = torch.constant.int 4
    %int128_17831 = torch.constant.int 128
    %20138 = torch.prim.ListConstruct %int4_17829, %20109, %int4_17830, %int128_17831 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20139 = torch.aten.view %20123, %20138 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_17832 = torch.constant.int 4
    %int4_17833 = torch.constant.int 4
    %int128_17834 = torch.constant.int 128
    %20140 = torch.prim.ListConstruct %int4_17832, %20109, %int4_17833, %int128_17834 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20141 = torch.aten.view %20125, %20140 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
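    // Transpose dims 1 and 2 on the eight Q, K and V shards:
    // [4,?,4,128] -> [4,4,?,128], i.e. heads before sequence, the layout
    // expected by the attention op below.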
    %int1_17835 = torch.constant.int 1
    %int2_17836 = torch.constant.int 2
    %20142 = torch.aten.transpose.int %19517, %int1_17835, %int2_17836 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20142, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17837 = torch.constant.int 1
    %int2_17838 = torch.constant.int 2
    %20143 = torch.aten.transpose.int %19532, %int1_17837, %int2_17838 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20143, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17839 = torch.constant.int 1
    %int2_17840 = torch.constant.int 2
    %20144 = torch.aten.transpose.int %19547, %int1_17839, %int2_17840 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20144, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17841 = torch.constant.int 1
    %int2_17842 = torch.constant.int 2
    %20145 = torch.aten.transpose.int %19562, %int1_17841, %int2_17842 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20145, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17843 = torch.constant.int 1
    %int2_17844 = torch.constant.int 2
    %20146 = torch.aten.transpose.int %19577, %int1_17843, %int2_17844 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20146, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17845 = torch.constant.int 1
    %int2_17846 = torch.constant.int 2
    %20147 = torch.aten.transpose.int %19592, %int1_17845, %int2_17846 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20147, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17847 = torch.constant.int 1
    %int2_17848 = torch.constant.int 2
    %20148 = torch.aten.transpose.int %19607, %int1_17847, %int2_17848 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20148, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17849 = torch.constant.int 1
    %int2_17850 = torch.constant.int 2
    %20149 = torch.aten.transpose.int %19622, %int1_17849, %int2_17850 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20149, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17851 = torch.constant.int 1
    %int2_17852 = torch.constant.int 2
    %20150 = torch.aten.transpose.int %20086, %int1_17851, %int2_17852 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20150, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17853 = torch.constant.int 1
    %int2_17854 = torch.constant.int 2
    %20151 = torch.aten.transpose.int %20088, %int1_17853, %int2_17854 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20151, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17855 = torch.constant.int 1
    %int2_17856 = torch.constant.int 2
    %20152 = torch.aten.transpose.int %20090, %int1_17855, %int2_17856 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20152, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17857 = torch.constant.int 1
    %int2_17858 = torch.constant.int 2
    %20153 = torch.aten.transpose.int %20092, %int1_17857, %int2_17858 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20153, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17859 = torch.constant.int 1
    %int2_17860 = torch.constant.int 2
    %20154 = torch.aten.transpose.int %20094, %int1_17859, %int2_17860 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20154, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17861 = torch.constant.int 1
    %int2_17862 = torch.constant.int 2
    %20155 = torch.aten.transpose.int %20096, %int1_17861, %int2_17862 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20155, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17863 = torch.constant.int 1
    %int2_17864 = torch.constant.int 2
    %20156 = torch.aten.transpose.int %20098, %int1_17863, %int2_17864 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20156, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17865 = torch.constant.int 1
    %int2_17866 = torch.constant.int 2
    %20157 = torch.aten.transpose.int %20100, %int1_17865, %int2_17866 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20157, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17867 = torch.constant.int 1
    %int2_17868 = torch.constant.int 2
    %20158 = torch.aten.transpose.int %20127, %int1_17867, %int2_17868 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20158, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17869 = torch.constant.int 1
    %int2_17870 = torch.constant.int 2
    %20159 = torch.aten.transpose.int %20129, %int1_17869, %int2_17870 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20159, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17871 = torch.constant.int 1
    %int2_17872 = torch.constant.int 2
    %20160 = torch.aten.transpose.int %20131, %int1_17871, %int2_17872 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20160, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17873 = torch.constant.int 1
    %int2_17874 = torch.constant.int 2
    %20161 = torch.aten.transpose.int %20133, %int1_17873, %int2_17874 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20161, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17875 = torch.constant.int 1
    %int2_17876 = torch.constant.int 2
    %20162 = torch.aten.transpose.int %20135, %int1_17875, %int2_17876 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20162, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17877 = torch.constant.int 1
    %int2_17878 = torch.constant.int 2
    %20163 = torch.aten.transpose.int %20137, %int1_17877, %int2_17878 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20163, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17879 = torch.constant.int 1
    %int2_17880 = torch.constant.int 2
    %20164 = torch.aten.transpose.int %20139, %int1_17879, %int2_17880 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20164, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_17881 = torch.constant.int 1
    %int2_17882 = torch.constant.int 2
    %20165 = torch.aten.transpose.int %20141, %int1_17881, %int2_17882 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %20165, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
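    // Eight scaled-dot-product flash-attention calls, one per device shard, with
    // dropout 0.0 and is_causal = true. Each returns the [4,4,?,128] attention
    // output plus an f32 logsumexp tensor; only result #0 is used in this block.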
    %float0.000000e00_17883 = torch.constant.float 0.000000e+00
    %true_17884 = torch.constant.bool true
    %none_17885 = torch.constant.none
    %none_17886 = torch.constant.none
    %20166:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%20142, %20150, %20158, %float0.000000e00_17883, %true_17884, %none_17885, %none_17886) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %20166#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_17887 = torch.constant.float 0.000000e+00
    %true_17888 = torch.constant.bool true
    %none_17889 = torch.constant.none
    %none_17890 = torch.constant.none
    %20167:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%20143, %20151, %20159, %float0.000000e00_17887, %true_17888, %none_17889, %none_17890) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %20167#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_17891 = torch.constant.float 0.000000e+00
    %true_17892 = torch.constant.bool true
    %none_17893 = torch.constant.none
    %none_17894 = torch.constant.none
    %20168:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%20144, %20152, %20160, %float0.000000e00_17891, %true_17892, %none_17893, %none_17894) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %20168#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_17895 = torch.constant.float 0.000000e+00
    %true_17896 = torch.constant.bool true
    %none_17897 = torch.constant.none
    %none_17898 = torch.constant.none
    %20169:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%20145, %20153, %20161, %float0.000000e00_17895, %true_17896, %none_17897, %none_17898) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %20169#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_17899 = torch.constant.float 0.000000e+00
    %true_17900 = torch.constant.bool true
    %none_17901 = torch.constant.none
    %none_17902 = torch.constant.none
    %20170:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%20146, %20154, %20162, %float0.000000e00_17899, %true_17900, %none_17901, %none_17902) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %20170#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_17903 = torch.constant.float 0.000000e+00
    %true_17904 = torch.constant.bool true
    %none_17905 = torch.constant.none
    %none_17906 = torch.constant.none
    %20171:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%20147, %20155, %20163, %float0.000000e00_17903, %true_17904, %none_17905, %none_17906) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %20171#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_17907 = torch.constant.float 0.000000e+00
    %true_17908 = torch.constant.bool true
    %none_17909 = torch.constant.none
    %none_17910 = torch.constant.none
    %20172:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%20148, %20156, %20164, %float0.000000e00_17907, %true_17908, %none_17909, %none_17910) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %20172#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_17911 = torch.constant.float 0.000000e+00
    %true_17912 = torch.constant.bool true
    %none_17913 = torch.constant.none
    %none_17914 = torch.constant.none
    %20173:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%20149, %20157, %20165, %float0.000000e00_17911, %true_17912, %none_17913, %none_17914) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %20173#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
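    // Transpose the attention outputs back to [4,?,4,128] (sequence before heads)
    // ahead of merging the head and feature dimensions.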
    %int1_17915 = torch.constant.int 1
    %int2_17916 = torch.constant.int 2
    %20174 = torch.aten.transpose.int %20166#0, %int1_17915, %int2_17916 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17917 = torch.constant.int 1
    %int2_17918 = torch.constant.int 2
    %20175 = torch.aten.transpose.int %20167#0, %int1_17917, %int2_17918 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17919 = torch.constant.int 1
    %int2_17920 = torch.constant.int 2
    %20176 = torch.aten.transpose.int %20168#0, %int1_17919, %int2_17920 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17921 = torch.constant.int 1
    %int2_17922 = torch.constant.int 2
    %20177 = torch.aten.transpose.int %20169#0, %int1_17921, %int2_17922 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17923 = torch.constant.int 1
    %int2_17924 = torch.constant.int 2
    %20178 = torch.aten.transpose.int %20170#0, %int1_17923, %int2_17924 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17925 = torch.constant.int 1
    %int2_17926 = torch.constant.int 2
    %20179 = torch.aten.transpose.int %20171#0, %int1_17925, %int2_17926 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17927 = torch.constant.int 1
    %int2_17928 = torch.constant.int 2
    %20180 = torch.aten.transpose.int %20172#0, %int1_17927, %int2_17928 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_17929 = torch.constant.int 1
    %int2_17930 = torch.constant.int 2
    %20181 = torch.aten.transpose.int %20173#0, %int1_17929, %int2_17930 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %20181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
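    // Merge heads: view [4,?,4,128] -> [4,?,512] per shard (4 heads * 128 = 512).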
    %int4_17931 = torch.constant.int 4
    %int512_17932 = torch.constant.int 512
    %20182 = torch.prim.ListConstruct %int4_17931, %19503, %int512_17932 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20183 = torch.aten.view %20174, %20182 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %20183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_17933 = torch.constant.int 4
    %int512_17934 = torch.constant.int 512
    %20184 = torch.prim.ListConstruct %int4_17933, %19518, %int512_17934 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20185 = torch.aten.view %20175, %20184 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %20185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_17935 = torch.constant.int 4
    %int512_17936 = torch.constant.int 512
    %20186 = torch.prim.ListConstruct %int4_17935, %19533, %int512_17936 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20187 = torch.aten.view %20176, %20186 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %20187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_17937 = torch.constant.int 4
    %int512_17938 = torch.constant.int 512
    %20188 = torch.prim.ListConstruct %int4_17937, %19548, %int512_17938 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20189 = torch.aten.view %20177, %20188 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %20189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_17939 = torch.constant.int 4
    %int512_17940 = torch.constant.int 512
    %20190 = torch.prim.ListConstruct %int4_17939, %19563, %int512_17940 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20191 = torch.aten.view %20178, %20190 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %20191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_17941 = torch.constant.int 4
    %int512_17942 = torch.constant.int 512
    %20192 = torch.prim.ListConstruct %int4_17941, %19578, %int512_17942 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20193 = torch.aten.view %20179, %20192 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %20193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_17943 = torch.constant.int 4
    %int512_17944 = torch.constant.int 512
    %20194 = torch.prim.ListConstruct %int4_17943, %19593, %int512_17944 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20195 = torch.aten.view %20180, %20194 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %20195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_17945 = torch.constant.int 4
    %int512_17946 = torch.constant.int 512
    %20196 = torch.prim.ListConstruct %int4_17945, %19608, %int512_17946 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20197 = torch.aten.view %20181, %20196 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %20197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
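    // Transpose the eight output-projection weight shards (%688..%695) from
    // [4096,512] to [512,4096] for the matmuls that follow.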
    %int1_17947 = torch.constant.int 1
    %int0_17948 = torch.constant.int 0
    %20198 = torch.prim.ListConstruct %int1_17947, %int0_17948 : (!torch.int, !torch.int) -> !torch.list<int>
    %20199 = torch.aten.permute %688, %20198 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_17949 = torch.constant.int 1
    %int0_17950 = torch.constant.int 0
    %20200 = torch.prim.ListConstruct %int1_17949, %int0_17950 : (!torch.int, !torch.int) -> !torch.list<int>
    %20201 = torch.aten.permute %689, %20200 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_17951 = torch.constant.int 1
    %int0_17952 = torch.constant.int 0
    %20202 = torch.prim.ListConstruct %int1_17951, %int0_17952 : (!torch.int, !torch.int) -> !torch.list<int>
    %20203 = torch.aten.permute %690, %20202 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_17953 = torch.constant.int 1
    %int0_17954 = torch.constant.int 0
    %20204 = torch.prim.ListConstruct %int1_17953, %int0_17954 : (!torch.int, !torch.int) -> !torch.list<int>
    %20205 = torch.aten.permute %691, %20204 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_17955 = torch.constant.int 1
    %int0_17956 = torch.constant.int 0
    %20206 = torch.prim.ListConstruct %int1_17955, %int0_17956 : (!torch.int, !torch.int) -> !torch.list<int>
    %20207 = torch.aten.permute %692, %20206 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_17957 = torch.constant.int 1
    %int0_17958 = torch.constant.int 0
    %20208 = torch.prim.ListConstruct %int1_17957, %int0_17958 : (!torch.int, !torch.int) -> !torch.list<int>
    %20209 = torch.aten.permute %693, %20208 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_17959 = torch.constant.int 1
    %int0_17960 = torch.constant.int 0
    %20210 = torch.prim.ListConstruct %int1_17959, %int0_17960 : (!torch.int, !torch.int) -> !torch.list<int>
    %20211 = torch.aten.permute %694, %20210 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_17961 = torch.constant.int 1
    %int0_17962 = torch.constant.int 0
    %20212 = torch.prim.ListConstruct %int1_17961, %int0_17962 : (!torch.int, !torch.int) -> !torch.list<int>
    %20213 = torch.aten.permute %695, %20212 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
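    // Output projection per shard: flatten [4,?,512] to [?,512] (batch of 4 folded
    // into the leading dim), matmul with the [512,4096] weight, then view the
    // result back to [4,?,4096].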
    %int4_17963 = torch.constant.int 4
    %20214 = torch.aten.mul.int %int4_17963, %19503 : !torch.int, !torch.int -> !torch.int
    %int512_17964 = torch.constant.int 512
    %20215 = torch.prim.ListConstruct %20214, %int512_17964 : (!torch.int, !torch.int) -> !torch.list<int>
    %20216 = torch.aten.view %20183, %20215 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %20216, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %20217 = torch.aten.mm %20216, %20199 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20217, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_17965 = torch.constant.int 4
    %int4096_17966 = torch.constant.int 4096
    %20218 = torch.prim.ListConstruct %int4_17965, %19503, %int4096_17966 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20219 = torch.aten.view %20217, %20218 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_17967 = torch.constant.int 4
    %20220 = torch.aten.mul.int %int4_17967, %19518 : !torch.int, !torch.int -> !torch.int
    %int512_17968 = torch.constant.int 512
    %20221 = torch.prim.ListConstruct %20220, %int512_17968 : (!torch.int, !torch.int) -> !torch.list<int>
    %20222 = torch.aten.view %20185, %20221 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %20222, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %20223 = torch.aten.mm %20222, %20201 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20223, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_17969 = torch.constant.int 4
    %int4096_17970 = torch.constant.int 4096
    %20224 = torch.prim.ListConstruct %int4_17969, %19518, %int4096_17970 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20225 = torch.aten.view %20223, %20224 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_17971 = torch.constant.int 4
    %20226 = torch.aten.mul.int %int4_17971, %19533 : !torch.int, !torch.int -> !torch.int
    %int512_17972 = torch.constant.int 512
    %20227 = torch.prim.ListConstruct %20226, %int512_17972 : (!torch.int, !torch.int) -> !torch.list<int>
    %20228 = torch.aten.view %20187, %20227 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %20228, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %20229 = torch.aten.mm %20228, %20203 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20229, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_17973 = torch.constant.int 4
    %int4096_17974 = torch.constant.int 4096
    %20230 = torch.prim.ListConstruct %int4_17973, %19533, %int4096_17974 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20231 = torch.aten.view %20229, %20230 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_17975 = torch.constant.int 4
    %20232 = torch.aten.mul.int %int4_17975, %19548 : !torch.int, !torch.int -> !torch.int
    %int512_17976 = torch.constant.int 512
    %20233 = torch.prim.ListConstruct %20232, %int512_17976 : (!torch.int, !torch.int) -> !torch.list<int>
    %20234 = torch.aten.view %20189, %20233 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %20234, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %20235 = torch.aten.mm %20234, %20205 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20235, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_17977 = torch.constant.int 4
    %int4096_17978 = torch.constant.int 4096
    %20236 = torch.prim.ListConstruct %int4_17977, %19548, %int4096_17978 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20237 = torch.aten.view %20235, %20236 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_17979 = torch.constant.int 4
    %20238 = torch.aten.mul.int %int4_17979, %19563 : !torch.int, !torch.int -> !torch.int
    %int512_17980 = torch.constant.int 512
    %20239 = torch.prim.ListConstruct %20238, %int512_17980 : (!torch.int, !torch.int) -> !torch.list<int>
    %20240 = torch.aten.view %20191, %20239 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %20240, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %20241 = torch.aten.mm %20240, %20207 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20241, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_17981 = torch.constant.int 4
    %int4096_17982 = torch.constant.int 4096
    %20242 = torch.prim.ListConstruct %int4_17981, %19563, %int4096_17982 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20243 = torch.aten.view %20241, %20242 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_17983 = torch.constant.int 4
    %20244 = torch.aten.mul.int %int4_17983, %19578 : !torch.int, !torch.int -> !torch.int
    %int512_17984 = torch.constant.int 512
    %20245 = torch.prim.ListConstruct %20244, %int512_17984 : (!torch.int, !torch.int) -> !torch.list<int>
    %20246 = torch.aten.view %20193, %20245 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %20246, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %20247 = torch.aten.mm %20246, %20209 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20247, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_17985 = torch.constant.int 4
    %int4096_17986 = torch.constant.int 4096
    %20248 = torch.prim.ListConstruct %int4_17985, %19578, %int4096_17986 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20249 = torch.aten.view %20247, %20248 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_17987 = torch.constant.int 4
    %20250 = torch.aten.mul.int %int4_17987, %19593 : !torch.int, !torch.int -> !torch.int
    %int512_17988 = torch.constant.int 512
    %20251 = torch.prim.ListConstruct %20250, %int512_17988 : (!torch.int, !torch.int) -> !torch.list<int>
    %20252 = torch.aten.view %20195, %20251 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %20252, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %20253 = torch.aten.mm %20252, %20211 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20253, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_17989 = torch.constant.int 4
    %int4096_17990 = torch.constant.int 4096
    %20254 = torch.prim.ListConstruct %int4_17989, %19593, %int4096_17990 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20255 = torch.aten.view %20253, %20254 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_17991 = torch.constant.int 4
    %20256 = torch.aten.mul.int %int4_17991, %19608 : !torch.int, !torch.int -> !torch.int
    %int512_17992 = torch.constant.int 512
    %20257 = torch.prim.ListConstruct %20256, %int512_17992 : (!torch.int, !torch.int) -> !torch.list<int>
    %20258 = torch.aten.view %20197, %20257 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %20258, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %20259 = torch.aten.mm %20258, %20213 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20259, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_17993 = torch.constant.int 4
    %int4096_17994 = torch.constant.int 4096
    %20260 = torch.prim.ListConstruct %int4_17993, %19608, %int4096_17994 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20261 = torch.aten.view %20259, %20260 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
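    // Transfer the partial projections from the other seven shards to @__device_0
    // and accumulate them into %20289 -- effectively an all-reduce materialized as
    // explicit per-device transfers plus elementwise adds.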
    %20262 = torch_c.to_builtin_tensor %20225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_17995 = arith.constant 1 : index
    %dim_17996 = tensor.dim %20262, %c1_17995 : tensor<4x?x4096xf16>
    %20263 = flow.tensor.transfer %20262 : tensor<4x?x4096xf16>{%dim_17996} to #hal.device.promise<@__device_0>
    %20264 = torch_c.from_builtin_tensor %20263 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20265 = torch_c.to_builtin_tensor %20231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_17997 = arith.constant 1 : index
    %dim_17998 = tensor.dim %20265, %c1_17997 : tensor<4x?x4096xf16>
    %20266 = flow.tensor.transfer %20265 : tensor<4x?x4096xf16>{%dim_17998} to #hal.device.promise<@__device_0>
    %20267 = torch_c.from_builtin_tensor %20266 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20268 = torch_c.to_builtin_tensor %20237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_17999 = arith.constant 1 : index
    %dim_18000 = tensor.dim %20268, %c1_17999 : tensor<4x?x4096xf16>
    %20269 = flow.tensor.transfer %20268 : tensor<4x?x4096xf16>{%dim_18000} to #hal.device.promise<@__device_0>
    %20270 = torch_c.from_builtin_tensor %20269 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20271 = torch_c.to_builtin_tensor %20243 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18001 = arith.constant 1 : index
    %dim_18002 = tensor.dim %20271, %c1_18001 : tensor<4x?x4096xf16>
    %20272 = flow.tensor.transfer %20271 : tensor<4x?x4096xf16>{%dim_18002} to #hal.device.promise<@__device_0>
    %20273 = torch_c.from_builtin_tensor %20272 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20274 = torch_c.to_builtin_tensor %20249 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18003 = arith.constant 1 : index
    %dim_18004 = tensor.dim %20274, %c1_18003 : tensor<4x?x4096xf16>
    %20275 = flow.tensor.transfer %20274 : tensor<4x?x4096xf16>{%dim_18004} to #hal.device.promise<@__device_0>
    %20276 = torch_c.from_builtin_tensor %20275 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20277 = torch_c.to_builtin_tensor %20255 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18005 = arith.constant 1 : index
    %dim_18006 = tensor.dim %20277, %c1_18005 : tensor<4x?x4096xf16>
    %20278 = flow.tensor.transfer %20277 : tensor<4x?x4096xf16>{%dim_18006} to #hal.device.promise<@__device_0>
    %20279 = torch_c.from_builtin_tensor %20278 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20280 = torch_c.to_builtin_tensor %20261 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18007 = arith.constant 1 : index
    %dim_18008 = tensor.dim %20280, %c1_18007 : tensor<4x?x4096xf16>
    %20281 = flow.tensor.transfer %20280 : tensor<4x?x4096xf16>{%dim_18008} to #hal.device.promise<@__device_0>
    %20282 = torch_c.from_builtin_tensor %20281 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18009 = torch.constant.int 1
    %20283 = torch.aten.add.Tensor %20219, %20264, %int1_18009 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18010 = torch.constant.int 1
    %20284 = torch.aten.add.Tensor %20283, %20267, %int1_18010 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18011 = torch.constant.int 1
    %20285 = torch.aten.add.Tensor %20284, %20270, %int1_18011 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18012 = torch.constant.int 1
    %20286 = torch.aten.add.Tensor %20285, %20273, %int1_18012 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18013 = torch.constant.int 1
    %20287 = torch.aten.add.Tensor %20286, %20276, %int1_18013 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18014 = torch.constant.int 1
    %20288 = torch.aten.add.Tensor %20287, %20279, %int1_18014 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18015 = torch.constant.int 1
    %20289 = torch.aten.add.Tensor %20288, %20282, %int1_18015 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
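    // The same reduction replayed for @__device_1: move the seven remote partials
    // over and sum them with the local one.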
    %20290 = torch_c.to_builtin_tensor %20219 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18016 = arith.constant 1 : index
    %dim_18017 = tensor.dim %20290, %c1_18016 : tensor<4x?x4096xf16>
    %20291 = flow.tensor.transfer %20290 : tensor<4x?x4096xf16>{%dim_18017} to #hal.device.promise<@__device_1>
    %20292 = torch_c.from_builtin_tensor %20291 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20293 = torch_c.to_builtin_tensor %20231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18018 = arith.constant 1 : index
    %dim_18019 = tensor.dim %20293, %c1_18018 : tensor<4x?x4096xf16>
    %20294 = flow.tensor.transfer %20293 : tensor<4x?x4096xf16>{%dim_18019} to #hal.device.promise<@__device_1>
    %20295 = torch_c.from_builtin_tensor %20294 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20296 = torch_c.to_builtin_tensor %20237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18020 = arith.constant 1 : index
    %dim_18021 = tensor.dim %20296, %c1_18020 : tensor<4x?x4096xf16>
    %20297 = flow.tensor.transfer %20296 : tensor<4x?x4096xf16>{%dim_18021} to #hal.device.promise<@__device_1>
    %20298 = torch_c.from_builtin_tensor %20297 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20299 = torch_c.to_builtin_tensor %20243 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18022 = arith.constant 1 : index
    %dim_18023 = tensor.dim %20299, %c1_18022 : tensor<4x?x4096xf16>
    %20300 = flow.tensor.transfer %20299 : tensor<4x?x4096xf16>{%dim_18023} to #hal.device.promise<@__device_1>
    %20301 = torch_c.from_builtin_tensor %20300 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20302 = torch_c.to_builtin_tensor %20249 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18024 = arith.constant 1 : index
    %dim_18025 = tensor.dim %20302, %c1_18024 : tensor<4x?x4096xf16>
    %20303 = flow.tensor.transfer %20302 : tensor<4x?x4096xf16>{%dim_18025} to #hal.device.promise<@__device_1>
    %20304 = torch_c.from_builtin_tensor %20303 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20305 = torch_c.to_builtin_tensor %20255 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18026 = arith.constant 1 : index
    %dim_18027 = tensor.dim %20305, %c1_18026 : tensor<4x?x4096xf16>
    %20306 = flow.tensor.transfer %20305 : tensor<4x?x4096xf16>{%dim_18027} to #hal.device.promise<@__device_1>
    %20307 = torch_c.from_builtin_tensor %20306 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20308 = torch_c.to_builtin_tensor %20261 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18028 = arith.constant 1 : index
    %dim_18029 = tensor.dim %20308, %c1_18028 : tensor<4x?x4096xf16>
    %20309 = flow.tensor.transfer %20308 : tensor<4x?x4096xf16>{%dim_18029} to #hal.device.promise<@__device_1>
    %20310 = torch_c.from_builtin_tensor %20309 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18030 = torch.constant.int 1
    %20311 = torch.aten.add.Tensor %20292, %20225, %int1_18030 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18031 = torch.constant.int 1
    %20312 = torch.aten.add.Tensor %20311, %20295, %int1_18031 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18032 = torch.constant.int 1
    %20313 = torch.aten.add.Tensor %20312, %20298, %int1_18032 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18033 = torch.constant.int 1
    %20314 = torch.aten.add.Tensor %20313, %20301, %int1_18033 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18034 = torch.constant.int 1
    %20315 = torch.aten.add.Tensor %20314, %20304, %int1_18034 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18035 = torch.constant.int 1
    %20316 = torch.aten.add.Tensor %20315, %20307, %int1_18035 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18036 = torch.constant.int 1
    %20317 = torch.aten.add.Tensor %20316, %20310, %int1_18036 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
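    // Repeat for @__device_2: the seven remote partials (%20219, %20225, %20237,
    // %20243, %20249, %20255, %20261) are transferred, while the copy already
    // resident on this device (%20231) is consumed in place. Combined with the
    // chained adds below, every device ends up holding the same full sum -- an
    // all-reduce expressed as per-device gather + add.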
    %20318 = torch_c.to_builtin_tensor %20219 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18037 = arith.constant 1 : index
    %dim_18038 = tensor.dim %20318, %c1_18037 : tensor<4x?x4096xf16>
    %20319 = flow.tensor.transfer %20318 : tensor<4x?x4096xf16>{%dim_18038} to #hal.device.promise<@__device_2>
    %20320 = torch_c.from_builtin_tensor %20319 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20321 = torch_c.to_builtin_tensor %20225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18039 = arith.constant 1 : index
    %dim_18040 = tensor.dim %20321, %c1_18039 : tensor<4x?x4096xf16>
    %20322 = flow.tensor.transfer %20321 : tensor<4x?x4096xf16>{%dim_18040} to #hal.device.promise<@__device_2>
    %20323 = torch_c.from_builtin_tensor %20322 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20324 = torch_c.to_builtin_tensor %20237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18041 = arith.constant 1 : index
    %dim_18042 = tensor.dim %20324, %c1_18041 : tensor<4x?x4096xf16>
    %20325 = flow.tensor.transfer %20324 : tensor<4x?x4096xf16>{%dim_18042} to #hal.device.promise<@__device_2>
    %20326 = torch_c.from_builtin_tensor %20325 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20327 = torch_c.to_builtin_tensor %20243 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18043 = arith.constant 1 : index
    %dim_18044 = tensor.dim %20327, %c1_18043 : tensor<4x?x4096xf16>
    %20328 = flow.tensor.transfer %20327 : tensor<4x?x4096xf16>{%dim_18044} to #hal.device.promise<@__device_2>
    %20329 = torch_c.from_builtin_tensor %20328 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20330 = torch_c.to_builtin_tensor %20249 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18045 = arith.constant 1 : index
    %dim_18046 = tensor.dim %20330, %c1_18045 : tensor<4x?x4096xf16>
    %20331 = flow.tensor.transfer %20330 : tensor<4x?x4096xf16>{%dim_18046} to #hal.device.promise<@__device_2>
    %20332 = torch_c.from_builtin_tensor %20331 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20333 = torch_c.to_builtin_tensor %20255 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18047 = arith.constant 1 : index
    %dim_18048 = tensor.dim %20333, %c1_18047 : tensor<4x?x4096xf16>
    %20334 = flow.tensor.transfer %20333 : tensor<4x?x4096xf16>{%dim_18048} to #hal.device.promise<@__device_2>
    %20335 = torch_c.from_builtin_tensor %20334 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20336 = torch_c.to_builtin_tensor %20261 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18049 = arith.constant 1 : index
    %dim_18050 = tensor.dim %20336, %c1_18049 : tensor<4x?x4096xf16>
    %20337 = flow.tensor.transfer %20336 : tensor<4x?x4096xf16>{%dim_18050} to #hal.device.promise<@__device_2>
    %20338 = torch_c.from_builtin_tensor %20337 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18051 = torch.constant.int 1
    %20339 = torch.aten.add.Tensor %20320, %20323, %int1_18051 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18052 = torch.constant.int 1
    %20340 = torch.aten.add.Tensor %20339, %20231, %int1_18052 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18053 = torch.constant.int 1
    %20341 = torch.aten.add.Tensor %20340, %20326, %int1_18053 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18054 = torch.constant.int 1
    %20342 = torch.aten.add.Tensor %20341, %20329, %int1_18054 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18055 = torch.constant.int 1
    %20343 = torch.aten.add.Tensor %20342, %20332, %int1_18055 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18056 = torch.constant.int 1
    %20344 = torch.aten.add.Tensor %20343, %20335, %int1_18056 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18057 = torch.constant.int 1
    %20345 = torch.aten.add.Tensor %20344, %20338, %int1_18057 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
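    // Same gather-and-sum onto @__device_3; the local partial %20237 needs no transfer (result %20373).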
    %20346 = torch_c.to_builtin_tensor %20219 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18058 = arith.constant 1 : index
    %dim_18059 = tensor.dim %20346, %c1_18058 : tensor<4x?x4096xf16>
    %20347 = flow.tensor.transfer %20346 : tensor<4x?x4096xf16>{%dim_18059} to #hal.device.promise<@__device_3>
    %20348 = torch_c.from_builtin_tensor %20347 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20349 = torch_c.to_builtin_tensor %20225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18060 = arith.constant 1 : index
    %dim_18061 = tensor.dim %20349, %c1_18060 : tensor<4x?x4096xf16>
    %20350 = flow.tensor.transfer %20349 : tensor<4x?x4096xf16>{%dim_18061} to #hal.device.promise<@__device_3>
    %20351 = torch_c.from_builtin_tensor %20350 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20352 = torch_c.to_builtin_tensor %20231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18062 = arith.constant 1 : index
    %dim_18063 = tensor.dim %20352, %c1_18062 : tensor<4x?x4096xf16>
    %20353 = flow.tensor.transfer %20352 : tensor<4x?x4096xf16>{%dim_18063} to #hal.device.promise<@__device_3>
    %20354 = torch_c.from_builtin_tensor %20353 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20355 = torch_c.to_builtin_tensor %20243 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18064 = arith.constant 1 : index
    %dim_18065 = tensor.dim %20355, %c1_18064 : tensor<4x?x4096xf16>
    %20356 = flow.tensor.transfer %20355 : tensor<4x?x4096xf16>{%dim_18065} to #hal.device.promise<@__device_3>
    %20357 = torch_c.from_builtin_tensor %20356 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20358 = torch_c.to_builtin_tensor %20249 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18066 = arith.constant 1 : index
    %dim_18067 = tensor.dim %20358, %c1_18066 : tensor<4x?x4096xf16>
    %20359 = flow.tensor.transfer %20358 : tensor<4x?x4096xf16>{%dim_18067} to #hal.device.promise<@__device_3>
    %20360 = torch_c.from_builtin_tensor %20359 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20361 = torch_c.to_builtin_tensor %20255 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18068 = arith.constant 1 : index
    %dim_18069 = tensor.dim %20361, %c1_18068 : tensor<4x?x4096xf16>
    %20362 = flow.tensor.transfer %20361 : tensor<4x?x4096xf16>{%dim_18069} to #hal.device.promise<@__device_3>
    %20363 = torch_c.from_builtin_tensor %20362 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20364 = torch_c.to_builtin_tensor %20261 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18070 = arith.constant 1 : index
    %dim_18071 = tensor.dim %20364, %c1_18070 : tensor<4x?x4096xf16>
    %20365 = flow.tensor.transfer %20364 : tensor<4x?x4096xf16>{%dim_18071} to #hal.device.promise<@__device_3>
    %20366 = torch_c.from_builtin_tensor %20365 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18072 = torch.constant.int 1
    %20367 = torch.aten.add.Tensor %20348, %20351, %int1_18072 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18073 = torch.constant.int 1
    %20368 = torch.aten.add.Tensor %20367, %20354, %int1_18073 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18074 = torch.constant.int 1
    %20369 = torch.aten.add.Tensor %20368, %20237, %int1_18074 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18075 = torch.constant.int 1
    %20370 = torch.aten.add.Tensor %20369, %20357, %int1_18075 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18076 = torch.constant.int 1
    %20371 = torch.aten.add.Tensor %20370, %20360, %int1_18076 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18077 = torch.constant.int 1
    %20372 = torch.aten.add.Tensor %20371, %20363, %int1_18077 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18078 = torch.constant.int 1
    %20373 = torch.aten.add.Tensor %20372, %20366, %int1_18078 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
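    // Same gather-and-sum onto @__device_4; the local partial %20243 needs no transfer (result %20401).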
    %20374 = torch_c.to_builtin_tensor %20219 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18079 = arith.constant 1 : index
    %dim_18080 = tensor.dim %20374, %c1_18079 : tensor<4x?x4096xf16>
    %20375 = flow.tensor.transfer %20374 : tensor<4x?x4096xf16>{%dim_18080} to #hal.device.promise<@__device_4>
    %20376 = torch_c.from_builtin_tensor %20375 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20377 = torch_c.to_builtin_tensor %20225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18081 = arith.constant 1 : index
    %dim_18082 = tensor.dim %20377, %c1_18081 : tensor<4x?x4096xf16>
    %20378 = flow.tensor.transfer %20377 : tensor<4x?x4096xf16>{%dim_18082} to #hal.device.promise<@__device_4>
    %20379 = torch_c.from_builtin_tensor %20378 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20380 = torch_c.to_builtin_tensor %20231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18083 = arith.constant 1 : index
    %dim_18084 = tensor.dim %20380, %c1_18083 : tensor<4x?x4096xf16>
    %20381 = flow.tensor.transfer %20380 : tensor<4x?x4096xf16>{%dim_18084} to #hal.device.promise<@__device_4>
    %20382 = torch_c.from_builtin_tensor %20381 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20383 = torch_c.to_builtin_tensor %20237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18085 = arith.constant 1 : index
    %dim_18086 = tensor.dim %20383, %c1_18085 : tensor<4x?x4096xf16>
    %20384 = flow.tensor.transfer %20383 : tensor<4x?x4096xf16>{%dim_18086} to #hal.device.promise<@__device_4>
    %20385 = torch_c.from_builtin_tensor %20384 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20386 = torch_c.to_builtin_tensor %20249 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18087 = arith.constant 1 : index
    %dim_18088 = tensor.dim %20386, %c1_18087 : tensor<4x?x4096xf16>
    %20387 = flow.tensor.transfer %20386 : tensor<4x?x4096xf16>{%dim_18088} to #hal.device.promise<@__device_4>
    %20388 = torch_c.from_builtin_tensor %20387 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20389 = torch_c.to_builtin_tensor %20255 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18089 = arith.constant 1 : index
    %dim_18090 = tensor.dim %20389, %c1_18089 : tensor<4x?x4096xf16>
    %20390 = flow.tensor.transfer %20389 : tensor<4x?x4096xf16>{%dim_18090} to #hal.device.promise<@__device_4>
    %20391 = torch_c.from_builtin_tensor %20390 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20392 = torch_c.to_builtin_tensor %20261 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18091 = arith.constant 1 : index
    %dim_18092 = tensor.dim %20392, %c1_18091 : tensor<4x?x4096xf16>
    %20393 = flow.tensor.transfer %20392 : tensor<4x?x4096xf16>{%dim_18092} to #hal.device.promise<@__device_4>
    %20394 = torch_c.from_builtin_tensor %20393 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18093 = torch.constant.int 1
    %20395 = torch.aten.add.Tensor %20376, %20379, %int1_18093 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18094 = torch.constant.int 1
    %20396 = torch.aten.add.Tensor %20395, %20382, %int1_18094 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18095 = torch.constant.int 1
    %20397 = torch.aten.add.Tensor %20396, %20385, %int1_18095 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18096 = torch.constant.int 1
    %20398 = torch.aten.add.Tensor %20397, %20243, %int1_18096 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18097 = torch.constant.int 1
    %20399 = torch.aten.add.Tensor %20398, %20388, %int1_18097 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18098 = torch.constant.int 1
    %20400 = torch.aten.add.Tensor %20399, %20391, %int1_18098 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18099 = torch.constant.int 1
    %20401 = torch.aten.add.Tensor %20400, %20394, %int1_18099 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
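    // Same gather-and-sum onto @__device_5; the local partial %20249 needs no transfer (result %20429).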
    %20402 = torch_c.to_builtin_tensor %20219 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18100 = arith.constant 1 : index
    %dim_18101 = tensor.dim %20402, %c1_18100 : tensor<4x?x4096xf16>
    %20403 = flow.tensor.transfer %20402 : tensor<4x?x4096xf16>{%dim_18101} to #hal.device.promise<@__device_5>
    %20404 = torch_c.from_builtin_tensor %20403 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20405 = torch_c.to_builtin_tensor %20225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18102 = arith.constant 1 : index
    %dim_18103 = tensor.dim %20405, %c1_18102 : tensor<4x?x4096xf16>
    %20406 = flow.tensor.transfer %20405 : tensor<4x?x4096xf16>{%dim_18103} to #hal.device.promise<@__device_5>
    %20407 = torch_c.from_builtin_tensor %20406 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20408 = torch_c.to_builtin_tensor %20231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18104 = arith.constant 1 : index
    %dim_18105 = tensor.dim %20408, %c1_18104 : tensor<4x?x4096xf16>
    %20409 = flow.tensor.transfer %20408 : tensor<4x?x4096xf16>{%dim_18105} to #hal.device.promise<@__device_5>
    %20410 = torch_c.from_builtin_tensor %20409 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20411 = torch_c.to_builtin_tensor %20237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18106 = arith.constant 1 : index
    %dim_18107 = tensor.dim %20411, %c1_18106 : tensor<4x?x4096xf16>
    %20412 = flow.tensor.transfer %20411 : tensor<4x?x4096xf16>{%dim_18107} to #hal.device.promise<@__device_5>
    %20413 = torch_c.from_builtin_tensor %20412 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20414 = torch_c.to_builtin_tensor %20243 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18108 = arith.constant 1 : index
    %dim_18109 = tensor.dim %20414, %c1_18108 : tensor<4x?x4096xf16>
    %20415 = flow.tensor.transfer %20414 : tensor<4x?x4096xf16>{%dim_18109} to #hal.device.promise<@__device_5>
    %20416 = torch_c.from_builtin_tensor %20415 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20417 = torch_c.to_builtin_tensor %20255 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18110 = arith.constant 1 : index
    %dim_18111 = tensor.dim %20417, %c1_18110 : tensor<4x?x4096xf16>
    %20418 = flow.tensor.transfer %20417 : tensor<4x?x4096xf16>{%dim_18111} to #hal.device.promise<@__device_5>
    %20419 = torch_c.from_builtin_tensor %20418 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20420 = torch_c.to_builtin_tensor %20261 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18112 = arith.constant 1 : index
    %dim_18113 = tensor.dim %20420, %c1_18112 : tensor<4x?x4096xf16>
    %20421 = flow.tensor.transfer %20420 : tensor<4x?x4096xf16>{%dim_18113} to #hal.device.promise<@__device_5>
    %20422 = torch_c.from_builtin_tensor %20421 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18114 = torch.constant.int 1
    %20423 = torch.aten.add.Tensor %20404, %20407, %int1_18114 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18115 = torch.constant.int 1
    %20424 = torch.aten.add.Tensor %20423, %20410, %int1_18115 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18116 = torch.constant.int 1
    %20425 = torch.aten.add.Tensor %20424, %20413, %int1_18116 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18117 = torch.constant.int 1
    %20426 = torch.aten.add.Tensor %20425, %20416, %int1_18117 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18118 = torch.constant.int 1
    %20427 = torch.aten.add.Tensor %20426, %20249, %int1_18118 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18119 = torch.constant.int 1
    %20428 = torch.aten.add.Tensor %20427, %20419, %int1_18119 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18120 = torch.constant.int 1
    %20429 = torch.aten.add.Tensor %20428, %20422, %int1_18120 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
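    // Same gather-and-sum onto @__device_6; the local partial %20255 needs no transfer (result %20457).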
    %20430 = torch_c.to_builtin_tensor %20219 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18121 = arith.constant 1 : index
    %dim_18122 = tensor.dim %20430, %c1_18121 : tensor<4x?x4096xf16>
    %20431 = flow.tensor.transfer %20430 : tensor<4x?x4096xf16>{%dim_18122} to #hal.device.promise<@__device_6>
    %20432 = torch_c.from_builtin_tensor %20431 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20433 = torch_c.to_builtin_tensor %20225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18123 = arith.constant 1 : index
    %dim_18124 = tensor.dim %20433, %c1_18123 : tensor<4x?x4096xf16>
    %20434 = flow.tensor.transfer %20433 : tensor<4x?x4096xf16>{%dim_18124} to #hal.device.promise<@__device_6>
    %20435 = torch_c.from_builtin_tensor %20434 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20436 = torch_c.to_builtin_tensor %20231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18125 = arith.constant 1 : index
    %dim_18126 = tensor.dim %20436, %c1_18125 : tensor<4x?x4096xf16>
    %20437 = flow.tensor.transfer %20436 : tensor<4x?x4096xf16>{%dim_18126} to #hal.device.promise<@__device_6>
    %20438 = torch_c.from_builtin_tensor %20437 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20439 = torch_c.to_builtin_tensor %20237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18127 = arith.constant 1 : index
    %dim_18128 = tensor.dim %20439, %c1_18127 : tensor<4x?x4096xf16>
    %20440 = flow.tensor.transfer %20439 : tensor<4x?x4096xf16>{%dim_18128} to #hal.device.promise<@__device_6>
    %20441 = torch_c.from_builtin_tensor %20440 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20442 = torch_c.to_builtin_tensor %20243 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18129 = arith.constant 1 : index
    %dim_18130 = tensor.dim %20442, %c1_18129 : tensor<4x?x4096xf16>
    %20443 = flow.tensor.transfer %20442 : tensor<4x?x4096xf16>{%dim_18130} to #hal.device.promise<@__device_6>
    %20444 = torch_c.from_builtin_tensor %20443 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20445 = torch_c.to_builtin_tensor %20249 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18131 = arith.constant 1 : index
    %dim_18132 = tensor.dim %20445, %c1_18131 : tensor<4x?x4096xf16>
    %20446 = flow.tensor.transfer %20445 : tensor<4x?x4096xf16>{%dim_18132} to #hal.device.promise<@__device_6>
    %20447 = torch_c.from_builtin_tensor %20446 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20448 = torch_c.to_builtin_tensor %20261 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18133 = arith.constant 1 : index
    %dim_18134 = tensor.dim %20448, %c1_18133 : tensor<4x?x4096xf16>
    %20449 = flow.tensor.transfer %20448 : tensor<4x?x4096xf16>{%dim_18134} to #hal.device.promise<@__device_6>
    %20450 = torch_c.from_builtin_tensor %20449 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18135 = torch.constant.int 1
    %20451 = torch.aten.add.Tensor %20432, %20435, %int1_18135 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18136 = torch.constant.int 1
    %20452 = torch.aten.add.Tensor %20451, %20438, %int1_18136 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18137 = torch.constant.int 1
    %20453 = torch.aten.add.Tensor %20452, %20441, %int1_18137 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18138 = torch.constant.int 1
    %20454 = torch.aten.add.Tensor %20453, %20444, %int1_18138 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18139 = torch.constant.int 1
    %20455 = torch.aten.add.Tensor %20454, %20447, %int1_18139 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18140 = torch.constant.int 1
    %20456 = torch.aten.add.Tensor %20455, %20255, %int1_18140 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18141 = torch.constant.int 1
    %20457 = torch.aten.add.Tensor %20456, %20450, %int1_18141 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
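    // Same gather-and-sum onto @__device_7; the local partial %20261 needs no transfer (result %20485).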
    %20458 = torch_c.to_builtin_tensor %20219 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18142 = arith.constant 1 : index
    %dim_18143 = tensor.dim %20458, %c1_18142 : tensor<4x?x4096xf16>
    %20459 = flow.tensor.transfer %20458 : tensor<4x?x4096xf16>{%dim_18143} to #hal.device.promise<@__device_7>
    %20460 = torch_c.from_builtin_tensor %20459 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20461 = torch_c.to_builtin_tensor %20225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18144 = arith.constant 1 : index
    %dim_18145 = tensor.dim %20461, %c1_18144 : tensor<4x?x4096xf16>
    %20462 = flow.tensor.transfer %20461 : tensor<4x?x4096xf16>{%dim_18145} to #hal.device.promise<@__device_7>
    %20463 = torch_c.from_builtin_tensor %20462 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20464 = torch_c.to_builtin_tensor %20231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18146 = arith.constant 1 : index
    %dim_18147 = tensor.dim %20464, %c1_18146 : tensor<4x?x4096xf16>
    %20465 = flow.tensor.transfer %20464 : tensor<4x?x4096xf16>{%dim_18147} to #hal.device.promise<@__device_7>
    %20466 = torch_c.from_builtin_tensor %20465 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20467 = torch_c.to_builtin_tensor %20237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18148 = arith.constant 1 : index
    %dim_18149 = tensor.dim %20467, %c1_18148 : tensor<4x?x4096xf16>
    %20468 = flow.tensor.transfer %20467 : tensor<4x?x4096xf16>{%dim_18149} to #hal.device.promise<@__device_7>
    %20469 = torch_c.from_builtin_tensor %20468 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20470 = torch_c.to_builtin_tensor %20243 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18150 = arith.constant 1 : index
    %dim_18151 = tensor.dim %20470, %c1_18150 : tensor<4x?x4096xf16>
    %20471 = flow.tensor.transfer %20470 : tensor<4x?x4096xf16>{%dim_18151} to #hal.device.promise<@__device_7>
    %20472 = torch_c.from_builtin_tensor %20471 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20473 = torch_c.to_builtin_tensor %20249 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18152 = arith.constant 1 : index
    %dim_18153 = tensor.dim %20473, %c1_18152 : tensor<4x?x4096xf16>
    %20474 = flow.tensor.transfer %20473 : tensor<4x?x4096xf16>{%dim_18153} to #hal.device.promise<@__device_7>
    %20475 = torch_c.from_builtin_tensor %20474 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20476 = torch_c.to_builtin_tensor %20255 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18154 = arith.constant 1 : index
    %dim_18155 = tensor.dim %20476, %c1_18154 : tensor<4x?x4096xf16>
    %20477 = flow.tensor.transfer %20476 : tensor<4x?x4096xf16>{%dim_18155} to #hal.device.promise<@__device_7>
    %20478 = torch_c.from_builtin_tensor %20477 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18156 = torch.constant.int 1
    %20479 = torch.aten.add.Tensor %20460, %20463, %int1_18156 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18157 = torch.constant.int 1
    %20480 = torch.aten.add.Tensor %20479, %20466, %int1_18157 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18158 = torch.constant.int 1
    %20481 = torch.aten.add.Tensor %20480, %20469, %int1_18158 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18159 = torch.constant.int 1
    %20482 = torch.aten.add.Tensor %20481, %20472, %int1_18159 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18160 = torch.constant.int 1
    %20483 = torch.aten.add.Tensor %20482, %20475, %int1_18160 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18161 = torch.constant.int 1
    %20484 = torch.aten.add.Tensor %20483, %20478, %int1_18161 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18162 = torch.constant.int 1
    %20485 = torch.aten.add.Tensor %20484, %20261, %int1_18162 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
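    // Add each device's all-reduced sum (%20289, %20317, %20345, %20373, %20401,
    // %20429, %20457, %20485) to that device's copy of the incoming hidden state
    // (%19145 ... %19152) -- the layer's residual/skip connection.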
    %int1_18163 = torch.constant.int 1
    %20486 = torch.aten.add.Tensor %19145, %20289, %int1_18163 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18164 = torch.constant.int 1
    %20487 = torch.aten.add.Tensor %19146, %20317, %int1_18164 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18165 = torch.constant.int 1
    %20488 = torch.aten.add.Tensor %19147, %20345, %int1_18165 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18166 = torch.constant.int 1
    %20489 = torch.aten.add.Tensor %19148, %20373, %int1_18166 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18167 = torch.constant.int 1
    %20490 = torch.aten.add.Tensor %19149, %20401, %int1_18167 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18168 = torch.constant.int 1
    %20491 = torch.aten.add.Tensor %19150, %20429, %int1_18168 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18169 = torch.constant.int 1
    %20492 = torch.aten.add.Tensor %19151, %20457, %int1_18169 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18170 = torch.constant.int 1
    %20493 = torch.aten.add.Tensor %19152, %20485, %int1_18170 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
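    // What follows is an RMS-normalization of the residual output, replicated once
    // per device: upcast f16 -> f32 (torch dtype code 6), square, mean over the
    // hidden dim, add eps, rsqrt, rescale, then downcast back to f16. As a sketch
    // in pseudo-PyTorch (editorial aid only, not part of the module):
    //   h = x.float()
    //   y = h * torch.rsqrt(h.pow(2).mean(-1, keepdim=True) + 1e-5)
    //   out = (w * y).half()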
    %int6_18171 = torch.constant.int 6
    %20494 = torch.prims.convert_element_type %20486, %int6_18171 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18172 = torch.constant.int 6
    %20495 = torch.prims.convert_element_type %20487, %int6_18172 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18173 = torch.constant.int 6
    %20496 = torch.prims.convert_element_type %20488, %int6_18173 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18174 = torch.constant.int 6
    %20497 = torch.prims.convert_element_type %20489, %int6_18174 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18175 = torch.constant.int 6
    %20498 = torch.prims.convert_element_type %20490, %int6_18175 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18176 = torch.constant.int 6
    %20499 = torch.prims.convert_element_type %20491, %int6_18176 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18177 = torch.constant.int 6
    %20500 = torch.prims.convert_element_type %20492, %int6_18177 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18178 = torch.constant.int 6
    %20501 = torch.prims.convert_element_type %20493, %int6_18178 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
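    // Elementwise square (pow 2) of each upcast f32 tensor.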
    %int2_18179 = torch.constant.int 2
    %20502 = torch.aten.pow.Tensor_Scalar %20494, %int2_18179 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18180 = torch.constant.int 2
    %20503 = torch.aten.pow.Tensor_Scalar %20495, %int2_18180 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18181 = torch.constant.int 2
    %20504 = torch.aten.pow.Tensor_Scalar %20496, %int2_18181 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18182 = torch.constant.int 2
    %20505 = torch.aten.pow.Tensor_Scalar %20497, %int2_18182 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18183 = torch.constant.int 2
    %20506 = torch.aten.pow.Tensor_Scalar %20498, %int2_18183 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18184 = torch.constant.int 2
    %20507 = torch.aten.pow.Tensor_Scalar %20499, %int2_18184 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18185 = torch.constant.int 2
    %20508 = torch.aten.pow.Tensor_Scalar %20500, %int2_18185 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18186 = torch.constant.int 2
    %20509 = torch.aten.pow.Tensor_Scalar %20501, %int2_18186 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
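    // Mean over the last (hidden) dimension with keepdim, yielding a [4,?,1] f32 statistic per device.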
    %int-1_18187 = torch.constant.int -1
    %20510 = torch.prim.ListConstruct %int-1_18187 : (!torch.int) -> !torch.list<int>
    %true_18188 = torch.constant.bool true
    %none_18189 = torch.constant.none
    %20511 = torch.aten.mean.dim %20502, %20510, %true_18188, %none_18189 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18190 = torch.constant.int -1
    %20512 = torch.prim.ListConstruct %int-1_18190 : (!torch.int) -> !torch.list<int>
    %true_18191 = torch.constant.bool true
    %none_18192 = torch.constant.none
    %20513 = torch.aten.mean.dim %20503, %20512, %true_18191, %none_18192 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18193 = torch.constant.int -1
    %20514 = torch.prim.ListConstruct %int-1_18193 : (!torch.int) -> !torch.list<int>
    %true_18194 = torch.constant.bool true
    %none_18195 = torch.constant.none
    %20515 = torch.aten.mean.dim %20504, %20514, %true_18194, %none_18195 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18196 = torch.constant.int -1
    %20516 = torch.prim.ListConstruct %int-1_18196 : (!torch.int) -> !torch.list<int>
    %true_18197 = torch.constant.bool true
    %none_18198 = torch.constant.none
    %20517 = torch.aten.mean.dim %20505, %20516, %true_18197, %none_18198 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18199 = torch.constant.int -1
    %20518 = torch.prim.ListConstruct %int-1_18199 : (!torch.int) -> !torch.list<int>
    %true_18200 = torch.constant.bool true
    %none_18201 = torch.constant.none
    %20519 = torch.aten.mean.dim %20506, %20518, %true_18200, %none_18201 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18202 = torch.constant.int -1
    %20520 = torch.prim.ListConstruct %int-1_18202 : (!torch.int) -> !torch.list<int>
    %true_18203 = torch.constant.bool true
    %none_18204 = torch.constant.none
    %20521 = torch.aten.mean.dim %20507, %20520, %true_18203, %none_18204 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18205 = torch.constant.int -1
    %20522 = torch.prim.ListConstruct %int-1_18205 : (!torch.int) -> !torch.list<int>
    %true_18206 = torch.constant.bool true
    %none_18207 = torch.constant.none
    %20523 = torch.aten.mean.dim %20508, %20522, %true_18206, %none_18207 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18208 = torch.constant.int -1
    %20524 = torch.prim.ListConstruct %int-1_18208 : (!torch.int) -> !torch.list<int>
    %true_18209 = torch.constant.bool true
    %none_18210 = torch.constant.none
    %20525 = torch.aten.mean.dim %20509, %20524, %true_18209, %none_18210 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
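    // Add the epsilon 9.9999997473787516E-6 (1.0e-5 rounded to f32) for numerical stability.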
    %float9.999990e-06_18211 = torch.constant.float 9.9999997473787516E-6
    %int1_18212 = torch.constant.int 1
    %20526 = torch.aten.add.Scalar %20511, %float9.999990e-06_18211, %int1_18212 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18213 = torch.constant.float 9.9999997473787516E-6
    %int1_18214 = torch.constant.int 1
    %20527 = torch.aten.add.Scalar %20513, %float9.999990e-06_18213, %int1_18214 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18215 = torch.constant.float 9.9999997473787516E-6
    %int1_18216 = torch.constant.int 1
    %20528 = torch.aten.add.Scalar %20515, %float9.999990e-06_18215, %int1_18216 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18217 = torch.constant.float 9.9999997473787516E-6
    %int1_18218 = torch.constant.int 1
    %20529 = torch.aten.add.Scalar %20517, %float9.999990e-06_18217, %int1_18218 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18219 = torch.constant.float 9.9999997473787516E-6
    %int1_18220 = torch.constant.int 1
    %20530 = torch.aten.add.Scalar %20519, %float9.999990e-06_18219, %int1_18220 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18221 = torch.constant.float 9.9999997473787516E-6
    %int1_18222 = torch.constant.int 1
    %20531 = torch.aten.add.Scalar %20521, %float9.999990e-06_18221, %int1_18222 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18223 = torch.constant.float 9.9999997473787516E-6
    %int1_18224 = torch.constant.int 1
    %20532 = torch.aten.add.Scalar %20523, %float9.999990e-06_18223, %int1_18224 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18225 = torch.constant.float 9.9999997473787516E-6
    %int1_18226 = torch.constant.int 1
    %20533 = torch.aten.add.Scalar %20525, %float9.999990e-06_18225, %int1_18226 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
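    // Reciprocal square root of the stabilized means.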
    %20534 = torch.aten.rsqrt %20526 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %20535 = torch.aten.rsqrt %20527 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %20536 = torch.aten.rsqrt %20528 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %20537 = torch.aten.rsqrt %20529 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %20538 = torch.aten.rsqrt %20530 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %20539 = torch.aten.rsqrt %20531 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %20540 = torch.aten.rsqrt %20532 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %20541 = torch.aten.rsqrt %20533 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %20541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
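    // Normalize: multiply each f32 activation tensor by its broadcast rsqrt factor.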
    %20542 = torch.aten.mul.Tensor %20494, %20534 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20543 = torch.aten.mul.Tensor %20495, %20535 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20544 = torch.aten.mul.Tensor %20496, %20536 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20545 = torch.aten.mul.Tensor %20497, %20537 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20546 = torch.aten.mul.Tensor %20498, %20538 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20547 = torch.aten.mul.Tensor %20499, %20539 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20548 = torch.aten.mul.Tensor %20500, %20540 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20549 = torch.aten.mul.Tensor %20501, %20541 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
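    // Scale by the replicated 4096-element norm weight held per device in
    // %696..%703. Together with the rsqrt above this completes an RMSNorm,
    // assuming the usual pre-FFN norm placement; the weight's role is
    // inferred from its shape and use.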
    %20550 = torch.aten.mul.Tensor %696, %20542 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20551 = torch.aten.mul.Tensor %697, %20543 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20552 = torch.aten.mul.Tensor %698, %20544 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20553 = torch.aten.mul.Tensor %699, %20545 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20554 = torch.aten.mul.Tensor %700, %20546 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20555 = torch.aten.mul.Tensor %701, %20547 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20556 = torch.aten.mul.Tensor %702, %20548 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %20557 = torch.aten.mul.Tensor %703, %20549 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %20557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
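    // Cast the normalized activations back to f16 (torch dtype code 5 is
    // f16, matching the result types below) ahead of the matmuls.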
    %int5_18227 = torch.constant.int 5
    %20558 = torch.prims.convert_element_type %20550, %int5_18227 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18228 = torch.constant.int 5
    %20559 = torch.prims.convert_element_type %20551, %int5_18228 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18229 = torch.constant.int 5
    %20560 = torch.prims.convert_element_type %20552, %int5_18229 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18230 = torch.constant.int 5
    %20561 = torch.prims.convert_element_type %20553, %int5_18230 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18231 = torch.constant.int 5
    %20562 = torch.prims.convert_element_type %20554, %int5_18231 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18232 = torch.constant.int 5
    %20563 = torch.prims.convert_element_type %20555, %int5_18232 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18233 = torch.constant.int 5
    %20564 = torch.prims.convert_element_type %20556, %int5_18233 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18234 = torch.constant.int 5
    %20565 = torch.prims.convert_element_type %20557, %int5_18234 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
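    // Transpose the eight per-shard projection weights %704..%711 from
    // [1792,4096] to [4096,1792]. 1792 = 14336/8 is consistent with an
    // 8-way column-sharded Llama-style FFN gate projection, though the model
    // identity is an inference from the shapes, not stated in the IR.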
    %int1_18235 = torch.constant.int 1
    %int0_18236 = torch.constant.int 0
    %20566 = torch.prim.ListConstruct %int1_18235, %int0_18236 : (!torch.int, !torch.int) -> !torch.list<int>
    %20567 = torch.aten.permute %704, %20566 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18237 = torch.constant.int 1
    %int0_18238 = torch.constant.int 0
    %20568 = torch.prim.ListConstruct %int1_18237, %int0_18238 : (!torch.int, !torch.int) -> !torch.list<int>
    %20569 = torch.aten.permute %705, %20568 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18239 = torch.constant.int 1
    %int0_18240 = torch.constant.int 0
    %20570 = torch.prim.ListConstruct %int1_18239, %int0_18240 : (!torch.int, !torch.int) -> !torch.list<int>
    %20571 = torch.aten.permute %706, %20570 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18241 = torch.constant.int 1
    %int0_18242 = torch.constant.int 0
    %20572 = torch.prim.ListConstruct %int1_18241, %int0_18242 : (!torch.int, !torch.int) -> !torch.list<int>
    %20573 = torch.aten.permute %707, %20572 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18243 = torch.constant.int 1
    %int0_18244 = torch.constant.int 0
    %20574 = torch.prim.ListConstruct %int1_18243, %int0_18244 : (!torch.int, !torch.int) -> !torch.list<int>
    %20575 = torch.aten.permute %708, %20574 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18245 = torch.constant.int 1
    %int0_18246 = torch.constant.int 0
    %20576 = torch.prim.ListConstruct %int1_18245, %int0_18246 : (!torch.int, !torch.int) -> !torch.list<int>
    %20577 = torch.aten.permute %709, %20576 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18247 = torch.constant.int 1
    %int0_18248 = torch.constant.int 0
    %20578 = torch.prim.ListConstruct %int1_18247, %int0_18248 : (!torch.int, !torch.int) -> !torch.list<int>
    %20579 = torch.aten.permute %710, %20578 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18249 = torch.constant.int 1
    %int0_18250 = torch.constant.int 0
    %20580 = torch.prim.ListConstruct %int1_18249, %int0_18250 : (!torch.int, !torch.int) -> !torch.list<int>
    %20581 = torch.aten.permute %711, %20580 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
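    // Per-shard gate projection: flatten [4,?,4096] to [4*?,4096], matmul
    // against the transposed shard weight, then restore the batch dim with a
    // view. The same flatten/mm/view pattern repeats for every device below.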
    %int4_18251 = torch.constant.int 4
    %20582 = torch.aten.mul.int %int4_18251, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18252 = torch.constant.int 4096
    %20583 = torch.prim.ListConstruct %20582, %int4096_18252 : (!torch.int, !torch.int) -> !torch.list<int>
    %20584 = torch.aten.view %20558, %20583 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20584, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20585 = torch.aten.mm %20584, %20567 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20585, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18253 = torch.constant.int 4
    %int1792_18254 = torch.constant.int 1792
    %20586 = torch.prim.ListConstruct %int4_18253, %2482, %int1792_18254 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20587 = torch.aten.view %20585, %20586 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18255 = torch.constant.int 4
    %20588 = torch.aten.mul.int %int4_18255, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18256 = torch.constant.int 4096
    %20589 = torch.prim.ListConstruct %20588, %int4096_18256 : (!torch.int, !torch.int) -> !torch.list<int>
    %20590 = torch.aten.view %20559, %20589 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20590, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20591 = torch.aten.mm %20590, %20569 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20591, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18257 = torch.constant.int 4
    %int1792_18258 = torch.constant.int 1792
    %20592 = torch.prim.ListConstruct %int4_18257, %2482, %int1792_18258 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20593 = torch.aten.view %20591, %20592 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18259 = torch.constant.int 4
    %20594 = torch.aten.mul.int %int4_18259, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18260 = torch.constant.int 4096
    %20595 = torch.prim.ListConstruct %20594, %int4096_18260 : (!torch.int, !torch.int) -> !torch.list<int>
    %20596 = torch.aten.view %20560, %20595 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20596, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20597 = torch.aten.mm %20596, %20571 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20597, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18261 = torch.constant.int 4
    %int1792_18262 = torch.constant.int 1792
    %20598 = torch.prim.ListConstruct %int4_18261, %2482, %int1792_18262 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20599 = torch.aten.view %20597, %20598 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18263 = torch.constant.int 4
    %20600 = torch.aten.mul.int %int4_18263, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18264 = torch.constant.int 4096
    %20601 = torch.prim.ListConstruct %20600, %int4096_18264 : (!torch.int, !torch.int) -> !torch.list<int>
    %20602 = torch.aten.view %20561, %20601 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20602, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20603 = torch.aten.mm %20602, %20573 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20603, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18265 = torch.constant.int 4
    %int1792_18266 = torch.constant.int 1792
    %20604 = torch.prim.ListConstruct %int4_18265, %2482, %int1792_18266 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20605 = torch.aten.view %20603, %20604 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18267 = torch.constant.int 4
    %20606 = torch.aten.mul.int %int4_18267, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18268 = torch.constant.int 4096
    %20607 = torch.prim.ListConstruct %20606, %int4096_18268 : (!torch.int, !torch.int) -> !torch.list<int>
    %20608 = torch.aten.view %20562, %20607 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20608, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20609 = torch.aten.mm %20608, %20575 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20609, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18269 = torch.constant.int 4
    %int1792_18270 = torch.constant.int 1792
    %20610 = torch.prim.ListConstruct %int4_18269, %2482, %int1792_18270 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20611 = torch.aten.view %20609, %20610 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18271 = torch.constant.int 4
    %20612 = torch.aten.mul.int %int4_18271, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18272 = torch.constant.int 4096
    %20613 = torch.prim.ListConstruct %20612, %int4096_18272 : (!torch.int, !torch.int) -> !torch.list<int>
    %20614 = torch.aten.view %20563, %20613 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20614, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20615 = torch.aten.mm %20614, %20577 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20615, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18273 = torch.constant.int 4
    %int1792_18274 = torch.constant.int 1792
    %20616 = torch.prim.ListConstruct %int4_18273, %2482, %int1792_18274 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20617 = torch.aten.view %20615, %20616 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18275 = torch.constant.int 4
    %20618 = torch.aten.mul.int %int4_18275, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18276 = torch.constant.int 4096
    %20619 = torch.prim.ListConstruct %20618, %int4096_18276 : (!torch.int, !torch.int) -> !torch.list<int>
    %20620 = torch.aten.view %20564, %20619 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20620, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20621 = torch.aten.mm %20620, %20579 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20621, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18277 = torch.constant.int 4
    %int1792_18278 = torch.constant.int 1792
    %20622 = torch.prim.ListConstruct %int4_18277, %2482, %int1792_18278 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20623 = torch.aten.view %20621, %20622 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18279 = torch.constant.int 4
    %20624 = torch.aten.mul.int %int4_18279, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18280 = torch.constant.int 4096
    %20625 = torch.prim.ListConstruct %20624, %int4096_18280 : (!torch.int, !torch.int) -> !torch.list<int>
    %20626 = torch.aten.view %20565, %20625 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20626, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20627 = torch.aten.mm %20626, %20581 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20627, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18281 = torch.constant.int 4
    %int1792_18282 = torch.constant.int 1792
    %20628 = torch.prim.ListConstruct %int4_18281, %2482, %int1792_18282 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20629 = torch.aten.view %20627, %20628 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
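    // SiLU activation on each shard's gate output.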
    %20630 = torch.aten.silu %20587 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20631 = torch.aten.silu %20593 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20632 = torch.aten.silu %20599 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20633 = torch.aten.silu %20605 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20634 = torch.aten.silu %20611 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20635 = torch.aten.silu %20617 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20636 = torch.aten.silu %20623 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20637 = torch.aten.silu %20629 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
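    // Transpose the second set of per-shard [1792,4096] weights (%712..%719),
    // presumably the FFN up projection given how their outputs are combined
    // with the SiLU results further down.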
    %int1_18283 = torch.constant.int 1
    %int0_18284 = torch.constant.int 0
    %20638 = torch.prim.ListConstruct %int1_18283, %int0_18284 : (!torch.int, !torch.int) -> !torch.list<int>
    %20639 = torch.aten.permute %712, %20638 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18285 = torch.constant.int 1
    %int0_18286 = torch.constant.int 0
    %20640 = torch.prim.ListConstruct %int1_18285, %int0_18286 : (!torch.int, !torch.int) -> !torch.list<int>
    %20641 = torch.aten.permute %713, %20640 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18287 = torch.constant.int 1
    %int0_18288 = torch.constant.int 0
    %20642 = torch.prim.ListConstruct %int1_18287, %int0_18288 : (!torch.int, !torch.int) -> !torch.list<int>
    %20643 = torch.aten.permute %714, %20642 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18289 = torch.constant.int 1
    %int0_18290 = torch.constant.int 0
    %20644 = torch.prim.ListConstruct %int1_18289, %int0_18290 : (!torch.int, !torch.int) -> !torch.list<int>
    %20645 = torch.aten.permute %715, %20644 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18291 = torch.constant.int 1
    %int0_18292 = torch.constant.int 0
    %20646 = torch.prim.ListConstruct %int1_18291, %int0_18292 : (!torch.int, !torch.int) -> !torch.list<int>
    %20647 = torch.aten.permute %716, %20646 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18293 = torch.constant.int 1
    %int0_18294 = torch.constant.int 0
    %20648 = torch.prim.ListConstruct %int1_18293, %int0_18294 : (!torch.int, !torch.int) -> !torch.list<int>
    %20649 = torch.aten.permute %717, %20648 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18295 = torch.constant.int 1
    %int0_18296 = torch.constant.int 0
    %20650 = torch.prim.ListConstruct %int1_18295, %int0_18296 : (!torch.int, !torch.int) -> !torch.list<int>
    %20651 = torch.aten.permute %718, %20650 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_18297 = torch.constant.int 1
    %int0_18298 = torch.constant.int 0
    %20652 = torch.prim.ListConstruct %int1_18297, %int0_18298 : (!torch.int, !torch.int) -> !torch.list<int>
    %20653 = torch.aten.permute %719, %20652 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
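    // Up projection per shard, reusing the flatten/mm/view pattern on the
    // same f16 inputs %20558..%20565.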
    %int4_18299 = torch.constant.int 4
    %20654 = torch.aten.mul.int %int4_18299, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18300 = torch.constant.int 4096
    %20655 = torch.prim.ListConstruct %20654, %int4096_18300 : (!torch.int, !torch.int) -> !torch.list<int>
    %20656 = torch.aten.view %20558, %20655 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20656, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20657 = torch.aten.mm %20656, %20639 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20657, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18301 = torch.constant.int 4
    %int1792_18302 = torch.constant.int 1792
    %20658 = torch.prim.ListConstruct %int4_18301, %2482, %int1792_18302 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20659 = torch.aten.view %20657, %20658 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18303 = torch.constant.int 4
    %20660 = torch.aten.mul.int %int4_18303, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18304 = torch.constant.int 4096
    %20661 = torch.prim.ListConstruct %20660, %int4096_18304 : (!torch.int, !torch.int) -> !torch.list<int>
    %20662 = torch.aten.view %20559, %20661 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20662, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20663 = torch.aten.mm %20662, %20641 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20663, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18305 = torch.constant.int 4
    %int1792_18306 = torch.constant.int 1792
    %20664 = torch.prim.ListConstruct %int4_18305, %2482, %int1792_18306 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20665 = torch.aten.view %20663, %20664 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18307 = torch.constant.int 4
    %20666 = torch.aten.mul.int %int4_18307, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18308 = torch.constant.int 4096
    %20667 = torch.prim.ListConstruct %20666, %int4096_18308 : (!torch.int, !torch.int) -> !torch.list<int>
    %20668 = torch.aten.view %20560, %20667 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20668, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20669 = torch.aten.mm %20668, %20643 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20669, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18309 = torch.constant.int 4
    %int1792_18310 = torch.constant.int 1792
    %20670 = torch.prim.ListConstruct %int4_18309, %2482, %int1792_18310 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20671 = torch.aten.view %20669, %20670 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18311 = torch.constant.int 4
    %20672 = torch.aten.mul.int %int4_18311, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18312 = torch.constant.int 4096
    %20673 = torch.prim.ListConstruct %20672, %int4096_18312 : (!torch.int, !torch.int) -> !torch.list<int>
    %20674 = torch.aten.view %20561, %20673 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20674, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20675 = torch.aten.mm %20674, %20645 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20675, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18313 = torch.constant.int 4
    %int1792_18314 = torch.constant.int 1792
    %20676 = torch.prim.ListConstruct %int4_18313, %2482, %int1792_18314 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20677 = torch.aten.view %20675, %20676 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18315 = torch.constant.int 4
    %20678 = torch.aten.mul.int %int4_18315, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18316 = torch.constant.int 4096
    %20679 = torch.prim.ListConstruct %20678, %int4096_18316 : (!torch.int, !torch.int) -> !torch.list<int>
    %20680 = torch.aten.view %20562, %20679 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20680, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20681 = torch.aten.mm %20680, %20647 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20681, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18317 = torch.constant.int 4
    %int1792_18318 = torch.constant.int 1792
    %20682 = torch.prim.ListConstruct %int4_18317, %2482, %int1792_18318 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20683 = torch.aten.view %20681, %20682 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18319 = torch.constant.int 4
    %20684 = torch.aten.mul.int %int4_18319, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18320 = torch.constant.int 4096
    %20685 = torch.prim.ListConstruct %20684, %int4096_18320 : (!torch.int, !torch.int) -> !torch.list<int>
    %20686 = torch.aten.view %20563, %20685 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20686, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20687 = torch.aten.mm %20686, %20649 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20687, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18321 = torch.constant.int 4
    %int1792_18322 = torch.constant.int 1792
    %20688 = torch.prim.ListConstruct %int4_18321, %2482, %int1792_18322 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20689 = torch.aten.view %20687, %20688 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18323 = torch.constant.int 4
    %20690 = torch.aten.mul.int %int4_18323, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18324 = torch.constant.int 4096
    %20691 = torch.prim.ListConstruct %20690, %int4096_18324 : (!torch.int, !torch.int) -> !torch.list<int>
    %20692 = torch.aten.view %20564, %20691 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20692, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20693 = torch.aten.mm %20692, %20651 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20693, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18325 = torch.constant.int 4
    %int1792_18326 = torch.constant.int 1792
    %20694 = torch.prim.ListConstruct %int4_18325, %2482, %int1792_18326 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20695 = torch.aten.view %20693, %20694 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_18327 = torch.constant.int 4
    %20696 = torch.aten.mul.int %int4_18327, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18328 = torch.constant.int 4096
    %20697 = torch.prim.ListConstruct %20696, %int4096_18328 : (!torch.int, !torch.int) -> !torch.list<int>
    %20698 = torch.aten.view %20565, %20697 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20698, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %20699 = torch.aten.mm %20698, %20653 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20699, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_18329 = torch.constant.int 4
    %int1792_18330 = torch.constant.int 1792
    %20700 = torch.prim.ListConstruct %int4_18329, %2482, %int1792_18330 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20701 = torch.aten.view %20699, %20700 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
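    // Gated product silu(gate) * up per shard -- the SwiGLU combination, if
    // the gate/up reading above is correct.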
    %20702 = torch.aten.mul.Tensor %20630, %20659 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20703 = torch.aten.mul.Tensor %20631, %20665 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20704 = torch.aten.mul.Tensor %20632, %20671 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20705 = torch.aten.mul.Tensor %20633, %20677 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20706 = torch.aten.mul.Tensor %20634, %20683 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20707 = torch.aten.mul.Tensor %20635, %20689 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20708 = torch.aten.mul.Tensor %20636, %20695 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %20709 = torch.aten.mul.Tensor %20637, %20701 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %20709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
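    // Transpose the eight per-shard down-projection weights (%720..%727,
    // [4096,1792] -> [1792,4096]). Here the shard dimension sits on the
    // reduction axis, so each device's matmul yields only a partial
    // [4,?,4096] sum that must be reduced across devices afterwards.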
    %int1_18331 = torch.constant.int 1
    %int0_18332 = torch.constant.int 0
    %20710 = torch.prim.ListConstruct %int1_18331, %int0_18332 : (!torch.int, !torch.int) -> !torch.list<int>
    %20711 = torch.aten.permute %720, %20710 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_18333 = torch.constant.int 1
    %int0_18334 = torch.constant.int 0
    %20712 = torch.prim.ListConstruct %int1_18333, %int0_18334 : (!torch.int, !torch.int) -> !torch.list<int>
    %20713 = torch.aten.permute %721, %20712 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_18335 = torch.constant.int 1
    %int0_18336 = torch.constant.int 0
    %20714 = torch.prim.ListConstruct %int1_18335, %int0_18336 : (!torch.int, !torch.int) -> !torch.list<int>
    %20715 = torch.aten.permute %722, %20714 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_18337 = torch.constant.int 1
    %int0_18338 = torch.constant.int 0
    %20716 = torch.prim.ListConstruct %int1_18337, %int0_18338 : (!torch.int, !torch.int) -> !torch.list<int>
    %20717 = torch.aten.permute %723, %20716 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_18339 = torch.constant.int 1
    %int0_18340 = torch.constant.int 0
    %20718 = torch.prim.ListConstruct %int1_18339, %int0_18340 : (!torch.int, !torch.int) -> !torch.list<int>
    %20719 = torch.aten.permute %724, %20718 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_18341 = torch.constant.int 1
    %int0_18342 = torch.constant.int 0
    %20720 = torch.prim.ListConstruct %int1_18341, %int0_18342 : (!torch.int, !torch.int) -> !torch.list<int>
    %20721 = torch.aten.permute %725, %20720 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_18343 = torch.constant.int 1
    %int0_18344 = torch.constant.int 0
    %20722 = torch.prim.ListConstruct %int1_18343, %int0_18344 : (!torch.int, !torch.int) -> !torch.list<int>
    %20723 = torch.aten.permute %726, %20722 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_18345 = torch.constant.int 1
    %int0_18346 = torch.constant.int 0
    %20724 = torch.prim.ListConstruct %int1_18345, %int0_18346 : (!torch.int, !torch.int) -> !torch.list<int>
    %20725 = torch.aten.permute %727, %20724 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
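    // Down projection per shard. aten.size.int re-reads the dynamic sequence
    // dim from the gate output so the flattened [?,1792] view can be undone
    // back to [4,?,4096] after each mm.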
    %int1_18347 = torch.constant.int 1
    %20726 = torch.aten.size.int %20587, %int1_18347 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_18348 = torch.constant.int 4
    %20727 = torch.aten.mul.int %int4_18348, %20726 : !torch.int, !torch.int -> !torch.int
    %int1792_18349 = torch.constant.int 1792
    %20728 = torch.prim.ListConstruct %20727, %int1792_18349 : (!torch.int, !torch.int) -> !torch.list<int>
    %20729 = torch.aten.view %20702, %20728 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20729, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %20730 = torch.aten.mm %20729, %20711 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20730, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_18350 = torch.constant.int 4
    %int4096_18351 = torch.constant.int 4096
    %20731 = torch.prim.ListConstruct %int4_18350, %20726, %int4096_18351 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20732 = torch.aten.view %20730, %20731 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18352 = torch.constant.int 1
    %20733 = torch.aten.size.int %20593, %int1_18352 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_18353 = torch.constant.int 4
    %20734 = torch.aten.mul.int %int4_18353, %20733 : !torch.int, !torch.int -> !torch.int
    %int1792_18354 = torch.constant.int 1792
    %20735 = torch.prim.ListConstruct %20734, %int1792_18354 : (!torch.int, !torch.int) -> !torch.list<int>
    %20736 = torch.aten.view %20703, %20735 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20736, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %20737 = torch.aten.mm %20736, %20713 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20737, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_18355 = torch.constant.int 4
    %int4096_18356 = torch.constant.int 4096
    %20738 = torch.prim.ListConstruct %int4_18355, %20733, %int4096_18356 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20739 = torch.aten.view %20737, %20738 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18357 = torch.constant.int 1
    %20740 = torch.aten.size.int %20599, %int1_18357 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_18358 = torch.constant.int 4
    %20741 = torch.aten.mul.int %int4_18358, %20740 : !torch.int, !torch.int -> !torch.int
    %int1792_18359 = torch.constant.int 1792
    %20742 = torch.prim.ListConstruct %20741, %int1792_18359 : (!torch.int, !torch.int) -> !torch.list<int>
    %20743 = torch.aten.view %20704, %20742 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20743, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %20744 = torch.aten.mm %20743, %20715 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20744, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_18360 = torch.constant.int 4
    %int4096_18361 = torch.constant.int 4096
    %20745 = torch.prim.ListConstruct %int4_18360, %20740, %int4096_18361 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20746 = torch.aten.view %20744, %20745 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18362 = torch.constant.int 1
    %20747 = torch.aten.size.int %20605, %int1_18362 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_18363 = torch.constant.int 4
    %20748 = torch.aten.mul.int %int4_18363, %20747 : !torch.int, !torch.int -> !torch.int
    %int1792_18364 = torch.constant.int 1792
    %20749 = torch.prim.ListConstruct %20748, %int1792_18364 : (!torch.int, !torch.int) -> !torch.list<int>
    %20750 = torch.aten.view %20705, %20749 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20750, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %20751 = torch.aten.mm %20750, %20717 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20751, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_18365 = torch.constant.int 4
    %int4096_18366 = torch.constant.int 4096
    %20752 = torch.prim.ListConstruct %int4_18365, %20747, %int4096_18366 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20753 = torch.aten.view %20751, %20752 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18367 = torch.constant.int 1
    %20754 = torch.aten.size.int %20611, %int1_18367 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_18368 = torch.constant.int 4
    %20755 = torch.aten.mul.int %int4_18368, %20754 : !torch.int, !torch.int -> !torch.int
    %int1792_18369 = torch.constant.int 1792
    %20756 = torch.prim.ListConstruct %20755, %int1792_18369 : (!torch.int, !torch.int) -> !torch.list<int>
    %20757 = torch.aten.view %20706, %20756 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20757, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %20758 = torch.aten.mm %20757, %20719 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20758, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_18370 = torch.constant.int 4
    %int4096_18371 = torch.constant.int 4096
    %20759 = torch.prim.ListConstruct %int4_18370, %20754, %int4096_18371 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20760 = torch.aten.view %20758, %20759 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18372 = torch.constant.int 1
    %20761 = torch.aten.size.int %20617, %int1_18372 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_18373 = torch.constant.int 4
    %20762 = torch.aten.mul.int %int4_18373, %20761 : !torch.int, !torch.int -> !torch.int
    %int1792_18374 = torch.constant.int 1792
    %20763 = torch.prim.ListConstruct %20762, %int1792_18374 : (!torch.int, !torch.int) -> !torch.list<int>
    %20764 = torch.aten.view %20707, %20763 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20764, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %20765 = torch.aten.mm %20764, %20721 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20765, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_18375 = torch.constant.int 4
    %int4096_18376 = torch.constant.int 4096
    %20766 = torch.prim.ListConstruct %int4_18375, %20761, %int4096_18376 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20767 = torch.aten.view %20765, %20766 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18377 = torch.constant.int 1
    %20768 = torch.aten.size.int %20623, %int1_18377 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_18378 = torch.constant.int 4
    %20769 = torch.aten.mul.int %int4_18378, %20768 : !torch.int, !torch.int -> !torch.int
    %int1792_18379 = torch.constant.int 1792
    %20770 = torch.prim.ListConstruct %20769, %int1792_18379 : (!torch.int, !torch.int) -> !torch.list<int>
    %20771 = torch.aten.view %20708, %20770 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20771, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %20772 = torch.aten.mm %20771, %20723 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20772, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_18380 = torch.constant.int 4
    %int4096_18381 = torch.constant.int 4096
    %20773 = torch.prim.ListConstruct %int4_18380, %20768, %int4096_18381 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20774 = torch.aten.view %20772, %20773 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18382 = torch.constant.int 1
    %20775 = torch.aten.size.int %20629, %int1_18382 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_18383 = torch.constant.int 4
    %20776 = torch.aten.mul.int %int4_18383, %20775 : !torch.int, !torch.int -> !torch.int
    %int1792_18384 = torch.constant.int 1792
    %20777 = torch.prim.ListConstruct %20776, %int1792_18384 : (!torch.int, !torch.int) -> !torch.list<int>
    %20778 = torch.aten.view %20709, %20777 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %20778, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %20779 = torch.aten.mm %20778, %20725 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %20779, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_18385 = torch.constant.int 4
    %int4096_18386 = torch.constant.int 4096
    %20780 = torch.prim.ListConstruct %int4_18385, %20775, %int4096_18386 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %20781 = torch.aten.view %20779, %20780 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
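    // What follows looks like an unrolled all-reduce: gather every device's
    // partial FFN output onto @__device_0 via flow.tensor.transfer, then sum
    // the eight partials. The same gather-and-add sequence is emitted once
    // per destination device below, so each device ends up with the full sum.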
    %20782 = torch_c.to_builtin_tensor %20739 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18387 = arith.constant 1 : index
    %dim_18388 = tensor.dim %20782, %c1_18387 : tensor<4x?x4096xf16>
    %20783 = flow.tensor.transfer %20782 : tensor<4x?x4096xf16>{%dim_18388} to #hal.device.promise<@__device_0>
    %20784 = torch_c.from_builtin_tensor %20783 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20785 = torch_c.to_builtin_tensor %20746 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18389 = arith.constant 1 : index
    %dim_18390 = tensor.dim %20785, %c1_18389 : tensor<4x?x4096xf16>
    %20786 = flow.tensor.transfer %20785 : tensor<4x?x4096xf16>{%dim_18390} to #hal.device.promise<@__device_0>
    %20787 = torch_c.from_builtin_tensor %20786 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20788 = torch_c.to_builtin_tensor %20753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18391 = arith.constant 1 : index
    %dim_18392 = tensor.dim %20788, %c1_18391 : tensor<4x?x4096xf16>
    %20789 = flow.tensor.transfer %20788 : tensor<4x?x4096xf16>{%dim_18392} to #hal.device.promise<@__device_0>
    %20790 = torch_c.from_builtin_tensor %20789 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20791 = torch_c.to_builtin_tensor %20760 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18393 = arith.constant 1 : index
    %dim_18394 = tensor.dim %20791, %c1_18393 : tensor<4x?x4096xf16>
    %20792 = flow.tensor.transfer %20791 : tensor<4x?x4096xf16>{%dim_18394} to #hal.device.promise<@__device_0>
    %20793 = torch_c.from_builtin_tensor %20792 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20794 = torch_c.to_builtin_tensor %20767 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18395 = arith.constant 1 : index
    %dim_18396 = tensor.dim %20794, %c1_18395 : tensor<4x?x4096xf16>
    %20795 = flow.tensor.transfer %20794 : tensor<4x?x4096xf16>{%dim_18396} to #hal.device.promise<@__device_0>
    %20796 = torch_c.from_builtin_tensor %20795 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20797 = torch_c.to_builtin_tensor %20774 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18397 = arith.constant 1 : index
    %dim_18398 = tensor.dim %20797, %c1_18397 : tensor<4x?x4096xf16>
    %20798 = flow.tensor.transfer %20797 : tensor<4x?x4096xf16>{%dim_18398} to #hal.device.promise<@__device_0>
    %20799 = torch_c.from_builtin_tensor %20798 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20800 = torch_c.to_builtin_tensor %20781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18399 = arith.constant 1 : index
    %dim_18400 = tensor.dim %20800, %c1_18399 : tensor<4x?x4096xf16>
    %20801 = flow.tensor.transfer %20800 : tensor<4x?x4096xf16>{%dim_18400} to #hal.device.promise<@__device_0>
    %20802 = torch_c.from_builtin_tensor %20801 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18401 = torch.constant.int 1
    %20803 = torch.aten.add.Tensor %20732, %20784, %int1_18401 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18402 = torch.constant.int 1
    %20804 = torch.aten.add.Tensor %20803, %20787, %int1_18402 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18403 = torch.constant.int 1
    %20805 = torch.aten.add.Tensor %20804, %20790, %int1_18403 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18404 = torch.constant.int 1
    %20806 = torch.aten.add.Tensor %20805, %20793, %int1_18404 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18405 = torch.constant.int 1
    %20807 = torch.aten.add.Tensor %20806, %20796, %int1_18405 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18406 = torch.constant.int 1
    %20808 = torch.aten.add.Tensor %20807, %20799, %int1_18406 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18407 = torch.constant.int 1
    %20809 = torch.aten.add.Tensor %20808, %20802, %int1_18407 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
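    // Device 1's copy of the reduction: transfer the same partials to
    // @__device_1 and accumulate, yielding an identical summed [4,?,4096]
    // tensor there (device 1's own partial %20739 needs no transfer).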
    %20810 = torch_c.to_builtin_tensor %20732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18408 = arith.constant 1 : index
    %dim_18409 = tensor.dim %20810, %c1_18408 : tensor<4x?x4096xf16>
    %20811 = flow.tensor.transfer %20810 : tensor<4x?x4096xf16>{%dim_18409} to #hal.device.promise<@__device_1>
    %20812 = torch_c.from_builtin_tensor %20811 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20813 = torch_c.to_builtin_tensor %20746 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18410 = arith.constant 1 : index
    %dim_18411 = tensor.dim %20813, %c1_18410 : tensor<4x?x4096xf16>
    %20814 = flow.tensor.transfer %20813 : tensor<4x?x4096xf16>{%dim_18411} to #hal.device.promise<@__device_1>
    %20815 = torch_c.from_builtin_tensor %20814 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20816 = torch_c.to_builtin_tensor %20753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18412 = arith.constant 1 : index
    %dim_18413 = tensor.dim %20816, %c1_18412 : tensor<4x?x4096xf16>
    %20817 = flow.tensor.transfer %20816 : tensor<4x?x4096xf16>{%dim_18413} to #hal.device.promise<@__device_1>
    %20818 = torch_c.from_builtin_tensor %20817 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20819 = torch_c.to_builtin_tensor %20760 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18414 = arith.constant 1 : index
    %dim_18415 = tensor.dim %20819, %c1_18414 : tensor<4x?x4096xf16>
    %20820 = flow.tensor.transfer %20819 : tensor<4x?x4096xf16>{%dim_18415} to #hal.device.promise<@__device_1>
    %20821 = torch_c.from_builtin_tensor %20820 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20822 = torch_c.to_builtin_tensor %20767 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18416 = arith.constant 1 : index
    %dim_18417 = tensor.dim %20822, %c1_18416 : tensor<4x?x4096xf16>
    %20823 = flow.tensor.transfer %20822 : tensor<4x?x4096xf16>{%dim_18417} to #hal.device.promise<@__device_1>
    %20824 = torch_c.from_builtin_tensor %20823 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20825 = torch_c.to_builtin_tensor %20774 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18418 = arith.constant 1 : index
    %dim_18419 = tensor.dim %20825, %c1_18418 : tensor<4x?x4096xf16>
    %20826 = flow.tensor.transfer %20825 : tensor<4x?x4096xf16>{%dim_18419} to #hal.device.promise<@__device_1>
    %20827 = torch_c.from_builtin_tensor %20826 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20828 = torch_c.to_builtin_tensor %20781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18420 = arith.constant 1 : index
    %dim_18421 = tensor.dim %20828, %c1_18420 : tensor<4x?x4096xf16>
    %20829 = flow.tensor.transfer %20828 : tensor<4x?x4096xf16>{%dim_18421} to #hal.device.promise<@__device_1>
    %20830 = torch_c.from_builtin_tensor %20829 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18422 = torch.constant.int 1
    %20831 = torch.aten.add.Tensor %20812, %20739, %int1_18422 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18423 = torch.constant.int 1
    %20832 = torch.aten.add.Tensor %20831, %20815, %int1_18423 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18424 = torch.constant.int 1
    %20833 = torch.aten.add.Tensor %20832, %20818, %int1_18424 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18425 = torch.constant.int 1
    %20834 = torch.aten.add.Tensor %20833, %20821, %int1_18425 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18426 = torch.constant.int 1
    %20835 = torch.aten.add.Tensor %20834, %20824, %int1_18426 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18427 = torch.constant.int 1
    %20836 = torch.aten.add.Tensor %20835, %20827, %int1_18427 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18428 = torch.constant.int 1
    %20837 = torch.aten.add.Tensor %20836, %20830, %int1_18428 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
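    // %20837 holds the full eight-way sum on @__device_1 (local partial %20739
    // plus the seven transferred copies). The same pattern now repeats for
    // @__device_2.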
    %20838 = torch_c.to_builtin_tensor %20732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18429 = arith.constant 1 : index
    %dim_18430 = tensor.dim %20838, %c1_18429 : tensor<4x?x4096xf16>
    %20839 = flow.tensor.transfer %20838 : tensor<4x?x4096xf16>{%dim_18430} to #hal.device.promise<@__device_2>
    %20840 = torch_c.from_builtin_tensor %20839 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20841 = torch_c.to_builtin_tensor %20739 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18431 = arith.constant 1 : index
    %dim_18432 = tensor.dim %20841, %c1_18431 : tensor<4x?x4096xf16>
    %20842 = flow.tensor.transfer %20841 : tensor<4x?x4096xf16>{%dim_18432} to #hal.device.promise<@__device_2>
    %20843 = torch_c.from_builtin_tensor %20842 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20844 = torch_c.to_builtin_tensor %20753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18433 = arith.constant 1 : index
    %dim_18434 = tensor.dim %20844, %c1_18433 : tensor<4x?x4096xf16>
    %20845 = flow.tensor.transfer %20844 : tensor<4x?x4096xf16>{%dim_18434} to #hal.device.promise<@__device_2>
    %20846 = torch_c.from_builtin_tensor %20845 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20846, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20847 = torch_c.to_builtin_tensor %20760 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18435 = arith.constant 1 : index
    %dim_18436 = tensor.dim %20847, %c1_18435 : tensor<4x?x4096xf16>
    %20848 = flow.tensor.transfer %20847 : tensor<4x?x4096xf16>{%dim_18436} to #hal.device.promise<@__device_2>
    %20849 = torch_c.from_builtin_tensor %20848 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20850 = torch_c.to_builtin_tensor %20767 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18437 = arith.constant 1 : index
    %dim_18438 = tensor.dim %20850, %c1_18437 : tensor<4x?x4096xf16>
    %20851 = flow.tensor.transfer %20850 : tensor<4x?x4096xf16>{%dim_18438} to #hal.device.promise<@__device_2>
    %20852 = torch_c.from_builtin_tensor %20851 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20853 = torch_c.to_builtin_tensor %20774 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18439 = arith.constant 1 : index
    %dim_18440 = tensor.dim %20853, %c1_18439 : tensor<4x?x4096xf16>
    %20854 = flow.tensor.transfer %20853 : tensor<4x?x4096xf16>{%dim_18440} to #hal.device.promise<@__device_2>
    %20855 = torch_c.from_builtin_tensor %20854 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20856 = torch_c.to_builtin_tensor %20781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18441 = arith.constant 1 : index
    %dim_18442 = tensor.dim %20856, %c1_18441 : tensor<4x?x4096xf16>
    %20857 = flow.tensor.transfer %20856 : tensor<4x?x4096xf16>{%dim_18442} to #hal.device.promise<@__device_2>
    %20858 = torch_c.from_builtin_tensor %20857 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18443 = torch.constant.int 1
    %20859 = torch.aten.add.Tensor %20840, %20843, %int1_18443 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18444 = torch.constant.int 1
    %20860 = torch.aten.add.Tensor %20859, %20746, %int1_18444 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18445 = torch.constant.int 1
    %20861 = torch.aten.add.Tensor %20860, %20846, %int1_18445 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18446 = torch.constant.int 1
    %20862 = torch.aten.add.Tensor %20861, %20849, %int1_18446 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18447 = torch.constant.int 1
    %20863 = torch.aten.add.Tensor %20862, %20852, %int1_18447 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18448 = torch.constant.int 1
    %20864 = torch.aten.add.Tensor %20863, %20855, %int1_18448 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18449 = torch.constant.int 1
    %20865 = torch.aten.add.Tensor %20864, %20858, %int1_18449 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
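    // %20865: eight-way sum materialized on @__device_2; next, @__device_3.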
    %20866 = torch_c.to_builtin_tensor %20732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18450 = arith.constant 1 : index
    %dim_18451 = tensor.dim %20866, %c1_18450 : tensor<4x?x4096xf16>
    %20867 = flow.tensor.transfer %20866 : tensor<4x?x4096xf16>{%dim_18451} to #hal.device.promise<@__device_3>
    %20868 = torch_c.from_builtin_tensor %20867 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20869 = torch_c.to_builtin_tensor %20739 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18452 = arith.constant 1 : index
    %dim_18453 = tensor.dim %20869, %c1_18452 : tensor<4x?x4096xf16>
    %20870 = flow.tensor.transfer %20869 : tensor<4x?x4096xf16>{%dim_18453} to #hal.device.promise<@__device_3>
    %20871 = torch_c.from_builtin_tensor %20870 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20872 = torch_c.to_builtin_tensor %20746 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18454 = arith.constant 1 : index
    %dim_18455 = tensor.dim %20872, %c1_18454 : tensor<4x?x4096xf16>
    %20873 = flow.tensor.transfer %20872 : tensor<4x?x4096xf16>{%dim_18455} to #hal.device.promise<@__device_3>
    %20874 = torch_c.from_builtin_tensor %20873 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20875 = torch_c.to_builtin_tensor %20760 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18456 = arith.constant 1 : index
    %dim_18457 = tensor.dim %20875, %c1_18456 : tensor<4x?x4096xf16>
    %20876 = flow.tensor.transfer %20875 : tensor<4x?x4096xf16>{%dim_18457} to #hal.device.promise<@__device_3>
    %20877 = torch_c.from_builtin_tensor %20876 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20878 = torch_c.to_builtin_tensor %20767 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18458 = arith.constant 1 : index
    %dim_18459 = tensor.dim %20878, %c1_18458 : tensor<4x?x4096xf16>
    %20879 = flow.tensor.transfer %20878 : tensor<4x?x4096xf16>{%dim_18459} to #hal.device.promise<@__device_3>
    %20880 = torch_c.from_builtin_tensor %20879 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20881 = torch_c.to_builtin_tensor %20774 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18460 = arith.constant 1 : index
    %dim_18461 = tensor.dim %20881, %c1_18460 : tensor<4x?x4096xf16>
    %20882 = flow.tensor.transfer %20881 : tensor<4x?x4096xf16>{%dim_18461} to #hal.device.promise<@__device_3>
    %20883 = torch_c.from_builtin_tensor %20882 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20884 = torch_c.to_builtin_tensor %20781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18462 = arith.constant 1 : index
    %dim_18463 = tensor.dim %20884, %c1_18462 : tensor<4x?x4096xf16>
    %20885 = flow.tensor.transfer %20884 : tensor<4x?x4096xf16>{%dim_18463} to #hal.device.promise<@__device_3>
    %20886 = torch_c.from_builtin_tensor %20885 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18464 = torch.constant.int 1
    %20887 = torch.aten.add.Tensor %20868, %20871, %int1_18464 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18465 = torch.constant.int 1
    %20888 = torch.aten.add.Tensor %20887, %20874, %int1_18465 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18466 = torch.constant.int 1
    %20889 = torch.aten.add.Tensor %20888, %20753, %int1_18466 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18467 = torch.constant.int 1
    %20890 = torch.aten.add.Tensor %20889, %20877, %int1_18467 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18468 = torch.constant.int 1
    %20891 = torch.aten.add.Tensor %20890, %20880, %int1_18468 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18469 = torch.constant.int 1
    %20892 = torch.aten.add.Tensor %20891, %20883, %int1_18469 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18470 = torch.constant.int 1
    %20893 = torch.aten.add.Tensor %20892, %20886, %int1_18470 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
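    // %20893: eight-way sum materialized on @__device_3; next, @__device_4.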
    %20894 = torch_c.to_builtin_tensor %20732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18471 = arith.constant 1 : index
    %dim_18472 = tensor.dim %20894, %c1_18471 : tensor<4x?x4096xf16>
    %20895 = flow.tensor.transfer %20894 : tensor<4x?x4096xf16>{%dim_18472} to #hal.device.promise<@__device_4>
    %20896 = torch_c.from_builtin_tensor %20895 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20897 = torch_c.to_builtin_tensor %20739 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18473 = arith.constant 1 : index
    %dim_18474 = tensor.dim %20897, %c1_18473 : tensor<4x?x4096xf16>
    %20898 = flow.tensor.transfer %20897 : tensor<4x?x4096xf16>{%dim_18474} to #hal.device.promise<@__device_4>
    %20899 = torch_c.from_builtin_tensor %20898 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20900 = torch_c.to_builtin_tensor %20746 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18475 = arith.constant 1 : index
    %dim_18476 = tensor.dim %20900, %c1_18475 : tensor<4x?x4096xf16>
    %20901 = flow.tensor.transfer %20900 : tensor<4x?x4096xf16>{%dim_18476} to #hal.device.promise<@__device_4>
    %20902 = torch_c.from_builtin_tensor %20901 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20903 = torch_c.to_builtin_tensor %20753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18477 = arith.constant 1 : index
    %dim_18478 = tensor.dim %20903, %c1_18477 : tensor<4x?x4096xf16>
    %20904 = flow.tensor.transfer %20903 : tensor<4x?x4096xf16>{%dim_18478} to #hal.device.promise<@__device_4>
    %20905 = torch_c.from_builtin_tensor %20904 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20906 = torch_c.to_builtin_tensor %20767 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18479 = arith.constant 1 : index
    %dim_18480 = tensor.dim %20906, %c1_18479 : tensor<4x?x4096xf16>
    %20907 = flow.tensor.transfer %20906 : tensor<4x?x4096xf16>{%dim_18480} to #hal.device.promise<@__device_4>
    %20908 = torch_c.from_builtin_tensor %20907 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20909 = torch_c.to_builtin_tensor %20774 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18481 = arith.constant 1 : index
    %dim_18482 = tensor.dim %20909, %c1_18481 : tensor<4x?x4096xf16>
    %20910 = flow.tensor.transfer %20909 : tensor<4x?x4096xf16>{%dim_18482} to #hal.device.promise<@__device_4>
    %20911 = torch_c.from_builtin_tensor %20910 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20912 = torch_c.to_builtin_tensor %20781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18483 = arith.constant 1 : index
    %dim_18484 = tensor.dim %20912, %c1_18483 : tensor<4x?x4096xf16>
    %20913 = flow.tensor.transfer %20912 : tensor<4x?x4096xf16>{%dim_18484} to #hal.device.promise<@__device_4>
    %20914 = torch_c.from_builtin_tensor %20913 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18485 = torch.constant.int 1
    %20915 = torch.aten.add.Tensor %20896, %20899, %int1_18485 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18486 = torch.constant.int 1
    %20916 = torch.aten.add.Tensor %20915, %20902, %int1_18486 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18487 = torch.constant.int 1
    %20917 = torch.aten.add.Tensor %20916, %20905, %int1_18487 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18488 = torch.constant.int 1
    %20918 = torch.aten.add.Tensor %20917, %20760, %int1_18488 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18489 = torch.constant.int 1
    %20919 = torch.aten.add.Tensor %20918, %20908, %int1_18489 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18490 = torch.constant.int 1
    %20920 = torch.aten.add.Tensor %20919, %20911, %int1_18490 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18491 = torch.constant.int 1
    %20921 = torch.aten.add.Tensor %20920, %20914, %int1_18491 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
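    // %20921: eight-way sum materialized on @__device_4; next, @__device_5.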
    %20922 = torch_c.to_builtin_tensor %20732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18492 = arith.constant 1 : index
    %dim_18493 = tensor.dim %20922, %c1_18492 : tensor<4x?x4096xf16>
    %20923 = flow.tensor.transfer %20922 : tensor<4x?x4096xf16>{%dim_18493} to #hal.device.promise<@__device_5>
    %20924 = torch_c.from_builtin_tensor %20923 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20925 = torch_c.to_builtin_tensor %20739 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18494 = arith.constant 1 : index
    %dim_18495 = tensor.dim %20925, %c1_18494 : tensor<4x?x4096xf16>
    %20926 = flow.tensor.transfer %20925 : tensor<4x?x4096xf16>{%dim_18495} to #hal.device.promise<@__device_5>
    %20927 = torch_c.from_builtin_tensor %20926 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20928 = torch_c.to_builtin_tensor %20746 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18496 = arith.constant 1 : index
    %dim_18497 = tensor.dim %20928, %c1_18496 : tensor<4x?x4096xf16>
    %20929 = flow.tensor.transfer %20928 : tensor<4x?x4096xf16>{%dim_18497} to #hal.device.promise<@__device_5>
    %20930 = torch_c.from_builtin_tensor %20929 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20931 = torch_c.to_builtin_tensor %20753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18498 = arith.constant 1 : index
    %dim_18499 = tensor.dim %20931, %c1_18498 : tensor<4x?x4096xf16>
    %20932 = flow.tensor.transfer %20931 : tensor<4x?x4096xf16>{%dim_18499} to #hal.device.promise<@__device_5>
    %20933 = torch_c.from_builtin_tensor %20932 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20934 = torch_c.to_builtin_tensor %20760 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18500 = arith.constant 1 : index
    %dim_18501 = tensor.dim %20934, %c1_18500 : tensor<4x?x4096xf16>
    %20935 = flow.tensor.transfer %20934 : tensor<4x?x4096xf16>{%dim_18501} to #hal.device.promise<@__device_5>
    %20936 = torch_c.from_builtin_tensor %20935 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20937 = torch_c.to_builtin_tensor %20774 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18502 = arith.constant 1 : index
    %dim_18503 = tensor.dim %20937, %c1_18502 : tensor<4x?x4096xf16>
    %20938 = flow.tensor.transfer %20937 : tensor<4x?x4096xf16>{%dim_18503} to #hal.device.promise<@__device_5>
    %20939 = torch_c.from_builtin_tensor %20938 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20940 = torch_c.to_builtin_tensor %20781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18504 = arith.constant 1 : index
    %dim_18505 = tensor.dim %20940, %c1_18504 : tensor<4x?x4096xf16>
    %20941 = flow.tensor.transfer %20940 : tensor<4x?x4096xf16>{%dim_18505} to #hal.device.promise<@__device_5>
    %20942 = torch_c.from_builtin_tensor %20941 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18506 = torch.constant.int 1
    %20943 = torch.aten.add.Tensor %20924, %20927, %int1_18506 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18507 = torch.constant.int 1
    %20944 = torch.aten.add.Tensor %20943, %20930, %int1_18507 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18508 = torch.constant.int 1
    %20945 = torch.aten.add.Tensor %20944, %20933, %int1_18508 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18509 = torch.constant.int 1
    %20946 = torch.aten.add.Tensor %20945, %20936, %int1_18509 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18510 = torch.constant.int 1
    %20947 = torch.aten.add.Tensor %20946, %20767, %int1_18510 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18511 = torch.constant.int 1
    %20948 = torch.aten.add.Tensor %20947, %20939, %int1_18511 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18512 = torch.constant.int 1
    %20949 = torch.aten.add.Tensor %20948, %20942, %int1_18512 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
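    // %20949: eight-way sum materialized on @__device_5; next, @__device_6.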
    %20950 = torch_c.to_builtin_tensor %20732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18513 = arith.constant 1 : index
    %dim_18514 = tensor.dim %20950, %c1_18513 : tensor<4x?x4096xf16>
    %20951 = flow.tensor.transfer %20950 : tensor<4x?x4096xf16>{%dim_18514} to #hal.device.promise<@__device_6>
    %20952 = torch_c.from_builtin_tensor %20951 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20953 = torch_c.to_builtin_tensor %20739 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18515 = arith.constant 1 : index
    %dim_18516 = tensor.dim %20953, %c1_18515 : tensor<4x?x4096xf16>
    %20954 = flow.tensor.transfer %20953 : tensor<4x?x4096xf16>{%dim_18516} to #hal.device.promise<@__device_6>
    %20955 = torch_c.from_builtin_tensor %20954 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20956 = torch_c.to_builtin_tensor %20746 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18517 = arith.constant 1 : index
    %dim_18518 = tensor.dim %20956, %c1_18517 : tensor<4x?x4096xf16>
    %20957 = flow.tensor.transfer %20956 : tensor<4x?x4096xf16>{%dim_18518} to #hal.device.promise<@__device_6>
    %20958 = torch_c.from_builtin_tensor %20957 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20959 = torch_c.to_builtin_tensor %20753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18519 = arith.constant 1 : index
    %dim_18520 = tensor.dim %20959, %c1_18519 : tensor<4x?x4096xf16>
    %20960 = flow.tensor.transfer %20959 : tensor<4x?x4096xf16>{%dim_18520} to #hal.device.promise<@__device_6>
    %20961 = torch_c.from_builtin_tensor %20960 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20962 = torch_c.to_builtin_tensor %20760 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18521 = arith.constant 1 : index
    %dim_18522 = tensor.dim %20962, %c1_18521 : tensor<4x?x4096xf16>
    %20963 = flow.tensor.transfer %20962 : tensor<4x?x4096xf16>{%dim_18522} to #hal.device.promise<@__device_6>
    %20964 = torch_c.from_builtin_tensor %20963 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20965 = torch_c.to_builtin_tensor %20767 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18523 = arith.constant 1 : index
    %dim_18524 = tensor.dim %20965, %c1_18523 : tensor<4x?x4096xf16>
    %20966 = flow.tensor.transfer %20965 : tensor<4x?x4096xf16>{%dim_18524} to #hal.device.promise<@__device_6>
    %20967 = torch_c.from_builtin_tensor %20966 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20968 = torch_c.to_builtin_tensor %20781 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18525 = arith.constant 1 : index
    %dim_18526 = tensor.dim %20968, %c1_18525 : tensor<4x?x4096xf16>
    %20969 = flow.tensor.transfer %20968 : tensor<4x?x4096xf16>{%dim_18526} to #hal.device.promise<@__device_6>
    %20970 = torch_c.from_builtin_tensor %20969 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18527 = torch.constant.int 1
    %20971 = torch.aten.add.Tensor %20952, %20955, %int1_18527 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18528 = torch.constant.int 1
    %20972 = torch.aten.add.Tensor %20971, %20958, %int1_18528 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18529 = torch.constant.int 1
    %20973 = torch.aten.add.Tensor %20972, %20961, %int1_18529 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18530 = torch.constant.int 1
    %20974 = torch.aten.add.Tensor %20973, %20964, %int1_18530 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18531 = torch.constant.int 1
    %20975 = torch.aten.add.Tensor %20974, %20967, %int1_18531 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18532 = torch.constant.int 1
    %20976 = torch.aten.add.Tensor %20975, %20774, %int1_18532 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18533 = torch.constant.int 1
    %20977 = torch.aten.add.Tensor %20976, %20970, %int1_18533 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
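    // %20977: eight-way sum materialized on @__device_6; next, @__device_7.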
    %20978 = torch_c.to_builtin_tensor %20732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18534 = arith.constant 1 : index
    %dim_18535 = tensor.dim %20978, %c1_18534 : tensor<4x?x4096xf16>
    %20979 = flow.tensor.transfer %20978 : tensor<4x?x4096xf16>{%dim_18535} to #hal.device.promise<@__device_7>
    %20980 = torch_c.from_builtin_tensor %20979 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20981 = torch_c.to_builtin_tensor %20739 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18536 = arith.constant 1 : index
    %dim_18537 = tensor.dim %20981, %c1_18536 : tensor<4x?x4096xf16>
    %20982 = flow.tensor.transfer %20981 : tensor<4x?x4096xf16>{%dim_18537} to #hal.device.promise<@__device_7>
    %20983 = torch_c.from_builtin_tensor %20982 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20984 = torch_c.to_builtin_tensor %20746 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18538 = arith.constant 1 : index
    %dim_18539 = tensor.dim %20984, %c1_18538 : tensor<4x?x4096xf16>
    %20985 = flow.tensor.transfer %20984 : tensor<4x?x4096xf16>{%dim_18539} to #hal.device.promise<@__device_7>
    %20986 = torch_c.from_builtin_tensor %20985 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20987 = torch_c.to_builtin_tensor %20753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18540 = arith.constant 1 : index
    %dim_18541 = tensor.dim %20987, %c1_18540 : tensor<4x?x4096xf16>
    %20988 = flow.tensor.transfer %20987 : tensor<4x?x4096xf16>{%dim_18541} to #hal.device.promise<@__device_7>
    %20989 = torch_c.from_builtin_tensor %20988 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20990 = torch_c.to_builtin_tensor %20760 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18542 = arith.constant 1 : index
    %dim_18543 = tensor.dim %20990, %c1_18542 : tensor<4x?x4096xf16>
    %20991 = flow.tensor.transfer %20990 : tensor<4x?x4096xf16>{%dim_18543} to #hal.device.promise<@__device_7>
    %20992 = torch_c.from_builtin_tensor %20991 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20993 = torch_c.to_builtin_tensor %20767 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18544 = arith.constant 1 : index
    %dim_18545 = tensor.dim %20993, %c1_18544 : tensor<4x?x4096xf16>
    %20994 = flow.tensor.transfer %20993 : tensor<4x?x4096xf16>{%dim_18545} to #hal.device.promise<@__device_7>
    %20995 = torch_c.from_builtin_tensor %20994 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %20996 = torch_c.to_builtin_tensor %20774 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_18546 = arith.constant 1 : index
    %dim_18547 = tensor.dim %20996, %c1_18546 : tensor<4x?x4096xf16>
    %20997 = flow.tensor.transfer %20996 : tensor<4x?x4096xf16>{%dim_18547} to #hal.device.promise<@__device_7>
    %20998 = torch_c.from_builtin_tensor %20997 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18548 = torch.constant.int 1
    %20999 = torch.aten.add.Tensor %20980, %20983, %int1_18548 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %20999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18549 = torch.constant.int 1
    %21000 = torch.aten.add.Tensor %20999, %20986, %int1_18549 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18550 = torch.constant.int 1
    %21001 = torch.aten.add.Tensor %21000, %20989, %int1_18550 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18551 = torch.constant.int 1
    %21002 = torch.aten.add.Tensor %21001, %20992, %int1_18551 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18552 = torch.constant.int 1
    %21003 = torch.aten.add.Tensor %21002, %20995, %int1_18552 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18553 = torch.constant.int 1
    %21004 = torch.aten.add.Tensor %21003, %20998, %int1_18553 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18554 = torch.constant.int 1
    %21005 = torch.aten.add.Tensor %21004, %20781, %int1_18554 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
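    // %21005 completes the all-reduce on @__device_7; every device now holds an
    // identical eight-way sum. The adds below fold each device's copy of that
    // reduced tensor (%20809, %20837, ..., %21005) into what appears to be the
    // per-device residual stream (%20486..%20493, defined earlier).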
    %int1_18555 = torch.constant.int 1
    %21006 = torch.aten.add.Tensor %20486, %20809, %int1_18555 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18556 = torch.constant.int 1
    %21007 = torch.aten.add.Tensor %20487, %20837, %int1_18556 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18557 = torch.constant.int 1
    %21008 = torch.aten.add.Tensor %20488, %20865, %int1_18557 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18558 = torch.constant.int 1
    %21009 = torch.aten.add.Tensor %20489, %20893, %int1_18558 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18559 = torch.constant.int 1
    %21010 = torch.aten.add.Tensor %20490, %20921, %int1_18559 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18560 = torch.constant.int 1
    %21011 = torch.aten.add.Tensor %20491, %20949, %int1_18560 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18561 = torch.constant.int 1
    %21012 = torch.aten.add.Tensor %20492, %20977, %int1_18561 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_18562 = torch.constant.int 1
    %21013 = torch.aten.add.Tensor %20493, %21005, %int1_18562 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
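    // What follows is consistent with an RMSNorm applied independently on each
    // device: first upcast the f16 residuals %21006..%21013 to f32 so the
    // normalization statistics are computed in full precision.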
    %int6_18563 = torch.constant.int 6
    %21014 = torch.prims.convert_element_type %21006, %int6_18563 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18564 = torch.constant.int 6
    %21015 = torch.prims.convert_element_type %21007, %int6_18564 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18565 = torch.constant.int 6
    %21016 = torch.prims.convert_element_type %21008, %int6_18565 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18566 = torch.constant.int 6
    %21017 = torch.prims.convert_element_type %21009, %int6_18566 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18567 = torch.constant.int 6
    %21018 = torch.prims.convert_element_type %21010, %int6_18567 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18568 = torch.constant.int 6
    %21019 = torch.prims.convert_element_type %21011, %int6_18568 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18569 = torch.constant.int 6
    %21020 = torch.prims.convert_element_type %21012, %int6_18569 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_18570 = torch.constant.int 6
    %21021 = torch.prims.convert_element_type %21013, %int6_18570 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
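    // Square each element (x^2), the first step of the mean-square statistic.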
    %int2_18571 = torch.constant.int 2
    %21022 = torch.aten.pow.Tensor_Scalar %21014, %int2_18571 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18572 = torch.constant.int 2
    %21023 = torch.aten.pow.Tensor_Scalar %21015, %int2_18572 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18573 = torch.constant.int 2
    %21024 = torch.aten.pow.Tensor_Scalar %21016, %int2_18573 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18574 = torch.constant.int 2
    %21025 = torch.aten.pow.Tensor_Scalar %21017, %int2_18574 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18575 = torch.constant.int 2
    %21026 = torch.aten.pow.Tensor_Scalar %21018, %int2_18575 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18576 = torch.constant.int 2
    %21027 = torch.aten.pow.Tensor_Scalar %21019, %int2_18576 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18577 = torch.constant.int 2
    %21028 = torch.aten.pow.Tensor_Scalar %21020, %int2_18577 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_18578 = torch.constant.int 2
    %21029 = torch.aten.pow.Tensor_Scalar %21021, %int2_18578 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
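    // Mean of the squares over the last dimension (dim = -1, keepdim = true),
    // reducing the 4096-wide feature axis to per-token [4, ?, 1] statistics.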
    %int-1_18579 = torch.constant.int -1
    %21030 = torch.prim.ListConstruct %int-1_18579 : (!torch.int) -> !torch.list<int>
    %true_18580 = torch.constant.bool true
    %none_18581 = torch.constant.none
    %21031 = torch.aten.mean.dim %21022, %21030, %true_18580, %none_18581 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18582 = torch.constant.int -1
    %21032 = torch.prim.ListConstruct %int-1_18582 : (!torch.int) -> !torch.list<int>
    %true_18583 = torch.constant.bool true
    %none_18584 = torch.constant.none
    %21033 = torch.aten.mean.dim %21023, %21032, %true_18583, %none_18584 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18585 = torch.constant.int -1
    %21034 = torch.prim.ListConstruct %int-1_18585 : (!torch.int) -> !torch.list<int>
    %true_18586 = torch.constant.bool true
    %none_18587 = torch.constant.none
    %21035 = torch.aten.mean.dim %21024, %21034, %true_18586, %none_18587 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18588 = torch.constant.int -1
    %21036 = torch.prim.ListConstruct %int-1_18588 : (!torch.int) -> !torch.list<int>
    %true_18589 = torch.constant.bool true
    %none_18590 = torch.constant.none
    %21037 = torch.aten.mean.dim %21025, %21036, %true_18589, %none_18590 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18591 = torch.constant.int -1
    %21038 = torch.prim.ListConstruct %int-1_18591 : (!torch.int) -> !torch.list<int>
    %true_18592 = torch.constant.bool true
    %none_18593 = torch.constant.none
    %21039 = torch.aten.mean.dim %21026, %21038, %true_18592, %none_18593 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18594 = torch.constant.int -1
    %21040 = torch.prim.ListConstruct %int-1_18594 : (!torch.int) -> !torch.list<int>
    %true_18595 = torch.constant.bool true
    %none_18596 = torch.constant.none
    %21041 = torch.aten.mean.dim %21027, %21040, %true_18595, %none_18596 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18597 = torch.constant.int -1
    %21042 = torch.prim.ListConstruct %int-1_18597 : (!torch.int) -> !torch.list<int>
    %true_18598 = torch.constant.bool true
    %none_18599 = torch.constant.none
    %21043 = torch.aten.mean.dim %21028, %21042, %true_18598, %none_18599 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_18600 = torch.constant.int -1
    %21044 = torch.prim.ListConstruct %int-1_18600 : (!torch.int) -> !torch.list<int>
    %true_18601 = torch.constant.bool true
    %none_18602 = torch.constant.none
    %21045 = torch.aten.mean.dim %21029, %21044, %true_18601, %none_18602 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
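    // Add the epsilon 9.9999997473787516e-6 (~1e-5) to each mean-square value
    // for numerical stability before taking the reciprocal square root.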
    %float9.999990e-06_18603 = torch.constant.float 9.9999997473787516E-6
    %int1_18604 = torch.constant.int 1
    %21046 = torch.aten.add.Scalar %21031, %float9.999990e-06_18603, %int1_18604 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18605 = torch.constant.float 9.9999997473787516E-6
    %int1_18606 = torch.constant.int 1
    %21047 = torch.aten.add.Scalar %21033, %float9.999990e-06_18605, %int1_18606 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18607 = torch.constant.float 9.9999997473787516E-6
    %int1_18608 = torch.constant.int 1
    %21048 = torch.aten.add.Scalar %21035, %float9.999990e-06_18607, %int1_18608 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18609 = torch.constant.float 9.9999997473787516E-6
    %int1_18610 = torch.constant.int 1
    %21049 = torch.aten.add.Scalar %21037, %float9.999990e-06_18609, %int1_18610 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18611 = torch.constant.float 9.9999997473787516E-6
    %int1_18612 = torch.constant.int 1
    %21050 = torch.aten.add.Scalar %21039, %float9.999990e-06_18611, %int1_18612 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18613 = torch.constant.float 9.9999997473787516E-6
    %int1_18614 = torch.constant.int 1
    %21051 = torch.aten.add.Scalar %21041, %float9.999990e-06_18613, %int1_18614 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18615 = torch.constant.float 9.9999997473787516E-6
    %int1_18616 = torch.constant.int 1
    %21052 = torch.aten.add.Scalar %21043, %float9.999990e-06_18615, %int1_18616 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_18617 = torch.constant.float 9.9999997473787516E-6
    %int1_18618 = torch.constant.int 1
    %21053 = torch.aten.add.Scalar %21045, %float9.999990e-06_18617, %int1_18618 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
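    // rsqrt(mean(x^2) + eps) yields the per-token normalization factor.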
    %21054 = torch.aten.rsqrt %21046 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %21055 = torch.aten.rsqrt %21047 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %21056 = torch.aten.rsqrt %21048 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %21057 = torch.aten.rsqrt %21049 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %21058 = torch.aten.rsqrt %21050 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %21059 = torch.aten.rsqrt %21051 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %21060 = torch.aten.rsqrt %21052 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %21061 = torch.aten.rsqrt %21053 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %21061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
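    // Normalize: multiply each upcast activation tensor by its per-token
    // rsqrt factor (broadcast from [4, ?, 1] to [4, ?, 4096]).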
    %21062 = torch.aten.mul.Tensor %21014, %21054 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21063 = torch.aten.mul.Tensor %21015, %21055 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21064 = torch.aten.mul.Tensor %21016, %21056 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21065 = torch.aten.mul.Tensor %21017, %21057 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21066 = torch.aten.mul.Tensor %21018, %21058 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21067 = torch.aten.mul.Tensor %21019, %21059 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21068 = torch.aten.mul.Tensor %21020, %21060 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21069 = torch.aten.mul.Tensor %21021, %21061 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
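    // Scale by the learned norm weights (%728..%735, presumably the per-device attn_norm.weight replicas of shape [4096]).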
    %21070 = torch.aten.mul.Tensor %728, %21062 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21071 = torch.aten.mul.Tensor %729, %21063 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21072 = torch.aten.mul.Tensor %730, %21064 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21073 = torch.aten.mul.Tensor %731, %21065 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21074 = torch.aten.mul.Tensor %732, %21066 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21075 = torch.aten.mul.Tensor %733, %21067 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21076 = torch.aten.mul.Tensor %734, %21068 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %21077 = torch.aten.mul.Tensor %735, %21069 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %21077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
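    // Cast the normalized activations back to f16 (torch dtype code 5) for the attention projections.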
    %int5_18619 = torch.constant.int 5
    %21078 = torch.prims.convert_element_type %21070, %int5_18619 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18620 = torch.constant.int 5
    %21079 = torch.prims.convert_element_type %21071, %int5_18620 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18621 = torch.constant.int 5
    %21080 = torch.prims.convert_element_type %21072, %int5_18621 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18622 = torch.constant.int 5
    %21081 = torch.prims.convert_element_type %21073, %int5_18622 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18623 = torch.constant.int 5
    %21082 = torch.prims.convert_element_type %21074, %int5_18623 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18624 = torch.constant.int 5
    %21083 = torch.prims.convert_element_type %21075, %int5_18624 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18625 = torch.constant.int 5
    %21084 = torch.prims.convert_element_type %21076, %int5_18625 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_18626 = torch.constant.int 5
    %21085 = torch.prims.convert_element_type %21077, %int5_18626 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %21085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
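    // Transpose each Q-projection shard [512, 4096] -> [4096, 512] so it can right-multiply the activations.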
    %int1_18627 = torch.constant.int 1
    %int0_18628 = torch.constant.int 0
    %21086 = torch.prim.ListConstruct %int1_18627, %int0_18628 : (!torch.int, !torch.int) -> !torch.list<int>
    %21087 = torch.aten.permute %736, %21086 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_18629 = torch.constant.int 1
    %int0_18630 = torch.constant.int 0
    %21088 = torch.prim.ListConstruct %int1_18629, %int0_18630 : (!torch.int, !torch.int) -> !torch.list<int>
    %21089 = torch.aten.permute %737, %21088 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_18631 = torch.constant.int 1
    %int0_18632 = torch.constant.int 0
    %21090 = torch.prim.ListConstruct %int1_18631, %int0_18632 : (!torch.int, !torch.int) -> !torch.list<int>
    %21091 = torch.aten.permute %738, %21090 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_18633 = torch.constant.int 1
    %int0_18634 = torch.constant.int 0
    %21092 = torch.prim.ListConstruct %int1_18633, %int0_18634 : (!torch.int, !torch.int) -> !torch.list<int>
    %21093 = torch.aten.permute %739, %21092 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_18635 = torch.constant.int 1
    %int0_18636 = torch.constant.int 0
    %21094 = torch.prim.ListConstruct %int1_18635, %int0_18636 : (!torch.int, !torch.int) -> !torch.list<int>
    %21095 = torch.aten.permute %740, %21094 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_18637 = torch.constant.int 1
    %int0_18638 = torch.constant.int 0
    %21096 = torch.prim.ListConstruct %int1_18637, %int0_18638 : (!torch.int, !torch.int) -> !torch.list<int>
    %21097 = torch.aten.permute %741, %21096 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_18639 = torch.constant.int 1
    %int0_18640 = torch.constant.int 0
    %21098 = torch.prim.ListConstruct %int1_18639, %int0_18640 : (!torch.int, !torch.int) -> !torch.list<int>
    %21099 = torch.aten.permute %742, %21098 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_18641 = torch.constant.int 1
    %int0_18642 = torch.constant.int 0
    %21100 = torch.prim.ListConstruct %int1_18641, %int0_18642 : (!torch.int, !torch.int) -> !torch.list<int>
    %21101 = torch.aten.permute %743, %21100 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
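    // Per-shard Q projection: flatten [4, seq, 4096] to [4*seq, 4096], matmul with the transposed shard, then restore [4, seq, 512] (512 = 4 heads x 128).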
    %int4_18643 = torch.constant.int 4
    %21102 = torch.aten.mul.int %int4_18643, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18644 = torch.constant.int 4096
    %21103 = torch.prim.ListConstruct %21102, %int4096_18644 : (!torch.int, !torch.int) -> !torch.list<int>
    %21104 = torch.aten.view %21078, %21103 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21104, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21105 = torch.aten.mm %21104, %21087 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %21105, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_18645 = torch.constant.int 4
    %int512_18646 = torch.constant.int 512
    %21106 = torch.prim.ListConstruct %int4_18645, %2482, %int512_18646 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21107 = torch.aten.view %21105, %21106 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %21107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_18647 = torch.constant.int 4
    %21108 = torch.aten.mul.int %int4_18647, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18648 = torch.constant.int 4096
    %21109 = torch.prim.ListConstruct %21108, %int4096_18648 : (!torch.int, !torch.int) -> !torch.list<int>
    %21110 = torch.aten.view %21079, %21109 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21110, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21111 = torch.aten.mm %21110, %21089 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %21111, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_18649 = torch.constant.int 4
    %int512_18650 = torch.constant.int 512
    %21112 = torch.prim.ListConstruct %int4_18649, %2482, %int512_18650 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21113 = torch.aten.view %21111, %21112 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %21113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_18651 = torch.constant.int 4
    %21114 = torch.aten.mul.int %int4_18651, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18652 = torch.constant.int 4096
    %21115 = torch.prim.ListConstruct %21114, %int4096_18652 : (!torch.int, !torch.int) -> !torch.list<int>
    %21116 = torch.aten.view %21080, %21115 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21116, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21117 = torch.aten.mm %21116, %21091 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %21117, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_18653 = torch.constant.int 4
    %int512_18654 = torch.constant.int 512
    %21118 = torch.prim.ListConstruct %int4_18653, %2482, %int512_18654 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21119 = torch.aten.view %21117, %21118 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %21119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_18655 = torch.constant.int 4
    %21120 = torch.aten.mul.int %int4_18655, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18656 = torch.constant.int 4096
    %21121 = torch.prim.ListConstruct %21120, %int4096_18656 : (!torch.int, !torch.int) -> !torch.list<int>
    %21122 = torch.aten.view %21081, %21121 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21122, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21123 = torch.aten.mm %21122, %21093 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %21123, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_18657 = torch.constant.int 4
    %int512_18658 = torch.constant.int 512
    %21124 = torch.prim.ListConstruct %int4_18657, %2482, %int512_18658 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21125 = torch.aten.view %21123, %21124 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %21125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_18659 = torch.constant.int 4
    %21126 = torch.aten.mul.int %int4_18659, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18660 = torch.constant.int 4096
    %21127 = torch.prim.ListConstruct %21126, %int4096_18660 : (!torch.int, !torch.int) -> !torch.list<int>
    %21128 = torch.aten.view %21082, %21127 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21128, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21129 = torch.aten.mm %21128, %21095 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %21129, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_18661 = torch.constant.int 4
    %int512_18662 = torch.constant.int 512
    %21130 = torch.prim.ListConstruct %int4_18661, %2482, %int512_18662 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21131 = torch.aten.view %21129, %21130 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %21131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_18663 = torch.constant.int 4
    %21132 = torch.aten.mul.int %int4_18663, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18664 = torch.constant.int 4096
    %21133 = torch.prim.ListConstruct %21132, %int4096_18664 : (!torch.int, !torch.int) -> !torch.list<int>
    %21134 = torch.aten.view %21083, %21133 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21134, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21135 = torch.aten.mm %21134, %21097 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %21135, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_18665 = torch.constant.int 4
    %int512_18666 = torch.constant.int 512
    %21136 = torch.prim.ListConstruct %int4_18665, %2482, %int512_18666 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21137 = torch.aten.view %21135, %21136 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %21137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_18667 = torch.constant.int 4
    %21138 = torch.aten.mul.int %int4_18667, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18668 = torch.constant.int 4096
    %21139 = torch.prim.ListConstruct %21138, %int4096_18668 : (!torch.int, !torch.int) -> !torch.list<int>
    %21140 = torch.aten.view %21084, %21139 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21140, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21141 = torch.aten.mm %21140, %21099 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %21141, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_18669 = torch.constant.int 4
    %int512_18670 = torch.constant.int 512
    %21142 = torch.prim.ListConstruct %int4_18669, %2482, %int512_18670 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21143 = torch.aten.view %21141, %21142 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %21143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_18671 = torch.constant.int 4
    %21144 = torch.aten.mul.int %int4_18671, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18672 = torch.constant.int 4096
    %21145 = torch.prim.ListConstruct %21144, %int4096_18672 : (!torch.int, !torch.int) -> !torch.list<int>
    %21146 = torch.aten.view %21085, %21145 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21146, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21147 = torch.aten.mm %21146, %21101 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %21147, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_18673 = torch.constant.int 4
    %int512_18674 = torch.constant.int 512
    %21148 = torch.prim.ListConstruct %int4_18673, %2482, %int512_18674 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21149 = torch.aten.view %21147, %21148 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %21149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
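    // Same pattern for K: transpose each [128, 4096] shard to [4096, 128].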
    %int1_18675 = torch.constant.int 1
    %int0_18676 = torch.constant.int 0
    %21150 = torch.prim.ListConstruct %int1_18675, %int0_18676 : (!torch.int, !torch.int) -> !torch.list<int>
    %21151 = torch.aten.permute %744, %21150 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18677 = torch.constant.int 1
    %int0_18678 = torch.constant.int 0
    %21152 = torch.prim.ListConstruct %int1_18677, %int0_18678 : (!torch.int, !torch.int) -> !torch.list<int>
    %21153 = torch.aten.permute %745, %21152 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18679 = torch.constant.int 1
    %int0_18680 = torch.constant.int 0
    %21154 = torch.prim.ListConstruct %int1_18679, %int0_18680 : (!torch.int, !torch.int) -> !torch.list<int>
    %21155 = torch.aten.permute %746, %21154 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18681 = torch.constant.int 1
    %int0_18682 = torch.constant.int 0
    %21156 = torch.prim.ListConstruct %int1_18681, %int0_18682 : (!torch.int, !torch.int) -> !torch.list<int>
    %21157 = torch.aten.permute %747, %21156 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18683 = torch.constant.int 1
    %int0_18684 = torch.constant.int 0
    %21158 = torch.prim.ListConstruct %int1_18683, %int0_18684 : (!torch.int, !torch.int) -> !torch.list<int>
    %21159 = torch.aten.permute %748, %21158 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18685 = torch.constant.int 1
    %int0_18686 = torch.constant.int 0
    %21160 = torch.prim.ListConstruct %int1_18685, %int0_18686 : (!torch.int, !torch.int) -> !torch.list<int>
    %21161 = torch.aten.permute %749, %21160 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18687 = torch.constant.int 1
    %int0_18688 = torch.constant.int 0
    %21162 = torch.prim.ListConstruct %int1_18687, %int0_18688 : (!torch.int, !torch.int) -> !torch.list<int>
    %21163 = torch.aten.permute %750, %21162 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18689 = torch.constant.int 1
    %int0_18690 = torch.constant.int 0
    %21164 = torch.prim.ListConstruct %int1_18689, %int0_18690 : (!torch.int, !torch.int) -> !torch.list<int>
    %21165 = torch.aten.permute %751, %21164 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
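    // Per-shard K projection: flatten, matmul, reshape to [4, seq, 128].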
    %int4_18691 = torch.constant.int 4
    %21166 = torch.aten.mul.int %int4_18691, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18692 = torch.constant.int 4096
    %21167 = torch.prim.ListConstruct %21166, %int4096_18692 : (!torch.int, !torch.int) -> !torch.list<int>
    %21168 = torch.aten.view %21078, %21167 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21168, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21169 = torch.aten.mm %21168, %21151 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21169, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18693 = torch.constant.int 4
    %int128_18694 = torch.constant.int 128
    %21170 = torch.prim.ListConstruct %int4_18693, %2482, %int128_18694 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21171 = torch.aten.view %21169, %21170 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18695 = torch.constant.int 4
    %21172 = torch.aten.mul.int %int4_18695, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18696 = torch.constant.int 4096
    %21173 = torch.prim.ListConstruct %21172, %int4096_18696 : (!torch.int, !torch.int) -> !torch.list<int>
    %21174 = torch.aten.view %21079, %21173 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21174, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21175 = torch.aten.mm %21174, %21153 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21175, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18697 = torch.constant.int 4
    %int128_18698 = torch.constant.int 128
    %21176 = torch.prim.ListConstruct %int4_18697, %2482, %int128_18698 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21177 = torch.aten.view %21175, %21176 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18699 = torch.constant.int 4
    %21178 = torch.aten.mul.int %int4_18699, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18700 = torch.constant.int 4096
    %21179 = torch.prim.ListConstruct %21178, %int4096_18700 : (!torch.int, !torch.int) -> !torch.list<int>
    %21180 = torch.aten.view %21080, %21179 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21180, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21181 = torch.aten.mm %21180, %21155 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21181, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18701 = torch.constant.int 4
    %int128_18702 = torch.constant.int 128
    %21182 = torch.prim.ListConstruct %int4_18701, %2482, %int128_18702 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21183 = torch.aten.view %21181, %21182 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18703 = torch.constant.int 4
    %21184 = torch.aten.mul.int %int4_18703, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18704 = torch.constant.int 4096
    %21185 = torch.prim.ListConstruct %21184, %int4096_18704 : (!torch.int, !torch.int) -> !torch.list<int>
    %21186 = torch.aten.view %21081, %21185 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21186, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21187 = torch.aten.mm %21186, %21157 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21187, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18705 = torch.constant.int 4
    %int128_18706 = torch.constant.int 128
    %21188 = torch.prim.ListConstruct %int4_18705, %2482, %int128_18706 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21189 = torch.aten.view %21187, %21188 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18707 = torch.constant.int 4
    %21190 = torch.aten.mul.int %int4_18707, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18708 = torch.constant.int 4096
    %21191 = torch.prim.ListConstruct %21190, %int4096_18708 : (!torch.int, !torch.int) -> !torch.list<int>
    %21192 = torch.aten.view %21082, %21191 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21192, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21193 = torch.aten.mm %21192, %21159 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21193, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18709 = torch.constant.int 4
    %int128_18710 = torch.constant.int 128
    %21194 = torch.prim.ListConstruct %int4_18709, %2482, %int128_18710 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21195 = torch.aten.view %21193, %21194 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18711 = torch.constant.int 4
    %21196 = torch.aten.mul.int %int4_18711, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18712 = torch.constant.int 4096
    %21197 = torch.prim.ListConstruct %21196, %int4096_18712 : (!torch.int, !torch.int) -> !torch.list<int>
    %21198 = torch.aten.view %21083, %21197 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21198, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21199 = torch.aten.mm %21198, %21161 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21199, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18713 = torch.constant.int 4
    %int128_18714 = torch.constant.int 128
    %21200 = torch.prim.ListConstruct %int4_18713, %2482, %int128_18714 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21201 = torch.aten.view %21199, %21200 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18715 = torch.constant.int 4
    %21202 = torch.aten.mul.int %int4_18715, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18716 = torch.constant.int 4096
    %21203 = torch.prim.ListConstruct %21202, %int4096_18716 : (!torch.int, !torch.int) -> !torch.list<int>
    %21204 = torch.aten.view %21084, %21203 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21204, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21205 = torch.aten.mm %21204, %21163 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21205, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18717 = torch.constant.int 4
    %int128_18718 = torch.constant.int 128
    %21206 = torch.prim.ListConstruct %int4_18717, %2482, %int128_18718 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21207 = torch.aten.view %21205, %21206 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18719 = torch.constant.int 4
    %21208 = torch.aten.mul.int %int4_18719, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18720 = torch.constant.int 4096
    %21209 = torch.prim.ListConstruct %21208, %int4096_18720 : (!torch.int, !torch.int) -> !torch.list<int>
    %21210 = torch.aten.view %21085, %21209 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21210, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21211 = torch.aten.mm %21210, %21165 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21211, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18721 = torch.constant.int 4
    %int128_18722 = torch.constant.int 128
    %21212 = torch.prim.ListConstruct %int4_18721, %2482, %int128_18722 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21213 = torch.aten.view %21211, %21212 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
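    // And for V: transpose each [128, 4096] shard to [4096, 128].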
    %int1_18723 = torch.constant.int 1
    %int0_18724 = torch.constant.int 0
    %21214 = torch.prim.ListConstruct %int1_18723, %int0_18724 : (!torch.int, !torch.int) -> !torch.list<int>
    %21215 = torch.aten.permute %752, %21214 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18725 = torch.constant.int 1
    %int0_18726 = torch.constant.int 0
    %21216 = torch.prim.ListConstruct %int1_18725, %int0_18726 : (!torch.int, !torch.int) -> !torch.list<int>
    %21217 = torch.aten.permute %753, %21216 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18727 = torch.constant.int 1
    %int0_18728 = torch.constant.int 0
    %21218 = torch.prim.ListConstruct %int1_18727, %int0_18728 : (!torch.int, !torch.int) -> !torch.list<int>
    %21219 = torch.aten.permute %754, %21218 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18729 = torch.constant.int 1
    %int0_18730 = torch.constant.int 0
    %21220 = torch.prim.ListConstruct %int1_18729, %int0_18730 : (!torch.int, !torch.int) -> !torch.list<int>
    %21221 = torch.aten.permute %755, %21220 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18731 = torch.constant.int 1
    %int0_18732 = torch.constant.int 0
    %21222 = torch.prim.ListConstruct %int1_18731, %int0_18732 : (!torch.int, !torch.int) -> !torch.list<int>
    %21223 = torch.aten.permute %756, %21222 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18733 = torch.constant.int 1
    %int0_18734 = torch.constant.int 0
    %21224 = torch.prim.ListConstruct %int1_18733, %int0_18734 : (!torch.int, !torch.int) -> !torch.list<int>
    %21225 = torch.aten.permute %757, %21224 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18735 = torch.constant.int 1
    %int0_18736 = torch.constant.int 0
    %21226 = torch.prim.ListConstruct %int1_18735, %int0_18736 : (!torch.int, !torch.int) -> !torch.list<int>
    %21227 = torch.aten.permute %758, %21226 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_18737 = torch.constant.int 1
    %int0_18738 = torch.constant.int 0
    %21228 = torch.prim.ListConstruct %int1_18737, %int0_18738 : (!torch.int, !torch.int) -> !torch.list<int>
    %21229 = torch.aten.permute %759, %21228 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
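    // Per-shard V projection: flatten, matmul, reshape to [4, seq, 128].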
    %int4_18739 = torch.constant.int 4
    %21230 = torch.aten.mul.int %int4_18739, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18740 = torch.constant.int 4096
    %21231 = torch.prim.ListConstruct %21230, %int4096_18740 : (!torch.int, !torch.int) -> !torch.list<int>
    %21232 = torch.aten.view %21078, %21231 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21232, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21233 = torch.aten.mm %21232, %21215 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21233, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18741 = torch.constant.int 4
    %int128_18742 = torch.constant.int 128
    %21234 = torch.prim.ListConstruct %int4_18741, %2482, %int128_18742 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21235 = torch.aten.view %21233, %21234 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18743 = torch.constant.int 4
    %21236 = torch.aten.mul.int %int4_18743, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18744 = torch.constant.int 4096
    %21237 = torch.prim.ListConstruct %21236, %int4096_18744 : (!torch.int, !torch.int) -> !torch.list<int>
    %21238 = torch.aten.view %21079, %21237 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21238, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21239 = torch.aten.mm %21238, %21217 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21239, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18745 = torch.constant.int 4
    %int128_18746 = torch.constant.int 128
    %21240 = torch.prim.ListConstruct %int4_18745, %2482, %int128_18746 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21241 = torch.aten.view %21239, %21240 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18747 = torch.constant.int 4
    %21242 = torch.aten.mul.int %int4_18747, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18748 = torch.constant.int 4096
    %21243 = torch.prim.ListConstruct %21242, %int4096_18748 : (!torch.int, !torch.int) -> !torch.list<int>
    %21244 = torch.aten.view %21080, %21243 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21244, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21245 = torch.aten.mm %21244, %21219 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21245, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18749 = torch.constant.int 4
    %int128_18750 = torch.constant.int 128
    %21246 = torch.prim.ListConstruct %int4_18749, %2482, %int128_18750 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21247 = torch.aten.view %21245, %21246 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18751 = torch.constant.int 4
    %21248 = torch.aten.mul.int %int4_18751, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18752 = torch.constant.int 4096
    %21249 = torch.prim.ListConstruct %21248, %int4096_18752 : (!torch.int, !torch.int) -> !torch.list<int>
    %21250 = torch.aten.view %21081, %21249 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21250, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21251 = torch.aten.mm %21250, %21221 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21251, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18753 = torch.constant.int 4
    %int128_18754 = torch.constant.int 128
    %21252 = torch.prim.ListConstruct %int4_18753, %2482, %int128_18754 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21253 = torch.aten.view %21251, %21252 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18755 = torch.constant.int 4
    %21254 = torch.aten.mul.int %int4_18755, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18756 = torch.constant.int 4096
    %21255 = torch.prim.ListConstruct %21254, %int4096_18756 : (!torch.int, !torch.int) -> !torch.list<int>
    %21256 = torch.aten.view %21082, %21255 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21256, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21257 = torch.aten.mm %21256, %21223 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21257, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18757 = torch.constant.int 4
    %int128_18758 = torch.constant.int 128
    %21258 = torch.prim.ListConstruct %int4_18757, %2482, %int128_18758 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21259 = torch.aten.view %21257, %21258 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18759 = torch.constant.int 4
    %21260 = torch.aten.mul.int %int4_18759, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18760 = torch.constant.int 4096
    %21261 = torch.prim.ListConstruct %21260, %int4096_18760 : (!torch.int, !torch.int) -> !torch.list<int>
    %21262 = torch.aten.view %21083, %21261 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21262, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21263 = torch.aten.mm %21262, %21225 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21263, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18761 = torch.constant.int 4
    %int128_18762 = torch.constant.int 128
    %21264 = torch.prim.ListConstruct %int4_18761, %2482, %int128_18762 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21265 = torch.aten.view %21263, %21264 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18763 = torch.constant.int 4
    %21266 = torch.aten.mul.int %int4_18763, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18764 = torch.constant.int 4096
    %21267 = torch.prim.ListConstruct %21266, %int4096_18764 : (!torch.int, !torch.int) -> !torch.list<int>
    %21268 = torch.aten.view %21084, %21267 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21268, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21269 = torch.aten.mm %21268, %21227 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21269, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18765 = torch.constant.int 4
    %int128_18766 = torch.constant.int 128
    %21270 = torch.prim.ListConstruct %int4_18765, %2482, %int128_18766 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21271 = torch.aten.view %21269, %21270 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_18767 = torch.constant.int 4
    %21272 = torch.aten.mul.int %int4_18767, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_18768 = torch.constant.int 4096
    %21273 = torch.prim.ListConstruct %21272, %int4096_18768 : (!torch.int, !torch.int) -> !torch.list<int>
    %21274 = torch.aten.view %21085, %21273 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %21274, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %21275 = torch.aten.mm %21274, %21229 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %21275, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_18769 = torch.constant.int 4
    %int128_18770 = torch.constant.int 128
    %21276 = torch.prim.ListConstruct %int4_18769, %2482, %int128_18770 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21277 = torch.aten.view %21275, %21276 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %21277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
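    // Expose the head dimension on Q: [4, seq, 512] -> [4, seq, 4, 128] per shard.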
    %int4_18771 = torch.constant.int 4
    %int4_18772 = torch.constant.int 4
    %int128_18773 = torch.constant.int 128
    %21278 = torch.prim.ListConstruct %int4_18771, %2482, %int4_18772, %int128_18773 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21279 = torch.aten.view %21107, %21278 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_18774 = torch.constant.int 4
    %int4_18775 = torch.constant.int 4
    %int128_18776 = torch.constant.int 128
    %21280 = torch.prim.ListConstruct %int4_18774, %2482, %int4_18775, %int128_18776 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21281 = torch.aten.view %21113, %21280 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_18777 = torch.constant.int 4
    %int4_18778 = torch.constant.int 4
    %int128_18779 = torch.constant.int 128
    %21282 = torch.prim.ListConstruct %int4_18777, %2482, %int4_18778, %int128_18779 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21283 = torch.aten.view %21119, %21282 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_18780 = torch.constant.int 4
    %int4_18781 = torch.constant.int 4
    %int128_18782 = torch.constant.int 128
    %21284 = torch.prim.ListConstruct %int4_18780, %2482, %int4_18781, %int128_18782 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21285 = torch.aten.view %21125, %21284 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_18783 = torch.constant.int 4
    %int4_18784 = torch.constant.int 4
    %int128_18785 = torch.constant.int 128
    %21286 = torch.prim.ListConstruct %int4_18783, %2482, %int4_18784, %int128_18785 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21287 = torch.aten.view %21131, %21286 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_18786 = torch.constant.int 4
    %int4_18787 = torch.constant.int 4
    %int128_18788 = torch.constant.int 128
    %21288 = torch.prim.ListConstruct %int4_18786, %2482, %int4_18787, %int128_18788 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21289 = torch.aten.view %21137, %21288 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_18789 = torch.constant.int 4
    %int4_18790 = torch.constant.int 4
    %int128_18791 = torch.constant.int 128
    %21290 = torch.prim.ListConstruct %int4_18789, %2482, %int4_18790, %int128_18791 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21291 = torch.aten.view %21143, %21290 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_18792 = torch.constant.int 4
    %int4_18793 = torch.constant.int 4
    %int128_18794 = torch.constant.int 128
    %21292 = torch.prim.ListConstruct %int4_18792, %2482, %int4_18793, %int128_18794 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21293 = torch.aten.view %21149, %21292 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
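    // K and V get a singleton head dim: [4, seq, 128] -> [4, seq, 1, 128]; with 8 shards this looks like grouped-query attention (4 query heads sharing 1 KV head per shard).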
    %int4_18795 = torch.constant.int 4
    %int1_18796 = torch.constant.int 1
    %int128_18797 = torch.constant.int 128
    %21294 = torch.prim.ListConstruct %int4_18795, %2482, %int1_18796, %int128_18797 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21295 = torch.aten.view %21171, %21294 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18798 = torch.constant.int 4
    %int1_18799 = torch.constant.int 1
    %int128_18800 = torch.constant.int 128
    %21296 = torch.prim.ListConstruct %int4_18798, %2482, %int1_18799, %int128_18800 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21297 = torch.aten.view %21177, %21296 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18801 = torch.constant.int 4
    %int1_18802 = torch.constant.int 1
    %int128_18803 = torch.constant.int 128
    %21298 = torch.prim.ListConstruct %int4_18801, %2482, %int1_18802, %int128_18803 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21299 = torch.aten.view %21183, %21298 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18804 = torch.constant.int 4
    %int1_18805 = torch.constant.int 1
    %int128_18806 = torch.constant.int 128
    %21300 = torch.prim.ListConstruct %int4_18804, %2482, %int1_18805, %int128_18806 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21301 = torch.aten.view %21189, %21300 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18807 = torch.constant.int 4
    %int1_18808 = torch.constant.int 1
    %int128_18809 = torch.constant.int 128
    %21302 = torch.prim.ListConstruct %int4_18807, %2482, %int1_18808, %int128_18809 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21303 = torch.aten.view %21195, %21302 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18810 = torch.constant.int 4
    %int1_18811 = torch.constant.int 1
    %int128_18812 = torch.constant.int 128
    %21304 = torch.prim.ListConstruct %int4_18810, %2482, %int1_18811, %int128_18812 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21305 = torch.aten.view %21201, %21304 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18813 = torch.constant.int 4
    %int1_18814 = torch.constant.int 1
    %int128_18815 = torch.constant.int 128
    %21306 = torch.prim.ListConstruct %int4_18813, %2482, %int1_18814, %int128_18815 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21307 = torch.aten.view %21207, %21306 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18816 = torch.constant.int 4
    %int1_18817 = torch.constant.int 1
    %int128_18818 = torch.constant.int 128
    %21308 = torch.prim.ListConstruct %int4_18816, %2482, %int1_18817, %int128_18818 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21309 = torch.aten.view %21213, %21308 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18819 = torch.constant.int 4
    %int1_18820 = torch.constant.int 1
    %int128_18821 = torch.constant.int 128
    %21310 = torch.prim.ListConstruct %int4_18819, %2482, %int1_18820, %int128_18821 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21311 = torch.aten.view %21235, %21310 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18822 = torch.constant.int 4
    %int1_18823 = torch.constant.int 1
    %int128_18824 = torch.constant.int 128
    %21312 = torch.prim.ListConstruct %int4_18822, %2482, %int1_18823, %int128_18824 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21313 = torch.aten.view %21241, %21312 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18825 = torch.constant.int 4
    %int1_18826 = torch.constant.int 1
    %int128_18827 = torch.constant.int 128
    %21314 = torch.prim.ListConstruct %int4_18825, %2482, %int1_18826, %int128_18827 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21315 = torch.aten.view %21247, %21314 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18828 = torch.constant.int 4
    %int1_18829 = torch.constant.int 1
    %int128_18830 = torch.constant.int 128
    %21316 = torch.prim.ListConstruct %int4_18828, %2482, %int1_18829, %int128_18830 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21317 = torch.aten.view %21253, %21316 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18831 = torch.constant.int 4
    %int1_18832 = torch.constant.int 1
    %int128_18833 = torch.constant.int 128
    %21318 = torch.prim.ListConstruct %int4_18831, %2482, %int1_18832, %int128_18833 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21319 = torch.aten.view %21259, %21318 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18834 = torch.constant.int 4
    %int1_18835 = torch.constant.int 1
    %int128_18836 = torch.constant.int 128
    %21320 = torch.prim.ListConstruct %int4_18834, %2482, %int1_18835, %int128_18836 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21321 = torch.aten.view %21265, %21320 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18837 = torch.constant.int 4
    %int1_18838 = torch.constant.int 1
    %int128_18839 = torch.constant.int 128
    %21322 = torch.prim.ListConstruct %int4_18837, %2482, %int1_18838, %int128_18839 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21323 = torch.aten.view %21271, %21322 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_18840 = torch.constant.int 4
    %int1_18841 = torch.constant.int 1
    %int128_18842 = torch.constant.int 128
    %21324 = torch.prim.ListConstruct %int4_18840, %2482, %int1_18841, %int128_18842 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21325 = torch.aten.view %21277, %21324 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
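    // RoPE frequency table: inv_freq = 500000^(-(2i/128)) for i in 0..63, angles = position * inv_freq over positions 0..131071, packed as cos + i*sin in complex<f32>.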
    %int131072_18843 = torch.constant.int 131072
    %none_18844 = torch.constant.none
    %none_18845 = torch.constant.none
    %cpu_18846 = torch.constant.device "cpu"
    %false_18847 = torch.constant.bool false
    %21326 = torch.aten.arange %int131072_18843, %none_18844, %none_18845, %cpu_18846, %false_18847 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_18848 = torch.constant.int 0
    %int128_18849 = torch.constant.int 128
    %int2_18850 = torch.constant.int 2
    %none_18851 = torch.constant.none
    %none_18852 = torch.constant.none
    %cpu_18853 = torch.constant.device "cpu"
    %false_18854 = torch.constant.bool false
    %21327 = torch.aten.arange.start_step %int0_18848, %int128_18849, %int2_18850, %none_18851, %none_18852, %cpu_18853, %false_18854 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_18855 = torch.constant.int 0
    %int0_18856 = torch.constant.int 0
    %int64_18857 = torch.constant.int 64
    %int1_18858 = torch.constant.int 1
    %21328 = torch.aten.slice.Tensor %21327, %int0_18855, %int0_18856, %int64_18857, %int1_18858 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_18859 = torch.constant.int 6
    %21329 = torch.prims.convert_element_type %21328, %int6_18859 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_18860 = torch.constant.int 128
    %21330 = torch.aten.div.Scalar %21329, %int128_18860 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_18861 = torch.constant.float 5.000000e+05
    %21331 = torch.aten.pow.Scalar %float5.000000e05_18861, %21330 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %21332 = torch.aten.reciprocal %21331 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_18862 = torch.constant.float 1.000000e+00
    %21333 = torch.aten.mul.Scalar %21332, %float1.000000e00_18862 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_18863 = torch.constant.int 131072
    %int1_18864 = torch.constant.int 1
    %21334 = torch.prim.ListConstruct %int131072_18863, %int1_18864 : (!torch.int, !torch.int) -> !torch.list<int>
    %21335 = torch.aten.view %21326, %21334 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %21336 = torch.aten.mul.Tensor %21335, %21333 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %21337 = torch.aten.cos %21336 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %21338 = torch.aten.sin %21336 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %21339 = torch.aten.complex %21337, %21338 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
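    // The table is computed once on the host (the "cpu" device constants
    // above) and replicated to all eight devices via flow.tensor.transfer;
    // the from_builtin_tensor results (%21342, %21345, ..., %21363) are
    // the per-device copies consumed by the slices further down.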
    %21340 = torch_c.to_builtin_tensor %21339 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21341 = flow.tensor.transfer %21340 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %21342 = torch_c.from_builtin_tensor %21341 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21343 = torch_c.to_builtin_tensor %21339 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21344 = flow.tensor.transfer %21343 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %21345 = torch_c.from_builtin_tensor %21344 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21346 = torch_c.to_builtin_tensor %21339 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21347 = flow.tensor.transfer %21346 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %21348 = torch_c.from_builtin_tensor %21347 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21349 = torch_c.to_builtin_tensor %21339 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21350 = flow.tensor.transfer %21349 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %21351 = torch_c.from_builtin_tensor %21350 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21352 = torch_c.to_builtin_tensor %21339 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21353 = flow.tensor.transfer %21352 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %21354 = torch_c.from_builtin_tensor %21353 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21355 = torch_c.to_builtin_tensor %21339 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21356 = flow.tensor.transfer %21355 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %21357 = torch_c.from_builtin_tensor %21356 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21358 = torch_c.to_builtin_tensor %21339 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21359 = flow.tensor.transfer %21358 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %21360 = torch_c.from_builtin_tensor %21359 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21361 = torch_c.to_builtin_tensor %21339 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21362 = flow.tensor.transfer %21361 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %21363 = torch_c.from_builtin_tensor %21362 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
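    // Per-device rotary application (device 0 first; devices 1-7 repeat
    // the same pattern). The sequence length is read from dim 1 of the
    // corresponding [4,?,512] shard, the table copy is sliced to that
    // length and unsqueezed to [1,?,1,64], and the [4,?,4,128] f16
    // activation is bitcast so adjacent f16 pairs form complex<f16>
    // values. A complex multiply applies the rotation; the product is
    // bitcast back to [4,?,4,128] f32 and truncated to f16.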
    %int1_18865 = torch.constant.int 1
    %21364 = torch.aten.size.int %21107, %int1_18865 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_18866 = torch.constant.int 0
    %21365 = torch.aten.add.int %int0_18866, %21364 : !torch.int, !torch.int -> !torch.int
    %int0_18867 = torch.constant.int 0
    %int0_18868 = torch.constant.int 0
    %int1_18869 = torch.constant.int 1
    %21366 = torch.aten.slice.Tensor %21342, %int0_18867, %int0_18868, %21365, %int1_18869 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21366, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_18870 = torch.constant.int 1
    %int0_18871 = torch.constant.int 0
    %int9223372036854775807_18872 = torch.constant.int 9223372036854775807
    %int1_18873 = torch.constant.int 1
    %21367 = torch.aten.slice.Tensor %21366, %int1_18870, %int0_18871, %int9223372036854775807_18872, %int1_18873 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21367, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_18874 = torch.constant.int 0
    %21368 = torch.aten.unsqueeze %21367, %int0_18874 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21368, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_18875 = torch.constant.int 2
    %21369 = torch.aten.unsqueeze %21368, %int2_18875 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21369, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_18876 = torch.constant.int 3
    %int0_18877 = torch.constant.int 0
    %int9223372036854775807_18878 = torch.constant.int 9223372036854775807
    %int1_18879 = torch.constant.int 1
    %21370 = torch.aten.slice.Tensor %21369, %int3_18876, %int0_18877, %int9223372036854775807_18878, %int1_18879 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21370, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21371 = torch_c.to_builtin_tensor %21279 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_18880 = arith.constant 1 : index
    %dim_18881 = tensor.dim %21371, %c1_18880 : tensor<4x?x4x128xf16>
    %21372 = flow.tensor.bitcast %21371 : tensor<4x?x4x128xf16>{%dim_18881} -> tensor<4x?x4x64xcomplex<f16>>{%dim_18881}
    %21373 = torch_c.from_builtin_tensor %21372 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %21373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %21374 = torch.aten.mul.Tensor %21373, %21370 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %21374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %21375 = torch_c.to_builtin_tensor %21374 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_18882 = arith.constant 1 : index
    %dim_18883 = tensor.dim %21375, %c1_18882 : tensor<4x?x4x64xcomplex<f32>>
    %21376 = flow.tensor.bitcast %21375 : tensor<4x?x4x64xcomplex<f32>>{%dim_18883} -> tensor<4x?x4x128xf32>{%dim_18883}
    %21377 = torch_c.from_builtin_tensor %21376 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %21377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_18884 = torch.constant.int 5
    %21378 = torch.prims.convert_element_type %21377, %int5_18884 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
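    // Devices 1-7: the same slice / bitcast / complex-multiply / bitcast
    // sequence, each against its own table copy (%21345 through %21363)
    // and its own activation shard.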
    %int1_18885 = torch.constant.int 1
    %21379 = torch.aten.size.int %21113, %int1_18885 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_18886 = torch.constant.int 0
    %21380 = torch.aten.add.int %int0_18886, %21379 : !torch.int, !torch.int -> !torch.int
    %int0_18887 = torch.constant.int 0
    %int0_18888 = torch.constant.int 0
    %int1_18889 = torch.constant.int 1
    %21381 = torch.aten.slice.Tensor %21345, %int0_18887, %int0_18888, %21380, %int1_18889 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21381, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_18890 = torch.constant.int 1
    %int0_18891 = torch.constant.int 0
    %int9223372036854775807_18892 = torch.constant.int 9223372036854775807
    %int1_18893 = torch.constant.int 1
    %21382 = torch.aten.slice.Tensor %21381, %int1_18890, %int0_18891, %int9223372036854775807_18892, %int1_18893 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21382, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_18894 = torch.constant.int 0
    %21383 = torch.aten.unsqueeze %21382, %int0_18894 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21383, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_18895 = torch.constant.int 2
    %21384 = torch.aten.unsqueeze %21383, %int2_18895 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21384, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_18896 = torch.constant.int 3
    %int0_18897 = torch.constant.int 0
    %int9223372036854775807_18898 = torch.constant.int 9223372036854775807
    %int1_18899 = torch.constant.int 1
    %21385 = torch.aten.slice.Tensor %21384, %int3_18896, %int0_18897, %int9223372036854775807_18898, %int1_18899 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21385, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21386 = torch_c.to_builtin_tensor %21281 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_18900 = arith.constant 1 : index
    %dim_18901 = tensor.dim %21386, %c1_18900 : tensor<4x?x4x128xf16>
    %21387 = flow.tensor.bitcast %21386 : tensor<4x?x4x128xf16>{%dim_18901} -> tensor<4x?x4x64xcomplex<f16>>{%dim_18901}
    %21388 = torch_c.from_builtin_tensor %21387 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %21388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %21389 = torch.aten.mul.Tensor %21388, %21385 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %21389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %21390 = torch_c.to_builtin_tensor %21389 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_18902 = arith.constant 1 : index
    %dim_18903 = tensor.dim %21390, %c1_18902 : tensor<4x?x4x64xcomplex<f32>>
    %21391 = flow.tensor.bitcast %21390 : tensor<4x?x4x64xcomplex<f32>>{%dim_18903} -> tensor<4x?x4x128xf32>{%dim_18903}
    %21392 = torch_c.from_builtin_tensor %21391 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %21392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_18904 = torch.constant.int 5
    %21393 = torch.prims.convert_element_type %21392, %int5_18904 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_18905 = torch.constant.int 1
    %21394 = torch.aten.size.int %21119, %int1_18905 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_18906 = torch.constant.int 0
    %21395 = torch.aten.add.int %int0_18906, %21394 : !torch.int, !torch.int -> !torch.int
    %int0_18907 = torch.constant.int 0
    %int0_18908 = torch.constant.int 0
    %int1_18909 = torch.constant.int 1
    %21396 = torch.aten.slice.Tensor %21348, %int0_18907, %int0_18908, %21395, %int1_18909 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21396, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_18910 = torch.constant.int 1
    %int0_18911 = torch.constant.int 0
    %int9223372036854775807_18912 = torch.constant.int 9223372036854775807
    %int1_18913 = torch.constant.int 1
    %21397 = torch.aten.slice.Tensor %21396, %int1_18910, %int0_18911, %int9223372036854775807_18912, %int1_18913 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21397, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_18914 = torch.constant.int 0
    %21398 = torch.aten.unsqueeze %21397, %int0_18914 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21398, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_18915 = torch.constant.int 2
    %21399 = torch.aten.unsqueeze %21398, %int2_18915 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21399, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_18916 = torch.constant.int 3
    %int0_18917 = torch.constant.int 0
    %int9223372036854775807_18918 = torch.constant.int 9223372036854775807
    %int1_18919 = torch.constant.int 1
    %21400 = torch.aten.slice.Tensor %21399, %int3_18916, %int0_18917, %int9223372036854775807_18918, %int1_18919 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21400, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21401 = torch_c.to_builtin_tensor %21283 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_18920 = arith.constant 1 : index
    %dim_18921 = tensor.dim %21401, %c1_18920 : tensor<4x?x4x128xf16>
    %21402 = flow.tensor.bitcast %21401 : tensor<4x?x4x128xf16>{%dim_18921} -> tensor<4x?x4x64xcomplex<f16>>{%dim_18921}
    %21403 = torch_c.from_builtin_tensor %21402 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %21403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %21404 = torch.aten.mul.Tensor %21403, %21400 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %21404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %21405 = torch_c.to_builtin_tensor %21404 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_18922 = arith.constant 1 : index
    %dim_18923 = tensor.dim %21405, %c1_18922 : tensor<4x?x4x64xcomplex<f32>>
    %21406 = flow.tensor.bitcast %21405 : tensor<4x?x4x64xcomplex<f32>>{%dim_18923} -> tensor<4x?x4x128xf32>{%dim_18923}
    %21407 = torch_c.from_builtin_tensor %21406 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %21407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_18924 = torch.constant.int 5
    %21408 = torch.prims.convert_element_type %21407, %int5_18924 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_18925 = torch.constant.int 1
    %21409 = torch.aten.size.int %21125, %int1_18925 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_18926 = torch.constant.int 0
    %21410 = torch.aten.add.int %int0_18926, %21409 : !torch.int, !torch.int -> !torch.int
    %int0_18927 = torch.constant.int 0
    %int0_18928 = torch.constant.int 0
    %int1_18929 = torch.constant.int 1
    %21411 = torch.aten.slice.Tensor %21351, %int0_18927, %int0_18928, %21410, %int1_18929 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21411, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_18930 = torch.constant.int 1
    %int0_18931 = torch.constant.int 0
    %int9223372036854775807_18932 = torch.constant.int 9223372036854775807
    %int1_18933 = torch.constant.int 1
    %21412 = torch.aten.slice.Tensor %21411, %int1_18930, %int0_18931, %int9223372036854775807_18932, %int1_18933 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21412, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_18934 = torch.constant.int 0
    %21413 = torch.aten.unsqueeze %21412, %int0_18934 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21413, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_18935 = torch.constant.int 2
    %21414 = torch.aten.unsqueeze %21413, %int2_18935 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21414, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_18936 = torch.constant.int 3
    %int0_18937 = torch.constant.int 0
    %int9223372036854775807_18938 = torch.constant.int 9223372036854775807
    %int1_18939 = torch.constant.int 1
    %21415 = torch.aten.slice.Tensor %21414, %int3_18936, %int0_18937, %int9223372036854775807_18938, %int1_18939 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21415, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21416 = torch_c.to_builtin_tensor %21285 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_18940 = arith.constant 1 : index
    %dim_18941 = tensor.dim %21416, %c1_18940 : tensor<4x?x4x128xf16>
    %21417 = flow.tensor.bitcast %21416 : tensor<4x?x4x128xf16>{%dim_18941} -> tensor<4x?x4x64xcomplex<f16>>{%dim_18941}
    %21418 = torch_c.from_builtin_tensor %21417 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %21418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %21419 = torch.aten.mul.Tensor %21418, %21415 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %21419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %21420 = torch_c.to_builtin_tensor %21419 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_18942 = arith.constant 1 : index
    %dim_18943 = tensor.dim %21420, %c1_18942 : tensor<4x?x4x64xcomplex<f32>>
    %21421 = flow.tensor.bitcast %21420 : tensor<4x?x4x64xcomplex<f32>>{%dim_18943} -> tensor<4x?x4x128xf32>{%dim_18943}
    %21422 = torch_c.from_builtin_tensor %21421 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %21422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_18944 = torch.constant.int 5
    %21423 = torch.prims.convert_element_type %21422, %int5_18944 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_18945 = torch.constant.int 1
    %21424 = torch.aten.size.int %21131, %int1_18945 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_18946 = torch.constant.int 0
    %21425 = torch.aten.add.int %int0_18946, %21424 : !torch.int, !torch.int -> !torch.int
    %int0_18947 = torch.constant.int 0
    %int0_18948 = torch.constant.int 0
    %int1_18949 = torch.constant.int 1
    %21426 = torch.aten.slice.Tensor %21354, %int0_18947, %int0_18948, %21425, %int1_18949 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21426, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_18950 = torch.constant.int 1
    %int0_18951 = torch.constant.int 0
    %int9223372036854775807_18952 = torch.constant.int 9223372036854775807
    %int1_18953 = torch.constant.int 1
    %21427 = torch.aten.slice.Tensor %21426, %int1_18950, %int0_18951, %int9223372036854775807_18952, %int1_18953 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21427, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_18954 = torch.constant.int 0
    %21428 = torch.aten.unsqueeze %21427, %int0_18954 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21428, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_18955 = torch.constant.int 2
    %21429 = torch.aten.unsqueeze %21428, %int2_18955 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21429, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_18956 = torch.constant.int 3
    %int0_18957 = torch.constant.int 0
    %int9223372036854775807_18958 = torch.constant.int 9223372036854775807
    %int1_18959 = torch.constant.int 1
    %21430 = torch.aten.slice.Tensor %21429, %int3_18956, %int0_18957, %int9223372036854775807_18958, %int1_18959 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21430, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21431 = torch_c.to_builtin_tensor %21287 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_18960 = arith.constant 1 : index
    %dim_18961 = tensor.dim %21431, %c1_18960 : tensor<4x?x4x128xf16>
    %21432 = flow.tensor.bitcast %21431 : tensor<4x?x4x128xf16>{%dim_18961} -> tensor<4x?x4x64xcomplex<f16>>{%dim_18961}
    %21433 = torch_c.from_builtin_tensor %21432 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %21433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %21434 = torch.aten.mul.Tensor %21433, %21430 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %21434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %21435 = torch_c.to_builtin_tensor %21434 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_18962 = arith.constant 1 : index
    %dim_18963 = tensor.dim %21435, %c1_18962 : tensor<4x?x4x64xcomplex<f32>>
    %21436 = flow.tensor.bitcast %21435 : tensor<4x?x4x64xcomplex<f32>>{%dim_18963} -> tensor<4x?x4x128xf32>{%dim_18963}
    %21437 = torch_c.from_builtin_tensor %21436 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %21437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_18964 = torch.constant.int 5
    %21438 = torch.prims.convert_element_type %21437, %int5_18964 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_18965 = torch.constant.int 1
    %21439 = torch.aten.size.int %21137, %int1_18965 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_18966 = torch.constant.int 0
    %21440 = torch.aten.add.int %int0_18966, %21439 : !torch.int, !torch.int -> !torch.int
    %int0_18967 = torch.constant.int 0
    %int0_18968 = torch.constant.int 0
    %int1_18969 = torch.constant.int 1
    %21441 = torch.aten.slice.Tensor %21357, %int0_18967, %int0_18968, %21440, %int1_18969 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21441, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_18970 = torch.constant.int 1
    %int0_18971 = torch.constant.int 0
    %int9223372036854775807_18972 = torch.constant.int 9223372036854775807
    %int1_18973 = torch.constant.int 1
    %21442 = torch.aten.slice.Tensor %21441, %int1_18970, %int0_18971, %int9223372036854775807_18972, %int1_18973 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21442, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_18974 = torch.constant.int 0
    %21443 = torch.aten.unsqueeze %21442, %int0_18974 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21443, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_18975 = torch.constant.int 2
    %21444 = torch.aten.unsqueeze %21443, %int2_18975 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21444, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_18976 = torch.constant.int 3
    %int0_18977 = torch.constant.int 0
    %int9223372036854775807_18978 = torch.constant.int 9223372036854775807
    %int1_18979 = torch.constant.int 1
    %21445 = torch.aten.slice.Tensor %21444, %int3_18976, %int0_18977, %int9223372036854775807_18978, %int1_18979 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21445, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21446 = torch_c.to_builtin_tensor %21289 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_18980 = arith.constant 1 : index
    %dim_18981 = tensor.dim %21446, %c1_18980 : tensor<4x?x4x128xf16>
    %21447 = flow.tensor.bitcast %21446 : tensor<4x?x4x128xf16>{%dim_18981} -> tensor<4x?x4x64xcomplex<f16>>{%dim_18981}
    %21448 = torch_c.from_builtin_tensor %21447 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %21448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %21449 = torch.aten.mul.Tensor %21448, %21445 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %21449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %21450 = torch_c.to_builtin_tensor %21449 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_18982 = arith.constant 1 : index
    %dim_18983 = tensor.dim %21450, %c1_18982 : tensor<4x?x4x64xcomplex<f32>>
    %21451 = flow.tensor.bitcast %21450 : tensor<4x?x4x64xcomplex<f32>>{%dim_18983} -> tensor<4x?x4x128xf32>{%dim_18983}
    %21452 = torch_c.from_builtin_tensor %21451 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %21452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_18984 = torch.constant.int 5
    %21453 = torch.prims.convert_element_type %21452, %int5_18984 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_18985 = torch.constant.int 1
    %21454 = torch.aten.size.int %21143, %int1_18985 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_18986 = torch.constant.int 0
    %21455 = torch.aten.add.int %int0_18986, %21454 : !torch.int, !torch.int -> !torch.int
    %int0_18987 = torch.constant.int 0
    %int0_18988 = torch.constant.int 0
    %int1_18989 = torch.constant.int 1
    %21456 = torch.aten.slice.Tensor %21360, %int0_18987, %int0_18988, %21455, %int1_18989 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21456, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_18990 = torch.constant.int 1
    %int0_18991 = torch.constant.int 0
    %int9223372036854775807_18992 = torch.constant.int 9223372036854775807
    %int1_18993 = torch.constant.int 1
    %21457 = torch.aten.slice.Tensor %21456, %int1_18990, %int0_18991, %int9223372036854775807_18992, %int1_18993 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21457, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_18994 = torch.constant.int 0
    %21458 = torch.aten.unsqueeze %21457, %int0_18994 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21458, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_18995 = torch.constant.int 2
    %21459 = torch.aten.unsqueeze %21458, %int2_18995 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21459, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_18996 = torch.constant.int 3
    %int0_18997 = torch.constant.int 0
    %int9223372036854775807_18998 = torch.constant.int 9223372036854775807
    %int1_18999 = torch.constant.int 1
    %21460 = torch.aten.slice.Tensor %21459, %int3_18996, %int0_18997, %int9223372036854775807_18998, %int1_18999 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21460, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21461 = torch_c.to_builtin_tensor %21291 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_19000 = arith.constant 1 : index
    %dim_19001 = tensor.dim %21461, %c1_19000 : tensor<4x?x4x128xf16>
    %21462 = flow.tensor.bitcast %21461 : tensor<4x?x4x128xf16>{%dim_19001} -> tensor<4x?x4x64xcomplex<f16>>{%dim_19001}
    %21463 = torch_c.from_builtin_tensor %21462 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %21463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %21464 = torch.aten.mul.Tensor %21463, %21460 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %21464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %21465 = torch_c.to_builtin_tensor %21464 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_19002 = arith.constant 1 : index
    %dim_19003 = tensor.dim %21465, %c1_19002 : tensor<4x?x4x64xcomplex<f32>>
    %21466 = flow.tensor.bitcast %21465 : tensor<4x?x4x64xcomplex<f32>>{%dim_19003} -> tensor<4x?x4x128xf32>{%dim_19003}
    %21467 = torch_c.from_builtin_tensor %21466 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %21467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_19004 = torch.constant.int 5
    %21468 = torch.prims.convert_element_type %21467, %int5_19004 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_19005 = torch.constant.int 1
    %21469 = torch.aten.size.int %21149, %int1_19005 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_19006 = torch.constant.int 0
    %21470 = torch.aten.add.int %int0_19006, %21469 : !torch.int, !torch.int -> !torch.int
    %int0_19007 = torch.constant.int 0
    %int0_19008 = torch.constant.int 0
    %int1_19009 = torch.constant.int 1
    %21471 = torch.aten.slice.Tensor %21363, %int0_19007, %int0_19008, %21470, %int1_19009 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21471, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_19010 = torch.constant.int 1
    %int0_19011 = torch.constant.int 0
    %int9223372036854775807_19012 = torch.constant.int 9223372036854775807
    %int1_19013 = torch.constant.int 1
    %21472 = torch.aten.slice.Tensor %21471, %int1_19010, %int0_19011, %int9223372036854775807_19012, %int1_19013 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21472, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_19014 = torch.constant.int 0
    %21473 = torch.aten.unsqueeze %21472, %int0_19014 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21473, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_19015 = torch.constant.int 2
    %21474 = torch.aten.unsqueeze %21473, %int2_19015 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21474, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_19016 = torch.constant.int 3
    %int0_19017 = torch.constant.int 0
    %int9223372036854775807_19018 = torch.constant.int 9223372036854775807
    %int1_19019 = torch.constant.int 1
    %21475 = torch.aten.slice.Tensor %21474, %int3_19016, %int0_19017, %int9223372036854775807_19018, %int1_19019 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21475, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21476 = torch_c.to_builtin_tensor %21293 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_19020 = arith.constant 1 : index
    %dim_19021 = tensor.dim %21476, %c1_19020 : tensor<4x?x4x128xf16>
    %21477 = flow.tensor.bitcast %21476 : tensor<4x?x4x128xf16>{%dim_19021} -> tensor<4x?x4x64xcomplex<f16>>{%dim_19021}
    %21478 = torch_c.from_builtin_tensor %21477 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %21478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %21479 = torch.aten.mul.Tensor %21478, %21475 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %21479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %21480 = torch_c.to_builtin_tensor %21479 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_19022 = arith.constant 1 : index
    %dim_19023 = tensor.dim %21480, %c1_19022 : tensor<4x?x4x64xcomplex<f32>>
    %21481 = flow.tensor.bitcast %21480 : tensor<4x?x4x64xcomplex<f32>>{%dim_19023} -> tensor<4x?x4x128xf32>{%dim_19023}
    %21482 = torch_c.from_builtin_tensor %21481 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %21482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_19024 = torch.constant.int 5
    %21483 = torch.prims.convert_element_type %21482, %int5_19024 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
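    // A second rotary table, numerically identical to %21339 (same
    // 131072 positions, head dim 128, rope_theta 5.0e5), is emitted here
    // for the [4,?,1,128] single-head tensors; the IR repeats the
    // computation instead of reusing the earlier result, presumably
    // leaving the duplicate for a later CSE pass to fold.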
    %int131072_19025 = torch.constant.int 131072
    %none_19026 = torch.constant.none
    %none_19027 = torch.constant.none
    %cpu_19028 = torch.constant.device "cpu"
    %false_19029 = torch.constant.bool false
    %21484 = torch.aten.arange %int131072_19025, %none_19026, %none_19027, %cpu_19028, %false_19029 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_19030 = torch.constant.int 0
    %int128_19031 = torch.constant.int 128
    %int2_19032 = torch.constant.int 2
    %none_19033 = torch.constant.none
    %none_19034 = torch.constant.none
    %cpu_19035 = torch.constant.device "cpu"
    %false_19036 = torch.constant.bool false
    %21485 = torch.aten.arange.start_step %int0_19030, %int128_19031, %int2_19032, %none_19033, %none_19034, %cpu_19035, %false_19036 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_19037 = torch.constant.int 0
    %int0_19038 = torch.constant.int 0
    %int64_19039 = torch.constant.int 64
    %int1_19040 = torch.constant.int 1
    %21486 = torch.aten.slice.Tensor %21485, %int0_19037, %int0_19038, %int64_19039, %int1_19040 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_19041 = torch.constant.int 6
    %21487 = torch.prims.convert_element_type %21486, %int6_19041 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_19042 = torch.constant.int 128
    %21488 = torch.aten.div.Scalar %21487, %int128_19042 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_19043 = torch.constant.float 5.000000e+05
    %21489 = torch.aten.pow.Scalar %float5.000000e05_19043, %21488 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %21490 = torch.aten.reciprocal %21489 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_19044 = torch.constant.float 1.000000e+00
    %21491 = torch.aten.mul.Scalar %21490, %float1.000000e00_19044 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_19045 = torch.constant.int 131072
    %int1_19046 = torch.constant.int 1
    %21492 = torch.prim.ListConstruct %int131072_19045, %int1_19046 : (!torch.int, !torch.int) -> !torch.list<int>
    %21493 = torch.aten.view %21484, %21492 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %21494 = torch.aten.mul.Tensor %21493, %21491 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %21495 = torch.aten.cos %21494 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %21496 = torch.aten.sin %21494 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %21497 = torch.aten.complex %21495, %21496 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
    %21498 = torch_c.to_builtin_tensor %21497 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21499 = flow.tensor.transfer %21498 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %21500 = torch_c.from_builtin_tensor %21499 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21501 = torch_c.to_builtin_tensor %21497 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21502 = flow.tensor.transfer %21501 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %21503 = torch_c.from_builtin_tensor %21502 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21504 = torch_c.to_builtin_tensor %21497 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21505 = flow.tensor.transfer %21504 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %21506 = torch_c.from_builtin_tensor %21505 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21507 = torch_c.to_builtin_tensor %21497 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21508 = flow.tensor.transfer %21507 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %21509 = torch_c.from_builtin_tensor %21508 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21510 = torch_c.to_builtin_tensor %21497 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21511 = flow.tensor.transfer %21510 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %21512 = torch_c.from_builtin_tensor %21511 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21513 = torch_c.to_builtin_tensor %21497 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21514 = flow.tensor.transfer %21513 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %21515 = torch_c.from_builtin_tensor %21514 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21516 = torch_c.to_builtin_tensor %21497 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21517 = flow.tensor.transfer %21516 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %21518 = torch_c.from_builtin_tensor %21517 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %21519 = torch_c.to_builtin_tensor %21497 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %21520 = flow.tensor.transfer %21519 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %21521 = torch_c.from_builtin_tensor %21520 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
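    // The fresh per-device copies (%21500 through %21521) are applied
    // below to the [4,?,1,128] single-head tensors, with the sequence
    // length now read from dim 1 of the corresponding [4,?,128] shard;
    // otherwise the pattern matches the four-head application above.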
    %int1_19047 = torch.constant.int 1
    %21522 = torch.aten.size.int %21171, %int1_19047 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_19048 = torch.constant.int 0
    %21523 = torch.aten.add.int %int0_19048, %21522 : !torch.int, !torch.int -> !torch.int
    %int0_19049 = torch.constant.int 0
    %int0_19050 = torch.constant.int 0
    %int1_19051 = torch.constant.int 1
    %21524 = torch.aten.slice.Tensor %21500, %int0_19049, %int0_19050, %21523, %int1_19051 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21524, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_19052 = torch.constant.int 1
    %int0_19053 = torch.constant.int 0
    %int9223372036854775807_19054 = torch.constant.int 9223372036854775807
    %int1_19055 = torch.constant.int 1
    %21525 = torch.aten.slice.Tensor %21524, %int1_19052, %int0_19053, %int9223372036854775807_19054, %int1_19055 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21525, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_19056 = torch.constant.int 0
    %21526 = torch.aten.unsqueeze %21525, %int0_19056 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21526, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_19057 = torch.constant.int 2
    %21527 = torch.aten.unsqueeze %21526, %int2_19057 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21527, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_19058 = torch.constant.int 3
    %int0_19059 = torch.constant.int 0
    %int9223372036854775807_19060 = torch.constant.int 9223372036854775807
    %int1_19061 = torch.constant.int 1
    %21528 = torch.aten.slice.Tensor %21527, %int3_19058, %int0_19059, %int9223372036854775807_19060, %int1_19061 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21528, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21529 = torch_c.to_builtin_tensor %21295 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_19062 = arith.constant 1 : index
    %dim_19063 = tensor.dim %21529, %c1_19062 : tensor<4x?x1x128xf16>
    %21530 = flow.tensor.bitcast %21529 : tensor<4x?x1x128xf16>{%dim_19063} -> tensor<4x?x1x64xcomplex<f16>>{%dim_19063}
    %21531 = torch_c.from_builtin_tensor %21530 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %21531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %21532 = torch.aten.mul.Tensor %21531, %21528 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %21533 = torch_c.to_builtin_tensor %21532 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_19064 = arith.constant 1 : index
    %dim_19065 = tensor.dim %21533, %c1_19064 : tensor<4x?x1x64xcomplex<f32>>
    %21534 = flow.tensor.bitcast %21533 : tensor<4x?x1x64xcomplex<f32>>{%dim_19065} -> tensor<4x?x1x128xf32>{%dim_19065}
    %21535 = torch_c.from_builtin_tensor %21534 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %21535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_19066 = torch.constant.int 5
    %21536 = torch.prims.convert_element_type %21535, %int5_19066 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
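    // Devices 1-7: the same single-head rotary application, one per
    // table copy (%21503 through %21521).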
    %int1_19067 = torch.constant.int 1
    %21537 = torch.aten.size.int %21177, %int1_19067 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_19068 = torch.constant.int 0
    %21538 = torch.aten.add.int %int0_19068, %21537 : !torch.int, !torch.int -> !torch.int
    %int0_19069 = torch.constant.int 0
    %int0_19070 = torch.constant.int 0
    %int1_19071 = torch.constant.int 1
    %21539 = torch.aten.slice.Tensor %21503, %int0_19069, %int0_19070, %21538, %int1_19071 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21539, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_19072 = torch.constant.int 1
    %int0_19073 = torch.constant.int 0
    %int9223372036854775807_19074 = torch.constant.int 9223372036854775807
    %int1_19075 = torch.constant.int 1
    %21540 = torch.aten.slice.Tensor %21539, %int1_19072, %int0_19073, %int9223372036854775807_19074, %int1_19075 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21540, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_19076 = torch.constant.int 0
    %21541 = torch.aten.unsqueeze %21540, %int0_19076 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21541, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_19077 = torch.constant.int 2
    %21542 = torch.aten.unsqueeze %21541, %int2_19077 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21542, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_19078 = torch.constant.int 3
    %int0_19079 = torch.constant.int 0
    %int9223372036854775807_19080 = torch.constant.int 9223372036854775807
    %int1_19081 = torch.constant.int 1
    %21543 = torch.aten.slice.Tensor %21542, %int3_19078, %int0_19079, %int9223372036854775807_19080, %int1_19081 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21543, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21544 = torch_c.to_builtin_tensor %21297 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_19082 = arith.constant 1 : index
    %dim_19083 = tensor.dim %21544, %c1_19082 : tensor<4x?x1x128xf16>
    %21545 = flow.tensor.bitcast %21544 : tensor<4x?x1x128xf16>{%dim_19083} -> tensor<4x?x1x64xcomplex<f16>>{%dim_19083}
    %21546 = torch_c.from_builtin_tensor %21545 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %21546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %21547 = torch.aten.mul.Tensor %21546, %21543 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %21548 = torch_c.to_builtin_tensor %21547 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_19084 = arith.constant 1 : index
    %dim_19085 = tensor.dim %21548, %c1_19084 : tensor<4x?x1x64xcomplex<f32>>
    %21549 = flow.tensor.bitcast %21548 : tensor<4x?x1x64xcomplex<f32>>{%dim_19085} -> tensor<4x?x1x128xf32>{%dim_19085}
    %21550 = torch_c.from_builtin_tensor %21549 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %21550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_19086 = torch.constant.int 5
    %21551 = torch.prims.convert_element_type %21550, %int5_19086 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_19087 = torch.constant.int 1
    %21552 = torch.aten.size.int %21183, %int1_19087 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_19088 = torch.constant.int 0
    %21553 = torch.aten.add.int %int0_19088, %21552 : !torch.int, !torch.int -> !torch.int
    %int0_19089 = torch.constant.int 0
    %int0_19090 = torch.constant.int 0
    %int1_19091 = torch.constant.int 1
    %21554 = torch.aten.slice.Tensor %21506, %int0_19089, %int0_19090, %21553, %int1_19091 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21554, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_19092 = torch.constant.int 1
    %int0_19093 = torch.constant.int 0
    %int9223372036854775807_19094 = torch.constant.int 9223372036854775807
    %int1_19095 = torch.constant.int 1
    %21555 = torch.aten.slice.Tensor %21554, %int1_19092, %int0_19093, %int9223372036854775807_19094, %int1_19095 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21555, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_19096 = torch.constant.int 0
    %21556 = torch.aten.unsqueeze %21555, %int0_19096 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21556, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_19097 = torch.constant.int 2
    %21557 = torch.aten.unsqueeze %21556, %int2_19097 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21557, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_19098 = torch.constant.int 3
    %int0_19099 = torch.constant.int 0
    %int9223372036854775807_19100 = torch.constant.int 9223372036854775807
    %int1_19101 = torch.constant.int 1
    %21558 = torch.aten.slice.Tensor %21557, %int3_19098, %int0_19099, %int9223372036854775807_19100, %int1_19101 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21558, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21559 = torch_c.to_builtin_tensor %21299 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_19102 = arith.constant 1 : index
    %dim_19103 = tensor.dim %21559, %c1_19102 : tensor<4x?x1x128xf16>
    %21560 = flow.tensor.bitcast %21559 : tensor<4x?x1x128xf16>{%dim_19103} -> tensor<4x?x1x64xcomplex<f16>>{%dim_19103}
    %21561 = torch_c.from_builtin_tensor %21560 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %21561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %21562 = torch.aten.mul.Tensor %21561, %21558 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %21563 = torch_c.to_builtin_tensor %21562 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_19104 = arith.constant 1 : index
    %dim_19105 = tensor.dim %21563, %c1_19104 : tensor<4x?x1x64xcomplex<f32>>
    %21564 = flow.tensor.bitcast %21563 : tensor<4x?x1x64xcomplex<f32>>{%dim_19105} -> tensor<4x?x1x128xf32>{%dim_19105}
    %21565 = torch_c.from_builtin_tensor %21564 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %21565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_19106 = torch.constant.int 5
    %21566 = torch.prims.convert_element_type %21565, %int5_19106 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
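    // The same per-shard rotation repeats below for the remaining shards: inputs
    // %21301, %21303, %21305, %21307, %21309 against frequency tables %21509,
    // %21512, %21515, %21518, %21521.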
    %int1_19107 = torch.constant.int 1
    %21567 = torch.aten.size.int %21189, %int1_19107 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_19108 = torch.constant.int 0
    %21568 = torch.aten.add.int %int0_19108, %21567 : !torch.int, !torch.int -> !torch.int
    %int0_19109 = torch.constant.int 0
    %int0_19110 = torch.constant.int 0
    %int1_19111 = torch.constant.int 1
    %21569 = torch.aten.slice.Tensor %21509, %int0_19109, %int0_19110, %21568, %int1_19111 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21569, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_19112 = torch.constant.int 1
    %int0_19113 = torch.constant.int 0
    %int9223372036854775807_19114 = torch.constant.int 9223372036854775807
    %int1_19115 = torch.constant.int 1
    %21570 = torch.aten.slice.Tensor %21569, %int1_19112, %int0_19113, %int9223372036854775807_19114, %int1_19115 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21570, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_19116 = torch.constant.int 0
    %21571 = torch.aten.unsqueeze %21570, %int0_19116 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21571, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_19117 = torch.constant.int 2
    %21572 = torch.aten.unsqueeze %21571, %int2_19117 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21572, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_19118 = torch.constant.int 3
    %int0_19119 = torch.constant.int 0
    %int9223372036854775807_19120 = torch.constant.int 9223372036854775807
    %int1_19121 = torch.constant.int 1
    %21573 = torch.aten.slice.Tensor %21572, %int3_19118, %int0_19119, %int9223372036854775807_19120, %int1_19121 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21573, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21574 = torch_c.to_builtin_tensor %21301 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_19122 = arith.constant 1 : index
    %dim_19123 = tensor.dim %21574, %c1_19122 : tensor<4x?x1x128xf16>
    %21575 = flow.tensor.bitcast %21574 : tensor<4x?x1x128xf16>{%dim_19123} -> tensor<4x?x1x64xcomplex<f16>>{%dim_19123}
    %21576 = torch_c.from_builtin_tensor %21575 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %21576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %21577 = torch.aten.mul.Tensor %21576, %21573 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %21578 = torch_c.to_builtin_tensor %21577 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_19124 = arith.constant 1 : index
    %dim_19125 = tensor.dim %21578, %c1_19124 : tensor<4x?x1x64xcomplex<f32>>
    %21579 = flow.tensor.bitcast %21578 : tensor<4x?x1x64xcomplex<f32>>{%dim_19125} -> tensor<4x?x1x128xf32>{%dim_19125}
    %21580 = torch_c.from_builtin_tensor %21579 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %21580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_19126 = torch.constant.int 5
    %21581 = torch.prims.convert_element_type %21580, %int5_19126 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_19127 = torch.constant.int 1
    %21582 = torch.aten.size.int %21195, %int1_19127 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_19128 = torch.constant.int 0
    %21583 = torch.aten.add.int %int0_19128, %21582 : !torch.int, !torch.int -> !torch.int
    %int0_19129 = torch.constant.int 0
    %int0_19130 = torch.constant.int 0
    %int1_19131 = torch.constant.int 1
    %21584 = torch.aten.slice.Tensor %21512, %int0_19129, %int0_19130, %21583, %int1_19131 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21584, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_19132 = torch.constant.int 1
    %int0_19133 = torch.constant.int 0
    %int9223372036854775807_19134 = torch.constant.int 9223372036854775807
    %int1_19135 = torch.constant.int 1
    %21585 = torch.aten.slice.Tensor %21584, %int1_19132, %int0_19133, %int9223372036854775807_19134, %int1_19135 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21585, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_19136 = torch.constant.int 0
    %21586 = torch.aten.unsqueeze %21585, %int0_19136 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21586, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_19137 = torch.constant.int 2
    %21587 = torch.aten.unsqueeze %21586, %int2_19137 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21587, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_19138 = torch.constant.int 3
    %int0_19139 = torch.constant.int 0
    %int9223372036854775807_19140 = torch.constant.int 9223372036854775807
    %int1_19141 = torch.constant.int 1
    %21588 = torch.aten.slice.Tensor %21587, %int3_19138, %int0_19139, %int9223372036854775807_19140, %int1_19141 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21588, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21589 = torch_c.to_builtin_tensor %21303 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_19142 = arith.constant 1 : index
    %dim_19143 = tensor.dim %21589, %c1_19142 : tensor<4x?x1x128xf16>
    %21590 = flow.tensor.bitcast %21589 : tensor<4x?x1x128xf16>{%dim_19143} -> tensor<4x?x1x64xcomplex<f16>>{%dim_19143}
    %21591 = torch_c.from_builtin_tensor %21590 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %21591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %21592 = torch.aten.mul.Tensor %21591, %21588 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %21593 = torch_c.to_builtin_tensor %21592 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_19144 = arith.constant 1 : index
    %dim_19145 = tensor.dim %21593, %c1_19144 : tensor<4x?x1x64xcomplex<f32>>
    %21594 = flow.tensor.bitcast %21593 : tensor<4x?x1x64xcomplex<f32>>{%dim_19145} -> tensor<4x?x1x128xf32>{%dim_19145}
    %21595 = torch_c.from_builtin_tensor %21594 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %21595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_19146 = torch.constant.int 5
    %21596 = torch.prims.convert_element_type %21595, %int5_19146 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_19147 = torch.constant.int 1
    %21597 = torch.aten.size.int %21201, %int1_19147 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_19148 = torch.constant.int 0
    %21598 = torch.aten.add.int %int0_19148, %21597 : !torch.int, !torch.int -> !torch.int
    %int0_19149 = torch.constant.int 0
    %int0_19150 = torch.constant.int 0
    %int1_19151 = torch.constant.int 1
    %21599 = torch.aten.slice.Tensor %21515, %int0_19149, %int0_19150, %21598, %int1_19151 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21599, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_19152 = torch.constant.int 1
    %int0_19153 = torch.constant.int 0
    %int9223372036854775807_19154 = torch.constant.int 9223372036854775807
    %int1_19155 = torch.constant.int 1
    %21600 = torch.aten.slice.Tensor %21599, %int1_19152, %int0_19153, %int9223372036854775807_19154, %int1_19155 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21600, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_19156 = torch.constant.int 0
    %21601 = torch.aten.unsqueeze %21600, %int0_19156 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21601, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_19157 = torch.constant.int 2
    %21602 = torch.aten.unsqueeze %21601, %int2_19157 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21602, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_19158 = torch.constant.int 3
    %int0_19159 = torch.constant.int 0
    %int9223372036854775807_19160 = torch.constant.int 9223372036854775807
    %int1_19161 = torch.constant.int 1
    %21603 = torch.aten.slice.Tensor %21602, %int3_19158, %int0_19159, %int9223372036854775807_19160, %int1_19161 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21603, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21604 = torch_c.to_builtin_tensor %21305 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_19162 = arith.constant 1 : index
    %dim_19163 = tensor.dim %21604, %c1_19162 : tensor<4x?x1x128xf16>
    %21605 = flow.tensor.bitcast %21604 : tensor<4x?x1x128xf16>{%dim_19163} -> tensor<4x?x1x64xcomplex<f16>>{%dim_19163}
    %21606 = torch_c.from_builtin_tensor %21605 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %21606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %21607 = torch.aten.mul.Tensor %21606, %21603 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %21608 = torch_c.to_builtin_tensor %21607 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_19164 = arith.constant 1 : index
    %dim_19165 = tensor.dim %21608, %c1_19164 : tensor<4x?x1x64xcomplex<f32>>
    %21609 = flow.tensor.bitcast %21608 : tensor<4x?x1x64xcomplex<f32>>{%dim_19165} -> tensor<4x?x1x128xf32>{%dim_19165}
    %21610 = torch_c.from_builtin_tensor %21609 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %21610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_19166 = torch.constant.int 5
    %21611 = torch.prims.convert_element_type %21610, %int5_19166 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_19167 = torch.constant.int 1
    %21612 = torch.aten.size.int %21207, %int1_19167 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_19168 = torch.constant.int 0
    %21613 = torch.aten.add.int %int0_19168, %21612 : !torch.int, !torch.int -> !torch.int
    %int0_19169 = torch.constant.int 0
    %int0_19170 = torch.constant.int 0
    %int1_19171 = torch.constant.int 1
    %21614 = torch.aten.slice.Tensor %21518, %int0_19169, %int0_19170, %21613, %int1_19171 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21614, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_19172 = torch.constant.int 1
    %int0_19173 = torch.constant.int 0
    %int9223372036854775807_19174 = torch.constant.int 9223372036854775807
    %int1_19175 = torch.constant.int 1
    %21615 = torch.aten.slice.Tensor %21614, %int1_19172, %int0_19173, %int9223372036854775807_19174, %int1_19175 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21615, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_19176 = torch.constant.int 0
    %21616 = torch.aten.unsqueeze %21615, %int0_19176 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21616, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_19177 = torch.constant.int 2
    %21617 = torch.aten.unsqueeze %21616, %int2_19177 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21617, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_19178 = torch.constant.int 3
    %int0_19179 = torch.constant.int 0
    %int9223372036854775807_19180 = torch.constant.int 9223372036854775807
    %int1_19181 = torch.constant.int 1
    %21618 = torch.aten.slice.Tensor %21617, %int3_19178, %int0_19179, %int9223372036854775807_19180, %int1_19181 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21618, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21619 = torch_c.to_builtin_tensor %21307 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_19182 = arith.constant 1 : index
    %dim_19183 = tensor.dim %21619, %c1_19182 : tensor<4x?x1x128xf16>
    %21620 = flow.tensor.bitcast %21619 : tensor<4x?x1x128xf16>{%dim_19183} -> tensor<4x?x1x64xcomplex<f16>>{%dim_19183}
    %21621 = torch_c.from_builtin_tensor %21620 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %21621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %21622 = torch.aten.mul.Tensor %21621, %21618 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %21623 = torch_c.to_builtin_tensor %21622 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_19184 = arith.constant 1 : index
    %dim_19185 = tensor.dim %21623, %c1_19184 : tensor<4x?x1x64xcomplex<f32>>
    %21624 = flow.tensor.bitcast %21623 : tensor<4x?x1x64xcomplex<f32>>{%dim_19185} -> tensor<4x?x1x128xf32>{%dim_19185}
    %21625 = torch_c.from_builtin_tensor %21624 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %21625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_19186 = torch.constant.int 5
    %21626 = torch.prims.convert_element_type %21625, %int5_19186 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_19187 = torch.constant.int 1
    %21627 = torch.aten.size.int %21213, %int1_19187 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_19188 = torch.constant.int 0
    %21628 = torch.aten.add.int %int0_19188, %21627 : !torch.int, !torch.int -> !torch.int
    %int0_19189 = torch.constant.int 0
    %int0_19190 = torch.constant.int 0
    %int1_19191 = torch.constant.int 1
    %21629 = torch.aten.slice.Tensor %21521, %int0_19189, %int0_19190, %21628, %int1_19191 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21629, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_19192 = torch.constant.int 1
    %int0_19193 = torch.constant.int 0
    %int9223372036854775807_19194 = torch.constant.int 9223372036854775807
    %int1_19195 = torch.constant.int 1
    %21630 = torch.aten.slice.Tensor %21629, %int1_19192, %int0_19193, %int9223372036854775807_19194, %int1_19195 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %21630, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_19196 = torch.constant.int 0
    %21631 = torch.aten.unsqueeze %21630, %int0_19196 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %21631, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_19197 = torch.constant.int 2
    %21632 = torch.aten.unsqueeze %21631, %int2_19197 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21632, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_19198 = torch.constant.int 3
    %int0_19199 = torch.constant.int 0
    %int9223372036854775807_19200 = torch.constant.int 9223372036854775807
    %int1_19201 = torch.constant.int 1
    %21633 = torch.aten.slice.Tensor %21632, %int3_19198, %int0_19199, %int9223372036854775807_19200, %int1_19201 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21633, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %21634 = torch_c.to_builtin_tensor %21309 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_19202 = arith.constant 1 : index
    %dim_19203 = tensor.dim %21634, %c1_19202 : tensor<4x?x1x128xf16>
    %21635 = flow.tensor.bitcast %21634 : tensor<4x?x1x128xf16>{%dim_19203} -> tensor<4x?x1x64xcomplex<f16>>{%dim_19203}
    %21636 = torch_c.from_builtin_tensor %21635 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %21636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %21637 = torch.aten.mul.Tensor %21636, %21633 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %21637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %21638 = torch_c.to_builtin_tensor %21637 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_19204 = arith.constant 1 : index
    %dim_19205 = tensor.dim %21638, %c1_19204 : tensor<4x?x1x64xcomplex<f32>>
    %21639 = flow.tensor.bitcast %21638 : tensor<4x?x1x64xcomplex<f32>>{%dim_19205} -> tensor<4x?x1x128xf32>{%dim_19205}
    %21640 = torch_c.from_builtin_tensor %21639 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %21640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_19206 = torch.constant.int 5
    %21641 = torch.prims.convert_element_type %21640, %int5_19206 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %21641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
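    // Editor's note (assumption): the eight [4, ?] si64 tensors scaled below appear
    // to be per-device page tables for a paged KV cache; multiplying each page id by
    // 64 yields a flat slot base, i.e. 64 sub-slots per page.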
    %int64_19207 = torch.constant.int 64
    %21642 = torch.aten.mul.Scalar %2364, %int64_19207 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21642, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_19208 = torch.constant.int 64
    %21643 = torch.aten.mul.Scalar %2367, %int64_19208 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21643, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_19209 = torch.constant.int 64
    %21644 = torch.aten.mul.Scalar %2370, %int64_19209 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21644, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_19210 = torch.constant.int 64
    %21645 = torch.aten.mul.Scalar %2373, %int64_19210 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21645, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_19211 = torch.constant.int 64
    %21646 = torch.aten.mul.Scalar %2376, %int64_19211 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21646, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_19212 = torch.constant.int 64
    %21647 = torch.aten.mul.Scalar %2379, %int64_19212 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21647, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_19213 = torch.constant.int 64
    %21648 = torch.aten.mul.Scalar %2382, %int64_19213 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21648, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_19214 = torch.constant.int 64
    %21649 = torch.aten.mul.Scalar %2385, %int64_19214 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21649, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
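    // Editor's note (speculative): adding the constant 20 selects a fixed sub-slot
    // within each page. If the stride of 64 corresponds to 32 transformer blocks
    // times 2 (K then V), offset 20 would address block 10's key sub-table, and the
    // later +1 its value sub-table.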
    %int20 = torch.constant.int 20
    %int1_19215 = torch.constant.int 1
    %21650 = torch.aten.add.Scalar %21642, %int20, %int1_19215 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21650, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int20_19216 = torch.constant.int 20
    %int1_19217 = torch.constant.int 1
    %21651 = torch.aten.add.Scalar %21643, %int20_19216, %int1_19217 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21651, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int20_19218 = torch.constant.int 20
    %int1_19219 = torch.constant.int 1
    %21652 = torch.aten.add.Scalar %21644, %int20_19218, %int1_19219 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21652, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int20_19220 = torch.constant.int 20
    %int1_19221 = torch.constant.int 1
    %21653 = torch.aten.add.Scalar %21645, %int20_19220, %int1_19221 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21653, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int20_19222 = torch.constant.int 20
    %int1_19223 = torch.constant.int 1
    %21654 = torch.aten.add.Scalar %21646, %int20_19222, %int1_19223 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21654, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int20_19224 = torch.constant.int 20
    %int1_19225 = torch.constant.int 1
    %21655 = torch.aten.add.Scalar %21647, %int20_19224, %int1_19225 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21655, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int20_19226 = torch.constant.int 20
    %int1_19227 = torch.constant.int 1
    %21656 = torch.aten.add.Scalar %21648, %int20_19226, %int1_19227 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21656, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int20_19228 = torch.constant.int 20
    %int1_19229 = torch.constant.int 1
    %21657 = torch.aten.add.Scalar %21649, %int20_19228, %int1_19229 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21657, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
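    // The rotated [4, ?, 1, 128] tensors are reshaped to [4, pages, 16, 1, 128],
    // splitting the token dimension (s0 * 16) into pages of 16 tokens each;
    // 16 is presumably the per-page sequence stride of the paged cache.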
    %int4_19230 = torch.constant.int 4
    %int16_19231 = torch.constant.int 16
    %int1_19232 = torch.constant.int 1
    %int128_19233 = torch.constant.int 128
    %21658 = torch.prim.ListConstruct %int4_19230, %3095, %int16_19231, %int1_19232, %int128_19233 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21659 = torch.aten.view %21536, %21658 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21659, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19234 = torch.constant.int 4
    %int16_19235 = torch.constant.int 16
    %int1_19236 = torch.constant.int 1
    %int128_19237 = torch.constant.int 128
    %21660 = torch.prim.ListConstruct %int4_19234, %3095, %int16_19235, %int1_19236, %int128_19237 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21661 = torch.aten.view %21551, %21660 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21661, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19238 = torch.constant.int 4
    %int16_19239 = torch.constant.int 16
    %int1_19240 = torch.constant.int 1
    %int128_19241 = torch.constant.int 128
    %21662 = torch.prim.ListConstruct %int4_19238, %3095, %int16_19239, %int1_19240, %int128_19241 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21663 = torch.aten.view %21566, %21662 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21663, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19242 = torch.constant.int 4
    %int16_19243 = torch.constant.int 16
    %int1_19244 = torch.constant.int 1
    %int128_19245 = torch.constant.int 128
    %21664 = torch.prim.ListConstruct %int4_19242, %3095, %int16_19243, %int1_19244, %int128_19245 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21665 = torch.aten.view %21581, %21664 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21665, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19246 = torch.constant.int 4
    %int16_19247 = torch.constant.int 16
    %int1_19248 = torch.constant.int 1
    %int128_19249 = torch.constant.int 128
    %21666 = torch.prim.ListConstruct %int4_19246, %3095, %int16_19247, %int1_19248, %int128_19249 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21667 = torch.aten.view %21596, %21666 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21667, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19250 = torch.constant.int 4
    %int16_19251 = torch.constant.int 16
    %int1_19252 = torch.constant.int 1
    %int128_19253 = torch.constant.int 128
    %21668 = torch.prim.ListConstruct %int4_19250, %3095, %int16_19251, %int1_19252, %int128_19253 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21669 = torch.aten.view %21611, %21668 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21669, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19254 = torch.constant.int 4
    %int16_19255 = torch.constant.int 16
    %int1_19256 = torch.constant.int 1
    %int128_19257 = torch.constant.int 128
    %21670 = torch.prim.ListConstruct %int4_19254, %3095, %int16_19255, %int1_19256, %int128_19257 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21671 = torch.aten.view %21626, %21670 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21671, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19258 = torch.constant.int 4
    %int16_19259 = torch.constant.int 16
    %int1_19260 = torch.constant.int 1
    %int128_19261 = torch.constant.int 128
    %21672 = torch.prim.ListConstruct %int4_19258, %3095, %int16_19259, %int1_19260, %int128_19261 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21673 = torch.aten.view %21641, %21672 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21673, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
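    // Batch and page dimensions are then collapsed ([4, pages, 16, 1, 128] ->
    // [4*pages, 16, 1, 128]) so each page can be written with a single flat index.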
    %int4_19262 = torch.constant.int 4
    %21674 = torch.aten.mul.int %int4_19262, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19263 = torch.constant.int 16
    %int1_19264 = torch.constant.int 1
    %int128_19265 = torch.constant.int 128
    %21675 = torch.prim.ListConstruct %21674, %int16_19263, %int1_19264, %int128_19265 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21676 = torch.aten.view %21659, %21675 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21676, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19266 = torch.constant.int 4
    %21677 = torch.aten.mul.int %int4_19266, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19267 = torch.constant.int 16
    %int1_19268 = torch.constant.int 1
    %int128_19269 = torch.constant.int 128
    %21678 = torch.prim.ListConstruct %21677, %int16_19267, %int1_19268, %int128_19269 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21679 = torch.aten.view %21661, %21678 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21679, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19270 = torch.constant.int 4
    %21680 = torch.aten.mul.int %int4_19270, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19271 = torch.constant.int 16
    %int1_19272 = torch.constant.int 1
    %int128_19273 = torch.constant.int 128
    %21681 = torch.prim.ListConstruct %21680, %int16_19271, %int1_19272, %int128_19273 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21682 = torch.aten.view %21663, %21681 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21682, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19274 = torch.constant.int 4
    %21683 = torch.aten.mul.int %int4_19274, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19275 = torch.constant.int 16
    %int1_19276 = torch.constant.int 1
    %int128_19277 = torch.constant.int 128
    %21684 = torch.prim.ListConstruct %21683, %int16_19275, %int1_19276, %int128_19277 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21685 = torch.aten.view %21665, %21684 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21685, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19278 = torch.constant.int 4
    %21686 = torch.aten.mul.int %int4_19278, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19279 = torch.constant.int 16
    %int1_19280 = torch.constant.int 1
    %int128_19281 = torch.constant.int 128
    %21687 = torch.prim.ListConstruct %21686, %int16_19279, %int1_19280, %int128_19281 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21688 = torch.aten.view %21667, %21687 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21688, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19282 = torch.constant.int 4
    %21689 = torch.aten.mul.int %int4_19282, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19283 = torch.constant.int 16
    %int1_19284 = torch.constant.int 1
    %int128_19285 = torch.constant.int 128
    %21690 = torch.prim.ListConstruct %21689, %int16_19283, %int1_19284, %int128_19285 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21691 = torch.aten.view %21669, %21690 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21691, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19286 = torch.constant.int 4
    %21692 = torch.aten.mul.int %int4_19286, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19287 = torch.constant.int 16
    %int1_19288 = torch.constant.int 1
    %int128_19289 = torch.constant.int 128
    %21693 = torch.prim.ListConstruct %21692, %int16_19287, %int1_19288, %int128_19289 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21694 = torch.aten.view %21671, %21693 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21694, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19290 = torch.constant.int 4
    %21695 = torch.aten.mul.int %int4_19290, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19291 = torch.constant.int 16
    %int1_19292 = torch.constant.int 1
    %int128_19293 = torch.constant.int 128
    %21696 = torch.prim.ListConstruct %21695, %int16_19291, %int1_19292, %int128_19293 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21697 = torch.aten.view %21673, %21696 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21697, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
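    // The [4, ?] slot indices computed above are likewise flattened to [4*pages]
    // to match the collapsed data layout.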
    %int4_19294 = torch.constant.int 4
    %21698 = torch.aten.mul.int %int4_19294, %3095 : !torch.int, !torch.int -> !torch.int
    %21699 = torch.prim.ListConstruct %21698 : (!torch.int) -> !torch.list<int>
    %21700 = torch.aten.view %21650, %21699 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21700, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19295 = torch.constant.int 4
    %21701 = torch.aten.mul.int %int4_19295, %3095 : !torch.int, !torch.int -> !torch.int
    %21702 = torch.prim.ListConstruct %21701 : (!torch.int) -> !torch.list<int>
    %21703 = torch.aten.view %21651, %21702 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21703, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19296 = torch.constant.int 4
    %21704 = torch.aten.mul.int %int4_19296, %3095 : !torch.int, !torch.int -> !torch.int
    %21705 = torch.prim.ListConstruct %21704 : (!torch.int) -> !torch.list<int>
    %21706 = torch.aten.view %21652, %21705 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21706, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19297 = torch.constant.int 4
    %21707 = torch.aten.mul.int %int4_19297, %3095 : !torch.int, !torch.int -> !torch.int
    %21708 = torch.prim.ListConstruct %21707 : (!torch.int) -> !torch.list<int>
    %21709 = torch.aten.view %21653, %21708 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21709, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19298 = torch.constant.int 4
    %21710 = torch.aten.mul.int %int4_19298, %3095 : !torch.int, !torch.int -> !torch.int
    %21711 = torch.prim.ListConstruct %21710 : (!torch.int) -> !torch.list<int>
    %21712 = torch.aten.view %21654, %21711 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21712, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19299 = torch.constant.int 4
    %21713 = torch.aten.mul.int %int4_19299, %3095 : !torch.int, !torch.int -> !torch.int
    %21714 = torch.prim.ListConstruct %21713 : (!torch.int) -> !torch.list<int>
    %21715 = torch.aten.view %21655, %21714 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21715, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19300 = torch.constant.int 4
    %21716 = torch.aten.mul.int %int4_19300, %3095 : !torch.int, !torch.int -> !torch.int
    %21717 = torch.prim.ListConstruct %21716 : (!torch.int) -> !torch.list<int>
    %21718 = torch.aten.view %21656, %21717 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21718, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19301 = torch.constant.int 4
    %21719 = torch.aten.mul.int %int4_19301, %3095 : !torch.int, !torch.int -> !torch.int
    %21720 = torch.prim.ListConstruct %21719 : (!torch.int) -> !torch.list<int>
    %21721 = torch.aten.view %21657, %21720 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21721, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
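    // Editor's note (assumption): %21311 through %21325 are the corresponding
    // per-shard value tensors, which are not rotated; they receive the same
    // [4, pages, 16, 1, 128] reshape below.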
    %int4_19302 = torch.constant.int 4
    %int16_19303 = torch.constant.int 16
    %int1_19304 = torch.constant.int 1
    %int128_19305 = torch.constant.int 128
    %21722 = torch.prim.ListConstruct %int4_19302, %3095, %int16_19303, %int1_19304, %int128_19305 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21723 = torch.aten.view %21311, %21722 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21723, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19306 = torch.constant.int 4
    %int16_19307 = torch.constant.int 16
    %int1_19308 = torch.constant.int 1
    %int128_19309 = torch.constant.int 128
    %21724 = torch.prim.ListConstruct %int4_19306, %3095, %int16_19307, %int1_19308, %int128_19309 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21725 = torch.aten.view %21313, %21724 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21725, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19310 = torch.constant.int 4
    %int16_19311 = torch.constant.int 16
    %int1_19312 = torch.constant.int 1
    %int128_19313 = torch.constant.int 128
    %21726 = torch.prim.ListConstruct %int4_19310, %3095, %int16_19311, %int1_19312, %int128_19313 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21727 = torch.aten.view %21315, %21726 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21727, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19314 = torch.constant.int 4
    %int16_19315 = torch.constant.int 16
    %int1_19316 = torch.constant.int 1
    %int128_19317 = torch.constant.int 128
    %21728 = torch.prim.ListConstruct %int4_19314, %3095, %int16_19315, %int1_19316, %int128_19317 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21729 = torch.aten.view %21317, %21728 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21729, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19318 = torch.constant.int 4
    %int16_19319 = torch.constant.int 16
    %int1_19320 = torch.constant.int 1
    %int128_19321 = torch.constant.int 128
    %21730 = torch.prim.ListConstruct %int4_19318, %3095, %int16_19319, %int1_19320, %int128_19321 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21731 = torch.aten.view %21319, %21730 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21731, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19322 = torch.constant.int 4
    %int16_19323 = torch.constant.int 16
    %int1_19324 = torch.constant.int 1
    %int128_19325 = torch.constant.int 128
    %21732 = torch.prim.ListConstruct %int4_19322, %3095, %int16_19323, %int1_19324, %int128_19325 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21733 = torch.aten.view %21321, %21732 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21733, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19326 = torch.constant.int 4
    %int16_19327 = torch.constant.int 16
    %int1_19328 = torch.constant.int 1
    %int128_19329 = torch.constant.int 128
    %21734 = torch.prim.ListConstruct %int4_19326, %3095, %int16_19327, %int1_19328, %int128_19329 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21735 = torch.aten.view %21323, %21734 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21735, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_19330 = torch.constant.int 4
    %int16_19331 = torch.constant.int 16
    %int1_19332 = torch.constant.int 1
    %int128_19333 = torch.constant.int 128
    %21736 = torch.prim.ListConstruct %int4_19330, %3095, %int16_19331, %int1_19332, %int128_19333 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21737 = torch.aten.view %21325, %21736 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %21737, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
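    // The value tensors are collapsed to [4*pages, 16, 1, 128] in the same way
    // as the key tensors above.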
    %int4_19334 = torch.constant.int 4
    %21738 = torch.aten.mul.int %int4_19334, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19335 = torch.constant.int 16
    %int1_19336 = torch.constant.int 1
    %int128_19337 = torch.constant.int 128
    %21739 = torch.prim.ListConstruct %21738, %int16_19335, %int1_19336, %int128_19337 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21740 = torch.aten.view %21723, %21739 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21740, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19338 = torch.constant.int 4
    %21741 = torch.aten.mul.int %int4_19338, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19339 = torch.constant.int 16
    %int1_19340 = torch.constant.int 1
    %int128_19341 = torch.constant.int 128
    %21742 = torch.prim.ListConstruct %21741, %int16_19339, %int1_19340, %int128_19341 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21743 = torch.aten.view %21725, %21742 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21743, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19342 = torch.constant.int 4
    %21744 = torch.aten.mul.int %int4_19342, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19343 = torch.constant.int 16
    %int1_19344 = torch.constant.int 1
    %int128_19345 = torch.constant.int 128
    %21745 = torch.prim.ListConstruct %21744, %int16_19343, %int1_19344, %int128_19345 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21746 = torch.aten.view %21727, %21745 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21746, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19346 = torch.constant.int 4
    %21747 = torch.aten.mul.int %int4_19346, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19347 = torch.constant.int 16
    %int1_19348 = torch.constant.int 1
    %int128_19349 = torch.constant.int 128
    %21748 = torch.prim.ListConstruct %21747, %int16_19347, %int1_19348, %int128_19349 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21749 = torch.aten.view %21729, %21748 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21749, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19350 = torch.constant.int 4
    %21750 = torch.aten.mul.int %int4_19350, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19351 = torch.constant.int 16
    %int1_19352 = torch.constant.int 1
    %int128_19353 = torch.constant.int 128
    %21751 = torch.prim.ListConstruct %21750, %int16_19351, %int1_19352, %int128_19353 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21752 = torch.aten.view %21731, %21751 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21752, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19354 = torch.constant.int 4
    %21753 = torch.aten.mul.int %int4_19354, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19355 = torch.constant.int 16
    %int1_19356 = torch.constant.int 1
    %int128_19357 = torch.constant.int 128
    %21754 = torch.prim.ListConstruct %21753, %int16_19355, %int1_19356, %int128_19357 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21755 = torch.aten.view %21733, %21754 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21755, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19358 = torch.constant.int 4
    %21756 = torch.aten.mul.int %int4_19358, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19359 = torch.constant.int 16
    %int1_19360 = torch.constant.int 1
    %int128_19361 = torch.constant.int 128
    %21757 = torch.prim.ListConstruct %21756, %int16_19359, %int1_19360, %int128_19361 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21758 = torch.aten.view %21735, %21757 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21758, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_19362 = torch.constant.int 4
    %21759 = torch.aten.mul.int %int4_19362, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_19363 = torch.constant.int 16
    %int1_19364 = torch.constant.int 1
    %int128_19365 = torch.constant.int 128
    %21760 = torch.prim.ListConstruct %21759, %int16_19363, %int1_19364, %int128_19365 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21761 = torch.aten.view %21737, %21760 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21761, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
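    // Editor's note (assumption): the +1 below moves each index from the key
    // sub-slot (base offset 20) to the adjacent value sub-slot within the page.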
    %int1_19366 = torch.constant.int 1
    %int1_19367 = torch.constant.int 1
    %21762 = torch.aten.add.Scalar %21650, %int1_19366, %int1_19367 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21762, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_19368 = torch.constant.int 1
    %int1_19369 = torch.constant.int 1
    %21763 = torch.aten.add.Scalar %21651, %int1_19368, %int1_19369 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21763, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_19370 = torch.constant.int 1
    %int1_19371 = torch.constant.int 1
    %21764 = torch.aten.add.Scalar %21652, %int1_19370, %int1_19371 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21764, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_19372 = torch.constant.int 1
    %int1_19373 = torch.constant.int 1
    %21765 = torch.aten.add.Scalar %21653, %int1_19372, %int1_19373 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21765, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_19374 = torch.constant.int 1
    %int1_19375 = torch.constant.int 1
    %21766 = torch.aten.add.Scalar %21654, %int1_19374, %int1_19375 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21766, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_19376 = torch.constant.int 1
    %int1_19377 = torch.constant.int 1
    %21767 = torch.aten.add.Scalar %21655, %int1_19376, %int1_19377 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21767, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_19378 = torch.constant.int 1
    %int1_19379 = torch.constant.int 1
    %21768 = torch.aten.add.Scalar %21656, %int1_19378, %int1_19379 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21768, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_19380 = torch.constant.int 1
    %int1_19381 = torch.constant.int 1
    %21769 = torch.aten.add.Scalar %21657, %int1_19380, %int1_19381 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %21769, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
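    // The incremented value-slot indices are flattened to [4*pages], matching the
    // flattened key-slot indices above.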
    %int4_19382 = torch.constant.int 4
    %21770 = torch.aten.mul.int %int4_19382, %3095 : !torch.int, !torch.int -> !torch.int
    %21771 = torch.prim.ListConstruct %21770 : (!torch.int) -> !torch.list<int>
    %21772 = torch.aten.view %21762, %21771 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21772, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19383 = torch.constant.int 4
    %21773 = torch.aten.mul.int %int4_19383, %3095 : !torch.int, !torch.int -> !torch.int
    %21774 = torch.prim.ListConstruct %21773 : (!torch.int) -> !torch.list<int>
    %21775 = torch.aten.view %21763, %21774 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21775, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19384 = torch.constant.int 4
    %21776 = torch.aten.mul.int %int4_19384, %3095 : !torch.int, !torch.int -> !torch.int
    %21777 = torch.prim.ListConstruct %21776 : (!torch.int) -> !torch.list<int>
    %21778 = torch.aten.view %21764, %21777 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21778, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19385 = torch.constant.int 4
    %21779 = torch.aten.mul.int %int4_19385, %3095 : !torch.int, !torch.int -> !torch.int
    %21780 = torch.prim.ListConstruct %21779 : (!torch.int) -> !torch.list<int>
    %21781 = torch.aten.view %21765, %21780 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21781, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19386 = torch.constant.int 4
    %21782 = torch.aten.mul.int %int4_19386, %3095 : !torch.int, !torch.int -> !torch.int
    %21783 = torch.prim.ListConstruct %21782 : (!torch.int) -> !torch.list<int>
    %21784 = torch.aten.view %21766, %21783 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21784, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19387 = torch.constant.int 4
    %21785 = torch.aten.mul.int %int4_19387, %3095 : !torch.int, !torch.int -> !torch.int
    %21786 = torch.prim.ListConstruct %21785 : (!torch.int) -> !torch.list<int>
    %21787 = torch.aten.view %21767, %21786 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21787, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19388 = torch.constant.int 4
    %21788 = torch.aten.mul.int %int4_19388, %3095 : !torch.int, !torch.int -> !torch.int
    %21789 = torch.prim.ListConstruct %21788 : (!torch.int) -> !torch.list<int>
    %21790 = torch.aten.view %21768, %21789 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21790, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_19389 = torch.constant.int 4
    %21791 = torch.aten.mul.int %int4_19389, %3095 : !torch.int, !torch.int -> !torch.int
    %21792 = torch.prim.ListConstruct %21791 : (!torch.int) -> !torch.list<int>
    %21793 = torch.aten.view %21769, %21792 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21793, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
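    // Annotation (editorial): pairs of flattened index vectors (e.g. %21700 with
    // %21772) are concatenated along dim 0 into [s0*8] vectors below; each pair
    // likely combines the two slot-offset sets of one device's cache update so a
    // single index_put can write both halves at once.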
    %21794 = torch.prim.ListConstruct %21700, %21772 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_19390 = torch.constant.int 0
    %21795 = torch.aten.cat %21794, %int0_19390 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21795, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %21796 = torch.prim.ListConstruct %21703, %21775 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_19391 = torch.constant.int 0
    %21797 = torch.aten.cat %21796, %int0_19391 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21797, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %21798 = torch.prim.ListConstruct %21706, %21778 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_19392 = torch.constant.int 0
    %21799 = torch.aten.cat %21798, %int0_19392 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21799, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %21800 = torch.prim.ListConstruct %21709, %21781 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_19393 = torch.constant.int 0
    %21801 = torch.aten.cat %21800, %int0_19393 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21801, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %21802 = torch.prim.ListConstruct %21712, %21784 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_19394 = torch.constant.int 0
    %21803 = torch.aten.cat %21802, %int0_19394 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21803, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %21804 = torch.prim.ListConstruct %21715, %21787 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_19395 = torch.constant.int 0
    %21805 = torch.aten.cat %21804, %int0_19395 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21805, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %21806 = torch.prim.ListConstruct %21718, %21790 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_19396 = torch.constant.int 0
    %21807 = torch.aten.cat %21806, %int0_19396 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21807, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %21808 = torch.prim.ListConstruct %21721, %21793 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_19397 = torch.constant.int 0
    %21809 = torch.aten.cat %21808, %int0_19397 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %21809, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
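    // Annotation (editorial): the matching [?,16,1,128] f16 payload tensors are
    // concatenated the same way below, giving [s0*8,16,1,128] update values aligned
    // element-for-element with the [s0*8] index vectors built above.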
    %21810 = torch.prim.ListConstruct %21676, %21740 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_19398 = torch.constant.int 0
    %21811 = torch.aten.cat %21810, %int0_19398 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21811, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21812 = torch.prim.ListConstruct %21679, %21743 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_19399 = torch.constant.int 0
    %21813 = torch.aten.cat %21812, %int0_19399 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21813, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21814 = torch.prim.ListConstruct %21682, %21746 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_19400 = torch.constant.int 0
    %21815 = torch.aten.cat %21814, %int0_19400 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21815, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21816 = torch.prim.ListConstruct %21685, %21749 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_19401 = torch.constant.int 0
    %21817 = torch.aten.cat %21816, %int0_19401 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21817, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21818 = torch.prim.ListConstruct %21688, %21752 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_19402 = torch.constant.int 0
    %21819 = torch.aten.cat %21818, %int0_19402 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21819, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21820 = torch.prim.ListConstruct %21691, %21755 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_19403 = torch.constant.int 0
    %21821 = torch.aten.cat %21820, %int0_19403 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21821, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21822 = torch.prim.ListConstruct %21694, %21758 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_19404 = torch.constant.int 0
    %21823 = torch.aten.cat %21822, %int0_19404 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21823, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21824 = torch.prim.ListConstruct %21697, %21761 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_19405 = torch.constant.int 0
    %21825 = torch.aten.cat %21824, %int0_19405 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21825, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
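    // Annotation (editorial): per-device paged KV-cache write-back. For each of the
    // eight devices, a flat cache slab [?,131072] is viewed as pages of
    // [32, 2, 16, 1, 128] (32*2*16*1*128 = 131072; plausibly 32 layers x {key,value}
    // x 16 tokens per page x 1 sharded KV head x 128 head-dim), flattened to one row
    // per slot ([s0*64,16,1,128]), updated with aten.index_put (accumulate = false),
    // then viewed back to [?,131072]. The block below handles the slab %19976,
    // presumably @__device_0's, using indices %21795 and values %21811.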
    %int32_19406 = torch.constant.int 32
    %int2_19407 = torch.constant.int 2
    %int16_19408 = torch.constant.int 16
    %int1_19409 = torch.constant.int 1
    %int128_19410 = torch.constant.int 128
    %21826 = torch.prim.ListConstruct %3023, %int32_19406, %int2_19407, %int16_19408, %int1_19409, %int128_19410 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21827 = torch.aten.view %19976, %21826 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21827, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_19411 = torch.constant.int 32
    %21828 = torch.aten.mul.int %3023, %int32_19411 : !torch.int, !torch.int -> !torch.int
    %int2_19412 = torch.constant.int 2
    %21829 = torch.aten.mul.int %21828, %int2_19412 : !torch.int, !torch.int -> !torch.int
    %int16_19413 = torch.constant.int 16
    %int1_19414 = torch.constant.int 1
    %int128_19415 = torch.constant.int 128
    %21830 = torch.prim.ListConstruct %21829, %int16_19413, %int1_19414, %int128_19415 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21831 = torch.aten.view %21827, %21830 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21831, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21832 = torch.prim.ListConstruct %21795 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_19416 = torch.constant.bool false
    %21833 = torch.aten.index_put %21831, %21832, %21811, %false_19416 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21833, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_19417 = torch.constant.int 32
    %int2_19418 = torch.constant.int 2
    %int16_19419 = torch.constant.int 16
    %int1_19420 = torch.constant.int 1
    %int128_19421 = torch.constant.int 128
    %21834 = torch.prim.ListConstruct %3023, %int32_19417, %int2_19418, %int16_19419, %int1_19420, %int128_19421 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21835 = torch.aten.view %21833, %21834 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21835, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_19422 = torch.constant.int 131072
    %21836 = torch.prim.ListConstruct %3023, %int131072_19422 : (!torch.int, !torch.int) -> !torch.list<int>
    %21837 = torch.aten.view %21835, %21836 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %21837, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
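    // Annotation (editorial): same scatter pattern, presumably @__device_1: slab %19988, indices %21797, values %21813.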
    %int32_19423 = torch.constant.int 32
    %int2_19424 = torch.constant.int 2
    %int16_19425 = torch.constant.int 16
    %int1_19426 = torch.constant.int 1
    %int128_19427 = torch.constant.int 128
    %21838 = torch.prim.ListConstruct %3026, %int32_19423, %int2_19424, %int16_19425, %int1_19426, %int128_19427 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21839 = torch.aten.view %19988, %21838 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21839, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_19428 = torch.constant.int 32
    %21840 = torch.aten.mul.int %3026, %int32_19428 : !torch.int, !torch.int -> !torch.int
    %int2_19429 = torch.constant.int 2
    %21841 = torch.aten.mul.int %21840, %int2_19429 : !torch.int, !torch.int -> !torch.int
    %int16_19430 = torch.constant.int 16
    %int1_19431 = torch.constant.int 1
    %int128_19432 = torch.constant.int 128
    %21842 = torch.prim.ListConstruct %21841, %int16_19430, %int1_19431, %int128_19432 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21843 = torch.aten.view %21839, %21842 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21843, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21844 = torch.prim.ListConstruct %21797 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_19433 = torch.constant.bool false
    %21845 = torch.aten.index_put %21843, %21844, %21813, %false_19433 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21845, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_19434 = torch.constant.int 32
    %int2_19435 = torch.constant.int 2
    %int16_19436 = torch.constant.int 16
    %int1_19437 = torch.constant.int 1
    %int128_19438 = torch.constant.int 128
    %21846 = torch.prim.ListConstruct %3026, %int32_19434, %int2_19435, %int16_19436, %int1_19437, %int128_19438 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21847 = torch.aten.view %21845, %21846 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21847, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_19439 = torch.constant.int 131072
    %21848 = torch.prim.ListConstruct %3026, %int131072_19439 : (!torch.int, !torch.int) -> !torch.list<int>
    %21849 = torch.aten.view %21847, %21848 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %21849, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
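    // Annotation (editorial): same scatter pattern, presumably @__device_2: slab %20000, indices %21799, values %21815.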
    %int32_19440 = torch.constant.int 32
    %int2_19441 = torch.constant.int 2
    %int16_19442 = torch.constant.int 16
    %int1_19443 = torch.constant.int 1
    %int128_19444 = torch.constant.int 128
    %21850 = torch.prim.ListConstruct %3029, %int32_19440, %int2_19441, %int16_19442, %int1_19443, %int128_19444 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21851 = torch.aten.view %20000, %21850 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21851, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_19445 = torch.constant.int 32
    %21852 = torch.aten.mul.int %3029, %int32_19445 : !torch.int, !torch.int -> !torch.int
    %int2_19446 = torch.constant.int 2
    %21853 = torch.aten.mul.int %21852, %int2_19446 : !torch.int, !torch.int -> !torch.int
    %int16_19447 = torch.constant.int 16
    %int1_19448 = torch.constant.int 1
    %int128_19449 = torch.constant.int 128
    %21854 = torch.prim.ListConstruct %21853, %int16_19447, %int1_19448, %int128_19449 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21855 = torch.aten.view %21851, %21854 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21855, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21856 = torch.prim.ListConstruct %21799 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_19450 = torch.constant.bool false
    %21857 = torch.aten.index_put %21855, %21856, %21815, %false_19450 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21857, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_19451 = torch.constant.int 32
    %int2_19452 = torch.constant.int 2
    %int16_19453 = torch.constant.int 16
    %int1_19454 = torch.constant.int 1
    %int128_19455 = torch.constant.int 128
    %21858 = torch.prim.ListConstruct %3029, %int32_19451, %int2_19452, %int16_19453, %int1_19454, %int128_19455 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21859 = torch.aten.view %21857, %21858 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21859, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_19456 = torch.constant.int 131072
    %21860 = torch.prim.ListConstruct %3029, %int131072_19456 : (!torch.int, !torch.int) -> !torch.list<int>
    %21861 = torch.aten.view %21859, %21860 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %21861, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
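    // Annotation (editorial): same scatter pattern, presumably @__device_3: slab %20012, indices %21801, values %21817.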
    %int32_19457 = torch.constant.int 32
    %int2_19458 = torch.constant.int 2
    %int16_19459 = torch.constant.int 16
    %int1_19460 = torch.constant.int 1
    %int128_19461 = torch.constant.int 128
    %21862 = torch.prim.ListConstruct %3032, %int32_19457, %int2_19458, %int16_19459, %int1_19460, %int128_19461 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21863 = torch.aten.view %20012, %21862 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21863, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_19462 = torch.constant.int 32
    %21864 = torch.aten.mul.int %3032, %int32_19462 : !torch.int, !torch.int -> !torch.int
    %int2_19463 = torch.constant.int 2
    %21865 = torch.aten.mul.int %21864, %int2_19463 : !torch.int, !torch.int -> !torch.int
    %int16_19464 = torch.constant.int 16
    %int1_19465 = torch.constant.int 1
    %int128_19466 = torch.constant.int 128
    %21866 = torch.prim.ListConstruct %21865, %int16_19464, %int1_19465, %int128_19466 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21867 = torch.aten.view %21863, %21866 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21867, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21868 = torch.prim.ListConstruct %21801 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_19467 = torch.constant.bool false
    %21869 = torch.aten.index_put %21867, %21868, %21817, %false_19467 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21869, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_19468 = torch.constant.int 32
    %int2_19469 = torch.constant.int 2
    %int16_19470 = torch.constant.int 16
    %int1_19471 = torch.constant.int 1
    %int128_19472 = torch.constant.int 128
    %21870 = torch.prim.ListConstruct %3032, %int32_19468, %int2_19469, %int16_19470, %int1_19471, %int128_19472 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21871 = torch.aten.view %21869, %21870 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21871, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_19473 = torch.constant.int 131072
    %21872 = torch.prim.ListConstruct %3032, %int131072_19473 : (!torch.int, !torch.int) -> !torch.list<int>
    %21873 = torch.aten.view %21871, %21872 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %21873, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
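    // Annotation (editorial): same scatter pattern, presumably @__device_4: slab %20024, indices %21803, values %21819.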
    %int32_19474 = torch.constant.int 32
    %int2_19475 = torch.constant.int 2
    %int16_19476 = torch.constant.int 16
    %int1_19477 = torch.constant.int 1
    %int128_19478 = torch.constant.int 128
    %21874 = torch.prim.ListConstruct %3035, %int32_19474, %int2_19475, %int16_19476, %int1_19477, %int128_19478 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21875 = torch.aten.view %20024, %21874 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21875, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_19479 = torch.constant.int 32
    %21876 = torch.aten.mul.int %3035, %int32_19479 : !torch.int, !torch.int -> !torch.int
    %int2_19480 = torch.constant.int 2
    %21877 = torch.aten.mul.int %21876, %int2_19480 : !torch.int, !torch.int -> !torch.int
    %int16_19481 = torch.constant.int 16
    %int1_19482 = torch.constant.int 1
    %int128_19483 = torch.constant.int 128
    %21878 = torch.prim.ListConstruct %21877, %int16_19481, %int1_19482, %int128_19483 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21879 = torch.aten.view %21875, %21878 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21879, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21880 = torch.prim.ListConstruct %21803 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_19484 = torch.constant.bool false
    %21881 = torch.aten.index_put %21879, %21880, %21819, %false_19484 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21881, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_19485 = torch.constant.int 32
    %int2_19486 = torch.constant.int 2
    %int16_19487 = torch.constant.int 16
    %int1_19488 = torch.constant.int 1
    %int128_19489 = torch.constant.int 128
    %21882 = torch.prim.ListConstruct %3035, %int32_19485, %int2_19486, %int16_19487, %int1_19488, %int128_19489 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21883 = torch.aten.view %21881, %21882 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21883, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_19490 = torch.constant.int 131072
    %21884 = torch.prim.ListConstruct %3035, %int131072_19490 : (!torch.int, !torch.int) -> !torch.list<int>
    %21885 = torch.aten.view %21883, %21884 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %21885, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
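    // Annotation (editorial): same scatter pattern, presumably @__device_5: slab %20036, indices %21805, values %21821.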
    %int32_19491 = torch.constant.int 32
    %int2_19492 = torch.constant.int 2
    %int16_19493 = torch.constant.int 16
    %int1_19494 = torch.constant.int 1
    %int128_19495 = torch.constant.int 128
    %21886 = torch.prim.ListConstruct %3038, %int32_19491, %int2_19492, %int16_19493, %int1_19494, %int128_19495 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21887 = torch.aten.view %20036, %21886 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21887, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_19496 = torch.constant.int 32
    %21888 = torch.aten.mul.int %3038, %int32_19496 : !torch.int, !torch.int -> !torch.int
    %int2_19497 = torch.constant.int 2
    %21889 = torch.aten.mul.int %21888, %int2_19497 : !torch.int, !torch.int -> !torch.int
    %int16_19498 = torch.constant.int 16
    %int1_19499 = torch.constant.int 1
    %int128_19500 = torch.constant.int 128
    %21890 = torch.prim.ListConstruct %21889, %int16_19498, %int1_19499, %int128_19500 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21891 = torch.aten.view %21887, %21890 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21891, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21892 = torch.prim.ListConstruct %21805 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_19501 = torch.constant.bool false
    %21893 = torch.aten.index_put %21891, %21892, %21821, %false_19501 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21893, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_19502 = torch.constant.int 32
    %int2_19503 = torch.constant.int 2
    %int16_19504 = torch.constant.int 16
    %int1_19505 = torch.constant.int 1
    %int128_19506 = torch.constant.int 128
    %21894 = torch.prim.ListConstruct %3038, %int32_19502, %int2_19503, %int16_19504, %int1_19505, %int128_19506 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21895 = torch.aten.view %21893, %21894 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21895, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_19507 = torch.constant.int 131072
    %21896 = torch.prim.ListConstruct %3038, %int131072_19507 : (!torch.int, !torch.int) -> !torch.list<int>
    %21897 = torch.aten.view %21895, %21896 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %21897, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
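    // Annotation (editorial): same scatter pattern, presumably @__device_6: slab %20048, indices %21807, values %21823.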
    %int32_19508 = torch.constant.int 32
    %int2_19509 = torch.constant.int 2
    %int16_19510 = torch.constant.int 16
    %int1_19511 = torch.constant.int 1
    %int128_19512 = torch.constant.int 128
    %21898 = torch.prim.ListConstruct %3041, %int32_19508, %int2_19509, %int16_19510, %int1_19511, %int128_19512 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21899 = torch.aten.view %20048, %21898 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21899, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_19513 = torch.constant.int 32
    %21900 = torch.aten.mul.int %3041, %int32_19513 : !torch.int, !torch.int -> !torch.int
    %int2_19514 = torch.constant.int 2
    %21901 = torch.aten.mul.int %21900, %int2_19514 : !torch.int, !torch.int -> !torch.int
    %int16_19515 = torch.constant.int 16
    %int1_19516 = torch.constant.int 1
    %int128_19517 = torch.constant.int 128
    %21902 = torch.prim.ListConstruct %21901, %int16_19515, %int1_19516, %int128_19517 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21903 = torch.aten.view %21899, %21902 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21903, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21904 = torch.prim.ListConstruct %21807 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_19518 = torch.constant.bool false
    %21905 = torch.aten.index_put %21903, %21904, %21823, %false_19518 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21905, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_19519 = torch.constant.int 32
    %int2_19520 = torch.constant.int 2
    %int16_19521 = torch.constant.int 16
    %int1_19522 = torch.constant.int 1
    %int128_19523 = torch.constant.int 128
    %21906 = torch.prim.ListConstruct %3041, %int32_19519, %int2_19520, %int16_19521, %int1_19522, %int128_19523 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21907 = torch.aten.view %21905, %21906 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21907, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_19524 = torch.constant.int 131072
    %21908 = torch.prim.ListConstruct %3041, %int131072_19524 : (!torch.int, !torch.int) -> !torch.list<int>
    %21909 = torch.aten.view %21907, %21908 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %21909, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
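    // Annotation (editorial): same scatter pattern, presumably @__device_7: slab %20060, indices %21809, values %21825.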
    %int32_19525 = torch.constant.int 32
    %int2_19526 = torch.constant.int 2
    %int16_19527 = torch.constant.int 16
    %int1_19528 = torch.constant.int 1
    %int128_19529 = torch.constant.int 128
    %21910 = torch.prim.ListConstruct %3044, %int32_19525, %int2_19526, %int16_19527, %int1_19528, %int128_19529 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21911 = torch.aten.view %20060, %21910 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21911, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_19530 = torch.constant.int 32
    %21912 = torch.aten.mul.int %3044, %int32_19530 : !torch.int, !torch.int -> !torch.int
    %int2_19531 = torch.constant.int 2
    %21913 = torch.aten.mul.int %21912, %int2_19531 : !torch.int, !torch.int -> !torch.int
    %int16_19532 = torch.constant.int 16
    %int1_19533 = torch.constant.int 1
    %int128_19534 = torch.constant.int 128
    %21914 = torch.prim.ListConstruct %21913, %int16_19532, %int1_19533, %int128_19534 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21915 = torch.aten.view %21911, %21914 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21915, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %21916 = torch.prim.ListConstruct %21809 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_19535 = torch.constant.bool false
    %21917 = torch.aten.index_put %21915, %21916, %21825, %false_19535 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %21917, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_19536 = torch.constant.int 32
    %int2_19537 = torch.constant.int 2
    %int16_19538 = torch.constant.int 16
    %int1_19539 = torch.constant.int 1
    %int128_19540 = torch.constant.int 128
    %21918 = torch.prim.ListConstruct %3044, %int32_19536, %int2_19537, %int16_19538, %int1_19539, %int128_19540 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21919 = torch.aten.view %21917, %21918 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %21919, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_19541 = torch.constant.int 131072
    %21920 = torch.prim.ListConstruct %3044, %int131072_19541 : (!torch.int, !torch.int) -> !torch.list<int>
    %21921 = torch.aten.view %21919, %21920 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %21921, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
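    // Annotation (editorial): what follows appears to be the grouped-query-attention
    // broadcast for the key tensors: each per-device [4, s0*16, 1, 128] key is
    // unsqueezed at dim -2, expanded from 1 to 4 query heads, and collapsed back,
    // i.e. a repeat of the single sharded KV head across its 4 query heads.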
    %int-2_19542 = torch.constant.int -2
    %21922 = torch.aten.unsqueeze %21536, %int-2_19542 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19543 = torch.constant.int -2
    %21923 = torch.aten.unsqueeze %21551, %int-2_19543 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19544 = torch.constant.int -2
    %21924 = torch.aten.unsqueeze %21566, %int-2_19544 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19545 = torch.constant.int -2
    %21925 = torch.aten.unsqueeze %21581, %int-2_19545 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19546 = torch.constant.int -2
    %21926 = torch.aten.unsqueeze %21596, %int-2_19546 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19547 = torch.constant.int -2
    %21927 = torch.aten.unsqueeze %21611, %int-2_19547 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19548 = torch.constant.int -2
    %21928 = torch.aten.unsqueeze %21626, %int-2_19548 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19549 = torch.constant.int -2
    %21929 = torch.aten.unsqueeze %21641, %int-2_19549 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
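    // Annotation (editorial): the aten.expand ops below broadcast the singleton head
    // dimension from 1 to 4; expand is a broadcasting view in PyTorch semantics, and
    // the trailing `false` is its `implicit` flag.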
    %int4_19550 = torch.constant.int 4
    %int1_19551 = torch.constant.int 1
    %int4_19552 = torch.constant.int 4
    %int128_19553 = torch.constant.int 128
    %21930 = torch.prim.ListConstruct %int4_19550, %21522, %int1_19551, %int4_19552, %int128_19553 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19554 = torch.constant.bool false
    %21931 = torch.aten.expand %21922, %21930, %false_19554 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19555 = torch.constant.int 4
    %int1_19556 = torch.constant.int 1
    %int4_19557 = torch.constant.int 4
    %int128_19558 = torch.constant.int 128
    %21932 = torch.prim.ListConstruct %int4_19555, %21522, %int1_19556, %int4_19557, %int128_19558 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19559 = torch.constant.bool false
    %21933 = torch.aten.expand %21923, %21932, %false_19559 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19560 = torch.constant.int 4
    %int1_19561 = torch.constant.int 1
    %int4_19562 = torch.constant.int 4
    %int128_19563 = torch.constant.int 128
    %21934 = torch.prim.ListConstruct %int4_19560, %21522, %int1_19561, %int4_19562, %int128_19563 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19564 = torch.constant.bool false
    %21935 = torch.aten.expand %21924, %21934, %false_19564 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19565 = torch.constant.int 4
    %int1_19566 = torch.constant.int 1
    %int4_19567 = torch.constant.int 4
    %int128_19568 = torch.constant.int 128
    %21936 = torch.prim.ListConstruct %int4_19565, %21522, %int1_19566, %int4_19567, %int128_19568 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19569 = torch.constant.bool false
    %21937 = torch.aten.expand %21925, %21936, %false_19569 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19570 = torch.constant.int 4
    %int1_19571 = torch.constant.int 1
    %int4_19572 = torch.constant.int 4
    %int128_19573 = torch.constant.int 128
    %21938 = torch.prim.ListConstruct %int4_19570, %21522, %int1_19571, %int4_19572, %int128_19573 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19574 = torch.constant.bool false
    %21939 = torch.aten.expand %21926, %21938, %false_19574 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19575 = torch.constant.int 4
    %int1_19576 = torch.constant.int 1
    %int4_19577 = torch.constant.int 4
    %int128_19578 = torch.constant.int 128
    %21940 = torch.prim.ListConstruct %int4_19575, %21522, %int1_19576, %int4_19577, %int128_19578 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19579 = torch.constant.bool false
    %21941 = torch.aten.expand %21927, %21940, %false_19579 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19580 = torch.constant.int 4
    %int1_19581 = torch.constant.int 1
    %int4_19582 = torch.constant.int 4
    %int128_19583 = torch.constant.int 128
    %21942 = torch.prim.ListConstruct %int4_19580, %21522, %int1_19581, %int4_19582, %int128_19583 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19584 = torch.constant.bool false
    %21943 = torch.aten.expand %21928, %21942, %false_19584 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19585 = torch.constant.int 4
    %int1_19586 = torch.constant.int 1
    %int4_19587 = torch.constant.int 4
    %int128_19588 = torch.constant.int 128
    %21944 = torch.prim.ListConstruct %int4_19585, %21522, %int1_19586, %int4_19587, %int128_19588 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19589 = torch.constant.bool false
    %21945 = torch.aten.expand %21929, %21944, %false_19589 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
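    // Annotation (editorial): the views below collapse [4, s0*16, 1, 4, 128] to
    // [4, s0*16, 4, 128], merging the singleton KV-head axis into the repeated
    // query-head axis and completing the key-side repeat.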
    %int4_19590 = torch.constant.int 4
    %int4_19591 = torch.constant.int 4
    %int128_19592 = torch.constant.int 128
    %21946 = torch.prim.ListConstruct %int4_19590, %21522, %int4_19591, %int128_19592 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21947 = torch.aten.view %21931, %21946 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19593 = torch.constant.int 4
    %int4_19594 = torch.constant.int 4
    %int128_19595 = torch.constant.int 128
    %21948 = torch.prim.ListConstruct %int4_19593, %21522, %int4_19594, %int128_19595 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21949 = torch.aten.view %21933, %21948 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19596 = torch.constant.int 4
    %int4_19597 = torch.constant.int 4
    %int128_19598 = torch.constant.int 128
    %21950 = torch.prim.ListConstruct %int4_19596, %21522, %int4_19597, %int128_19598 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21951 = torch.aten.view %21935, %21950 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19599 = torch.constant.int 4
    %int4_19600 = torch.constant.int 4
    %int128_19601 = torch.constant.int 128
    %21952 = torch.prim.ListConstruct %int4_19599, %21522, %int4_19600, %int128_19601 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21953 = torch.aten.view %21937, %21952 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19602 = torch.constant.int 4
    %int4_19603 = torch.constant.int 4
    %int128_19604 = torch.constant.int 128
    %21954 = torch.prim.ListConstruct %int4_19602, %21522, %int4_19603, %int128_19604 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21955 = torch.aten.view %21939, %21954 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19605 = torch.constant.int 4
    %int4_19606 = torch.constant.int 4
    %int128_19607 = torch.constant.int 128
    %21956 = torch.prim.ListConstruct %int4_19605, %21522, %int4_19606, %int128_19607 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21957 = torch.aten.view %21941, %21956 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19608 = torch.constant.int 4
    %int4_19609 = torch.constant.int 4
    %int128_19610 = torch.constant.int 128
    %21958 = torch.prim.ListConstruct %int4_19608, %21522, %int4_19609, %int128_19610 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21959 = torch.aten.view %21943, %21958 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19611 = torch.constant.int 4
    %int4_19612 = torch.constant.int 4
    %int128_19613 = torch.constant.int 128
    %21960 = torch.prim.ListConstruct %int4_19611, %21522, %int4_19612, %int128_19613 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21961 = torch.aten.view %21945, %21960 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
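    // Annotation (editorial): the same unsqueeze/expand/collapse sequence is now
    // applied to the eight per-device value tensors (%21311 ... %21325).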
    %int-2_19614 = torch.constant.int -2
    %21962 = torch.aten.unsqueeze %21311, %int-2_19614 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19615 = torch.constant.int -2
    %21963 = torch.aten.unsqueeze %21313, %int-2_19615 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19616 = torch.constant.int -2
    %21964 = torch.aten.unsqueeze %21315, %int-2_19616 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19617 = torch.constant.int -2
    %21965 = torch.aten.unsqueeze %21317, %int-2_19617 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19618 = torch.constant.int -2
    %21966 = torch.aten.unsqueeze %21319, %int-2_19618 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19619 = torch.constant.int -2
    %21967 = torch.aten.unsqueeze %21321, %int-2_19619 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19620 = torch.constant.int -2
    %21968 = torch.aten.unsqueeze %21323, %int-2_19620 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_19621 = torch.constant.int -2
    %21969 = torch.aten.unsqueeze %21325, %int-2_19621 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %21969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
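    // Annotation (editorial): aten.size.int re-derives the dynamic sequence length
    // from %21235 so the expand shapes below can be constructed; the value tensors
    // are then expanded to 4 heads and collapsed to [4, s0*16, 4, 128], mirroring
    // the key path above.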
    %int1_19622 = torch.constant.int 1
    %21970 = torch.aten.size.int %21235, %int1_19622 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_19623 = torch.constant.int 4
    %int1_19624 = torch.constant.int 1
    %int4_19625 = torch.constant.int 4
    %int128_19626 = torch.constant.int 128
    %21971 = torch.prim.ListConstruct %int4_19623, %21970, %int1_19624, %int4_19625, %int128_19626 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19627 = torch.constant.bool false
    %21972 = torch.aten.expand %21962, %21971, %false_19627 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19628 = torch.constant.int 4
    %int1_19629 = torch.constant.int 1
    %int4_19630 = torch.constant.int 4
    %int128_19631 = torch.constant.int 128
    %21973 = torch.prim.ListConstruct %int4_19628, %21970, %int1_19629, %int4_19630, %int128_19631 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19632 = torch.constant.bool false
    %21974 = torch.aten.expand %21963, %21973, %false_19632 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19633 = torch.constant.int 4
    %int1_19634 = torch.constant.int 1
    %int4_19635 = torch.constant.int 4
    %int128_19636 = torch.constant.int 128
    %21975 = torch.prim.ListConstruct %int4_19633, %21970, %int1_19634, %int4_19635, %int128_19636 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19637 = torch.constant.bool false
    %21976 = torch.aten.expand %21964, %21975, %false_19637 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19638 = torch.constant.int 4
    %int1_19639 = torch.constant.int 1
    %int4_19640 = torch.constant.int 4
    %int128_19641 = torch.constant.int 128
    %21977 = torch.prim.ListConstruct %int4_19638, %21970, %int1_19639, %int4_19640, %int128_19641 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19642 = torch.constant.bool false
    %21978 = torch.aten.expand %21965, %21977, %false_19642 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19643 = torch.constant.int 4
    %int1_19644 = torch.constant.int 1
    %int4_19645 = torch.constant.int 4
    %int128_19646 = torch.constant.int 128
    %21979 = torch.prim.ListConstruct %int4_19643, %21970, %int1_19644, %int4_19645, %int128_19646 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19647 = torch.constant.bool false
    %21980 = torch.aten.expand %21966, %21979, %false_19647 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19648 = torch.constant.int 4
    %int1_19649 = torch.constant.int 1
    %int4_19650 = torch.constant.int 4
    %int128_19651 = torch.constant.int 128
    %21981 = torch.prim.ListConstruct %int4_19648, %21970, %int1_19649, %int4_19650, %int128_19651 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19652 = torch.constant.bool false
    %21982 = torch.aten.expand %21967, %21981, %false_19652 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19653 = torch.constant.int 4
    %int1_19654 = torch.constant.int 1
    %int4_19655 = torch.constant.int 4
    %int128_19656 = torch.constant.int 128
    %21983 = torch.prim.ListConstruct %int4_19653, %21970, %int1_19654, %int4_19655, %int128_19656 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19657 = torch.constant.bool false
    %21984 = torch.aten.expand %21968, %21983, %false_19657 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19658 = torch.constant.int 4
    %int1_19659 = torch.constant.int 1
    %int4_19660 = torch.constant.int 4
    %int128_19661 = torch.constant.int 128
    %21985 = torch.prim.ListConstruct %int4_19658, %21970, %int1_19659, %int4_19660, %int128_19661 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_19662 = torch.constant.bool false
    %21986 = torch.aten.expand %21969, %21985, %false_19662 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %21986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_19663 = torch.constant.int 4
    %int4_19664 = torch.constant.int 4
    %int128_19665 = torch.constant.int 128
    %21987 = torch.prim.ListConstruct %int4_19663, %21970, %int4_19664, %int128_19665 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21988 = torch.aten.view %21972, %21987 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19666 = torch.constant.int 4
    %int4_19667 = torch.constant.int 4
    %int128_19668 = torch.constant.int 128
    %21989 = torch.prim.ListConstruct %int4_19666, %21970, %int4_19667, %int128_19668 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21990 = torch.aten.view %21974, %21989 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19669 = torch.constant.int 4
    %int4_19670 = torch.constant.int 4
    %int128_19671 = torch.constant.int 128
    %21991 = torch.prim.ListConstruct %int4_19669, %21970, %int4_19670, %int128_19671 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21992 = torch.aten.view %21976, %21991 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19672 = torch.constant.int 4
    %int4_19673 = torch.constant.int 4
    %int128_19674 = torch.constant.int 128
    %21993 = torch.prim.ListConstruct %int4_19672, %21970, %int4_19673, %int128_19674 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21994 = torch.aten.view %21978, %21993 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19675 = torch.constant.int 4
    %int4_19676 = torch.constant.int 4
    %int128_19677 = torch.constant.int 128
    %21995 = torch.prim.ListConstruct %int4_19675, %21970, %int4_19676, %int128_19677 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21996 = torch.aten.view %21980, %21995 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19678 = torch.constant.int 4
    %int4_19679 = torch.constant.int 4
    %int128_19680 = torch.constant.int 128
    %21997 = torch.prim.ListConstruct %int4_19678, %21970, %int4_19679, %int128_19680 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %21998 = torch.aten.view %21982, %21997 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %21998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19681 = torch.constant.int 4
    %int4_19682 = torch.constant.int 4
    %int128_19683 = torch.constant.int 128
    %21999 = torch.prim.ListConstruct %int4_19681, %21970, %int4_19682, %int128_19683 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22000 = torch.aten.view %21984, %21999 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %22000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_19684 = torch.constant.int 4
    %int4_19685 = torch.constant.int 4
    %int128_19686 = torch.constant.int 128
    %22001 = torch.prim.ListConstruct %int4_19684, %21970, %int4_19685, %int128_19686 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22002 = torch.aten.view %21986, %22001 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %22002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
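    // Annotation (editorial): transposing dims 1 and 2 below moves the head axis
    // ahead of the sequence axis, [4, s0*16, 4, 128] -> [4, 4, s0*16, 128], which is
    // the layout expected by scaled-dot-product attention: first the eight query
    // tensors (%21378 ...), then the repeated keys (%21947 ...).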
    %int1_19687 = torch.constant.int 1
    %int2_19688 = torch.constant.int 2
    %22003 = torch.aten.transpose.int %21378, %int1_19687, %int2_19688 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22003, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19689 = torch.constant.int 1
    %int2_19690 = torch.constant.int 2
    %22004 = torch.aten.transpose.int %21393, %int1_19689, %int2_19690 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22004, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19691 = torch.constant.int 1
    %int2_19692 = torch.constant.int 2
    %22005 = torch.aten.transpose.int %21408, %int1_19691, %int2_19692 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22005, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19693 = torch.constant.int 1
    %int2_19694 = torch.constant.int 2
    %22006 = torch.aten.transpose.int %21423, %int1_19693, %int2_19694 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22006, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19695 = torch.constant.int 1
    %int2_19696 = torch.constant.int 2
    %22007 = torch.aten.transpose.int %21438, %int1_19695, %int2_19696 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22007, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19697 = torch.constant.int 1
    %int2_19698 = torch.constant.int 2
    %22008 = torch.aten.transpose.int %21453, %int1_19697, %int2_19698 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22008, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19699 = torch.constant.int 1
    %int2_19700 = torch.constant.int 2
    %22009 = torch.aten.transpose.int %21468, %int1_19699, %int2_19700 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22009, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19701 = torch.constant.int 1
    %int2_19702 = torch.constant.int 2
    %22010 = torch.aten.transpose.int %21483, %int1_19701, %int2_19702 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22010, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
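    // The same dim-1/dim-2 transpose for the key tensors on each device.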
    %int1_19703 = torch.constant.int 1
    %int2_19704 = torch.constant.int 2
    %22011 = torch.aten.transpose.int %21947, %int1_19703, %int2_19704 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22011, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19705 = torch.constant.int 1
    %int2_19706 = torch.constant.int 2
    %22012 = torch.aten.transpose.int %21949, %int1_19705, %int2_19706 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22012, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19707 = torch.constant.int 1
    %int2_19708 = torch.constant.int 2
    %22013 = torch.aten.transpose.int %21951, %int1_19707, %int2_19708 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22013, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19709 = torch.constant.int 1
    %int2_19710 = torch.constant.int 2
    %22014 = torch.aten.transpose.int %21953, %int1_19709, %int2_19710 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22014, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19711 = torch.constant.int 1
    %int2_19712 = torch.constant.int 2
    %22015 = torch.aten.transpose.int %21955, %int1_19711, %int2_19712 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22015, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19713 = torch.constant.int 1
    %int2_19714 = torch.constant.int 2
    %22016 = torch.aten.transpose.int %21957, %int1_19713, %int2_19714 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22016, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19715 = torch.constant.int 1
    %int2_19716 = torch.constant.int 2
    %22017 = torch.aten.transpose.int %21959, %int1_19715, %int2_19716 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22017, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19717 = torch.constant.int 1
    %int2_19718 = torch.constant.int 2
    %22018 = torch.aten.transpose.int %21961, %int1_19717, %int2_19718 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22018, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
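    // ...and for the value tensors, including the [4, ?, 4, 128] views built
    // above; these feed the third (value) operand of each attention call.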
    %int1_19719 = torch.constant.int 1
    %int2_19720 = torch.constant.int 2
    %22019 = torch.aten.transpose.int %21988, %int1_19719, %int2_19720 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22019, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19721 = torch.constant.int 1
    %int2_19722 = torch.constant.int 2
    %22020 = torch.aten.transpose.int %21990, %int1_19721, %int2_19722 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22020, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19723 = torch.constant.int 1
    %int2_19724 = torch.constant.int 2
    %22021 = torch.aten.transpose.int %21992, %int1_19723, %int2_19724 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22021, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19725 = torch.constant.int 1
    %int2_19726 = torch.constant.int 2
    %22022 = torch.aten.transpose.int %21994, %int1_19725, %int2_19726 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22022, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19727 = torch.constant.int 1
    %int2_19728 = torch.constant.int 2
    %22023 = torch.aten.transpose.int %21996, %int1_19727, %int2_19728 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22023, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19729 = torch.constant.int 1
    %int2_19730 = torch.constant.int 2
    %22024 = torch.aten.transpose.int %21998, %int1_19729, %int2_19730 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22024, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19731 = torch.constant.int 1
    %int2_19732 = torch.constant.int 2
    %22025 = torch.aten.transpose.int %22000, %int1_19731, %int2_19732 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22025, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_19733 = torch.constant.int 1
    %int2_19734 = torch.constant.int 2
    %22026 = torch.aten.transpose.int %22002, %int1_19733, %int2_19734 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %22026, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
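    // Eight scaled-dot-product flash-attention calls, one per device shard,
    // with dropout_p = 0.0 and is_causal = true; the two none operands are
    // likely the optional attn_mask and scale arguments. Only result #0 is
    // consumed below; result #1 (the [4,4,?] f32 logsumexp) goes unused here.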
    %float0.000000e00_19735 = torch.constant.float 0.000000e+00
    %true_19736 = torch.constant.bool true
    %none_19737 = torch.constant.none
    %none_19738 = torch.constant.none
    %22027:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%22003, %22011, %22019, %float0.000000e00_19735, %true_19736, %none_19737, %none_19738) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %22027#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_19739 = torch.constant.float 0.000000e+00
    %true_19740 = torch.constant.bool true
    %none_19741 = torch.constant.none
    %none_19742 = torch.constant.none
    %22028:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%22004, %22012, %22020, %float0.000000e00_19739, %true_19740, %none_19741, %none_19742) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %22028#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_19743 = torch.constant.float 0.000000e+00
    %true_19744 = torch.constant.bool true
    %none_19745 = torch.constant.none
    %none_19746 = torch.constant.none
    %22029:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%22005, %22013, %22021, %float0.000000e00_19743, %true_19744, %none_19745, %none_19746) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %22029#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_19747 = torch.constant.float 0.000000e+00
    %true_19748 = torch.constant.bool true
    %none_19749 = torch.constant.none
    %none_19750 = torch.constant.none
    %22030:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%22006, %22014, %22022, %float0.000000e00_19747, %true_19748, %none_19749, %none_19750) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %22030#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_19751 = torch.constant.float 0.000000e+00
    %true_19752 = torch.constant.bool true
    %none_19753 = torch.constant.none
    %none_19754 = torch.constant.none
    %22031:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%22007, %22015, %22023, %float0.000000e00_19751, %true_19752, %none_19753, %none_19754) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %22031#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_19755 = torch.constant.float 0.000000e+00
    %true_19756 = torch.constant.bool true
    %none_19757 = torch.constant.none
    %none_19758 = torch.constant.none
    %22032:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%22008, %22016, %22024, %float0.000000e00_19755, %true_19756, %none_19757, %none_19758) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %22032#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_19759 = torch.constant.float 0.000000e+00
    %true_19760 = torch.constant.bool true
    %none_19761 = torch.constant.none
    %none_19762 = torch.constant.none
    %22033:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%22009, %22017, %22025, %float0.000000e00_19759, %true_19760, %none_19761, %none_19762) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %22033#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_19763 = torch.constant.float 0.000000e+00
    %true_19764 = torch.constant.bool true
    %none_19765 = torch.constant.none
    %none_19766 = torch.constant.none
    %22034:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%22010, %22018, %22026, %float0.000000e00_19763, %true_19764, %none_19765, %none_19766) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %22034#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
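    // Transpose each attention output back to [batch, seq, heads, head_dim].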
    %int1_19767 = torch.constant.int 1
    %int2_19768 = torch.constant.int 2
    %22035 = torch.aten.transpose.int %22027#0, %int1_19767, %int2_19768 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %22035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_19769 = torch.constant.int 1
    %int2_19770 = torch.constant.int 2
    %22036 = torch.aten.transpose.int %22028#0, %int1_19769, %int2_19770 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %22036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_19771 = torch.constant.int 1
    %int2_19772 = torch.constant.int 2
    %22037 = torch.aten.transpose.int %22029#0, %int1_19771, %int2_19772 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %22037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_19773 = torch.constant.int 1
    %int2_19774 = torch.constant.int 2
    %22038 = torch.aten.transpose.int %22030#0, %int1_19773, %int2_19774 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %22038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_19775 = torch.constant.int 1
    %int2_19776 = torch.constant.int 2
    %22039 = torch.aten.transpose.int %22031#0, %int1_19775, %int2_19776 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %22039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_19777 = torch.constant.int 1
    %int2_19778 = torch.constant.int 2
    %22040 = torch.aten.transpose.int %22032#0, %int1_19777, %int2_19778 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %22040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_19779 = torch.constant.int 1
    %int2_19780 = torch.constant.int 2
    %22041 = torch.aten.transpose.int %22033#0, %int1_19779, %int2_19780 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %22041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_19781 = torch.constant.int 1
    %int2_19782 = torch.constant.int 2
    %22042 = torch.aten.transpose.int %22034#0, %int1_19781, %int2_19782 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %22042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
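    // Collapse the head dimensions: view each [4, ?, 4, 128] output as
    // [4, ?, 512] (4 heads * 128 head_dim = 512 per shard).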
    %int4_19783 = torch.constant.int 4
    %int512_19784 = torch.constant.int 512
    %22043 = torch.prim.ListConstruct %int4_19783, %21364, %int512_19784 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22044 = torch.aten.view %22035, %22043 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_19785 = torch.constant.int 4
    %int512_19786 = torch.constant.int 512
    %22045 = torch.prim.ListConstruct %int4_19785, %21379, %int512_19786 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22046 = torch.aten.view %22036, %22045 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_19787 = torch.constant.int 4
    %int512_19788 = torch.constant.int 512
    %22047 = torch.prim.ListConstruct %int4_19787, %21394, %int512_19788 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22048 = torch.aten.view %22037, %22047 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_19789 = torch.constant.int 4
    %int512_19790 = torch.constant.int 512
    %22049 = torch.prim.ListConstruct %int4_19789, %21409, %int512_19790 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22050 = torch.aten.view %22038, %22049 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_19791 = torch.constant.int 4
    %int512_19792 = torch.constant.int 512
    %22051 = torch.prim.ListConstruct %int4_19791, %21424, %int512_19792 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22052 = torch.aten.view %22039, %22051 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_19793 = torch.constant.int 4
    %int512_19794 = torch.constant.int 512
    %22053 = torch.prim.ListConstruct %int4_19793, %21439, %int512_19794 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22054 = torch.aten.view %22040, %22053 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_19795 = torch.constant.int 4
    %int512_19796 = torch.constant.int 512
    %22055 = torch.prim.ListConstruct %int4_19795, %21454, %int512_19796 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22056 = torch.aten.view %22041, %22055 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_19797 = torch.constant.int 4
    %int512_19798 = torch.constant.int 512
    %22057 = torch.prim.ListConstruct %int4_19797, %21469, %int512_19798 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22058 = torch.aten.view %22042, %22057 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
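    // Transpose the eight [4096, 512] projection weights (%760-%767, likely
    // the sharded attention output-projection weights) to [512, 4096] for the
    // matmuls below.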
    %int1_19799 = torch.constant.int 1
    %int0_19800 = torch.constant.int 0
    %22059 = torch.prim.ListConstruct %int1_19799, %int0_19800 : (!torch.int, !torch.int) -> !torch.list<int>
    %22060 = torch.aten.permute %760, %22059 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_19801 = torch.constant.int 1
    %int0_19802 = torch.constant.int 0
    %22061 = torch.prim.ListConstruct %int1_19801, %int0_19802 : (!torch.int, !torch.int) -> !torch.list<int>
    %22062 = torch.aten.permute %761, %22061 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_19803 = torch.constant.int 1
    %int0_19804 = torch.constant.int 0
    %22063 = torch.prim.ListConstruct %int1_19803, %int0_19804 : (!torch.int, !torch.int) -> !torch.list<int>
    %22064 = torch.aten.permute %762, %22063 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_19805 = torch.constant.int 1
    %int0_19806 = torch.constant.int 0
    %22065 = torch.prim.ListConstruct %int1_19805, %int0_19806 : (!torch.int, !torch.int) -> !torch.list<int>
    %22066 = torch.aten.permute %763, %22065 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_19807 = torch.constant.int 1
    %int0_19808 = torch.constant.int 0
    %22067 = torch.prim.ListConstruct %int1_19807, %int0_19808 : (!torch.int, !torch.int) -> !torch.list<int>
    %22068 = torch.aten.permute %764, %22067 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_19809 = torch.constant.int 1
    %int0_19810 = torch.constant.int 0
    %22069 = torch.prim.ListConstruct %int1_19809, %int0_19810 : (!torch.int, !torch.int) -> !torch.list<int>
    %22070 = torch.aten.permute %765, %22069 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_19811 = torch.constant.int 1
    %int0_19812 = torch.constant.int 0
    %22071 = torch.prim.ListConstruct %int1_19811, %int0_19812 : (!torch.int, !torch.int) -> !torch.list<int>
    %22072 = torch.aten.permute %766, %22071 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_19813 = torch.constant.int 1
    %int0_19814 = torch.constant.int 0
    %22073 = torch.prim.ListConstruct %int1_19813, %int0_19814 : (!torch.int, !torch.int) -> !torch.list<int>
    %22074 = torch.aten.permute %767, %22073 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
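    // Per-shard output projection: flatten each [4, ?, 512] tensor to
    // [4*seq, 512], matmul with the transposed weight, then restore the
    // [4, ?, 4096] shape.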
    %int4_19815 = torch.constant.int 4
    %22075 = torch.aten.mul.int %int4_19815, %21364 : !torch.int, !torch.int -> !torch.int
    %int512_19816 = torch.constant.int 512
    %22076 = torch.prim.ListConstruct %22075, %int512_19816 : (!torch.int, !torch.int) -> !torch.list<int>
    %22077 = torch.aten.view %22044, %22076 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22077, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %22078 = torch.aten.mm %22077, %22060 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22078, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_19817 = torch.constant.int 4
    %int4096_19818 = torch.constant.int 4096
    %22079 = torch.prim.ListConstruct %int4_19817, %21364, %int4096_19818 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22080 = torch.aten.view %22078, %22079 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_19819 = torch.constant.int 4
    %22081 = torch.aten.mul.int %int4_19819, %21379 : !torch.int, !torch.int -> !torch.int
    %int512_19820 = torch.constant.int 512
    %22082 = torch.prim.ListConstruct %22081, %int512_19820 : (!torch.int, !torch.int) -> !torch.list<int>
    %22083 = torch.aten.view %22046, %22082 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22083, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %22084 = torch.aten.mm %22083, %22062 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22084, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_19821 = torch.constant.int 4
    %int4096_19822 = torch.constant.int 4096
    %22085 = torch.prim.ListConstruct %int4_19821, %21379, %int4096_19822 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22086 = torch.aten.view %22084, %22085 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_19823 = torch.constant.int 4
    %22087 = torch.aten.mul.int %int4_19823, %21394 : !torch.int, !torch.int -> !torch.int
    %int512_19824 = torch.constant.int 512
    %22088 = torch.prim.ListConstruct %22087, %int512_19824 : (!torch.int, !torch.int) -> !torch.list<int>
    %22089 = torch.aten.view %22048, %22088 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22089, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %22090 = torch.aten.mm %22089, %22064 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22090, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_19825 = torch.constant.int 4
    %int4096_19826 = torch.constant.int 4096
    %22091 = torch.prim.ListConstruct %int4_19825, %21394, %int4096_19826 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22092 = torch.aten.view %22090, %22091 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_19827 = torch.constant.int 4
    %22093 = torch.aten.mul.int %int4_19827, %21409 : !torch.int, !torch.int -> !torch.int
    %int512_19828 = torch.constant.int 512
    %22094 = torch.prim.ListConstruct %22093, %int512_19828 : (!torch.int, !torch.int) -> !torch.list<int>
    %22095 = torch.aten.view %22050, %22094 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22095, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %22096 = torch.aten.mm %22095, %22066 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22096, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_19829 = torch.constant.int 4
    %int4096_19830 = torch.constant.int 4096
    %22097 = torch.prim.ListConstruct %int4_19829, %21409, %int4096_19830 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22098 = torch.aten.view %22096, %22097 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_19831 = torch.constant.int 4
    %22099 = torch.aten.mul.int %int4_19831, %21424 : !torch.int, !torch.int -> !torch.int
    %int512_19832 = torch.constant.int 512
    %22100 = torch.prim.ListConstruct %22099, %int512_19832 : (!torch.int, !torch.int) -> !torch.list<int>
    %22101 = torch.aten.view %22052, %22100 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22101, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %22102 = torch.aten.mm %22101, %22068 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22102, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_19833 = torch.constant.int 4
    %int4096_19834 = torch.constant.int 4096
    %22103 = torch.prim.ListConstruct %int4_19833, %21424, %int4096_19834 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22104 = torch.aten.view %22102, %22103 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_19835 = torch.constant.int 4
    %22105 = torch.aten.mul.int %int4_19835, %21439 : !torch.int, !torch.int -> !torch.int
    %int512_19836 = torch.constant.int 512
    %22106 = torch.prim.ListConstruct %22105, %int512_19836 : (!torch.int, !torch.int) -> !torch.list<int>
    %22107 = torch.aten.view %22054, %22106 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22107, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %22108 = torch.aten.mm %22107, %22070 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22108, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_19837 = torch.constant.int 4
    %int4096_19838 = torch.constant.int 4096
    %22109 = torch.prim.ListConstruct %int4_19837, %21439, %int4096_19838 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22110 = torch.aten.view %22108, %22109 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_19839 = torch.constant.int 4
    %22111 = torch.aten.mul.int %int4_19839, %21454 : !torch.int, !torch.int -> !torch.int
    %int512_19840 = torch.constant.int 512
    %22112 = torch.prim.ListConstruct %22111, %int512_19840 : (!torch.int, !torch.int) -> !torch.list<int>
    %22113 = torch.aten.view %22056, %22112 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22113, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %22114 = torch.aten.mm %22113, %22072 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22114, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_19841 = torch.constant.int 4
    %int4096_19842 = torch.constant.int 4096
    %22115 = torch.prim.ListConstruct %int4_19841, %21454, %int4096_19842 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22116 = torch.aten.view %22114, %22115 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_19843 = torch.constant.int 4
    %22117 = torch.aten.mul.int %int4_19843, %21469 : !torch.int, !torch.int -> !torch.int
    %int512_19844 = torch.constant.int 512
    %22118 = torch.prim.ListConstruct %22117, %int512_19844 : (!torch.int, !torch.int) -> !torch.list<int>
    %22119 = torch.aten.view %22058, %22118 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22119, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %22120 = torch.aten.mm %22119, %22074 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22120, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_19845 = torch.constant.int 4
    %int4096_19846 = torch.constant.int 4096
    %22121 = torch.prim.ListConstruct %int4_19845, %21469, %int4096_19846 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22122 = torch.aten.view %22120, %22121 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
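    // The eight [4, ?, 4096] partial projections must now be summed across
    // devices. This appears to be an all-reduce lowered into explicit
    // transfers plus adds: each device pulls the seven remote partials and
    // accumulates them locally. First, the transfers to @__device_0.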
    %22123 = torch_c.to_builtin_tensor %22086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19847 = arith.constant 1 : index
    %dim_19848 = tensor.dim %22123, %c1_19847 : tensor<4x?x4096xf16>
    %22124 = flow.tensor.transfer %22123 : tensor<4x?x4096xf16>{%dim_19848} to #hal.device.promise<@__device_0>
    %22125 = torch_c.from_builtin_tensor %22124 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22126 = torch_c.to_builtin_tensor %22092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19849 = arith.constant 1 : index
    %dim_19850 = tensor.dim %22126, %c1_19849 : tensor<4x?x4096xf16>
    %22127 = flow.tensor.transfer %22126 : tensor<4x?x4096xf16>{%dim_19850} to #hal.device.promise<@__device_0>
    %22128 = torch_c.from_builtin_tensor %22127 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22129 = torch_c.to_builtin_tensor %22098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19851 = arith.constant 1 : index
    %dim_19852 = tensor.dim %22129, %c1_19851 : tensor<4x?x4096xf16>
    %22130 = flow.tensor.transfer %22129 : tensor<4x?x4096xf16>{%dim_19852} to #hal.device.promise<@__device_0>
    %22131 = torch_c.from_builtin_tensor %22130 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22132 = torch_c.to_builtin_tensor %22104 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19853 = arith.constant 1 : index
    %dim_19854 = tensor.dim %22132, %c1_19853 : tensor<4x?x4096xf16>
    %22133 = flow.tensor.transfer %22132 : tensor<4x?x4096xf16>{%dim_19854} to #hal.device.promise<@__device_0>
    %22134 = torch_c.from_builtin_tensor %22133 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22135 = torch_c.to_builtin_tensor %22110 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19855 = arith.constant 1 : index
    %dim_19856 = tensor.dim %22135, %c1_19855 : tensor<4x?x4096xf16>
    %22136 = flow.tensor.transfer %22135 : tensor<4x?x4096xf16>{%dim_19856} to #hal.device.promise<@__device_0>
    %22137 = torch_c.from_builtin_tensor %22136 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22138 = torch_c.to_builtin_tensor %22116 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19857 = arith.constant 1 : index
    %dim_19858 = tensor.dim %22138, %c1_19857 : tensor<4x?x4096xf16>
    %22139 = flow.tensor.transfer %22138 : tensor<4x?x4096xf16>{%dim_19858} to #hal.device.promise<@__device_0>
    %22140 = torch_c.from_builtin_tensor %22139 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22141 = torch_c.to_builtin_tensor %22122 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19859 = arith.constant 1 : index
    %dim_19860 = tensor.dim %22141, %c1_19859 : tensor<4x?x4096xf16>
    %22142 = flow.tensor.transfer %22141 : tensor<4x?x4096xf16>{%dim_19860} to #hal.device.promise<@__device_0>
    %22143 = torch_c.from_builtin_tensor %22142 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
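    // Accumulate on device 0: %22080 is its local partial; the remaining
    // seven arrived via the transfers above.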
    %int1_19861 = torch.constant.int 1
    %22144 = torch.aten.add.Tensor %22080, %22125, %int1_19861 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19862 = torch.constant.int 1
    %22145 = torch.aten.add.Tensor %22144, %22128, %int1_19862 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19863 = torch.constant.int 1
    %22146 = torch.aten.add.Tensor %22145, %22131, %int1_19863 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19864 = torch.constant.int 1
    %22147 = torch.aten.add.Tensor %22146, %22134, %int1_19864 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19865 = torch.constant.int 1
    %22148 = torch.aten.add.Tensor %22147, %22137, %int1_19865 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19866 = torch.constant.int 1
    %22149 = torch.aten.add.Tensor %22148, %22140, %int1_19866 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19867 = torch.constant.int 1
    %22150 = torch.aten.add.Tensor %22149, %22143, %int1_19867 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
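    // The same gather for @__device_1 (its local partial %22086 is not
    // transferred), followed by the same chain of adds.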
    %22151 = torch_c.to_builtin_tensor %22080 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19868 = arith.constant 1 : index
    %dim_19869 = tensor.dim %22151, %c1_19868 : tensor<4x?x4096xf16>
    %22152 = flow.tensor.transfer %22151 : tensor<4x?x4096xf16>{%dim_19869} to #hal.device.promise<@__device_1>
    %22153 = torch_c.from_builtin_tensor %22152 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22154 = torch_c.to_builtin_tensor %22092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19870 = arith.constant 1 : index
    %dim_19871 = tensor.dim %22154, %c1_19870 : tensor<4x?x4096xf16>
    %22155 = flow.tensor.transfer %22154 : tensor<4x?x4096xf16>{%dim_19871} to #hal.device.promise<@__device_1>
    %22156 = torch_c.from_builtin_tensor %22155 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22157 = torch_c.to_builtin_tensor %22098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19872 = arith.constant 1 : index
    %dim_19873 = tensor.dim %22157, %c1_19872 : tensor<4x?x4096xf16>
    %22158 = flow.tensor.transfer %22157 : tensor<4x?x4096xf16>{%dim_19873} to #hal.device.promise<@__device_1>
    %22159 = torch_c.from_builtin_tensor %22158 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22160 = torch_c.to_builtin_tensor %22104 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19874 = arith.constant 1 : index
    %dim_19875 = tensor.dim %22160, %c1_19874 : tensor<4x?x4096xf16>
    %22161 = flow.tensor.transfer %22160 : tensor<4x?x4096xf16>{%dim_19875} to #hal.device.promise<@__device_1>
    %22162 = torch_c.from_builtin_tensor %22161 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22163 = torch_c.to_builtin_tensor %22110 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19876 = arith.constant 1 : index
    %dim_19877 = tensor.dim %22163, %c1_19876 : tensor<4x?x4096xf16>
    %22164 = flow.tensor.transfer %22163 : tensor<4x?x4096xf16>{%dim_19877} to #hal.device.promise<@__device_1>
    %22165 = torch_c.from_builtin_tensor %22164 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22166 = torch_c.to_builtin_tensor %22116 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19878 = arith.constant 1 : index
    %dim_19879 = tensor.dim %22166, %c1_19878 : tensor<4x?x4096xf16>
    %22167 = flow.tensor.transfer %22166 : tensor<4x?x4096xf16>{%dim_19879} to #hal.device.promise<@__device_1>
    %22168 = torch_c.from_builtin_tensor %22167 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22169 = torch_c.to_builtin_tensor %22122 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19880 = arith.constant 1 : index
    %dim_19881 = tensor.dim %22169, %c1_19880 : tensor<4x?x4096xf16>
    %22170 = flow.tensor.transfer %22169 : tensor<4x?x4096xf16>{%dim_19881} to #hal.device.promise<@__device_1>
    %22171 = torch_c.from_builtin_tensor %22170 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19882 = torch.constant.int 1
    %22172 = torch.aten.add.Tensor %22153, %22086, %int1_19882 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19883 = torch.constant.int 1
    %22173 = torch.aten.add.Tensor %22172, %22156, %int1_19883 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19884 = torch.constant.int 1
    %22174 = torch.aten.add.Tensor %22173, %22159, %int1_19884 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19885 = torch.constant.int 1
    %22175 = torch.aten.add.Tensor %22174, %22162, %int1_19885 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19886 = torch.constant.int 1
    %22176 = torch.aten.add.Tensor %22175, %22165, %int1_19886 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19887 = torch.constant.int 1
    %22177 = torch.aten.add.Tensor %22176, %22168, %int1_19887 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19888 = torch.constant.int 1
    %22178 = torch.aten.add.Tensor %22177, %22171, %int1_19888 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
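    // @__device_2: gather and accumulate; %22092 stays local.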
    %22179 = torch_c.to_builtin_tensor %22080 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19889 = arith.constant 1 : index
    %dim_19890 = tensor.dim %22179, %c1_19889 : tensor<4x?x4096xf16>
    %22180 = flow.tensor.transfer %22179 : tensor<4x?x4096xf16>{%dim_19890} to #hal.device.promise<@__device_2>
    %22181 = torch_c.from_builtin_tensor %22180 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22182 = torch_c.to_builtin_tensor %22086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19891 = arith.constant 1 : index
    %dim_19892 = tensor.dim %22182, %c1_19891 : tensor<4x?x4096xf16>
    %22183 = flow.tensor.transfer %22182 : tensor<4x?x4096xf16>{%dim_19892} to #hal.device.promise<@__device_2>
    %22184 = torch_c.from_builtin_tensor %22183 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22185 = torch_c.to_builtin_tensor %22098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19893 = arith.constant 1 : index
    %dim_19894 = tensor.dim %22185, %c1_19893 : tensor<4x?x4096xf16>
    %22186 = flow.tensor.transfer %22185 : tensor<4x?x4096xf16>{%dim_19894} to #hal.device.promise<@__device_2>
    %22187 = torch_c.from_builtin_tensor %22186 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22188 = torch_c.to_builtin_tensor %22104 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19895 = arith.constant 1 : index
    %dim_19896 = tensor.dim %22188, %c1_19895 : tensor<4x?x4096xf16>
    %22189 = flow.tensor.transfer %22188 : tensor<4x?x4096xf16>{%dim_19896} to #hal.device.promise<@__device_2>
    %22190 = torch_c.from_builtin_tensor %22189 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22191 = torch_c.to_builtin_tensor %22110 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19897 = arith.constant 1 : index
    %dim_19898 = tensor.dim %22191, %c1_19897 : tensor<4x?x4096xf16>
    %22192 = flow.tensor.transfer %22191 : tensor<4x?x4096xf16>{%dim_19898} to #hal.device.promise<@__device_2>
    %22193 = torch_c.from_builtin_tensor %22192 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22194 = torch_c.to_builtin_tensor %22116 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19899 = arith.constant 1 : index
    %dim_19900 = tensor.dim %22194, %c1_19899 : tensor<4x?x4096xf16>
    %22195 = flow.tensor.transfer %22194 : tensor<4x?x4096xf16>{%dim_19900} to #hal.device.promise<@__device_2>
    %22196 = torch_c.from_builtin_tensor %22195 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22197 = torch_c.to_builtin_tensor %22122 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19901 = arith.constant 1 : index
    %dim_19902 = tensor.dim %22197, %c1_19901 : tensor<4x?x4096xf16>
    %22198 = flow.tensor.transfer %22197 : tensor<4x?x4096xf16>{%dim_19902} to #hal.device.promise<@__device_2>
    %22199 = torch_c.from_builtin_tensor %22198 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19903 = torch.constant.int 1
    %22200 = torch.aten.add.Tensor %22181, %22184, %int1_19903 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19904 = torch.constant.int 1
    %22201 = torch.aten.add.Tensor %22200, %22092, %int1_19904 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19905 = torch.constant.int 1
    %22202 = torch.aten.add.Tensor %22201, %22187, %int1_19905 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19906 = torch.constant.int 1
    %22203 = torch.aten.add.Tensor %22202, %22190, %int1_19906 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19907 = torch.constant.int 1
    %22204 = torch.aten.add.Tensor %22203, %22193, %int1_19907 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19908 = torch.constant.int 1
    %22205 = torch.aten.add.Tensor %22204, %22196, %int1_19908 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19909 = torch.constant.int 1
    %22206 = torch.aten.add.Tensor %22205, %22199, %int1_19909 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
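    // @__device_3: gather and accumulate; %22098 stays local.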
    %22207 = torch_c.to_builtin_tensor %22080 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19910 = arith.constant 1 : index
    %dim_19911 = tensor.dim %22207, %c1_19910 : tensor<4x?x4096xf16>
    %22208 = flow.tensor.transfer %22207 : tensor<4x?x4096xf16>{%dim_19911} to #hal.device.promise<@__device_3>
    %22209 = torch_c.from_builtin_tensor %22208 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22210 = torch_c.to_builtin_tensor %22086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19912 = arith.constant 1 : index
    %dim_19913 = tensor.dim %22210, %c1_19912 : tensor<4x?x4096xf16>
    %22211 = flow.tensor.transfer %22210 : tensor<4x?x4096xf16>{%dim_19913} to #hal.device.promise<@__device_3>
    %22212 = torch_c.from_builtin_tensor %22211 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22213 = torch_c.to_builtin_tensor %22092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19914 = arith.constant 1 : index
    %dim_19915 = tensor.dim %22213, %c1_19914 : tensor<4x?x4096xf16>
    %22214 = flow.tensor.transfer %22213 : tensor<4x?x4096xf16>{%dim_19915} to #hal.device.promise<@__device_3>
    %22215 = torch_c.from_builtin_tensor %22214 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22216 = torch_c.to_builtin_tensor %22104 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19916 = arith.constant 1 : index
    %dim_19917 = tensor.dim %22216, %c1_19916 : tensor<4x?x4096xf16>
    %22217 = flow.tensor.transfer %22216 : tensor<4x?x4096xf16>{%dim_19917} to #hal.device.promise<@__device_3>
    %22218 = torch_c.from_builtin_tensor %22217 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22219 = torch_c.to_builtin_tensor %22110 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19918 = arith.constant 1 : index
    %dim_19919 = tensor.dim %22219, %c1_19918 : tensor<4x?x4096xf16>
    %22220 = flow.tensor.transfer %22219 : tensor<4x?x4096xf16>{%dim_19919} to #hal.device.promise<@__device_3>
    %22221 = torch_c.from_builtin_tensor %22220 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22222 = torch_c.to_builtin_tensor %22116 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19920 = arith.constant 1 : index
    %dim_19921 = tensor.dim %22222, %c1_19920 : tensor<4x?x4096xf16>
    %22223 = flow.tensor.transfer %22222 : tensor<4x?x4096xf16>{%dim_19921} to #hal.device.promise<@__device_3>
    %22224 = torch_c.from_builtin_tensor %22223 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22225 = torch_c.to_builtin_tensor %22122 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19922 = arith.constant 1 : index
    %dim_19923 = tensor.dim %22225, %c1_19922 : tensor<4x?x4096xf16>
    %22226 = flow.tensor.transfer %22225 : tensor<4x?x4096xf16>{%dim_19923} to #hal.device.promise<@__device_3>
    %22227 = torch_c.from_builtin_tensor %22226 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19924 = torch.constant.int 1
    %22228 = torch.aten.add.Tensor %22209, %22212, %int1_19924 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19925 = torch.constant.int 1
    %22229 = torch.aten.add.Tensor %22228, %22215, %int1_19925 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19926 = torch.constant.int 1
    %22230 = torch.aten.add.Tensor %22229, %22098, %int1_19926 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19927 = torch.constant.int 1
    %22231 = torch.aten.add.Tensor %22230, %22218, %int1_19927 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19928 = torch.constant.int 1
    %22232 = torch.aten.add.Tensor %22231, %22221, %int1_19928 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19929 = torch.constant.int 1
    %22233 = torch.aten.add.Tensor %22232, %22224, %int1_19929 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19930 = torch.constant.int 1
    %22234 = torch.aten.add.Tensor %22233, %22227, %int1_19930 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
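    // Same reduction pattern for @__device_4: seven remote partials are transferred
    // in below, and the locally resident partial %22104 is folded in during the sum.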
    %22235 = torch_c.to_builtin_tensor %22080 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19931 = arith.constant 1 : index
    %dim_19932 = tensor.dim %22235, %c1_19931 : tensor<4x?x4096xf16>
    %22236 = flow.tensor.transfer %22235 : tensor<4x?x4096xf16>{%dim_19932} to #hal.device.promise<@__device_4>
    %22237 = torch_c.from_builtin_tensor %22236 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22238 = torch_c.to_builtin_tensor %22086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19933 = arith.constant 1 : index
    %dim_19934 = tensor.dim %22238, %c1_19933 : tensor<4x?x4096xf16>
    %22239 = flow.tensor.transfer %22238 : tensor<4x?x4096xf16>{%dim_19934} to #hal.device.promise<@__device_4>
    %22240 = torch_c.from_builtin_tensor %22239 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22241 = torch_c.to_builtin_tensor %22092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19935 = arith.constant 1 : index
    %dim_19936 = tensor.dim %22241, %c1_19935 : tensor<4x?x4096xf16>
    %22242 = flow.tensor.transfer %22241 : tensor<4x?x4096xf16>{%dim_19936} to #hal.device.promise<@__device_4>
    %22243 = torch_c.from_builtin_tensor %22242 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22244 = torch_c.to_builtin_tensor %22098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19937 = arith.constant 1 : index
    %dim_19938 = tensor.dim %22244, %c1_19937 : tensor<4x?x4096xf16>
    %22245 = flow.tensor.transfer %22244 : tensor<4x?x4096xf16>{%dim_19938} to #hal.device.promise<@__device_4>
    %22246 = torch_c.from_builtin_tensor %22245 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22247 = torch_c.to_builtin_tensor %22110 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19939 = arith.constant 1 : index
    %dim_19940 = tensor.dim %22247, %c1_19939 : tensor<4x?x4096xf16>
    %22248 = flow.tensor.transfer %22247 : tensor<4x?x4096xf16>{%dim_19940} to #hal.device.promise<@__device_4>
    %22249 = torch_c.from_builtin_tensor %22248 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22250 = torch_c.to_builtin_tensor %22116 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19941 = arith.constant 1 : index
    %dim_19942 = tensor.dim %22250, %c1_19941 : tensor<4x?x4096xf16>
    %22251 = flow.tensor.transfer %22250 : tensor<4x?x4096xf16>{%dim_19942} to #hal.device.promise<@__device_4>
    %22252 = torch_c.from_builtin_tensor %22251 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22253 = torch_c.to_builtin_tensor %22122 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19943 = arith.constant 1 : index
    %dim_19944 = tensor.dim %22253, %c1_19943 : tensor<4x?x4096xf16>
    %22254 = flow.tensor.transfer %22253 : tensor<4x?x4096xf16>{%dim_19944} to #hal.device.promise<@__device_4>
    %22255 = torch_c.from_builtin_tensor %22254 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19945 = torch.constant.int 1
    %22256 = torch.aten.add.Tensor %22237, %22240, %int1_19945 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19946 = torch.constant.int 1
    %22257 = torch.aten.add.Tensor %22256, %22243, %int1_19946 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19947 = torch.constant.int 1
    %22258 = torch.aten.add.Tensor %22257, %22246, %int1_19947 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19948 = torch.constant.int 1
    %22259 = torch.aten.add.Tensor %22258, %22104, %int1_19948 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19949 = torch.constant.int 1
    %22260 = torch.aten.add.Tensor %22259, %22249, %int1_19949 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19950 = torch.constant.int 1
    %22261 = torch.aten.add.Tensor %22260, %22252, %int1_19950 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19951 = torch.constant.int 1
    %22262 = torch.aten.add.Tensor %22261, %22255, %int1_19951 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
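    // Reduction replica for @__device_5; %22110 is the partial already on the
    // device, so it is summed without a transfer.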
    %22263 = torch_c.to_builtin_tensor %22080 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19952 = arith.constant 1 : index
    %dim_19953 = tensor.dim %22263, %c1_19952 : tensor<4x?x4096xf16>
    %22264 = flow.tensor.transfer %22263 : tensor<4x?x4096xf16>{%dim_19953} to #hal.device.promise<@__device_5>
    %22265 = torch_c.from_builtin_tensor %22264 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22266 = torch_c.to_builtin_tensor %22086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19954 = arith.constant 1 : index
    %dim_19955 = tensor.dim %22266, %c1_19954 : tensor<4x?x4096xf16>
    %22267 = flow.tensor.transfer %22266 : tensor<4x?x4096xf16>{%dim_19955} to #hal.device.promise<@__device_5>
    %22268 = torch_c.from_builtin_tensor %22267 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22269 = torch_c.to_builtin_tensor %22092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19956 = arith.constant 1 : index
    %dim_19957 = tensor.dim %22269, %c1_19956 : tensor<4x?x4096xf16>
    %22270 = flow.tensor.transfer %22269 : tensor<4x?x4096xf16>{%dim_19957} to #hal.device.promise<@__device_5>
    %22271 = torch_c.from_builtin_tensor %22270 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22272 = torch_c.to_builtin_tensor %22098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19958 = arith.constant 1 : index
    %dim_19959 = tensor.dim %22272, %c1_19958 : tensor<4x?x4096xf16>
    %22273 = flow.tensor.transfer %22272 : tensor<4x?x4096xf16>{%dim_19959} to #hal.device.promise<@__device_5>
    %22274 = torch_c.from_builtin_tensor %22273 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22275 = torch_c.to_builtin_tensor %22104 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19960 = arith.constant 1 : index
    %dim_19961 = tensor.dim %22275, %c1_19960 : tensor<4x?x4096xf16>
    %22276 = flow.tensor.transfer %22275 : tensor<4x?x4096xf16>{%dim_19961} to #hal.device.promise<@__device_5>
    %22277 = torch_c.from_builtin_tensor %22276 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22278 = torch_c.to_builtin_tensor %22116 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19962 = arith.constant 1 : index
    %dim_19963 = tensor.dim %22278, %c1_19962 : tensor<4x?x4096xf16>
    %22279 = flow.tensor.transfer %22278 : tensor<4x?x4096xf16>{%dim_19963} to #hal.device.promise<@__device_5>
    %22280 = torch_c.from_builtin_tensor %22279 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22281 = torch_c.to_builtin_tensor %22122 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19964 = arith.constant 1 : index
    %dim_19965 = tensor.dim %22281, %c1_19964 : tensor<4x?x4096xf16>
    %22282 = flow.tensor.transfer %22281 : tensor<4x?x4096xf16>{%dim_19965} to #hal.device.promise<@__device_5>
    %22283 = torch_c.from_builtin_tensor %22282 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19966 = torch.constant.int 1
    %22284 = torch.aten.add.Tensor %22265, %22268, %int1_19966 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19967 = torch.constant.int 1
    %22285 = torch.aten.add.Tensor %22284, %22271, %int1_19967 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19968 = torch.constant.int 1
    %22286 = torch.aten.add.Tensor %22285, %22274, %int1_19968 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19969 = torch.constant.int 1
    %22287 = torch.aten.add.Tensor %22286, %22277, %int1_19969 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19970 = torch.constant.int 1
    %22288 = torch.aten.add.Tensor %22287, %22110, %int1_19970 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19971 = torch.constant.int 1
    %22289 = torch.aten.add.Tensor %22288, %22280, %int1_19971 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19972 = torch.constant.int 1
    %22290 = torch.aten.add.Tensor %22289, %22283, %int1_19972 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
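    // Reduction replica for @__device_6 (resident partial: %22116).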
    %22291 = torch_c.to_builtin_tensor %22080 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19973 = arith.constant 1 : index
    %dim_19974 = tensor.dim %22291, %c1_19973 : tensor<4x?x4096xf16>
    %22292 = flow.tensor.transfer %22291 : tensor<4x?x4096xf16>{%dim_19974} to #hal.device.promise<@__device_6>
    %22293 = torch_c.from_builtin_tensor %22292 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22294 = torch_c.to_builtin_tensor %22086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19975 = arith.constant 1 : index
    %dim_19976 = tensor.dim %22294, %c1_19975 : tensor<4x?x4096xf16>
    %22295 = flow.tensor.transfer %22294 : tensor<4x?x4096xf16>{%dim_19976} to #hal.device.promise<@__device_6>
    %22296 = torch_c.from_builtin_tensor %22295 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22297 = torch_c.to_builtin_tensor %22092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19977 = arith.constant 1 : index
    %dim_19978 = tensor.dim %22297, %c1_19977 : tensor<4x?x4096xf16>
    %22298 = flow.tensor.transfer %22297 : tensor<4x?x4096xf16>{%dim_19978} to #hal.device.promise<@__device_6>
    %22299 = torch_c.from_builtin_tensor %22298 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22300 = torch_c.to_builtin_tensor %22098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19979 = arith.constant 1 : index
    %dim_19980 = tensor.dim %22300, %c1_19979 : tensor<4x?x4096xf16>
    %22301 = flow.tensor.transfer %22300 : tensor<4x?x4096xf16>{%dim_19980} to #hal.device.promise<@__device_6>
    %22302 = torch_c.from_builtin_tensor %22301 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22303 = torch_c.to_builtin_tensor %22104 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19981 = arith.constant 1 : index
    %dim_19982 = tensor.dim %22303, %c1_19981 : tensor<4x?x4096xf16>
    %22304 = flow.tensor.transfer %22303 : tensor<4x?x4096xf16>{%dim_19982} to #hal.device.promise<@__device_6>
    %22305 = torch_c.from_builtin_tensor %22304 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22306 = torch_c.to_builtin_tensor %22110 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19983 = arith.constant 1 : index
    %dim_19984 = tensor.dim %22306, %c1_19983 : tensor<4x?x4096xf16>
    %22307 = flow.tensor.transfer %22306 : tensor<4x?x4096xf16>{%dim_19984} to #hal.device.promise<@__device_6>
    %22308 = torch_c.from_builtin_tensor %22307 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22309 = torch_c.to_builtin_tensor %22122 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19985 = arith.constant 1 : index
    %dim_19986 = tensor.dim %22309, %c1_19985 : tensor<4x?x4096xf16>
    %22310 = flow.tensor.transfer %22309 : tensor<4x?x4096xf16>{%dim_19986} to #hal.device.promise<@__device_6>
    %22311 = torch_c.from_builtin_tensor %22310 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19987 = torch.constant.int 1
    %22312 = torch.aten.add.Tensor %22293, %22296, %int1_19987 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19988 = torch.constant.int 1
    %22313 = torch.aten.add.Tensor %22312, %22299, %int1_19988 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19989 = torch.constant.int 1
    %22314 = torch.aten.add.Tensor %22313, %22302, %int1_19989 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19990 = torch.constant.int 1
    %22315 = torch.aten.add.Tensor %22314, %22305, %int1_19990 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19991 = torch.constant.int 1
    %22316 = torch.aten.add.Tensor %22315, %22308, %int1_19991 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19992 = torch.constant.int 1
    %22317 = torch.aten.add.Tensor %22316, %22116, %int1_19992 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_19993 = torch.constant.int 1
    %22318 = torch.aten.add.Tensor %22317, %22311, %int1_19993 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
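    // Reduction replica for @__device_7 (resident partial: %22122).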
    %22319 = torch_c.to_builtin_tensor %22080 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19994 = arith.constant 1 : index
    %dim_19995 = tensor.dim %22319, %c1_19994 : tensor<4x?x4096xf16>
    %22320 = flow.tensor.transfer %22319 : tensor<4x?x4096xf16>{%dim_19995} to #hal.device.promise<@__device_7>
    %22321 = torch_c.from_builtin_tensor %22320 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22322 = torch_c.to_builtin_tensor %22086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19996 = arith.constant 1 : index
    %dim_19997 = tensor.dim %22322, %c1_19996 : tensor<4x?x4096xf16>
    %22323 = flow.tensor.transfer %22322 : tensor<4x?x4096xf16>{%dim_19997} to #hal.device.promise<@__device_7>
    %22324 = torch_c.from_builtin_tensor %22323 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22325 = torch_c.to_builtin_tensor %22092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_19998 = arith.constant 1 : index
    %dim_19999 = tensor.dim %22325, %c1_19998 : tensor<4x?x4096xf16>
    %22326 = flow.tensor.transfer %22325 : tensor<4x?x4096xf16>{%dim_19999} to #hal.device.promise<@__device_7>
    %22327 = torch_c.from_builtin_tensor %22326 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22328 = torch_c.to_builtin_tensor %22098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20000 = arith.constant 1 : index
    %dim_20001 = tensor.dim %22328, %c1_20000 : tensor<4x?x4096xf16>
    %22329 = flow.tensor.transfer %22328 : tensor<4x?x4096xf16>{%dim_20001} to #hal.device.promise<@__device_7>
    %22330 = torch_c.from_builtin_tensor %22329 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22331 = torch_c.to_builtin_tensor %22104 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20002 = arith.constant 1 : index
    %dim_20003 = tensor.dim %22331, %c1_20002 : tensor<4x?x4096xf16>
    %22332 = flow.tensor.transfer %22331 : tensor<4x?x4096xf16>{%dim_20003} to #hal.device.promise<@__device_7>
    %22333 = torch_c.from_builtin_tensor %22332 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22334 = torch_c.to_builtin_tensor %22110 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20004 = arith.constant 1 : index
    %dim_20005 = tensor.dim %22334, %c1_20004 : tensor<4x?x4096xf16>
    %22335 = flow.tensor.transfer %22334 : tensor<4x?x4096xf16>{%dim_20005} to #hal.device.promise<@__device_7>
    %22336 = torch_c.from_builtin_tensor %22335 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22337 = torch_c.to_builtin_tensor %22116 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20006 = arith.constant 1 : index
    %dim_20007 = tensor.dim %22337, %c1_20006 : tensor<4x?x4096xf16>
    %22338 = flow.tensor.transfer %22337 : tensor<4x?x4096xf16>{%dim_20007} to #hal.device.promise<@__device_7>
    %22339 = torch_c.from_builtin_tensor %22338 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20008 = torch.constant.int 1
    %22340 = torch.aten.add.Tensor %22321, %22324, %int1_20008 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20009 = torch.constant.int 1
    %22341 = torch.aten.add.Tensor %22340, %22327, %int1_20009 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20010 = torch.constant.int 1
    %22342 = torch.aten.add.Tensor %22341, %22330, %int1_20010 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20011 = torch.constant.int 1
    %22343 = torch.aten.add.Tensor %22342, %22333, %int1_20011 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20012 = torch.constant.int 1
    %22344 = torch.aten.add.Tensor %22343, %22336, %int1_20012 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20013 = torch.constant.int 1
    %22345 = torch.aten.add.Tensor %22344, %22339, %int1_20013 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20014 = torch.constant.int 1
    %22346 = torch.aten.add.Tensor %22345, %22122, %int1_20014 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
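    // Likely the residual connection: each device adds its reduced output to its
    // replica of the block input (%21006..%21013), yielding %22347..%22354.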
    %int1_20015 = torch.constant.int 1
    %22347 = torch.aten.add.Tensor %21006, %22150, %int1_20015 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20016 = torch.constant.int 1
    %22348 = torch.aten.add.Tensor %21007, %22178, %int1_20016 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20017 = torch.constant.int 1
    %22349 = torch.aten.add.Tensor %21008, %22206, %int1_20017 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20018 = torch.constant.int 1
    %22350 = torch.aten.add.Tensor %21009, %22234, %int1_20018 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22350, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20019 = torch.constant.int 1
    %22351 = torch.aten.add.Tensor %21010, %22262, %int1_20019 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20020 = torch.constant.int 1
    %22352 = torch.aten.add.Tensor %21011, %22290, %int1_20020 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20021 = torch.constant.int 1
    %22353 = torch.aten.add.Tensor %21012, %22318, %int1_20021 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20022 = torch.constant.int 1
    %22354 = torch.aten.add.Tensor %21013, %22346, %int1_20022 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
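    // RMSNorm pattern, replicated once per device: upcast to f32 (torch dtype
    // code 6), square, mean over the hidden dimension, add epsilon, rsqrt,
    // rescale, apply the learned weight, downcast back to f16.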
    %int6_20023 = torch.constant.int 6
    %22355 = torch.prims.convert_element_type %22347, %int6_20023 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20024 = torch.constant.int 6
    %22356 = torch.prims.convert_element_type %22348, %int6_20024 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20025 = torch.constant.int 6
    %22357 = torch.prims.convert_element_type %22349, %int6_20025 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20026 = torch.constant.int 6
    %22358 = torch.prims.convert_element_type %22350, %int6_20026 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20027 = torch.constant.int 6
    %22359 = torch.prims.convert_element_type %22351, %int6_20027 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20028 = torch.constant.int 6
    %22360 = torch.prims.convert_element_type %22352, %int6_20028 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20029 = torch.constant.int 6
    %22361 = torch.prims.convert_element_type %22353, %int6_20029 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20030 = torch.constant.int 6
    %22362 = torch.prims.convert_element_type %22354, %int6_20030 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
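    // Elementwise square of each upcast activation.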
    %int2_20031 = torch.constant.int 2
    %22363 = torch.aten.pow.Tensor_Scalar %22355, %int2_20031 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20032 = torch.constant.int 2
    %22364 = torch.aten.pow.Tensor_Scalar %22356, %int2_20032 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20033 = torch.constant.int 2
    %22365 = torch.aten.pow.Tensor_Scalar %22357, %int2_20033 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20034 = torch.constant.int 2
    %22366 = torch.aten.pow.Tensor_Scalar %22358, %int2_20034 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20035 = torch.constant.int 2
    %22367 = torch.aten.pow.Tensor_Scalar %22359, %int2_20035 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20036 = torch.constant.int 2
    %22368 = torch.aten.pow.Tensor_Scalar %22360, %int2_20036 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20037 = torch.constant.int 2
    %22369 = torch.aten.pow.Tensor_Scalar %22361, %int2_20037 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20038 = torch.constant.int 2
    %22370 = torch.aten.pow.Tensor_Scalar %22362, %int2_20038 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
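    // Mean of the squares over the last (4096-wide) dimension with keepdim=true,
    // giving per-token [4,?,1] statistics.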
    %int-1_20039 = torch.constant.int -1
    %22371 = torch.prim.ListConstruct %int-1_20039 : (!torch.int) -> !torch.list<int>
    %true_20040 = torch.constant.bool true
    %none_20041 = torch.constant.none
    %22372 = torch.aten.mean.dim %22363, %22371, %true_20040, %none_20041 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20042 = torch.constant.int -1
    %22373 = torch.prim.ListConstruct %int-1_20042 : (!torch.int) -> !torch.list<int>
    %true_20043 = torch.constant.bool true
    %none_20044 = torch.constant.none
    %22374 = torch.aten.mean.dim %22364, %22373, %true_20043, %none_20044 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20045 = torch.constant.int -1
    %22375 = torch.prim.ListConstruct %int-1_20045 : (!torch.int) -> !torch.list<int>
    %true_20046 = torch.constant.bool true
    %none_20047 = torch.constant.none
    %22376 = torch.aten.mean.dim %22365, %22375, %true_20046, %none_20047 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20048 = torch.constant.int -1
    %22377 = torch.prim.ListConstruct %int-1_20048 : (!torch.int) -> !torch.list<int>
    %true_20049 = torch.constant.bool true
    %none_20050 = torch.constant.none
    %22378 = torch.aten.mean.dim %22366, %22377, %true_20049, %none_20050 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20051 = torch.constant.int -1
    %22379 = torch.prim.ListConstruct %int-1_20051 : (!torch.int) -> !torch.list<int>
    %true_20052 = torch.constant.bool true
    %none_20053 = torch.constant.none
    %22380 = torch.aten.mean.dim %22367, %22379, %true_20052, %none_20053 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20054 = torch.constant.int -1
    %22381 = torch.prim.ListConstruct %int-1_20054 : (!torch.int) -> !torch.list<int>
    %true_20055 = torch.constant.bool true
    %none_20056 = torch.constant.none
    %22382 = torch.aten.mean.dim %22368, %22381, %true_20055, %none_20056 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20057 = torch.constant.int -1
    %22383 = torch.prim.ListConstruct %int-1_20057 : (!torch.int) -> !torch.list<int>
    %true_20058 = torch.constant.bool true
    %none_20059 = torch.constant.none
    %22384 = torch.aten.mean.dim %22369, %22383, %true_20058, %none_20059 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20060 = torch.constant.int -1
    %22385 = torch.prim.ListConstruct %int-1_20060 : (!torch.int) -> !torch.list<int>
    %true_20061 = torch.constant.bool true
    %none_20062 = torch.constant.none
    %22386 = torch.aten.mean.dim %22370, %22385, %true_20061, %none_20062 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
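    // Add the variance epsilon (9.9999997473787516E-6, i.e. ~1e-5).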
    %float9.999990e-06_20063 = torch.constant.float 9.9999997473787516E-6
    %int1_20064 = torch.constant.int 1
    %22387 = torch.aten.add.Scalar %22372, %float9.999990e-06_20063, %int1_20064 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20065 = torch.constant.float 9.9999997473787516E-6
    %int1_20066 = torch.constant.int 1
    %22388 = torch.aten.add.Scalar %22374, %float9.999990e-06_20065, %int1_20066 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20067 = torch.constant.float 9.9999997473787516E-6
    %int1_20068 = torch.constant.int 1
    %22389 = torch.aten.add.Scalar %22376, %float9.999990e-06_20067, %int1_20068 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20069 = torch.constant.float 9.9999997473787516E-6
    %int1_20070 = torch.constant.int 1
    %22390 = torch.aten.add.Scalar %22378, %float9.999990e-06_20069, %int1_20070 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20071 = torch.constant.float 9.9999997473787516E-6
    %int1_20072 = torch.constant.int 1
    %22391 = torch.aten.add.Scalar %22380, %float9.999990e-06_20071, %int1_20072 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20073 = torch.constant.float 9.9999997473787516E-6
    %int1_20074 = torch.constant.int 1
    %22392 = torch.aten.add.Scalar %22382, %float9.999990e-06_20073, %int1_20074 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20075 = torch.constant.float 9.9999997473787516E-6
    %int1_20076 = torch.constant.int 1
    %22393 = torch.aten.add.Scalar %22384, %float9.999990e-06_20075, %int1_20076 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20077 = torch.constant.float 9.9999997473787516E-6
    %int1_20078 = torch.constant.int 1
    %22394 = torch.aten.add.Scalar %22386, %float9.999990e-06_20077, %int1_20078 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
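    // Reciprocal square root of the stabilized mean square.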
    %22395 = torch.aten.rsqrt %22387 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22396 = torch.aten.rsqrt %22388 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22397 = torch.aten.rsqrt %22389 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22398 = torch.aten.rsqrt %22390 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22399 = torch.aten.rsqrt %22391 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22400 = torch.aten.rsqrt %22392 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22401 = torch.aten.rsqrt %22393 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22402 = torch.aten.rsqrt %22394 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
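    // Normalize: multiply each activation by its reciprocal RMS.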
    %22403 = torch.aten.mul.Tensor %22355, %22395 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22404 = torch.aten.mul.Tensor %22356, %22396 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22405 = torch.aten.mul.Tensor %22357, %22397 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22406 = torch.aten.mul.Tensor %22358, %22398 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22407 = torch.aten.mul.Tensor %22359, %22399 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22408 = torch.aten.mul.Tensor %22360, %22400 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22409 = torch.aten.mul.Tensor %22361, %22401 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22410 = torch.aten.mul.Tensor %22362, %22402 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
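    // Scale by the learned norm weight; %768..%775 appear to be the per-device
    // replicas of the same [4096] parameter.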
    %22411 = torch.aten.mul.Tensor %768, %22403 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22412 = torch.aten.mul.Tensor %769, %22404 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22413 = torch.aten.mul.Tensor %770, %22405 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22414 = torch.aten.mul.Tensor %771, %22406 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22415 = torch.aten.mul.Tensor %772, %22407 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22416 = torch.aten.mul.Tensor %773, %22408 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22417 = torch.aten.mul.Tensor %774, %22409 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22418 = torch.aten.mul.Tensor %775, %22410 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
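    // Downcast the normalized activations back to f16 (torch dtype code 5).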
    %int5_20079 = torch.constant.int 5
    %22419 = torch.prims.convert_element_type %22411, %int5_20079 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20080 = torch.constant.int 5
    %22420 = torch.prims.convert_element_type %22412, %int5_20080 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20081 = torch.constant.int 5
    %22421 = torch.prims.convert_element_type %22413, %int5_20081 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20082 = torch.constant.int 5
    %22422 = torch.prims.convert_element_type %22414, %int5_20082 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20083 = torch.constant.int 5
    %22423 = torch.prims.convert_element_type %22415, %int5_20083 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20084 = torch.constant.int 5
    %22424 = torch.prims.convert_element_type %22416, %int5_20084 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20085 = torch.constant.int 5
    %22425 = torch.prims.convert_element_type %22417, %int5_20085 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20086 = torch.constant.int 5
    %22426 = torch.prims.convert_element_type %22418, %int5_20086 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
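    // Transpose each [1792,4096] projection weight (%776..%783) to [4096,1792];
    // 1792 columns per device across 8 devices suggests a column-sharded
    // 14336-wide MLP projection, though the shard role (gate vs. up) is not
    // visible from this section alone.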
    %int1_20087 = torch.constant.int 1
    %int0_20088 = torch.constant.int 0
    %22427 = torch.prim.ListConstruct %int1_20087, %int0_20088 : (!torch.int, !torch.int) -> !torch.list<int>
    %22428 = torch.aten.permute %776, %22427 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20089 = torch.constant.int 1
    %int0_20090 = torch.constant.int 0
    %22429 = torch.prim.ListConstruct %int1_20089, %int0_20090 : (!torch.int, !torch.int) -> !torch.list<int>
    %22430 = torch.aten.permute %777, %22429 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20091 = torch.constant.int 1
    %int0_20092 = torch.constant.int 0
    %22431 = torch.prim.ListConstruct %int1_20091, %int0_20092 : (!torch.int, !torch.int) -> !torch.list<int>
    %22432 = torch.aten.permute %778, %22431 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20093 = torch.constant.int 1
    %int0_20094 = torch.constant.int 0
    %22433 = torch.prim.ListConstruct %int1_20093, %int0_20094 : (!torch.int, !torch.int) -> !torch.list<int>
    %22434 = torch.aten.permute %779, %22433 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20095 = torch.constant.int 1
    %int0_20096 = torch.constant.int 0
    %22435 = torch.prim.ListConstruct %int1_20095, %int0_20096 : (!torch.int, !torch.int) -> !torch.list<int>
    %22436 = torch.aten.permute %780, %22435 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20097 = torch.constant.int 1
    %int0_20098 = torch.constant.int 0
    %22437 = torch.prim.ListConstruct %int1_20097, %int0_20098 : (!torch.int, !torch.int) -> !torch.list<int>
    %22438 = torch.aten.permute %781, %22437 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20099 = torch.constant.int 1
    %int0_20100 = torch.constant.int 0
    %22439 = torch.prim.ListConstruct %int1_20099, %int0_20100 : (!torch.int, !torch.int) -> !torch.list<int>
    %22440 = torch.aten.permute %782, %22439 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20101 = torch.constant.int 1
    %int0_20102 = torch.constant.int 0
    %22441 = torch.prim.ListConstruct %int1_20101, %int0_20102 : (!torch.int, !torch.int) -> !torch.list<int>
    %22442 = torch.aten.permute %783, %22441 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
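    // Per shard: flatten [4,?,4096] to [4*?,4096], matmul against the transposed
    // [4096,1792] weight, then view the result back to [4,?,1792].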
    %int4_20103 = torch.constant.int 4
    %22443 = torch.aten.mul.int %int4_20103, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20104 = torch.constant.int 4096
    %22444 = torch.prim.ListConstruct %22443, %int4096_20104 : (!torch.int, !torch.int) -> !torch.list<int>
    %22445 = torch.aten.view %22419, %22444 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22445, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22446 = torch.aten.mm %22445, %22428 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22446, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20105 = torch.constant.int 4
    %int1792_20106 = torch.constant.int 1792
    %22447 = torch.prim.ListConstruct %int4_20105, %2482, %int1792_20106 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22448 = torch.aten.view %22446, %22447 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20107 = torch.constant.int 4
    %22449 = torch.aten.mul.int %int4_20107, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20108 = torch.constant.int 4096
    %22450 = torch.prim.ListConstruct %22449, %int4096_20108 : (!torch.int, !torch.int) -> !torch.list<int>
    %22451 = torch.aten.view %22420, %22450 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22451, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22452 = torch.aten.mm %22451, %22430 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22452, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20109 = torch.constant.int 4
    %int1792_20110 = torch.constant.int 1792
    %22453 = torch.prim.ListConstruct %int4_20109, %2482, %int1792_20110 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22454 = torch.aten.view %22452, %22453 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20111 = torch.constant.int 4
    %22455 = torch.aten.mul.int %int4_20111, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20112 = torch.constant.int 4096
    %22456 = torch.prim.ListConstruct %22455, %int4096_20112 : (!torch.int, !torch.int) -> !torch.list<int>
    %22457 = torch.aten.view %22421, %22456 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22457, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22458 = torch.aten.mm %22457, %22432 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22458, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20113 = torch.constant.int 4
    %int1792_20114 = torch.constant.int 1792
    %22459 = torch.prim.ListConstruct %int4_20113, %2482, %int1792_20114 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22460 = torch.aten.view %22458, %22459 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20115 = torch.constant.int 4
    %22461 = torch.aten.mul.int %int4_20115, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20116 = torch.constant.int 4096
    %22462 = torch.prim.ListConstruct %22461, %int4096_20116 : (!torch.int, !torch.int) -> !torch.list<int>
    %22463 = torch.aten.view %22422, %22462 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22463, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22464 = torch.aten.mm %22463, %22434 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22464, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20117 = torch.constant.int 4
    %int1792_20118 = torch.constant.int 1792
    %22465 = torch.prim.ListConstruct %int4_20117, %2482, %int1792_20118 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22466 = torch.aten.view %22464, %22465 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20119 = torch.constant.int 4
    %22467 = torch.aten.mul.int %int4_20119, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20120 = torch.constant.int 4096
    %22468 = torch.prim.ListConstruct %22467, %int4096_20120 : (!torch.int, !torch.int) -> !torch.list<int>
    %22469 = torch.aten.view %22423, %22468 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22469, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22470 = torch.aten.mm %22469, %22436 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22470, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20121 = torch.constant.int 4
    %int1792_20122 = torch.constant.int 1792
    %22471 = torch.prim.ListConstruct %int4_20121, %2482, %int1792_20122 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22472 = torch.aten.view %22470, %22471 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20123 = torch.constant.int 4
    %22473 = torch.aten.mul.int %int4_20123, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20124 = torch.constant.int 4096
    %22474 = torch.prim.ListConstruct %22473, %int4096_20124 : (!torch.int, !torch.int) -> !torch.list<int>
    %22475 = torch.aten.view %22424, %22474 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22475, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22476 = torch.aten.mm %22475, %22438 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22476, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20125 = torch.constant.int 4
    %int1792_20126 = torch.constant.int 1792
    %22477 = torch.prim.ListConstruct %int4_20125, %2482, %int1792_20126 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22478 = torch.aten.view %22476, %22477 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20127 = torch.constant.int 4
    %22479 = torch.aten.mul.int %int4_20127, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20128 = torch.constant.int 4096
    %22480 = torch.prim.ListConstruct %22479, %int4096_20128 : (!torch.int, !torch.int) -> !torch.list<int>
    %22481 = torch.aten.view %22425, %22480 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22481, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22482 = torch.aten.mm %22481, %22440 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22482, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20129 = torch.constant.int 4
    %int1792_20130 = torch.constant.int 1792
    %22483 = torch.prim.ListConstruct %int4_20129, %2482, %int1792_20130 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22484 = torch.aten.view %22482, %22483 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20131 = torch.constant.int 4
    %22485 = torch.aten.mul.int %int4_20131, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20132 = torch.constant.int 4096
    %22486 = torch.prim.ListConstruct %22485, %int4096_20132 : (!torch.int, !torch.int) -> !torch.list<int>
    %22487 = torch.aten.view %22426, %22486 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22487, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22488 = torch.aten.mm %22487, %22442 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22488, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20133 = torch.constant.int 4
    %int1792_20134 = torch.constant.int 1792
    %22489 = torch.prim.ListConstruct %int4_20133, %2482, %int1792_20134 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22490 = torch.aten.view %22488, %22489 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
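    // Editorial annotation: the eight flatten -> mm -> reshape sequences above are
    // consistent with the gate projections of a SwiGLU-style FFN, one [4096,1792]
    // weight shard per device (8 x 1792 = 14336 total intermediate width). Each
    // shard flattens [4,?,4096] to [?,4096], multiplies, and restores the
    // [4,?,1792] batch shape. SiLU is applied to each gate output next.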
    %22491 = torch.aten.silu %22448 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22492 = torch.aten.silu %22454 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22493 = torch.aten.silu %22460 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22494 = torch.aten.silu %22466 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22495 = torch.aten.silu %22472 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22496 = torch.aten.silu %22478 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22497 = torch.aten.silu %22484 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22498 = torch.aten.silu %22490 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
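    // The per-shard up-projection weights %784..%791 ([1792,4096] each) are
    // transposed to [4096,1792] below so they can serve as the right operand of
    // torch.aten.mm.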
    %int1_20135 = torch.constant.int 1
    %int0_20136 = torch.constant.int 0
    %22499 = torch.prim.ListConstruct %int1_20135, %int0_20136 : (!torch.int, !torch.int) -> !torch.list<int>
    %22500 = torch.aten.permute %784, %22499 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20137 = torch.constant.int 1
    %int0_20138 = torch.constant.int 0
    %22501 = torch.prim.ListConstruct %int1_20137, %int0_20138 : (!torch.int, !torch.int) -> !torch.list<int>
    %22502 = torch.aten.permute %785, %22501 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20139 = torch.constant.int 1
    %int0_20140 = torch.constant.int 0
    %22503 = torch.prim.ListConstruct %int1_20139, %int0_20140 : (!torch.int, !torch.int) -> !torch.list<int>
    %22504 = torch.aten.permute %786, %22503 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20141 = torch.constant.int 1
    %int0_20142 = torch.constant.int 0
    %22505 = torch.prim.ListConstruct %int1_20141, %int0_20142 : (!torch.int, !torch.int) -> !torch.list<int>
    %22506 = torch.aten.permute %787, %22505 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20143 = torch.constant.int 1
    %int0_20144 = torch.constant.int 0
    %22507 = torch.prim.ListConstruct %int1_20143, %int0_20144 : (!torch.int, !torch.int) -> !torch.list<int>
    %22508 = torch.aten.permute %788, %22507 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20145 = torch.constant.int 1
    %int0_20146 = torch.constant.int 0
    %22509 = torch.prim.ListConstruct %int1_20145, %int0_20146 : (!torch.int, !torch.int) -> !torch.list<int>
    %22510 = torch.aten.permute %789, %22509 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20147 = torch.constant.int 1
    %int0_20148 = torch.constant.int 0
    %22511 = torch.prim.ListConstruct %int1_20147, %int0_20148 : (!torch.int, !torch.int) -> !torch.list<int>
    %22512 = torch.aten.permute %790, %22511 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_20149 = torch.constant.int 1
    %int0_20150 = torch.constant.int 0
    %22513 = torch.prim.ListConstruct %int1_20149, %int0_20150 : (!torch.int, !torch.int) -> !torch.list<int>
    %22514 = torch.aten.permute %791, %22513 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
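    // Up projections: the same flatten -> mm -> reshape pattern as the gate path,
    // applied to the per-device inputs %22419..%22426 with the transposed weights.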
    %int4_20151 = torch.constant.int 4
    %22515 = torch.aten.mul.int %int4_20151, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20152 = torch.constant.int 4096
    %22516 = torch.prim.ListConstruct %22515, %int4096_20152 : (!torch.int, !torch.int) -> !torch.list<int>
    %22517 = torch.aten.view %22419, %22516 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22517, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22518 = torch.aten.mm %22517, %22500 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22518, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20153 = torch.constant.int 4
    %int1792_20154 = torch.constant.int 1792
    %22519 = torch.prim.ListConstruct %int4_20153, %2482, %int1792_20154 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22520 = torch.aten.view %22518, %22519 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20155 = torch.constant.int 4
    %22521 = torch.aten.mul.int %int4_20155, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20156 = torch.constant.int 4096
    %22522 = torch.prim.ListConstruct %22521, %int4096_20156 : (!torch.int, !torch.int) -> !torch.list<int>
    %22523 = torch.aten.view %22420, %22522 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22523, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22524 = torch.aten.mm %22523, %22502 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22524, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20157 = torch.constant.int 4
    %int1792_20158 = torch.constant.int 1792
    %22525 = torch.prim.ListConstruct %int4_20157, %2482, %int1792_20158 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22526 = torch.aten.view %22524, %22525 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20159 = torch.constant.int 4
    %22527 = torch.aten.mul.int %int4_20159, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20160 = torch.constant.int 4096
    %22528 = torch.prim.ListConstruct %22527, %int4096_20160 : (!torch.int, !torch.int) -> !torch.list<int>
    %22529 = torch.aten.view %22421, %22528 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22529, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22530 = torch.aten.mm %22529, %22504 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22530, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20161 = torch.constant.int 4
    %int1792_20162 = torch.constant.int 1792
    %22531 = torch.prim.ListConstruct %int4_20161, %2482, %int1792_20162 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22532 = torch.aten.view %22530, %22531 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20163 = torch.constant.int 4
    %22533 = torch.aten.mul.int %int4_20163, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20164 = torch.constant.int 4096
    %22534 = torch.prim.ListConstruct %22533, %int4096_20164 : (!torch.int, !torch.int) -> !torch.list<int>
    %22535 = torch.aten.view %22422, %22534 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22535, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22536 = torch.aten.mm %22535, %22506 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22536, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20165 = torch.constant.int 4
    %int1792_20166 = torch.constant.int 1792
    %22537 = torch.prim.ListConstruct %int4_20165, %2482, %int1792_20166 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22538 = torch.aten.view %22536, %22537 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20167 = torch.constant.int 4
    %22539 = torch.aten.mul.int %int4_20167, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20168 = torch.constant.int 4096
    %22540 = torch.prim.ListConstruct %22539, %int4096_20168 : (!torch.int, !torch.int) -> !torch.list<int>
    %22541 = torch.aten.view %22423, %22540 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22541, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22542 = torch.aten.mm %22541, %22508 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22542, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20169 = torch.constant.int 4
    %int1792_20170 = torch.constant.int 1792
    %22543 = torch.prim.ListConstruct %int4_20169, %2482, %int1792_20170 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22544 = torch.aten.view %22542, %22543 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20171 = torch.constant.int 4
    %22545 = torch.aten.mul.int %int4_20171, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20172 = torch.constant.int 4096
    %22546 = torch.prim.ListConstruct %22545, %int4096_20172 : (!torch.int, !torch.int) -> !torch.list<int>
    %22547 = torch.aten.view %22424, %22546 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22547, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22548 = torch.aten.mm %22547, %22510 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22548, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20173 = torch.constant.int 4
    %int1792_20174 = torch.constant.int 1792
    %22549 = torch.prim.ListConstruct %int4_20173, %2482, %int1792_20174 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22550 = torch.aten.view %22548, %22549 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20175 = torch.constant.int 4
    %22551 = torch.aten.mul.int %int4_20175, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20176 = torch.constant.int 4096
    %22552 = torch.prim.ListConstruct %22551, %int4096_20176 : (!torch.int, !torch.int) -> !torch.list<int>
    %22553 = torch.aten.view %22425, %22552 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22553, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22554 = torch.aten.mm %22553, %22512 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22554, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20177 = torch.constant.int 4
    %int1792_20178 = torch.constant.int 1792
    %22555 = torch.prim.ListConstruct %int4_20177, %2482, %int1792_20178 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22556 = torch.aten.view %22554, %22555 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_20179 = torch.constant.int 4
    %22557 = torch.aten.mul.int %int4_20179, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20180 = torch.constant.int 4096
    %22558 = torch.prim.ListConstruct %22557, %int4096_20180 : (!torch.int, !torch.int) -> !torch.list<int>
    %22559 = torch.aten.view %22426, %22558 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22559, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22560 = torch.aten.mm %22559, %22514 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22560, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_20181 = torch.constant.int 4
    %int1792_20182 = torch.constant.int 1792
    %22561 = torch.prim.ListConstruct %int4_20181, %2482, %int1792_20182 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22562 = torch.aten.view %22560, %22561 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
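    // Gating: elementwise silu(gate) * up per shard, i.e. the SwiGLU combine.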
    %22563 = torch.aten.mul.Tensor %22491, %22520 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22564 = torch.aten.mul.Tensor %22492, %22526 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22565 = torch.aten.mul.Tensor %22493, %22532 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22566 = torch.aten.mul.Tensor %22494, %22538 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22567 = torch.aten.mul.Tensor %22495, %22544 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22568 = torch.aten.mul.Tensor %22496, %22550 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22569 = torch.aten.mul.Tensor %22497, %22556 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %22570 = torch.aten.mul.Tensor %22498, %22562 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %22570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
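    // The per-shard down-projection weights %792..%799 ([4096,1792] each) are
    // transposed to [1792,4096] for the final matmul back to the model width.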
    %int1_20183 = torch.constant.int 1
    %int0_20184 = torch.constant.int 0
    %22571 = torch.prim.ListConstruct %int1_20183, %int0_20184 : (!torch.int, !torch.int) -> !torch.list<int>
    %22572 = torch.aten.permute %792, %22571 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_20185 = torch.constant.int 1
    %int0_20186 = torch.constant.int 0
    %22573 = torch.prim.ListConstruct %int1_20185, %int0_20186 : (!torch.int, !torch.int) -> !torch.list<int>
    %22574 = torch.aten.permute %793, %22573 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_20187 = torch.constant.int 1
    %int0_20188 = torch.constant.int 0
    %22575 = torch.prim.ListConstruct %int1_20187, %int0_20188 : (!torch.int, !torch.int) -> !torch.list<int>
    %22576 = torch.aten.permute %794, %22575 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_20189 = torch.constant.int 1
    %int0_20190 = torch.constant.int 0
    %22577 = torch.prim.ListConstruct %int1_20189, %int0_20190 : (!torch.int, !torch.int) -> !torch.list<int>
    %22578 = torch.aten.permute %795, %22577 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_20191 = torch.constant.int 1
    %int0_20192 = torch.constant.int 0
    %22579 = torch.prim.ListConstruct %int1_20191, %int0_20192 : (!torch.int, !torch.int) -> !torch.list<int>
    %22580 = torch.aten.permute %796, %22579 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_20193 = torch.constant.int 1
    %int0_20194 = torch.constant.int 0
    %22581 = torch.prim.ListConstruct %int1_20193, %int0_20194 : (!torch.int, !torch.int) -> !torch.list<int>
    %22582 = torch.aten.permute %797, %22581 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_20195 = torch.constant.int 1
    %int0_20196 = torch.constant.int 0
    %22583 = torch.prim.ListConstruct %int1_20195, %int0_20196 : (!torch.int, !torch.int) -> !torch.list<int>
    %22584 = torch.aten.permute %798, %22583 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_20197 = torch.constant.int 1
    %int0_20198 = torch.constant.int 0
    %22585 = torch.prim.ListConstruct %int1_20197, %int0_20198 : (!torch.int, !torch.int) -> !torch.list<int>
    %22586 = torch.aten.permute %799, %22585 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
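    // Down projections: each shard maps its [4,?,1792] gated activations back to
    // [4,?,4096]. Because the intermediate dimension is split across devices, the
    // results %22593..%22642 are partial sums that still have to be reduced.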
    %int1_20199 = torch.constant.int 1
    %22587 = torch.aten.size.int %22448, %int1_20199 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_20200 = torch.constant.int 4
    %22588 = torch.aten.mul.int %int4_20200, %22587 : !torch.int, !torch.int -> !torch.int
    %int1792_20201 = torch.constant.int 1792
    %22589 = torch.prim.ListConstruct %22588, %int1792_20201 : (!torch.int, !torch.int) -> !torch.list<int>
    %22590 = torch.aten.view %22563, %22589 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22590, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %22591 = torch.aten.mm %22590, %22572 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22591, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_20202 = torch.constant.int 4
    %int4096_20203 = torch.constant.int 4096
    %22592 = torch.prim.ListConstruct %int4_20202, %22587, %int4096_20203 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22593 = torch.aten.view %22591, %22592 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20204 = torch.constant.int 1
    %22594 = torch.aten.size.int %22454, %int1_20204 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_20205 = torch.constant.int 4
    %22595 = torch.aten.mul.int %int4_20205, %22594 : !torch.int, !torch.int -> !torch.int
    %int1792_20206 = torch.constant.int 1792
    %22596 = torch.prim.ListConstruct %22595, %int1792_20206 : (!torch.int, !torch.int) -> !torch.list<int>
    %22597 = torch.aten.view %22564, %22596 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22597, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %22598 = torch.aten.mm %22597, %22574 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22598, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_20207 = torch.constant.int 4
    %int4096_20208 = torch.constant.int 4096
    %22599 = torch.prim.ListConstruct %int4_20207, %22594, %int4096_20208 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22600 = torch.aten.view %22598, %22599 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20209 = torch.constant.int 1
    %22601 = torch.aten.size.int %22460, %int1_20209 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_20210 = torch.constant.int 4
    %22602 = torch.aten.mul.int %int4_20210, %22601 : !torch.int, !torch.int -> !torch.int
    %int1792_20211 = torch.constant.int 1792
    %22603 = torch.prim.ListConstruct %22602, %int1792_20211 : (!torch.int, !torch.int) -> !torch.list<int>
    %22604 = torch.aten.view %22565, %22603 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22604, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %22605 = torch.aten.mm %22604, %22576 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22605, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_20212 = torch.constant.int 4
    %int4096_20213 = torch.constant.int 4096
    %22606 = torch.prim.ListConstruct %int4_20212, %22601, %int4096_20213 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22607 = torch.aten.view %22605, %22606 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20214 = torch.constant.int 1
    %22608 = torch.aten.size.int %22466, %int1_20214 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_20215 = torch.constant.int 4
    %22609 = torch.aten.mul.int %int4_20215, %22608 : !torch.int, !torch.int -> !torch.int
    %int1792_20216 = torch.constant.int 1792
    %22610 = torch.prim.ListConstruct %22609, %int1792_20216 : (!torch.int, !torch.int) -> !torch.list<int>
    %22611 = torch.aten.view %22566, %22610 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22611, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %22612 = torch.aten.mm %22611, %22578 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22612, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_20217 = torch.constant.int 4
    %int4096_20218 = torch.constant.int 4096
    %22613 = torch.prim.ListConstruct %int4_20217, %22608, %int4096_20218 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22614 = torch.aten.view %22612, %22613 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20219 = torch.constant.int 1
    %22615 = torch.aten.size.int %22472, %int1_20219 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_20220 = torch.constant.int 4
    %22616 = torch.aten.mul.int %int4_20220, %22615 : !torch.int, !torch.int -> !torch.int
    %int1792_20221 = torch.constant.int 1792
    %22617 = torch.prim.ListConstruct %22616, %int1792_20221 : (!torch.int, !torch.int) -> !torch.list<int>
    %22618 = torch.aten.view %22567, %22617 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22618, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %22619 = torch.aten.mm %22618, %22580 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22619, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_20222 = torch.constant.int 4
    %int4096_20223 = torch.constant.int 4096
    %22620 = torch.prim.ListConstruct %int4_20222, %22615, %int4096_20223 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22621 = torch.aten.view %22619, %22620 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20224 = torch.constant.int 1
    %22622 = torch.aten.size.int %22478, %int1_20224 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_20225 = torch.constant.int 4
    %22623 = torch.aten.mul.int %int4_20225, %22622 : !torch.int, !torch.int -> !torch.int
    %int1792_20226 = torch.constant.int 1792
    %22624 = torch.prim.ListConstruct %22623, %int1792_20226 : (!torch.int, !torch.int) -> !torch.list<int>
    %22625 = torch.aten.view %22568, %22624 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22625, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %22626 = torch.aten.mm %22625, %22582 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22626, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_20227 = torch.constant.int 4
    %int4096_20228 = torch.constant.int 4096
    %22627 = torch.prim.ListConstruct %int4_20227, %22622, %int4096_20228 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22628 = torch.aten.view %22626, %22627 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20229 = torch.constant.int 1
    %22629 = torch.aten.size.int %22484, %int1_20229 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_20230 = torch.constant.int 4
    %22630 = torch.aten.mul.int %int4_20230, %22629 : !torch.int, !torch.int -> !torch.int
    %int1792_20231 = torch.constant.int 1792
    %22631 = torch.prim.ListConstruct %22630, %int1792_20231 : (!torch.int, !torch.int) -> !torch.list<int>
    %22632 = torch.aten.view %22569, %22631 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22632, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %22633 = torch.aten.mm %22632, %22584 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22633, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_20232 = torch.constant.int 4
    %int4096_20233 = torch.constant.int 4096
    %22634 = torch.prim.ListConstruct %int4_20232, %22629, %int4096_20233 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22635 = torch.aten.view %22633, %22634 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20234 = torch.constant.int 1
    %22636 = torch.aten.size.int %22490, %int1_20234 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_20235 = torch.constant.int 4
    %22637 = torch.aten.mul.int %int4_20235, %22636 : !torch.int, !torch.int -> !torch.int
    %int1792_20236 = torch.constant.int 1792
    %22638 = torch.prim.ListConstruct %22637, %int1792_20236 : (!torch.int, !torch.int) -> !torch.list<int>
    %22639 = torch.aten.view %22570, %22638 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %22639, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %22640 = torch.aten.mm %22639, %22586 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22640, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_20237 = torch.constant.int 4
    %int4096_20238 = torch.constant.int 4096
    %22641 = torch.prim.ListConstruct %int4_20237, %22636, %int4096_20238 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22642 = torch.aten.view %22640, %22641 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
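    // Editorial annotation: everything from the gate matmuls down to here matches
    // an 8-way tensor-parallel SwiGLU FFN. A hypothetical PyTorch-like sketch of
    // the computation (names are illustrative, not taken from this module):
    //
    //   partial[k] = (silu(x[k] @ Wg[k].T) * (x[k] @ Wu[k].T)) @ Wd[k].T
    //   out[d]     = sum(transfer(partial[k], device=d) for k in range(8))
    //
    // with Wg[k], Wu[k] : [1792,4096] and Wd[k] : [4096,1792]. The reduction is
    // not expressed as a collective op; it is unrolled as explicit
    // flow.tensor.transfer ops plus a chain of adds, replicated once per device,
    // beginning with @__device_0 below.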
    %22643 = torch_c.to_builtin_tensor %22600 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20239 = arith.constant 1 : index
    %dim_20240 = tensor.dim %22643, %c1_20239 : tensor<4x?x4096xf16>
    %22644 = flow.tensor.transfer %22643 : tensor<4x?x4096xf16>{%dim_20240} to #hal.device.promise<@__device_0>
    %22645 = torch_c.from_builtin_tensor %22644 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22646 = torch_c.to_builtin_tensor %22607 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20241 = arith.constant 1 : index
    %dim_20242 = tensor.dim %22646, %c1_20241 : tensor<4x?x4096xf16>
    %22647 = flow.tensor.transfer %22646 : tensor<4x?x4096xf16>{%dim_20242} to #hal.device.promise<@__device_0>
    %22648 = torch_c.from_builtin_tensor %22647 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22649 = torch_c.to_builtin_tensor %22614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20243 = arith.constant 1 : index
    %dim_20244 = tensor.dim %22649, %c1_20243 : tensor<4x?x4096xf16>
    %22650 = flow.tensor.transfer %22649 : tensor<4x?x4096xf16>{%dim_20244} to #hal.device.promise<@__device_0>
    %22651 = torch_c.from_builtin_tensor %22650 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22652 = torch_c.to_builtin_tensor %22621 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20245 = arith.constant 1 : index
    %dim_20246 = tensor.dim %22652, %c1_20245 : tensor<4x?x4096xf16>
    %22653 = flow.tensor.transfer %22652 : tensor<4x?x4096xf16>{%dim_20246} to #hal.device.promise<@__device_0>
    %22654 = torch_c.from_builtin_tensor %22653 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22655 = torch_c.to_builtin_tensor %22628 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20247 = arith.constant 1 : index
    %dim_20248 = tensor.dim %22655, %c1_20247 : tensor<4x?x4096xf16>
    %22656 = flow.tensor.transfer %22655 : tensor<4x?x4096xf16>{%dim_20248} to #hal.device.promise<@__device_0>
    %22657 = torch_c.from_builtin_tensor %22656 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22658 = torch_c.to_builtin_tensor %22635 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20249 = arith.constant 1 : index
    %dim_20250 = tensor.dim %22658, %c1_20249 : tensor<4x?x4096xf16>
    %22659 = flow.tensor.transfer %22658 : tensor<4x?x4096xf16>{%dim_20250} to #hal.device.promise<@__device_0>
    %22660 = torch_c.from_builtin_tensor %22659 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22661 = torch_c.to_builtin_tensor %22642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20251 = arith.constant 1 : index
    %dim_20252 = tensor.dim %22661, %c1_20251 : tensor<4x?x4096xf16>
    %22662 = flow.tensor.transfer %22661 : tensor<4x?x4096xf16>{%dim_20252} to #hal.device.promise<@__device_0>
    %22663 = torch_c.from_builtin_tensor %22662 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
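    // Device 0: sum the resident partial %22593 with the seven transferred
    // partials.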
    %int1_20253 = torch.constant.int 1
    %22664 = torch.aten.add.Tensor %22593, %22645, %int1_20253 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20254 = torch.constant.int 1
    %22665 = torch.aten.add.Tensor %22664, %22648, %int1_20254 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20255 = torch.constant.int 1
    %22666 = torch.aten.add.Tensor %22665, %22651, %int1_20255 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20256 = torch.constant.int 1
    %22667 = torch.aten.add.Tensor %22666, %22654, %int1_20256 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20257 = torch.constant.int 1
    %22668 = torch.aten.add.Tensor %22667, %22657, %int1_20257 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20258 = torch.constant.int 1
    %22669 = torch.aten.add.Tensor %22668, %22660, %int1_20258 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20259 = torch.constant.int 1
    %22670 = torch.aten.add.Tensor %22669, %22663, %int1_20259 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
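    // Device 1: same gather-and-sum; only the resident partial %22600 skips the
    // transfer.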
    %22671 = torch_c.to_builtin_tensor %22593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20260 = arith.constant 1 : index
    %dim_20261 = tensor.dim %22671, %c1_20260 : tensor<4x?x4096xf16>
    %22672 = flow.tensor.transfer %22671 : tensor<4x?x4096xf16>{%dim_20261} to #hal.device.promise<@__device_1>
    %22673 = torch_c.from_builtin_tensor %22672 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22674 = torch_c.to_builtin_tensor %22607 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20262 = arith.constant 1 : index
    %dim_20263 = tensor.dim %22674, %c1_20262 : tensor<4x?x4096xf16>
    %22675 = flow.tensor.transfer %22674 : tensor<4x?x4096xf16>{%dim_20263} to #hal.device.promise<@__device_1>
    %22676 = torch_c.from_builtin_tensor %22675 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22677 = torch_c.to_builtin_tensor %22614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20264 = arith.constant 1 : index
    %dim_20265 = tensor.dim %22677, %c1_20264 : tensor<4x?x4096xf16>
    %22678 = flow.tensor.transfer %22677 : tensor<4x?x4096xf16>{%dim_20265} to #hal.device.promise<@__device_1>
    %22679 = torch_c.from_builtin_tensor %22678 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22680 = torch_c.to_builtin_tensor %22621 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20266 = arith.constant 1 : index
    %dim_20267 = tensor.dim %22680, %c1_20266 : tensor<4x?x4096xf16>
    %22681 = flow.tensor.transfer %22680 : tensor<4x?x4096xf16>{%dim_20267} to #hal.device.promise<@__device_1>
    %22682 = torch_c.from_builtin_tensor %22681 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22683 = torch_c.to_builtin_tensor %22628 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20268 = arith.constant 1 : index
    %dim_20269 = tensor.dim %22683, %c1_20268 : tensor<4x?x4096xf16>
    %22684 = flow.tensor.transfer %22683 : tensor<4x?x4096xf16>{%dim_20269} to #hal.device.promise<@__device_1>
    %22685 = torch_c.from_builtin_tensor %22684 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22686 = torch_c.to_builtin_tensor %22635 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20270 = arith.constant 1 : index
    %dim_20271 = tensor.dim %22686, %c1_20270 : tensor<4x?x4096xf16>
    %22687 = flow.tensor.transfer %22686 : tensor<4x?x4096xf16>{%dim_20271} to #hal.device.promise<@__device_1>
    %22688 = torch_c.from_builtin_tensor %22687 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22689 = torch_c.to_builtin_tensor %22642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20272 = arith.constant 1 : index
    %dim_20273 = tensor.dim %22689, %c1_20272 : tensor<4x?x4096xf16>
    %22690 = flow.tensor.transfer %22689 : tensor<4x?x4096xf16>{%dim_20273} to #hal.device.promise<@__device_1>
    %22691 = torch_c.from_builtin_tensor %22690 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20274 = torch.constant.int 1
    %22692 = torch.aten.add.Tensor %22673, %22600, %int1_20274 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20275 = torch.constant.int 1
    %22693 = torch.aten.add.Tensor %22692, %22676, %int1_20275 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20276 = torch.constant.int 1
    %22694 = torch.aten.add.Tensor %22693, %22679, %int1_20276 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20277 = torch.constant.int 1
    %22695 = torch.aten.add.Tensor %22694, %22682, %int1_20277 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20278 = torch.constant.int 1
    %22696 = torch.aten.add.Tensor %22695, %22685, %int1_20278 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20279 = torch.constant.int 1
    %22697 = torch.aten.add.Tensor %22696, %22688, %int1_20279 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20280 = torch.constant.int 1
    %22698 = torch.aten.add.Tensor %22697, %22691, %int1_20280 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
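    // Device 2: gather-and-sum again, with %22607 as the resident partial.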
    %22699 = torch_c.to_builtin_tensor %22593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20281 = arith.constant 1 : index
    %dim_20282 = tensor.dim %22699, %c1_20281 : tensor<4x?x4096xf16>
    %22700 = flow.tensor.transfer %22699 : tensor<4x?x4096xf16>{%dim_20282} to #hal.device.promise<@__device_2>
    %22701 = torch_c.from_builtin_tensor %22700 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22702 = torch_c.to_builtin_tensor %22600 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20283 = arith.constant 1 : index
    %dim_20284 = tensor.dim %22702, %c1_20283 : tensor<4x?x4096xf16>
    %22703 = flow.tensor.transfer %22702 : tensor<4x?x4096xf16>{%dim_20284} to #hal.device.promise<@__device_2>
    %22704 = torch_c.from_builtin_tensor %22703 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22705 = torch_c.to_builtin_tensor %22614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20285 = arith.constant 1 : index
    %dim_20286 = tensor.dim %22705, %c1_20285 : tensor<4x?x4096xf16>
    %22706 = flow.tensor.transfer %22705 : tensor<4x?x4096xf16>{%dim_20286} to #hal.device.promise<@__device_2>
    %22707 = torch_c.from_builtin_tensor %22706 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22708 = torch_c.to_builtin_tensor %22621 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20287 = arith.constant 1 : index
    %dim_20288 = tensor.dim %22708, %c1_20287 : tensor<4x?x4096xf16>
    %22709 = flow.tensor.transfer %22708 : tensor<4x?x4096xf16>{%dim_20288} to #hal.device.promise<@__device_2>
    %22710 = torch_c.from_builtin_tensor %22709 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22711 = torch_c.to_builtin_tensor %22628 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20289 = arith.constant 1 : index
    %dim_20290 = tensor.dim %22711, %c1_20289 : tensor<4x?x4096xf16>
    %22712 = flow.tensor.transfer %22711 : tensor<4x?x4096xf16>{%dim_20290} to #hal.device.promise<@__device_2>
    %22713 = torch_c.from_builtin_tensor %22712 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22714 = torch_c.to_builtin_tensor %22635 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20291 = arith.constant 1 : index
    %dim_20292 = tensor.dim %22714, %c1_20291 : tensor<4x?x4096xf16>
    %22715 = flow.tensor.transfer %22714 : tensor<4x?x4096xf16>{%dim_20292} to #hal.device.promise<@__device_2>
    %22716 = torch_c.from_builtin_tensor %22715 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22717 = torch_c.to_builtin_tensor %22642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20293 = arith.constant 1 : index
    %dim_20294 = tensor.dim %22717, %c1_20293 : tensor<4x?x4096xf16>
    %22718 = flow.tensor.transfer %22717 : tensor<4x?x4096xf16>{%dim_20294} to #hal.device.promise<@__device_2>
    %22719 = torch_c.from_builtin_tensor %22718 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20295 = torch.constant.int 1
    %22720 = torch.aten.add.Tensor %22701, %22704, %int1_20295 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20296 = torch.constant.int 1
    %22721 = torch.aten.add.Tensor %22720, %22607, %int1_20296 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20297 = torch.constant.int 1
    %22722 = torch.aten.add.Tensor %22721, %22707, %int1_20297 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20298 = torch.constant.int 1
    %22723 = torch.aten.add.Tensor %22722, %22710, %int1_20298 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20299 = torch.constant.int 1
    %22724 = torch.aten.add.Tensor %22723, %22713, %int1_20299 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20300 = torch.constant.int 1
    %22725 = torch.aten.add.Tensor %22724, %22716, %int1_20300 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20301 = torch.constant.int 1
    %22726 = torch.aten.add.Tensor %22725, %22719, %int1_20301 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
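    // Device 3: gather-and-sum, with %22614 resident.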
    %22727 = torch_c.to_builtin_tensor %22593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20302 = arith.constant 1 : index
    %dim_20303 = tensor.dim %22727, %c1_20302 : tensor<4x?x4096xf16>
    %22728 = flow.tensor.transfer %22727 : tensor<4x?x4096xf16>{%dim_20303} to #hal.device.promise<@__device_3>
    %22729 = torch_c.from_builtin_tensor %22728 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22730 = torch_c.to_builtin_tensor %22600 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20304 = arith.constant 1 : index
    %dim_20305 = tensor.dim %22730, %c1_20304 : tensor<4x?x4096xf16>
    %22731 = flow.tensor.transfer %22730 : tensor<4x?x4096xf16>{%dim_20305} to #hal.device.promise<@__device_3>
    %22732 = torch_c.from_builtin_tensor %22731 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22733 = torch_c.to_builtin_tensor %22607 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20306 = arith.constant 1 : index
    %dim_20307 = tensor.dim %22733, %c1_20306 : tensor<4x?x4096xf16>
    %22734 = flow.tensor.transfer %22733 : tensor<4x?x4096xf16>{%dim_20307} to #hal.device.promise<@__device_3>
    %22735 = torch_c.from_builtin_tensor %22734 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22736 = torch_c.to_builtin_tensor %22621 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20308 = arith.constant 1 : index
    %dim_20309 = tensor.dim %22736, %c1_20308 : tensor<4x?x4096xf16>
    %22737 = flow.tensor.transfer %22736 : tensor<4x?x4096xf16>{%dim_20309} to #hal.device.promise<@__device_3>
    %22738 = torch_c.from_builtin_tensor %22737 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22739 = torch_c.to_builtin_tensor %22628 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20310 = arith.constant 1 : index
    %dim_20311 = tensor.dim %22739, %c1_20310 : tensor<4x?x4096xf16>
    %22740 = flow.tensor.transfer %22739 : tensor<4x?x4096xf16>{%dim_20311} to #hal.device.promise<@__device_3>
    %22741 = torch_c.from_builtin_tensor %22740 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22742 = torch_c.to_builtin_tensor %22635 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20312 = arith.constant 1 : index
    %dim_20313 = tensor.dim %22742, %c1_20312 : tensor<4x?x4096xf16>
    %22743 = flow.tensor.transfer %22742 : tensor<4x?x4096xf16>{%dim_20313} to #hal.device.promise<@__device_3>
    %22744 = torch_c.from_builtin_tensor %22743 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22745 = torch_c.to_builtin_tensor %22642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20314 = arith.constant 1 : index
    %dim_20315 = tensor.dim %22745, %c1_20314 : tensor<4x?x4096xf16>
    %22746 = flow.tensor.transfer %22745 : tensor<4x?x4096xf16>{%dim_20315} to #hal.device.promise<@__device_3>
    %22747 = torch_c.from_builtin_tensor %22746 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20316 = torch.constant.int 1
    %22748 = torch.aten.add.Tensor %22729, %22732, %int1_20316 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20317 = torch.constant.int 1
    %22749 = torch.aten.add.Tensor %22748, %22735, %int1_20317 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20318 = torch.constant.int 1
    %22750 = torch.aten.add.Tensor %22749, %22614, %int1_20318 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20319 = torch.constant.int 1
    %22751 = torch.aten.add.Tensor %22750, %22738, %int1_20319 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20320 = torch.constant.int 1
    %22752 = torch.aten.add.Tensor %22751, %22741, %int1_20320 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20321 = torch.constant.int 1
    %22753 = torch.aten.add.Tensor %22752, %22744, %int1_20321 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20322 = torch.constant.int 1
    %22754 = torch.aten.add.Tensor %22753, %22747, %int1_20322 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
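    // Gather-and-reduce for @__device_4: the seven remote partial sums
    // (%22593, %22600, %22607, %22614, %22628, %22635, %22642) are copied to
    // device 4 and chain-added with the device-local partial (%22621) --
    // one leg of an unrolled eight-way all-reduce.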
    %22755 = torch_c.to_builtin_tensor %22593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20323 = arith.constant 1 : index
    %dim_20324 = tensor.dim %22755, %c1_20323 : tensor<4x?x4096xf16>
    %22756 = flow.tensor.transfer %22755 : tensor<4x?x4096xf16>{%dim_20324} to #hal.device.promise<@__device_4>
    %22757 = torch_c.from_builtin_tensor %22756 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22758 = torch_c.to_builtin_tensor %22600 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20325 = arith.constant 1 : index
    %dim_20326 = tensor.dim %22758, %c1_20325 : tensor<4x?x4096xf16>
    %22759 = flow.tensor.transfer %22758 : tensor<4x?x4096xf16>{%dim_20326} to #hal.device.promise<@__device_4>
    %22760 = torch_c.from_builtin_tensor %22759 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22761 = torch_c.to_builtin_tensor %22607 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20327 = arith.constant 1 : index
    %dim_20328 = tensor.dim %22761, %c1_20327 : tensor<4x?x4096xf16>
    %22762 = flow.tensor.transfer %22761 : tensor<4x?x4096xf16>{%dim_20328} to #hal.device.promise<@__device_4>
    %22763 = torch_c.from_builtin_tensor %22762 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22764 = torch_c.to_builtin_tensor %22614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20329 = arith.constant 1 : index
    %dim_20330 = tensor.dim %22764, %c1_20329 : tensor<4x?x4096xf16>
    %22765 = flow.tensor.transfer %22764 : tensor<4x?x4096xf16>{%dim_20330} to #hal.device.promise<@__device_4>
    %22766 = torch_c.from_builtin_tensor %22765 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22767 = torch_c.to_builtin_tensor %22628 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20331 = arith.constant 1 : index
    %dim_20332 = tensor.dim %22767, %c1_20331 : tensor<4x?x4096xf16>
    %22768 = flow.tensor.transfer %22767 : tensor<4x?x4096xf16>{%dim_20332} to #hal.device.promise<@__device_4>
    %22769 = torch_c.from_builtin_tensor %22768 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22770 = torch_c.to_builtin_tensor %22635 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20333 = arith.constant 1 : index
    %dim_20334 = tensor.dim %22770, %c1_20333 : tensor<4x?x4096xf16>
    %22771 = flow.tensor.transfer %22770 : tensor<4x?x4096xf16>{%dim_20334} to #hal.device.promise<@__device_4>
    %22772 = torch_c.from_builtin_tensor %22771 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22773 = torch_c.to_builtin_tensor %22642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20335 = arith.constant 1 : index
    %dim_20336 = tensor.dim %22773, %c1_20335 : tensor<4x?x4096xf16>
    %22774 = flow.tensor.transfer %22773 : tensor<4x?x4096xf16>{%dim_20336} to #hal.device.promise<@__device_4>
    %22775 = torch_c.from_builtin_tensor %22774 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20337 = torch.constant.int 1
    %22776 = torch.aten.add.Tensor %22757, %22760, %int1_20337 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20338 = torch.constant.int 1
    %22777 = torch.aten.add.Tensor %22776, %22763, %int1_20338 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20339 = torch.constant.int 1
    %22778 = torch.aten.add.Tensor %22777, %22766, %int1_20339 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20340 = torch.constant.int 1
    %22779 = torch.aten.add.Tensor %22778, %22621, %int1_20340 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20341 = torch.constant.int 1
    %22780 = torch.aten.add.Tensor %22779, %22769, %int1_20341 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20342 = torch.constant.int 1
    %22781 = torch.aten.add.Tensor %22780, %22772, %int1_20342 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20343 = torch.constant.int 1
    %22782 = torch.aten.add.Tensor %22781, %22775, %int1_20343 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
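    // Same gather-and-reduce replicated for @__device_5; here %22628 is the
    // device-local partial and is added without a transfer.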
    %22783 = torch_c.to_builtin_tensor %22593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20344 = arith.constant 1 : index
    %dim_20345 = tensor.dim %22783, %c1_20344 : tensor<4x?x4096xf16>
    %22784 = flow.tensor.transfer %22783 : tensor<4x?x4096xf16>{%dim_20345} to #hal.device.promise<@__device_5>
    %22785 = torch_c.from_builtin_tensor %22784 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22786 = torch_c.to_builtin_tensor %22600 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20346 = arith.constant 1 : index
    %dim_20347 = tensor.dim %22786, %c1_20346 : tensor<4x?x4096xf16>
    %22787 = flow.tensor.transfer %22786 : tensor<4x?x4096xf16>{%dim_20347} to #hal.device.promise<@__device_5>
    %22788 = torch_c.from_builtin_tensor %22787 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22789 = torch_c.to_builtin_tensor %22607 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20348 = arith.constant 1 : index
    %dim_20349 = tensor.dim %22789, %c1_20348 : tensor<4x?x4096xf16>
    %22790 = flow.tensor.transfer %22789 : tensor<4x?x4096xf16>{%dim_20349} to #hal.device.promise<@__device_5>
    %22791 = torch_c.from_builtin_tensor %22790 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22792 = torch_c.to_builtin_tensor %22614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20350 = arith.constant 1 : index
    %dim_20351 = tensor.dim %22792, %c1_20350 : tensor<4x?x4096xf16>
    %22793 = flow.tensor.transfer %22792 : tensor<4x?x4096xf16>{%dim_20351} to #hal.device.promise<@__device_5>
    %22794 = torch_c.from_builtin_tensor %22793 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22795 = torch_c.to_builtin_tensor %22621 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20352 = arith.constant 1 : index
    %dim_20353 = tensor.dim %22795, %c1_20352 : tensor<4x?x4096xf16>
    %22796 = flow.tensor.transfer %22795 : tensor<4x?x4096xf16>{%dim_20353} to #hal.device.promise<@__device_5>
    %22797 = torch_c.from_builtin_tensor %22796 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22798 = torch_c.to_builtin_tensor %22635 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20354 = arith.constant 1 : index
    %dim_20355 = tensor.dim %22798, %c1_20354 : tensor<4x?x4096xf16>
    %22799 = flow.tensor.transfer %22798 : tensor<4x?x4096xf16>{%dim_20355} to #hal.device.promise<@__device_5>
    %22800 = torch_c.from_builtin_tensor %22799 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22801 = torch_c.to_builtin_tensor %22642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20356 = arith.constant 1 : index
    %dim_20357 = tensor.dim %22801, %c1_20356 : tensor<4x?x4096xf16>
    %22802 = flow.tensor.transfer %22801 : tensor<4x?x4096xf16>{%dim_20357} to #hal.device.promise<@__device_5>
    %22803 = torch_c.from_builtin_tensor %22802 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20358 = torch.constant.int 1
    %22804 = torch.aten.add.Tensor %22785, %22788, %int1_20358 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20359 = torch.constant.int 1
    %22805 = torch.aten.add.Tensor %22804, %22791, %int1_20359 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20360 = torch.constant.int 1
    %22806 = torch.aten.add.Tensor %22805, %22794, %int1_20360 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20361 = torch.constant.int 1
    %22807 = torch.aten.add.Tensor %22806, %22797, %int1_20361 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20362 = torch.constant.int 1
    %22808 = torch.aten.add.Tensor %22807, %22628, %int1_20362 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20363 = torch.constant.int 1
    %22809 = torch.aten.add.Tensor %22808, %22800, %int1_20363 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20364 = torch.constant.int 1
    %22810 = torch.aten.add.Tensor %22809, %22803, %int1_20364 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
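    // Same gather-and-reduce replicated for @__device_6 (local partial %22635).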
    %22811 = torch_c.to_builtin_tensor %22593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20365 = arith.constant 1 : index
    %dim_20366 = tensor.dim %22811, %c1_20365 : tensor<4x?x4096xf16>
    %22812 = flow.tensor.transfer %22811 : tensor<4x?x4096xf16>{%dim_20366} to #hal.device.promise<@__device_6>
    %22813 = torch_c.from_builtin_tensor %22812 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22814 = torch_c.to_builtin_tensor %22600 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20367 = arith.constant 1 : index
    %dim_20368 = tensor.dim %22814, %c1_20367 : tensor<4x?x4096xf16>
    %22815 = flow.tensor.transfer %22814 : tensor<4x?x4096xf16>{%dim_20368} to #hal.device.promise<@__device_6>
    %22816 = torch_c.from_builtin_tensor %22815 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22817 = torch_c.to_builtin_tensor %22607 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20369 = arith.constant 1 : index
    %dim_20370 = tensor.dim %22817, %c1_20369 : tensor<4x?x4096xf16>
    %22818 = flow.tensor.transfer %22817 : tensor<4x?x4096xf16>{%dim_20370} to #hal.device.promise<@__device_6>
    %22819 = torch_c.from_builtin_tensor %22818 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22820 = torch_c.to_builtin_tensor %22614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20371 = arith.constant 1 : index
    %dim_20372 = tensor.dim %22820, %c1_20371 : tensor<4x?x4096xf16>
    %22821 = flow.tensor.transfer %22820 : tensor<4x?x4096xf16>{%dim_20372} to #hal.device.promise<@__device_6>
    %22822 = torch_c.from_builtin_tensor %22821 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22823 = torch_c.to_builtin_tensor %22621 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20373 = arith.constant 1 : index
    %dim_20374 = tensor.dim %22823, %c1_20373 : tensor<4x?x4096xf16>
    %22824 = flow.tensor.transfer %22823 : tensor<4x?x4096xf16>{%dim_20374} to #hal.device.promise<@__device_6>
    %22825 = torch_c.from_builtin_tensor %22824 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22826 = torch_c.to_builtin_tensor %22628 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20375 = arith.constant 1 : index
    %dim_20376 = tensor.dim %22826, %c1_20375 : tensor<4x?x4096xf16>
    %22827 = flow.tensor.transfer %22826 : tensor<4x?x4096xf16>{%dim_20376} to #hal.device.promise<@__device_6>
    %22828 = torch_c.from_builtin_tensor %22827 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22829 = torch_c.to_builtin_tensor %22642 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20377 = arith.constant 1 : index
    %dim_20378 = tensor.dim %22829, %c1_20377 : tensor<4x?x4096xf16>
    %22830 = flow.tensor.transfer %22829 : tensor<4x?x4096xf16>{%dim_20378} to #hal.device.promise<@__device_6>
    %22831 = torch_c.from_builtin_tensor %22830 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20379 = torch.constant.int 1
    %22832 = torch.aten.add.Tensor %22813, %22816, %int1_20379 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20380 = torch.constant.int 1
    %22833 = torch.aten.add.Tensor %22832, %22819, %int1_20380 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20381 = torch.constant.int 1
    %22834 = torch.aten.add.Tensor %22833, %22822, %int1_20381 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20382 = torch.constant.int 1
    %22835 = torch.aten.add.Tensor %22834, %22825, %int1_20382 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20383 = torch.constant.int 1
    %22836 = torch.aten.add.Tensor %22835, %22828, %int1_20383 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20384 = torch.constant.int 1
    %22837 = torch.aten.add.Tensor %22836, %22635, %int1_20384 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20385 = torch.constant.int 1
    %22838 = torch.aten.add.Tensor %22837, %22831, %int1_20385 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
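    // Final leg of the unrolled all-reduce, for @__device_7 (local partial
    // %22642). After this, every device holds the same full [4, s0*16, 4096]
    // sum of the eight sharded partial results.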
    %22839 = torch_c.to_builtin_tensor %22593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20386 = arith.constant 1 : index
    %dim_20387 = tensor.dim %22839, %c1_20386 : tensor<4x?x4096xf16>
    %22840 = flow.tensor.transfer %22839 : tensor<4x?x4096xf16>{%dim_20387} to #hal.device.promise<@__device_7>
    %22841 = torch_c.from_builtin_tensor %22840 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22842 = torch_c.to_builtin_tensor %22600 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20388 = arith.constant 1 : index
    %dim_20389 = tensor.dim %22842, %c1_20388 : tensor<4x?x4096xf16>
    %22843 = flow.tensor.transfer %22842 : tensor<4x?x4096xf16>{%dim_20389} to #hal.device.promise<@__device_7>
    %22844 = torch_c.from_builtin_tensor %22843 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22845 = torch_c.to_builtin_tensor %22607 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20390 = arith.constant 1 : index
    %dim_20391 = tensor.dim %22845, %c1_20390 : tensor<4x?x4096xf16>
    %22846 = flow.tensor.transfer %22845 : tensor<4x?x4096xf16>{%dim_20391} to #hal.device.promise<@__device_7>
    %22847 = torch_c.from_builtin_tensor %22846 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22848 = torch_c.to_builtin_tensor %22614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20392 = arith.constant 1 : index
    %dim_20393 = tensor.dim %22848, %c1_20392 : tensor<4x?x4096xf16>
    %22849 = flow.tensor.transfer %22848 : tensor<4x?x4096xf16>{%dim_20393} to #hal.device.promise<@__device_7>
    %22850 = torch_c.from_builtin_tensor %22849 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22851 = torch_c.to_builtin_tensor %22621 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20394 = arith.constant 1 : index
    %dim_20395 = tensor.dim %22851, %c1_20394 : tensor<4x?x4096xf16>
    %22852 = flow.tensor.transfer %22851 : tensor<4x?x4096xf16>{%dim_20395} to #hal.device.promise<@__device_7>
    %22853 = torch_c.from_builtin_tensor %22852 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22854 = torch_c.to_builtin_tensor %22628 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20396 = arith.constant 1 : index
    %dim_20397 = tensor.dim %22854, %c1_20396 : tensor<4x?x4096xf16>
    %22855 = flow.tensor.transfer %22854 : tensor<4x?x4096xf16>{%dim_20397} to #hal.device.promise<@__device_7>
    %22856 = torch_c.from_builtin_tensor %22855 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %22857 = torch_c.to_builtin_tensor %22635 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_20398 = arith.constant 1 : index
    %dim_20399 = tensor.dim %22857, %c1_20398 : tensor<4x?x4096xf16>
    %22858 = flow.tensor.transfer %22857 : tensor<4x?x4096xf16>{%dim_20399} to #hal.device.promise<@__device_7>
    %22859 = torch_c.from_builtin_tensor %22858 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20400 = torch.constant.int 1
    %22860 = torch.aten.add.Tensor %22841, %22844, %int1_20400 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20401 = torch.constant.int 1
    %22861 = torch.aten.add.Tensor %22860, %22847, %int1_20401 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20402 = torch.constant.int 1
    %22862 = torch.aten.add.Tensor %22861, %22850, %int1_20402 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20403 = torch.constant.int 1
    %22863 = torch.aten.add.Tensor %22862, %22853, %int1_20403 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20404 = torch.constant.int 1
    %22864 = torch.aten.add.Tensor %22863, %22856, %int1_20404 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20405 = torch.constant.int 1
    %22865 = torch.aten.add.Tensor %22864, %22859, %int1_20405 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20406 = torch.constant.int 1
    %22866 = torch.aten.add.Tensor %22865, %22642, %int1_20406 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
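    // Residual add, replicated per device: each device's earlier hidden-state
    // tensor (%22347 .. %22354) is added to its reduced sum (%22670, %22698,
    // %22726, %22754, %22782, %22810, %22838, %22866).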
    %int1_20407 = torch.constant.int 1
    %22867 = torch.aten.add.Tensor %22347, %22670, %int1_20407 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20408 = torch.constant.int 1
    %22868 = torch.aten.add.Tensor %22348, %22698, %int1_20408 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20409 = torch.constant.int 1
    %22869 = torch.aten.add.Tensor %22349, %22726, %int1_20409 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20410 = torch.constant.int 1
    %22870 = torch.aten.add.Tensor %22350, %22754, %int1_20410 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20411 = torch.constant.int 1
    %22871 = torch.aten.add.Tensor %22351, %22782, %int1_20411 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20412 = torch.constant.int 1
    %22872 = torch.aten.add.Tensor %22352, %22810, %int1_20412 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20413 = torch.constant.int 1
    %22873 = torch.aten.add.Tensor %22353, %22838, %int1_20413 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_20414 = torch.constant.int 1
    %22874 = torch.aten.add.Tensor %22354, %22866, %int1_20414 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
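    // RMSNorm over the hidden dimension, computed independently on each of the
    // eight replicas. Step 1: upcast f16 -> f32 (torch dtype code 6) so the
    // reduction below runs at full precision.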
    %int6_20415 = torch.constant.int 6
    %22875 = torch.prims.convert_element_type %22867, %int6_20415 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20416 = torch.constant.int 6
    %22876 = torch.prims.convert_element_type %22868, %int6_20416 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20417 = torch.constant.int 6
    %22877 = torch.prims.convert_element_type %22869, %int6_20417 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20418 = torch.constant.int 6
    %22878 = torch.prims.convert_element_type %22870, %int6_20418 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20419 = torch.constant.int 6
    %22879 = torch.prims.convert_element_type %22871, %int6_20419 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20420 = torch.constant.int 6
    %22880 = torch.prims.convert_element_type %22872, %int6_20420 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20421 = torch.constant.int 6
    %22881 = torch.prims.convert_element_type %22873, %int6_20421 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_20422 = torch.constant.int 6
    %22882 = torch.prims.convert_element_type %22874, %int6_20422 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
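    // Step 2: square element-wise, then take the mean over the last (hidden)
    // dimension with keepdim=true, yielding per-token mean-square values of
    // shape [4, s0*16, 1].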
    %int2_20423 = torch.constant.int 2
    %22883 = torch.aten.pow.Tensor_Scalar %22875, %int2_20423 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20424 = torch.constant.int 2
    %22884 = torch.aten.pow.Tensor_Scalar %22876, %int2_20424 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20425 = torch.constant.int 2
    %22885 = torch.aten.pow.Tensor_Scalar %22877, %int2_20425 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20426 = torch.constant.int 2
    %22886 = torch.aten.pow.Tensor_Scalar %22878, %int2_20426 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20427 = torch.constant.int 2
    %22887 = torch.aten.pow.Tensor_Scalar %22879, %int2_20427 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20428 = torch.constant.int 2
    %22888 = torch.aten.pow.Tensor_Scalar %22880, %int2_20428 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20429 = torch.constant.int 2
    %22889 = torch.aten.pow.Tensor_Scalar %22881, %int2_20429 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_20430 = torch.constant.int 2
    %22890 = torch.aten.pow.Tensor_Scalar %22882, %int2_20430 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int-1_20431 = torch.constant.int -1
    %22891 = torch.prim.ListConstruct %int-1_20431 : (!torch.int) -> !torch.list<int>
    %true_20432 = torch.constant.bool true
    %none_20433 = torch.constant.none
    %22892 = torch.aten.mean.dim %22883, %22891, %true_20432, %none_20433 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20434 = torch.constant.int -1
    %22893 = torch.prim.ListConstruct %int-1_20434 : (!torch.int) -> !torch.list<int>
    %true_20435 = torch.constant.bool true
    %none_20436 = torch.constant.none
    %22894 = torch.aten.mean.dim %22884, %22893, %true_20435, %none_20436 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20437 = torch.constant.int -1
    %22895 = torch.prim.ListConstruct %int-1_20437 : (!torch.int) -> !torch.list<int>
    %true_20438 = torch.constant.bool true
    %none_20439 = torch.constant.none
    %22896 = torch.aten.mean.dim %22885, %22895, %true_20438, %none_20439 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20440 = torch.constant.int -1
    %22897 = torch.prim.ListConstruct %int-1_20440 : (!torch.int) -> !torch.list<int>
    %true_20441 = torch.constant.bool true
    %none_20442 = torch.constant.none
    %22898 = torch.aten.mean.dim %22886, %22897, %true_20441, %none_20442 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20443 = torch.constant.int -1
    %22899 = torch.prim.ListConstruct %int-1_20443 : (!torch.int) -> !torch.list<int>
    %true_20444 = torch.constant.bool true
    %none_20445 = torch.constant.none
    %22900 = torch.aten.mean.dim %22887, %22899, %true_20444, %none_20445 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20446 = torch.constant.int -1
    %22901 = torch.prim.ListConstruct %int-1_20446 : (!torch.int) -> !torch.list<int>
    %true_20447 = torch.constant.bool true
    %none_20448 = torch.constant.none
    %22902 = torch.aten.mean.dim %22888, %22901, %true_20447, %none_20448 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20449 = torch.constant.int -1
    %22903 = torch.prim.ListConstruct %int-1_20449 : (!torch.int) -> !torch.list<int>
    %true_20450 = torch.constant.bool true
    %none_20451 = torch.constant.none
    %22904 = torch.aten.mean.dim %22889, %22903, %true_20450, %none_20451 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_20452 = torch.constant.int -1
    %22905 = torch.prim.ListConstruct %int-1_20452 : (!torch.int) -> !torch.list<int>
    %true_20453 = torch.constant.bool true
    %none_20454 = torch.constant.none
    %22906 = torch.aten.mean.dim %22890, %22905, %true_20453, %none_20454 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
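    // Step 3: add the epsilon (the f32 value nearest 1e-5) and apply rsqrt to
    // obtain the per-token normalization factor.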
    %float9.999990e-06_20455 = torch.constant.float 9.9999997473787516E-6
    %int1_20456 = torch.constant.int 1
    %22907 = torch.aten.add.Scalar %22892, %float9.999990e-06_20455, %int1_20456 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20457 = torch.constant.float 9.9999997473787516E-6
    %int1_20458 = torch.constant.int 1
    %22908 = torch.aten.add.Scalar %22894, %float9.999990e-06_20457, %int1_20458 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20459 = torch.constant.float 9.9999997473787516E-6
    %int1_20460 = torch.constant.int 1
    %22909 = torch.aten.add.Scalar %22896, %float9.999990e-06_20459, %int1_20460 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20461 = torch.constant.float 9.9999997473787516E-6
    %int1_20462 = torch.constant.int 1
    %22910 = torch.aten.add.Scalar %22898, %float9.999990e-06_20461, %int1_20462 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20463 = torch.constant.float 9.9999997473787516E-6
    %int1_20464 = torch.constant.int 1
    %22911 = torch.aten.add.Scalar %22900, %float9.999990e-06_20463, %int1_20464 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20465 = torch.constant.float 9.9999997473787516E-6
    %int1_20466 = torch.constant.int 1
    %22912 = torch.aten.add.Scalar %22902, %float9.999990e-06_20465, %int1_20466 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20467 = torch.constant.float 9.9999997473787516E-6
    %int1_20468 = torch.constant.int 1
    %22913 = torch.aten.add.Scalar %22904, %float9.999990e-06_20467, %int1_20468 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_20469 = torch.constant.float 9.9999997473787516E-6
    %int1_20470 = torch.constant.int 1
    %22914 = torch.aten.add.Scalar %22906, %float9.999990e-06_20469, %int1_20470 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22915 = torch.aten.rsqrt %22907 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22916 = torch.aten.rsqrt %22908 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22917 = torch.aten.rsqrt %22909 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22918 = torch.aten.rsqrt %22910 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22919 = torch.aten.rsqrt %22911 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22920 = torch.aten.rsqrt %22912 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22921 = torch.aten.rsqrt %22913 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %22922 = torch.aten.rsqrt %22914 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %22922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
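    // Step 4: scale each f32 replica by its rsqrt factor, then apply the
    // per-device [4096] f32 norm weight (%800 .. %807).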
    %22923 = torch.aten.mul.Tensor %22875, %22915 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22924 = torch.aten.mul.Tensor %22876, %22916 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22925 = torch.aten.mul.Tensor %22877, %22917 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22926 = torch.aten.mul.Tensor %22878, %22918 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22927 = torch.aten.mul.Tensor %22879, %22919 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22928 = torch.aten.mul.Tensor %22880, %22920 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22929 = torch.aten.mul.Tensor %22881, %22921 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22930 = torch.aten.mul.Tensor %22882, %22922 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22931 = torch.aten.mul.Tensor %800, %22923 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22932 = torch.aten.mul.Tensor %801, %22924 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22933 = torch.aten.mul.Tensor %802, %22925 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22934 = torch.aten.mul.Tensor %803, %22926 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22935 = torch.aten.mul.Tensor %804, %22927 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22936 = torch.aten.mul.Tensor %805, %22928 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22937 = torch.aten.mul.Tensor %806, %22929 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %22938 = torch.aten.mul.Tensor %807, %22930 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %22938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
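    // Step 5: downcast the normalized activations back to f16 (torch dtype
    // code 5).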
    %int5_20471 = torch.constant.int 5
    %22939 = torch.prims.convert_element_type %22931, %int5_20471 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20472 = torch.constant.int 5
    %22940 = torch.prims.convert_element_type %22932, %int5_20472 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20473 = torch.constant.int 5
    %22941 = torch.prims.convert_element_type %22933, %int5_20473 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20474 = torch.constant.int 5
    %22942 = torch.prims.convert_element_type %22934, %int5_20474 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20475 = torch.constant.int 5
    %22943 = torch.prims.convert_element_type %22935, %int5_20475 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20476 = torch.constant.int 5
    %22944 = torch.prims.convert_element_type %22936, %int5_20476 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20477 = torch.constant.int 5
    %22945 = torch.prims.convert_element_type %22937, %int5_20477 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_20478 = torch.constant.int 5
    %22946 = torch.prims.convert_element_type %22938, %int5_20478 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %22946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
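    // Sharded attention projection -- likely this block's Q projection, given
    // the [512, 4096] per-device shard shape: transpose each device's weight
    // shard (%808 .. %815) to [4096, 512], flatten the activations from
    // [4, s0*16, 4096] to [s0*64, 4096], matmul against the transposed shard,
    // and reshape the result back to [4, s0*16, 512].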
    %int1_20479 = torch.constant.int 1
    %int0_20480 = torch.constant.int 0
    %22947 = torch.prim.ListConstruct %int1_20479, %int0_20480 : (!torch.int, !torch.int) -> !torch.list<int>
    %22948 = torch.aten.permute %808, %22947 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_20481 = torch.constant.int 1
    %int0_20482 = torch.constant.int 0
    %22949 = torch.prim.ListConstruct %int1_20481, %int0_20482 : (!torch.int, !torch.int) -> !torch.list<int>
    %22950 = torch.aten.permute %809, %22949 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_20483 = torch.constant.int 1
    %int0_20484 = torch.constant.int 0
    %22951 = torch.prim.ListConstruct %int1_20483, %int0_20484 : (!torch.int, !torch.int) -> !torch.list<int>
    %22952 = torch.aten.permute %810, %22951 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_20485 = torch.constant.int 1
    %int0_20486 = torch.constant.int 0
    %22953 = torch.prim.ListConstruct %int1_20485, %int0_20486 : (!torch.int, !torch.int) -> !torch.list<int>
    %22954 = torch.aten.permute %811, %22953 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_20487 = torch.constant.int 1
    %int0_20488 = torch.constant.int 0
    %22955 = torch.prim.ListConstruct %int1_20487, %int0_20488 : (!torch.int, !torch.int) -> !torch.list<int>
    %22956 = torch.aten.permute %812, %22955 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_20489 = torch.constant.int 1
    %int0_20490 = torch.constant.int 0
    %22957 = torch.prim.ListConstruct %int1_20489, %int0_20490 : (!torch.int, !torch.int) -> !torch.list<int>
    %22958 = torch.aten.permute %813, %22957 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_20491 = torch.constant.int 1
    %int0_20492 = torch.constant.int 0
    %22959 = torch.prim.ListConstruct %int1_20491, %int0_20492 : (!torch.int, !torch.int) -> !torch.list<int>
    %22960 = torch.aten.permute %814, %22959 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_20493 = torch.constant.int 1
    %int0_20494 = torch.constant.int 0
    %22961 = torch.prim.ListConstruct %int1_20493, %int0_20494 : (!torch.int, !torch.int) -> !torch.list<int>
    %22962 = torch.aten.permute %815, %22961 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int4_20495 = torch.constant.int 4
    %22963 = torch.aten.mul.int %int4_20495, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20496 = torch.constant.int 4096
    %22964 = torch.prim.ListConstruct %22963, %int4096_20496 : (!torch.int, !torch.int) -> !torch.list<int>
    %22965 = torch.aten.view %22939, %22964 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22965, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22966 = torch.aten.mm %22965, %22948 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22966, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_20497 = torch.constant.int 4
    %int512_20498 = torch.constant.int 512
    %22967 = torch.prim.ListConstruct %int4_20497, %2482, %int512_20498 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22968 = torch.aten.view %22966, %22967 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_20499 = torch.constant.int 4
    %22969 = torch.aten.mul.int %int4_20499, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20500 = torch.constant.int 4096
    %22970 = torch.prim.ListConstruct %22969, %int4096_20500 : (!torch.int, !torch.int) -> !torch.list<int>
    %22971 = torch.aten.view %22940, %22970 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22971, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22972 = torch.aten.mm %22971, %22950 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22972, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_20501 = torch.constant.int 4
    %int512_20502 = torch.constant.int 512
    %22973 = torch.prim.ListConstruct %int4_20501, %2482, %int512_20502 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22974 = torch.aten.view %22972, %22973 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_20503 = torch.constant.int 4
    %22975 = torch.aten.mul.int %int4_20503, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20504 = torch.constant.int 4096
    %22976 = torch.prim.ListConstruct %22975, %int4096_20504 : (!torch.int, !torch.int) -> !torch.list<int>
    %22977 = torch.aten.view %22941, %22976 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22977, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22978 = torch.aten.mm %22977, %22952 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22978, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_20505 = torch.constant.int 4
    %int512_20506 = torch.constant.int 512
    %22979 = torch.prim.ListConstruct %int4_20505, %2482, %int512_20506 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22980 = torch.aten.view %22978, %22979 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_20507 = torch.constant.int 4
    %22981 = torch.aten.mul.int %int4_20507, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20508 = torch.constant.int 4096
    %22982 = torch.prim.ListConstruct %22981, %int4096_20508 : (!torch.int, !torch.int) -> !torch.list<int>
    %22983 = torch.aten.view %22942, %22982 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22983, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22984 = torch.aten.mm %22983, %22954 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22984, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_20509 = torch.constant.int 4
    %int512_20510 = torch.constant.int 512
    %22985 = torch.prim.ListConstruct %int4_20509, %2482, %int512_20510 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22986 = torch.aten.view %22984, %22985 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_20511 = torch.constant.int 4
    %22987 = torch.aten.mul.int %int4_20511, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20512 = torch.constant.int 4096
    %22988 = torch.prim.ListConstruct %22987, %int4096_20512 : (!torch.int, !torch.int) -> !torch.list<int>
    %22989 = torch.aten.view %22943, %22988 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22989, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22990 = torch.aten.mm %22989, %22956 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22990, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_20513 = torch.constant.int 4
    %int512_20514 = torch.constant.int 512
    %22991 = torch.prim.ListConstruct %int4_20513, %2482, %int512_20514 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22992 = torch.aten.view %22990, %22991 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_20515 = torch.constant.int 4
    %22993 = torch.aten.mul.int %int4_20515, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20516 = torch.constant.int 4096
    %22994 = torch.prim.ListConstruct %22993, %int4096_20516 : (!torch.int, !torch.int) -> !torch.list<int>
    %22995 = torch.aten.view %22944, %22994 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %22995, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %22996 = torch.aten.mm %22995, %22958 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %22996, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_20517 = torch.constant.int 4
    %int512_20518 = torch.constant.int 512
    %22997 = torch.prim.ListConstruct %int4_20517, %2482, %int512_20518 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %22998 = torch.aten.view %22996, %22997 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %22998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_20519 = torch.constant.int 4
    %22999 = torch.aten.mul.int %int4_20519, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20520 = torch.constant.int 4096
    %23000 = torch.prim.ListConstruct %22999, %int4096_20520 : (!torch.int, !torch.int) -> !torch.list<int>
    %23001 = torch.aten.view %22945, %23000 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23001, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23002 = torch.aten.mm %23001, %22960 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %23002, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_20521 = torch.constant.int 4
    %int512_20522 = torch.constant.int 512
    %23003 = torch.prim.ListConstruct %int4_20521, %2482, %int512_20522 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23004 = torch.aten.view %23002, %23003 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %23004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_20523 = torch.constant.int 4
    %23005 = torch.aten.mul.int %int4_20523, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20524 = torch.constant.int 4096
    %23006 = torch.prim.ListConstruct %23005, %int4096_20524 : (!torch.int, !torch.int) -> !torch.list<int>
    %23007 = torch.aten.view %22946, %23006 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23007, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23008 = torch.aten.mm %23007, %22962 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %23008, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_20525 = torch.constant.int 4
    %int512_20526 = torch.constant.int 512
    %23009 = torch.prim.ListConstruct %int4_20525, %2482, %int512_20526 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23010 = torch.aten.view %23008, %23009 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %23010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
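    // The [128,4096] weights %816..%823 are transposed to [4096,128], one per
    // device. A 128-column output is a single 128-dim head per shard, which
    // matches a K projection under 8-way grouped-query sharding (an assumption
    // from the shapes, not stated in the IR).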
    %int1_20527 = torch.constant.int 1
    %int0_20528 = torch.constant.int 0
    %23011 = torch.prim.ListConstruct %int1_20527, %int0_20528 : (!torch.int, !torch.int) -> !torch.list<int>
    %23012 = torch.aten.permute %816, %23011 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20529 = torch.constant.int 1
    %int0_20530 = torch.constant.int 0
    %23013 = torch.prim.ListConstruct %int1_20529, %int0_20530 : (!torch.int, !torch.int) -> !torch.list<int>
    %23014 = torch.aten.permute %817, %23013 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20531 = torch.constant.int 1
    %int0_20532 = torch.constant.int 0
    %23015 = torch.prim.ListConstruct %int1_20531, %int0_20532 : (!torch.int, !torch.int) -> !torch.list<int>
    %23016 = torch.aten.permute %818, %23015 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20533 = torch.constant.int 1
    %int0_20534 = torch.constant.int 0
    %23017 = torch.prim.ListConstruct %int1_20533, %int0_20534 : (!torch.int, !torch.int) -> !torch.list<int>
    %23018 = torch.aten.permute %819, %23017 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20535 = torch.constant.int 1
    %int0_20536 = torch.constant.int 0
    %23019 = torch.prim.ListConstruct %int1_20535, %int0_20536 : (!torch.int, !torch.int) -> !torch.list<int>
    %23020 = torch.aten.permute %820, %23019 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20537 = torch.constant.int 1
    %int0_20538 = torch.constant.int 0
    %23021 = torch.prim.ListConstruct %int1_20537, %int0_20538 : (!torch.int, !torch.int) -> !torch.list<int>
    %23022 = torch.aten.permute %821, %23021 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20539 = torch.constant.int 1
    %int0_20540 = torch.constant.int 0
    %23023 = torch.prim.ListConstruct %int1_20539, %int0_20540 : (!torch.int, !torch.int) -> !torch.list<int>
    %23024 = torch.aten.permute %822, %23023 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20541 = torch.constant.int 1
    %int0_20542 = torch.constant.int 0
    %23025 = torch.prim.ListConstruct %int1_20541, %int0_20542 : (!torch.int, !torch.int) -> !torch.list<int>
    %23026 = torch.aten.permute %823, %23025 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
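    // Per-device projections against the [4096,128] shards: flatten [4,?,4096]
    // to [?,4096], mm, then reshape to [4,?,128], one pass per device.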
    %int4_20543 = torch.constant.int 4
    %23027 = torch.aten.mul.int %int4_20543, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20544 = torch.constant.int 4096
    %23028 = torch.prim.ListConstruct %23027, %int4096_20544 : (!torch.int, !torch.int) -> !torch.list<int>
    %23029 = torch.aten.view %22939, %23028 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23029, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23030 = torch.aten.mm %23029, %23012 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23030, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20545 = torch.constant.int 4
    %int128_20546 = torch.constant.int 128
    %23031 = torch.prim.ListConstruct %int4_20545, %2482, %int128_20546 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23032 = torch.aten.view %23030, %23031 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20547 = torch.constant.int 4
    %23033 = torch.aten.mul.int %int4_20547, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20548 = torch.constant.int 4096
    %23034 = torch.prim.ListConstruct %23033, %int4096_20548 : (!torch.int, !torch.int) -> !torch.list<int>
    %23035 = torch.aten.view %22940, %23034 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23035, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23036 = torch.aten.mm %23035, %23014 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23036, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20549 = torch.constant.int 4
    %int128_20550 = torch.constant.int 128
    %23037 = torch.prim.ListConstruct %int4_20549, %2482, %int128_20550 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23038 = torch.aten.view %23036, %23037 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20551 = torch.constant.int 4
    %23039 = torch.aten.mul.int %int4_20551, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20552 = torch.constant.int 4096
    %23040 = torch.prim.ListConstruct %23039, %int4096_20552 : (!torch.int, !torch.int) -> !torch.list<int>
    %23041 = torch.aten.view %22941, %23040 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23041, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23042 = torch.aten.mm %23041, %23016 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23042, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20553 = torch.constant.int 4
    %int128_20554 = torch.constant.int 128
    %23043 = torch.prim.ListConstruct %int4_20553, %2482, %int128_20554 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23044 = torch.aten.view %23042, %23043 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20555 = torch.constant.int 4
    %23045 = torch.aten.mul.int %int4_20555, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20556 = torch.constant.int 4096
    %23046 = torch.prim.ListConstruct %23045, %int4096_20556 : (!torch.int, !torch.int) -> !torch.list<int>
    %23047 = torch.aten.view %22942, %23046 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23047, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23048 = torch.aten.mm %23047, %23018 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23048, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20557 = torch.constant.int 4
    %int128_20558 = torch.constant.int 128
    %23049 = torch.prim.ListConstruct %int4_20557, %2482, %int128_20558 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23050 = torch.aten.view %23048, %23049 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20559 = torch.constant.int 4
    %23051 = torch.aten.mul.int %int4_20559, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20560 = torch.constant.int 4096
    %23052 = torch.prim.ListConstruct %23051, %int4096_20560 : (!torch.int, !torch.int) -> !torch.list<int>
    %23053 = torch.aten.view %22943, %23052 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23053, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23054 = torch.aten.mm %23053, %23020 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23054, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20561 = torch.constant.int 4
    %int128_20562 = torch.constant.int 128
    %23055 = torch.prim.ListConstruct %int4_20561, %2482, %int128_20562 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23056 = torch.aten.view %23054, %23055 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20563 = torch.constant.int 4
    %23057 = torch.aten.mul.int %int4_20563, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20564 = torch.constant.int 4096
    %23058 = torch.prim.ListConstruct %23057, %int4096_20564 : (!torch.int, !torch.int) -> !torch.list<int>
    %23059 = torch.aten.view %22944, %23058 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23059, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23060 = torch.aten.mm %23059, %23022 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23060, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20565 = torch.constant.int 4
    %int128_20566 = torch.constant.int 128
    %23061 = torch.prim.ListConstruct %int4_20565, %2482, %int128_20566 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23062 = torch.aten.view %23060, %23061 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20567 = torch.constant.int 4
    %23063 = torch.aten.mul.int %int4_20567, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20568 = torch.constant.int 4096
    %23064 = torch.prim.ListConstruct %23063, %int4096_20568 : (!torch.int, !torch.int) -> !torch.list<int>
    %23065 = torch.aten.view %22945, %23064 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23065, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23066 = torch.aten.mm %23065, %23024 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23066, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20569 = torch.constant.int 4
    %int128_20570 = torch.constant.int 128
    %23067 = torch.prim.ListConstruct %int4_20569, %2482, %int128_20570 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23068 = torch.aten.view %23066, %23067 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20571 = torch.constant.int 4
    %23069 = torch.aten.mul.int %int4_20571, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20572 = torch.constant.int 4096
    %23070 = torch.prim.ListConstruct %23069, %int4096_20572 : (!torch.int, !torch.int) -> !torch.list<int>
    %23071 = torch.aten.view %22946, %23070 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23071, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23072 = torch.aten.mm %23071, %23026 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23072, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20573 = torch.constant.int 4
    %int128_20574 = torch.constant.int 128
    %23073 = torch.prim.ListConstruct %int4_20573, %2482, %int128_20574 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23074 = torch.aten.view %23072, %23073 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
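    // A second set of [128,4096] -> [4096,128] transposes (%824..%831); by their
    // position after the 512- and 128-wide projections these are presumably the
    // V projection weight shards.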
    %int1_20575 = torch.constant.int 1
    %int0_20576 = torch.constant.int 0
    %23075 = torch.prim.ListConstruct %int1_20575, %int0_20576 : (!torch.int, !torch.int) -> !torch.list<int>
    %23076 = torch.aten.permute %824, %23075 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20577 = torch.constant.int 1
    %int0_20578 = torch.constant.int 0
    %23077 = torch.prim.ListConstruct %int1_20577, %int0_20578 : (!torch.int, !torch.int) -> !torch.list<int>
    %23078 = torch.aten.permute %825, %23077 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20579 = torch.constant.int 1
    %int0_20580 = torch.constant.int 0
    %23079 = torch.prim.ListConstruct %int1_20579, %int0_20580 : (!torch.int, !torch.int) -> !torch.list<int>
    %23080 = torch.aten.permute %826, %23079 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20581 = torch.constant.int 1
    %int0_20582 = torch.constant.int 0
    %23081 = torch.prim.ListConstruct %int1_20581, %int0_20582 : (!torch.int, !torch.int) -> !torch.list<int>
    %23082 = torch.aten.permute %827, %23081 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20583 = torch.constant.int 1
    %int0_20584 = torch.constant.int 0
    %23083 = torch.prim.ListConstruct %int1_20583, %int0_20584 : (!torch.int, !torch.int) -> !torch.list<int>
    %23084 = torch.aten.permute %828, %23083 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20585 = torch.constant.int 1
    %int0_20586 = torch.constant.int 0
    %23085 = torch.prim.ListConstruct %int1_20585, %int0_20586 : (!torch.int, !torch.int) -> !torch.list<int>
    %23086 = torch.aten.permute %829, %23085 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20587 = torch.constant.int 1
    %int0_20588 = torch.constant.int 0
    %23087 = torch.prim.ListConstruct %int1_20587, %int0_20588 : (!torch.int, !torch.int) -> !torch.list<int>
    %23088 = torch.aten.permute %830, %23087 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_20589 = torch.constant.int 1
    %int0_20590 = torch.constant.int 0
    %23089 = torch.prim.ListConstruct %int1_20589, %int0_20590 : (!torch.int, !torch.int) -> !torch.list<int>
    %23090 = torch.aten.permute %831, %23089 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
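    // Per-device V projections, following the same flatten / mm / reshape
    // pattern as the previous group.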
    %int4_20591 = torch.constant.int 4
    %23091 = torch.aten.mul.int %int4_20591, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20592 = torch.constant.int 4096
    %23092 = torch.prim.ListConstruct %23091, %int4096_20592 : (!torch.int, !torch.int) -> !torch.list<int>
    %23093 = torch.aten.view %22939, %23092 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23093, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23094 = torch.aten.mm %23093, %23076 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23094, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20593 = torch.constant.int 4
    %int128_20594 = torch.constant.int 128
    %23095 = torch.prim.ListConstruct %int4_20593, %2482, %int128_20594 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23096 = torch.aten.view %23094, %23095 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20595 = torch.constant.int 4
    %23097 = torch.aten.mul.int %int4_20595, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20596 = torch.constant.int 4096
    %23098 = torch.prim.ListConstruct %23097, %int4096_20596 : (!torch.int, !torch.int) -> !torch.list<int>
    %23099 = torch.aten.view %22940, %23098 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23099, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23100 = torch.aten.mm %23099, %23078 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23100, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20597 = torch.constant.int 4
    %int128_20598 = torch.constant.int 128
    %23101 = torch.prim.ListConstruct %int4_20597, %2482, %int128_20598 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23102 = torch.aten.view %23100, %23101 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20599 = torch.constant.int 4
    %23103 = torch.aten.mul.int %int4_20599, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20600 = torch.constant.int 4096
    %23104 = torch.prim.ListConstruct %23103, %int4096_20600 : (!torch.int, !torch.int) -> !torch.list<int>
    %23105 = torch.aten.view %22941, %23104 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23105, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23106 = torch.aten.mm %23105, %23080 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23106, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20601 = torch.constant.int 4
    %int128_20602 = torch.constant.int 128
    %23107 = torch.prim.ListConstruct %int4_20601, %2482, %int128_20602 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23108 = torch.aten.view %23106, %23107 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20603 = torch.constant.int 4
    %23109 = torch.aten.mul.int %int4_20603, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20604 = torch.constant.int 4096
    %23110 = torch.prim.ListConstruct %23109, %int4096_20604 : (!torch.int, !torch.int) -> !torch.list<int>
    %23111 = torch.aten.view %22942, %23110 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23111, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23112 = torch.aten.mm %23111, %23082 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23112, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20605 = torch.constant.int 4
    %int128_20606 = torch.constant.int 128
    %23113 = torch.prim.ListConstruct %int4_20605, %2482, %int128_20606 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23114 = torch.aten.view %23112, %23113 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20607 = torch.constant.int 4
    %23115 = torch.aten.mul.int %int4_20607, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20608 = torch.constant.int 4096
    %23116 = torch.prim.ListConstruct %23115, %int4096_20608 : (!torch.int, !torch.int) -> !torch.list<int>
    %23117 = torch.aten.view %22943, %23116 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23117, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23118 = torch.aten.mm %23117, %23084 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23118, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20609 = torch.constant.int 4
    %int128_20610 = torch.constant.int 128
    %23119 = torch.prim.ListConstruct %int4_20609, %2482, %int128_20610 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23120 = torch.aten.view %23118, %23119 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20611 = torch.constant.int 4
    %23121 = torch.aten.mul.int %int4_20611, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20612 = torch.constant.int 4096
    %23122 = torch.prim.ListConstruct %23121, %int4096_20612 : (!torch.int, !torch.int) -> !torch.list<int>
    %23123 = torch.aten.view %22944, %23122 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23123, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23124 = torch.aten.mm %23123, %23086 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23124, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20613 = torch.constant.int 4
    %int128_20614 = torch.constant.int 128
    %23125 = torch.prim.ListConstruct %int4_20613, %2482, %int128_20614 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23126 = torch.aten.view %23124, %23125 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20615 = torch.constant.int 4
    %23127 = torch.aten.mul.int %int4_20615, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20616 = torch.constant.int 4096
    %23128 = torch.prim.ListConstruct %23127, %int4096_20616 : (!torch.int, !torch.int) -> !torch.list<int>
    %23129 = torch.aten.view %22945, %23128 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23129, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23130 = torch.aten.mm %23129, %23088 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23130, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20617 = torch.constant.int 4
    %int128_20618 = torch.constant.int 128
    %23131 = torch.prim.ListConstruct %int4_20617, %2482, %int128_20618 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23132 = torch.aten.view %23130, %23131 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_20619 = torch.constant.int 4
    %23133 = torch.aten.mul.int %int4_20619, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_20620 = torch.constant.int 4096
    %23134 = torch.prim.ListConstruct %23133, %int4096_20620 : (!torch.int, !torch.int) -> !torch.list<int>
    %23135 = torch.aten.view %22946, %23134 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23135, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %23136 = torch.aten.mm %23135, %23090 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %23136, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_20621 = torch.constant.int 4
    %int128_20622 = torch.constant.int 128
    %23137 = torch.prim.ListConstruct %int4_20621, %2482, %int128_20622 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23138 = torch.aten.view %23136, %23137 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %23138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
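    // Split each per-device [4,?,512] projection into heads: view to
    // [4,?,4,128], i.e. 4 heads of dimension 128 per shard.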
    %int4_20623 = torch.constant.int 4
    %int4_20624 = torch.constant.int 4
    %int128_20625 = torch.constant.int 128
    %23139 = torch.prim.ListConstruct %int4_20623, %2482, %int4_20624, %int128_20625 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23140 = torch.aten.view %22968, %23139 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_20626 = torch.constant.int 4
    %int4_20627 = torch.constant.int 4
    %int128_20628 = torch.constant.int 128
    %23141 = torch.prim.ListConstruct %int4_20626, %2482, %int4_20627, %int128_20628 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23142 = torch.aten.view %22974, %23141 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_20629 = torch.constant.int 4
    %int4_20630 = torch.constant.int 4
    %int128_20631 = torch.constant.int 128
    %23143 = torch.prim.ListConstruct %int4_20629, %2482, %int4_20630, %int128_20631 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23144 = torch.aten.view %22980, %23143 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_20632 = torch.constant.int 4
    %int4_20633 = torch.constant.int 4
    %int128_20634 = torch.constant.int 128
    %23145 = torch.prim.ListConstruct %int4_20632, %2482, %int4_20633, %int128_20634 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23146 = torch.aten.view %22986, %23145 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_20635 = torch.constant.int 4
    %int4_20636 = torch.constant.int 4
    %int128_20637 = torch.constant.int 128
    %23147 = torch.prim.ListConstruct %int4_20635, %2482, %int4_20636, %int128_20637 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23148 = torch.aten.view %22992, %23147 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_20638 = torch.constant.int 4
    %int4_20639 = torch.constant.int 4
    %int128_20640 = torch.constant.int 128
    %23149 = torch.prim.ListConstruct %int4_20638, %2482, %int4_20639, %int128_20640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23150 = torch.aten.view %22998, %23149 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_20641 = torch.constant.int 4
    %int4_20642 = torch.constant.int 4
    %int128_20643 = torch.constant.int 128
    %23151 = torch.prim.ListConstruct %int4_20641, %2482, %int4_20642, %int128_20643 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23152 = torch.aten.view %23004, %23151 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_20644 = torch.constant.int 4
    %int4_20645 = torch.constant.int 4
    %int128_20646 = torch.constant.int 128
    %23153 = torch.prim.ListConstruct %int4_20644, %2482, %int4_20645, %int128_20646 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23154 = torch.aten.view %23010, %23153 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
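    // Expose the single KV head per shard: the [4,?,128] results of both
    // 128-wide projection groups are viewed as [4,?,1,128].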
    %int4_20647 = torch.constant.int 4
    %int1_20648 = torch.constant.int 1
    %int128_20649 = torch.constant.int 128
    %23155 = torch.prim.ListConstruct %int4_20647, %2482, %int1_20648, %int128_20649 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23156 = torch.aten.view %23032, %23155 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20650 = torch.constant.int 4
    %int1_20651 = torch.constant.int 1
    %int128_20652 = torch.constant.int 128
    %23157 = torch.prim.ListConstruct %int4_20650, %2482, %int1_20651, %int128_20652 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23158 = torch.aten.view %23038, %23157 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20653 = torch.constant.int 4
    %int1_20654 = torch.constant.int 1
    %int128_20655 = torch.constant.int 128
    %23159 = torch.prim.ListConstruct %int4_20653, %2482, %int1_20654, %int128_20655 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23160 = torch.aten.view %23044, %23159 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20656 = torch.constant.int 4
    %int1_20657 = torch.constant.int 1
    %int128_20658 = torch.constant.int 128
    %23161 = torch.prim.ListConstruct %int4_20656, %2482, %int1_20657, %int128_20658 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23162 = torch.aten.view %23050, %23161 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20659 = torch.constant.int 4
    %int1_20660 = torch.constant.int 1
    %int128_20661 = torch.constant.int 128
    %23163 = torch.prim.ListConstruct %int4_20659, %2482, %int1_20660, %int128_20661 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23164 = torch.aten.view %23056, %23163 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20662 = torch.constant.int 4
    %int1_20663 = torch.constant.int 1
    %int128_20664 = torch.constant.int 128
    %23165 = torch.prim.ListConstruct %int4_20662, %2482, %int1_20663, %int128_20664 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23166 = torch.aten.view %23062, %23165 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20665 = torch.constant.int 4
    %int1_20666 = torch.constant.int 1
    %int128_20667 = torch.constant.int 128
    %23167 = torch.prim.ListConstruct %int4_20665, %2482, %int1_20666, %int128_20667 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23168 = torch.aten.view %23068, %23167 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20668 = torch.constant.int 4
    %int1_20669 = torch.constant.int 1
    %int128_20670 = torch.constant.int 128
    %23169 = torch.prim.ListConstruct %int4_20668, %2482, %int1_20669, %int128_20670 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23170 = torch.aten.view %23074, %23169 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20671 = torch.constant.int 4
    %int1_20672 = torch.constant.int 1
    %int128_20673 = torch.constant.int 128
    %23171 = torch.prim.ListConstruct %int4_20671, %2482, %int1_20672, %int128_20673 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23172 = torch.aten.view %23096, %23171 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20674 = torch.constant.int 4
    %int1_20675 = torch.constant.int 1
    %int128_20676 = torch.constant.int 128
    %23173 = torch.prim.ListConstruct %int4_20674, %2482, %int1_20675, %int128_20676 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23174 = torch.aten.view %23102, %23173 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20677 = torch.constant.int 4
    %int1_20678 = torch.constant.int 1
    %int128_20679 = torch.constant.int 128
    %23175 = torch.prim.ListConstruct %int4_20677, %2482, %int1_20678, %int128_20679 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23176 = torch.aten.view %23108, %23175 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20680 = torch.constant.int 4
    %int1_20681 = torch.constant.int 1
    %int128_20682 = torch.constant.int 128
    %23177 = torch.prim.ListConstruct %int4_20680, %2482, %int1_20681, %int128_20682 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23178 = torch.aten.view %23114, %23177 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20683 = torch.constant.int 4
    %int1_20684 = torch.constant.int 1
    %int128_20685 = torch.constant.int 128
    %23179 = torch.prim.ListConstruct %int4_20683, %2482, %int1_20684, %int128_20685 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23180 = torch.aten.view %23120, %23179 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20686 = torch.constant.int 4
    %int1_20687 = torch.constant.int 1
    %int128_20688 = torch.constant.int 128
    %23181 = torch.prim.ListConstruct %int4_20686, %2482, %int1_20687, %int128_20688 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23182 = torch.aten.view %23126, %23181 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20689 = torch.constant.int 4
    %int1_20690 = torch.constant.int 1
    %int128_20691 = torch.constant.int 128
    %23183 = torch.prim.ListConstruct %int4_20689, %2482, %int1_20690, %int128_20691 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23184 = torch.aten.view %23132, %23183 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_20692 = torch.constant.int 4
    %int1_20693 = torch.constant.int 1
    %int128_20694 = torch.constant.int 128
    %23185 = torch.prim.ListConstruct %int4_20692, %2482, %int1_20693, %int128_20694 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23186 = torch.aten.view %23138, %23185 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
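    // Rotary position embedding (RoPE) table, computed on the host ("cpu")
    // device. For positions p in [0, 131072) and frequency index i in [0, 64)
    // the ops below evaluate
    //   inv_freq[i] = 1 / 500000^(2i/128)   (arange(0,128,2), /128, pow, reciprocal)
    //   angle[p,i]  = p * inv_freq[i]
    //   table[p,i]  = cos(angle[p,i]) + j*sin(angle[p,i])
    // yielding a [131072,64] complex<f32> table. The 5.0e+05 base and 131072
    // maximum length look like a Llama-3-style configuration (an inference from
    // the constants, not stated in the IR).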
    %int131072_20695 = torch.constant.int 131072
    %none_20696 = torch.constant.none
    %none_20697 = torch.constant.none
    %cpu_20698 = torch.constant.device "cpu"
    %false_20699 = torch.constant.bool false
    %23187 = torch.aten.arange %int131072_20695, %none_20696, %none_20697, %cpu_20698, %false_20699 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_20700 = torch.constant.int 0
    %int128_20701 = torch.constant.int 128
    %int2_20702 = torch.constant.int 2
    %none_20703 = torch.constant.none
    %none_20704 = torch.constant.none
    %cpu_20705 = torch.constant.device "cpu"
    %false_20706 = torch.constant.bool false
    %23188 = torch.aten.arange.start_step %int0_20700, %int128_20701, %int2_20702, %none_20703, %none_20704, %cpu_20705, %false_20706 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_20707 = torch.constant.int 0
    %int0_20708 = torch.constant.int 0
    %int64_20709 = torch.constant.int 64
    %int1_20710 = torch.constant.int 1
    %23189 = torch.aten.slice.Tensor %23188, %int0_20707, %int0_20708, %int64_20709, %int1_20710 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_20711 = torch.constant.int 6
    %23190 = torch.prims.convert_element_type %23189, %int6_20711 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_20712 = torch.constant.int 128
    %23191 = torch.aten.div.Scalar %23190, %int128_20712 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_20713 = torch.constant.float 5.000000e+05
    %23192 = torch.aten.pow.Scalar %float5.000000e05_20713, %23191 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %23193 = torch.aten.reciprocal %23192 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_20714 = torch.constant.float 1.000000e+00
    %23194 = torch.aten.mul.Scalar %23193, %float1.000000e00_20714 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_20715 = torch.constant.int 131072
    %int1_20716 = torch.constant.int 1
    %23195 = torch.prim.ListConstruct %int131072_20715, %int1_20716 : (!torch.int, !torch.int) -> !torch.list<int>
    %23196 = torch.aten.view %23187, %23195 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %23197 = torch.aten.mul.Tensor %23196, %23194 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %23198 = torch.aten.cos %23197 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %23199 = torch.aten.sin %23197 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %23200 = torch.aten.complex %23198, %23199 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
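    // Replicate the rotation table to all eight devices; each
    // flow.tensor.transfer places a copy under the corresponding
    // #hal.device.promise affinity.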
    %23201 = torch_c.to_builtin_tensor %23200 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23202 = flow.tensor.transfer %23201 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %23203 = torch_c.from_builtin_tensor %23202 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23204 = torch_c.to_builtin_tensor %23200 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23205 = flow.tensor.transfer %23204 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %23206 = torch_c.from_builtin_tensor %23205 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23207 = torch_c.to_builtin_tensor %23200 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23208 = flow.tensor.transfer %23207 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %23209 = torch_c.from_builtin_tensor %23208 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23210 = torch_c.to_builtin_tensor %23200 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23211 = flow.tensor.transfer %23210 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %23212 = torch_c.from_builtin_tensor %23211 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23213 = torch_c.to_builtin_tensor %23200 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23214 = flow.tensor.transfer %23213 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %23215 = torch_c.from_builtin_tensor %23214 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23216 = torch_c.to_builtin_tensor %23200 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23217 = flow.tensor.transfer %23216 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %23218 = torch_c.from_builtin_tensor %23217 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23219 = torch_c.to_builtin_tensor %23200 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23220 = flow.tensor.transfer %23219 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %23221 = torch_c.from_builtin_tensor %23220 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23222 = torch_c.to_builtin_tensor %23200 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23223 = flow.tensor.transfer %23222 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %23224 = torch_c.from_builtin_tensor %23223 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
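    // Apply the rotation to each per-device Q tensor (the K shards presumably
    // get the same treatment further on): slice the table to the current
    // sequence length, broadcast it to [1,?,1,64], bitcast the f16 activations
    // so adjacent element pairs form complex<f16> values, multiply (promoting
    // to complex<f32>), bitcast back to [4,?,4,128] f32, and convert down to f16.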
    %int1_20717 = torch.constant.int 1
    %23225 = torch.aten.size.int %22968, %int1_20717 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_20718 = torch.constant.int 0
    %23226 = torch.aten.add.int %int0_20718, %23225 : !torch.int, !torch.int -> !torch.int
    %int0_20719 = torch.constant.int 0
    %int0_20720 = torch.constant.int 0
    %int1_20721 = torch.constant.int 1
    %23227 = torch.aten.slice.Tensor %23203, %int0_20719, %int0_20720, %23226, %int1_20721 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23227, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20722 = torch.constant.int 1
    %int0_20723 = torch.constant.int 0
    %int9223372036854775807_20724 = torch.constant.int 9223372036854775807
    %int1_20725 = torch.constant.int 1
    %23228 = torch.aten.slice.Tensor %23227, %int1_20722, %int0_20723, %int9223372036854775807_20724, %int1_20725 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23228, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20726 = torch.constant.int 0
    %23229 = torch.aten.unsqueeze %23228, %int0_20726 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23229, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20727 = torch.constant.int 2
    %23230 = torch.aten.unsqueeze %23229, %int2_20727 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23230, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20728 = torch.constant.int 3
    %int0_20729 = torch.constant.int 0
    %int9223372036854775807_20730 = torch.constant.int 9223372036854775807
    %int1_20731 = torch.constant.int 1
    %23231 = torch.aten.slice.Tensor %23230, %int3_20728, %int0_20729, %int9223372036854775807_20730, %int1_20731 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23231, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23232 = torch_c.to_builtin_tensor %23140 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_20732 = arith.constant 1 : index
    %dim_20733 = tensor.dim %23232, %c1_20732 : tensor<4x?x4x128xf16>
    %23233 = flow.tensor.bitcast %23232 : tensor<4x?x4x128xf16>{%dim_20733} -> tensor<4x?x4x64xcomplex<f16>>{%dim_20733}
    %23234 = torch_c.from_builtin_tensor %23233 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %23234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %23235 = torch.aten.mul.Tensor %23234, %23231 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %23235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %23236 = torch_c.to_builtin_tensor %23235 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_20734 = arith.constant 1 : index
    %dim_20735 = tensor.dim %23236, %c1_20734 : tensor<4x?x4x64xcomplex<f32>>
    %23237 = flow.tensor.bitcast %23236 : tensor<4x?x4x64xcomplex<f32>>{%dim_20735} -> tensor<4x?x4x128xf32>{%dim_20735}
    %23238 = torch_c.from_builtin_tensor %23237 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %23238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_20736 = torch.constant.int 5
    %23239 = torch.prims.convert_element_type %23238, %int5_20736 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_20737 = torch.constant.int 1
    %23240 = torch.aten.size.int %22974, %int1_20737 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_20738 = torch.constant.int 0
    %23241 = torch.aten.add.int %int0_20738, %23240 : !torch.int, !torch.int -> !torch.int
    %int0_20739 = torch.constant.int 0
    %int0_20740 = torch.constant.int 0
    %int1_20741 = torch.constant.int 1
    %23242 = torch.aten.slice.Tensor %23206, %int0_20739, %int0_20740, %23241, %int1_20741 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23242, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20742 = torch.constant.int 1
    %int0_20743 = torch.constant.int 0
    %int9223372036854775807_20744 = torch.constant.int 9223372036854775807
    %int1_20745 = torch.constant.int 1
    %23243 = torch.aten.slice.Tensor %23242, %int1_20742, %int0_20743, %int9223372036854775807_20744, %int1_20745 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23243, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20746 = torch.constant.int 0
    %23244 = torch.aten.unsqueeze %23243, %int0_20746 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23244, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20747 = torch.constant.int 2
    %23245 = torch.aten.unsqueeze %23244, %int2_20747 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23245, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20748 = torch.constant.int 3
    %int0_20749 = torch.constant.int 0
    %int9223372036854775807_20750 = torch.constant.int 9223372036854775807
    %int1_20751 = torch.constant.int 1
    %23246 = torch.aten.slice.Tensor %23245, %int3_20748, %int0_20749, %int9223372036854775807_20750, %int1_20751 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23246, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23247 = torch_c.to_builtin_tensor %23142 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_20752 = arith.constant 1 : index
    %dim_20753 = tensor.dim %23247, %c1_20752 : tensor<4x?x4x128xf16>
    %23248 = flow.tensor.bitcast %23247 : tensor<4x?x4x128xf16>{%dim_20753} -> tensor<4x?x4x64xcomplex<f16>>{%dim_20753}
    %23249 = torch_c.from_builtin_tensor %23248 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %23249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %23250 = torch.aten.mul.Tensor %23249, %23246 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %23250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %23251 = torch_c.to_builtin_tensor %23250 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_20754 = arith.constant 1 : index
    %dim_20755 = tensor.dim %23251, %c1_20754 : tensor<4x?x4x64xcomplex<f32>>
    %23252 = flow.tensor.bitcast %23251 : tensor<4x?x4x64xcomplex<f32>>{%dim_20755} -> tensor<4x?x4x128xf32>{%dim_20755}
    %23253 = torch_c.from_builtin_tensor %23252 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %23253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_20756 = torch.constant.int 5
    %23254 = torch.prims.convert_element_type %23253, %int5_20756 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
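    // Same RoPE pattern, next query shard (result %23269).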
    %int1_20757 = torch.constant.int 1
    %23255 = torch.aten.size.int %22980, %int1_20757 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_20758 = torch.constant.int 0
    %23256 = torch.aten.add.int %int0_20758, %23255 : !torch.int, !torch.int -> !torch.int
    %int0_20759 = torch.constant.int 0
    %int0_20760 = torch.constant.int 0
    %int1_20761 = torch.constant.int 1
    %23257 = torch.aten.slice.Tensor %23209, %int0_20759, %int0_20760, %23256, %int1_20761 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23257, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20762 = torch.constant.int 1
    %int0_20763 = torch.constant.int 0
    %int9223372036854775807_20764 = torch.constant.int 9223372036854775807
    %int1_20765 = torch.constant.int 1
    %23258 = torch.aten.slice.Tensor %23257, %int1_20762, %int0_20763, %int9223372036854775807_20764, %int1_20765 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23258, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20766 = torch.constant.int 0
    %23259 = torch.aten.unsqueeze %23258, %int0_20766 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23259, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20767 = torch.constant.int 2
    %23260 = torch.aten.unsqueeze %23259, %int2_20767 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23260, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20768 = torch.constant.int 3
    %int0_20769 = torch.constant.int 0
    %int9223372036854775807_20770 = torch.constant.int 9223372036854775807
    %int1_20771 = torch.constant.int 1
    %23261 = torch.aten.slice.Tensor %23260, %int3_20768, %int0_20769, %int9223372036854775807_20770, %int1_20771 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23261, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23262 = torch_c.to_builtin_tensor %23144 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_20772 = arith.constant 1 : index
    %dim_20773 = tensor.dim %23262, %c1_20772 : tensor<4x?x4x128xf16>
    %23263 = flow.tensor.bitcast %23262 : tensor<4x?x4x128xf16>{%dim_20773} -> tensor<4x?x4x64xcomplex<f16>>{%dim_20773}
    %23264 = torch_c.from_builtin_tensor %23263 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %23264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %23265 = torch.aten.mul.Tensor %23264, %23261 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %23265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %23266 = torch_c.to_builtin_tensor %23265 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_20774 = arith.constant 1 : index
    %dim_20775 = tensor.dim %23266, %c1_20774 : tensor<4x?x4x64xcomplex<f32>>
    %23267 = flow.tensor.bitcast %23266 : tensor<4x?x4x64xcomplex<f32>>{%dim_20775} -> tensor<4x?x4x128xf32>{%dim_20775}
    %23268 = torch_c.from_builtin_tensor %23267 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %23268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_20776 = torch.constant.int 5
    %23269 = torch.prims.convert_element_type %23268, %int5_20776 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
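    // Same RoPE pattern, next query shard (result %23284).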
    %int1_20777 = torch.constant.int 1
    %23270 = torch.aten.size.int %22986, %int1_20777 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_20778 = torch.constant.int 0
    %23271 = torch.aten.add.int %int0_20778, %23270 : !torch.int, !torch.int -> !torch.int
    %int0_20779 = torch.constant.int 0
    %int0_20780 = torch.constant.int 0
    %int1_20781 = torch.constant.int 1
    %23272 = torch.aten.slice.Tensor %23212, %int0_20779, %int0_20780, %23271, %int1_20781 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23272, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20782 = torch.constant.int 1
    %int0_20783 = torch.constant.int 0
    %int9223372036854775807_20784 = torch.constant.int 9223372036854775807
    %int1_20785 = torch.constant.int 1
    %23273 = torch.aten.slice.Tensor %23272, %int1_20782, %int0_20783, %int9223372036854775807_20784, %int1_20785 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23273, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20786 = torch.constant.int 0
    %23274 = torch.aten.unsqueeze %23273, %int0_20786 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23274, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20787 = torch.constant.int 2
    %23275 = torch.aten.unsqueeze %23274, %int2_20787 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23275, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20788 = torch.constant.int 3
    %int0_20789 = torch.constant.int 0
    %int9223372036854775807_20790 = torch.constant.int 9223372036854775807
    %int1_20791 = torch.constant.int 1
    %23276 = torch.aten.slice.Tensor %23275, %int3_20788, %int0_20789, %int9223372036854775807_20790, %int1_20791 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23276, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23277 = torch_c.to_builtin_tensor %23146 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_20792 = arith.constant 1 : index
    %dim_20793 = tensor.dim %23277, %c1_20792 : tensor<4x?x4x128xf16>
    %23278 = flow.tensor.bitcast %23277 : tensor<4x?x4x128xf16>{%dim_20793} -> tensor<4x?x4x64xcomplex<f16>>{%dim_20793}
    %23279 = torch_c.from_builtin_tensor %23278 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %23279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %23280 = torch.aten.mul.Tensor %23279, %23276 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %23280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %23281 = torch_c.to_builtin_tensor %23280 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_20794 = arith.constant 1 : index
    %dim_20795 = tensor.dim %23281, %c1_20794 : tensor<4x?x4x64xcomplex<f32>>
    %23282 = flow.tensor.bitcast %23281 : tensor<4x?x4x64xcomplex<f32>>{%dim_20795} -> tensor<4x?x4x128xf32>{%dim_20795}
    %23283 = torch_c.from_builtin_tensor %23282 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %23283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_20796 = torch.constant.int 5
    %23284 = torch.prims.convert_element_type %23283, %int5_20796 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
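    // Same RoPE pattern, next query shard (result %23299).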
    %int1_20797 = torch.constant.int 1
    %23285 = torch.aten.size.int %22992, %int1_20797 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_20798 = torch.constant.int 0
    %23286 = torch.aten.add.int %int0_20798, %23285 : !torch.int, !torch.int -> !torch.int
    %int0_20799 = torch.constant.int 0
    %int0_20800 = torch.constant.int 0
    %int1_20801 = torch.constant.int 1
    %23287 = torch.aten.slice.Tensor %23215, %int0_20799, %int0_20800, %23286, %int1_20801 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23287, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20802 = torch.constant.int 1
    %int0_20803 = torch.constant.int 0
    %int9223372036854775807_20804 = torch.constant.int 9223372036854775807
    %int1_20805 = torch.constant.int 1
    %23288 = torch.aten.slice.Tensor %23287, %int1_20802, %int0_20803, %int9223372036854775807_20804, %int1_20805 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23288, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20806 = torch.constant.int 0
    %23289 = torch.aten.unsqueeze %23288, %int0_20806 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23289, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20807 = torch.constant.int 2
    %23290 = torch.aten.unsqueeze %23289, %int2_20807 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23290, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20808 = torch.constant.int 3
    %int0_20809 = torch.constant.int 0
    %int9223372036854775807_20810 = torch.constant.int 9223372036854775807
    %int1_20811 = torch.constant.int 1
    %23291 = torch.aten.slice.Tensor %23290, %int3_20808, %int0_20809, %int9223372036854775807_20810, %int1_20811 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23291, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23292 = torch_c.to_builtin_tensor %23148 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_20812 = arith.constant 1 : index
    %dim_20813 = tensor.dim %23292, %c1_20812 : tensor<4x?x4x128xf16>
    %23293 = flow.tensor.bitcast %23292 : tensor<4x?x4x128xf16>{%dim_20813} -> tensor<4x?x4x64xcomplex<f16>>{%dim_20813}
    %23294 = torch_c.from_builtin_tensor %23293 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %23294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %23295 = torch.aten.mul.Tensor %23294, %23291 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %23295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %23296 = torch_c.to_builtin_tensor %23295 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_20814 = arith.constant 1 : index
    %dim_20815 = tensor.dim %23296, %c1_20814 : tensor<4x?x4x64xcomplex<f32>>
    %23297 = flow.tensor.bitcast %23296 : tensor<4x?x4x64xcomplex<f32>>{%dim_20815} -> tensor<4x?x4x128xf32>{%dim_20815}
    %23298 = torch_c.from_builtin_tensor %23297 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %23298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_20816 = torch.constant.int 5
    %23299 = torch.prims.convert_element_type %23298, %int5_20816 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
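    // Same RoPE pattern, next query shard (result %23314).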
    %int1_20817 = torch.constant.int 1
    %23300 = torch.aten.size.int %22998, %int1_20817 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_20818 = torch.constant.int 0
    %23301 = torch.aten.add.int %int0_20818, %23300 : !torch.int, !torch.int -> !torch.int
    %int0_20819 = torch.constant.int 0
    %int0_20820 = torch.constant.int 0
    %int1_20821 = torch.constant.int 1
    %23302 = torch.aten.slice.Tensor %23218, %int0_20819, %int0_20820, %23301, %int1_20821 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23302, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20822 = torch.constant.int 1
    %int0_20823 = torch.constant.int 0
    %int9223372036854775807_20824 = torch.constant.int 9223372036854775807
    %int1_20825 = torch.constant.int 1
    %23303 = torch.aten.slice.Tensor %23302, %int1_20822, %int0_20823, %int9223372036854775807_20824, %int1_20825 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23303, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20826 = torch.constant.int 0
    %23304 = torch.aten.unsqueeze %23303, %int0_20826 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23304, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20827 = torch.constant.int 2
    %23305 = torch.aten.unsqueeze %23304, %int2_20827 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23305, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20828 = torch.constant.int 3
    %int0_20829 = torch.constant.int 0
    %int9223372036854775807_20830 = torch.constant.int 9223372036854775807
    %int1_20831 = torch.constant.int 1
    %23306 = torch.aten.slice.Tensor %23305, %int3_20828, %int0_20829, %int9223372036854775807_20830, %int1_20831 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23306, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23307 = torch_c.to_builtin_tensor %23150 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_20832 = arith.constant 1 : index
    %dim_20833 = tensor.dim %23307, %c1_20832 : tensor<4x?x4x128xf16>
    %23308 = flow.tensor.bitcast %23307 : tensor<4x?x4x128xf16>{%dim_20833} -> tensor<4x?x4x64xcomplex<f16>>{%dim_20833}
    %23309 = torch_c.from_builtin_tensor %23308 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %23309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %23310 = torch.aten.mul.Tensor %23309, %23306 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %23310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %23311 = torch_c.to_builtin_tensor %23310 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_20834 = arith.constant 1 : index
    %dim_20835 = tensor.dim %23311, %c1_20834 : tensor<4x?x4x64xcomplex<f32>>
    %23312 = flow.tensor.bitcast %23311 : tensor<4x?x4x64xcomplex<f32>>{%dim_20835} -> tensor<4x?x4x128xf32>{%dim_20835}
    %23313 = torch_c.from_builtin_tensor %23312 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %23313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_20836 = torch.constant.int 5
    %23314 = torch.prims.convert_element_type %23313, %int5_20836 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
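    // Same RoPE pattern, next query shard (result %23329).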
    %int1_20837 = torch.constant.int 1
    %23315 = torch.aten.size.int %23004, %int1_20837 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_20838 = torch.constant.int 0
    %23316 = torch.aten.add.int %int0_20838, %23315 : !torch.int, !torch.int -> !torch.int
    %int0_20839 = torch.constant.int 0
    %int0_20840 = torch.constant.int 0
    %int1_20841 = torch.constant.int 1
    %23317 = torch.aten.slice.Tensor %23221, %int0_20839, %int0_20840, %23316, %int1_20841 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23317, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20842 = torch.constant.int 1
    %int0_20843 = torch.constant.int 0
    %int9223372036854775807_20844 = torch.constant.int 9223372036854775807
    %int1_20845 = torch.constant.int 1
    %23318 = torch.aten.slice.Tensor %23317, %int1_20842, %int0_20843, %int9223372036854775807_20844, %int1_20845 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23318, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20846 = torch.constant.int 0
    %23319 = torch.aten.unsqueeze %23318, %int0_20846 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23319, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20847 = torch.constant.int 2
    %23320 = torch.aten.unsqueeze %23319, %int2_20847 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23320, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20848 = torch.constant.int 3
    %int0_20849 = torch.constant.int 0
    %int9223372036854775807_20850 = torch.constant.int 9223372036854775807
    %int1_20851 = torch.constant.int 1
    %23321 = torch.aten.slice.Tensor %23320, %int3_20848, %int0_20849, %int9223372036854775807_20850, %int1_20851 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23321, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23322 = torch_c.to_builtin_tensor %23152 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_20852 = arith.constant 1 : index
    %dim_20853 = tensor.dim %23322, %c1_20852 : tensor<4x?x4x128xf16>
    %23323 = flow.tensor.bitcast %23322 : tensor<4x?x4x128xf16>{%dim_20853} -> tensor<4x?x4x64xcomplex<f16>>{%dim_20853}
    %23324 = torch_c.from_builtin_tensor %23323 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %23324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %23325 = torch.aten.mul.Tensor %23324, %23321 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %23325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %23326 = torch_c.to_builtin_tensor %23325 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_20854 = arith.constant 1 : index
    %dim_20855 = tensor.dim %23326, %c1_20854 : tensor<4x?x4x64xcomplex<f32>>
    %23327 = flow.tensor.bitcast %23326 : tensor<4x?x4x64xcomplex<f32>>{%dim_20855} -> tensor<4x?x4x128xf32>{%dim_20855}
    %23328 = torch_c.from_builtin_tensor %23327 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %23328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_20856 = torch.constant.int 5
    %23329 = torch.prims.convert_element_type %23328, %int5_20856 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
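    // Same RoPE pattern, last query shard of this group (result %23344).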
    %int1_20857 = torch.constant.int 1
    %23330 = torch.aten.size.int %23010, %int1_20857 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_20858 = torch.constant.int 0
    %23331 = torch.aten.add.int %int0_20858, %23330 : !torch.int, !torch.int -> !torch.int
    %int0_20859 = torch.constant.int 0
    %int0_20860 = torch.constant.int 0
    %int1_20861 = torch.constant.int 1
    %23332 = torch.aten.slice.Tensor %23224, %int0_20859, %int0_20860, %23331, %int1_20861 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23332, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20862 = torch.constant.int 1
    %int0_20863 = torch.constant.int 0
    %int9223372036854775807_20864 = torch.constant.int 9223372036854775807
    %int1_20865 = torch.constant.int 1
    %23333 = torch.aten.slice.Tensor %23332, %int1_20862, %int0_20863, %int9223372036854775807_20864, %int1_20865 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23333, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20866 = torch.constant.int 0
    %23334 = torch.aten.unsqueeze %23333, %int0_20866 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23334, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20867 = torch.constant.int 2
    %23335 = torch.aten.unsqueeze %23334, %int2_20867 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23335, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20868 = torch.constant.int 3
    %int0_20869 = torch.constant.int 0
    %int9223372036854775807_20870 = torch.constant.int 9223372036854775807
    %int1_20871 = torch.constant.int 1
    %23336 = torch.aten.slice.Tensor %23335, %int3_20868, %int0_20869, %int9223372036854775807_20870, %int1_20871 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23336, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23337 = torch_c.to_builtin_tensor %23154 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_20872 = arith.constant 1 : index
    %dim_20873 = tensor.dim %23337, %c1_20872 : tensor<4x?x4x128xf16>
    %23338 = flow.tensor.bitcast %23337 : tensor<4x?x4x128xf16>{%dim_20873} -> tensor<4x?x4x64xcomplex<f16>>{%dim_20873}
    %23339 = torch_c.from_builtin_tensor %23338 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %23339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %23340 = torch.aten.mul.Tensor %23339, %23336 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %23340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %23341 = torch_c.to_builtin_tensor %23340 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_20874 = arith.constant 1 : index
    %dim_20875 = tensor.dim %23341, %c1_20874 : tensor<4x?x4x64xcomplex<f32>>
    %23342 = flow.tensor.bitcast %23341 : tensor<4x?x4x64xcomplex<f32>>{%dim_20875} -> tensor<4x?x4x128xf32>{%dim_20875}
    %23343 = torch_c.from_builtin_tensor %23342 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %23343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_20876 = torch.constant.int 5
    %23344 = torch.prims.convert_element_type %23343, %int5_20876 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
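    // Rebuild the rotary frequency table from scratch rather than reusing an
    // earlier copy: positions 0..131071, 64 inverse frequencies taken as
    // 500000^(-2i/128) for i in 0..63 (base 500000, head dim 128), an outer
    // product over positions, then cos and sin assembled into a [131072,64]
    // complex<f32> table (%23358).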
    %int131072_20877 = torch.constant.int 131072
    %none_20878 = torch.constant.none
    %none_20879 = torch.constant.none
    %cpu_20880 = torch.constant.device "cpu"
    %false_20881 = torch.constant.bool false
    %23345 = torch.aten.arange %int131072_20877, %none_20878, %none_20879, %cpu_20880, %false_20881 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_20882 = torch.constant.int 0
    %int128_20883 = torch.constant.int 128
    %int2_20884 = torch.constant.int 2
    %none_20885 = torch.constant.none
    %none_20886 = torch.constant.none
    %cpu_20887 = torch.constant.device "cpu"
    %false_20888 = torch.constant.bool false
    %23346 = torch.aten.arange.start_step %int0_20882, %int128_20883, %int2_20884, %none_20885, %none_20886, %cpu_20887, %false_20888 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_20889 = torch.constant.int 0
    %int0_20890 = torch.constant.int 0
    %int64_20891 = torch.constant.int 64
    %int1_20892 = torch.constant.int 1
    %23347 = torch.aten.slice.Tensor %23346, %int0_20889, %int0_20890, %int64_20891, %int1_20892 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_20893 = torch.constant.int 6
    %23348 = torch.prims.convert_element_type %23347, %int6_20893 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_20894 = torch.constant.int 128
    %23349 = torch.aten.div.Scalar %23348, %int128_20894 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_20895 = torch.constant.float 5.000000e+05
    %23350 = torch.aten.pow.Scalar %float5.000000e05_20895, %23349 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %23351 = torch.aten.reciprocal %23350 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_20896 = torch.constant.float 1.000000e+00
    %23352 = torch.aten.mul.Scalar %23351, %float1.000000e00_20896 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_20897 = torch.constant.int 131072
    %int1_20898 = torch.constant.int 1
    %23353 = torch.prim.ListConstruct %int131072_20897, %int1_20898 : (!torch.int, !torch.int) -> !torch.list<int>
    %23354 = torch.aten.view %23345, %23353 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %23355 = torch.aten.mul.Tensor %23354, %23352 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %23356 = torch.aten.cos %23355 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %23357 = torch.aten.sin %23355 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %23358 = torch.aten.complex %23356, %23357 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
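    // Broadcast the freshly built table to all eight devices: each
    // flow.tensor.transfer below materializes one copy on a device promise
    // (@__device_0 .. @__device_7) for the per-shard key RoPE that follows.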
    %23359 = torch_c.to_builtin_tensor %23358 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23360 = flow.tensor.transfer %23359 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %23361 = torch_c.from_builtin_tensor %23360 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23362 = torch_c.to_builtin_tensor %23358 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23363 = flow.tensor.transfer %23362 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %23364 = torch_c.from_builtin_tensor %23363 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23365 = torch_c.to_builtin_tensor %23358 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23366 = flow.tensor.transfer %23365 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %23367 = torch_c.from_builtin_tensor %23366 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23368 = torch_c.to_builtin_tensor %23358 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23369 = flow.tensor.transfer %23368 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %23370 = torch_c.from_builtin_tensor %23369 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23371 = torch_c.to_builtin_tensor %23358 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23372 = flow.tensor.transfer %23371 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %23373 = torch_c.from_builtin_tensor %23372 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23374 = torch_c.to_builtin_tensor %23358 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23375 = flow.tensor.transfer %23374 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %23376 = torch_c.from_builtin_tensor %23375 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23377 = torch_c.to_builtin_tensor %23358 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23378 = flow.tensor.transfer %23377 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %23379 = torch_c.from_builtin_tensor %23378 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %23380 = torch_c.to_builtin_tensor %23358 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %23381 = flow.tensor.transfer %23380 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %23382 = torch_c.from_builtin_tensor %23381 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
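    // Apply the same RoPE pattern to the key activations. Each shard carries a
    // single KV head ([4,?,1,128] rather than [4,?,4,128]) and slices its own
    // device-local copy of the table. This block uses the @__device_0 copy
    // (%23361).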
    %int1_20899 = torch.constant.int 1
    %23383 = torch.aten.size.int %23032, %int1_20899 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_20900 = torch.constant.int 0
    %23384 = torch.aten.add.int %int0_20900, %23383 : !torch.int, !torch.int -> !torch.int
    %int0_20901 = torch.constant.int 0
    %int0_20902 = torch.constant.int 0
    %int1_20903 = torch.constant.int 1
    %23385 = torch.aten.slice.Tensor %23361, %int0_20901, %int0_20902, %23384, %int1_20903 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23385, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20904 = torch.constant.int 1
    %int0_20905 = torch.constant.int 0
    %int9223372036854775807_20906 = torch.constant.int 9223372036854775807
    %int1_20907 = torch.constant.int 1
    %23386 = torch.aten.slice.Tensor %23385, %int1_20904, %int0_20905, %int9223372036854775807_20906, %int1_20907 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23386, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20908 = torch.constant.int 0
    %23387 = torch.aten.unsqueeze %23386, %int0_20908 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23387, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20909 = torch.constant.int 2
    %23388 = torch.aten.unsqueeze %23387, %int2_20909 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23388, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20910 = torch.constant.int 3
    %int0_20911 = torch.constant.int 0
    %int9223372036854775807_20912 = torch.constant.int 9223372036854775807
    %int1_20913 = torch.constant.int 1
    %23389 = torch.aten.slice.Tensor %23388, %int3_20910, %int0_20911, %int9223372036854775807_20912, %int1_20913 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23389, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23390 = torch_c.to_builtin_tensor %23156 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_20914 = arith.constant 1 : index
    %dim_20915 = tensor.dim %23390, %c1_20914 : tensor<4x?x1x128xf16>
    %23391 = flow.tensor.bitcast %23390 : tensor<4x?x1x128xf16>{%dim_20915} -> tensor<4x?x1x64xcomplex<f16>>{%dim_20915}
    %23392 = torch_c.from_builtin_tensor %23391 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %23392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %23393 = torch.aten.mul.Tensor %23392, %23389 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %23394 = torch_c.to_builtin_tensor %23393 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_20916 = arith.constant 1 : index
    %dim_20917 = tensor.dim %23394, %c1_20916 : tensor<4x?x1x64xcomplex<f32>>
    %23395 = flow.tensor.bitcast %23394 : tensor<4x?x1x64xcomplex<f32>>{%dim_20917} -> tensor<4x?x1x128xf32>{%dim_20917}
    %23396 = torch_c.from_builtin_tensor %23395 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %23396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_20918 = torch.constant.int 5
    %23397 = torch.prims.convert_element_type %23396, %int5_20918 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
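    // Key RoPE on the @__device_1 copy (%23364).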
    %int1_20919 = torch.constant.int 1
    %23398 = torch.aten.size.int %23038, %int1_20919 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_20920 = torch.constant.int 0
    %23399 = torch.aten.add.int %int0_20920, %23398 : !torch.int, !torch.int -> !torch.int
    %int0_20921 = torch.constant.int 0
    %int0_20922 = torch.constant.int 0
    %int1_20923 = torch.constant.int 1
    %23400 = torch.aten.slice.Tensor %23364, %int0_20921, %int0_20922, %23399, %int1_20923 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23400, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20924 = torch.constant.int 1
    %int0_20925 = torch.constant.int 0
    %int9223372036854775807_20926 = torch.constant.int 9223372036854775807
    %int1_20927 = torch.constant.int 1
    %23401 = torch.aten.slice.Tensor %23400, %int1_20924, %int0_20925, %int9223372036854775807_20926, %int1_20927 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23401, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20928 = torch.constant.int 0
    %23402 = torch.aten.unsqueeze %23401, %int0_20928 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23402, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20929 = torch.constant.int 2
    %23403 = torch.aten.unsqueeze %23402, %int2_20929 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23403, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20930 = torch.constant.int 3
    %int0_20931 = torch.constant.int 0
    %int9223372036854775807_20932 = torch.constant.int 9223372036854775807
    %int1_20933 = torch.constant.int 1
    %23404 = torch.aten.slice.Tensor %23403, %int3_20930, %int0_20931, %int9223372036854775807_20932, %int1_20933 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23404, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23405 = torch_c.to_builtin_tensor %23158 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_20934 = arith.constant 1 : index
    %dim_20935 = tensor.dim %23405, %c1_20934 : tensor<4x?x1x128xf16>
    %23406 = flow.tensor.bitcast %23405 : tensor<4x?x1x128xf16>{%dim_20935} -> tensor<4x?x1x64xcomplex<f16>>{%dim_20935}
    %23407 = torch_c.from_builtin_tensor %23406 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %23407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %23408 = torch.aten.mul.Tensor %23407, %23404 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %23409 = torch_c.to_builtin_tensor %23408 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_20936 = arith.constant 1 : index
    %dim_20937 = tensor.dim %23409, %c1_20936 : tensor<4x?x1x64xcomplex<f32>>
    %23410 = flow.tensor.bitcast %23409 : tensor<4x?x1x64xcomplex<f32>>{%dim_20937} -> tensor<4x?x1x128xf32>{%dim_20937}
    %23411 = torch_c.from_builtin_tensor %23410 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %23411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_20938 = torch.constant.int 5
    %23412 = torch.prims.convert_element_type %23411, %int5_20938 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
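    // Key RoPE on the @__device_2 copy (%23367).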
    %int1_20939 = torch.constant.int 1
    %23413 = torch.aten.size.int %23044, %int1_20939 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_20940 = torch.constant.int 0
    %23414 = torch.aten.add.int %int0_20940, %23413 : !torch.int, !torch.int -> !torch.int
    %int0_20941 = torch.constant.int 0
    %int0_20942 = torch.constant.int 0
    %int1_20943 = torch.constant.int 1
    %23415 = torch.aten.slice.Tensor %23367, %int0_20941, %int0_20942, %23414, %int1_20943 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23415, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20944 = torch.constant.int 1
    %int0_20945 = torch.constant.int 0
    %int9223372036854775807_20946 = torch.constant.int 9223372036854775807
    %int1_20947 = torch.constant.int 1
    %23416 = torch.aten.slice.Tensor %23415, %int1_20944, %int0_20945, %int9223372036854775807_20946, %int1_20947 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23416, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20948 = torch.constant.int 0
    %23417 = torch.aten.unsqueeze %23416, %int0_20948 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23417, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20949 = torch.constant.int 2
    %23418 = torch.aten.unsqueeze %23417, %int2_20949 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23418, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20950 = torch.constant.int 3
    %int0_20951 = torch.constant.int 0
    %int9223372036854775807_20952 = torch.constant.int 9223372036854775807
    %int1_20953 = torch.constant.int 1
    %23419 = torch.aten.slice.Tensor %23418, %int3_20950, %int0_20951, %int9223372036854775807_20952, %int1_20953 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23419, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23420 = torch_c.to_builtin_tensor %23160 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_20954 = arith.constant 1 : index
    %dim_20955 = tensor.dim %23420, %c1_20954 : tensor<4x?x1x128xf16>
    %23421 = flow.tensor.bitcast %23420 : tensor<4x?x1x128xf16>{%dim_20955} -> tensor<4x?x1x64xcomplex<f16>>{%dim_20955}
    %23422 = torch_c.from_builtin_tensor %23421 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %23422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %23423 = torch.aten.mul.Tensor %23422, %23419 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %23424 = torch_c.to_builtin_tensor %23423 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_20956 = arith.constant 1 : index
    %dim_20957 = tensor.dim %23424, %c1_20956 : tensor<4x?x1x64xcomplex<f32>>
    %23425 = flow.tensor.bitcast %23424 : tensor<4x?x1x64xcomplex<f32>>{%dim_20957} -> tensor<4x?x1x128xf32>{%dim_20957}
    %23426 = torch_c.from_builtin_tensor %23425 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %23426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_20958 = torch.constant.int 5
    %23427 = torch.prims.convert_element_type %23426, %int5_20958 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
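    // Key RoPE on the @__device_3 copy (%23370).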
    %int1_20959 = torch.constant.int 1
    %23428 = torch.aten.size.int %23050, %int1_20959 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_20960 = torch.constant.int 0
    %23429 = torch.aten.add.int %int0_20960, %23428 : !torch.int, !torch.int -> !torch.int
    %int0_20961 = torch.constant.int 0
    %int0_20962 = torch.constant.int 0
    %int1_20963 = torch.constant.int 1
    %23430 = torch.aten.slice.Tensor %23370, %int0_20961, %int0_20962, %23429, %int1_20963 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23430, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20964 = torch.constant.int 1
    %int0_20965 = torch.constant.int 0
    %int9223372036854775807_20966 = torch.constant.int 9223372036854775807
    %int1_20967 = torch.constant.int 1
    %23431 = torch.aten.slice.Tensor %23430, %int1_20964, %int0_20965, %int9223372036854775807_20966, %int1_20967 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23431, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20968 = torch.constant.int 0
    %23432 = torch.aten.unsqueeze %23431, %int0_20968 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23432, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20969 = torch.constant.int 2
    %23433 = torch.aten.unsqueeze %23432, %int2_20969 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23433, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20970 = torch.constant.int 3
    %int0_20971 = torch.constant.int 0
    %int9223372036854775807_20972 = torch.constant.int 9223372036854775807
    %int1_20973 = torch.constant.int 1
    %23434 = torch.aten.slice.Tensor %23433, %int3_20970, %int0_20971, %int9223372036854775807_20972, %int1_20973 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23434, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23435 = torch_c.to_builtin_tensor %23162 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_20974 = arith.constant 1 : index
    %dim_20975 = tensor.dim %23435, %c1_20974 : tensor<4x?x1x128xf16>
    %23436 = flow.tensor.bitcast %23435 : tensor<4x?x1x128xf16>{%dim_20975} -> tensor<4x?x1x64xcomplex<f16>>{%dim_20975}
    %23437 = torch_c.from_builtin_tensor %23436 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %23437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %23438 = torch.aten.mul.Tensor %23437, %23434 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %23439 = torch_c.to_builtin_tensor %23438 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_20976 = arith.constant 1 : index
    %dim_20977 = tensor.dim %23439, %c1_20976 : tensor<4x?x1x64xcomplex<f32>>
    %23440 = flow.tensor.bitcast %23439 : tensor<4x?x1x64xcomplex<f32>>{%dim_20977} -> tensor<4x?x1x128xf32>{%dim_20977}
    %23441 = torch_c.from_builtin_tensor %23440 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %23441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_20978 = torch.constant.int 5
    %23442 = torch.prims.convert_element_type %23441, %int5_20978 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
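    // Key RoPE on the @__device_4 copy (%23373).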
    %int1_20979 = torch.constant.int 1
    %23443 = torch.aten.size.int %23056, %int1_20979 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_20980 = torch.constant.int 0
    %23444 = torch.aten.add.int %int0_20980, %23443 : !torch.int, !torch.int -> !torch.int
    %int0_20981 = torch.constant.int 0
    %int0_20982 = torch.constant.int 0
    %int1_20983 = torch.constant.int 1
    %23445 = torch.aten.slice.Tensor %23373, %int0_20981, %int0_20982, %23444, %int1_20983 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23445, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_20984 = torch.constant.int 1
    %int0_20985 = torch.constant.int 0
    %int9223372036854775807_20986 = torch.constant.int 9223372036854775807
    %int1_20987 = torch.constant.int 1
    %23446 = torch.aten.slice.Tensor %23445, %int1_20984, %int0_20985, %int9223372036854775807_20986, %int1_20987 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23446, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_20988 = torch.constant.int 0
    %23447 = torch.aten.unsqueeze %23446, %int0_20988 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23447, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_20989 = torch.constant.int 2
    %23448 = torch.aten.unsqueeze %23447, %int2_20989 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23448, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_20990 = torch.constant.int 3
    %int0_20991 = torch.constant.int 0
    %int9223372036854775807_20992 = torch.constant.int 9223372036854775807
    %int1_20993 = torch.constant.int 1
    %23449 = torch.aten.slice.Tensor %23448, %int3_20990, %int0_20991, %int9223372036854775807_20992, %int1_20993 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23449, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23450 = torch_c.to_builtin_tensor %23164 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_20994 = arith.constant 1 : index
    %dim_20995 = tensor.dim %23450, %c1_20994 : tensor<4x?x1x128xf16>
    %23451 = flow.tensor.bitcast %23450 : tensor<4x?x1x128xf16>{%dim_20995} -> tensor<4x?x1x64xcomplex<f16>>{%dim_20995}
    %23452 = torch_c.from_builtin_tensor %23451 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %23452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %23453 = torch.aten.mul.Tensor %23452, %23449 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %23454 = torch_c.to_builtin_tensor %23453 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_20996 = arith.constant 1 : index
    %dim_20997 = tensor.dim %23454, %c1_20996 : tensor<4x?x1x64xcomplex<f32>>
    %23455 = flow.tensor.bitcast %23454 : tensor<4x?x1x64xcomplex<f32>>{%dim_20997} -> tensor<4x?x1x128xf32>{%dim_20997}
    %23456 = torch_c.from_builtin_tensor %23455 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %23456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_20998 = torch.constant.int 5
    %23457 = torch.prims.convert_element_type %23456, %int5_20998 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
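    // Key RoPE on the @__device_5 copy (%23376).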
    %int1_20999 = torch.constant.int 1
    %23458 = torch.aten.size.int %23062, %int1_20999 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_21000 = torch.constant.int 0
    %23459 = torch.aten.add.int %int0_21000, %23458 : !torch.int, !torch.int -> !torch.int
    %int0_21001 = torch.constant.int 0
    %int0_21002 = torch.constant.int 0
    %int1_21003 = torch.constant.int 1
    %23460 = torch.aten.slice.Tensor %23376, %int0_21001, %int0_21002, %23459, %int1_21003 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23460, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_21004 = torch.constant.int 1
    %int0_21005 = torch.constant.int 0
    %int9223372036854775807_21006 = torch.constant.int 9223372036854775807
    %int1_21007 = torch.constant.int 1
    %23461 = torch.aten.slice.Tensor %23460, %int1_21004, %int0_21005, %int9223372036854775807_21006, %int1_21007 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23461, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_21008 = torch.constant.int 0
    %23462 = torch.aten.unsqueeze %23461, %int0_21008 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23462, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_21009 = torch.constant.int 2
    %23463 = torch.aten.unsqueeze %23462, %int2_21009 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23463, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_21010 = torch.constant.int 3
    %int0_21011 = torch.constant.int 0
    %int9223372036854775807_21012 = torch.constant.int 9223372036854775807
    %int1_21013 = torch.constant.int 1
    %23464 = torch.aten.slice.Tensor %23463, %int3_21010, %int0_21011, %int9223372036854775807_21012, %int1_21013 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23464, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23465 = torch_c.to_builtin_tensor %23166 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_21014 = arith.constant 1 : index
    %dim_21015 = tensor.dim %23465, %c1_21014 : tensor<4x?x1x128xf16>
    %23466 = flow.tensor.bitcast %23465 : tensor<4x?x1x128xf16>{%dim_21015} -> tensor<4x?x1x64xcomplex<f16>>{%dim_21015}
    %23467 = torch_c.from_builtin_tensor %23466 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %23467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %23468 = torch.aten.mul.Tensor %23467, %23464 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %23469 = torch_c.to_builtin_tensor %23468 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_21016 = arith.constant 1 : index
    %dim_21017 = tensor.dim %23469, %c1_21016 : tensor<4x?x1x64xcomplex<f32>>
    %23470 = flow.tensor.bitcast %23469 : tensor<4x?x1x64xcomplex<f32>>{%dim_21017} -> tensor<4x?x1x128xf32>{%dim_21017}
    %23471 = torch_c.from_builtin_tensor %23470 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %23471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_21018 = torch.constant.int 5
    %23472 = torch.prims.convert_element_type %23471, %int5_21018 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_21019 = torch.constant.int 1
    %23473 = torch.aten.size.int %23068, %int1_21019 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_21020 = torch.constant.int 0
    %23474 = torch.aten.add.int %int0_21020, %23473 : !torch.int, !torch.int -> !torch.int
    %int0_21021 = torch.constant.int 0
    %int0_21022 = torch.constant.int 0
    %int1_21023 = torch.constant.int 1
    %23475 = torch.aten.slice.Tensor %23379, %int0_21021, %int0_21022, %23474, %int1_21023 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23475, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_21024 = torch.constant.int 1
    %int0_21025 = torch.constant.int 0
    %int9223372036854775807_21026 = torch.constant.int 9223372036854775807
    %int1_21027 = torch.constant.int 1
    %23476 = torch.aten.slice.Tensor %23475, %int1_21024, %int0_21025, %int9223372036854775807_21026, %int1_21027 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23476, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_21028 = torch.constant.int 0
    %23477 = torch.aten.unsqueeze %23476, %int0_21028 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23477, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_21029 = torch.constant.int 2
    %23478 = torch.aten.unsqueeze %23477, %int2_21029 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23478, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_21030 = torch.constant.int 3
    %int0_21031 = torch.constant.int 0
    %int9223372036854775807_21032 = torch.constant.int 9223372036854775807
    %int1_21033 = torch.constant.int 1
    %23479 = torch.aten.slice.Tensor %23478, %int3_21030, %int0_21031, %int9223372036854775807_21032, %int1_21033 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23479, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23480 = torch_c.to_builtin_tensor %23168 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_21034 = arith.constant 1 : index
    %dim_21035 = tensor.dim %23480, %c1_21034 : tensor<4x?x1x128xf16>
    %23481 = flow.tensor.bitcast %23480 : tensor<4x?x1x128xf16>{%dim_21035} -> tensor<4x?x1x64xcomplex<f16>>{%dim_21035}
    %23482 = torch_c.from_builtin_tensor %23481 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %23482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %23483 = torch.aten.mul.Tensor %23482, %23479 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %23484 = torch_c.to_builtin_tensor %23483 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_21036 = arith.constant 1 : index
    %dim_21037 = tensor.dim %23484, %c1_21036 : tensor<4x?x1x64xcomplex<f32>>
    %23485 = flow.tensor.bitcast %23484 : tensor<4x?x1x64xcomplex<f32>>{%dim_21037} -> tensor<4x?x1x128xf32>{%dim_21037}
    %23486 = torch_c.from_builtin_tensor %23485 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %23486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_21038 = torch.constant.int 5
    %23487 = torch.prims.convert_element_type %23486, %int5_21038 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_21039 = torch.constant.int 1
    %23488 = torch.aten.size.int %23074, %int1_21039 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_21040 = torch.constant.int 0
    %23489 = torch.aten.add.int %int0_21040, %23488 : !torch.int, !torch.int -> !torch.int
    %int0_21041 = torch.constant.int 0
    %int0_21042 = torch.constant.int 0
    %int1_21043 = torch.constant.int 1
    %23490 = torch.aten.slice.Tensor %23382, %int0_21041, %int0_21042, %23489, %int1_21043 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23490, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_21044 = torch.constant.int 1
    %int0_21045 = torch.constant.int 0
    %int9223372036854775807_21046 = torch.constant.int 9223372036854775807
    %int1_21047 = torch.constant.int 1
    %23491 = torch.aten.slice.Tensor %23490, %int1_21044, %int0_21045, %int9223372036854775807_21046, %int1_21047 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %23491, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_21048 = torch.constant.int 0
    %23492 = torch.aten.unsqueeze %23491, %int0_21048 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %23492, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_21049 = torch.constant.int 2
    %23493 = torch.aten.unsqueeze %23492, %int2_21049 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23493, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_21050 = torch.constant.int 3
    %int0_21051 = torch.constant.int 0
    %int9223372036854775807_21052 = torch.constant.int 9223372036854775807
    %int1_21053 = torch.constant.int 1
    %23494 = torch.aten.slice.Tensor %23493, %int3_21050, %int0_21051, %int9223372036854775807_21052, %int1_21053 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23494, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %23495 = torch_c.to_builtin_tensor %23170 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_21054 = arith.constant 1 : index
    %dim_21055 = tensor.dim %23495, %c1_21054 : tensor<4x?x1x128xf16>
    %23496 = flow.tensor.bitcast %23495 : tensor<4x?x1x128xf16>{%dim_21055} -> tensor<4x?x1x64xcomplex<f16>>{%dim_21055}
    %23497 = torch_c.from_builtin_tensor %23496 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %23497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %23498 = torch.aten.mul.Tensor %23497, %23494 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %23498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %23499 = torch_c.to_builtin_tensor %23498 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_21056 = arith.constant 1 : index
    %dim_21057 = tensor.dim %23499, %c1_21056 : tensor<4x?x1x64xcomplex<f32>>
    %23500 = flow.tensor.bitcast %23499 : tensor<4x?x1x64xcomplex<f32>>{%dim_21057} -> tensor<4x?x1x128xf32>{%dim_21057}
    %23501 = torch_c.from_builtin_tensor %23500 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %23501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_21058 = torch.constant.int 5
    %23502 = torch.prims.convert_element_type %23501, %int5_21058 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %23502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
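    // Slot addressing for the paged KV cache: scale each per-shard page-id
    // tensor (%2364 ... %2385, [4,?] si64) by 64. This appears to match the
    // cache page layout used further down ([?,32,2,16,1,128], i.e. 32 layers
    // x 2 k/v entries = 64 sub-slots per page), giving the flat index of each
    // page's first sub-slot.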
    %int64_21059 = torch.constant.int 64
    %23503 = torch.aten.mul.Scalar %2364, %int64_21059 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23503, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_21060 = torch.constant.int 64
    %23504 = torch.aten.mul.Scalar %2367, %int64_21060 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23504, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_21061 = torch.constant.int 64
    %23505 = torch.aten.mul.Scalar %2370, %int64_21061 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23505, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_21062 = torch.constant.int 64
    %23506 = torch.aten.mul.Scalar %2373, %int64_21062 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23506, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_21063 = torch.constant.int 64
    %23507 = torch.aten.mul.Scalar %2376, %int64_21063 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23507, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_21064 = torch.constant.int 64
    %23508 = torch.aten.mul.Scalar %2379, %int64_21064 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23508, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_21065 = torch.constant.int 64
    %23509 = torch.aten.mul.Scalar %2382, %int64_21065 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23509, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_21066 = torch.constant.int 64
    %23510 = torch.aten.mul.Scalar %2385, %int64_21066 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23510, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
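    // Adding 22 then selects one key sub-slot per page; under the
    // layer * 2 + kv interpretation above this is consistent with the key
    // (kv = 0) entry of transformer layer 11.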
    %int22 = torch.constant.int 22
    %int1_21067 = torch.constant.int 1
    %23511 = torch.aten.add.Scalar %23503, %int22, %int1_21067 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23511, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int22_21068 = torch.constant.int 22
    %int1_21069 = torch.constant.int 1
    %23512 = torch.aten.add.Scalar %23504, %int22_21068, %int1_21069 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23512, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int22_21070 = torch.constant.int 22
    %int1_21071 = torch.constant.int 1
    %23513 = torch.aten.add.Scalar %23505, %int22_21070, %int1_21071 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23513, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int22_21072 = torch.constant.int 22
    %int1_21073 = torch.constant.int 1
    %23514 = torch.aten.add.Scalar %23506, %int22_21072, %int1_21073 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23514, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int22_21074 = torch.constant.int 22
    %int1_21075 = torch.constant.int 1
    %23515 = torch.aten.add.Scalar %23507, %int22_21074, %int1_21075 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23515, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int22_21076 = torch.constant.int 22
    %int1_21077 = torch.constant.int 1
    %23516 = torch.aten.add.Scalar %23508, %int22_21076, %int1_21077 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23516, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int22_21078 = torch.constant.int 22
    %int1_21079 = torch.constant.int 1
    %23517 = torch.aten.add.Scalar %23509, %int22_21078, %int1_21079 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23517, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int22_21080 = torch.constant.int 22
    %int1_21081 = torch.constant.int 1
    %23518 = torch.aten.add.Scalar %23510, %int22_21080, %int1_21081 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23518, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
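    // Regroup each rotated key head from [4, s0*16, 1, 128] (batch, tokens,
    // kv-head, head-dim) into [4, s0, 16, 1, 128] -- pages of 16 tokens --
    // then collapse the batch and page dims to [s0*4, 16, 1, 128] so every
    // row is one page-sized block ready to scatter into the cache.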
    %int4_21082 = torch.constant.int 4
    %int16_21083 = torch.constant.int 16
    %int1_21084 = torch.constant.int 1
    %int128_21085 = torch.constant.int 128
    %23519 = torch.prim.ListConstruct %int4_21082, %3095, %int16_21083, %int1_21084, %int128_21085 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23520 = torch.aten.view %23397, %23519 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23520, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21086 = torch.constant.int 4
    %int16_21087 = torch.constant.int 16
    %int1_21088 = torch.constant.int 1
    %int128_21089 = torch.constant.int 128
    %23521 = torch.prim.ListConstruct %int4_21086, %3095, %int16_21087, %int1_21088, %int128_21089 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23522 = torch.aten.view %23412, %23521 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23522, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21090 = torch.constant.int 4
    %int16_21091 = torch.constant.int 16
    %int1_21092 = torch.constant.int 1
    %int128_21093 = torch.constant.int 128
    %23523 = torch.prim.ListConstruct %int4_21090, %3095, %int16_21091, %int1_21092, %int128_21093 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23524 = torch.aten.view %23427, %23523 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23524, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21094 = torch.constant.int 4
    %int16_21095 = torch.constant.int 16
    %int1_21096 = torch.constant.int 1
    %int128_21097 = torch.constant.int 128
    %23525 = torch.prim.ListConstruct %int4_21094, %3095, %int16_21095, %int1_21096, %int128_21097 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23526 = torch.aten.view %23442, %23525 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23526, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21098 = torch.constant.int 4
    %int16_21099 = torch.constant.int 16
    %int1_21100 = torch.constant.int 1
    %int128_21101 = torch.constant.int 128
    %23527 = torch.prim.ListConstruct %int4_21098, %3095, %int16_21099, %int1_21100, %int128_21101 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23528 = torch.aten.view %23457, %23527 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23528, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21102 = torch.constant.int 4
    %int16_21103 = torch.constant.int 16
    %int1_21104 = torch.constant.int 1
    %int128_21105 = torch.constant.int 128
    %23529 = torch.prim.ListConstruct %int4_21102, %3095, %int16_21103, %int1_21104, %int128_21105 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23530 = torch.aten.view %23472, %23529 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23530, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21106 = torch.constant.int 4
    %int16_21107 = torch.constant.int 16
    %int1_21108 = torch.constant.int 1
    %int128_21109 = torch.constant.int 128
    %23531 = torch.prim.ListConstruct %int4_21106, %3095, %int16_21107, %int1_21108, %int128_21109 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23532 = torch.aten.view %23487, %23531 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23532, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21110 = torch.constant.int 4
    %int16_21111 = torch.constant.int 16
    %int1_21112 = torch.constant.int 1
    %int128_21113 = torch.constant.int 128
    %23533 = torch.prim.ListConstruct %int4_21110, %3095, %int16_21111, %int1_21112, %int128_21113 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23534 = torch.aten.view %23502, %23533 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23534, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21114 = torch.constant.int 4
    %23535 = torch.aten.mul.int %int4_21114, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21115 = torch.constant.int 16
    %int1_21116 = torch.constant.int 1
    %int128_21117 = torch.constant.int 128
    %23536 = torch.prim.ListConstruct %23535, %int16_21115, %int1_21116, %int128_21117 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23537 = torch.aten.view %23520, %23536 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23537, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21118 = torch.constant.int 4
    %23538 = torch.aten.mul.int %int4_21118, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21119 = torch.constant.int 16
    %int1_21120 = torch.constant.int 1
    %int128_21121 = torch.constant.int 128
    %23539 = torch.prim.ListConstruct %23538, %int16_21119, %int1_21120, %int128_21121 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23540 = torch.aten.view %23522, %23539 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23540, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21122 = torch.constant.int 4
    %23541 = torch.aten.mul.int %int4_21122, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21123 = torch.constant.int 16
    %int1_21124 = torch.constant.int 1
    %int128_21125 = torch.constant.int 128
    %23542 = torch.prim.ListConstruct %23541, %int16_21123, %int1_21124, %int128_21125 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23543 = torch.aten.view %23524, %23542 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23543, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21126 = torch.constant.int 4
    %23544 = torch.aten.mul.int %int4_21126, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21127 = torch.constant.int 16
    %int1_21128 = torch.constant.int 1
    %int128_21129 = torch.constant.int 128
    %23545 = torch.prim.ListConstruct %23544, %int16_21127, %int1_21128, %int128_21129 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23546 = torch.aten.view %23526, %23545 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23546, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21130 = torch.constant.int 4
    %23547 = torch.aten.mul.int %int4_21130, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21131 = torch.constant.int 16
    %int1_21132 = torch.constant.int 1
    %int128_21133 = torch.constant.int 128
    %23548 = torch.prim.ListConstruct %23547, %int16_21131, %int1_21132, %int128_21133 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23549 = torch.aten.view %23528, %23548 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23549, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21134 = torch.constant.int 4
    %23550 = torch.aten.mul.int %int4_21134, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21135 = torch.constant.int 16
    %int1_21136 = torch.constant.int 1
    %int128_21137 = torch.constant.int 128
    %23551 = torch.prim.ListConstruct %23550, %int16_21135, %int1_21136, %int128_21137 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23552 = torch.aten.view %23530, %23551 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23552, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21138 = torch.constant.int 4
    %23553 = torch.aten.mul.int %int4_21138, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21139 = torch.constant.int 16
    %int1_21140 = torch.constant.int 1
    %int128_21141 = torch.constant.int 128
    %23554 = torch.prim.ListConstruct %23553, %int16_21139, %int1_21140, %int128_21141 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23555 = torch.aten.view %23532, %23554 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23555, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21142 = torch.constant.int 4
    %23556 = torch.aten.mul.int %int4_21142, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21143 = torch.constant.int 16
    %int1_21144 = torch.constant.int 1
    %int128_21145 = torch.constant.int 128
    %23557 = torch.prim.ListConstruct %23556, %int16_21143, %int1_21144, %int128_21145 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23558 = torch.aten.view %23534, %23557 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23558, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
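    // Flatten the key slot-index tensors from [4,?] to [?] (s0*4) so they
    // pair one-to-one with the page blocks built above.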
    %int4_21146 = torch.constant.int 4
    %23559 = torch.aten.mul.int %int4_21146, %3095 : !torch.int, !torch.int -> !torch.int
    %23560 = torch.prim.ListConstruct %23559 : (!torch.int) -> !torch.list<int>
    %23561 = torch.aten.view %23511, %23560 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23561, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21147 = torch.constant.int 4
    %23562 = torch.aten.mul.int %int4_21147, %3095 : !torch.int, !torch.int -> !torch.int
    %23563 = torch.prim.ListConstruct %23562 : (!torch.int) -> !torch.list<int>
    %23564 = torch.aten.view %23512, %23563 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23564, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21148 = torch.constant.int 4
    %23565 = torch.aten.mul.int %int4_21148, %3095 : !torch.int, !torch.int -> !torch.int
    %23566 = torch.prim.ListConstruct %23565 : (!torch.int) -> !torch.list<int>
    %23567 = torch.aten.view %23513, %23566 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23567, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21149 = torch.constant.int 4
    %23568 = torch.aten.mul.int %int4_21149, %3095 : !torch.int, !torch.int -> !torch.int
    %23569 = torch.prim.ListConstruct %23568 : (!torch.int) -> !torch.list<int>
    %23570 = torch.aten.view %23514, %23569 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23570, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21150 = torch.constant.int 4
    %23571 = torch.aten.mul.int %int4_21150, %3095 : !torch.int, !torch.int -> !torch.int
    %23572 = torch.prim.ListConstruct %23571 : (!torch.int) -> !torch.list<int>
    %23573 = torch.aten.view %23515, %23572 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23573, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21151 = torch.constant.int 4
    %23574 = torch.aten.mul.int %int4_21151, %3095 : !torch.int, !torch.int -> !torch.int
    %23575 = torch.prim.ListConstruct %23574 : (!torch.int) -> !torch.list<int>
    %23576 = torch.aten.view %23516, %23575 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23576, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21152 = torch.constant.int 4
    %23577 = torch.aten.mul.int %int4_21152, %3095 : !torch.int, !torch.int -> !torch.int
    %23578 = torch.prim.ListConstruct %23577 : (!torch.int) -> !torch.list<int>
    %23579 = torch.aten.view %23517, %23578 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23579, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21153 = torch.constant.int 4
    %23580 = torch.aten.mul.int %int4_21153, %3095 : !torch.int, !torch.int -> !torch.int
    %23581 = torch.prim.ListConstruct %23580 : (!torch.int) -> !torch.list<int>
    %23582 = torch.aten.view %23518, %23581 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23582, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
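    // The value heads (%23172 ... %23186) get the same page-block reshape.
    // They skip the rotary multiply applied to the keys above, as expected:
    // RoPE is applied only to queries and keys.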
    %int4_21154 = torch.constant.int 4
    %int16_21155 = torch.constant.int 16
    %int1_21156 = torch.constant.int 1
    %int128_21157 = torch.constant.int 128
    %23583 = torch.prim.ListConstruct %int4_21154, %3095, %int16_21155, %int1_21156, %int128_21157 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23584 = torch.aten.view %23172, %23583 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23584, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21158 = torch.constant.int 4
    %int16_21159 = torch.constant.int 16
    %int1_21160 = torch.constant.int 1
    %int128_21161 = torch.constant.int 128
    %23585 = torch.prim.ListConstruct %int4_21158, %3095, %int16_21159, %int1_21160, %int128_21161 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23586 = torch.aten.view %23174, %23585 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23586, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21162 = torch.constant.int 4
    %int16_21163 = torch.constant.int 16
    %int1_21164 = torch.constant.int 1
    %int128_21165 = torch.constant.int 128
    %23587 = torch.prim.ListConstruct %int4_21162, %3095, %int16_21163, %int1_21164, %int128_21165 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23588 = torch.aten.view %23176, %23587 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23588, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21166 = torch.constant.int 4
    %int16_21167 = torch.constant.int 16
    %int1_21168 = torch.constant.int 1
    %int128_21169 = torch.constant.int 128
    %23589 = torch.prim.ListConstruct %int4_21166, %3095, %int16_21167, %int1_21168, %int128_21169 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23590 = torch.aten.view %23178, %23589 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23590, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21170 = torch.constant.int 4
    %int16_21171 = torch.constant.int 16
    %int1_21172 = torch.constant.int 1
    %int128_21173 = torch.constant.int 128
    %23591 = torch.prim.ListConstruct %int4_21170, %3095, %int16_21171, %int1_21172, %int128_21173 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23592 = torch.aten.view %23180, %23591 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23592, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21174 = torch.constant.int 4
    %int16_21175 = torch.constant.int 16
    %int1_21176 = torch.constant.int 1
    %int128_21177 = torch.constant.int 128
    %23593 = torch.prim.ListConstruct %int4_21174, %3095, %int16_21175, %int1_21176, %int128_21177 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23594 = torch.aten.view %23182, %23593 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23594, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21178 = torch.constant.int 4
    %int16_21179 = torch.constant.int 16
    %int1_21180 = torch.constant.int 1
    %int128_21181 = torch.constant.int 128
    %23595 = torch.prim.ListConstruct %int4_21178, %3095, %int16_21179, %int1_21180, %int128_21181 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23596 = torch.aten.view %23184, %23595 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23596, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21182 = torch.constant.int 4
    %int16_21183 = torch.constant.int 16
    %int1_21184 = torch.constant.int 1
    %int128_21185 = torch.constant.int 128
    %23597 = torch.prim.ListConstruct %int4_21182, %3095, %int16_21183, %int1_21184, %int128_21185 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23598 = torch.aten.view %23186, %23597 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %23598, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_21186 = torch.constant.int 4
    %23599 = torch.aten.mul.int %int4_21186, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21187 = torch.constant.int 16
    %int1_21188 = torch.constant.int 1
    %int128_21189 = torch.constant.int 128
    %23600 = torch.prim.ListConstruct %23599, %int16_21187, %int1_21188, %int128_21189 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23601 = torch.aten.view %23584, %23600 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23601, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21190 = torch.constant.int 4
    %23602 = torch.aten.mul.int %int4_21190, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21191 = torch.constant.int 16
    %int1_21192 = torch.constant.int 1
    %int128_21193 = torch.constant.int 128
    %23603 = torch.prim.ListConstruct %23602, %int16_21191, %int1_21192, %int128_21193 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23604 = torch.aten.view %23586, %23603 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23604, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21194 = torch.constant.int 4
    %23605 = torch.aten.mul.int %int4_21194, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21195 = torch.constant.int 16
    %int1_21196 = torch.constant.int 1
    %int128_21197 = torch.constant.int 128
    %23606 = torch.prim.ListConstruct %23605, %int16_21195, %int1_21196, %int128_21197 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23607 = torch.aten.view %23588, %23606 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23607, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21198 = torch.constant.int 4
    %23608 = torch.aten.mul.int %int4_21198, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21199 = torch.constant.int 16
    %int1_21200 = torch.constant.int 1
    %int128_21201 = torch.constant.int 128
    %23609 = torch.prim.ListConstruct %23608, %int16_21199, %int1_21200, %int128_21201 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23610 = torch.aten.view %23590, %23609 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23610, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21202 = torch.constant.int 4
    %23611 = torch.aten.mul.int %int4_21202, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21203 = torch.constant.int 16
    %int1_21204 = torch.constant.int 1
    %int128_21205 = torch.constant.int 128
    %23612 = torch.prim.ListConstruct %23611, %int16_21203, %int1_21204, %int128_21205 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23613 = torch.aten.view %23592, %23612 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23613, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21206 = torch.constant.int 4
    %23614 = torch.aten.mul.int %int4_21206, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21207 = torch.constant.int 16
    %int1_21208 = torch.constant.int 1
    %int128_21209 = torch.constant.int 128
    %23615 = torch.prim.ListConstruct %23614, %int16_21207, %int1_21208, %int128_21209 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23616 = torch.aten.view %23594, %23615 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23616, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21210 = torch.constant.int 4
    %23617 = torch.aten.mul.int %int4_21210, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21211 = torch.constant.int 16
    %int1_21212 = torch.constant.int 1
    %int128_21213 = torch.constant.int 128
    %23618 = torch.prim.ListConstruct %23617, %int16_21211, %int1_21212, %int128_21213 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23619 = torch.aten.view %23596, %23618 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23619, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_21214 = torch.constant.int 4
    %23620 = torch.aten.mul.int %int4_21214, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_21215 = torch.constant.int 16
    %int1_21216 = torch.constant.int 1
    %int128_21217 = torch.constant.int 128
    %23621 = torch.prim.ListConstruct %23620, %int16_21215, %int1_21216, %int128_21217 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23622 = torch.aten.view %23598, %23621 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23622, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
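    // Key sub-slots occupy the even offsets, so adding 1 converts each key
    // slot index into the value slot index (kv = 1 under the layout above)
    // of the same page and layer.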
    %int1_21218 = torch.constant.int 1
    %int1_21219 = torch.constant.int 1
    %23623 = torch.aten.add.Scalar %23511, %int1_21218, %int1_21219 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23623, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_21220 = torch.constant.int 1
    %int1_21221 = torch.constant.int 1
    %23624 = torch.aten.add.Scalar %23512, %int1_21220, %int1_21221 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23624, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_21222 = torch.constant.int 1
    %int1_21223 = torch.constant.int 1
    %23625 = torch.aten.add.Scalar %23513, %int1_21222, %int1_21223 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23625, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_21224 = torch.constant.int 1
    %int1_21225 = torch.constant.int 1
    %23626 = torch.aten.add.Scalar %23514, %int1_21224, %int1_21225 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23626, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_21226 = torch.constant.int 1
    %int1_21227 = torch.constant.int 1
    %23627 = torch.aten.add.Scalar %23515, %int1_21226, %int1_21227 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23627, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_21228 = torch.constant.int 1
    %int1_21229 = torch.constant.int 1
    %23628 = torch.aten.add.Scalar %23516, %int1_21228, %int1_21229 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23628, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_21230 = torch.constant.int 1
    %int1_21231 = torch.constant.int 1
    %23629 = torch.aten.add.Scalar %23517, %int1_21230, %int1_21231 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23629, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_21232 = torch.constant.int 1
    %int1_21233 = torch.constant.int 1
    %23630 = torch.aten.add.Scalar %23518, %int1_21232, %int1_21233 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %23630, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_21234 = torch.constant.int 4
    %23631 = torch.aten.mul.int %int4_21234, %3095 : !torch.int, !torch.int -> !torch.int
    %23632 = torch.prim.ListConstruct %23631 : (!torch.int) -> !torch.list<int>
    %23633 = torch.aten.view %23623, %23632 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23633, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21235 = torch.constant.int 4
    %23634 = torch.aten.mul.int %int4_21235, %3095 : !torch.int, !torch.int -> !torch.int
    %23635 = torch.prim.ListConstruct %23634 : (!torch.int) -> !torch.list<int>
    %23636 = torch.aten.view %23624, %23635 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23636, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21236 = torch.constant.int 4
    %23637 = torch.aten.mul.int %int4_21236, %3095 : !torch.int, !torch.int -> !torch.int
    %23638 = torch.prim.ListConstruct %23637 : (!torch.int) -> !torch.list<int>
    %23639 = torch.aten.view %23625, %23638 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23639, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21237 = torch.constant.int 4
    %23640 = torch.aten.mul.int %int4_21237, %3095 : !torch.int, !torch.int -> !torch.int
    %23641 = torch.prim.ListConstruct %23640 : (!torch.int) -> !torch.list<int>
    %23642 = torch.aten.view %23626, %23641 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23642, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21238 = torch.constant.int 4
    %23643 = torch.aten.mul.int %int4_21238, %3095 : !torch.int, !torch.int -> !torch.int
    %23644 = torch.prim.ListConstruct %23643 : (!torch.int) -> !torch.list<int>
    %23645 = torch.aten.view %23627, %23644 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23645, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21239 = torch.constant.int 4
    %23646 = torch.aten.mul.int %int4_21239, %3095 : !torch.int, !torch.int -> !torch.int
    %23647 = torch.prim.ListConstruct %23646 : (!torch.int) -> !torch.list<int>
    %23648 = torch.aten.view %23628, %23647 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23648, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21240 = torch.constant.int 4
    %23649 = torch.aten.mul.int %int4_21240, %3095 : !torch.int, !torch.int -> !torch.int
    %23650 = torch.prim.ListConstruct %23649 : (!torch.int) -> !torch.list<int>
    %23651 = torch.aten.view %23629, %23650 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23651, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_21241 = torch.constant.int 4
    %23652 = torch.aten.mul.int %int4_21241, %3095 : !torch.int, !torch.int -> !torch.int
    %23653 = torch.prim.ListConstruct %23652 : (!torch.int) -> !torch.list<int>
    %23654 = torch.aten.view %23630, %23653 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23654, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
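    // Concatenate the key and value indices -- and, just below, the key and
    // value page blocks -- along dim 0 (s0*4 + s0*4 = s0*8), so one
    // index_put per shard writes both halves of the cache update.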
    %23655 = torch.prim.ListConstruct %23561, %23633 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_21242 = torch.constant.int 0
    %23656 = torch.aten.cat %23655, %int0_21242 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23656, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %23657 = torch.prim.ListConstruct %23564, %23636 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_21243 = torch.constant.int 0
    %23658 = torch.aten.cat %23657, %int0_21243 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23658, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %23659 = torch.prim.ListConstruct %23567, %23639 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_21244 = torch.constant.int 0
    %23660 = torch.aten.cat %23659, %int0_21244 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23660, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %23661 = torch.prim.ListConstruct %23570, %23642 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_21245 = torch.constant.int 0
    %23662 = torch.aten.cat %23661, %int0_21245 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23662, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %23663 = torch.prim.ListConstruct %23573, %23645 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_21246 = torch.constant.int 0
    %23664 = torch.aten.cat %23663, %int0_21246 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23664, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %23665 = torch.prim.ListConstruct %23576, %23648 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_21247 = torch.constant.int 0
    %23666 = torch.aten.cat %23665, %int0_21247 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23666, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %23667 = torch.prim.ListConstruct %23579, %23651 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_21248 = torch.constant.int 0
    %23668 = torch.aten.cat %23667, %int0_21248 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23668, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %23669 = torch.prim.ListConstruct %23582, %23654 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_21249 = torch.constant.int 0
    %23670 = torch.aten.cat %23669, %int0_21249 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %23670, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %23671 = torch.prim.ListConstruct %23537, %23601 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_21250 = torch.constant.int 0
    %23672 = torch.aten.cat %23671, %int0_21250 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23672, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23673 = torch.prim.ListConstruct %23540, %23604 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_21251 = torch.constant.int 0
    %23674 = torch.aten.cat %23673, %int0_21251 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23674, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23675 = torch.prim.ListConstruct %23543, %23607 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_21252 = torch.constant.int 0
    %23676 = torch.aten.cat %23675, %int0_21252 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23676, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23677 = torch.prim.ListConstruct %23546, %23610 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_21253 = torch.constant.int 0
    %23678 = torch.aten.cat %23677, %int0_21253 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23678, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23679 = torch.prim.ListConstruct %23549, %23613 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_21254 = torch.constant.int 0
    %23680 = torch.aten.cat %23679, %int0_21254 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23680, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23681 = torch.prim.ListConstruct %23552, %23616 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_21255 = torch.constant.int 0
    %23682 = torch.aten.cat %23681, %int0_21255 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23682, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23683 = torch.prim.ListConstruct %23555, %23619 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_21256 = torch.constant.int 0
    %23684 = torch.aten.cat %23683, %int0_21256 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23684, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23685 = torch.prim.ListConstruct %23558, %23622 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_21257 = torch.constant.int 0
    %23686 = torch.aten.cat %23685, %int0_21257 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23686, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
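    // Cache write for the first shard's buffer (%21837): view the flat
    // [?,131072] cache as [pages, 32, 2, 16, 1, 128] (131072 = 32*2*16*128),
    // collapse the leading three dims to [pages*64, 16, 1, 128], scatter the
    // concatenated k/v blocks at the flat slot indices with index_put
    // (accumulate = false), then view back to [?,131072]. The same sequence
    // repeats below for the remaining shards (%21849, %21861, ...).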
    %int32_21258 = torch.constant.int 32
    %int2_21259 = torch.constant.int 2
    %int16_21260 = torch.constant.int 16
    %int1_21261 = torch.constant.int 1
    %int128_21262 = torch.constant.int 128
    %23687 = torch.prim.ListConstruct %3023, %int32_21258, %int2_21259, %int16_21260, %int1_21261, %int128_21262 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23688 = torch.aten.view %21837, %23687 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23688, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_21263 = torch.constant.int 32
    %23689 = torch.aten.mul.int %3023, %int32_21263 : !torch.int, !torch.int -> !torch.int
    %int2_21264 = torch.constant.int 2
    %23690 = torch.aten.mul.int %23689, %int2_21264 : !torch.int, !torch.int -> !torch.int
    %int16_21265 = torch.constant.int 16
    %int1_21266 = torch.constant.int 1
    %int128_21267 = torch.constant.int 128
    %23691 = torch.prim.ListConstruct %23690, %int16_21265, %int1_21266, %int128_21267 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23692 = torch.aten.view %23688, %23691 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23692, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23693 = torch.prim.ListConstruct %23656 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_21268 = torch.constant.bool false
    %23694 = torch.aten.index_put %23692, %23693, %23672, %false_21268 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23694, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_21269 = torch.constant.int 32
    %int2_21270 = torch.constant.int 2
    %int16_21271 = torch.constant.int 16
    %int1_21272 = torch.constant.int 1
    %int128_21273 = torch.constant.int 128
    %23695 = torch.prim.ListConstruct %3023, %int32_21269, %int2_21270, %int16_21271, %int1_21272, %int128_21273 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23696 = torch.aten.view %23694, %23695 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23696, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_21274 = torch.constant.int 131072
    %23697 = torch.prim.ListConstruct %3023, %int131072_21274 : (!torch.int, !torch.int) -> !torch.list<int>
    %23698 = torch.aten.view %23696, %23697 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %23698, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_21275 = torch.constant.int 32
    %int2_21276 = torch.constant.int 2
    %int16_21277 = torch.constant.int 16
    %int1_21278 = torch.constant.int 1
    %int128_21279 = torch.constant.int 128
    %23699 = torch.prim.ListConstruct %3026, %int32_21275, %int2_21276, %int16_21277, %int1_21278, %int128_21279 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23700 = torch.aten.view %21849, %23699 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23700, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_21280 = torch.constant.int 32
    %23701 = torch.aten.mul.int %3026, %int32_21280 : !torch.int, !torch.int -> !torch.int
    %int2_21281 = torch.constant.int 2
    %23702 = torch.aten.mul.int %23701, %int2_21281 : !torch.int, !torch.int -> !torch.int
    %int16_21282 = torch.constant.int 16
    %int1_21283 = torch.constant.int 1
    %int128_21284 = torch.constant.int 128
    %23703 = torch.prim.ListConstruct %23702, %int16_21282, %int1_21283, %int128_21284 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23704 = torch.aten.view %23700, %23703 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23704, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23705 = torch.prim.ListConstruct %23658 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_21285 = torch.constant.bool false
    %23706 = torch.aten.index_put %23704, %23705, %23674, %false_21285 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23706, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_21286 = torch.constant.int 32
    %int2_21287 = torch.constant.int 2
    %int16_21288 = torch.constant.int 16
    %int1_21289 = torch.constant.int 1
    %int128_21290 = torch.constant.int 128
    %23707 = torch.prim.ListConstruct %3026, %int32_21286, %int2_21287, %int16_21288, %int1_21289, %int128_21290 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23708 = torch.aten.view %23706, %23707 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23708, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_21291 = torch.constant.int 131072
    %23709 = torch.prim.ListConstruct %3026, %int131072_21291 : (!torch.int, !torch.int) -> !torch.list<int>
    %23710 = torch.aten.view %23708, %23709 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %23710, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
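    // The same view -> flatten -> index_put -> restore sequence now repeats for the
    // remaining cache shards (%21861, %21873, %21885, %21897, %21909, %21921), each
    // with its own index vector and source slice.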
    %int32_21292 = torch.constant.int 32
    %int2_21293 = torch.constant.int 2
    %int16_21294 = torch.constant.int 16
    %int1_21295 = torch.constant.int 1
    %int128_21296 = torch.constant.int 128
    %23711 = torch.prim.ListConstruct %3029, %int32_21292, %int2_21293, %int16_21294, %int1_21295, %int128_21296 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23712 = torch.aten.view %21861, %23711 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23712, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_21297 = torch.constant.int 32
    %23713 = torch.aten.mul.int %3029, %int32_21297 : !torch.int, !torch.int -> !torch.int
    %int2_21298 = torch.constant.int 2
    %23714 = torch.aten.mul.int %23713, %int2_21298 : !torch.int, !torch.int -> !torch.int
    %int16_21299 = torch.constant.int 16
    %int1_21300 = torch.constant.int 1
    %int128_21301 = torch.constant.int 128
    %23715 = torch.prim.ListConstruct %23714, %int16_21299, %int1_21300, %int128_21301 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23716 = torch.aten.view %23712, %23715 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23716, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23717 = torch.prim.ListConstruct %23660 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_21302 = torch.constant.bool false
    %23718 = torch.aten.index_put %23716, %23717, %23676, %false_21302 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23718, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_21303 = torch.constant.int 32
    %int2_21304 = torch.constant.int 2
    %int16_21305 = torch.constant.int 16
    %int1_21306 = torch.constant.int 1
    %int128_21307 = torch.constant.int 128
    %23719 = torch.prim.ListConstruct %3029, %int32_21303, %int2_21304, %int16_21305, %int1_21306, %int128_21307 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23720 = torch.aten.view %23718, %23719 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23720, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_21308 = torch.constant.int 131072
    %23721 = torch.prim.ListConstruct %3029, %int131072_21308 : (!torch.int, !torch.int) -> !torch.list<int>
    %23722 = torch.aten.view %23720, %23721 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %23722, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_21309 = torch.constant.int 32
    %int2_21310 = torch.constant.int 2
    %int16_21311 = torch.constant.int 16
    %int1_21312 = torch.constant.int 1
    %int128_21313 = torch.constant.int 128
    %23723 = torch.prim.ListConstruct %3032, %int32_21309, %int2_21310, %int16_21311, %int1_21312, %int128_21313 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23724 = torch.aten.view %21873, %23723 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23724, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_21314 = torch.constant.int 32
    %23725 = torch.aten.mul.int %3032, %int32_21314 : !torch.int, !torch.int -> !torch.int
    %int2_21315 = torch.constant.int 2
    %23726 = torch.aten.mul.int %23725, %int2_21315 : !torch.int, !torch.int -> !torch.int
    %int16_21316 = torch.constant.int 16
    %int1_21317 = torch.constant.int 1
    %int128_21318 = torch.constant.int 128
    %23727 = torch.prim.ListConstruct %23726, %int16_21316, %int1_21317, %int128_21318 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23728 = torch.aten.view %23724, %23727 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23728, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23729 = torch.prim.ListConstruct %23662 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_21319 = torch.constant.bool false
    %23730 = torch.aten.index_put %23728, %23729, %23678, %false_21319 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23730, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_21320 = torch.constant.int 32
    %int2_21321 = torch.constant.int 2
    %int16_21322 = torch.constant.int 16
    %int1_21323 = torch.constant.int 1
    %int128_21324 = torch.constant.int 128
    %23731 = torch.prim.ListConstruct %3032, %int32_21320, %int2_21321, %int16_21322, %int1_21323, %int128_21324 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23732 = torch.aten.view %23730, %23731 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23732, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_21325 = torch.constant.int 131072
    %23733 = torch.prim.ListConstruct %3032, %int131072_21325 : (!torch.int, !torch.int) -> !torch.list<int>
    %23734 = torch.aten.view %23732, %23733 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %23734, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_21326 = torch.constant.int 32
    %int2_21327 = torch.constant.int 2
    %int16_21328 = torch.constant.int 16
    %int1_21329 = torch.constant.int 1
    %int128_21330 = torch.constant.int 128
    %23735 = torch.prim.ListConstruct %3035, %int32_21326, %int2_21327, %int16_21328, %int1_21329, %int128_21330 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23736 = torch.aten.view %21885, %23735 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23736, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_21331 = torch.constant.int 32
    %23737 = torch.aten.mul.int %3035, %int32_21331 : !torch.int, !torch.int -> !torch.int
    %int2_21332 = torch.constant.int 2
    %23738 = torch.aten.mul.int %23737, %int2_21332 : !torch.int, !torch.int -> !torch.int
    %int16_21333 = torch.constant.int 16
    %int1_21334 = torch.constant.int 1
    %int128_21335 = torch.constant.int 128
    %23739 = torch.prim.ListConstruct %23738, %int16_21333, %int1_21334, %int128_21335 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23740 = torch.aten.view %23736, %23739 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23740, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23741 = torch.prim.ListConstruct %23664 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_21336 = torch.constant.bool false
    %23742 = torch.aten.index_put %23740, %23741, %23680, %false_21336 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23742, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_21337 = torch.constant.int 32
    %int2_21338 = torch.constant.int 2
    %int16_21339 = torch.constant.int 16
    %int1_21340 = torch.constant.int 1
    %int128_21341 = torch.constant.int 128
    %23743 = torch.prim.ListConstruct %3035, %int32_21337, %int2_21338, %int16_21339, %int1_21340, %int128_21341 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23744 = torch.aten.view %23742, %23743 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23744, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_21342 = torch.constant.int 131072
    %23745 = torch.prim.ListConstruct %3035, %int131072_21342 : (!torch.int, !torch.int) -> !torch.list<int>
    %23746 = torch.aten.view %23744, %23745 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %23746, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_21343 = torch.constant.int 32
    %int2_21344 = torch.constant.int 2
    %int16_21345 = torch.constant.int 16
    %int1_21346 = torch.constant.int 1
    %int128_21347 = torch.constant.int 128
    %23747 = torch.prim.ListConstruct %3038, %int32_21343, %int2_21344, %int16_21345, %int1_21346, %int128_21347 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23748 = torch.aten.view %21897, %23747 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23748, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_21348 = torch.constant.int 32
    %23749 = torch.aten.mul.int %3038, %int32_21348 : !torch.int, !torch.int -> !torch.int
    %int2_21349 = torch.constant.int 2
    %23750 = torch.aten.mul.int %23749, %int2_21349 : !torch.int, !torch.int -> !torch.int
    %int16_21350 = torch.constant.int 16
    %int1_21351 = torch.constant.int 1
    %int128_21352 = torch.constant.int 128
    %23751 = torch.prim.ListConstruct %23750, %int16_21350, %int1_21351, %int128_21352 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23752 = torch.aten.view %23748, %23751 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23752, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23753 = torch.prim.ListConstruct %23666 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_21353 = torch.constant.bool false
    %23754 = torch.aten.index_put %23752, %23753, %23682, %false_21353 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23754, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_21354 = torch.constant.int 32
    %int2_21355 = torch.constant.int 2
    %int16_21356 = torch.constant.int 16
    %int1_21357 = torch.constant.int 1
    %int128_21358 = torch.constant.int 128
    %23755 = torch.prim.ListConstruct %3038, %int32_21354, %int2_21355, %int16_21356, %int1_21357, %int128_21358 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23756 = torch.aten.view %23754, %23755 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23756, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_21359 = torch.constant.int 131072
    %23757 = torch.prim.ListConstruct %3038, %int131072_21359 : (!torch.int, !torch.int) -> !torch.list<int>
    %23758 = torch.aten.view %23756, %23757 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %23758, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_21360 = torch.constant.int 32
    %int2_21361 = torch.constant.int 2
    %int16_21362 = torch.constant.int 16
    %int1_21363 = torch.constant.int 1
    %int128_21364 = torch.constant.int 128
    %23759 = torch.prim.ListConstruct %3041, %int32_21360, %int2_21361, %int16_21362, %int1_21363, %int128_21364 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23760 = torch.aten.view %21909, %23759 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23760, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_21365 = torch.constant.int 32
    %23761 = torch.aten.mul.int %3041, %int32_21365 : !torch.int, !torch.int -> !torch.int
    %int2_21366 = torch.constant.int 2
    %23762 = torch.aten.mul.int %23761, %int2_21366 : !torch.int, !torch.int -> !torch.int
    %int16_21367 = torch.constant.int 16
    %int1_21368 = torch.constant.int 1
    %int128_21369 = torch.constant.int 128
    %23763 = torch.prim.ListConstruct %23762, %int16_21367, %int1_21368, %int128_21369 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23764 = torch.aten.view %23760, %23763 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23764, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23765 = torch.prim.ListConstruct %23668 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_21370 = torch.constant.bool false
    %23766 = torch.aten.index_put %23764, %23765, %23684, %false_21370 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23766, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_21371 = torch.constant.int 32
    %int2_21372 = torch.constant.int 2
    %int16_21373 = torch.constant.int 16
    %int1_21374 = torch.constant.int 1
    %int128_21375 = torch.constant.int 128
    %23767 = torch.prim.ListConstruct %3041, %int32_21371, %int2_21372, %int16_21373, %int1_21374, %int128_21375 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23768 = torch.aten.view %23766, %23767 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23768, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_21376 = torch.constant.int 131072
    %23769 = torch.prim.ListConstruct %3041, %int131072_21376 : (!torch.int, !torch.int) -> !torch.list<int>
    %23770 = torch.aten.view %23768, %23769 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %23770, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_21377 = torch.constant.int 32
    %int2_21378 = torch.constant.int 2
    %int16_21379 = torch.constant.int 16
    %int1_21380 = torch.constant.int 1
    %int128_21381 = torch.constant.int 128
    %23771 = torch.prim.ListConstruct %3044, %int32_21377, %int2_21378, %int16_21379, %int1_21380, %int128_21381 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23772 = torch.aten.view %21921, %23771 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23772, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_21382 = torch.constant.int 32
    %23773 = torch.aten.mul.int %3044, %int32_21382 : !torch.int, !torch.int -> !torch.int
    %int2_21383 = torch.constant.int 2
    %23774 = torch.aten.mul.int %23773, %int2_21383 : !torch.int, !torch.int -> !torch.int
    %int16_21384 = torch.constant.int 16
    %int1_21385 = torch.constant.int 1
    %int128_21386 = torch.constant.int 128
    %23775 = torch.prim.ListConstruct %23774, %int16_21384, %int1_21385, %int128_21386 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23776 = torch.aten.view %23772, %23775 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23776, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %23777 = torch.prim.ListConstruct %23670 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_21387 = torch.constant.bool false
    %23778 = torch.aten.index_put %23776, %23777, %23686, %false_21387 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %23778, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_21388 = torch.constant.int 32
    %int2_21389 = torch.constant.int 2
    %int16_21390 = torch.constant.int 16
    %int1_21391 = torch.constant.int 1
    %int128_21392 = torch.constant.int 128
    %23779 = torch.prim.ListConstruct %3044, %int32_21388, %int2_21389, %int16_21390, %int1_21391, %int128_21392 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23780 = torch.aten.view %23778, %23779 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %23780, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_21393 = torch.constant.int 131072
    %23781 = torch.prim.ListConstruct %3044, %int131072_21393 : (!torch.int, !torch.int) -> !torch.list<int>
    %23782 = torch.aten.view %23780, %23781 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %23782, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
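    // End of the cache updates. Next, each per-shard tensor of shape
    // [4, seq, 1, 128] (used below as the attention keys, the second SDPA operand)
    // gains a broadcast dim at -2, giving [4, seq, 1, 1, 128]. With 4 query heads
    // but only 1 KV head per shard, this looks like the usual grouped-query
    // attention head replication.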
    %int-2_21394 = torch.constant.int -2
    %23783 = torch.aten.unsqueeze %23397, %int-2_21394 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21395 = torch.constant.int -2
    %23784 = torch.aten.unsqueeze %23412, %int-2_21395 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21396 = torch.constant.int -2
    %23785 = torch.aten.unsqueeze %23427, %int-2_21396 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21397 = torch.constant.int -2
    %23786 = torch.aten.unsqueeze %23442, %int-2_21397 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21398 = torch.constant.int -2
    %23787 = torch.aten.unsqueeze %23457, %int-2_21398 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21399 = torch.constant.int -2
    %23788 = torch.aten.unsqueeze %23472, %int-2_21399 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21400 = torch.constant.int -2
    %23789 = torch.aten.unsqueeze %23487, %int-2_21400 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21401 = torch.constant.int -2
    %23790 = torch.aten.unsqueeze %23502, %int-2_21401 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
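    // expand replicates the singleton KV-head dim 4x, to [4, seq, 1, 4, 128];
    // %23383 carries the dynamic sequence length. In PyTorch semantics aten.expand
    // is a non-copying broadcast view, though the compiler may later materialize it.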
    %int4_21402 = torch.constant.int 4
    %int1_21403 = torch.constant.int 1
    %int4_21404 = torch.constant.int 4
    %int128_21405 = torch.constant.int 128
    %23791 = torch.prim.ListConstruct %int4_21402, %23383, %int1_21403, %int4_21404, %int128_21405 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21406 = torch.constant.bool false
    %23792 = torch.aten.expand %23783, %23791, %false_21406 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21407 = torch.constant.int 4
    %int1_21408 = torch.constant.int 1
    %int4_21409 = torch.constant.int 4
    %int128_21410 = torch.constant.int 128
    %23793 = torch.prim.ListConstruct %int4_21407, %23383, %int1_21408, %int4_21409, %int128_21410 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21411 = torch.constant.bool false
    %23794 = torch.aten.expand %23784, %23793, %false_21411 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21412 = torch.constant.int 4
    %int1_21413 = torch.constant.int 1
    %int4_21414 = torch.constant.int 4
    %int128_21415 = torch.constant.int 128
    %23795 = torch.prim.ListConstruct %int4_21412, %23383, %int1_21413, %int4_21414, %int128_21415 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21416 = torch.constant.bool false
    %23796 = torch.aten.expand %23785, %23795, %false_21416 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21417 = torch.constant.int 4
    %int1_21418 = torch.constant.int 1
    %int4_21419 = torch.constant.int 4
    %int128_21420 = torch.constant.int 128
    %23797 = torch.prim.ListConstruct %int4_21417, %23383, %int1_21418, %int4_21419, %int128_21420 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21421 = torch.constant.bool false
    %23798 = torch.aten.expand %23786, %23797, %false_21421 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21422 = torch.constant.int 4
    %int1_21423 = torch.constant.int 1
    %int4_21424 = torch.constant.int 4
    %int128_21425 = torch.constant.int 128
    %23799 = torch.prim.ListConstruct %int4_21422, %23383, %int1_21423, %int4_21424, %int128_21425 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21426 = torch.constant.bool false
    %23800 = torch.aten.expand %23787, %23799, %false_21426 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21427 = torch.constant.int 4
    %int1_21428 = torch.constant.int 1
    %int4_21429 = torch.constant.int 4
    %int128_21430 = torch.constant.int 128
    %23801 = torch.prim.ListConstruct %int4_21427, %23383, %int1_21428, %int4_21429, %int128_21430 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21431 = torch.constant.bool false
    %23802 = torch.aten.expand %23788, %23801, %false_21431 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21432 = torch.constant.int 4
    %int1_21433 = torch.constant.int 1
    %int4_21434 = torch.constant.int 4
    %int128_21435 = torch.constant.int 128
    %23803 = torch.prim.ListConstruct %int4_21432, %23383, %int1_21433, %int4_21434, %int128_21435 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21436 = torch.constant.bool false
    %23804 = torch.aten.expand %23789, %23803, %false_21436 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21437 = torch.constant.int 4
    %int1_21438 = torch.constant.int 1
    %int4_21439 = torch.constant.int 4
    %int128_21440 = torch.constant.int 128
    %23805 = torch.prim.ListConstruct %int4_21437, %23383, %int1_21438, %int4_21439, %int128_21440 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21441 = torch.constant.bool false
    %23806 = torch.aten.expand %23790, %23805, %false_21441 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
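    // The [4, seq, 1, 4, 128] tensors are then flattened to [4, seq, 4, 128],
    // completing the 1 -> 4 head replication for the keys.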
    %int4_21442 = torch.constant.int 4
    %int4_21443 = torch.constant.int 4
    %int128_21444 = torch.constant.int 128
    %23807 = torch.prim.ListConstruct %int4_21442, %23383, %int4_21443, %int128_21444 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23808 = torch.aten.view %23792, %23807 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21445 = torch.constant.int 4
    %int4_21446 = torch.constant.int 4
    %int128_21447 = torch.constant.int 128
    %23809 = torch.prim.ListConstruct %int4_21445, %23383, %int4_21446, %int128_21447 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23810 = torch.aten.view %23794, %23809 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21448 = torch.constant.int 4
    %int4_21449 = torch.constant.int 4
    %int128_21450 = torch.constant.int 128
    %23811 = torch.prim.ListConstruct %int4_21448, %23383, %int4_21449, %int128_21450 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23812 = torch.aten.view %23796, %23811 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21451 = torch.constant.int 4
    %int4_21452 = torch.constant.int 4
    %int128_21453 = torch.constant.int 128
    %23813 = torch.prim.ListConstruct %int4_21451, %23383, %int4_21452, %int128_21453 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23814 = torch.aten.view %23798, %23813 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21454 = torch.constant.int 4
    %int4_21455 = torch.constant.int 4
    %int128_21456 = torch.constant.int 128
    %23815 = torch.prim.ListConstruct %int4_21454, %23383, %int4_21455, %int128_21456 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23816 = torch.aten.view %23800, %23815 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21457 = torch.constant.int 4
    %int4_21458 = torch.constant.int 4
    %int128_21459 = torch.constant.int 128
    %23817 = torch.prim.ListConstruct %int4_21457, %23383, %int4_21458, %int128_21459 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23818 = torch.aten.view %23802, %23817 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21460 = torch.constant.int 4
    %int4_21461 = torch.constant.int 4
    %int128_21462 = torch.constant.int 128
    %23819 = torch.prim.ListConstruct %int4_21460, %23383, %int4_21461, %int128_21462 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23820 = torch.aten.view %23804, %23819 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21463 = torch.constant.int 4
    %int4_21464 = torch.constant.int 4
    %int128_21465 = torch.constant.int 128
    %23821 = torch.prim.ListConstruct %int4_21463, %23383, %int4_21464, %int128_21465 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23822 = torch.aten.view %23806, %23821 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
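    // Same unsqueeze/expand/view sequence for the second operand group
    // (%23172 .. %23186), used below as the attention values.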
    %int-2_21466 = torch.constant.int -2
    %23823 = torch.aten.unsqueeze %23172, %int-2_21466 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21467 = torch.constant.int -2
    %23824 = torch.aten.unsqueeze %23174, %int-2_21467 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21468 = torch.constant.int -2
    %23825 = torch.aten.unsqueeze %23176, %int-2_21468 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21469 = torch.constant.int -2
    %23826 = torch.aten.unsqueeze %23178, %int-2_21469 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21470 = torch.constant.int -2
    %23827 = torch.aten.unsqueeze %23180, %int-2_21470 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21471 = torch.constant.int -2
    %23828 = torch.aten.unsqueeze %23182, %int-2_21471 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21472 = torch.constant.int -2
    %23829 = torch.aten.unsqueeze %23184, %int-2_21472 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_21473 = torch.constant.int -2
    %23830 = torch.aten.unsqueeze %23186, %int-2_21473 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %23830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
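    // The dynamic sequence length is re-queried from %23096 (dim 1 of a
    // [4, seq, 128] tensor) before building the broadcast shapes for the values.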
    %int1_21474 = torch.constant.int 1
    %23831 = torch.aten.size.int %23096, %int1_21474 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_21475 = torch.constant.int 4
    %int1_21476 = torch.constant.int 1
    %int4_21477 = torch.constant.int 4
    %int128_21478 = torch.constant.int 128
    %23832 = torch.prim.ListConstruct %int4_21475, %23831, %int1_21476, %int4_21477, %int128_21478 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21479 = torch.constant.bool false
    %23833 = torch.aten.expand %23823, %23832, %false_21479 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21480 = torch.constant.int 4
    %int1_21481 = torch.constant.int 1
    %int4_21482 = torch.constant.int 4
    %int128_21483 = torch.constant.int 128
    %23834 = torch.prim.ListConstruct %int4_21480, %23831, %int1_21481, %int4_21482, %int128_21483 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21484 = torch.constant.bool false
    %23835 = torch.aten.expand %23824, %23834, %false_21484 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21485 = torch.constant.int 4
    %int1_21486 = torch.constant.int 1
    %int4_21487 = torch.constant.int 4
    %int128_21488 = torch.constant.int 128
    %23836 = torch.prim.ListConstruct %int4_21485, %23831, %int1_21486, %int4_21487, %int128_21488 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21489 = torch.constant.bool false
    %23837 = torch.aten.expand %23825, %23836, %false_21489 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21490 = torch.constant.int 4
    %int1_21491 = torch.constant.int 1
    %int4_21492 = torch.constant.int 4
    %int128_21493 = torch.constant.int 128
    %23838 = torch.prim.ListConstruct %int4_21490, %23831, %int1_21491, %int4_21492, %int128_21493 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21494 = torch.constant.bool false
    %23839 = torch.aten.expand %23826, %23838, %false_21494 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21495 = torch.constant.int 4
    %int1_21496 = torch.constant.int 1
    %int4_21497 = torch.constant.int 4
    %int128_21498 = torch.constant.int 128
    %23840 = torch.prim.ListConstruct %int4_21495, %23831, %int1_21496, %int4_21497, %int128_21498 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21499 = torch.constant.bool false
    %23841 = torch.aten.expand %23827, %23840, %false_21499 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21500 = torch.constant.int 4
    %int1_21501 = torch.constant.int 1
    %int4_21502 = torch.constant.int 4
    %int128_21503 = torch.constant.int 128
    %23842 = torch.prim.ListConstruct %int4_21500, %23831, %int1_21501, %int4_21502, %int128_21503 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21504 = torch.constant.bool false
    %23843 = torch.aten.expand %23828, %23842, %false_21504 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21505 = torch.constant.int 4
    %int1_21506 = torch.constant.int 1
    %int4_21507 = torch.constant.int 4
    %int128_21508 = torch.constant.int 128
    %23844 = torch.prim.ListConstruct %int4_21505, %23831, %int1_21506, %int4_21507, %int128_21508 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21509 = torch.constant.bool false
    %23845 = torch.aten.expand %23829, %23844, %false_21509 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_21510 = torch.constant.int 4
    %int1_21511 = torch.constant.int 1
    %int4_21512 = torch.constant.int 4
    %int128_21513 = torch.constant.int 128
    %23846 = torch.prim.ListConstruct %int4_21510, %23831, %int1_21511, %int4_21512, %int128_21513 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_21514 = torch.constant.bool false
    %23847 = torch.aten.expand %23830, %23846, %false_21514 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %23847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
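    // Flatten the expanded values back to [4, seq, 4, 128], mirroring the key path.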
    %int4_21515 = torch.constant.int 4
    %int4_21516 = torch.constant.int 4
    %int128_21517 = torch.constant.int 128
    %23848 = torch.prim.ListConstruct %int4_21515, %23831, %int4_21516, %int128_21517 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23849 = torch.aten.view %23833, %23848 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21518 = torch.constant.int 4
    %int4_21519 = torch.constant.int 4
    %int128_21520 = torch.constant.int 128
    %23850 = torch.prim.ListConstruct %int4_21518, %23831, %int4_21519, %int128_21520 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23851 = torch.aten.view %23835, %23850 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21521 = torch.constant.int 4
    %int4_21522 = torch.constant.int 4
    %int128_21523 = torch.constant.int 128
    %23852 = torch.prim.ListConstruct %int4_21521, %23831, %int4_21522, %int128_21523 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23853 = torch.aten.view %23837, %23852 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21524 = torch.constant.int 4
    %int4_21525 = torch.constant.int 4
    %int128_21526 = torch.constant.int 128
    %23854 = torch.prim.ListConstruct %int4_21524, %23831, %int4_21525, %int128_21526 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23855 = torch.aten.view %23839, %23854 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21527 = torch.constant.int 4
    %int4_21528 = torch.constant.int 4
    %int128_21529 = torch.constant.int 128
    %23856 = torch.prim.ListConstruct %int4_21527, %23831, %int4_21528, %int128_21529 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23857 = torch.aten.view %23841, %23856 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21530 = torch.constant.int 4
    %int4_21531 = torch.constant.int 4
    %int128_21532 = torch.constant.int 128
    %23858 = torch.prim.ListConstruct %int4_21530, %23831, %int4_21531, %int128_21532 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23859 = torch.aten.view %23843, %23858 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21533 = torch.constant.int 4
    %int4_21534 = torch.constant.int 4
    %int128_21535 = torch.constant.int 128
    %23860 = torch.prim.ListConstruct %int4_21533, %23831, %int4_21534, %int128_21535 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23861 = torch.aten.view %23845, %23860 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_21536 = torch.constant.int 4
    %int4_21537 = torch.constant.int 4
    %int128_21538 = torch.constant.int 128
    %23862 = torch.prim.ListConstruct %int4_21536, %23831, %int4_21537, %int128_21538 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23863 = torch.aten.view %23847, %23862 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
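    // Transpose dims 1 and 2 of the queries (%23239 .. %23344), the replicated keys
    // (%23808 ..), and the replicated values (%23849 ..) to obtain the
    // [batch, heads, seq, dim] = [4, 4, seq, 128] layout the attention op expects.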
    %int1_21539 = torch.constant.int 1
    %int2_21540 = torch.constant.int 2
    %23864 = torch.aten.transpose.int %23239, %int1_21539, %int2_21540 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23864, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21541 = torch.constant.int 1
    %int2_21542 = torch.constant.int 2
    %23865 = torch.aten.transpose.int %23254, %int1_21541, %int2_21542 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23865, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21543 = torch.constant.int 1
    %int2_21544 = torch.constant.int 2
    %23866 = torch.aten.transpose.int %23269, %int1_21543, %int2_21544 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23866, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21545 = torch.constant.int 1
    %int2_21546 = torch.constant.int 2
    %23867 = torch.aten.transpose.int %23284, %int1_21545, %int2_21546 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23867, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21547 = torch.constant.int 1
    %int2_21548 = torch.constant.int 2
    %23868 = torch.aten.transpose.int %23299, %int1_21547, %int2_21548 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23868, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21549 = torch.constant.int 1
    %int2_21550 = torch.constant.int 2
    %23869 = torch.aten.transpose.int %23314, %int1_21549, %int2_21550 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23869, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21551 = torch.constant.int 1
    %int2_21552 = torch.constant.int 2
    %23870 = torch.aten.transpose.int %23329, %int1_21551, %int2_21552 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23870, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21553 = torch.constant.int 1
    %int2_21554 = torch.constant.int 2
    %23871 = torch.aten.transpose.int %23344, %int1_21553, %int2_21554 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23871, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21555 = torch.constant.int 1
    %int2_21556 = torch.constant.int 2
    %23872 = torch.aten.transpose.int %23808, %int1_21555, %int2_21556 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23872, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21557 = torch.constant.int 1
    %int2_21558 = torch.constant.int 2
    %23873 = torch.aten.transpose.int %23810, %int1_21557, %int2_21558 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23873, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21559 = torch.constant.int 1
    %int2_21560 = torch.constant.int 2
    %23874 = torch.aten.transpose.int %23812, %int1_21559, %int2_21560 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23874, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21561 = torch.constant.int 1
    %int2_21562 = torch.constant.int 2
    %23875 = torch.aten.transpose.int %23814, %int1_21561, %int2_21562 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23875, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21563 = torch.constant.int 1
    %int2_21564 = torch.constant.int 2
    %23876 = torch.aten.transpose.int %23816, %int1_21563, %int2_21564 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23876, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21565 = torch.constant.int 1
    %int2_21566 = torch.constant.int 2
    %23877 = torch.aten.transpose.int %23818, %int1_21565, %int2_21566 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23877, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21567 = torch.constant.int 1
    %int2_21568 = torch.constant.int 2
    %23878 = torch.aten.transpose.int %23820, %int1_21567, %int2_21568 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23878, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21569 = torch.constant.int 1
    %int2_21570 = torch.constant.int 2
    %23879 = torch.aten.transpose.int %23822, %int1_21569, %int2_21570 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23879, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21571 = torch.constant.int 1
    %int2_21572 = torch.constant.int 2
    %23880 = torch.aten.transpose.int %23849, %int1_21571, %int2_21572 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23880, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21573 = torch.constant.int 1
    %int2_21574 = torch.constant.int 2
    %23881 = torch.aten.transpose.int %23851, %int1_21573, %int2_21574 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23881, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21575 = torch.constant.int 1
    %int2_21576 = torch.constant.int 2
    %23882 = torch.aten.transpose.int %23853, %int1_21575, %int2_21576 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23882, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21577 = torch.constant.int 1
    %int2_21578 = torch.constant.int 2
    %23883 = torch.aten.transpose.int %23855, %int1_21577, %int2_21578 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23883, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21579 = torch.constant.int 1
    %int2_21580 = torch.constant.int 2
    %23884 = torch.aten.transpose.int %23857, %int1_21579, %int2_21580 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23884, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21581 = torch.constant.int 1
    %int2_21582 = torch.constant.int 2
    %23885 = torch.aten.transpose.int %23859, %int1_21581, %int2_21582 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23885, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21583 = torch.constant.int 1
    %int2_21584 = torch.constant.int 2
    %23886 = torch.aten.transpose.int %23861, %int1_21583, %int2_21584 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23886, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_21585 = torch.constant.int 1
    %int2_21586 = torch.constant.int 2
    %23887 = torch.aten.transpose.int %23863, %int1_21585, %int2_21586 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %23887, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
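    // Eight causal flash-attention calls, one per shard: dropout 0.0, is_causal =
    // true, no explicit mask or scale (so presumably the default 1/sqrt(128)
    // scaling). Each returns the f16 attention output plus an f32 logsumexp tensor.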
    %float0.000000e00_21587 = torch.constant.float 0.000000e+00
    %true_21588 = torch.constant.bool true
    %none_21589 = torch.constant.none
    %none_21590 = torch.constant.none
    %23888:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%23864, %23872, %23880, %float0.000000e00_21587, %true_21588, %none_21589, %none_21590) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %23888#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_21591 = torch.constant.float 0.000000e+00
    %true_21592 = torch.constant.bool true
    %none_21593 = torch.constant.none
    %none_21594 = torch.constant.none
    %23889:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%23865, %23873, %23881, %float0.000000e00_21591, %true_21592, %none_21593, %none_21594) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %23889#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_21595 = torch.constant.float 0.000000e+00
    %true_21596 = torch.constant.bool true
    %none_21597 = torch.constant.none
    %none_21598 = torch.constant.none
    %23890:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%23866, %23874, %23882, %float0.000000e00_21595, %true_21596, %none_21597, %none_21598) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %23890#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_21599 = torch.constant.float 0.000000e+00
    %true_21600 = torch.constant.bool true
    %none_21601 = torch.constant.none
    %none_21602 = torch.constant.none
    %23891:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%23867, %23875, %23883, %float0.000000e00_21599, %true_21600, %none_21601, %none_21602) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %23891#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_21603 = torch.constant.float 0.000000e+00
    %true_21604 = torch.constant.bool true
    %none_21605 = torch.constant.none
    %none_21606 = torch.constant.none
    %23892:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%23868, %23876, %23884, %float0.000000e00_21603, %true_21604, %none_21605, %none_21606) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %23892#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_21607 = torch.constant.float 0.000000e+00
    %true_21608 = torch.constant.bool true
    %none_21609 = torch.constant.none
    %none_21610 = torch.constant.none
    %23893:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%23869, %23877, %23885, %float0.000000e00_21607, %true_21608, %none_21609, %none_21610) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %23893#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_21611 = torch.constant.float 0.000000e+00
    %true_21612 = torch.constant.bool true
    %none_21613 = torch.constant.none
    %none_21614 = torch.constant.none
    %23894:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%23870, %23878, %23886, %float0.000000e00_21611, %true_21612, %none_21613, %none_21614) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %23894#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_21615 = torch.constant.float 0.000000e+00
    %true_21616 = torch.constant.bool true
    %none_21617 = torch.constant.none
    %none_21618 = torch.constant.none
    %23895:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%23871, %23879, %23887, %float0.000000e00_21615, %true_21616, %none_21617, %none_21618) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %23895#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
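    // Move heads after the sequence dimension: transpose each SDPA output from
    // [4,4,?,128] (batch, heads, seq, head_dim) to [4,?,4,128] in preparation
    // for merging the four heads.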
    %int1_21619 = torch.constant.int 1
    %int2_21620 = torch.constant.int 2
    %23896 = torch.aten.transpose.int %23888#0, %int1_21619, %int2_21620 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_21621 = torch.constant.int 1
    %int2_21622 = torch.constant.int 2
    %23897 = torch.aten.transpose.int %23889#0, %int1_21621, %int2_21622 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_21623 = torch.constant.int 1
    %int2_21624 = torch.constant.int 2
    %23898 = torch.aten.transpose.int %23890#0, %int1_21623, %int2_21624 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_21625 = torch.constant.int 1
    %int2_21626 = torch.constant.int 2
    %23899 = torch.aten.transpose.int %23891#0, %int1_21625, %int2_21626 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_21627 = torch.constant.int 1
    %int2_21628 = torch.constant.int 2
    %23900 = torch.aten.transpose.int %23892#0, %int1_21627, %int2_21628 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_21629 = torch.constant.int 1
    %int2_21630 = torch.constant.int 2
    %23901 = torch.aten.transpose.int %23893#0, %int1_21629, %int2_21630 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_21631 = torch.constant.int 1
    %int2_21632 = torch.constant.int 2
    %23902 = torch.aten.transpose.int %23894#0, %int1_21631, %int2_21632 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_21633 = torch.constant.int 1
    %int2_21634 = torch.constant.int 2
    %23903 = torch.aten.transpose.int %23895#0, %int1_21633, %int2_21634 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %23903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
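    // Merge the four heads: view each [4,?,4,128] tensor as [4,?,512]
    // (4 x 128 = 512 features per token), using each shard's dynamic sequence
    // length (%23225, %23240, ..., %23330).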
    %int4_21635 = torch.constant.int 4
    %int512_21636 = torch.constant.int 512
    %23904 = torch.prim.ListConstruct %int4_21635, %23225, %int512_21636 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23905 = torch.aten.view %23896, %23904 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %23905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_21637 = torch.constant.int 4
    %int512_21638 = torch.constant.int 512
    %23906 = torch.prim.ListConstruct %int4_21637, %23240, %int512_21638 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23907 = torch.aten.view %23897, %23906 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %23907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_21639 = torch.constant.int 4
    %int512_21640 = torch.constant.int 512
    %23908 = torch.prim.ListConstruct %int4_21639, %23255, %int512_21640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23909 = torch.aten.view %23898, %23908 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %23909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_21641 = torch.constant.int 4
    %int512_21642 = torch.constant.int 512
    %23910 = torch.prim.ListConstruct %int4_21641, %23270, %int512_21642 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23911 = torch.aten.view %23899, %23910 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %23911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_21643 = torch.constant.int 4
    %int512_21644 = torch.constant.int 512
    %23912 = torch.prim.ListConstruct %int4_21643, %23285, %int512_21644 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23913 = torch.aten.view %23900, %23912 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %23913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_21645 = torch.constant.int 4
    %int512_21646 = torch.constant.int 512
    %23914 = torch.prim.ListConstruct %int4_21645, %23300, %int512_21646 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23915 = torch.aten.view %23901, %23914 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %23915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_21647 = torch.constant.int 4
    %int512_21648 = torch.constant.int 512
    %23916 = torch.prim.ListConstruct %int4_21647, %23315, %int512_21648 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23917 = torch.aten.view %23902, %23916 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %23917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_21649 = torch.constant.int 4
    %int512_21650 = torch.constant.int 512
    %23918 = torch.prim.ListConstruct %int4_21649, %23330, %int512_21650 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23919 = torch.aten.view %23903, %23918 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %23919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
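    // Transpose the eight attention-output weight shards %832..%839 from
    // [4096,512] to [512,4096] so they can be applied with torch.aten.mm.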
    %int1_21651 = torch.constant.int 1
    %int0_21652 = torch.constant.int 0
    %23920 = torch.prim.ListConstruct %int1_21651, %int0_21652 : (!torch.int, !torch.int) -> !torch.list<int>
    %23921 = torch.aten.permute %832, %23920 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_21653 = torch.constant.int 1
    %int0_21654 = torch.constant.int 0
    %23922 = torch.prim.ListConstruct %int1_21653, %int0_21654 : (!torch.int, !torch.int) -> !torch.list<int>
    %23923 = torch.aten.permute %833, %23922 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_21655 = torch.constant.int 1
    %int0_21656 = torch.constant.int 0
    %23924 = torch.prim.ListConstruct %int1_21655, %int0_21656 : (!torch.int, !torch.int) -> !torch.list<int>
    %23925 = torch.aten.permute %834, %23924 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_21657 = torch.constant.int 1
    %int0_21658 = torch.constant.int 0
    %23926 = torch.prim.ListConstruct %int1_21657, %int0_21658 : (!torch.int, !torch.int) -> !torch.list<int>
    %23927 = torch.aten.permute %835, %23926 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_21659 = torch.constant.int 1
    %int0_21660 = torch.constant.int 0
    %23928 = torch.prim.ListConstruct %int1_21659, %int0_21660 : (!torch.int, !torch.int) -> !torch.list<int>
    %23929 = torch.aten.permute %836, %23928 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_21661 = torch.constant.int 1
    %int0_21662 = torch.constant.int 0
    %23930 = torch.prim.ListConstruct %int1_21661, %int0_21662 : (!torch.int, !torch.int) -> !torch.list<int>
    %23931 = torch.aten.permute %837, %23930 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_21663 = torch.constant.int 1
    %int0_21664 = torch.constant.int 0
    %23932 = torch.prim.ListConstruct %int1_21663, %int0_21664 : (!torch.int, !torch.int) -> !torch.list<int>
    %23933 = torch.aten.permute %838, %23932 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_21665 = torch.constant.int 1
    %int0_21666 = torch.constant.int 0
    %23934 = torch.prim.ListConstruct %int1_21665, %int0_21666 : (!torch.int, !torch.int) -> !torch.list<int>
    %23935 = torch.aten.permute %839, %23934 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
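    // Per-shard output projection: flatten [4,?,512] to [?,512] (batch * seq
    // rows), matmul with the transposed [512,4096] weight shard, and restore
    // the [4,?,4096] shape. Each of %23941, %23947, ..., %23983 is a partial
    // projection that still has to be summed across all eight shards.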
    %int4_21667 = torch.constant.int 4
    %23936 = torch.aten.mul.int %int4_21667, %23225 : !torch.int, !torch.int -> !torch.int
    %int512_21668 = torch.constant.int 512
    %23937 = torch.prim.ListConstruct %23936, %int512_21668 : (!torch.int, !torch.int) -> !torch.list<int>
    %23938 = torch.aten.view %23905, %23937 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %23938, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %23939 = torch.aten.mm %23938, %23921 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23939, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_21669 = torch.constant.int 4
    %int4096_21670 = torch.constant.int 4096
    %23940 = torch.prim.ListConstruct %int4_21669, %23225, %int4096_21670 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23941 = torch.aten.view %23939, %23940 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_21671 = torch.constant.int 4
    %23942 = torch.aten.mul.int %int4_21671, %23240 : !torch.int, !torch.int -> !torch.int
    %int512_21672 = torch.constant.int 512
    %23943 = torch.prim.ListConstruct %23942, %int512_21672 : (!torch.int, !torch.int) -> !torch.list<int>
    %23944 = torch.aten.view %23907, %23943 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %23944, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %23945 = torch.aten.mm %23944, %23923 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23945, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_21673 = torch.constant.int 4
    %int4096_21674 = torch.constant.int 4096
    %23946 = torch.prim.ListConstruct %int4_21673, %23240, %int4096_21674 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23947 = torch.aten.view %23945, %23946 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_21675 = torch.constant.int 4
    %23948 = torch.aten.mul.int %int4_21675, %23255 : !torch.int, !torch.int -> !torch.int
    %int512_21676 = torch.constant.int 512
    %23949 = torch.prim.ListConstruct %23948, %int512_21676 : (!torch.int, !torch.int) -> !torch.list<int>
    %23950 = torch.aten.view %23909, %23949 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %23950, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %23951 = torch.aten.mm %23950, %23925 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23951, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_21677 = torch.constant.int 4
    %int4096_21678 = torch.constant.int 4096
    %23952 = torch.prim.ListConstruct %int4_21677, %23255, %int4096_21678 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23953 = torch.aten.view %23951, %23952 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_21679 = torch.constant.int 4
    %23954 = torch.aten.mul.int %int4_21679, %23270 : !torch.int, !torch.int -> !torch.int
    %int512_21680 = torch.constant.int 512
    %23955 = torch.prim.ListConstruct %23954, %int512_21680 : (!torch.int, !torch.int) -> !torch.list<int>
    %23956 = torch.aten.view %23911, %23955 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %23956, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %23957 = torch.aten.mm %23956, %23927 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23957, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_21681 = torch.constant.int 4
    %int4096_21682 = torch.constant.int 4096
    %23958 = torch.prim.ListConstruct %int4_21681, %23270, %int4096_21682 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23959 = torch.aten.view %23957, %23958 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_21683 = torch.constant.int 4
    %23960 = torch.aten.mul.int %int4_21683, %23285 : !torch.int, !torch.int -> !torch.int
    %int512_21684 = torch.constant.int 512
    %23961 = torch.prim.ListConstruct %23960, %int512_21684 : (!torch.int, !torch.int) -> !torch.list<int>
    %23962 = torch.aten.view %23913, %23961 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %23962, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %23963 = torch.aten.mm %23962, %23929 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23963, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_21685 = torch.constant.int 4
    %int4096_21686 = torch.constant.int 4096
    %23964 = torch.prim.ListConstruct %int4_21685, %23285, %int4096_21686 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23965 = torch.aten.view %23963, %23964 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_21687 = torch.constant.int 4
    %23966 = torch.aten.mul.int %int4_21687, %23300 : !torch.int, !torch.int -> !torch.int
    %int512_21688 = torch.constant.int 512
    %23967 = torch.prim.ListConstruct %23966, %int512_21688 : (!torch.int, !torch.int) -> !torch.list<int>
    %23968 = torch.aten.view %23915, %23967 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %23968, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %23969 = torch.aten.mm %23968, %23931 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23969, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_21689 = torch.constant.int 4
    %int4096_21690 = torch.constant.int 4096
    %23970 = torch.prim.ListConstruct %int4_21689, %23300, %int4096_21690 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23971 = torch.aten.view %23969, %23970 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_21691 = torch.constant.int 4
    %23972 = torch.aten.mul.int %int4_21691, %23315 : !torch.int, !torch.int -> !torch.int
    %int512_21692 = torch.constant.int 512
    %23973 = torch.prim.ListConstruct %23972, %int512_21692 : (!torch.int, !torch.int) -> !torch.list<int>
    %23974 = torch.aten.view %23917, %23973 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %23974, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %23975 = torch.aten.mm %23974, %23933 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23975, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_21693 = torch.constant.int 4
    %int4096_21694 = torch.constant.int 4096
    %23976 = torch.prim.ListConstruct %int4_21693, %23315, %int4096_21694 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23977 = torch.aten.view %23975, %23976 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_21695 = torch.constant.int 4
    %23978 = torch.aten.mul.int %int4_21695, %23330 : !torch.int, !torch.int -> !torch.int
    %int512_21696 = torch.constant.int 512
    %23979 = torch.prim.ListConstruct %23978, %int512_21696 : (!torch.int, !torch.int) -> !torch.list<int>
    %23980 = torch.aten.view %23919, %23979 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %23980, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %23981 = torch.aten.mm %23980, %23935 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %23981, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_21697 = torch.constant.int 4
    %int4096_21698 = torch.constant.int 4096
    %23982 = torch.prim.ListConstruct %int4_21697, %23330, %int4096_21698 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %23983 = torch.aten.view %23981, %23982 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
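    // Effectively an unrolled all-reduce, one copy per device. For
    // @__device_0: the seven non-local partials (%23947 .. %23983) are moved
    // over with flow.tensor.transfer (each with its dynamic seq dimension
    // queried via tensor.dim), then folded into the local partial %23941 by a
    // chain of torch.aten.add.Tensor ops.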
    %23984 = torch_c.to_builtin_tensor %23947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21699 = arith.constant 1 : index
    %dim_21700 = tensor.dim %23984, %c1_21699 : tensor<4x?x4096xf16>
    %23985 = flow.tensor.transfer %23984 : tensor<4x?x4096xf16>{%dim_21700} to #hal.device.promise<@__device_0>
    %23986 = torch_c.from_builtin_tensor %23985 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %23987 = torch_c.to_builtin_tensor %23953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21701 = arith.constant 1 : index
    %dim_21702 = tensor.dim %23987, %c1_21701 : tensor<4x?x4096xf16>
    %23988 = flow.tensor.transfer %23987 : tensor<4x?x4096xf16>{%dim_21702} to #hal.device.promise<@__device_0>
    %23989 = torch_c.from_builtin_tensor %23988 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %23990 = torch_c.to_builtin_tensor %23959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21703 = arith.constant 1 : index
    %dim_21704 = tensor.dim %23990, %c1_21703 : tensor<4x?x4096xf16>
    %23991 = flow.tensor.transfer %23990 : tensor<4x?x4096xf16>{%dim_21704} to #hal.device.promise<@__device_0>
    %23992 = torch_c.from_builtin_tensor %23991 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %23993 = torch_c.to_builtin_tensor %23965 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21705 = arith.constant 1 : index
    %dim_21706 = tensor.dim %23993, %c1_21705 : tensor<4x?x4096xf16>
    %23994 = flow.tensor.transfer %23993 : tensor<4x?x4096xf16>{%dim_21706} to #hal.device.promise<@__device_0>
    %23995 = torch_c.from_builtin_tensor %23994 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %23996 = torch_c.to_builtin_tensor %23971 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21707 = arith.constant 1 : index
    %dim_21708 = tensor.dim %23996, %c1_21707 : tensor<4x?x4096xf16>
    %23997 = flow.tensor.transfer %23996 : tensor<4x?x4096xf16>{%dim_21708} to #hal.device.promise<@__device_0>
    %23998 = torch_c.from_builtin_tensor %23997 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %23998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %23999 = torch_c.to_builtin_tensor %23977 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21709 = arith.constant 1 : index
    %dim_21710 = tensor.dim %23999, %c1_21709 : tensor<4x?x4096xf16>
    %24000 = flow.tensor.transfer %23999 : tensor<4x?x4096xf16>{%dim_21710} to #hal.device.promise<@__device_0>
    %24001 = torch_c.from_builtin_tensor %24000 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24002 = torch_c.to_builtin_tensor %23983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21711 = arith.constant 1 : index
    %dim_21712 = tensor.dim %24002, %c1_21711 : tensor<4x?x4096xf16>
    %24003 = flow.tensor.transfer %24002 : tensor<4x?x4096xf16>{%dim_21712} to #hal.device.promise<@__device_0>
    %24004 = torch_c.from_builtin_tensor %24003 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21713 = torch.constant.int 1
    %24005 = torch.aten.add.Tensor %23941, %23986, %int1_21713 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21714 = torch.constant.int 1
    %24006 = torch.aten.add.Tensor %24005, %23989, %int1_21714 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21715 = torch.constant.int 1
    %24007 = torch.aten.add.Tensor %24006, %23992, %int1_21715 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21716 = torch.constant.int 1
    %24008 = torch.aten.add.Tensor %24007, %23995, %int1_21716 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21717 = torch.constant.int 1
    %24009 = torch.aten.add.Tensor %24008, %23998, %int1_21717 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21718 = torch.constant.int 1
    %24010 = torch.aten.add.Tensor %24009, %24001, %int1_21718 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21719 = torch.constant.int 1
    %24011 = torch.aten.add.Tensor %24010, %24004, %int1_21719 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
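    // Same reduction replicated for @__device_1: seven transfers in, then an
    // add chain that keeps shard order and uses the local partial %23947 in
    // place of a transfer.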
    %24012 = torch_c.to_builtin_tensor %23941 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21720 = arith.constant 1 : index
    %dim_21721 = tensor.dim %24012, %c1_21720 : tensor<4x?x4096xf16>
    %24013 = flow.tensor.transfer %24012 : tensor<4x?x4096xf16>{%dim_21721} to #hal.device.promise<@__device_1>
    %24014 = torch_c.from_builtin_tensor %24013 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24015 = torch_c.to_builtin_tensor %23953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21722 = arith.constant 1 : index
    %dim_21723 = tensor.dim %24015, %c1_21722 : tensor<4x?x4096xf16>
    %24016 = flow.tensor.transfer %24015 : tensor<4x?x4096xf16>{%dim_21723} to #hal.device.promise<@__device_1>
    %24017 = torch_c.from_builtin_tensor %24016 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24018 = torch_c.to_builtin_tensor %23959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21724 = arith.constant 1 : index
    %dim_21725 = tensor.dim %24018, %c1_21724 : tensor<4x?x4096xf16>
    %24019 = flow.tensor.transfer %24018 : tensor<4x?x4096xf16>{%dim_21725} to #hal.device.promise<@__device_1>
    %24020 = torch_c.from_builtin_tensor %24019 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24021 = torch_c.to_builtin_tensor %23965 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21726 = arith.constant 1 : index
    %dim_21727 = tensor.dim %24021, %c1_21726 : tensor<4x?x4096xf16>
    %24022 = flow.tensor.transfer %24021 : tensor<4x?x4096xf16>{%dim_21727} to #hal.device.promise<@__device_1>
    %24023 = torch_c.from_builtin_tensor %24022 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24024 = torch_c.to_builtin_tensor %23971 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21728 = arith.constant 1 : index
    %dim_21729 = tensor.dim %24024, %c1_21728 : tensor<4x?x4096xf16>
    %24025 = flow.tensor.transfer %24024 : tensor<4x?x4096xf16>{%dim_21729} to #hal.device.promise<@__device_1>
    %24026 = torch_c.from_builtin_tensor %24025 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24027 = torch_c.to_builtin_tensor %23977 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21730 = arith.constant 1 : index
    %dim_21731 = tensor.dim %24027, %c1_21730 : tensor<4x?x4096xf16>
    %24028 = flow.tensor.transfer %24027 : tensor<4x?x4096xf16>{%dim_21731} to #hal.device.promise<@__device_1>
    %24029 = torch_c.from_builtin_tensor %24028 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24030 = torch_c.to_builtin_tensor %23983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21732 = arith.constant 1 : index
    %dim_21733 = tensor.dim %24030, %c1_21732 : tensor<4x?x4096xf16>
    %24031 = flow.tensor.transfer %24030 : tensor<4x?x4096xf16>{%dim_21733} to #hal.device.promise<@__device_1>
    %24032 = torch_c.from_builtin_tensor %24031 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21734 = torch.constant.int 1
    %24033 = torch.aten.add.Tensor %24014, %23947, %int1_21734 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21735 = torch.constant.int 1
    %24034 = torch.aten.add.Tensor %24033, %24017, %int1_21735 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21736 = torch.constant.int 1
    %24035 = torch.aten.add.Tensor %24034, %24020, %int1_21736 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21737 = torch.constant.int 1
    %24036 = torch.aten.add.Tensor %24035, %24023, %int1_21737 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21738 = torch.constant.int 1
    %24037 = torch.aten.add.Tensor %24036, %24026, %int1_21738 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21739 = torch.constant.int 1
    %24038 = torch.aten.add.Tensor %24037, %24029, %int1_21739 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21740 = torch.constant.int 1
    %24039 = torch.aten.add.Tensor %24038, %24032, %int1_21740 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
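    // Replica for @__device_2; the local partial here is %23953.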
    %24040 = torch_c.to_builtin_tensor %23941 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21741 = arith.constant 1 : index
    %dim_21742 = tensor.dim %24040, %c1_21741 : tensor<4x?x4096xf16>
    %24041 = flow.tensor.transfer %24040 : tensor<4x?x4096xf16>{%dim_21742} to #hal.device.promise<@__device_2>
    %24042 = torch_c.from_builtin_tensor %24041 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24043 = torch_c.to_builtin_tensor %23947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21743 = arith.constant 1 : index
    %dim_21744 = tensor.dim %24043, %c1_21743 : tensor<4x?x4096xf16>
    %24044 = flow.tensor.transfer %24043 : tensor<4x?x4096xf16>{%dim_21744} to #hal.device.promise<@__device_2>
    %24045 = torch_c.from_builtin_tensor %24044 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24046 = torch_c.to_builtin_tensor %23959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21745 = arith.constant 1 : index
    %dim_21746 = tensor.dim %24046, %c1_21745 : tensor<4x?x4096xf16>
    %24047 = flow.tensor.transfer %24046 : tensor<4x?x4096xf16>{%dim_21746} to #hal.device.promise<@__device_2>
    %24048 = torch_c.from_builtin_tensor %24047 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24049 = torch_c.to_builtin_tensor %23965 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21747 = arith.constant 1 : index
    %dim_21748 = tensor.dim %24049, %c1_21747 : tensor<4x?x4096xf16>
    %24050 = flow.tensor.transfer %24049 : tensor<4x?x4096xf16>{%dim_21748} to #hal.device.promise<@__device_2>
    %24051 = torch_c.from_builtin_tensor %24050 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24052 = torch_c.to_builtin_tensor %23971 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21749 = arith.constant 1 : index
    %dim_21750 = tensor.dim %24052, %c1_21749 : tensor<4x?x4096xf16>
    %24053 = flow.tensor.transfer %24052 : tensor<4x?x4096xf16>{%dim_21750} to #hal.device.promise<@__device_2>
    %24054 = torch_c.from_builtin_tensor %24053 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24055 = torch_c.to_builtin_tensor %23977 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21751 = arith.constant 1 : index
    %dim_21752 = tensor.dim %24055, %c1_21751 : tensor<4x?x4096xf16>
    %24056 = flow.tensor.transfer %24055 : tensor<4x?x4096xf16>{%dim_21752} to #hal.device.promise<@__device_2>
    %24057 = torch_c.from_builtin_tensor %24056 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24058 = torch_c.to_builtin_tensor %23983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21753 = arith.constant 1 : index
    %dim_21754 = tensor.dim %24058, %c1_21753 : tensor<4x?x4096xf16>
    %24059 = flow.tensor.transfer %24058 : tensor<4x?x4096xf16>{%dim_21754} to #hal.device.promise<@__device_2>
    %24060 = torch_c.from_builtin_tensor %24059 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21755 = torch.constant.int 1
    %24061 = torch.aten.add.Tensor %24042, %24045, %int1_21755 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21756 = torch.constant.int 1
    %24062 = torch.aten.add.Tensor %24061, %23953, %int1_21756 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21757 = torch.constant.int 1
    %24063 = torch.aten.add.Tensor %24062, %24048, %int1_21757 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21758 = torch.constant.int 1
    %24064 = torch.aten.add.Tensor %24063, %24051, %int1_21758 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21759 = torch.constant.int 1
    %24065 = torch.aten.add.Tensor %24064, %24054, %int1_21759 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21760 = torch.constant.int 1
    %24066 = torch.aten.add.Tensor %24065, %24057, %int1_21760 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21761 = torch.constant.int 1
    %24067 = torch.aten.add.Tensor %24066, %24060, %int1_21761 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
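    // Replica for @__device_3; the local partial here is %23959.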
    %24068 = torch_c.to_builtin_tensor %23941 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21762 = arith.constant 1 : index
    %dim_21763 = tensor.dim %24068, %c1_21762 : tensor<4x?x4096xf16>
    %24069 = flow.tensor.transfer %24068 : tensor<4x?x4096xf16>{%dim_21763} to #hal.device.promise<@__device_3>
    %24070 = torch_c.from_builtin_tensor %24069 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24071 = torch_c.to_builtin_tensor %23947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21764 = arith.constant 1 : index
    %dim_21765 = tensor.dim %24071, %c1_21764 : tensor<4x?x4096xf16>
    %24072 = flow.tensor.transfer %24071 : tensor<4x?x4096xf16>{%dim_21765} to #hal.device.promise<@__device_3>
    %24073 = torch_c.from_builtin_tensor %24072 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24074 = torch_c.to_builtin_tensor %23953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21766 = arith.constant 1 : index
    %dim_21767 = tensor.dim %24074, %c1_21766 : tensor<4x?x4096xf16>
    %24075 = flow.tensor.transfer %24074 : tensor<4x?x4096xf16>{%dim_21767} to #hal.device.promise<@__device_3>
    %24076 = torch_c.from_builtin_tensor %24075 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24077 = torch_c.to_builtin_tensor %23965 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21768 = arith.constant 1 : index
    %dim_21769 = tensor.dim %24077, %c1_21768 : tensor<4x?x4096xf16>
    %24078 = flow.tensor.transfer %24077 : tensor<4x?x4096xf16>{%dim_21769} to #hal.device.promise<@__device_3>
    %24079 = torch_c.from_builtin_tensor %24078 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24080 = torch_c.to_builtin_tensor %23971 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21770 = arith.constant 1 : index
    %dim_21771 = tensor.dim %24080, %c1_21770 : tensor<4x?x4096xf16>
    %24081 = flow.tensor.transfer %24080 : tensor<4x?x4096xf16>{%dim_21771} to #hal.device.promise<@__device_3>
    %24082 = torch_c.from_builtin_tensor %24081 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24083 = torch_c.to_builtin_tensor %23977 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21772 = arith.constant 1 : index
    %dim_21773 = tensor.dim %24083, %c1_21772 : tensor<4x?x4096xf16>
    %24084 = flow.tensor.transfer %24083 : tensor<4x?x4096xf16>{%dim_21773} to #hal.device.promise<@__device_3>
    %24085 = torch_c.from_builtin_tensor %24084 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24086 = torch_c.to_builtin_tensor %23983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21774 = arith.constant 1 : index
    %dim_21775 = tensor.dim %24086, %c1_21774 : tensor<4x?x4096xf16>
    %24087 = flow.tensor.transfer %24086 : tensor<4x?x4096xf16>{%dim_21775} to #hal.device.promise<@__device_3>
    %24088 = torch_c.from_builtin_tensor %24087 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21776 = torch.constant.int 1
    %24089 = torch.aten.add.Tensor %24070, %24073, %int1_21776 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21777 = torch.constant.int 1
    %24090 = torch.aten.add.Tensor %24089, %24076, %int1_21777 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21778 = torch.constant.int 1
    %24091 = torch.aten.add.Tensor %24090, %23959, %int1_21778 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21779 = torch.constant.int 1
    %24092 = torch.aten.add.Tensor %24091, %24079, %int1_21779 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21780 = torch.constant.int 1
    %24093 = torch.aten.add.Tensor %24092, %24082, %int1_21780 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21781 = torch.constant.int 1
    %24094 = torch.aten.add.Tensor %24093, %24085, %int1_21781 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21782 = torch.constant.int 1
    %24095 = torch.aten.add.Tensor %24094, %24088, %int1_21782 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
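    // Replica for @__device_4; the local partial here is %23965.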
    %24096 = torch_c.to_builtin_tensor %23941 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21783 = arith.constant 1 : index
    %dim_21784 = tensor.dim %24096, %c1_21783 : tensor<4x?x4096xf16>
    %24097 = flow.tensor.transfer %24096 : tensor<4x?x4096xf16>{%dim_21784} to #hal.device.promise<@__device_4>
    %24098 = torch_c.from_builtin_tensor %24097 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24099 = torch_c.to_builtin_tensor %23947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21785 = arith.constant 1 : index
    %dim_21786 = tensor.dim %24099, %c1_21785 : tensor<4x?x4096xf16>
    %24100 = flow.tensor.transfer %24099 : tensor<4x?x4096xf16>{%dim_21786} to #hal.device.promise<@__device_4>
    %24101 = torch_c.from_builtin_tensor %24100 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24102 = torch_c.to_builtin_tensor %23953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21787 = arith.constant 1 : index
    %dim_21788 = tensor.dim %24102, %c1_21787 : tensor<4x?x4096xf16>
    %24103 = flow.tensor.transfer %24102 : tensor<4x?x4096xf16>{%dim_21788} to #hal.device.promise<@__device_4>
    %24104 = torch_c.from_builtin_tensor %24103 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24105 = torch_c.to_builtin_tensor %23959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21789 = arith.constant 1 : index
    %dim_21790 = tensor.dim %24105, %c1_21789 : tensor<4x?x4096xf16>
    %24106 = flow.tensor.transfer %24105 : tensor<4x?x4096xf16>{%dim_21790} to #hal.device.promise<@__device_4>
    %24107 = torch_c.from_builtin_tensor %24106 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24108 = torch_c.to_builtin_tensor %23971 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21791 = arith.constant 1 : index
    %dim_21792 = tensor.dim %24108, %c1_21791 : tensor<4x?x4096xf16>
    %24109 = flow.tensor.transfer %24108 : tensor<4x?x4096xf16>{%dim_21792} to #hal.device.promise<@__device_4>
    %24110 = torch_c.from_builtin_tensor %24109 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24111 = torch_c.to_builtin_tensor %23977 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21793 = arith.constant 1 : index
    %dim_21794 = tensor.dim %24111, %c1_21793 : tensor<4x?x4096xf16>
    %24112 = flow.tensor.transfer %24111 : tensor<4x?x4096xf16>{%dim_21794} to #hal.device.promise<@__device_4>
    %24113 = torch_c.from_builtin_tensor %24112 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24114 = torch_c.to_builtin_tensor %23983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21795 = arith.constant 1 : index
    %dim_21796 = tensor.dim %24114, %c1_21795 : tensor<4x?x4096xf16>
    %24115 = flow.tensor.transfer %24114 : tensor<4x?x4096xf16>{%dim_21796} to #hal.device.promise<@__device_4>
    %24116 = torch_c.from_builtin_tensor %24115 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21797 = torch.constant.int 1
    %24117 = torch.aten.add.Tensor %24098, %24101, %int1_21797 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21798 = torch.constant.int 1
    %24118 = torch.aten.add.Tensor %24117, %24104, %int1_21798 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21799 = torch.constant.int 1
    %24119 = torch.aten.add.Tensor %24118, %24107, %int1_21799 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21800 = torch.constant.int 1
    %24120 = torch.aten.add.Tensor %24119, %23965, %int1_21800 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21801 = torch.constant.int 1
    %24121 = torch.aten.add.Tensor %24120, %24110, %int1_21801 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21802 = torch.constant.int 1
    %24122 = torch.aten.add.Tensor %24121, %24113, %int1_21802 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21803 = torch.constant.int 1
    %24123 = torch.aten.add.Tensor %24122, %24116, %int1_21803 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
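    // Replica for @__device_5; the local partial here is %23971.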
    %24124 = torch_c.to_builtin_tensor %23941 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21804 = arith.constant 1 : index
    %dim_21805 = tensor.dim %24124, %c1_21804 : tensor<4x?x4096xf16>
    %24125 = flow.tensor.transfer %24124 : tensor<4x?x4096xf16>{%dim_21805} to #hal.device.promise<@__device_5>
    %24126 = torch_c.from_builtin_tensor %24125 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24127 = torch_c.to_builtin_tensor %23947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21806 = arith.constant 1 : index
    %dim_21807 = tensor.dim %24127, %c1_21806 : tensor<4x?x4096xf16>
    %24128 = flow.tensor.transfer %24127 : tensor<4x?x4096xf16>{%dim_21807} to #hal.device.promise<@__device_5>
    %24129 = torch_c.from_builtin_tensor %24128 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24130 = torch_c.to_builtin_tensor %23953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21808 = arith.constant 1 : index
    %dim_21809 = tensor.dim %24130, %c1_21808 : tensor<4x?x4096xf16>
    %24131 = flow.tensor.transfer %24130 : tensor<4x?x4096xf16>{%dim_21809} to #hal.device.promise<@__device_5>
    %24132 = torch_c.from_builtin_tensor %24131 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24133 = torch_c.to_builtin_tensor %23959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21810 = arith.constant 1 : index
    %dim_21811 = tensor.dim %24133, %c1_21810 : tensor<4x?x4096xf16>
    %24134 = flow.tensor.transfer %24133 : tensor<4x?x4096xf16>{%dim_21811} to #hal.device.promise<@__device_5>
    %24135 = torch_c.from_builtin_tensor %24134 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24136 = torch_c.to_builtin_tensor %23965 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21812 = arith.constant 1 : index
    %dim_21813 = tensor.dim %24136, %c1_21812 : tensor<4x?x4096xf16>
    %24137 = flow.tensor.transfer %24136 : tensor<4x?x4096xf16>{%dim_21813} to #hal.device.promise<@__device_5>
    %24138 = torch_c.from_builtin_tensor %24137 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24139 = torch_c.to_builtin_tensor %23977 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21814 = arith.constant 1 : index
    %dim_21815 = tensor.dim %24139, %c1_21814 : tensor<4x?x4096xf16>
    %24140 = flow.tensor.transfer %24139 : tensor<4x?x4096xf16>{%dim_21815} to #hal.device.promise<@__device_5>
    %24141 = torch_c.from_builtin_tensor %24140 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24142 = torch_c.to_builtin_tensor %23983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21816 = arith.constant 1 : index
    %dim_21817 = tensor.dim %24142, %c1_21816 : tensor<4x?x4096xf16>
    %24143 = flow.tensor.transfer %24142 : tensor<4x?x4096xf16>{%dim_21817} to #hal.device.promise<@__device_5>
    %24144 = torch_c.from_builtin_tensor %24143 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21818 = torch.constant.int 1
    %24145 = torch.aten.add.Tensor %24126, %24129, %int1_21818 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21819 = torch.constant.int 1
    %24146 = torch.aten.add.Tensor %24145, %24132, %int1_21819 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21820 = torch.constant.int 1
    %24147 = torch.aten.add.Tensor %24146, %24135, %int1_21820 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21821 = torch.constant.int 1
    %24148 = torch.aten.add.Tensor %24147, %24138, %int1_21821 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21822 = torch.constant.int 1
    %24149 = torch.aten.add.Tensor %24148, %23971, %int1_21822 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21823 = torch.constant.int 1
    %24150 = torch.aten.add.Tensor %24149, %24141, %int1_21823 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21824 = torch.constant.int 1
    %24151 = torch.aten.add.Tensor %24150, %24144, %int1_21824 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
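    // Gather for @__device_6: copy each remote device's partial (%23941, %23947,
    // %23953, %23959, %23965, %23971, %23983) over with flow.tensor.transfer.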
    %24152 = torch_c.to_builtin_tensor %23941 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21825 = arith.constant 1 : index
    %dim_21826 = tensor.dim %24152, %c1_21825 : tensor<4x?x4096xf16>
    %24153 = flow.tensor.transfer %24152 : tensor<4x?x4096xf16>{%dim_21826} to #hal.device.promise<@__device_6>
    %24154 = torch_c.from_builtin_tensor %24153 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24155 = torch_c.to_builtin_tensor %23947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21827 = arith.constant 1 : index
    %dim_21828 = tensor.dim %24155, %c1_21827 : tensor<4x?x4096xf16>
    %24156 = flow.tensor.transfer %24155 : tensor<4x?x4096xf16>{%dim_21828} to #hal.device.promise<@__device_6>
    %24157 = torch_c.from_builtin_tensor %24156 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24158 = torch_c.to_builtin_tensor %23953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21829 = arith.constant 1 : index
    %dim_21830 = tensor.dim %24158, %c1_21829 : tensor<4x?x4096xf16>
    %24159 = flow.tensor.transfer %24158 : tensor<4x?x4096xf16>{%dim_21830} to #hal.device.promise<@__device_6>
    %24160 = torch_c.from_builtin_tensor %24159 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24161 = torch_c.to_builtin_tensor %23959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21831 = arith.constant 1 : index
    %dim_21832 = tensor.dim %24161, %c1_21831 : tensor<4x?x4096xf16>
    %24162 = flow.tensor.transfer %24161 : tensor<4x?x4096xf16>{%dim_21832} to #hal.device.promise<@__device_6>
    %24163 = torch_c.from_builtin_tensor %24162 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24164 = torch_c.to_builtin_tensor %23965 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21833 = arith.constant 1 : index
    %dim_21834 = tensor.dim %24164, %c1_21833 : tensor<4x?x4096xf16>
    %24165 = flow.tensor.transfer %24164 : tensor<4x?x4096xf16>{%dim_21834} to #hal.device.promise<@__device_6>
    %24166 = torch_c.from_builtin_tensor %24165 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24167 = torch_c.to_builtin_tensor %23971 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21835 = arith.constant 1 : index
    %dim_21836 = tensor.dim %24167, %c1_21835 : tensor<4x?x4096xf16>
    %24168 = flow.tensor.transfer %24167 : tensor<4x?x4096xf16>{%dim_21836} to #hal.device.promise<@__device_6>
    %24169 = torch_c.from_builtin_tensor %24168 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24170 = torch_c.to_builtin_tensor %23983 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21837 = arith.constant 1 : index
    %dim_21838 = tensor.dim %24170, %c1_21837 : tensor<4x?x4096xf16>
    %24171 = flow.tensor.transfer %24170 : tensor<4x?x4096xf16>{%dim_21838} to #hal.device.promise<@__device_6>
    %24172 = torch_c.from_builtin_tensor %24171 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
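    // Sum on @__device_6; %23977 is already resident there, so it is added
    // directly without a transfer. The chain produces %24179.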
    %int1_21839 = torch.constant.int 1
    %24173 = torch.aten.add.Tensor %24154, %24157, %int1_21839 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21840 = torch.constant.int 1
    %24174 = torch.aten.add.Tensor %24173, %24160, %int1_21840 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21841 = torch.constant.int 1
    %24175 = torch.aten.add.Tensor %24174, %24163, %int1_21841 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21842 = torch.constant.int 1
    %24176 = torch.aten.add.Tensor %24175, %24166, %int1_21842 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21843 = torch.constant.int 1
    %24177 = torch.aten.add.Tensor %24176, %24169, %int1_21843 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21844 = torch.constant.int 1
    %24178 = torch.aten.add.Tensor %24177, %23977, %int1_21844 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21845 = torch.constant.int 1
    %24179 = torch.aten.add.Tensor %24178, %24172, %int1_21845 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
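    // Gather for @__device_7: the same seven transfers, this time leaving %23983
    // (device 7's own partial) in place.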
    %24180 = torch_c.to_builtin_tensor %23941 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21846 = arith.constant 1 : index
    %dim_21847 = tensor.dim %24180, %c1_21846 : tensor<4x?x4096xf16>
    %24181 = flow.tensor.transfer %24180 : tensor<4x?x4096xf16>{%dim_21847} to #hal.device.promise<@__device_7>
    %24182 = torch_c.from_builtin_tensor %24181 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24183 = torch_c.to_builtin_tensor %23947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21848 = arith.constant 1 : index
    %dim_21849 = tensor.dim %24183, %c1_21848 : tensor<4x?x4096xf16>
    %24184 = flow.tensor.transfer %24183 : tensor<4x?x4096xf16>{%dim_21849} to #hal.device.promise<@__device_7>
    %24185 = torch_c.from_builtin_tensor %24184 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24186 = torch_c.to_builtin_tensor %23953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21850 = arith.constant 1 : index
    %dim_21851 = tensor.dim %24186, %c1_21850 : tensor<4x?x4096xf16>
    %24187 = flow.tensor.transfer %24186 : tensor<4x?x4096xf16>{%dim_21851} to #hal.device.promise<@__device_7>
    %24188 = torch_c.from_builtin_tensor %24187 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24189 = torch_c.to_builtin_tensor %23959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21852 = arith.constant 1 : index
    %dim_21853 = tensor.dim %24189, %c1_21852 : tensor<4x?x4096xf16>
    %24190 = flow.tensor.transfer %24189 : tensor<4x?x4096xf16>{%dim_21853} to #hal.device.promise<@__device_7>
    %24191 = torch_c.from_builtin_tensor %24190 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24192 = torch_c.to_builtin_tensor %23965 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21854 = arith.constant 1 : index
    %dim_21855 = tensor.dim %24192, %c1_21854 : tensor<4x?x4096xf16>
    %24193 = flow.tensor.transfer %24192 : tensor<4x?x4096xf16>{%dim_21855} to #hal.device.promise<@__device_7>
    %24194 = torch_c.from_builtin_tensor %24193 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24195 = torch_c.to_builtin_tensor %23971 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21856 = arith.constant 1 : index
    %dim_21857 = tensor.dim %24195, %c1_21856 : tensor<4x?x4096xf16>
    %24196 = flow.tensor.transfer %24195 : tensor<4x?x4096xf16>{%dim_21857} to #hal.device.promise<@__device_7>
    %24197 = torch_c.from_builtin_tensor %24196 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24198 = torch_c.to_builtin_tensor %23977 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_21858 = arith.constant 1 : index
    %dim_21859 = tensor.dim %24198, %c1_21858 : tensor<4x?x4096xf16>
    %24199 = flow.tensor.transfer %24198 : tensor<4x?x4096xf16>{%dim_21859} to #hal.device.promise<@__device_7>
    %24200 = torch_c.from_builtin_tensor %24199 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
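    // Sum on @__device_7, producing %24207; together with %24151, %24179, and the
    // device 0-4 sums referenced below, every device now holds the reduced tensor.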
    %int1_21860 = torch.constant.int 1
    %24201 = torch.aten.add.Tensor %24182, %24185, %int1_21860 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21861 = torch.constant.int 1
    %24202 = torch.aten.add.Tensor %24201, %24188, %int1_21861 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21862 = torch.constant.int 1
    %24203 = torch.aten.add.Tensor %24202, %24191, %int1_21862 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21863 = torch.constant.int 1
    %24204 = torch.aten.add.Tensor %24203, %24194, %int1_21863 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21864 = torch.constant.int 1
    %24205 = torch.aten.add.Tensor %24204, %24197, %int1_21864 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21865 = torch.constant.int 1
    %24206 = torch.aten.add.Tensor %24205, %24200, %int1_21865 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21866 = torch.constant.int 1
    %24207 = torch.aten.add.Tensor %24206, %23983, %int1_21866 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
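    // Residual connection, one add per device: fold the reduced outputs
    // (%24011, %24039, %24067, %24095, %24123, %24151, %24179, %24207) back into
    // the carried activations %22867 .. %22874.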
    %int1_21867 = torch.constant.int 1
    %24208 = torch.aten.add.Tensor %22867, %24011, %int1_21867 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21868 = torch.constant.int 1
    %24209 = torch.aten.add.Tensor %22868, %24039, %int1_21868 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21869 = torch.constant.int 1
    %24210 = torch.aten.add.Tensor %22869, %24067, %int1_21869 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21870 = torch.constant.int 1
    %24211 = torch.aten.add.Tensor %22870, %24095, %int1_21870 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24211, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21871 = torch.constant.int 1
    %24212 = torch.aten.add.Tensor %22871, %24123, %int1_21871 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21872 = torch.constant.int 1
    %24213 = torch.aten.add.Tensor %22872, %24151, %int1_21872 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21873 = torch.constant.int 1
    %24214 = torch.aten.add.Tensor %22873, %24179, %int1_21873 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_21874 = torch.constant.int 1
    %24215 = torch.aten.add.Tensor %22874, %24207, %int1_21874 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
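    // RMSNorm over the post-residual activations starts here: upcast each
    // device's tensor to f32 (torch dtype code 6) before computing statistics.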
    %int6_21875 = torch.constant.int 6
    %24216 = torch.prims.convert_element_type %24208, %int6_21875 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_21876 = torch.constant.int 6
    %24217 = torch.prims.convert_element_type %24209, %int6_21876 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_21877 = torch.constant.int 6
    %24218 = torch.prims.convert_element_type %24210, %int6_21877 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_21878 = torch.constant.int 6
    %24219 = torch.prims.convert_element_type %24211, %int6_21878 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_21879 = torch.constant.int 6
    %24220 = torch.prims.convert_element_type %24212, %int6_21879 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_21880 = torch.constant.int 6
    %24221 = torch.prims.convert_element_type %24213, %int6_21880 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_21881 = torch.constant.int 6
    %24222 = torch.prims.convert_element_type %24214, %int6_21881 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_21882 = torch.constant.int 6
    %24223 = torch.prims.convert_element_type %24215, %int6_21882 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
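    // Square elementwise: x^2 feeds the mean-of-squares below.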
    %int2_21883 = torch.constant.int 2
    %24224 = torch.aten.pow.Tensor_Scalar %24216, %int2_21883 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_21884 = torch.constant.int 2
    %24225 = torch.aten.pow.Tensor_Scalar %24217, %int2_21884 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_21885 = torch.constant.int 2
    %24226 = torch.aten.pow.Tensor_Scalar %24218, %int2_21885 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_21886 = torch.constant.int 2
    %24227 = torch.aten.pow.Tensor_Scalar %24219, %int2_21886 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_21887 = torch.constant.int 2
    %24228 = torch.aten.pow.Tensor_Scalar %24220, %int2_21887 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_21888 = torch.constant.int 2
    %24229 = torch.aten.pow.Tensor_Scalar %24221, %int2_21888 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_21889 = torch.constant.int 2
    %24230 = torch.aten.pow.Tensor_Scalar %24222, %int2_21889 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_21890 = torch.constant.int 2
    %24231 = torch.aten.pow.Tensor_Scalar %24223, %int2_21890 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
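    // Mean over the trailing hidden dimension (dim -1, keepdim = true),
    // reducing [4,?,4096] to [4,?,1] on each device.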
    %int-1_21891 = torch.constant.int -1
    %24232 = torch.prim.ListConstruct %int-1_21891 : (!torch.int) -> !torch.list<int>
    %true_21892 = torch.constant.bool true
    %none_21893 = torch.constant.none
    %24233 = torch.aten.mean.dim %24224, %24232, %true_21892, %none_21893 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_21894 = torch.constant.int -1
    %24234 = torch.prim.ListConstruct %int-1_21894 : (!torch.int) -> !torch.list<int>
    %true_21895 = torch.constant.bool true
    %none_21896 = torch.constant.none
    %24235 = torch.aten.mean.dim %24225, %24234, %true_21895, %none_21896 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_21897 = torch.constant.int -1
    %24236 = torch.prim.ListConstruct %int-1_21897 : (!torch.int) -> !torch.list<int>
    %true_21898 = torch.constant.bool true
    %none_21899 = torch.constant.none
    %24237 = torch.aten.mean.dim %24226, %24236, %true_21898, %none_21899 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_21900 = torch.constant.int -1
    %24238 = torch.prim.ListConstruct %int-1_21900 : (!torch.int) -> !torch.list<int>
    %true_21901 = torch.constant.bool true
    %none_21902 = torch.constant.none
    %24239 = torch.aten.mean.dim %24227, %24238, %true_21901, %none_21902 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_21903 = torch.constant.int -1
    %24240 = torch.prim.ListConstruct %int-1_21903 : (!torch.int) -> !torch.list<int>
    %true_21904 = torch.constant.bool true
    %none_21905 = torch.constant.none
    %24241 = torch.aten.mean.dim %24228, %24240, %true_21904, %none_21905 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_21906 = torch.constant.int -1
    %24242 = torch.prim.ListConstruct %int-1_21906 : (!torch.int) -> !torch.list<int>
    %true_21907 = torch.constant.bool true
    %none_21908 = torch.constant.none
    %24243 = torch.aten.mean.dim %24229, %24242, %true_21907, %none_21908 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_21909 = torch.constant.int -1
    %24244 = torch.prim.ListConstruct %int-1_21909 : (!torch.int) -> !torch.list<int>
    %true_21910 = torch.constant.bool true
    %none_21911 = torch.constant.none
    %24245 = torch.aten.mean.dim %24230, %24244, %true_21910, %none_21911 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_21912 = torch.constant.int -1
    %24246 = torch.prim.ListConstruct %int-1_21912 : (!torch.int) -> !torch.list<int>
    %true_21913 = torch.constant.bool true
    %none_21914 = torch.constant.none
    %24247 = torch.aten.mean.dim %24231, %24246, %true_21913, %none_21914 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
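    // Add the epsilon (~1e-5) to the mean of squares.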
    %float9.999990e-06_21915 = torch.constant.float 9.9999997473787516E-6
    %int1_21916 = torch.constant.int 1
    %24248 = torch.aten.add.Scalar %24233, %float9.999990e-06_21915, %int1_21916 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_21917 = torch.constant.float 9.9999997473787516E-6
    %int1_21918 = torch.constant.int 1
    %24249 = torch.aten.add.Scalar %24235, %float9.999990e-06_21917, %int1_21918 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_21919 = torch.constant.float 9.9999997473787516E-6
    %int1_21920 = torch.constant.int 1
    %24250 = torch.aten.add.Scalar %24237, %float9.999990e-06_21919, %int1_21920 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_21921 = torch.constant.float 9.9999997473787516E-6
    %int1_21922 = torch.constant.int 1
    %24251 = torch.aten.add.Scalar %24239, %float9.999990e-06_21921, %int1_21922 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_21923 = torch.constant.float 9.9999997473787516E-6
    %int1_21924 = torch.constant.int 1
    %24252 = torch.aten.add.Scalar %24241, %float9.999990e-06_21923, %int1_21924 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_21925 = torch.constant.float 9.9999997473787516E-6
    %int1_21926 = torch.constant.int 1
    %24253 = torch.aten.add.Scalar %24243, %float9.999990e-06_21925, %int1_21926 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_21927 = torch.constant.float 9.9999997473787516E-6
    %int1_21928 = torch.constant.int 1
    %24254 = torch.aten.add.Scalar %24245, %float9.999990e-06_21927, %int1_21928 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_21929 = torch.constant.float 9.9999997473787516E-6
    %int1_21930 = torch.constant.int 1
    %24255 = torch.aten.add.Scalar %24247, %float9.999990e-06_21929, %int1_21930 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
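    // rsqrt yields 1 / sqrt(mean(x^2) + eps).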
    %24256 = torch.aten.rsqrt %24248 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24257 = torch.aten.rsqrt %24249 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24258 = torch.aten.rsqrt %24250 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24259 = torch.aten.rsqrt %24251 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24260 = torch.aten.rsqrt %24252 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24261 = torch.aten.rsqrt %24253 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24262 = torch.aten.rsqrt %24254 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24263 = torch.aten.rsqrt %24255 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24263, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
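    // Normalize: broadcast-multiply the f32 activations by the rsqrt factor.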
    %24264 = torch.aten.mul.Tensor %24216, %24256 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24265 = torch.aten.mul.Tensor %24217, %24257 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24266 = torch.aten.mul.Tensor %24218, %24258 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24267 = torch.aten.mul.Tensor %24219, %24259 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24268 = torch.aten.mul.Tensor %24220, %24260 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24269 = torch.aten.mul.Tensor %24221, %24261 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24270 = torch.aten.mul.Tensor %24222, %24262 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24271 = torch.aten.mul.Tensor %24223, %24263 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
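    // Scale by the replicated 4096-element norm weight (%840 .. %847, one copy
    // per device).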
    %24272 = torch.aten.mul.Tensor %840, %24264 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24273 = torch.aten.mul.Tensor %841, %24265 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24274 = torch.aten.mul.Tensor %842, %24266 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24275 = torch.aten.mul.Tensor %843, %24267 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24276 = torch.aten.mul.Tensor %844, %24268 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24277 = torch.aten.mul.Tensor %845, %24269 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24278 = torch.aten.mul.Tensor %846, %24270 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24279 = torch.aten.mul.Tensor %847, %24271 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
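    // Downcast the normalized activations back to f16 (torch dtype code 5).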
    %int5_21931 = torch.constant.int 5
    %24280 = torch.prims.convert_element_type %24272, %int5_21931 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_21932 = torch.constant.int 5
    %24281 = torch.prims.convert_element_type %24273, %int5_21932 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_21933 = torch.constant.int 5
    %24282 = torch.prims.convert_element_type %24274, %int5_21933 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_21934 = torch.constant.int 5
    %24283 = torch.prims.convert_element_type %24275, %int5_21934 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_21935 = torch.constant.int 5
    %24284 = torch.prims.convert_element_type %24276, %int5_21935 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_21936 = torch.constant.int 5
    %24285 = torch.prims.convert_element_type %24277, %int5_21936 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_21937 = torch.constant.int 5
    %24286 = torch.prims.convert_element_type %24278, %int5_21937 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_21938 = torch.constant.int 5
    %24287 = torch.prims.convert_element_type %24279, %int5_21938 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
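    // Transpose the eight sharded FFN weights %848 .. %855 from [1792,4096] to
    // [4096,1792]; 1792 * 8 = 14336, consistent with an MLP hidden dimension
    // split eight ways.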
    %int1_21939 = torch.constant.int 1
    %int0_21940 = torch.constant.int 0
    %24288 = torch.prim.ListConstruct %int1_21939, %int0_21940 : (!torch.int, !torch.int) -> !torch.list<int>
    %24289 = torch.aten.permute %848, %24288 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21941 = torch.constant.int 1
    %int0_21942 = torch.constant.int 0
    %24290 = torch.prim.ListConstruct %int1_21941, %int0_21942 : (!torch.int, !torch.int) -> !torch.list<int>
    %24291 = torch.aten.permute %849, %24290 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21943 = torch.constant.int 1
    %int0_21944 = torch.constant.int 0
    %24292 = torch.prim.ListConstruct %int1_21943, %int0_21944 : (!torch.int, !torch.int) -> !torch.list<int>
    %24293 = torch.aten.permute %850, %24292 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21945 = torch.constant.int 1
    %int0_21946 = torch.constant.int 0
    %24294 = torch.prim.ListConstruct %int1_21945, %int0_21946 : (!torch.int, !torch.int) -> !torch.list<int>
    %24295 = torch.aten.permute %851, %24294 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21947 = torch.constant.int 1
    %int0_21948 = torch.constant.int 0
    %24296 = torch.prim.ListConstruct %int1_21947, %int0_21948 : (!torch.int, !torch.int) -> !torch.list<int>
    %24297 = torch.aten.permute %852, %24296 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21949 = torch.constant.int 1
    %int0_21950 = torch.constant.int 0
    %24298 = torch.prim.ListConstruct %int1_21949, %int0_21950 : (!torch.int, !torch.int) -> !torch.list<int>
    %24299 = torch.aten.permute %853, %24298 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21951 = torch.constant.int 1
    %int0_21952 = torch.constant.int 0
    %24300 = torch.prim.ListConstruct %int1_21951, %int0_21952 : (!torch.int, !torch.int) -> !torch.list<int>
    %24301 = torch.aten.permute %854, %24300 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21953 = torch.constant.int 1
    %int0_21954 = torch.constant.int 0
    %24302 = torch.prim.ListConstruct %int1_21953, %int0_21954 : (!torch.int, !torch.int) -> !torch.list<int>
    %24303 = torch.aten.permute %855, %24302 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
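    // Per device: flatten [4,?,4096] to [4*?,4096], matmul against the
    // transposed shard, and reshape the [?,1792] result back to [4,?,1792].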
    %int4_21955 = torch.constant.int 4
    %24304 = torch.aten.mul.int %int4_21955, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_21956 = torch.constant.int 4096
    %24305 = torch.prim.ListConstruct %24304, %int4096_21956 : (!torch.int, !torch.int) -> !torch.list<int>
    %24306 = torch.aten.view %24280, %24305 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24306, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24307 = torch.aten.mm %24306, %24289 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24307, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_21957 = torch.constant.int 4
    %int1792_21958 = torch.constant.int 1792
    %24308 = torch.prim.ListConstruct %int4_21957, %2482, %int1792_21958 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24309 = torch.aten.view %24307, %24308 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_21959 = torch.constant.int 4
    %24310 = torch.aten.mul.int %int4_21959, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_21960 = torch.constant.int 4096
    %24311 = torch.prim.ListConstruct %24310, %int4096_21960 : (!torch.int, !torch.int) -> !torch.list<int>
    %24312 = torch.aten.view %24281, %24311 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24312, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24313 = torch.aten.mm %24312, %24291 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24313, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_21961 = torch.constant.int 4
    %int1792_21962 = torch.constant.int 1792
    %24314 = torch.prim.ListConstruct %int4_21961, %2482, %int1792_21962 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24315 = torch.aten.view %24313, %24314 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_21963 = torch.constant.int 4
    %24316 = torch.aten.mul.int %int4_21963, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_21964 = torch.constant.int 4096
    %24317 = torch.prim.ListConstruct %24316, %int4096_21964 : (!torch.int, !torch.int) -> !torch.list<int>
    %24318 = torch.aten.view %24282, %24317 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24318, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24319 = torch.aten.mm %24318, %24293 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24319, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_21965 = torch.constant.int 4
    %int1792_21966 = torch.constant.int 1792
    %24320 = torch.prim.ListConstruct %int4_21965, %2482, %int1792_21966 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24321 = torch.aten.view %24319, %24320 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_21967 = torch.constant.int 4
    %24322 = torch.aten.mul.int %int4_21967, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_21968 = torch.constant.int 4096
    %24323 = torch.prim.ListConstruct %24322, %int4096_21968 : (!torch.int, !torch.int) -> !torch.list<int>
    %24324 = torch.aten.view %24283, %24323 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24324, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24325 = torch.aten.mm %24324, %24295 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24325, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_21969 = torch.constant.int 4
    %int1792_21970 = torch.constant.int 1792
    %24326 = torch.prim.ListConstruct %int4_21969, %2482, %int1792_21970 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24327 = torch.aten.view %24325, %24326 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_21971 = torch.constant.int 4
    %24328 = torch.aten.mul.int %int4_21971, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_21972 = torch.constant.int 4096
    %24329 = torch.prim.ListConstruct %24328, %int4096_21972 : (!torch.int, !torch.int) -> !torch.list<int>
    %24330 = torch.aten.view %24284, %24329 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24330, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24331 = torch.aten.mm %24330, %24297 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24331, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_21973 = torch.constant.int 4
    %int1792_21974 = torch.constant.int 1792
    %24332 = torch.prim.ListConstruct %int4_21973, %2482, %int1792_21974 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24333 = torch.aten.view %24331, %24332 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_21975 = torch.constant.int 4
    %24334 = torch.aten.mul.int %int4_21975, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_21976 = torch.constant.int 4096
    %24335 = torch.prim.ListConstruct %24334, %int4096_21976 : (!torch.int, !torch.int) -> !torch.list<int>
    %24336 = torch.aten.view %24285, %24335 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24336, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24337 = torch.aten.mm %24336, %24299 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24337, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_21977 = torch.constant.int 4
    %int1792_21978 = torch.constant.int 1792
    %24338 = torch.prim.ListConstruct %int4_21977, %2482, %int1792_21978 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24339 = torch.aten.view %24337, %24338 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_21979 = torch.constant.int 4
    %24340 = torch.aten.mul.int %int4_21979, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_21980 = torch.constant.int 4096
    %24341 = torch.prim.ListConstruct %24340, %int4096_21980 : (!torch.int, !torch.int) -> !torch.list<int>
    %24342 = torch.aten.view %24286, %24341 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24342, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24343 = torch.aten.mm %24342, %24301 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24343, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_21981 = torch.constant.int 4
    %int1792_21982 = torch.constant.int 1792
    %24344 = torch.prim.ListConstruct %int4_21981, %2482, %int1792_21982 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24345 = torch.aten.view %24343, %24344 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_21983 = torch.constant.int 4
    %24346 = torch.aten.mul.int %int4_21983, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_21984 = torch.constant.int 4096
    %24347 = torch.prim.ListConstruct %24346, %int4096_21984 : (!torch.int, !torch.int) -> !torch.list<int>
    %24348 = torch.aten.view %24287, %24347 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24348, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24349 = torch.aten.mm %24348, %24303 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24349, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_21985 = torch.constant.int 4
    %int1792_21986 = torch.constant.int 1792
    %24350 = torch.prim.ListConstruct %int4_21985, %2482, %int1792_21986 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24351 = torch.aten.view %24349, %24350 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
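    // SiLU on each shard's projection; this activation suggests the eight
    // matmuls above are the gate branch of a SwiGLU-style FFN.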
    %24352 = torch.aten.silu %24309 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24353 = torch.aten.silu %24315 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24354 = torch.aten.silu %24321 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24355 = torch.aten.silu %24327 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24356 = torch.aten.silu %24333 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24357 = torch.aten.silu %24339 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24358 = torch.aten.silu %24345 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24359 = torch.aten.silu %24351 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
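    // Transpose the second weight set %856 .. %863 (same [1792,4096] shape),
    // presumably the up-projection paired with the gate branch above.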
    %int1_21987 = torch.constant.int 1
    %int0_21988 = torch.constant.int 0
    %24360 = torch.prim.ListConstruct %int1_21987, %int0_21988 : (!torch.int, !torch.int) -> !torch.list<int>
    %24361 = torch.aten.permute %856, %24360 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21989 = torch.constant.int 1
    %int0_21990 = torch.constant.int 0
    %24362 = torch.prim.ListConstruct %int1_21989, %int0_21990 : (!torch.int, !torch.int) -> !torch.list<int>
    %24363 = torch.aten.permute %857, %24362 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21991 = torch.constant.int 1
    %int0_21992 = torch.constant.int 0
    %24364 = torch.prim.ListConstruct %int1_21991, %int0_21992 : (!torch.int, !torch.int) -> !torch.list<int>
    %24365 = torch.aten.permute %858, %24364 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21993 = torch.constant.int 1
    %int0_21994 = torch.constant.int 0
    %24366 = torch.prim.ListConstruct %int1_21993, %int0_21994 : (!torch.int, !torch.int) -> !torch.list<int>
    %24367 = torch.aten.permute %859, %24366 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21995 = torch.constant.int 1
    %int0_21996 = torch.constant.int 0
    %24368 = torch.prim.ListConstruct %int1_21995, %int0_21996 : (!torch.int, !torch.int) -> !torch.list<int>
    %24369 = torch.aten.permute %860, %24368 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21997 = torch.constant.int 1
    %int0_21998 = torch.constant.int 0
    %24370 = torch.prim.ListConstruct %int1_21997, %int0_21998 : (!torch.int, !torch.int) -> !torch.list<int>
    %24371 = torch.aten.permute %861, %24370 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_21999 = torch.constant.int 1
    %int0_22000 = torch.constant.int 0
    %24372 = torch.prim.ListConstruct %int1_21999, %int0_22000 : (!torch.int, !torch.int) -> !torch.list<int>
    %24373 = torch.aten.permute %862, %24372 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_22001 = torch.constant.int 1
    %int0_22002 = torch.constant.int 0
    %24374 = torch.prim.ListConstruct %int1_22001, %int0_22002 : (!torch.int, !torch.int) -> !torch.list<int>
    %24375 = torch.aten.permute %863, %24374 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
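    // Repeat the flatten / matmul / reshape sequence against the second weight
    // set, again once per device.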
    %int4_22003 = torch.constant.int 4
    %24376 = torch.aten.mul.int %int4_22003, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22004 = torch.constant.int 4096
    %24377 = torch.prim.ListConstruct %24376, %int4096_22004 : (!torch.int, !torch.int) -> !torch.list<int>
    %24378 = torch.aten.view %24280, %24377 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24378, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24379 = torch.aten.mm %24378, %24361 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24379, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_22005 = torch.constant.int 4
    %int1792_22006 = torch.constant.int 1792
    %24380 = torch.prim.ListConstruct %int4_22005, %2482, %int1792_22006 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24381 = torch.aten.view %24379, %24380 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_22007 = torch.constant.int 4
    %24382 = torch.aten.mul.int %int4_22007, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22008 = torch.constant.int 4096
    %24383 = torch.prim.ListConstruct %24382, %int4096_22008 : (!torch.int, !torch.int) -> !torch.list<int>
    %24384 = torch.aten.view %24281, %24383 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24384, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24385 = torch.aten.mm %24384, %24363 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24385, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_22009 = torch.constant.int 4
    %int1792_22010 = torch.constant.int 1792
    %24386 = torch.prim.ListConstruct %int4_22009, %2482, %int1792_22010 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24387 = torch.aten.view %24385, %24386 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_22011 = torch.constant.int 4
    %24388 = torch.aten.mul.int %int4_22011, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22012 = torch.constant.int 4096
    %24389 = torch.prim.ListConstruct %24388, %int4096_22012 : (!torch.int, !torch.int) -> !torch.list<int>
    %24390 = torch.aten.view %24282, %24389 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24390, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24391 = torch.aten.mm %24390, %24365 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24391, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_22013 = torch.constant.int 4
    %int1792_22014 = torch.constant.int 1792
    %24392 = torch.prim.ListConstruct %int4_22013, %2482, %int1792_22014 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24393 = torch.aten.view %24391, %24392 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_22015 = torch.constant.int 4
    %24394 = torch.aten.mul.int %int4_22015, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22016 = torch.constant.int 4096
    %24395 = torch.prim.ListConstruct %24394, %int4096_22016 : (!torch.int, !torch.int) -> !torch.list<int>
    %24396 = torch.aten.view %24283, %24395 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24396, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24397 = torch.aten.mm %24396, %24367 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24397, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_22017 = torch.constant.int 4
    %int1792_22018 = torch.constant.int 1792
    %24398 = torch.prim.ListConstruct %int4_22017, %2482, %int1792_22018 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24399 = torch.aten.view %24397, %24398 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_22019 = torch.constant.int 4
    %24400 = torch.aten.mul.int %int4_22019, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22020 = torch.constant.int 4096
    %24401 = torch.prim.ListConstruct %24400, %int4096_22020 : (!torch.int, !torch.int) -> !torch.list<int>
    %24402 = torch.aten.view %24284, %24401 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24402, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24403 = torch.aten.mm %24402, %24369 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24403, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_22021 = torch.constant.int 4
    %int1792_22022 = torch.constant.int 1792
    %24404 = torch.prim.ListConstruct %int4_22021, %2482, %int1792_22022 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24405 = torch.aten.view %24403, %24404 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_22023 = torch.constant.int 4
    %24406 = torch.aten.mul.int %int4_22023, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22024 = torch.constant.int 4096
    %24407 = torch.prim.ListConstruct %24406, %int4096_22024 : (!torch.int, !torch.int) -> !torch.list<int>
    %24408 = torch.aten.view %24285, %24407 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24408, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24409 = torch.aten.mm %24408, %24371 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24409, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_22025 = torch.constant.int 4
    %int1792_22026 = torch.constant.int 1792
    %24410 = torch.prim.ListConstruct %int4_22025, %2482, %int1792_22026 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24411 = torch.aten.view %24409, %24410 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_22027 = torch.constant.int 4
    %24412 = torch.aten.mul.int %int4_22027, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22028 = torch.constant.int 4096
    %24413 = torch.prim.ListConstruct %24412, %int4096_22028 : (!torch.int, !torch.int) -> !torch.list<int>
    %24414 = torch.aten.view %24286, %24413 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24414, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24415 = torch.aten.mm %24414, %24373 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24415, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_22029 = torch.constant.int 4
    %int1792_22030 = torch.constant.int 1792
    %24416 = torch.prim.ListConstruct %int4_22029, %2482, %int1792_22030 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24417 = torch.aten.view %24415, %24416 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_22031 = torch.constant.int 4
    %24418 = torch.aten.mul.int %int4_22031, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22032 = torch.constant.int 4096
    %24419 = torch.prim.ListConstruct %24418, %int4096_22032 : (!torch.int, !torch.int) -> !torch.list<int>
    %24420 = torch.aten.view %24287, %24419 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24420, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24421 = torch.aten.mm %24420, %24375 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24421, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_22033 = torch.constant.int 4
    %int1792_22034 = torch.constant.int 1792
    %24422 = torch.prim.ListConstruct %int4_22033, %2482, %int1792_22034 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24423 = torch.aten.view %24421, %24422 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
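    // Gated combine (presumed SwiGLU-style FFN): elementwise-multiply the
    // activated gate projections computed earlier (%24352..%24359) with the
    // up-projection results above (%24381..%24423), one product per shard.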
    %24424 = torch.aten.mul.Tensor %24352, %24381 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24425 = torch.aten.mul.Tensor %24353, %24387 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24426 = torch.aten.mul.Tensor %24354, %24393 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24427 = torch.aten.mul.Tensor %24355, %24399 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24428 = torch.aten.mul.Tensor %24356, %24405 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24429 = torch.aten.mul.Tensor %24357, %24411 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24430 = torch.aten.mul.Tensor %24358, %24417 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %24431 = torch.aten.mul.Tensor %24359, %24423 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %24431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
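    // Transpose the eight ffn_down weight shards (%864..%871) from
    // [4096,1792] to [1792,4096] so each can serve as the RHS of a matmul.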
    %int1_22035 = torch.constant.int 1
    %int0_22036 = torch.constant.int 0
    %24432 = torch.prim.ListConstruct %int1_22035, %int0_22036 : (!torch.int, !torch.int) -> !torch.list<int>
    %24433 = torch.aten.permute %864, %24432 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_22037 = torch.constant.int 1
    %int0_22038 = torch.constant.int 0
    %24434 = torch.prim.ListConstruct %int1_22037, %int0_22038 : (!torch.int, !torch.int) -> !torch.list<int>
    %24435 = torch.aten.permute %865, %24434 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_22039 = torch.constant.int 1
    %int0_22040 = torch.constant.int 0
    %24436 = torch.prim.ListConstruct %int1_22039, %int0_22040 : (!torch.int, !torch.int) -> !torch.list<int>
    %24437 = torch.aten.permute %866, %24436 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_22041 = torch.constant.int 1
    %int0_22042 = torch.constant.int 0
    %24438 = torch.prim.ListConstruct %int1_22041, %int0_22042 : (!torch.int, !torch.int) -> !torch.list<int>
    %24439 = torch.aten.permute %867, %24438 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_22043 = torch.constant.int 1
    %int0_22044 = torch.constant.int 0
    %24440 = torch.prim.ListConstruct %int1_22043, %int0_22044 : (!torch.int, !torch.int) -> !torch.list<int>
    %24441 = torch.aten.permute %868, %24440 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_22045 = torch.constant.int 1
    %int0_22046 = torch.constant.int 0
    %24442 = torch.prim.ListConstruct %int1_22045, %int0_22046 : (!torch.int, !torch.int) -> !torch.list<int>
    %24443 = torch.aten.permute %869, %24442 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_22047 = torch.constant.int 1
    %int0_22048 = torch.constant.int 0
    %24444 = torch.prim.ListConstruct %int1_22047, %int0_22048 : (!torch.int, !torch.int) -> !torch.list<int>
    %24445 = torch.aten.permute %870, %24444 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_22049 = torch.constant.int 1
    %int0_22050 = torch.constant.int 0
    %24446 = torch.prim.ListConstruct %int1_22049, %int0_22050 : (!torch.int, !torch.int) -> !torch.list<int>
    %24447 = torch.aten.permute %871, %24446 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
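    // Per-shard down-projection: flatten each [4,?,1792] gated product to
    // [4*s0*16, 1792], matmul with the transposed [1792,4096] weight shard,
    // and reshape to [4,?,4096]. Each device now holds one partial sum of
    // the FFN output (%24454, %24461, ..., %24503).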
    %int1_22051 = torch.constant.int 1
    %24448 = torch.aten.size.int %24309, %int1_22051 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_22052 = torch.constant.int 4
    %24449 = torch.aten.mul.int %int4_22052, %24448 : !torch.int, !torch.int -> !torch.int
    %int1792_22053 = torch.constant.int 1792
    %24450 = torch.prim.ListConstruct %24449, %int1792_22053 : (!torch.int, !torch.int) -> !torch.list<int>
    %24451 = torch.aten.view %24424, %24450 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24451, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %24452 = torch.aten.mm %24451, %24433 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24452, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_22054 = torch.constant.int 4
    %int4096_22055 = torch.constant.int 4096
    %24453 = torch.prim.ListConstruct %int4_22054, %24448, %int4096_22055 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24454 = torch.aten.view %24452, %24453 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22056 = torch.constant.int 1
    %24455 = torch.aten.size.int %24315, %int1_22056 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_22057 = torch.constant.int 4
    %24456 = torch.aten.mul.int %int4_22057, %24455 : !torch.int, !torch.int -> !torch.int
    %int1792_22058 = torch.constant.int 1792
    %24457 = torch.prim.ListConstruct %24456, %int1792_22058 : (!torch.int, !torch.int) -> !torch.list<int>
    %24458 = torch.aten.view %24425, %24457 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24458, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %24459 = torch.aten.mm %24458, %24435 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24459, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_22059 = torch.constant.int 4
    %int4096_22060 = torch.constant.int 4096
    %24460 = torch.prim.ListConstruct %int4_22059, %24455, %int4096_22060 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24461 = torch.aten.view %24459, %24460 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22061 = torch.constant.int 1
    %24462 = torch.aten.size.int %24321, %int1_22061 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_22062 = torch.constant.int 4
    %24463 = torch.aten.mul.int %int4_22062, %24462 : !torch.int, !torch.int -> !torch.int
    %int1792_22063 = torch.constant.int 1792
    %24464 = torch.prim.ListConstruct %24463, %int1792_22063 : (!torch.int, !torch.int) -> !torch.list<int>
    %24465 = torch.aten.view %24426, %24464 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24465, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %24466 = torch.aten.mm %24465, %24437 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24466, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_22064 = torch.constant.int 4
    %int4096_22065 = torch.constant.int 4096
    %24467 = torch.prim.ListConstruct %int4_22064, %24462, %int4096_22065 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24468 = torch.aten.view %24466, %24467 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22066 = torch.constant.int 1
    %24469 = torch.aten.size.int %24327, %int1_22066 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_22067 = torch.constant.int 4
    %24470 = torch.aten.mul.int %int4_22067, %24469 : !torch.int, !torch.int -> !torch.int
    %int1792_22068 = torch.constant.int 1792
    %24471 = torch.prim.ListConstruct %24470, %int1792_22068 : (!torch.int, !torch.int) -> !torch.list<int>
    %24472 = torch.aten.view %24427, %24471 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24472, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %24473 = torch.aten.mm %24472, %24439 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24473, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_22069 = torch.constant.int 4
    %int4096_22070 = torch.constant.int 4096
    %24474 = torch.prim.ListConstruct %int4_22069, %24469, %int4096_22070 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24475 = torch.aten.view %24473, %24474 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22071 = torch.constant.int 1
    %24476 = torch.aten.size.int %24333, %int1_22071 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_22072 = torch.constant.int 4
    %24477 = torch.aten.mul.int %int4_22072, %24476 : !torch.int, !torch.int -> !torch.int
    %int1792_22073 = torch.constant.int 1792
    %24478 = torch.prim.ListConstruct %24477, %int1792_22073 : (!torch.int, !torch.int) -> !torch.list<int>
    %24479 = torch.aten.view %24428, %24478 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24479, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %24480 = torch.aten.mm %24479, %24441 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24480, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_22074 = torch.constant.int 4
    %int4096_22075 = torch.constant.int 4096
    %24481 = torch.prim.ListConstruct %int4_22074, %24476, %int4096_22075 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24482 = torch.aten.view %24480, %24481 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22076 = torch.constant.int 1
    %24483 = torch.aten.size.int %24339, %int1_22076 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_22077 = torch.constant.int 4
    %24484 = torch.aten.mul.int %int4_22077, %24483 : !torch.int, !torch.int -> !torch.int
    %int1792_22078 = torch.constant.int 1792
    %24485 = torch.prim.ListConstruct %24484, %int1792_22078 : (!torch.int, !torch.int) -> !torch.list<int>
    %24486 = torch.aten.view %24429, %24485 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24486, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %24487 = torch.aten.mm %24486, %24443 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24487, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_22079 = torch.constant.int 4
    %int4096_22080 = torch.constant.int 4096
    %24488 = torch.prim.ListConstruct %int4_22079, %24483, %int4096_22080 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24489 = torch.aten.view %24487, %24488 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22081 = torch.constant.int 1
    %24490 = torch.aten.size.int %24345, %int1_22081 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_22082 = torch.constant.int 4
    %24491 = torch.aten.mul.int %int4_22082, %24490 : !torch.int, !torch.int -> !torch.int
    %int1792_22083 = torch.constant.int 1792
    %24492 = torch.prim.ListConstruct %24491, %int1792_22083 : (!torch.int, !torch.int) -> !torch.list<int>
    %24493 = torch.aten.view %24430, %24492 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24493, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %24494 = torch.aten.mm %24493, %24445 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24494, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_22084 = torch.constant.int 4
    %int4096_22085 = torch.constant.int 4096
    %24495 = torch.prim.ListConstruct %int4_22084, %24490, %int4096_22085 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24496 = torch.aten.view %24494, %24495 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22086 = torch.constant.int 1
    %24497 = torch.aten.size.int %24351, %int1_22086 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_22087 = torch.constant.int 4
    %24498 = torch.aten.mul.int %int4_22087, %24497 : !torch.int, !torch.int -> !torch.int
    %int1792_22088 = torch.constant.int 1792
    %24499 = torch.prim.ListConstruct %24498, %int1792_22088 : (!torch.int, !torch.int) -> !torch.list<int>
    %24500 = torch.aten.view %24431, %24499 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %24500, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %24501 = torch.aten.mm %24500, %24447 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24501, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_22089 = torch.constant.int 4
    %int4096_22090 = torch.constant.int 4096
    %24502 = torch.prim.ListConstruct %int4_22089, %24497, %int4096_22090 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24503 = torch.aten.view %24501, %24502 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
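    // All-reduce across the eight devices, fully unrolled; logically
    // ffn_out = sum_i (gate_i * up_i) @ W_down_i^T, replicated per device.
    // Device 0: transfer the seven remote partials (%24461..%24503) to
    // @__device_0 and accumulate them into the local partial %24454.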
    %24504 = torch_c.to_builtin_tensor %24461 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22091 = arith.constant 1 : index
    %dim_22092 = tensor.dim %24504, %c1_22091 : tensor<4x?x4096xf16>
    %24505 = flow.tensor.transfer %24504 : tensor<4x?x4096xf16>{%dim_22092} to #hal.device.promise<@__device_0>
    %24506 = torch_c.from_builtin_tensor %24505 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24507 = torch_c.to_builtin_tensor %24468 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22093 = arith.constant 1 : index
    %dim_22094 = tensor.dim %24507, %c1_22093 : tensor<4x?x4096xf16>
    %24508 = flow.tensor.transfer %24507 : tensor<4x?x4096xf16>{%dim_22094} to #hal.device.promise<@__device_0>
    %24509 = torch_c.from_builtin_tensor %24508 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24510 = torch_c.to_builtin_tensor %24475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22095 = arith.constant 1 : index
    %dim_22096 = tensor.dim %24510, %c1_22095 : tensor<4x?x4096xf16>
    %24511 = flow.tensor.transfer %24510 : tensor<4x?x4096xf16>{%dim_22096} to #hal.device.promise<@__device_0>
    %24512 = torch_c.from_builtin_tensor %24511 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24513 = torch_c.to_builtin_tensor %24482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22097 = arith.constant 1 : index
    %dim_22098 = tensor.dim %24513, %c1_22097 : tensor<4x?x4096xf16>
    %24514 = flow.tensor.transfer %24513 : tensor<4x?x4096xf16>{%dim_22098} to #hal.device.promise<@__device_0>
    %24515 = torch_c.from_builtin_tensor %24514 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24516 = torch_c.to_builtin_tensor %24489 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22099 = arith.constant 1 : index
    %dim_22100 = tensor.dim %24516, %c1_22099 : tensor<4x?x4096xf16>
    %24517 = flow.tensor.transfer %24516 : tensor<4x?x4096xf16>{%dim_22100} to #hal.device.promise<@__device_0>
    %24518 = torch_c.from_builtin_tensor %24517 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24519 = torch_c.to_builtin_tensor %24496 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22101 = arith.constant 1 : index
    %dim_22102 = tensor.dim %24519, %c1_22101 : tensor<4x?x4096xf16>
    %24520 = flow.tensor.transfer %24519 : tensor<4x?x4096xf16>{%dim_22102} to #hal.device.promise<@__device_0>
    %24521 = torch_c.from_builtin_tensor %24520 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24522 = torch_c.to_builtin_tensor %24503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22103 = arith.constant 1 : index
    %dim_22104 = tensor.dim %24522, %c1_22103 : tensor<4x?x4096xf16>
    %24523 = flow.tensor.transfer %24522 : tensor<4x?x4096xf16>{%dim_22104} to #hal.device.promise<@__device_0>
    %24524 = torch_c.from_builtin_tensor %24523 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22105 = torch.constant.int 1
    %24525 = torch.aten.add.Tensor %24454, %24506, %int1_22105 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22106 = torch.constant.int 1
    %24526 = torch.aten.add.Tensor %24525, %24509, %int1_22106 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22107 = torch.constant.int 1
    %24527 = torch.aten.add.Tensor %24526, %24512, %int1_22107 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22108 = torch.constant.int 1
    %24528 = torch.aten.add.Tensor %24527, %24515, %int1_22108 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22109 = torch.constant.int 1
    %24529 = torch.aten.add.Tensor %24528, %24518, %int1_22109 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22110 = torch.constant.int 1
    %24530 = torch.aten.add.Tensor %24529, %24521, %int1_22110 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22111 = torch.constant.int 1
    %24531 = torch.aten.add.Tensor %24530, %24524, %int1_22111 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
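    // Device 1: same reduction; the local partial %24461 is used in place
    // while the other seven partials are transferred to @__device_1 first.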
    %24532 = torch_c.to_builtin_tensor %24454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22112 = arith.constant 1 : index
    %dim_22113 = tensor.dim %24532, %c1_22112 : tensor<4x?x4096xf16>
    %24533 = flow.tensor.transfer %24532 : tensor<4x?x4096xf16>{%dim_22113} to #hal.device.promise<@__device_1>
    %24534 = torch_c.from_builtin_tensor %24533 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24535 = torch_c.to_builtin_tensor %24468 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22114 = arith.constant 1 : index
    %dim_22115 = tensor.dim %24535, %c1_22114 : tensor<4x?x4096xf16>
    %24536 = flow.tensor.transfer %24535 : tensor<4x?x4096xf16>{%dim_22115} to #hal.device.promise<@__device_1>
    %24537 = torch_c.from_builtin_tensor %24536 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24538 = torch_c.to_builtin_tensor %24475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22116 = arith.constant 1 : index
    %dim_22117 = tensor.dim %24538, %c1_22116 : tensor<4x?x4096xf16>
    %24539 = flow.tensor.transfer %24538 : tensor<4x?x4096xf16>{%dim_22117} to #hal.device.promise<@__device_1>
    %24540 = torch_c.from_builtin_tensor %24539 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24541 = torch_c.to_builtin_tensor %24482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22118 = arith.constant 1 : index
    %dim_22119 = tensor.dim %24541, %c1_22118 : tensor<4x?x4096xf16>
    %24542 = flow.tensor.transfer %24541 : tensor<4x?x4096xf16>{%dim_22119} to #hal.device.promise<@__device_1>
    %24543 = torch_c.from_builtin_tensor %24542 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24544 = torch_c.to_builtin_tensor %24489 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22120 = arith.constant 1 : index
    %dim_22121 = tensor.dim %24544, %c1_22120 : tensor<4x?x4096xf16>
    %24545 = flow.tensor.transfer %24544 : tensor<4x?x4096xf16>{%dim_22121} to #hal.device.promise<@__device_1>
    %24546 = torch_c.from_builtin_tensor %24545 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24547 = torch_c.to_builtin_tensor %24496 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22122 = arith.constant 1 : index
    %dim_22123 = tensor.dim %24547, %c1_22122 : tensor<4x?x4096xf16>
    %24548 = flow.tensor.transfer %24547 : tensor<4x?x4096xf16>{%dim_22123} to #hal.device.promise<@__device_1>
    %24549 = torch_c.from_builtin_tensor %24548 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24550 = torch_c.to_builtin_tensor %24503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22124 = arith.constant 1 : index
    %dim_22125 = tensor.dim %24550, %c1_22124 : tensor<4x?x4096xf16>
    %24551 = flow.tensor.transfer %24550 : tensor<4x?x4096xf16>{%dim_22125} to #hal.device.promise<@__device_1>
    %24552 = torch_c.from_builtin_tensor %24551 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22126 = torch.constant.int 1
    %24553 = torch.aten.add.Tensor %24534, %24461, %int1_22126 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22127 = torch.constant.int 1
    %24554 = torch.aten.add.Tensor %24553, %24537, %int1_22127 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22128 = torch.constant.int 1
    %24555 = torch.aten.add.Tensor %24554, %24540, %int1_22128 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22129 = torch.constant.int 1
    %24556 = torch.aten.add.Tensor %24555, %24543, %int1_22129 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22130 = torch.constant.int 1
    %24557 = torch.aten.add.Tensor %24556, %24546, %int1_22130 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22131 = torch.constant.int 1
    %24558 = torch.aten.add.Tensor %24557, %24549, %int1_22131 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22132 = torch.constant.int 1
    %24559 = torch.aten.add.Tensor %24558, %24552, %int1_22132 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
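    // Device 2: transfer-and-accumulate reduction targeting @__device_2;
    // the local partial %24468 is added without a transfer.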
    %24560 = torch_c.to_builtin_tensor %24454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22133 = arith.constant 1 : index
    %dim_22134 = tensor.dim %24560, %c1_22133 : tensor<4x?x4096xf16>
    %24561 = flow.tensor.transfer %24560 : tensor<4x?x4096xf16>{%dim_22134} to #hal.device.promise<@__device_2>
    %24562 = torch_c.from_builtin_tensor %24561 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24563 = torch_c.to_builtin_tensor %24461 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22135 = arith.constant 1 : index
    %dim_22136 = tensor.dim %24563, %c1_22135 : tensor<4x?x4096xf16>
    %24564 = flow.tensor.transfer %24563 : tensor<4x?x4096xf16>{%dim_22136} to #hal.device.promise<@__device_2>
    %24565 = torch_c.from_builtin_tensor %24564 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24566 = torch_c.to_builtin_tensor %24475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22137 = arith.constant 1 : index
    %dim_22138 = tensor.dim %24566, %c1_22137 : tensor<4x?x4096xf16>
    %24567 = flow.tensor.transfer %24566 : tensor<4x?x4096xf16>{%dim_22138} to #hal.device.promise<@__device_2>
    %24568 = torch_c.from_builtin_tensor %24567 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24569 = torch_c.to_builtin_tensor %24482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22139 = arith.constant 1 : index
    %dim_22140 = tensor.dim %24569, %c1_22139 : tensor<4x?x4096xf16>
    %24570 = flow.tensor.transfer %24569 : tensor<4x?x4096xf16>{%dim_22140} to #hal.device.promise<@__device_2>
    %24571 = torch_c.from_builtin_tensor %24570 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24572 = torch_c.to_builtin_tensor %24489 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22141 = arith.constant 1 : index
    %dim_22142 = tensor.dim %24572, %c1_22141 : tensor<4x?x4096xf16>
    %24573 = flow.tensor.transfer %24572 : tensor<4x?x4096xf16>{%dim_22142} to #hal.device.promise<@__device_2>
    %24574 = torch_c.from_builtin_tensor %24573 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24575 = torch_c.to_builtin_tensor %24496 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22143 = arith.constant 1 : index
    %dim_22144 = tensor.dim %24575, %c1_22143 : tensor<4x?x4096xf16>
    %24576 = flow.tensor.transfer %24575 : tensor<4x?x4096xf16>{%dim_22144} to #hal.device.promise<@__device_2>
    %24577 = torch_c.from_builtin_tensor %24576 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24578 = torch_c.to_builtin_tensor %24503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22145 = arith.constant 1 : index
    %dim_22146 = tensor.dim %24578, %c1_22145 : tensor<4x?x4096xf16>
    %24579 = flow.tensor.transfer %24578 : tensor<4x?x4096xf16>{%dim_22146} to #hal.device.promise<@__device_2>
    %24580 = torch_c.from_builtin_tensor %24579 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22147 = torch.constant.int 1
    %24581 = torch.aten.add.Tensor %24562, %24565, %int1_22147 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22148 = torch.constant.int 1
    %24582 = torch.aten.add.Tensor %24581, %24468, %int1_22148 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22149 = torch.constant.int 1
    %24583 = torch.aten.add.Tensor %24582, %24568, %int1_22149 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22150 = torch.constant.int 1
    %24584 = torch.aten.add.Tensor %24583, %24571, %int1_22150 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22151 = torch.constant.int 1
    %24585 = torch.aten.add.Tensor %24584, %24574, %int1_22151 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22152 = torch.constant.int 1
    %24586 = torch.aten.add.Tensor %24585, %24577, %int1_22152 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22153 = torch.constant.int 1
    %24587 = torch.aten.add.Tensor %24586, %24580, %int1_22153 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
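    // Device 3: transfer-and-accumulate reduction targeting @__device_3;
    // the local partial %24475 is added without a transfer.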
    %24588 = torch_c.to_builtin_tensor %24454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22154 = arith.constant 1 : index
    %dim_22155 = tensor.dim %24588, %c1_22154 : tensor<4x?x4096xf16>
    %24589 = flow.tensor.transfer %24588 : tensor<4x?x4096xf16>{%dim_22155} to #hal.device.promise<@__device_3>
    %24590 = torch_c.from_builtin_tensor %24589 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24591 = torch_c.to_builtin_tensor %24461 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22156 = arith.constant 1 : index
    %dim_22157 = tensor.dim %24591, %c1_22156 : tensor<4x?x4096xf16>
    %24592 = flow.tensor.transfer %24591 : tensor<4x?x4096xf16>{%dim_22157} to #hal.device.promise<@__device_3>
    %24593 = torch_c.from_builtin_tensor %24592 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24594 = torch_c.to_builtin_tensor %24468 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22158 = arith.constant 1 : index
    %dim_22159 = tensor.dim %24594, %c1_22158 : tensor<4x?x4096xf16>
    %24595 = flow.tensor.transfer %24594 : tensor<4x?x4096xf16>{%dim_22159} to #hal.device.promise<@__device_3>
    %24596 = torch_c.from_builtin_tensor %24595 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24597 = torch_c.to_builtin_tensor %24482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22160 = arith.constant 1 : index
    %dim_22161 = tensor.dim %24597, %c1_22160 : tensor<4x?x4096xf16>
    %24598 = flow.tensor.transfer %24597 : tensor<4x?x4096xf16>{%dim_22161} to #hal.device.promise<@__device_3>
    %24599 = torch_c.from_builtin_tensor %24598 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24600 = torch_c.to_builtin_tensor %24489 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22162 = arith.constant 1 : index
    %dim_22163 = tensor.dim %24600, %c1_22162 : tensor<4x?x4096xf16>
    %24601 = flow.tensor.transfer %24600 : tensor<4x?x4096xf16>{%dim_22163} to #hal.device.promise<@__device_3>
    %24602 = torch_c.from_builtin_tensor %24601 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24603 = torch_c.to_builtin_tensor %24496 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22164 = arith.constant 1 : index
    %dim_22165 = tensor.dim %24603, %c1_22164 : tensor<4x?x4096xf16>
    %24604 = flow.tensor.transfer %24603 : tensor<4x?x4096xf16>{%dim_22165} to #hal.device.promise<@__device_3>
    %24605 = torch_c.from_builtin_tensor %24604 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24606 = torch_c.to_builtin_tensor %24503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22166 = arith.constant 1 : index
    %dim_22167 = tensor.dim %24606, %c1_22166 : tensor<4x?x4096xf16>
    %24607 = flow.tensor.transfer %24606 : tensor<4x?x4096xf16>{%dim_22167} to #hal.device.promise<@__device_3>
    %24608 = torch_c.from_builtin_tensor %24607 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22168 = torch.constant.int 1
    %24609 = torch.aten.add.Tensor %24590, %24593, %int1_22168 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22169 = torch.constant.int 1
    %24610 = torch.aten.add.Tensor %24609, %24596, %int1_22169 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22170 = torch.constant.int 1
    %24611 = torch.aten.add.Tensor %24610, %24475, %int1_22170 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22171 = torch.constant.int 1
    %24612 = torch.aten.add.Tensor %24611, %24599, %int1_22171 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22172 = torch.constant.int 1
    %24613 = torch.aten.add.Tensor %24612, %24602, %int1_22172 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22173 = torch.constant.int 1
    %24614 = torch.aten.add.Tensor %24613, %24605, %int1_22173 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22174 = torch.constant.int 1
    %24615 = torch.aten.add.Tensor %24614, %24608, %int1_22174 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
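    // Device 4: transfer-and-accumulate reduction targeting @__device_4;
    // the local partial %24482 is added without a transfer.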
    %24616 = torch_c.to_builtin_tensor %24454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22175 = arith.constant 1 : index
    %dim_22176 = tensor.dim %24616, %c1_22175 : tensor<4x?x4096xf16>
    %24617 = flow.tensor.transfer %24616 : tensor<4x?x4096xf16>{%dim_22176} to #hal.device.promise<@__device_4>
    %24618 = torch_c.from_builtin_tensor %24617 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24619 = torch_c.to_builtin_tensor %24461 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22177 = arith.constant 1 : index
    %dim_22178 = tensor.dim %24619, %c1_22177 : tensor<4x?x4096xf16>
    %24620 = flow.tensor.transfer %24619 : tensor<4x?x4096xf16>{%dim_22178} to #hal.device.promise<@__device_4>
    %24621 = torch_c.from_builtin_tensor %24620 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24622 = torch_c.to_builtin_tensor %24468 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22179 = arith.constant 1 : index
    %dim_22180 = tensor.dim %24622, %c1_22179 : tensor<4x?x4096xf16>
    %24623 = flow.tensor.transfer %24622 : tensor<4x?x4096xf16>{%dim_22180} to #hal.device.promise<@__device_4>
    %24624 = torch_c.from_builtin_tensor %24623 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24625 = torch_c.to_builtin_tensor %24475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22181 = arith.constant 1 : index
    %dim_22182 = tensor.dim %24625, %c1_22181 : tensor<4x?x4096xf16>
    %24626 = flow.tensor.transfer %24625 : tensor<4x?x4096xf16>{%dim_22182} to #hal.device.promise<@__device_4>
    %24627 = torch_c.from_builtin_tensor %24626 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24628 = torch_c.to_builtin_tensor %24489 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22183 = arith.constant 1 : index
    %dim_22184 = tensor.dim %24628, %c1_22183 : tensor<4x?x4096xf16>
    %24629 = flow.tensor.transfer %24628 : tensor<4x?x4096xf16>{%dim_22184} to #hal.device.promise<@__device_4>
    %24630 = torch_c.from_builtin_tensor %24629 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24631 = torch_c.to_builtin_tensor %24496 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22185 = arith.constant 1 : index
    %dim_22186 = tensor.dim %24631, %c1_22185 : tensor<4x?x4096xf16>
    %24632 = flow.tensor.transfer %24631 : tensor<4x?x4096xf16>{%dim_22186} to #hal.device.promise<@__device_4>
    %24633 = torch_c.from_builtin_tensor %24632 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24634 = torch_c.to_builtin_tensor %24503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22187 = arith.constant 1 : index
    %dim_22188 = tensor.dim %24634, %c1_22187 : tensor<4x?x4096xf16>
    %24635 = flow.tensor.transfer %24634 : tensor<4x?x4096xf16>{%dim_22188} to #hal.device.promise<@__device_4>
    %24636 = torch_c.from_builtin_tensor %24635 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22189 = torch.constant.int 1
    %24637 = torch.aten.add.Tensor %24618, %24621, %int1_22189 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22190 = torch.constant.int 1
    %24638 = torch.aten.add.Tensor %24637, %24624, %int1_22190 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22191 = torch.constant.int 1
    %24639 = torch.aten.add.Tensor %24638, %24627, %int1_22191 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22192 = torch.constant.int 1
    %24640 = torch.aten.add.Tensor %24639, %24482, %int1_22192 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22193 = torch.constant.int 1
    %24641 = torch.aten.add.Tensor %24640, %24630, %int1_22193 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22194 = torch.constant.int 1
    %24642 = torch.aten.add.Tensor %24641, %24633, %int1_22194 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22195 = torch.constant.int 1
    %24643 = torch.aten.add.Tensor %24642, %24636, %int1_22195 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
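    // Device 5: transfer-and-accumulate reduction targeting @__device_5;
    // the local partial %24489 is added without a transfer.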
    %24644 = torch_c.to_builtin_tensor %24454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22196 = arith.constant 1 : index
    %dim_22197 = tensor.dim %24644, %c1_22196 : tensor<4x?x4096xf16>
    %24645 = flow.tensor.transfer %24644 : tensor<4x?x4096xf16>{%dim_22197} to #hal.device.promise<@__device_5>
    %24646 = torch_c.from_builtin_tensor %24645 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24647 = torch_c.to_builtin_tensor %24461 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22198 = arith.constant 1 : index
    %dim_22199 = tensor.dim %24647, %c1_22198 : tensor<4x?x4096xf16>
    %24648 = flow.tensor.transfer %24647 : tensor<4x?x4096xf16>{%dim_22199} to #hal.device.promise<@__device_5>
    %24649 = torch_c.from_builtin_tensor %24648 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24650 = torch_c.to_builtin_tensor %24468 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22200 = arith.constant 1 : index
    %dim_22201 = tensor.dim %24650, %c1_22200 : tensor<4x?x4096xf16>
    %24651 = flow.tensor.transfer %24650 : tensor<4x?x4096xf16>{%dim_22201} to #hal.device.promise<@__device_5>
    %24652 = torch_c.from_builtin_tensor %24651 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24653 = torch_c.to_builtin_tensor %24475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22202 = arith.constant 1 : index
    %dim_22203 = tensor.dim %24653, %c1_22202 : tensor<4x?x4096xf16>
    %24654 = flow.tensor.transfer %24653 : tensor<4x?x4096xf16>{%dim_22203} to #hal.device.promise<@__device_5>
    %24655 = torch_c.from_builtin_tensor %24654 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24656 = torch_c.to_builtin_tensor %24482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22204 = arith.constant 1 : index
    %dim_22205 = tensor.dim %24656, %c1_22204 : tensor<4x?x4096xf16>
    %24657 = flow.tensor.transfer %24656 : tensor<4x?x4096xf16>{%dim_22205} to #hal.device.promise<@__device_5>
    %24658 = torch_c.from_builtin_tensor %24657 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24659 = torch_c.to_builtin_tensor %24496 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22206 = arith.constant 1 : index
    %dim_22207 = tensor.dim %24659, %c1_22206 : tensor<4x?x4096xf16>
    %24660 = flow.tensor.transfer %24659 : tensor<4x?x4096xf16>{%dim_22207} to #hal.device.promise<@__device_5>
    %24661 = torch_c.from_builtin_tensor %24660 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24662 = torch_c.to_builtin_tensor %24503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22208 = arith.constant 1 : index
    %dim_22209 = tensor.dim %24662, %c1_22208 : tensor<4x?x4096xf16>
    %24663 = flow.tensor.transfer %24662 : tensor<4x?x4096xf16>{%dim_22209} to #hal.device.promise<@__device_5>
    %24664 = torch_c.from_builtin_tensor %24663 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22210 = torch.constant.int 1
    %24665 = torch.aten.add.Tensor %24646, %24649, %int1_22210 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22211 = torch.constant.int 1
    %24666 = torch.aten.add.Tensor %24665, %24652, %int1_22211 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22212 = torch.constant.int 1
    %24667 = torch.aten.add.Tensor %24666, %24655, %int1_22212 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22213 = torch.constant.int 1
    %24668 = torch.aten.add.Tensor %24667, %24658, %int1_22213 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22214 = torch.constant.int 1
    %24669 = torch.aten.add.Tensor %24668, %24489, %int1_22214 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22215 = torch.constant.int 1
    %24670 = torch.aten.add.Tensor %24669, %24661, %int1_22215 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22216 = torch.constant.int 1
    %24671 = torch.aten.add.Tensor %24670, %24664, %int1_22216 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
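    // Same gather-and-sum pattern for @__device_6 (here %24496 is the
    // already-resident operand).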
    %24672 = torch_c.to_builtin_tensor %24454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22217 = arith.constant 1 : index
    %dim_22218 = tensor.dim %24672, %c1_22217 : tensor<4x?x4096xf16>
    %24673 = flow.tensor.transfer %24672 : tensor<4x?x4096xf16>{%dim_22218} to #hal.device.promise<@__device_6>
    %24674 = torch_c.from_builtin_tensor %24673 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24675 = torch_c.to_builtin_tensor %24461 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22219 = arith.constant 1 : index
    %dim_22220 = tensor.dim %24675, %c1_22219 : tensor<4x?x4096xf16>
    %24676 = flow.tensor.transfer %24675 : tensor<4x?x4096xf16>{%dim_22220} to #hal.device.promise<@__device_6>
    %24677 = torch_c.from_builtin_tensor %24676 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24678 = torch_c.to_builtin_tensor %24468 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22221 = arith.constant 1 : index
    %dim_22222 = tensor.dim %24678, %c1_22221 : tensor<4x?x4096xf16>
    %24679 = flow.tensor.transfer %24678 : tensor<4x?x4096xf16>{%dim_22222} to #hal.device.promise<@__device_6>
    %24680 = torch_c.from_builtin_tensor %24679 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24681 = torch_c.to_builtin_tensor %24475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22223 = arith.constant 1 : index
    %dim_22224 = tensor.dim %24681, %c1_22223 : tensor<4x?x4096xf16>
    %24682 = flow.tensor.transfer %24681 : tensor<4x?x4096xf16>{%dim_22224} to #hal.device.promise<@__device_6>
    %24683 = torch_c.from_builtin_tensor %24682 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24684 = torch_c.to_builtin_tensor %24482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22225 = arith.constant 1 : index
    %dim_22226 = tensor.dim %24684, %c1_22225 : tensor<4x?x4096xf16>
    %24685 = flow.tensor.transfer %24684 : tensor<4x?x4096xf16>{%dim_22226} to #hal.device.promise<@__device_6>
    %24686 = torch_c.from_builtin_tensor %24685 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24687 = torch_c.to_builtin_tensor %24489 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22227 = arith.constant 1 : index
    %dim_22228 = tensor.dim %24687, %c1_22227 : tensor<4x?x4096xf16>
    %24688 = flow.tensor.transfer %24687 : tensor<4x?x4096xf16>{%dim_22228} to #hal.device.promise<@__device_6>
    %24689 = torch_c.from_builtin_tensor %24688 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24690 = torch_c.to_builtin_tensor %24503 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22229 = arith.constant 1 : index
    %dim_22230 = tensor.dim %24690, %c1_22229 : tensor<4x?x4096xf16>
    %24691 = flow.tensor.transfer %24690 : tensor<4x?x4096xf16>{%dim_22230} to #hal.device.promise<@__device_6>
    %24692 = torch_c.from_builtin_tensor %24691 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22231 = torch.constant.int 1
    %24693 = torch.aten.add.Tensor %24674, %24677, %int1_22231 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22232 = torch.constant.int 1
    %24694 = torch.aten.add.Tensor %24693, %24680, %int1_22232 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22233 = torch.constant.int 1
    %24695 = torch.aten.add.Tensor %24694, %24683, %int1_22233 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22234 = torch.constant.int 1
    %24696 = torch.aten.add.Tensor %24695, %24686, %int1_22234 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22235 = torch.constant.int 1
    %24697 = torch.aten.add.Tensor %24696, %24689, %int1_22235 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22236 = torch.constant.int 1
    %24698 = torch.aten.add.Tensor %24697, %24496, %int1_22236 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22237 = torch.constant.int 1
    %24699 = torch.aten.add.Tensor %24698, %24692, %int1_22237 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
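    // And the same for @__device_7 (%24503 already resident).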
    %24700 = torch_c.to_builtin_tensor %24454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22238 = arith.constant 1 : index
    %dim_22239 = tensor.dim %24700, %c1_22238 : tensor<4x?x4096xf16>
    %24701 = flow.tensor.transfer %24700 : tensor<4x?x4096xf16>{%dim_22239} to #hal.device.promise<@__device_7>
    %24702 = torch_c.from_builtin_tensor %24701 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24703 = torch_c.to_builtin_tensor %24461 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22240 = arith.constant 1 : index
    %dim_22241 = tensor.dim %24703, %c1_22240 : tensor<4x?x4096xf16>
    %24704 = flow.tensor.transfer %24703 : tensor<4x?x4096xf16>{%dim_22241} to #hal.device.promise<@__device_7>
    %24705 = torch_c.from_builtin_tensor %24704 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24706 = torch_c.to_builtin_tensor %24468 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22242 = arith.constant 1 : index
    %dim_22243 = tensor.dim %24706, %c1_22242 : tensor<4x?x4096xf16>
    %24707 = flow.tensor.transfer %24706 : tensor<4x?x4096xf16>{%dim_22243} to #hal.device.promise<@__device_7>
    %24708 = torch_c.from_builtin_tensor %24707 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24709 = torch_c.to_builtin_tensor %24475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22244 = arith.constant 1 : index
    %dim_22245 = tensor.dim %24709, %c1_22244 : tensor<4x?x4096xf16>
    %24710 = flow.tensor.transfer %24709 : tensor<4x?x4096xf16>{%dim_22245} to #hal.device.promise<@__device_7>
    %24711 = torch_c.from_builtin_tensor %24710 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24712 = torch_c.to_builtin_tensor %24482 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22246 = arith.constant 1 : index
    %dim_22247 = tensor.dim %24712, %c1_22246 : tensor<4x?x4096xf16>
    %24713 = flow.tensor.transfer %24712 : tensor<4x?x4096xf16>{%dim_22247} to #hal.device.promise<@__device_7>
    %24714 = torch_c.from_builtin_tensor %24713 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24715 = torch_c.to_builtin_tensor %24489 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22248 = arith.constant 1 : index
    %dim_22249 = tensor.dim %24715, %c1_22248 : tensor<4x?x4096xf16>
    %24716 = flow.tensor.transfer %24715 : tensor<4x?x4096xf16>{%dim_22249} to #hal.device.promise<@__device_7>
    %24717 = torch_c.from_builtin_tensor %24716 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %24718 = torch_c.to_builtin_tensor %24496 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_22250 = arith.constant 1 : index
    %dim_22251 = tensor.dim %24718, %c1_22250 : tensor<4x?x4096xf16>
    %24719 = flow.tensor.transfer %24718 : tensor<4x?x4096xf16>{%dim_22251} to #hal.device.promise<@__device_7>
    %24720 = torch_c.from_builtin_tensor %24719 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22252 = torch.constant.int 1
    %24721 = torch.aten.add.Tensor %24702, %24705, %int1_22252 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22253 = torch.constant.int 1
    %24722 = torch.aten.add.Tensor %24721, %24708, %int1_22253 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22254 = torch.constant.int 1
    %24723 = torch.aten.add.Tensor %24722, %24711, %int1_22254 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22255 = torch.constant.int 1
    %24724 = torch.aten.add.Tensor %24723, %24714, %int1_22255 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22256 = torch.constant.int 1
    %24725 = torch.aten.add.Tensor %24724, %24717, %int1_22256 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22257 = torch.constant.int 1
    %24726 = torch.aten.add.Tensor %24725, %24720, %int1_22257 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22258 = torch.constant.int 1
    %24727 = torch.aten.add.Tensor %24726, %24503, %int1_22258 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
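    // Residual connection: each device adds its reduced output to its local copy
    // of the block input (%24208..%24215), presumably the residual stream.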
    %int1_22259 = torch.constant.int 1
    %24728 = torch.aten.add.Tensor %24208, %24531, %int1_22259 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22260 = torch.constant.int 1
    %24729 = torch.aten.add.Tensor %24209, %24559, %int1_22260 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22261 = torch.constant.int 1
    %24730 = torch.aten.add.Tensor %24210, %24587, %int1_22261 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22262 = torch.constant.int 1
    %24731 = torch.aten.add.Tensor %24211, %24615, %int1_22262 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22263 = torch.constant.int 1
    %24732 = torch.aten.add.Tensor %24212, %24643, %int1_22263 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22264 = torch.constant.int 1
    %24733 = torch.aten.add.Tensor %24213, %24671, %int1_22264 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22265 = torch.constant.int 1
    %24734 = torch.aten.add.Tensor %24214, %24699, %int1_22265 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_22266 = torch.constant.int 1
    %24735 = torch.aten.add.Tensor %24215, %24727, %int1_22266 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
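    // What follows looks like a replicated RMSNorm across the eight devices.
    // First upcast the f16 activations to f32 (torch dtype code 6).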
    %int6_22267 = torch.constant.int 6
    %24736 = torch.prims.convert_element_type %24728, %int6_22267 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_22268 = torch.constant.int 6
    %24737 = torch.prims.convert_element_type %24729, %int6_22268 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_22269 = torch.constant.int 6
    %24738 = torch.prims.convert_element_type %24730, %int6_22269 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_22270 = torch.constant.int 6
    %24739 = torch.prims.convert_element_type %24731, %int6_22270 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_22271 = torch.constant.int 6
    %24740 = torch.prims.convert_element_type %24732, %int6_22271 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_22272 = torch.constant.int 6
    %24741 = torch.prims.convert_element_type %24733, %int6_22272 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_22273 = torch.constant.int 6
    %24742 = torch.prims.convert_element_type %24734, %int6_22273 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_22274 = torch.constant.int 6
    %24743 = torch.prims.convert_element_type %24735, %int6_22274 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
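    // Square elementwise (x^2).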
    %int2_22275 = torch.constant.int 2
    %24744 = torch.aten.pow.Tensor_Scalar %24736, %int2_22275 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_22276 = torch.constant.int 2
    %24745 = torch.aten.pow.Tensor_Scalar %24737, %int2_22276 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_22277 = torch.constant.int 2
    %24746 = torch.aten.pow.Tensor_Scalar %24738, %int2_22277 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_22278 = torch.constant.int 2
    %24747 = torch.aten.pow.Tensor_Scalar %24739, %int2_22278 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_22279 = torch.constant.int 2
    %24748 = torch.aten.pow.Tensor_Scalar %24740, %int2_22279 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_22280 = torch.constant.int 2
    %24749 = torch.aten.pow.Tensor_Scalar %24741, %int2_22280 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_22281 = torch.constant.int 2
    %24750 = torch.aten.pow.Tensor_Scalar %24742, %int2_22281 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_22282 = torch.constant.int 2
    %24751 = torch.aten.pow.Tensor_Scalar %24743, %int2_22282 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
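    // Mean of the squares over the hidden dimension (dim -1, keepdim=true).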
    %int-1_22283 = torch.constant.int -1
    %24752 = torch.prim.ListConstruct %int-1_22283 : (!torch.int) -> !torch.list<int>
    %true_22284 = torch.constant.bool true
    %none_22285 = torch.constant.none
    %24753 = torch.aten.mean.dim %24744, %24752, %true_22284, %none_22285 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_22286 = torch.constant.int -1
    %24754 = torch.prim.ListConstruct %int-1_22286 : (!torch.int) -> !torch.list<int>
    %true_22287 = torch.constant.bool true
    %none_22288 = torch.constant.none
    %24755 = torch.aten.mean.dim %24745, %24754, %true_22287, %none_22288 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_22289 = torch.constant.int -1
    %24756 = torch.prim.ListConstruct %int-1_22289 : (!torch.int) -> !torch.list<int>
    %true_22290 = torch.constant.bool true
    %none_22291 = torch.constant.none
    %24757 = torch.aten.mean.dim %24746, %24756, %true_22290, %none_22291 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_22292 = torch.constant.int -1
    %24758 = torch.prim.ListConstruct %int-1_22292 : (!torch.int) -> !torch.list<int>
    %true_22293 = torch.constant.bool true
    %none_22294 = torch.constant.none
    %24759 = torch.aten.mean.dim %24747, %24758, %true_22293, %none_22294 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_22295 = torch.constant.int -1
    %24760 = torch.prim.ListConstruct %int-1_22295 : (!torch.int) -> !torch.list<int>
    %true_22296 = torch.constant.bool true
    %none_22297 = torch.constant.none
    %24761 = torch.aten.mean.dim %24748, %24760, %true_22296, %none_22297 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_22298 = torch.constant.int -1
    %24762 = torch.prim.ListConstruct %int-1_22298 : (!torch.int) -> !torch.list<int>
    %true_22299 = torch.constant.bool true
    %none_22300 = torch.constant.none
    %24763 = torch.aten.mean.dim %24749, %24762, %true_22299, %none_22300 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_22301 = torch.constant.int -1
    %24764 = torch.prim.ListConstruct %int-1_22301 : (!torch.int) -> !torch.list<int>
    %true_22302 = torch.constant.bool true
    %none_22303 = torch.constant.none
    %24765 = torch.aten.mean.dim %24750, %24764, %true_22302, %none_22303 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_22304 = torch.constant.int -1
    %24766 = torch.prim.ListConstruct %int-1_22304 : (!torch.int) -> !torch.list<int>
    %true_22305 = torch.constant.bool true
    %none_22306 = torch.constant.none
    %24767 = torch.aten.mean.dim %24751, %24766, %true_22305, %none_22306 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
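    // Add epsilon (~1e-5) for numerical stability.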
    %float9.999990e-06_22307 = torch.constant.float 9.9999997473787516E-6
    %int1_22308 = torch.constant.int 1
    %24768 = torch.aten.add.Scalar %24753, %float9.999990e-06_22307, %int1_22308 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_22309 = torch.constant.float 9.9999997473787516E-6
    %int1_22310 = torch.constant.int 1
    %24769 = torch.aten.add.Scalar %24755, %float9.999990e-06_22309, %int1_22310 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_22311 = torch.constant.float 9.9999997473787516E-6
    %int1_22312 = torch.constant.int 1
    %24770 = torch.aten.add.Scalar %24757, %float9.999990e-06_22311, %int1_22312 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_22313 = torch.constant.float 9.9999997473787516E-6
    %int1_22314 = torch.constant.int 1
    %24771 = torch.aten.add.Scalar %24759, %float9.999990e-06_22313, %int1_22314 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_22315 = torch.constant.float 9.9999997473787516E-6
    %int1_22316 = torch.constant.int 1
    %24772 = torch.aten.add.Scalar %24761, %float9.999990e-06_22315, %int1_22316 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_22317 = torch.constant.float 9.9999997473787516E-6
    %int1_22318 = torch.constant.int 1
    %24773 = torch.aten.add.Scalar %24763, %float9.999990e-06_22317, %int1_22318 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_22319 = torch.constant.float 9.9999997473787516E-6
    %int1_22320 = torch.constant.int 1
    %24774 = torch.aten.add.Scalar %24765, %float9.999990e-06_22319, %int1_22320 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_22321 = torch.constant.float 9.9999997473787516E-6
    %int1_22322 = torch.constant.int 1
    %24775 = torch.aten.add.Scalar %24767, %float9.999990e-06_22321, %int1_22322 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
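    // rsqrt of the stabilized mean gives the normalization factor.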
    %24776 = torch.aten.rsqrt %24768 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24777 = torch.aten.rsqrt %24769 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24778 = torch.aten.rsqrt %24770 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24779 = torch.aten.rsqrt %24771 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24780 = torch.aten.rsqrt %24772 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24781 = torch.aten.rsqrt %24773 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24782 = torch.aten.rsqrt %24774 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %24783 = torch.aten.rsqrt %24775 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %24783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
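    // Normalize: scale the f32 activations by the broadcast rsqrt factor.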
    %24784 = torch.aten.mul.Tensor %24736, %24776 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24785 = torch.aten.mul.Tensor %24737, %24777 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24786 = torch.aten.mul.Tensor %24738, %24778 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24787 = torch.aten.mul.Tensor %24739, %24779 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24788 = torch.aten.mul.Tensor %24740, %24780 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24789 = torch.aten.mul.Tensor %24741, %24781 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24790 = torch.aten.mul.Tensor %24742, %24782 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24791 = torch.aten.mul.Tensor %24743, %24783 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
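    // Apply the learned norm weight; %872..%879 are presumably the per-device
    // replicas of this block's attn_norm weight.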
    %24792 = torch.aten.mul.Tensor %872, %24784 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24793 = torch.aten.mul.Tensor %873, %24785 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24794 = torch.aten.mul.Tensor %874, %24786 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24795 = torch.aten.mul.Tensor %875, %24787 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24796 = torch.aten.mul.Tensor %876, %24788 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24797 = torch.aten.mul.Tensor %877, %24789 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24798 = torch.aten.mul.Tensor %878, %24790 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %24799 = torch.aten.mul.Tensor %879, %24791 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %24799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
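    // Downcast the normalized activations back to f16 (torch dtype code 5).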
    %int5_22323 = torch.constant.int 5
    %24800 = torch.prims.convert_element_type %24792, %int5_22323 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_22324 = torch.constant.int 5
    %24801 = torch.prims.convert_element_type %24793, %int5_22324 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_22325 = torch.constant.int 5
    %24802 = torch.prims.convert_element_type %24794, %int5_22325 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_22326 = torch.constant.int 5
    %24803 = torch.prims.convert_element_type %24795, %int5_22326 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_22327 = torch.constant.int 5
    %24804 = torch.prims.convert_element_type %24796, %int5_22327 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_22328 = torch.constant.int 5
    %24805 = torch.prims.convert_element_type %24797, %int5_22328 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_22329 = torch.constant.int 5
    %24806 = torch.prims.convert_element_type %24798, %int5_22329 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_22330 = torch.constant.int 5
    %24807 = torch.prims.convert_element_type %24799, %int5_22330 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %24807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
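    // Transpose the eight [512,4096] weight shards to [4096,512]; by shape these
    // are presumably this block's sharded attn_q weights.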
    %int1_22331 = torch.constant.int 1
    %int0_22332 = torch.constant.int 0
    %24808 = torch.prim.ListConstruct %int1_22331, %int0_22332 : (!torch.int, !torch.int) -> !torch.list<int>
    %24809 = torch.aten.permute %880, %24808 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_22333 = torch.constant.int 1
    %int0_22334 = torch.constant.int 0
    %24810 = torch.prim.ListConstruct %int1_22333, %int0_22334 : (!torch.int, !torch.int) -> !torch.list<int>
    %24811 = torch.aten.permute %881, %24810 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_22335 = torch.constant.int 1
    %int0_22336 = torch.constant.int 0
    %24812 = torch.prim.ListConstruct %int1_22335, %int0_22336 : (!torch.int, !torch.int) -> !torch.list<int>
    %24813 = torch.aten.permute %882, %24812 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_22337 = torch.constant.int 1
    %int0_22338 = torch.constant.int 0
    %24814 = torch.prim.ListConstruct %int1_22337, %int0_22338 : (!torch.int, !torch.int) -> !torch.list<int>
    %24815 = torch.aten.permute %883, %24814 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_22339 = torch.constant.int 1
    %int0_22340 = torch.constant.int 0
    %24816 = torch.prim.ListConstruct %int1_22339, %int0_22340 : (!torch.int, !torch.int) -> !torch.list<int>
    %24817 = torch.aten.permute %884, %24816 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_22341 = torch.constant.int 1
    %int0_22342 = torch.constant.int 0
    %24818 = torch.prim.ListConstruct %int1_22341, %int0_22342 : (!torch.int, !torch.int) -> !torch.list<int>
    %24819 = torch.aten.permute %885, %24818 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_22343 = torch.constant.int 1
    %int0_22344 = torch.constant.int 0
    %24820 = torch.prim.ListConstruct %int1_22343, %int0_22344 : (!torch.int, !torch.int) -> !torch.list<int>
    %24821 = torch.aten.permute %886, %24820 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_22345 = torch.constant.int 1
    %int0_22346 = torch.constant.int 0
    %24822 = torch.prim.ListConstruct %int1_22345, %int0_22346 : (!torch.int, !torch.int) -> !torch.list<int>
    %24823 = torch.aten.permute %887, %24822 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
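    // Q projection per shard: flatten [4,?,4096] to [?,4096], matmul against the
    // transposed shard, then reshape to [4,?,512].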
    %int4_22347 = torch.constant.int 4
    %24824 = torch.aten.mul.int %int4_22347, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22348 = torch.constant.int 4096
    %24825 = torch.prim.ListConstruct %24824, %int4096_22348 : (!torch.int, !torch.int) -> !torch.list<int>
    %24826 = torch.aten.view %24800, %24825 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24826, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24827 = torch.aten.mm %24826, %24809 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %24827, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_22349 = torch.constant.int 4
    %int512_22350 = torch.constant.int 512
    %24828 = torch.prim.ListConstruct %int4_22349, %2482, %int512_22350 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24829 = torch.aten.view %24827, %24828 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %24829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_22351 = torch.constant.int 4
    %24830 = torch.aten.mul.int %int4_22351, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22352 = torch.constant.int 4096
    %24831 = torch.prim.ListConstruct %24830, %int4096_22352 : (!torch.int, !torch.int) -> !torch.list<int>
    %24832 = torch.aten.view %24801, %24831 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24832, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24833 = torch.aten.mm %24832, %24811 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %24833, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_22353 = torch.constant.int 4
    %int512_22354 = torch.constant.int 512
    %24834 = torch.prim.ListConstruct %int4_22353, %2482, %int512_22354 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24835 = torch.aten.view %24833, %24834 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %24835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_22355 = torch.constant.int 4
    %24836 = torch.aten.mul.int %int4_22355, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22356 = torch.constant.int 4096
    %24837 = torch.prim.ListConstruct %24836, %int4096_22356 : (!torch.int, !torch.int) -> !torch.list<int>
    %24838 = torch.aten.view %24802, %24837 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24838, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24839 = torch.aten.mm %24838, %24813 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %24839, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_22357 = torch.constant.int 4
    %int512_22358 = torch.constant.int 512
    %24840 = torch.prim.ListConstruct %int4_22357, %2482, %int512_22358 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24841 = torch.aten.view %24839, %24840 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %24841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_22359 = torch.constant.int 4
    %24842 = torch.aten.mul.int %int4_22359, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22360 = torch.constant.int 4096
    %24843 = torch.prim.ListConstruct %24842, %int4096_22360 : (!torch.int, !torch.int) -> !torch.list<int>
    %24844 = torch.aten.view %24803, %24843 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24844, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24845 = torch.aten.mm %24844, %24815 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %24845, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_22361 = torch.constant.int 4
    %int512_22362 = torch.constant.int 512
    %24846 = torch.prim.ListConstruct %int4_22361, %2482, %int512_22362 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24847 = torch.aten.view %24845, %24846 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %24847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_22363 = torch.constant.int 4
    %24848 = torch.aten.mul.int %int4_22363, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22364 = torch.constant.int 4096
    %24849 = torch.prim.ListConstruct %24848, %int4096_22364 : (!torch.int, !torch.int) -> !torch.list<int>
    %24850 = torch.aten.view %24804, %24849 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24850, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24851 = torch.aten.mm %24850, %24817 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %24851, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_22365 = torch.constant.int 4
    %int512_22366 = torch.constant.int 512
    %24852 = torch.prim.ListConstruct %int4_22365, %2482, %int512_22366 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24853 = torch.aten.view %24851, %24852 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %24853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_22367 = torch.constant.int 4
    %24854 = torch.aten.mul.int %int4_22367, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22368 = torch.constant.int 4096
    %24855 = torch.prim.ListConstruct %24854, %int4096_22368 : (!torch.int, !torch.int) -> !torch.list<int>
    %24856 = torch.aten.view %24805, %24855 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24856, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24857 = torch.aten.mm %24856, %24819 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %24857, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_22369 = torch.constant.int 4
    %int512_22370 = torch.constant.int 512
    %24858 = torch.prim.ListConstruct %int4_22369, %2482, %int512_22370 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24859 = torch.aten.view %24857, %24858 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %24859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_22371 = torch.constant.int 4
    %24860 = torch.aten.mul.int %int4_22371, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22372 = torch.constant.int 4096
    %24861 = torch.prim.ListConstruct %24860, %int4096_22372 : (!torch.int, !torch.int) -> !torch.list<int>
    %24862 = torch.aten.view %24806, %24861 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24862, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24863 = torch.aten.mm %24862, %24821 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %24863, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_22373 = torch.constant.int 4
    %int512_22374 = torch.constant.int 512
    %24864 = torch.prim.ListConstruct %int4_22373, %2482, %int512_22374 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24865 = torch.aten.view %24863, %24864 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %24865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_22375 = torch.constant.int 4
    %24866 = torch.aten.mul.int %int4_22375, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22376 = torch.constant.int 4096
    %24867 = torch.prim.ListConstruct %24866, %int4096_22376 : (!torch.int, !torch.int) -> !torch.list<int>
    %24868 = torch.aten.view %24807, %24867 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24868, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24869 = torch.aten.mm %24868, %24823 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %24869, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_22377 = torch.constant.int 4
    %int512_22378 = torch.constant.int 512
    %24870 = torch.prim.ListConstruct %int4_22377, %2482, %int512_22378 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24871 = torch.aten.view %24869, %24870 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %24871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
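    // Transpose the eight [128,4096] shards to [4096,128]; the shape matches a
    // GQA key (or value) projection sharded eight ways.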
    %int1_22379 = torch.constant.int 1
    %int0_22380 = torch.constant.int 0
    %24872 = torch.prim.ListConstruct %int1_22379, %int0_22380 : (!torch.int, !torch.int) -> !torch.list<int>
    %24873 = torch.aten.permute %888, %24872 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22381 = torch.constant.int 1
    %int0_22382 = torch.constant.int 0
    %24874 = torch.prim.ListConstruct %int1_22381, %int0_22382 : (!torch.int, !torch.int) -> !torch.list<int>
    %24875 = torch.aten.permute %889, %24874 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22383 = torch.constant.int 1
    %int0_22384 = torch.constant.int 0
    %24876 = torch.prim.ListConstruct %int1_22383, %int0_22384 : (!torch.int, !torch.int) -> !torch.list<int>
    %24877 = torch.aten.permute %890, %24876 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22385 = torch.constant.int 1
    %int0_22386 = torch.constant.int 0
    %24878 = torch.prim.ListConstruct %int1_22385, %int0_22386 : (!torch.int, !torch.int) -> !torch.list<int>
    %24879 = torch.aten.permute %891, %24878 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22387 = torch.constant.int 1
    %int0_22388 = torch.constant.int 0
    %24880 = torch.prim.ListConstruct %int1_22387, %int0_22388 : (!torch.int, !torch.int) -> !torch.list<int>
    %24881 = torch.aten.permute %892, %24880 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22389 = torch.constant.int 1
    %int0_22390 = torch.constant.int 0
    %24882 = torch.prim.ListConstruct %int1_22389, %int0_22390 : (!torch.int, !torch.int) -> !torch.list<int>
    %24883 = torch.aten.permute %893, %24882 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22391 = torch.constant.int 1
    %int0_22392 = torch.constant.int 0
    %24884 = torch.prim.ListConstruct %int1_22391, %int0_22392 : (!torch.int, !torch.int) -> !torch.list<int>
    %24885 = torch.aten.permute %894, %24884 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22393 = torch.constant.int 1
    %int0_22394 = torch.constant.int 0
    %24886 = torch.prim.ListConstruct %int1_22393, %int0_22394 : (!torch.int, !torch.int) -> !torch.list<int>
    %24887 = torch.aten.permute %895, %24886 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
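    // Projection per shard using the [4096,128] weights: same flatten/matmul/
    // reshape pattern, yielding [4,?,128] per device.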
    %int4_22395 = torch.constant.int 4
    %24888 = torch.aten.mul.int %int4_22395, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22396 = torch.constant.int 4096
    %24889 = torch.prim.ListConstruct %24888, %int4096_22396 : (!torch.int, !torch.int) -> !torch.list<int>
    %24890 = torch.aten.view %24800, %24889 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24890, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24891 = torch.aten.mm %24890, %24873 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24891, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22397 = torch.constant.int 4
    %int128_22398 = torch.constant.int 128
    %24892 = torch.prim.ListConstruct %int4_22397, %2482, %int128_22398 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24893 = torch.aten.view %24891, %24892 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22399 = torch.constant.int 4
    %24894 = torch.aten.mul.int %int4_22399, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22400 = torch.constant.int 4096
    %24895 = torch.prim.ListConstruct %24894, %int4096_22400 : (!torch.int, !torch.int) -> !torch.list<int>
    %24896 = torch.aten.view %24801, %24895 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24896, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24897 = torch.aten.mm %24896, %24875 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24897, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22401 = torch.constant.int 4
    %int128_22402 = torch.constant.int 128
    %24898 = torch.prim.ListConstruct %int4_22401, %2482, %int128_22402 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24899 = torch.aten.view %24897, %24898 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22403 = torch.constant.int 4
    %24900 = torch.aten.mul.int %int4_22403, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22404 = torch.constant.int 4096
    %24901 = torch.prim.ListConstruct %24900, %int4096_22404 : (!torch.int, !torch.int) -> !torch.list<int>
    %24902 = torch.aten.view %24802, %24901 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24902, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24903 = torch.aten.mm %24902, %24877 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24903, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22405 = torch.constant.int 4
    %int128_22406 = torch.constant.int 128
    %24904 = torch.prim.ListConstruct %int4_22405, %2482, %int128_22406 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24905 = torch.aten.view %24903, %24904 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22407 = torch.constant.int 4
    %24906 = torch.aten.mul.int %int4_22407, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22408 = torch.constant.int 4096
    %24907 = torch.prim.ListConstruct %24906, %int4096_22408 : (!torch.int, !torch.int) -> !torch.list<int>
    %24908 = torch.aten.view %24803, %24907 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24908, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24909 = torch.aten.mm %24908, %24879 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24909, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22409 = torch.constant.int 4
    %int128_22410 = torch.constant.int 128
    %24910 = torch.prim.ListConstruct %int4_22409, %2482, %int128_22410 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24911 = torch.aten.view %24909, %24910 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22411 = torch.constant.int 4
    %24912 = torch.aten.mul.int %int4_22411, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22412 = torch.constant.int 4096
    %24913 = torch.prim.ListConstruct %24912, %int4096_22412 : (!torch.int, !torch.int) -> !torch.list<int>
    %24914 = torch.aten.view %24804, %24913 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24914, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24915 = torch.aten.mm %24914, %24881 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24915, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22413 = torch.constant.int 4
    %int128_22414 = torch.constant.int 128
    %24916 = torch.prim.ListConstruct %int4_22413, %2482, %int128_22414 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24917 = torch.aten.view %24915, %24916 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22415 = torch.constant.int 4
    %24918 = torch.aten.mul.int %int4_22415, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22416 = torch.constant.int 4096
    %24919 = torch.prim.ListConstruct %24918, %int4096_22416 : (!torch.int, !torch.int) -> !torch.list<int>
    %24920 = torch.aten.view %24805, %24919 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24920, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24921 = torch.aten.mm %24920, %24883 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24921, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22417 = torch.constant.int 4
    %int128_22418 = torch.constant.int 128
    %24922 = torch.prim.ListConstruct %int4_22417, %2482, %int128_22418 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24923 = torch.aten.view %24921, %24922 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22419 = torch.constant.int 4
    %24924 = torch.aten.mul.int %int4_22419, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22420 = torch.constant.int 4096
    %24925 = torch.prim.ListConstruct %24924, %int4096_22420 : (!torch.int, !torch.int) -> !torch.list<int>
    %24926 = torch.aten.view %24806, %24925 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24926, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24927 = torch.aten.mm %24926, %24885 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24927, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22421 = torch.constant.int 4
    %int128_22422 = torch.constant.int 128
    %24928 = torch.prim.ListConstruct %int4_22421, %2482, %int128_22422 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24929 = torch.aten.view %24927, %24928 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22423 = torch.constant.int 4
    %24930 = torch.aten.mul.int %int4_22423, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22424 = torch.constant.int 4096
    %24931 = torch.prim.ListConstruct %24930, %int4096_22424 : (!torch.int, !torch.int) -> !torch.list<int>
    %24932 = torch.aten.view %24807, %24931 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24932, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24933 = torch.aten.mm %24932, %24887 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24933, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22425 = torch.constant.int 4
    %int128_22426 = torch.constant.int 128
    %24934 = torch.prim.ListConstruct %int4_22425, %2482, %int128_22426 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24935 = torch.aten.view %24933, %24934 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
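    // Transpose the next set of projection weight shards (%896-%903, one per
    // device) from [128,4096] to [4096,128] so they can serve as the right-hand
    // operand of torch.aten.mm below. Given the [4,?,128] results and the
    // position of this block, these appear to be the value (V) projection
    // shards of the attention layer; the previous batch would then be the keys.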
    %int1_22427 = torch.constant.int 1
    %int0_22428 = torch.constant.int 0
    %24936 = torch.prim.ListConstruct %int1_22427, %int0_22428 : (!torch.int, !torch.int) -> !torch.list<int>
    %24937 = torch.aten.permute %896, %24936 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22429 = torch.constant.int 1
    %int0_22430 = torch.constant.int 0
    %24938 = torch.prim.ListConstruct %int1_22429, %int0_22430 : (!torch.int, !torch.int) -> !torch.list<int>
    %24939 = torch.aten.permute %897, %24938 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22431 = torch.constant.int 1
    %int0_22432 = torch.constant.int 0
    %24940 = torch.prim.ListConstruct %int1_22431, %int0_22432 : (!torch.int, !torch.int) -> !torch.list<int>
    %24941 = torch.aten.permute %898, %24940 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22433 = torch.constant.int 1
    %int0_22434 = torch.constant.int 0
    %24942 = torch.prim.ListConstruct %int1_22433, %int0_22434 : (!torch.int, !torch.int) -> !torch.list<int>
    %24943 = torch.aten.permute %899, %24942 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22435 = torch.constant.int 1
    %int0_22436 = torch.constant.int 0
    %24944 = torch.prim.ListConstruct %int1_22435, %int0_22436 : (!torch.int, !torch.int) -> !torch.list<int>
    %24945 = torch.aten.permute %900, %24944 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22437 = torch.constant.int 1
    %int0_22438 = torch.constant.int 0
    %24946 = torch.prim.ListConstruct %int1_22437, %int0_22438 : (!torch.int, !torch.int) -> !torch.list<int>
    %24947 = torch.aten.permute %901, %24946 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22439 = torch.constant.int 1
    %int0_22440 = torch.constant.int 0
    %24948 = torch.prim.ListConstruct %int1_22439, %int0_22440 : (!torch.int, !torch.int) -> !torch.list<int>
    %24949 = torch.aten.permute %902, %24948 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_22441 = torch.constant.int 1
    %int0_22442 = torch.constant.int 0
    %24950 = torch.prim.ListConstruct %int1_22441, %int0_22442 : (!torch.int, !torch.int) -> !torch.list<int>
    %24951 = torch.aten.permute %903, %24950 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
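    // Per-device projections against the transposed shards: flatten [4,?,4096]
    // to [?,4096], matmul with the [4096,128] weight, restore [4,?,128].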
    %int4_22443 = torch.constant.int 4
    %24952 = torch.aten.mul.int %int4_22443, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22444 = torch.constant.int 4096
    %24953 = torch.prim.ListConstruct %24952, %int4096_22444 : (!torch.int, !torch.int) -> !torch.list<int>
    %24954 = torch.aten.view %24800, %24953 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24954, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24955 = torch.aten.mm %24954, %24937 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24955, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22445 = torch.constant.int 4
    %int128_22446 = torch.constant.int 128
    %24956 = torch.prim.ListConstruct %int4_22445, %2482, %int128_22446 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24957 = torch.aten.view %24955, %24956 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22447 = torch.constant.int 4
    %24958 = torch.aten.mul.int %int4_22447, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22448 = torch.constant.int 4096
    %24959 = torch.prim.ListConstruct %24958, %int4096_22448 : (!torch.int, !torch.int) -> !torch.list<int>
    %24960 = torch.aten.view %24801, %24959 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24960, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24961 = torch.aten.mm %24960, %24939 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24961, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22449 = torch.constant.int 4
    %int128_22450 = torch.constant.int 128
    %24962 = torch.prim.ListConstruct %int4_22449, %2482, %int128_22450 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24963 = torch.aten.view %24961, %24962 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22451 = torch.constant.int 4
    %24964 = torch.aten.mul.int %int4_22451, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22452 = torch.constant.int 4096
    %24965 = torch.prim.ListConstruct %24964, %int4096_22452 : (!torch.int, !torch.int) -> !torch.list<int>
    %24966 = torch.aten.view %24802, %24965 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24966, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24967 = torch.aten.mm %24966, %24941 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24967, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22453 = torch.constant.int 4
    %int128_22454 = torch.constant.int 128
    %24968 = torch.prim.ListConstruct %int4_22453, %2482, %int128_22454 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24969 = torch.aten.view %24967, %24968 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22455 = torch.constant.int 4
    %24970 = torch.aten.mul.int %int4_22455, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22456 = torch.constant.int 4096
    %24971 = torch.prim.ListConstruct %24970, %int4096_22456 : (!torch.int, !torch.int) -> !torch.list<int>
    %24972 = torch.aten.view %24803, %24971 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24972, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24973 = torch.aten.mm %24972, %24943 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24973, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22457 = torch.constant.int 4
    %int128_22458 = torch.constant.int 128
    %24974 = torch.prim.ListConstruct %int4_22457, %2482, %int128_22458 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24975 = torch.aten.view %24973, %24974 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22459 = torch.constant.int 4
    %24976 = torch.aten.mul.int %int4_22459, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22460 = torch.constant.int 4096
    %24977 = torch.prim.ListConstruct %24976, %int4096_22460 : (!torch.int, !torch.int) -> !torch.list<int>
    %24978 = torch.aten.view %24804, %24977 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24978, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24979 = torch.aten.mm %24978, %24945 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24979, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22461 = torch.constant.int 4
    %int128_22462 = torch.constant.int 128
    %24980 = torch.prim.ListConstruct %int4_22461, %2482, %int128_22462 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24981 = torch.aten.view %24979, %24980 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22463 = torch.constant.int 4
    %24982 = torch.aten.mul.int %int4_22463, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22464 = torch.constant.int 4096
    %24983 = torch.prim.ListConstruct %24982, %int4096_22464 : (!torch.int, !torch.int) -> !torch.list<int>
    %24984 = torch.aten.view %24805, %24983 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24984, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24985 = torch.aten.mm %24984, %24947 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24985, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22465 = torch.constant.int 4
    %int128_22466 = torch.constant.int 128
    %24986 = torch.prim.ListConstruct %int4_22465, %2482, %int128_22466 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24987 = torch.aten.view %24985, %24986 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22467 = torch.constant.int 4
    %24988 = torch.aten.mul.int %int4_22467, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22468 = torch.constant.int 4096
    %24989 = torch.prim.ListConstruct %24988, %int4096_22468 : (!torch.int, !torch.int) -> !torch.list<int>
    %24990 = torch.aten.view %24806, %24989 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24990, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24991 = torch.aten.mm %24990, %24949 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24991, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22469 = torch.constant.int 4
    %int128_22470 = torch.constant.int 128
    %24992 = torch.prim.ListConstruct %int4_22469, %2482, %int128_22470 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24993 = torch.aten.view %24991, %24992 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_22471 = torch.constant.int 4
    %24994 = torch.aten.mul.int %int4_22471, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_22472 = torch.constant.int 4096
    %24995 = torch.prim.ListConstruct %24994, %int4096_22472 : (!torch.int, !torch.int) -> !torch.list<int>
    %24996 = torch.aten.view %24807, %24995 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %24996, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %24997 = torch.aten.mm %24996, %24951 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %24997, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_22473 = torch.constant.int 4
    %int128_22474 = torch.constant.int 128
    %24998 = torch.prim.ListConstruct %int4_22473, %2482, %int128_22474 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %24999 = torch.aten.view %24997, %24998 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %24999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
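    // Reshape the eight [4,?,512] outputs (%24829-%24871), likely the query
    // projections, into [4,?,4,128]: four 128-dim attention heads per device
    // shard.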
    %int4_22475 = torch.constant.int 4
    %int4_22476 = torch.constant.int 4
    %int128_22477 = torch.constant.int 128
    %25000 = torch.prim.ListConstruct %int4_22475, %2482, %int4_22476, %int128_22477 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25001 = torch.aten.view %24829, %25000 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_22478 = torch.constant.int 4
    %int4_22479 = torch.constant.int 4
    %int128_22480 = torch.constant.int 128
    %25002 = torch.prim.ListConstruct %int4_22478, %2482, %int4_22479, %int128_22480 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25003 = torch.aten.view %24835, %25002 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_22481 = torch.constant.int 4
    %int4_22482 = torch.constant.int 4
    %int128_22483 = torch.constant.int 128
    %25004 = torch.prim.ListConstruct %int4_22481, %2482, %int4_22482, %int128_22483 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25005 = torch.aten.view %24841, %25004 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_22484 = torch.constant.int 4
    %int4_22485 = torch.constant.int 4
    %int128_22486 = torch.constant.int 128
    %25006 = torch.prim.ListConstruct %int4_22484, %2482, %int4_22485, %int128_22486 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25007 = torch.aten.view %24847, %25006 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_22487 = torch.constant.int 4
    %int4_22488 = torch.constant.int 4
    %int128_22489 = torch.constant.int 128
    %25008 = torch.prim.ListConstruct %int4_22487, %2482, %int4_22488, %int128_22489 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25009 = torch.aten.view %24853, %25008 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_22490 = torch.constant.int 4
    %int4_22491 = torch.constant.int 4
    %int128_22492 = torch.constant.int 128
    %25010 = torch.prim.ListConstruct %int4_22490, %2482, %int4_22491, %int128_22492 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25011 = torch.aten.view %24859, %25010 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_22493 = torch.constant.int 4
    %int4_22494 = torch.constant.int 4
    %int128_22495 = torch.constant.int 128
    %25012 = torch.prim.ListConstruct %int4_22493, %2482, %int4_22494, %int128_22495 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25013 = torch.aten.view %24865, %25012 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_22496 = torch.constant.int 4
    %int4_22497 = torch.constant.int 4
    %int128_22498 = torch.constant.int 128
    %25014 = torch.prim.ListConstruct %int4_22496, %2482, %int4_22497, %int128_22498 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25015 = torch.aten.view %24871, %25014 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
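    // Reshape the first batch of [4,?,128] projection outputs (%24893-%24935)
    // into [4,?,1,128]: a single 128-dim head per device, consistent with a
    // grouped-query attention layout where each shard holds one KV head.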
    %int4_22499 = torch.constant.int 4
    %int1_22500 = torch.constant.int 1
    %int128_22501 = torch.constant.int 128
    %25016 = torch.prim.ListConstruct %int4_22499, %2482, %int1_22500, %int128_22501 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25017 = torch.aten.view %24893, %25016 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22502 = torch.constant.int 4
    %int1_22503 = torch.constant.int 1
    %int128_22504 = torch.constant.int 128
    %25018 = torch.prim.ListConstruct %int4_22502, %2482, %int1_22503, %int128_22504 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25019 = torch.aten.view %24899, %25018 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22505 = torch.constant.int 4
    %int1_22506 = torch.constant.int 1
    %int128_22507 = torch.constant.int 128
    %25020 = torch.prim.ListConstruct %int4_22505, %2482, %int1_22506, %int128_22507 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25021 = torch.aten.view %24905, %25020 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22508 = torch.constant.int 4
    %int1_22509 = torch.constant.int 1
    %int128_22510 = torch.constant.int 128
    %25022 = torch.prim.ListConstruct %int4_22508, %2482, %int1_22509, %int128_22510 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25023 = torch.aten.view %24911, %25022 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22511 = torch.constant.int 4
    %int1_22512 = torch.constant.int 1
    %int128_22513 = torch.constant.int 128
    %25024 = torch.prim.ListConstruct %int4_22511, %2482, %int1_22512, %int128_22513 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25025 = torch.aten.view %24917, %25024 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22514 = torch.constant.int 4
    %int1_22515 = torch.constant.int 1
    %int128_22516 = torch.constant.int 128
    %25026 = torch.prim.ListConstruct %int4_22514, %2482, %int1_22515, %int128_22516 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25027 = torch.aten.view %24923, %25026 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22517 = torch.constant.int 4
    %int1_22518 = torch.constant.int 1
    %int128_22519 = torch.constant.int 128
    %25028 = torch.prim.ListConstruct %int4_22517, %2482, %int1_22518, %int128_22519 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25029 = torch.aten.view %24929, %25028 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22520 = torch.constant.int 4
    %int1_22521 = torch.constant.int 1
    %int128_22522 = torch.constant.int 128
    %25030 = torch.prim.ListConstruct %int4_22520, %2482, %int1_22521, %int128_22522 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25031 = torch.aten.view %24935, %25030 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
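    // The second batch of outputs (%24957-%24999) gets the same [4,?,1,128]
    // single-head layout.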
    %int4_22523 = torch.constant.int 4
    %int1_22524 = torch.constant.int 1
    %int128_22525 = torch.constant.int 128
    %25032 = torch.prim.ListConstruct %int4_22523, %2482, %int1_22524, %int128_22525 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25033 = torch.aten.view %24957, %25032 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22526 = torch.constant.int 4
    %int1_22527 = torch.constant.int 1
    %int128_22528 = torch.constant.int 128
    %25034 = torch.prim.ListConstruct %int4_22526, %2482, %int1_22527, %int128_22528 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25035 = torch.aten.view %24963, %25034 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22529 = torch.constant.int 4
    %int1_22530 = torch.constant.int 1
    %int128_22531 = torch.constant.int 128
    %25036 = torch.prim.ListConstruct %int4_22529, %2482, %int1_22530, %int128_22531 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25037 = torch.aten.view %24969, %25036 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22532 = torch.constant.int 4
    %int1_22533 = torch.constant.int 1
    %int128_22534 = torch.constant.int 128
    %25038 = torch.prim.ListConstruct %int4_22532, %2482, %int1_22533, %int128_22534 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25039 = torch.aten.view %24975, %25038 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22535 = torch.constant.int 4
    %int1_22536 = torch.constant.int 1
    %int128_22537 = torch.constant.int 128
    %25040 = torch.prim.ListConstruct %int4_22535, %2482, %int1_22536, %int128_22537 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25041 = torch.aten.view %24981, %25040 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22538 = torch.constant.int 4
    %int1_22539 = torch.constant.int 1
    %int128_22540 = torch.constant.int 128
    %25042 = torch.prim.ListConstruct %int4_22538, %2482, %int1_22539, %int128_22540 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25043 = torch.aten.view %24987, %25042 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22541 = torch.constant.int 4
    %int1_22542 = torch.constant.int 1
    %int128_22543 = torch.constant.int 128
    %25044 = torch.prim.ListConstruct %int4_22541, %2482, %int1_22542, %int128_22543 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25045 = torch.aten.view %24993, %25044 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_22544 = torch.constant.int 4
    %int1_22545 = torch.constant.int 1
    %int128_22546 = torch.constant.int 128
    %25046 = torch.prim.ListConstruct %int4_22544, %2482, %int1_22545, %int128_22546 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25047 = torch.aten.view %24999, %25046 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
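    // Rotary position embedding (RoPE) frequency table. For positions
    // p in [0, 131072) and even channel indices 2j in [0, 128), the ops below
    // compute
    //   inv_freq[j] = 1 / 500000^(2j / 128)
    //   table[p, j] = cos(p * inv_freq[j]) + i * sin(p * inv_freq[j])
    // yielding a [131072,64] complex<f32> table; 500000 is the RoPE base
    // frequency and 131072 the maximum context length.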
    %int131072_22547 = torch.constant.int 131072
    %none_22548 = torch.constant.none
    %none_22549 = torch.constant.none
    %cpu_22550 = torch.constant.device "cpu"
    %false_22551 = torch.constant.bool false
    %25048 = torch.aten.arange %int131072_22547, %none_22548, %none_22549, %cpu_22550, %false_22551 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_22552 = torch.constant.int 0
    %int128_22553 = torch.constant.int 128
    %int2_22554 = torch.constant.int 2
    %none_22555 = torch.constant.none
    %none_22556 = torch.constant.none
    %cpu_22557 = torch.constant.device "cpu"
    %false_22558 = torch.constant.bool false
    %25049 = torch.aten.arange.start_step %int0_22552, %int128_22553, %int2_22554, %none_22555, %none_22556, %cpu_22557, %false_22558 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_22559 = torch.constant.int 0
    %int0_22560 = torch.constant.int 0
    %int64_22561 = torch.constant.int 64
    %int1_22562 = torch.constant.int 1
    %25050 = torch.aten.slice.Tensor %25049, %int0_22559, %int0_22560, %int64_22561, %int1_22562 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_22563 = torch.constant.int 6
    %25051 = torch.prims.convert_element_type %25050, %int6_22563 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_22564 = torch.constant.int 128
    %25052 = torch.aten.div.Scalar %25051, %int128_22564 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_22565 = torch.constant.float 5.000000e+05
    %25053 = torch.aten.pow.Scalar %float5.000000e05_22565, %25052 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %25054 = torch.aten.reciprocal %25053 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_22566 = torch.constant.float 1.000000e+00
    %25055 = torch.aten.mul.Scalar %25054, %float1.000000e00_22566 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_22567 = torch.constant.int 131072
    %int1_22568 = torch.constant.int 1
    %25056 = torch.prim.ListConstruct %int131072_22567, %int1_22568 : (!torch.int, !torch.int) -> !torch.list<int>
    %25057 = torch.aten.view %25048, %25056 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %25058 = torch.aten.mul.Tensor %25057, %25055 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %25059 = torch.aten.cos %25058 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %25060 = torch.aten.sin %25058 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %25061 = torch.aten.complex %25059, %25060 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
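    // Replicate the complex frequency table onto all eight devices; each
    // flow.tensor.transfer round-trips through the builtin tensor type.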
    %25062 = torch_c.to_builtin_tensor %25061 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25063 = flow.tensor.transfer %25062 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %25064 = torch_c.from_builtin_tensor %25063 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25065 = torch_c.to_builtin_tensor %25061 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25066 = flow.tensor.transfer %25065 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %25067 = torch_c.from_builtin_tensor %25066 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25068 = torch_c.to_builtin_tensor %25061 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25069 = flow.tensor.transfer %25068 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %25070 = torch_c.from_builtin_tensor %25069 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25071 = torch_c.to_builtin_tensor %25061 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25072 = flow.tensor.transfer %25071 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %25073 = torch_c.from_builtin_tensor %25072 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25074 = torch_c.to_builtin_tensor %25061 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25075 = flow.tensor.transfer %25074 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %25076 = torch_c.from_builtin_tensor %25075 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25077 = torch_c.to_builtin_tensor %25061 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25078 = flow.tensor.transfer %25077 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %25079 = torch_c.from_builtin_tensor %25078 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25080 = torch_c.to_builtin_tensor %25061 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25081 = flow.tensor.transfer %25080 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %25082 = torch_c.from_builtin_tensor %25081 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25083 = torch_c.to_builtin_tensor %25061 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25084 = flow.tensor.transfer %25083 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %25085 = torch_c.from_builtin_tensor %25084 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
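    // Apply RoPE to the device-0 shard: read the dynamic sequence length from
    // %24829, slice the table to [s,64], expand it to [1,s,1,64], bitcast the
    // [4,?,4,128] f16 activations so each adjacent pair of lanes becomes one
    // complex<f16> value, multiply by the table (the complex product performs
    // the rotation), then bitcast back and truncate the f32 result to f16.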
    %int1_22569 = torch.constant.int 1
    %25086 = torch.aten.size.int %24829, %int1_22569 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_22570 = torch.constant.int 0
    %25087 = torch.aten.add.int %int0_22570, %25086 : !torch.int, !torch.int -> !torch.int
    %int0_22571 = torch.constant.int 0
    %int0_22572 = torch.constant.int 0
    %int1_22573 = torch.constant.int 1
    %25088 = torch.aten.slice.Tensor %25064, %int0_22571, %int0_22572, %25087, %int1_22573 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25088, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22574 = torch.constant.int 1
    %int0_22575 = torch.constant.int 0
    %int9223372036854775807_22576 = torch.constant.int 9223372036854775807
    %int1_22577 = torch.constant.int 1
    %25089 = torch.aten.slice.Tensor %25088, %int1_22574, %int0_22575, %int9223372036854775807_22576, %int1_22577 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25089, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22578 = torch.constant.int 0
    %25090 = torch.aten.unsqueeze %25089, %int0_22578 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25090, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22579 = torch.constant.int 2
    %25091 = torch.aten.unsqueeze %25090, %int2_22579 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25091, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22580 = torch.constant.int 3
    %int0_22581 = torch.constant.int 0
    %int9223372036854775807_22582 = torch.constant.int 9223372036854775807
    %int1_22583 = torch.constant.int 1
    %25092 = torch.aten.slice.Tensor %25091, %int3_22580, %int0_22581, %int9223372036854775807_22582, %int1_22583 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25092, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25093 = torch_c.to_builtin_tensor %25001 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_22584 = arith.constant 1 : index
    %dim_22585 = tensor.dim %25093, %c1_22584 : tensor<4x?x4x128xf16>
    %25094 = flow.tensor.bitcast %25093 : tensor<4x?x4x128xf16>{%dim_22585} -> tensor<4x?x4x64xcomplex<f16>>{%dim_22585}
    %25095 = torch_c.from_builtin_tensor %25094 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %25095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %25096 = torch.aten.mul.Tensor %25095, %25092 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %25096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %25097 = torch_c.to_builtin_tensor %25096 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_22586 = arith.constant 1 : index
    %dim_22587 = tensor.dim %25097, %c1_22586 : tensor<4x?x4x64xcomplex<f32>>
    %25098 = flow.tensor.bitcast %25097 : tensor<4x?x4x64xcomplex<f32>>{%dim_22587} -> tensor<4x?x4x128xf32>{%dim_22587}
    %25099 = torch_c.from_builtin_tensor %25098 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %25099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_22588 = torch.constant.int 5
    %25100 = torch.prims.convert_element_type %25099, %int5_22588 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
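    // Device 1: the same RoPE application, using that device's copy of the
    // table (%25067).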
    %int1_22589 = torch.constant.int 1
    %25101 = torch.aten.size.int %24835, %int1_22589 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_22590 = torch.constant.int 0
    %25102 = torch.aten.add.int %int0_22590, %25101 : !torch.int, !torch.int -> !torch.int
    %int0_22591 = torch.constant.int 0
    %int0_22592 = torch.constant.int 0
    %int1_22593 = torch.constant.int 1
    %25103 = torch.aten.slice.Tensor %25067, %int0_22591, %int0_22592, %25102, %int1_22593 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25103, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22594 = torch.constant.int 1
    %int0_22595 = torch.constant.int 0
    %int9223372036854775807_22596 = torch.constant.int 9223372036854775807
    %int1_22597 = torch.constant.int 1
    %25104 = torch.aten.slice.Tensor %25103, %int1_22594, %int0_22595, %int9223372036854775807_22596, %int1_22597 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25104, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22598 = torch.constant.int 0
    %25105 = torch.aten.unsqueeze %25104, %int0_22598 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25105, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22599 = torch.constant.int 2
    %25106 = torch.aten.unsqueeze %25105, %int2_22599 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25106, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22600 = torch.constant.int 3
    %int0_22601 = torch.constant.int 0
    %int9223372036854775807_22602 = torch.constant.int 9223372036854775807
    %int1_22603 = torch.constant.int 1
    %25107 = torch.aten.slice.Tensor %25106, %int3_22600, %int0_22601, %int9223372036854775807_22602, %int1_22603 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25107, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25108 = torch_c.to_builtin_tensor %25003 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_22604 = arith.constant 1 : index
    %dim_22605 = tensor.dim %25108, %c1_22604 : tensor<4x?x4x128xf16>
    %25109 = flow.tensor.bitcast %25108 : tensor<4x?x4x128xf16>{%dim_22605} -> tensor<4x?x4x64xcomplex<f16>>{%dim_22605}
    %25110 = torch_c.from_builtin_tensor %25109 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %25110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %25111 = torch.aten.mul.Tensor %25110, %25107 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %25111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %25112 = torch_c.to_builtin_tensor %25111 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_22606 = arith.constant 1 : index
    %dim_22607 = tensor.dim %25112, %c1_22606 : tensor<4x?x4x64xcomplex<f32>>
    %25113 = flow.tensor.bitcast %25112 : tensor<4x?x4x64xcomplex<f32>>{%dim_22607} -> tensor<4x?x4x128xf32>{%dim_22607}
    %25114 = torch_c.from_builtin_tensor %25113 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %25114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_22608 = torch.constant.int 5
    %25115 = torch.prims.convert_element_type %25114, %int5_22608 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
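    // Device 2: same pattern (table copy %25070).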
    %int1_22609 = torch.constant.int 1
    %25116 = torch.aten.size.int %24841, %int1_22609 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_22610 = torch.constant.int 0
    %25117 = torch.aten.add.int %int0_22610, %25116 : !torch.int, !torch.int -> !torch.int
    %int0_22611 = torch.constant.int 0
    %int0_22612 = torch.constant.int 0
    %int1_22613 = torch.constant.int 1
    %25118 = torch.aten.slice.Tensor %25070, %int0_22611, %int0_22612, %25117, %int1_22613 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25118, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22614 = torch.constant.int 1
    %int0_22615 = torch.constant.int 0
    %int9223372036854775807_22616 = torch.constant.int 9223372036854775807
    %int1_22617 = torch.constant.int 1
    %25119 = torch.aten.slice.Tensor %25118, %int1_22614, %int0_22615, %int9223372036854775807_22616, %int1_22617 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25119, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22618 = torch.constant.int 0
    %25120 = torch.aten.unsqueeze %25119, %int0_22618 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25120, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22619 = torch.constant.int 2
    %25121 = torch.aten.unsqueeze %25120, %int2_22619 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25121, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22620 = torch.constant.int 3
    %int0_22621 = torch.constant.int 0
    %int9223372036854775807_22622 = torch.constant.int 9223372036854775807
    %int1_22623 = torch.constant.int 1
    %25122 = torch.aten.slice.Tensor %25121, %int3_22620, %int0_22621, %int9223372036854775807_22622, %int1_22623 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25122, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25123 = torch_c.to_builtin_tensor %25005 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_22624 = arith.constant 1 : index
    %dim_22625 = tensor.dim %25123, %c1_22624 : tensor<4x?x4x128xf16>
    %25124 = flow.tensor.bitcast %25123 : tensor<4x?x4x128xf16>{%dim_22625} -> tensor<4x?x4x64xcomplex<f16>>{%dim_22625}
    %25125 = torch_c.from_builtin_tensor %25124 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %25125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %25126 = torch.aten.mul.Tensor %25125, %25122 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %25126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %25127 = torch_c.to_builtin_tensor %25126 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_22626 = arith.constant 1 : index
    %dim_22627 = tensor.dim %25127, %c1_22626 : tensor<4x?x4x64xcomplex<f32>>
    %25128 = flow.tensor.bitcast %25127 : tensor<4x?x4x64xcomplex<f32>>{%dim_22627} -> tensor<4x?x4x128xf32>{%dim_22627}
    %25129 = torch_c.from_builtin_tensor %25128 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %25129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_22628 = torch.constant.int 5
    %25130 = torch.prims.convert_element_type %25129, %int5_22628 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
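    // Device 3: same pattern (table copy %25073).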
    %int1_22629 = torch.constant.int 1
    %25131 = torch.aten.size.int %24847, %int1_22629 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_22630 = torch.constant.int 0
    %25132 = torch.aten.add.int %int0_22630, %25131 : !torch.int, !torch.int -> !torch.int
    %int0_22631 = torch.constant.int 0
    %int0_22632 = torch.constant.int 0
    %int1_22633 = torch.constant.int 1
    %25133 = torch.aten.slice.Tensor %25073, %int0_22631, %int0_22632, %25132, %int1_22633 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25133, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22634 = torch.constant.int 1
    %int0_22635 = torch.constant.int 0
    %int9223372036854775807_22636 = torch.constant.int 9223372036854775807
    %int1_22637 = torch.constant.int 1
    %25134 = torch.aten.slice.Tensor %25133, %int1_22634, %int0_22635, %int9223372036854775807_22636, %int1_22637 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25134, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22638 = torch.constant.int 0
    %25135 = torch.aten.unsqueeze %25134, %int0_22638 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25135, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22639 = torch.constant.int 2
    %25136 = torch.aten.unsqueeze %25135, %int2_22639 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25136, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22640 = torch.constant.int 3
    %int0_22641 = torch.constant.int 0
    %int9223372036854775807_22642 = torch.constant.int 9223372036854775807
    %int1_22643 = torch.constant.int 1
    %25137 = torch.aten.slice.Tensor %25136, %int3_22640, %int0_22641, %int9223372036854775807_22642, %int1_22643 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25137, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25138 = torch_c.to_builtin_tensor %25007 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_22644 = arith.constant 1 : index
    %dim_22645 = tensor.dim %25138, %c1_22644 : tensor<4x?x4x128xf16>
    %25139 = flow.tensor.bitcast %25138 : tensor<4x?x4x128xf16>{%dim_22645} -> tensor<4x?x4x64xcomplex<f16>>{%dim_22645}
    %25140 = torch_c.from_builtin_tensor %25139 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %25140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %25141 = torch.aten.mul.Tensor %25140, %25137 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %25141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %25142 = torch_c.to_builtin_tensor %25141 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_22646 = arith.constant 1 : index
    %dim_22647 = tensor.dim %25142, %c1_22646 : tensor<4x?x4x64xcomplex<f32>>
    %25143 = flow.tensor.bitcast %25142 : tensor<4x?x4x64xcomplex<f32>>{%dim_22647} -> tensor<4x?x4x128xf32>{%dim_22647}
    %25144 = torch_c.from_builtin_tensor %25143 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %25144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_22648 = torch.constant.int 5
    %25145 = torch.prims.convert_element_type %25144, %int5_22648 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
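    // Device 4: same pattern (table copy %25076).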
    %int1_22649 = torch.constant.int 1
    %25146 = torch.aten.size.int %24853, %int1_22649 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_22650 = torch.constant.int 0
    %25147 = torch.aten.add.int %int0_22650, %25146 : !torch.int, !torch.int -> !torch.int
    %int0_22651 = torch.constant.int 0
    %int0_22652 = torch.constant.int 0
    %int1_22653 = torch.constant.int 1
    %25148 = torch.aten.slice.Tensor %25076, %int0_22651, %int0_22652, %25147, %int1_22653 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25148, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22654 = torch.constant.int 1
    %int0_22655 = torch.constant.int 0
    %int9223372036854775807_22656 = torch.constant.int 9223372036854775807
    %int1_22657 = torch.constant.int 1
    %25149 = torch.aten.slice.Tensor %25148, %int1_22654, %int0_22655, %int9223372036854775807_22656, %int1_22657 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25149, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22658 = torch.constant.int 0
    %25150 = torch.aten.unsqueeze %25149, %int0_22658 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25150, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22659 = torch.constant.int 2
    %25151 = torch.aten.unsqueeze %25150, %int2_22659 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25151, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22660 = torch.constant.int 3
    %int0_22661 = torch.constant.int 0
    %int9223372036854775807_22662 = torch.constant.int 9223372036854775807
    %int1_22663 = torch.constant.int 1
    %25152 = torch.aten.slice.Tensor %25151, %int3_22660, %int0_22661, %int9223372036854775807_22662, %int1_22663 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25152, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25153 = torch_c.to_builtin_tensor %25009 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_22664 = arith.constant 1 : index
    %dim_22665 = tensor.dim %25153, %c1_22664 : tensor<4x?x4x128xf16>
    %25154 = flow.tensor.bitcast %25153 : tensor<4x?x4x128xf16>{%dim_22665} -> tensor<4x?x4x64xcomplex<f16>>{%dim_22665}
    %25155 = torch_c.from_builtin_tensor %25154 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %25155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %25156 = torch.aten.mul.Tensor %25155, %25152 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %25156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %25157 = torch_c.to_builtin_tensor %25156 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_22666 = arith.constant 1 : index
    %dim_22667 = tensor.dim %25157, %c1_22666 : tensor<4x?x4x64xcomplex<f32>>
    %25158 = flow.tensor.bitcast %25157 : tensor<4x?x4x64xcomplex<f32>>{%dim_22667} -> tensor<4x?x4x128xf32>{%dim_22667}
    %25159 = torch_c.from_builtin_tensor %25158 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %25159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_22668 = torch.constant.int 5
    %25160 = torch.prims.convert_element_type %25159, %int5_22668 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
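    // The same slice / broadcast / rotate sequence repeats for the shards on the remaining devices (%25011, %25013, %25015 against their per-device frequency tables %25079, %25082, %25085).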
    %int1_22669 = torch.constant.int 1
    %25161 = torch.aten.size.int %24859, %int1_22669 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_22670 = torch.constant.int 0
    %25162 = torch.aten.add.int %int0_22670, %25161 : !torch.int, !torch.int -> !torch.int
    %int0_22671 = torch.constant.int 0
    %int0_22672 = torch.constant.int 0
    %int1_22673 = torch.constant.int 1
    %25163 = torch.aten.slice.Tensor %25079, %int0_22671, %int0_22672, %25162, %int1_22673 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25163, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22674 = torch.constant.int 1
    %int0_22675 = torch.constant.int 0
    %int9223372036854775807_22676 = torch.constant.int 9223372036854775807
    %int1_22677 = torch.constant.int 1
    %25164 = torch.aten.slice.Tensor %25163, %int1_22674, %int0_22675, %int9223372036854775807_22676, %int1_22677 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25164, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22678 = torch.constant.int 0
    %25165 = torch.aten.unsqueeze %25164, %int0_22678 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25165, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22679 = torch.constant.int 2
    %25166 = torch.aten.unsqueeze %25165, %int2_22679 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25166, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22680 = torch.constant.int 3
    %int0_22681 = torch.constant.int 0
    %int9223372036854775807_22682 = torch.constant.int 9223372036854775807
    %int1_22683 = torch.constant.int 1
    %25167 = torch.aten.slice.Tensor %25166, %int3_22680, %int0_22681, %int9223372036854775807_22682, %int1_22683 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25167, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25168 = torch_c.to_builtin_tensor %25011 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_22684 = arith.constant 1 : index
    %dim_22685 = tensor.dim %25168, %c1_22684 : tensor<4x?x4x128xf16>
    %25169 = flow.tensor.bitcast %25168 : tensor<4x?x4x128xf16>{%dim_22685} -> tensor<4x?x4x64xcomplex<f16>>{%dim_22685}
    %25170 = torch_c.from_builtin_tensor %25169 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %25170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %25171 = torch.aten.mul.Tensor %25170, %25167 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %25171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %25172 = torch_c.to_builtin_tensor %25171 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_22686 = arith.constant 1 : index
    %dim_22687 = tensor.dim %25172, %c1_22686 : tensor<4x?x4x64xcomplex<f32>>
    %25173 = flow.tensor.bitcast %25172 : tensor<4x?x4x64xcomplex<f32>>{%dim_22687} -> tensor<4x?x4x128xf32>{%dim_22687}
    %25174 = torch_c.from_builtin_tensor %25173 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %25174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_22688 = torch.constant.int 5
    %25175 = torch.prims.convert_element_type %25174, %int5_22688 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_22689 = torch.constant.int 1
    %25176 = torch.aten.size.int %24865, %int1_22689 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_22690 = torch.constant.int 0
    %25177 = torch.aten.add.int %int0_22690, %25176 : !torch.int, !torch.int -> !torch.int
    %int0_22691 = torch.constant.int 0
    %int0_22692 = torch.constant.int 0
    %int1_22693 = torch.constant.int 1
    %25178 = torch.aten.slice.Tensor %25082, %int0_22691, %int0_22692, %25177, %int1_22693 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25178, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22694 = torch.constant.int 1
    %int0_22695 = torch.constant.int 0
    %int9223372036854775807_22696 = torch.constant.int 9223372036854775807
    %int1_22697 = torch.constant.int 1
    %25179 = torch.aten.slice.Tensor %25178, %int1_22694, %int0_22695, %int9223372036854775807_22696, %int1_22697 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25179, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22698 = torch.constant.int 0
    %25180 = torch.aten.unsqueeze %25179, %int0_22698 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25180, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22699 = torch.constant.int 2
    %25181 = torch.aten.unsqueeze %25180, %int2_22699 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25181, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22700 = torch.constant.int 3
    %int0_22701 = torch.constant.int 0
    %int9223372036854775807_22702 = torch.constant.int 9223372036854775807
    %int1_22703 = torch.constant.int 1
    %25182 = torch.aten.slice.Tensor %25181, %int3_22700, %int0_22701, %int9223372036854775807_22702, %int1_22703 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25182, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25183 = torch_c.to_builtin_tensor %25013 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_22704 = arith.constant 1 : index
    %dim_22705 = tensor.dim %25183, %c1_22704 : tensor<4x?x4x128xf16>
    %25184 = flow.tensor.bitcast %25183 : tensor<4x?x4x128xf16>{%dim_22705} -> tensor<4x?x4x64xcomplex<f16>>{%dim_22705}
    %25185 = torch_c.from_builtin_tensor %25184 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %25185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %25186 = torch.aten.mul.Tensor %25185, %25182 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %25186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %25187 = torch_c.to_builtin_tensor %25186 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_22706 = arith.constant 1 : index
    %dim_22707 = tensor.dim %25187, %c1_22706 : tensor<4x?x4x64xcomplex<f32>>
    %25188 = flow.tensor.bitcast %25187 : tensor<4x?x4x64xcomplex<f32>>{%dim_22707} -> tensor<4x?x4x128xf32>{%dim_22707}
    %25189 = torch_c.from_builtin_tensor %25188 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %25189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_22708 = torch.constant.int 5
    %25190 = torch.prims.convert_element_type %25189, %int5_22708 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_22709 = torch.constant.int 1
    %25191 = torch.aten.size.int %24871, %int1_22709 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_22710 = torch.constant.int 0
    %25192 = torch.aten.add.int %int0_22710, %25191 : !torch.int, !torch.int -> !torch.int
    %int0_22711 = torch.constant.int 0
    %int0_22712 = torch.constant.int 0
    %int1_22713 = torch.constant.int 1
    %25193 = torch.aten.slice.Tensor %25085, %int0_22711, %int0_22712, %25192, %int1_22713 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25193, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22714 = torch.constant.int 1
    %int0_22715 = torch.constant.int 0
    %int9223372036854775807_22716 = torch.constant.int 9223372036854775807
    %int1_22717 = torch.constant.int 1
    %25194 = torch.aten.slice.Tensor %25193, %int1_22714, %int0_22715, %int9223372036854775807_22716, %int1_22717 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25194, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22718 = torch.constant.int 0
    %25195 = torch.aten.unsqueeze %25194, %int0_22718 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25195, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22719 = torch.constant.int 2
    %25196 = torch.aten.unsqueeze %25195, %int2_22719 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25196, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22720 = torch.constant.int 3
    %int0_22721 = torch.constant.int 0
    %int9223372036854775807_22722 = torch.constant.int 9223372036854775807
    %int1_22723 = torch.constant.int 1
    %25197 = torch.aten.slice.Tensor %25196, %int3_22720, %int0_22721, %int9223372036854775807_22722, %int1_22723 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25197, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25198 = torch_c.to_builtin_tensor %25015 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_22724 = arith.constant 1 : index
    %dim_22725 = tensor.dim %25198, %c1_22724 : tensor<4x?x4x128xf16>
    %25199 = flow.tensor.bitcast %25198 : tensor<4x?x4x128xf16>{%dim_22725} -> tensor<4x?x4x64xcomplex<f16>>{%dim_22725}
    %25200 = torch_c.from_builtin_tensor %25199 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %25200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %25201 = torch.aten.mul.Tensor %25200, %25197 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %25201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %25202 = torch_c.to_builtin_tensor %25201 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_22726 = arith.constant 1 : index
    %dim_22727 = tensor.dim %25202, %c1_22726 : tensor<4x?x4x64xcomplex<f32>>
    %25203 = flow.tensor.bitcast %25202 : tensor<4x?x4x64xcomplex<f32>>{%dim_22727} -> tensor<4x?x4x128xf32>{%dim_22727}
    %25204 = torch_c.from_builtin_tensor %25203 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %25204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_22728 = torch.constant.int 5
    %25205 = torch.prims.convert_element_type %25204, %int5_22728 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
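    // Rebuild the frequency table on the host: positions 0..131071 and inverse frequencies 1 / 500000^(2i/128) for the 64 even dimensions, combined by an outer product and packed as cos + i*sin into a [131072, 64] complex<f32> table.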
    %int131072_22729 = torch.constant.int 131072
    %none_22730 = torch.constant.none
    %none_22731 = torch.constant.none
    %cpu_22732 = torch.constant.device "cpu"
    %false_22733 = torch.constant.bool false
    %25206 = torch.aten.arange %int131072_22729, %none_22730, %none_22731, %cpu_22732, %false_22733 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_22734 = torch.constant.int 0
    %int128_22735 = torch.constant.int 128
    %int2_22736 = torch.constant.int 2
    %none_22737 = torch.constant.none
    %none_22738 = torch.constant.none
    %cpu_22739 = torch.constant.device "cpu"
    %false_22740 = torch.constant.bool false
    %25207 = torch.aten.arange.start_step %int0_22734, %int128_22735, %int2_22736, %none_22737, %none_22738, %cpu_22739, %false_22740 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_22741 = torch.constant.int 0
    %int0_22742 = torch.constant.int 0
    %int64_22743 = torch.constant.int 64
    %int1_22744 = torch.constant.int 1
    %25208 = torch.aten.slice.Tensor %25207, %int0_22741, %int0_22742, %int64_22743, %int1_22744 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_22745 = torch.constant.int 6
    %25209 = torch.prims.convert_element_type %25208, %int6_22745 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_22746 = torch.constant.int 128
    %25210 = torch.aten.div.Scalar %25209, %int128_22746 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_22747 = torch.constant.float 5.000000e+05
    %25211 = torch.aten.pow.Scalar %float5.000000e05_22747, %25210 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %25212 = torch.aten.reciprocal %25211 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_22748 = torch.constant.float 1.000000e+00
    %25213 = torch.aten.mul.Scalar %25212, %float1.000000e00_22748 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_22749 = torch.constant.int 131072
    %int1_22750 = torch.constant.int 1
    %25214 = torch.prim.ListConstruct %int131072_22749, %int1_22750 : (!torch.int, !torch.int) -> !torch.list<int>
    %25215 = torch.aten.view %25206, %25214 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %25216 = torch.aten.mul.Tensor %25215, %25213 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %25217 = torch.aten.cos %25216 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %25218 = torch.aten.sin %25216 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %25219 = torch.aten.complex %25217, %25218 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
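    // Transfer a copy of the [131072, 64] frequency table to each of the eight devices.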
    %25220 = torch_c.to_builtin_tensor %25219 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25221 = flow.tensor.transfer %25220 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %25222 = torch_c.from_builtin_tensor %25221 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25223 = torch_c.to_builtin_tensor %25219 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25224 = flow.tensor.transfer %25223 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %25225 = torch_c.from_builtin_tensor %25224 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25226 = torch_c.to_builtin_tensor %25219 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25227 = flow.tensor.transfer %25226 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %25228 = torch_c.from_builtin_tensor %25227 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25229 = torch_c.to_builtin_tensor %25219 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25230 = flow.tensor.transfer %25229 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %25231 = torch_c.from_builtin_tensor %25230 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25232 = torch_c.to_builtin_tensor %25219 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25233 = flow.tensor.transfer %25232 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %25234 = torch_c.from_builtin_tensor %25233 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25235 = torch_c.to_builtin_tensor %25219 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25236 = flow.tensor.transfer %25235 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %25237 = torch_c.from_builtin_tensor %25236 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25238 = torch_c.to_builtin_tensor %25219 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25239 = flow.tensor.transfer %25238 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %25240 = torch_c.from_builtin_tensor %25239 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %25241 = torch_c.to_builtin_tensor %25219 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %25242 = flow.tensor.transfer %25241 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %25243 = torch_c.from_builtin_tensor %25242 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
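    // Apply the same rotation to the single-head K shards ([4, seq, 1, 128]): slice each device's table to the dynamic sequence length (s0 * 16), broadcast, and rotate via complex multiply.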
    %int1_22751 = torch.constant.int 1
    %25244 = torch.aten.size.int %24893, %int1_22751 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_22752 = torch.constant.int 0
    %25245 = torch.aten.add.int %int0_22752, %25244 : !torch.int, !torch.int -> !torch.int
    %int0_22753 = torch.constant.int 0
    %int0_22754 = torch.constant.int 0
    %int1_22755 = torch.constant.int 1
    %25246 = torch.aten.slice.Tensor %25222, %int0_22753, %int0_22754, %25245, %int1_22755 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25246, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22756 = torch.constant.int 1
    %int0_22757 = torch.constant.int 0
    %int9223372036854775807_22758 = torch.constant.int 9223372036854775807
    %int1_22759 = torch.constant.int 1
    %25247 = torch.aten.slice.Tensor %25246, %int1_22756, %int0_22757, %int9223372036854775807_22758, %int1_22759 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25247, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22760 = torch.constant.int 0
    %25248 = torch.aten.unsqueeze %25247, %int0_22760 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25248, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22761 = torch.constant.int 2
    %25249 = torch.aten.unsqueeze %25248, %int2_22761 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25249, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22762 = torch.constant.int 3
    %int0_22763 = torch.constant.int 0
    %int9223372036854775807_22764 = torch.constant.int 9223372036854775807
    %int1_22765 = torch.constant.int 1
    %25250 = torch.aten.slice.Tensor %25249, %int3_22762, %int0_22763, %int9223372036854775807_22764, %int1_22765 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25250, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25251 = torch_c.to_builtin_tensor %25017 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_22766 = arith.constant 1 : index
    %dim_22767 = tensor.dim %25251, %c1_22766 : tensor<4x?x1x128xf16>
    %25252 = flow.tensor.bitcast %25251 : tensor<4x?x1x128xf16>{%dim_22767} -> tensor<4x?x1x64xcomplex<f16>>{%dim_22767}
    %25253 = torch_c.from_builtin_tensor %25252 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %25253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %25254 = torch.aten.mul.Tensor %25253, %25250 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %25255 = torch_c.to_builtin_tensor %25254 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_22768 = arith.constant 1 : index
    %dim_22769 = tensor.dim %25255, %c1_22768 : tensor<4x?x1x64xcomplex<f32>>
    %25256 = flow.tensor.bitcast %25255 : tensor<4x?x1x64xcomplex<f32>>{%dim_22769} -> tensor<4x?x1x128xf32>{%dim_22769}
    %25257 = torch_c.from_builtin_tensor %25256 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %25257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_22770 = torch.constant.int 5
    %25258 = torch.prims.convert_element_type %25257, %int5_22770 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
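    // Repeated below for the K shards on devices 1 through 7.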
    %int1_22771 = torch.constant.int 1
    %25259 = torch.aten.size.int %24899, %int1_22771 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_22772 = torch.constant.int 0
    %25260 = torch.aten.add.int %int0_22772, %25259 : !torch.int, !torch.int -> !torch.int
    %int0_22773 = torch.constant.int 0
    %int0_22774 = torch.constant.int 0
    %int1_22775 = torch.constant.int 1
    %25261 = torch.aten.slice.Tensor %25225, %int0_22773, %int0_22774, %25260, %int1_22775 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25261, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22776 = torch.constant.int 1
    %int0_22777 = torch.constant.int 0
    %int9223372036854775807_22778 = torch.constant.int 9223372036854775807
    %int1_22779 = torch.constant.int 1
    %25262 = torch.aten.slice.Tensor %25261, %int1_22776, %int0_22777, %int9223372036854775807_22778, %int1_22779 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25262, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22780 = torch.constant.int 0
    %25263 = torch.aten.unsqueeze %25262, %int0_22780 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25263, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22781 = torch.constant.int 2
    %25264 = torch.aten.unsqueeze %25263, %int2_22781 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25264, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22782 = torch.constant.int 3
    %int0_22783 = torch.constant.int 0
    %int9223372036854775807_22784 = torch.constant.int 9223372036854775807
    %int1_22785 = torch.constant.int 1
    %25265 = torch.aten.slice.Tensor %25264, %int3_22782, %int0_22783, %int9223372036854775807_22784, %int1_22785 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25265, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25266 = torch_c.to_builtin_tensor %25019 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_22786 = arith.constant 1 : index
    %dim_22787 = tensor.dim %25266, %c1_22786 : tensor<4x?x1x128xf16>
    %25267 = flow.tensor.bitcast %25266 : tensor<4x?x1x128xf16>{%dim_22787} -> tensor<4x?x1x64xcomplex<f16>>{%dim_22787}
    %25268 = torch_c.from_builtin_tensor %25267 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %25268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %25269 = torch.aten.mul.Tensor %25268, %25265 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %25270 = torch_c.to_builtin_tensor %25269 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_22788 = arith.constant 1 : index
    %dim_22789 = tensor.dim %25270, %c1_22788 : tensor<4x?x1x64xcomplex<f32>>
    %25271 = flow.tensor.bitcast %25270 : tensor<4x?x1x64xcomplex<f32>>{%dim_22789} -> tensor<4x?x1x128xf32>{%dim_22789}
    %25272 = torch_c.from_builtin_tensor %25271 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %25272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_22790 = torch.constant.int 5
    %25273 = torch.prims.convert_element_type %25272, %int5_22790 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_22791 = torch.constant.int 1
    %25274 = torch.aten.size.int %24905, %int1_22791 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_22792 = torch.constant.int 0
    %25275 = torch.aten.add.int %int0_22792, %25274 : !torch.int, !torch.int -> !torch.int
    %int0_22793 = torch.constant.int 0
    %int0_22794 = torch.constant.int 0
    %int1_22795 = torch.constant.int 1
    %25276 = torch.aten.slice.Tensor %25228, %int0_22793, %int0_22794, %25275, %int1_22795 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25276, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22796 = torch.constant.int 1
    %int0_22797 = torch.constant.int 0
    %int9223372036854775807_22798 = torch.constant.int 9223372036854775807
    %int1_22799 = torch.constant.int 1
    %25277 = torch.aten.slice.Tensor %25276, %int1_22796, %int0_22797, %int9223372036854775807_22798, %int1_22799 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25277, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22800 = torch.constant.int 0
    %25278 = torch.aten.unsqueeze %25277, %int0_22800 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25278, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22801 = torch.constant.int 2
    %25279 = torch.aten.unsqueeze %25278, %int2_22801 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25279, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22802 = torch.constant.int 3
    %int0_22803 = torch.constant.int 0
    %int9223372036854775807_22804 = torch.constant.int 9223372036854775807
    %int1_22805 = torch.constant.int 1
    %25280 = torch.aten.slice.Tensor %25279, %int3_22802, %int0_22803, %int9223372036854775807_22804, %int1_22805 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25280, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25281 = torch_c.to_builtin_tensor %25021 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_22806 = arith.constant 1 : index
    %dim_22807 = tensor.dim %25281, %c1_22806 : tensor<4x?x1x128xf16>
    %25282 = flow.tensor.bitcast %25281 : tensor<4x?x1x128xf16>{%dim_22807} -> tensor<4x?x1x64xcomplex<f16>>{%dim_22807}
    %25283 = torch_c.from_builtin_tensor %25282 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %25283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %25284 = torch.aten.mul.Tensor %25283, %25280 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %25285 = torch_c.to_builtin_tensor %25284 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_22808 = arith.constant 1 : index
    %dim_22809 = tensor.dim %25285, %c1_22808 : tensor<4x?x1x64xcomplex<f32>>
    %25286 = flow.tensor.bitcast %25285 : tensor<4x?x1x64xcomplex<f32>>{%dim_22809} -> tensor<4x?x1x128xf32>{%dim_22809}
    %25287 = torch_c.from_builtin_tensor %25286 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %25287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_22810 = torch.constant.int 5
    %25288 = torch.prims.convert_element_type %25287, %int5_22810 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_22811 = torch.constant.int 1
    %25289 = torch.aten.size.int %24911, %int1_22811 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_22812 = torch.constant.int 0
    %25290 = torch.aten.add.int %int0_22812, %25289 : !torch.int, !torch.int -> !torch.int
    %int0_22813 = torch.constant.int 0
    %int0_22814 = torch.constant.int 0
    %int1_22815 = torch.constant.int 1
    %25291 = torch.aten.slice.Tensor %25231, %int0_22813, %int0_22814, %25290, %int1_22815 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25291, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22816 = torch.constant.int 1
    %int0_22817 = torch.constant.int 0
    %int9223372036854775807_22818 = torch.constant.int 9223372036854775807
    %int1_22819 = torch.constant.int 1
    %25292 = torch.aten.slice.Tensor %25291, %int1_22816, %int0_22817, %int9223372036854775807_22818, %int1_22819 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25292, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22820 = torch.constant.int 0
    %25293 = torch.aten.unsqueeze %25292, %int0_22820 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25293, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22821 = torch.constant.int 2
    %25294 = torch.aten.unsqueeze %25293, %int2_22821 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25294, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22822 = torch.constant.int 3
    %int0_22823 = torch.constant.int 0
    %int9223372036854775807_22824 = torch.constant.int 9223372036854775807
    %int1_22825 = torch.constant.int 1
    %25295 = torch.aten.slice.Tensor %25294, %int3_22822, %int0_22823, %int9223372036854775807_22824, %int1_22825 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25295, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25296 = torch_c.to_builtin_tensor %25023 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_22826 = arith.constant 1 : index
    %dim_22827 = tensor.dim %25296, %c1_22826 : tensor<4x?x1x128xf16>
    %25297 = flow.tensor.bitcast %25296 : tensor<4x?x1x128xf16>{%dim_22827} -> tensor<4x?x1x64xcomplex<f16>>{%dim_22827}
    %25298 = torch_c.from_builtin_tensor %25297 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %25298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %25299 = torch.aten.mul.Tensor %25298, %25295 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %25300 = torch_c.to_builtin_tensor %25299 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_22828 = arith.constant 1 : index
    %dim_22829 = tensor.dim %25300, %c1_22828 : tensor<4x?x1x64xcomplex<f32>>
    %25301 = flow.tensor.bitcast %25300 : tensor<4x?x1x64xcomplex<f32>>{%dim_22829} -> tensor<4x?x1x128xf32>{%dim_22829}
    %25302 = torch_c.from_builtin_tensor %25301 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %25302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_22830 = torch.constant.int 5
    %25303 = torch.prims.convert_element_type %25302, %int5_22830 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_22831 = torch.constant.int 1
    %25304 = torch.aten.size.int %24917, %int1_22831 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_22832 = torch.constant.int 0
    %25305 = torch.aten.add.int %int0_22832, %25304 : !torch.int, !torch.int -> !torch.int
    %int0_22833 = torch.constant.int 0
    %int0_22834 = torch.constant.int 0
    %int1_22835 = torch.constant.int 1
    %25306 = torch.aten.slice.Tensor %25234, %int0_22833, %int0_22834, %25305, %int1_22835 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25306, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22836 = torch.constant.int 1
    %int0_22837 = torch.constant.int 0
    %int9223372036854775807_22838 = torch.constant.int 9223372036854775807
    %int1_22839 = torch.constant.int 1
    %25307 = torch.aten.slice.Tensor %25306, %int1_22836, %int0_22837, %int9223372036854775807_22838, %int1_22839 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25307, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22840 = torch.constant.int 0
    %25308 = torch.aten.unsqueeze %25307, %int0_22840 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25308, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22841 = torch.constant.int 2
    %25309 = torch.aten.unsqueeze %25308, %int2_22841 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25309, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22842 = torch.constant.int 3
    %int0_22843 = torch.constant.int 0
    %int9223372036854775807_22844 = torch.constant.int 9223372036854775807
    %int1_22845 = torch.constant.int 1
    %25310 = torch.aten.slice.Tensor %25309, %int3_22842, %int0_22843, %int9223372036854775807_22844, %int1_22845 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25310, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25311 = torch_c.to_builtin_tensor %25025 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_22846 = arith.constant 1 : index
    %dim_22847 = tensor.dim %25311, %c1_22846 : tensor<4x?x1x128xf16>
    %25312 = flow.tensor.bitcast %25311 : tensor<4x?x1x128xf16>{%dim_22847} -> tensor<4x?x1x64xcomplex<f16>>{%dim_22847}
    %25313 = torch_c.from_builtin_tensor %25312 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %25313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %25314 = torch.aten.mul.Tensor %25313, %25310 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %25315 = torch_c.to_builtin_tensor %25314 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_22848 = arith.constant 1 : index
    %dim_22849 = tensor.dim %25315, %c1_22848 : tensor<4x?x1x64xcomplex<f32>>
    %25316 = flow.tensor.bitcast %25315 : tensor<4x?x1x64xcomplex<f32>>{%dim_22849} -> tensor<4x?x1x128xf32>{%dim_22849}
    %25317 = torch_c.from_builtin_tensor %25316 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %25317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_22850 = torch.constant.int 5
    %25318 = torch.prims.convert_element_type %25317, %int5_22850 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_22851 = torch.constant.int 1
    %25319 = torch.aten.size.int %24923, %int1_22851 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_22852 = torch.constant.int 0
    %25320 = torch.aten.add.int %int0_22852, %25319 : !torch.int, !torch.int -> !torch.int
    %int0_22853 = torch.constant.int 0
    %int0_22854 = torch.constant.int 0
    %int1_22855 = torch.constant.int 1
    %25321 = torch.aten.slice.Tensor %25237, %int0_22853, %int0_22854, %25320, %int1_22855 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25321, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22856 = torch.constant.int 1
    %int0_22857 = torch.constant.int 0
    %int9223372036854775807_22858 = torch.constant.int 9223372036854775807
    %int1_22859 = torch.constant.int 1
    %25322 = torch.aten.slice.Tensor %25321, %int1_22856, %int0_22857, %int9223372036854775807_22858, %int1_22859 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25322, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22860 = torch.constant.int 0
    %25323 = torch.aten.unsqueeze %25322, %int0_22860 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25323, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22861 = torch.constant.int 2
    %25324 = torch.aten.unsqueeze %25323, %int2_22861 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25324, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22862 = torch.constant.int 3
    %int0_22863 = torch.constant.int 0
    %int9223372036854775807_22864 = torch.constant.int 9223372036854775807
    %int1_22865 = torch.constant.int 1
    %25325 = torch.aten.slice.Tensor %25324, %int3_22862, %int0_22863, %int9223372036854775807_22864, %int1_22865 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25325, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25326 = torch_c.to_builtin_tensor %25027 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_22866 = arith.constant 1 : index
    %dim_22867 = tensor.dim %25326, %c1_22866 : tensor<4x?x1x128xf16>
    %25327 = flow.tensor.bitcast %25326 : tensor<4x?x1x128xf16>{%dim_22867} -> tensor<4x?x1x64xcomplex<f16>>{%dim_22867}
    %25328 = torch_c.from_builtin_tensor %25327 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %25328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %25329 = torch.aten.mul.Tensor %25328, %25325 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %25330 = torch_c.to_builtin_tensor %25329 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_22868 = arith.constant 1 : index
    %dim_22869 = tensor.dim %25330, %c1_22868 : tensor<4x?x1x64xcomplex<f32>>
    %25331 = flow.tensor.bitcast %25330 : tensor<4x?x1x64xcomplex<f32>>{%dim_22869} -> tensor<4x?x1x128xf32>{%dim_22869}
    %25332 = torch_c.from_builtin_tensor %25331 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %25332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_22870 = torch.constant.int 5
    %25333 = torch.prims.convert_element_type %25332, %int5_22870 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_22871 = torch.constant.int 1
    %25334 = torch.aten.size.int %24929, %int1_22871 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_22872 = torch.constant.int 0
    %25335 = torch.aten.add.int %int0_22872, %25334 : !torch.int, !torch.int -> !torch.int
    %int0_22873 = torch.constant.int 0
    %int0_22874 = torch.constant.int 0
    %int1_22875 = torch.constant.int 1
    %25336 = torch.aten.slice.Tensor %25240, %int0_22873, %int0_22874, %25335, %int1_22875 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25336, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22876 = torch.constant.int 1
    %int0_22877 = torch.constant.int 0
    %int9223372036854775807_22878 = torch.constant.int 9223372036854775807
    %int1_22879 = torch.constant.int 1
    %25337 = torch.aten.slice.Tensor %25336, %int1_22876, %int0_22877, %int9223372036854775807_22878, %int1_22879 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25337, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22880 = torch.constant.int 0
    %25338 = torch.aten.unsqueeze %25337, %int0_22880 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25338, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22881 = torch.constant.int 2
    %25339 = torch.aten.unsqueeze %25338, %int2_22881 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25339, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22882 = torch.constant.int 3
    %int0_22883 = torch.constant.int 0
    %int9223372036854775807_22884 = torch.constant.int 9223372036854775807
    %int1_22885 = torch.constant.int 1
    %25340 = torch.aten.slice.Tensor %25339, %int3_22882, %int0_22883, %int9223372036854775807_22884, %int1_22885 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25340, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25341 = torch_c.to_builtin_tensor %25029 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_22886 = arith.constant 1 : index
    %dim_22887 = tensor.dim %25341, %c1_22886 : tensor<4x?x1x128xf16>
    %25342 = flow.tensor.bitcast %25341 : tensor<4x?x1x128xf16>{%dim_22887} -> tensor<4x?x1x64xcomplex<f16>>{%dim_22887}
    %25343 = torch_c.from_builtin_tensor %25342 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %25343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %25344 = torch.aten.mul.Tensor %25343, %25340 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %25345 = torch_c.to_builtin_tensor %25344 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_22888 = arith.constant 1 : index
    %dim_22889 = tensor.dim %25345, %c1_22888 : tensor<4x?x1x64xcomplex<f32>>
    %25346 = flow.tensor.bitcast %25345 : tensor<4x?x1x64xcomplex<f32>>{%dim_22889} -> tensor<4x?x1x128xf32>{%dim_22889}
    %25347 = torch_c.from_builtin_tensor %25346 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %25347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_22890 = torch.constant.int 5
    %25348 = torch.prims.convert_element_type %25347, %int5_22890 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_22891 = torch.constant.int 1
    %25349 = torch.aten.size.int %24935, %int1_22891 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_22892 = torch.constant.int 0
    %25350 = torch.aten.add.int %int0_22892, %25349 : !torch.int, !torch.int -> !torch.int
    %int0_22893 = torch.constant.int 0
    %int0_22894 = torch.constant.int 0
    %int1_22895 = torch.constant.int 1
    %25351 = torch.aten.slice.Tensor %25243, %int0_22893, %int0_22894, %25350, %int1_22895 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25351, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_22896 = torch.constant.int 1
    %int0_22897 = torch.constant.int 0
    %int9223372036854775807_22898 = torch.constant.int 9223372036854775807
    %int1_22899 = torch.constant.int 1
    %25352 = torch.aten.slice.Tensor %25351, %int1_22896, %int0_22897, %int9223372036854775807_22898, %int1_22899 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %25352, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_22900 = torch.constant.int 0
    %25353 = torch.aten.unsqueeze %25352, %int0_22900 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %25353, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_22901 = torch.constant.int 2
    %25354 = torch.aten.unsqueeze %25353, %int2_22901 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25354, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_22902 = torch.constant.int 3
    %int0_22903 = torch.constant.int 0
    %int9223372036854775807_22904 = torch.constant.int 9223372036854775807
    %int1_22905 = torch.constant.int 1
    %25355 = torch.aten.slice.Tensor %25354, %int3_22902, %int0_22903, %int9223372036854775807_22904, %int1_22905 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25355, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %25356 = torch_c.to_builtin_tensor %25031 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_22906 = arith.constant 1 : index
    %dim_22907 = tensor.dim %25356, %c1_22906 : tensor<4x?x1x128xf16>
    %25357 = flow.tensor.bitcast %25356 : tensor<4x?x1x128xf16>{%dim_22907} -> tensor<4x?x1x64xcomplex<f16>>{%dim_22907}
    %25358 = torch_c.from_builtin_tensor %25357 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %25358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %25359 = torch.aten.mul.Tensor %25358, %25355 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %25359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %25360 = torch_c.to_builtin_tensor %25359 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_22908 = arith.constant 1 : index
    %dim_22909 = tensor.dim %25360, %c1_22908 : tensor<4x?x1x64xcomplex<f32>>
    %25361 = flow.tensor.bitcast %25360 : tensor<4x?x1x64xcomplex<f32>>{%dim_22909} -> tensor<4x?x1x128xf32>{%dim_22909}
    %25362 = torch_c.from_builtin_tensor %25361 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %25362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_22910 = torch.constant.int 5
    %25363 = torch.prims.convert_element_type %25362, %int5_22910 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %25363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
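    // Cache indexing: scale each page-id tensor by 64 on every device; 64 appears to be the number of cache slots per page.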
    %int64_22911 = torch.constant.int 64
    %25364 = torch.aten.mul.Scalar %2364, %int64_22911 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25364, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_22912 = torch.constant.int 64
    %25365 = torch.aten.mul.Scalar %2367, %int64_22912 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25365, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_22913 = torch.constant.int 64
    %25366 = torch.aten.mul.Scalar %2370, %int64_22913 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25366, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_22914 = torch.constant.int 64
    %25367 = torch.aten.mul.Scalar %2373, %int64_22914 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25367, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_22915 = torch.constant.int 64
    %25368 = torch.aten.mul.Scalar %2376, %int64_22915 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25368, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_22916 = torch.constant.int 64
    %25369 = torch.aten.mul.Scalar %2379, %int64_22916 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25369, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_22917 = torch.constant.int 64
    %25370 = torch.aten.mul.Scalar %2382, %int64_22917 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25370, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_22918 = torch.constant.int 64
    %25371 = torch.aten.mul.Scalar %2385, %int64_22918 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25371, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
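    // Then add the constant slot offset 24, likely the index of this layer's K entries within a page.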
    %int24 = torch.constant.int 24
    %int1_22919 = torch.constant.int 1
    %25372 = torch.aten.add.Scalar %25364, %int24, %int1_22919 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25372, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int24_22920 = torch.constant.int 24
    %int1_22921 = torch.constant.int 1
    %25373 = torch.aten.add.Scalar %25365, %int24_22920, %int1_22921 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25373, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int24_22922 = torch.constant.int 24
    %int1_22923 = torch.constant.int 1
    %25374 = torch.aten.add.Scalar %25366, %int24_22922, %int1_22923 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25374, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int24_22924 = torch.constant.int 24
    %int1_22925 = torch.constant.int 1
    %25375 = torch.aten.add.Scalar %25367, %int24_22924, %int1_22925 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25375, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int24_22926 = torch.constant.int 24
    %int1_22927 = torch.constant.int 1
    %25376 = torch.aten.add.Scalar %25368, %int24_22926, %int1_22927 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25376, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int24_22928 = torch.constant.int 24
    %int1_22929 = torch.constant.int 1
    %25377 = torch.aten.add.Scalar %25369, %int24_22928, %int1_22929 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25377, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int24_22930 = torch.constant.int 24
    %int1_22931 = torch.constant.int 1
    %25378 = torch.aten.add.Scalar %25370, %int24_22930, %int1_22931 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25378, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int24_22932 = torch.constant.int 24
    %int1_22933 = torch.constant.int 1
    %25379 = torch.aten.add.Scalar %25371, %int24_22932, %int1_22933 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25379, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
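    // Reshape the first set of new values for all 8 shards from [4, ?, 1, 128] to
    // [4, s0, 16, 1, 128], splitting the flattened dynamic dim into (pages, 16).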
    %int4_22934 = torch.constant.int 4
    %int16_22935 = torch.constant.int 16
    %int1_22936 = torch.constant.int 1
    %int128_22937 = torch.constant.int 128
    %25380 = torch.prim.ListConstruct %int4_22934, %3095, %int16_22935, %int1_22936, %int128_22937 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25381 = torch.aten.view %25258, %25380 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25381, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_22938 = torch.constant.int 4
    %int16_22939 = torch.constant.int 16
    %int1_22940 = torch.constant.int 1
    %int128_22941 = torch.constant.int 128
    %25382 = torch.prim.ListConstruct %int4_22938, %3095, %int16_22939, %int1_22940, %int128_22941 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25383 = torch.aten.view %25273, %25382 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25383, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_22942 = torch.constant.int 4
    %int16_22943 = torch.constant.int 16
    %int1_22944 = torch.constant.int 1
    %int128_22945 = torch.constant.int 128
    %25384 = torch.prim.ListConstruct %int4_22942, %3095, %int16_22943, %int1_22944, %int128_22945 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25385 = torch.aten.view %25288, %25384 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25385, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_22946 = torch.constant.int 4
    %int16_22947 = torch.constant.int 16
    %int1_22948 = torch.constant.int 1
    %int128_22949 = torch.constant.int 128
    %25386 = torch.prim.ListConstruct %int4_22946, %3095, %int16_22947, %int1_22948, %int128_22949 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25387 = torch.aten.view %25303, %25386 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25387, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_22950 = torch.constant.int 4
    %int16_22951 = torch.constant.int 16
    %int1_22952 = torch.constant.int 1
    %int128_22953 = torch.constant.int 128
    %25388 = torch.prim.ListConstruct %int4_22950, %3095, %int16_22951, %int1_22952, %int128_22953 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25389 = torch.aten.view %25318, %25388 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25389, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_22954 = torch.constant.int 4
    %int16_22955 = torch.constant.int 16
    %int1_22956 = torch.constant.int 1
    %int128_22957 = torch.constant.int 128
    %25390 = torch.prim.ListConstruct %int4_22954, %3095, %int16_22955, %int1_22956, %int128_22957 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25391 = torch.aten.view %25333, %25390 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25391, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_22958 = torch.constant.int 4
    %int16_22959 = torch.constant.int 16
    %int1_22960 = torch.constant.int 1
    %int128_22961 = torch.constant.int 128
    %25392 = torch.prim.ListConstruct %int4_22958, %3095, %int16_22959, %int1_22960, %int128_22961 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25393 = torch.aten.view %25348, %25392 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25393, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_22962 = torch.constant.int 4
    %int16_22963 = torch.constant.int 16
    %int1_22964 = torch.constant.int 1
    %int128_22965 = torch.constant.int 128
    %25394 = torch.prim.ListConstruct %int4_22962, %3095, %int16_22963, %int1_22964, %int128_22965 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25395 = torch.aten.view %25363, %25394 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25395, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
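    // Collapse the batch and page dims, [4, s0, 16, 1, 128] -> [s0 * 4, 16, 1, 128],
    // so value rows line up one-to-one with the flattened scatter indices built below.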
    %int4_22966 = torch.constant.int 4
    %25396 = torch.aten.mul.int %int4_22966, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_22967 = torch.constant.int 16
    %int1_22968 = torch.constant.int 1
    %int128_22969 = torch.constant.int 128
    %25397 = torch.prim.ListConstruct %25396, %int16_22967, %int1_22968, %int128_22969 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25398 = torch.aten.view %25381, %25397 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25398, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_22970 = torch.constant.int 4
    %25399 = torch.aten.mul.int %int4_22970, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_22971 = torch.constant.int 16
    %int1_22972 = torch.constant.int 1
    %int128_22973 = torch.constant.int 128
    %25400 = torch.prim.ListConstruct %25399, %int16_22971, %int1_22972, %int128_22973 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25401 = torch.aten.view %25383, %25400 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25401, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_22974 = torch.constant.int 4
    %25402 = torch.aten.mul.int %int4_22974, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_22975 = torch.constant.int 16
    %int1_22976 = torch.constant.int 1
    %int128_22977 = torch.constant.int 128
    %25403 = torch.prim.ListConstruct %25402, %int16_22975, %int1_22976, %int128_22977 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25404 = torch.aten.view %25385, %25403 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25404, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_22978 = torch.constant.int 4
    %25405 = torch.aten.mul.int %int4_22978, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_22979 = torch.constant.int 16
    %int1_22980 = torch.constant.int 1
    %int128_22981 = torch.constant.int 128
    %25406 = torch.prim.ListConstruct %25405, %int16_22979, %int1_22980, %int128_22981 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25407 = torch.aten.view %25387, %25406 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25407, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_22982 = torch.constant.int 4
    %25408 = torch.aten.mul.int %int4_22982, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_22983 = torch.constant.int 16
    %int1_22984 = torch.constant.int 1
    %int128_22985 = torch.constant.int 128
    %25409 = torch.prim.ListConstruct %25408, %int16_22983, %int1_22984, %int128_22985 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25410 = torch.aten.view %25389, %25409 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25410, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_22986 = torch.constant.int 4
    %25411 = torch.aten.mul.int %int4_22986, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_22987 = torch.constant.int 16
    %int1_22988 = torch.constant.int 1
    %int128_22989 = torch.constant.int 128
    %25412 = torch.prim.ListConstruct %25411, %int16_22987, %int1_22988, %int128_22989 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25413 = torch.aten.view %25391, %25412 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25413, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_22990 = torch.constant.int 4
    %25414 = torch.aten.mul.int %int4_22990, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_22991 = torch.constant.int 16
    %int1_22992 = torch.constant.int 1
    %int128_22993 = torch.constant.int 128
    %25415 = torch.prim.ListConstruct %25414, %int16_22991, %int1_22992, %int128_22993 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25416 = torch.aten.view %25393, %25415 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25416, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_22994 = torch.constant.int 4
    %25417 = torch.aten.mul.int %int4_22994, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_22995 = torch.constant.int 16
    %int1_22996 = torch.constant.int 1
    %int128_22997 = torch.constant.int 128
    %25418 = torch.prim.ListConstruct %25417, %int16_22995, %int1_22996, %int128_22997 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25419 = torch.aten.view %25395, %25418 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25419, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
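    // Flatten the offset-24 index tensors the same way: [4, s0] -> [s0 * 4].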
    %int4_22998 = torch.constant.int 4
    %25420 = torch.aten.mul.int %int4_22998, %3095 : !torch.int, !torch.int -> !torch.int
    %25421 = torch.prim.ListConstruct %25420 : (!torch.int) -> !torch.list<int>
    %25422 = torch.aten.view %25372, %25421 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25422, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_22999 = torch.constant.int 4
    %25423 = torch.aten.mul.int %int4_22999, %3095 : !torch.int, !torch.int -> !torch.int
    %25424 = torch.prim.ListConstruct %25423 : (!torch.int) -> !torch.list<int>
    %25425 = torch.aten.view %25373, %25424 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25425, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23000 = torch.constant.int 4
    %25426 = torch.aten.mul.int %int4_23000, %3095 : !torch.int, !torch.int -> !torch.int
    %25427 = torch.prim.ListConstruct %25426 : (!torch.int) -> !torch.list<int>
    %25428 = torch.aten.view %25374, %25427 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25428, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23001 = torch.constant.int 4
    %25429 = torch.aten.mul.int %int4_23001, %3095 : !torch.int, !torch.int -> !torch.int
    %25430 = torch.prim.ListConstruct %25429 : (!torch.int) -> !torch.list<int>
    %25431 = torch.aten.view %25375, %25430 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25431, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23002 = torch.constant.int 4
    %25432 = torch.aten.mul.int %int4_23002, %3095 : !torch.int, !torch.int -> !torch.int
    %25433 = torch.prim.ListConstruct %25432 : (!torch.int) -> !torch.list<int>
    %25434 = torch.aten.view %25376, %25433 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25434, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23003 = torch.constant.int 4
    %25435 = torch.aten.mul.int %int4_23003, %3095 : !torch.int, !torch.int -> !torch.int
    %25436 = torch.prim.ListConstruct %25435 : (!torch.int) -> !torch.list<int>
    %25437 = torch.aten.view %25377, %25436 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25437, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23004 = torch.constant.int 4
    %25438 = torch.aten.mul.int %int4_23004, %3095 : !torch.int, !torch.int -> !torch.int
    %25439 = torch.prim.ListConstruct %25438 : (!torch.int) -> !torch.list<int>
    %25440 = torch.aten.view %25378, %25439 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25440, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23005 = torch.constant.int 4
    %25441 = torch.aten.mul.int %int4_23005, %3095 : !torch.int, !torch.int -> !torch.int
    %25442 = torch.prim.ListConstruct %25441 : (!torch.int) -> !torch.list<int>
    %25443 = torch.aten.view %25379, %25442 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25443, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
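    // The same reshape is applied to a second set of values (%25033 ... %25047),
    // presumably the companion tensors destined for the adjacent (offset-25) slot.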
    %int4_23006 = torch.constant.int 4
    %int16_23007 = torch.constant.int 16
    %int1_23008 = torch.constant.int 1
    %int128_23009 = torch.constant.int 128
    %25444 = torch.prim.ListConstruct %int4_23006, %3095, %int16_23007, %int1_23008, %int128_23009 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25445 = torch.aten.view %25033, %25444 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25445, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_23010 = torch.constant.int 4
    %int16_23011 = torch.constant.int 16
    %int1_23012 = torch.constant.int 1
    %int128_23013 = torch.constant.int 128
    %25446 = torch.prim.ListConstruct %int4_23010, %3095, %int16_23011, %int1_23012, %int128_23013 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25447 = torch.aten.view %25035, %25446 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25447, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_23014 = torch.constant.int 4
    %int16_23015 = torch.constant.int 16
    %int1_23016 = torch.constant.int 1
    %int128_23017 = torch.constant.int 128
    %25448 = torch.prim.ListConstruct %int4_23014, %3095, %int16_23015, %int1_23016, %int128_23017 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25449 = torch.aten.view %25037, %25448 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25449, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_23018 = torch.constant.int 4
    %int16_23019 = torch.constant.int 16
    %int1_23020 = torch.constant.int 1
    %int128_23021 = torch.constant.int 128
    %25450 = torch.prim.ListConstruct %int4_23018, %3095, %int16_23019, %int1_23020, %int128_23021 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25451 = torch.aten.view %25039, %25450 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25451, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_23022 = torch.constant.int 4
    %int16_23023 = torch.constant.int 16
    %int1_23024 = torch.constant.int 1
    %int128_23025 = torch.constant.int 128
    %25452 = torch.prim.ListConstruct %int4_23022, %3095, %int16_23023, %int1_23024, %int128_23025 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25453 = torch.aten.view %25041, %25452 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25453, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_23026 = torch.constant.int 4
    %int16_23027 = torch.constant.int 16
    %int1_23028 = torch.constant.int 1
    %int128_23029 = torch.constant.int 128
    %25454 = torch.prim.ListConstruct %int4_23026, %3095, %int16_23027, %int1_23028, %int128_23029 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25455 = torch.aten.view %25043, %25454 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25455, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_23030 = torch.constant.int 4
    %int16_23031 = torch.constant.int 16
    %int1_23032 = torch.constant.int 1
    %int128_23033 = torch.constant.int 128
    %25456 = torch.prim.ListConstruct %int4_23030, %3095, %int16_23031, %int1_23032, %int128_23033 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25457 = torch.aten.view %25045, %25456 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25457, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_23034 = torch.constant.int 4
    %int16_23035 = torch.constant.int 16
    %int1_23036 = torch.constant.int 1
    %int128_23037 = torch.constant.int 128
    %25458 = torch.prim.ListConstruct %int4_23034, %3095, %int16_23035, %int1_23036, %int128_23037 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25459 = torch.aten.view %25047, %25458 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %25459, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
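    // ...and these are likewise flattened to [s0 * 4, 16, 1, 128].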
    %int4_23038 = torch.constant.int 4
    %25460 = torch.aten.mul.int %int4_23038, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_23039 = torch.constant.int 16
    %int1_23040 = torch.constant.int 1
    %int128_23041 = torch.constant.int 128
    %25461 = torch.prim.ListConstruct %25460, %int16_23039, %int1_23040, %int128_23041 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25462 = torch.aten.view %25445, %25461 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25462, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_23042 = torch.constant.int 4
    %25463 = torch.aten.mul.int %int4_23042, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_23043 = torch.constant.int 16
    %int1_23044 = torch.constant.int 1
    %int128_23045 = torch.constant.int 128
    %25464 = torch.prim.ListConstruct %25463, %int16_23043, %int1_23044, %int128_23045 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25465 = torch.aten.view %25447, %25464 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25465, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_23046 = torch.constant.int 4
    %25466 = torch.aten.mul.int %int4_23046, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_23047 = torch.constant.int 16
    %int1_23048 = torch.constant.int 1
    %int128_23049 = torch.constant.int 128
    %25467 = torch.prim.ListConstruct %25466, %int16_23047, %int1_23048, %int128_23049 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25468 = torch.aten.view %25449, %25467 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25468, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_23050 = torch.constant.int 4
    %25469 = torch.aten.mul.int %int4_23050, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_23051 = torch.constant.int 16
    %int1_23052 = torch.constant.int 1
    %int128_23053 = torch.constant.int 128
    %25470 = torch.prim.ListConstruct %25469, %int16_23051, %int1_23052, %int128_23053 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25471 = torch.aten.view %25451, %25470 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25471, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_23054 = torch.constant.int 4
    %25472 = torch.aten.mul.int %int4_23054, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_23055 = torch.constant.int 16
    %int1_23056 = torch.constant.int 1
    %int128_23057 = torch.constant.int 128
    %25473 = torch.prim.ListConstruct %25472, %int16_23055, %int1_23056, %int128_23057 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25474 = torch.aten.view %25453, %25473 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25474, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_23058 = torch.constant.int 4
    %25475 = torch.aten.mul.int %int4_23058, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_23059 = torch.constant.int 16
    %int1_23060 = torch.constant.int 1
    %int128_23061 = torch.constant.int 128
    %25476 = torch.prim.ListConstruct %25475, %int16_23059, %int1_23060, %int128_23061 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25477 = torch.aten.view %25455, %25476 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25477, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_23062 = torch.constant.int 4
    %25478 = torch.aten.mul.int %int4_23062, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_23063 = torch.constant.int 16
    %int1_23064 = torch.constant.int 1
    %int128_23065 = torch.constant.int 128
    %25479 = torch.prim.ListConstruct %25478, %int16_23063, %int1_23064, %int128_23065 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25480 = torch.aten.view %25457, %25479 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25480, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_23066 = torch.constant.int 4
    %25481 = torch.aten.mul.int %int4_23066, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_23067 = torch.constant.int 16
    %int1_23068 = torch.constant.int 1
    %int128_23069 = torch.constant.int 128
    %25482 = torch.prim.ListConstruct %25481, %int16_23067, %int1_23068, %int128_23069 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25483 = torch.aten.view %25459, %25482 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25483, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
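    // Add 1 to the offset-24 indices to target sub-block 25, presumably the V slot
    // paired with each K slot.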
    %int1_23070 = torch.constant.int 1
    %int1_23071 = torch.constant.int 1
    %25484 = torch.aten.add.Scalar %25372, %int1_23070, %int1_23071 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25484, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_23072 = torch.constant.int 1
    %int1_23073 = torch.constant.int 1
    %25485 = torch.aten.add.Scalar %25373, %int1_23072, %int1_23073 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25485, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_23074 = torch.constant.int 1
    %int1_23075 = torch.constant.int 1
    %25486 = torch.aten.add.Scalar %25374, %int1_23074, %int1_23075 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25486, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_23076 = torch.constant.int 1
    %int1_23077 = torch.constant.int 1
    %25487 = torch.aten.add.Scalar %25375, %int1_23076, %int1_23077 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25487, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_23078 = torch.constant.int 1
    %int1_23079 = torch.constant.int 1
    %25488 = torch.aten.add.Scalar %25376, %int1_23078, %int1_23079 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25488, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_23080 = torch.constant.int 1
    %int1_23081 = torch.constant.int 1
    %25489 = torch.aten.add.Scalar %25377, %int1_23080, %int1_23081 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25489, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_23082 = torch.constant.int 1
    %int1_23083 = torch.constant.int 1
    %25490 = torch.aten.add.Scalar %25378, %int1_23082, %int1_23083 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25490, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_23084 = torch.constant.int 1
    %int1_23085 = torch.constant.int 1
    %25491 = torch.aten.add.Scalar %25379, %int1_23084, %int1_23085 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %25491, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
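    // Flatten the offset-25 index tensors to [s0 * 4].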
    %int4_23086 = torch.constant.int 4
    %25492 = torch.aten.mul.int %int4_23086, %3095 : !torch.int, !torch.int -> !torch.int
    %25493 = torch.prim.ListConstruct %25492 : (!torch.int) -> !torch.list<int>
    %25494 = torch.aten.view %25484, %25493 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25494, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23087 = torch.constant.int 4
    %25495 = torch.aten.mul.int %int4_23087, %3095 : !torch.int, !torch.int -> !torch.int
    %25496 = torch.prim.ListConstruct %25495 : (!torch.int) -> !torch.list<int>
    %25497 = torch.aten.view %25485, %25496 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25497, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23088 = torch.constant.int 4
    %25498 = torch.aten.mul.int %int4_23088, %3095 : !torch.int, !torch.int -> !torch.int
    %25499 = torch.prim.ListConstruct %25498 : (!torch.int) -> !torch.list<int>
    %25500 = torch.aten.view %25486, %25499 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25500, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23089 = torch.constant.int 4
    %25501 = torch.aten.mul.int %int4_23089, %3095 : !torch.int, !torch.int -> !torch.int
    %25502 = torch.prim.ListConstruct %25501 : (!torch.int) -> !torch.list<int>
    %25503 = torch.aten.view %25487, %25502 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25503, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23090 = torch.constant.int 4
    %25504 = torch.aten.mul.int %int4_23090, %3095 : !torch.int, !torch.int -> !torch.int
    %25505 = torch.prim.ListConstruct %25504 : (!torch.int) -> !torch.list<int>
    %25506 = torch.aten.view %25488, %25505 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25506, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23091 = torch.constant.int 4
    %25507 = torch.aten.mul.int %int4_23091, %3095 : !torch.int, !torch.int -> !torch.int
    %25508 = torch.prim.ListConstruct %25507 : (!torch.int) -> !torch.list<int>
    %25509 = torch.aten.view %25489, %25508 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25509, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23092 = torch.constant.int 4
    %25510 = torch.aten.mul.int %int4_23092, %3095 : !torch.int, !torch.int -> !torch.int
    %25511 = torch.prim.ListConstruct %25510 : (!torch.int) -> !torch.list<int>
    %25512 = torch.aten.view %25490, %25511 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25512, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_23093 = torch.constant.int 4
    %25513 = torch.aten.mul.int %int4_23093, %3095 : !torch.int, !torch.int -> !torch.int
    %25514 = torch.prim.ListConstruct %25513 : (!torch.int) -> !torch.list<int>
    %25515 = torch.aten.view %25491, %25514 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25515, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
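    // Concatenate the offset-24 and offset-25 indices per shard into one [s0 * 8]
    // scatter-index vector.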
    %25516 = torch.prim.ListConstruct %25422, %25494 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_23094 = torch.constant.int 0
    %25517 = torch.aten.cat %25516, %int0_23094 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25517, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %25518 = torch.prim.ListConstruct %25425, %25497 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_23095 = torch.constant.int 0
    %25519 = torch.aten.cat %25518, %int0_23095 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25519, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %25520 = torch.prim.ListConstruct %25428, %25500 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_23096 = torch.constant.int 0
    %25521 = torch.aten.cat %25520, %int0_23096 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25521, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %25522 = torch.prim.ListConstruct %25431, %25503 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_23097 = torch.constant.int 0
    %25523 = torch.aten.cat %25522, %int0_23097 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25523, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %25524 = torch.prim.ListConstruct %25434, %25506 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_23098 = torch.constant.int 0
    %25525 = torch.aten.cat %25524, %int0_23098 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25525, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %25526 = torch.prim.ListConstruct %25437, %25509 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_23099 = torch.constant.int 0
    %25527 = torch.aten.cat %25526, %int0_23099 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25527, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %25528 = torch.prim.ListConstruct %25440, %25512 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_23100 = torch.constant.int 0
    %25529 = torch.aten.cat %25528, %int0_23100 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25529, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %25530 = torch.prim.ListConstruct %25443, %25515 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_23101 = torch.constant.int 0
    %25531 = torch.aten.cat %25530, %int0_23101 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %25531, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
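    // Concatenate the matching value tensors into [s0 * 8, 16, 1, 128] so a single
    // index_put per device writes both sub-blocks at once.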
    %25532 = torch.prim.ListConstruct %25398, %25462 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_23102 = torch.constant.int 0
    %25533 = torch.aten.cat %25532, %int0_23102 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25533, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25534 = torch.prim.ListConstruct %25401, %25465 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_23103 = torch.constant.int 0
    %25535 = torch.aten.cat %25534, %int0_23103 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25535, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25536 = torch.prim.ListConstruct %25404, %25468 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_23104 = torch.constant.int 0
    %25537 = torch.aten.cat %25536, %int0_23104 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25537, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25538 = torch.prim.ListConstruct %25407, %25471 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_23105 = torch.constant.int 0
    %25539 = torch.aten.cat %25538, %int0_23105 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25539, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25540 = torch.prim.ListConstruct %25410, %25474 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_23106 = torch.constant.int 0
    %25541 = torch.aten.cat %25540, %int0_23106 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25541, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25542 = torch.prim.ListConstruct %25413, %25477 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_23107 = torch.constant.int 0
    %25543 = torch.aten.cat %25542, %int0_23107 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25543, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25544 = torch.prim.ListConstruct %25416, %25480 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_23108 = torch.constant.int 0
    %25545 = torch.aten.cat %25544, %int0_23108 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25545, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25546 = torch.prim.ListConstruct %25419, %25483 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_23109 = torch.constant.int 0
    %25547 = torch.aten.cat %25546, %int0_23109 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25547, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
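    // First shard's cache update: view the paged cache [?, 131072] as
    // [?, 32, 2, 16, 1, 128], flatten to [? * 64, 16, 1, 128], scatter the new rows
    // with a non-accumulating index_put, then view back to the original layout.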
    %int32_23110 = torch.constant.int 32
    %int2_23111 = torch.constant.int 2
    %int16_23112 = torch.constant.int 16
    %int1_23113 = torch.constant.int 1
    %int128_23114 = torch.constant.int 128
    %25548 = torch.prim.ListConstruct %3023, %int32_23110, %int2_23111, %int16_23112, %int1_23113, %int128_23114 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25549 = torch.aten.view %23698, %25548 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25549, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_23115 = torch.constant.int 32
    %25550 = torch.aten.mul.int %3023, %int32_23115 : !torch.int, !torch.int -> !torch.int
    %int2_23116 = torch.constant.int 2
    %25551 = torch.aten.mul.int %25550, %int2_23116 : !torch.int, !torch.int -> !torch.int
    %int16_23117 = torch.constant.int 16
    %int1_23118 = torch.constant.int 1
    %int128_23119 = torch.constant.int 128
    %25552 = torch.prim.ListConstruct %25551, %int16_23117, %int1_23118, %int128_23119 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25553 = torch.aten.view %25549, %25552 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25553, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25554 = torch.prim.ListConstruct %25517 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_23120 = torch.constant.bool false
    %25555 = torch.aten.index_put %25553, %25554, %25533, %false_23120 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25555, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_23121 = torch.constant.int 32
    %int2_23122 = torch.constant.int 2
    %int16_23123 = torch.constant.int 16
    %int1_23124 = torch.constant.int 1
    %int128_23125 = torch.constant.int 128
    %25556 = torch.prim.ListConstruct %3023, %int32_23121, %int2_23122, %int16_23123, %int1_23124, %int128_23125 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25557 = torch.aten.view %25555, %25556 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25557, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_23126 = torch.constant.int 131072
    %25558 = torch.prim.ListConstruct %3023, %int131072_23126 : (!torch.int, !torch.int) -> !torch.list<int>
    %25559 = torch.aten.view %25557, %25558 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %25559, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
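    // The identical view / index_put / view-back sequence now repeats for the caches
    // of the remaining seven shards (%23710, %23722, %23734, %23746, %23758, %23770, %23782).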
    %int32_23127 = torch.constant.int 32
    %int2_23128 = torch.constant.int 2
    %int16_23129 = torch.constant.int 16
    %int1_23130 = torch.constant.int 1
    %int128_23131 = torch.constant.int 128
    %25560 = torch.prim.ListConstruct %3026, %int32_23127, %int2_23128, %int16_23129, %int1_23130, %int128_23131 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25561 = torch.aten.view %23710, %25560 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25561, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_23132 = torch.constant.int 32
    %25562 = torch.aten.mul.int %3026, %int32_23132 : !torch.int, !torch.int -> !torch.int
    %int2_23133 = torch.constant.int 2
    %25563 = torch.aten.mul.int %25562, %int2_23133 : !torch.int, !torch.int -> !torch.int
    %int16_23134 = torch.constant.int 16
    %int1_23135 = torch.constant.int 1
    %int128_23136 = torch.constant.int 128
    %25564 = torch.prim.ListConstruct %25563, %int16_23134, %int1_23135, %int128_23136 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25565 = torch.aten.view %25561, %25564 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25565, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25566 = torch.prim.ListConstruct %25519 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_23137 = torch.constant.bool false
    %25567 = torch.aten.index_put %25565, %25566, %25535, %false_23137 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25567, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_23138 = torch.constant.int 32
    %int2_23139 = torch.constant.int 2
    %int16_23140 = torch.constant.int 16
    %int1_23141 = torch.constant.int 1
    %int128_23142 = torch.constant.int 128
    %25568 = torch.prim.ListConstruct %3026, %int32_23138, %int2_23139, %int16_23140, %int1_23141, %int128_23142 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25569 = torch.aten.view %25567, %25568 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25569, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_23143 = torch.constant.int 131072
    %25570 = torch.prim.ListConstruct %3026, %int131072_23143 : (!torch.int, !torch.int) -> !torch.list<int>
    %25571 = torch.aten.view %25569, %25570 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %25571, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_23144 = torch.constant.int 32
    %int2_23145 = torch.constant.int 2
    %int16_23146 = torch.constant.int 16
    %int1_23147 = torch.constant.int 1
    %int128_23148 = torch.constant.int 128
    %25572 = torch.prim.ListConstruct %3029, %int32_23144, %int2_23145, %int16_23146, %int1_23147, %int128_23148 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25573 = torch.aten.view %23722, %25572 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25573, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_23149 = torch.constant.int 32
    %25574 = torch.aten.mul.int %3029, %int32_23149 : !torch.int, !torch.int -> !torch.int
    %int2_23150 = torch.constant.int 2
    %25575 = torch.aten.mul.int %25574, %int2_23150 : !torch.int, !torch.int -> !torch.int
    %int16_23151 = torch.constant.int 16
    %int1_23152 = torch.constant.int 1
    %int128_23153 = torch.constant.int 128
    %25576 = torch.prim.ListConstruct %25575, %int16_23151, %int1_23152, %int128_23153 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25577 = torch.aten.view %25573, %25576 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25577, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25578 = torch.prim.ListConstruct %25521 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_23154 = torch.constant.bool false
    %25579 = torch.aten.index_put %25577, %25578, %25537, %false_23154 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25579, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_23155 = torch.constant.int 32
    %int2_23156 = torch.constant.int 2
    %int16_23157 = torch.constant.int 16
    %int1_23158 = torch.constant.int 1
    %int128_23159 = torch.constant.int 128
    %25580 = torch.prim.ListConstruct %3029, %int32_23155, %int2_23156, %int16_23157, %int1_23158, %int128_23159 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25581 = torch.aten.view %25579, %25580 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25581, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_23160 = torch.constant.int 131072
    %25582 = torch.prim.ListConstruct %3029, %int131072_23160 : (!torch.int, !torch.int) -> !torch.list<int>
    %25583 = torch.aten.view %25581, %25582 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %25583, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_23161 = torch.constant.int 32
    %int2_23162 = torch.constant.int 2
    %int16_23163 = torch.constant.int 16
    %int1_23164 = torch.constant.int 1
    %int128_23165 = torch.constant.int 128
    %25584 = torch.prim.ListConstruct %3032, %int32_23161, %int2_23162, %int16_23163, %int1_23164, %int128_23165 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25585 = torch.aten.view %23734, %25584 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25585, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_23166 = torch.constant.int 32
    %25586 = torch.aten.mul.int %3032, %int32_23166 : !torch.int, !torch.int -> !torch.int
    %int2_23167 = torch.constant.int 2
    %25587 = torch.aten.mul.int %25586, %int2_23167 : !torch.int, !torch.int -> !torch.int
    %int16_23168 = torch.constant.int 16
    %int1_23169 = torch.constant.int 1
    %int128_23170 = torch.constant.int 128
    %25588 = torch.prim.ListConstruct %25587, %int16_23168, %int1_23169, %int128_23170 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25589 = torch.aten.view %25585, %25588 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25589, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25590 = torch.prim.ListConstruct %25523 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_23171 = torch.constant.bool false
    %25591 = torch.aten.index_put %25589, %25590, %25539, %false_23171 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25591, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_23172 = torch.constant.int 32
    %int2_23173 = torch.constant.int 2
    %int16_23174 = torch.constant.int 16
    %int1_23175 = torch.constant.int 1
    %int128_23176 = torch.constant.int 128
    %25592 = torch.prim.ListConstruct %3032, %int32_23172, %int2_23173, %int16_23174, %int1_23175, %int128_23176 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25593 = torch.aten.view %25591, %25592 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25593, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_23177 = torch.constant.int 131072
    %25594 = torch.prim.ListConstruct %3032, %int131072_23177 : (!torch.int, !torch.int) -> !torch.list<int>
    %25595 = torch.aten.view %25593, %25594 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %25595, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_23178 = torch.constant.int 32
    %int2_23179 = torch.constant.int 2
    %int16_23180 = torch.constant.int 16
    %int1_23181 = torch.constant.int 1
    %int128_23182 = torch.constant.int 128
    %25596 = torch.prim.ListConstruct %3035, %int32_23178, %int2_23179, %int16_23180, %int1_23181, %int128_23182 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25597 = torch.aten.view %23746, %25596 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25597, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_23183 = torch.constant.int 32
    %25598 = torch.aten.mul.int %3035, %int32_23183 : !torch.int, !torch.int -> !torch.int
    %int2_23184 = torch.constant.int 2
    %25599 = torch.aten.mul.int %25598, %int2_23184 : !torch.int, !torch.int -> !torch.int
    %int16_23185 = torch.constant.int 16
    %int1_23186 = torch.constant.int 1
    %int128_23187 = torch.constant.int 128
    %25600 = torch.prim.ListConstruct %25599, %int16_23185, %int1_23186, %int128_23187 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25601 = torch.aten.view %25597, %25600 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25601, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25602 = torch.prim.ListConstruct %25525 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_23188 = torch.constant.bool false
    %25603 = torch.aten.index_put %25601, %25602, %25541, %false_23188 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25603, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_23189 = torch.constant.int 32
    %int2_23190 = torch.constant.int 2
    %int16_23191 = torch.constant.int 16
    %int1_23192 = torch.constant.int 1
    %int128_23193 = torch.constant.int 128
    %25604 = torch.prim.ListConstruct %3035, %int32_23189, %int2_23190, %int16_23191, %int1_23192, %int128_23193 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25605 = torch.aten.view %25603, %25604 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25605, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_23194 = torch.constant.int 131072
    %25606 = torch.prim.ListConstruct %3035, %int131072_23194 : (!torch.int, !torch.int) -> !torch.list<int>
    %25607 = torch.aten.view %25605, %25606 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %25607, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_23195 = torch.constant.int 32
    %int2_23196 = torch.constant.int 2
    %int16_23197 = torch.constant.int 16
    %int1_23198 = torch.constant.int 1
    %int128_23199 = torch.constant.int 128
    %25608 = torch.prim.ListConstruct %3038, %int32_23195, %int2_23196, %int16_23197, %int1_23198, %int128_23199 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25609 = torch.aten.view %23758, %25608 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25609, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_23200 = torch.constant.int 32
    %25610 = torch.aten.mul.int %3038, %int32_23200 : !torch.int, !torch.int -> !torch.int
    %int2_23201 = torch.constant.int 2
    %25611 = torch.aten.mul.int %25610, %int2_23201 : !torch.int, !torch.int -> !torch.int
    %int16_23202 = torch.constant.int 16
    %int1_23203 = torch.constant.int 1
    %int128_23204 = torch.constant.int 128
    %25612 = torch.prim.ListConstruct %25611, %int16_23202, %int1_23203, %int128_23204 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25613 = torch.aten.view %25609, %25612 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25613, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25614 = torch.prim.ListConstruct %25527 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_23205 = torch.constant.bool false
    %25615 = torch.aten.index_put %25613, %25614, %25543, %false_23205 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25615, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_23206 = torch.constant.int 32
    %int2_23207 = torch.constant.int 2
    %int16_23208 = torch.constant.int 16
    %int1_23209 = torch.constant.int 1
    %int128_23210 = torch.constant.int 128
    %25616 = torch.prim.ListConstruct %3038, %int32_23206, %int2_23207, %int16_23208, %int1_23209, %int128_23210 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25617 = torch.aten.view %25615, %25616 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25617, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_23211 = torch.constant.int 131072
    %25618 = torch.prim.ListConstruct %3038, %int131072_23211 : (!torch.int, !torch.int) -> !torch.list<int>
    %25619 = torch.aten.view %25617, %25618 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %25619, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_23212 = torch.constant.int 32
    %int2_23213 = torch.constant.int 2
    %int16_23214 = torch.constant.int 16
    %int1_23215 = torch.constant.int 1
    %int128_23216 = torch.constant.int 128
    %25620 = torch.prim.ListConstruct %3041, %int32_23212, %int2_23213, %int16_23214, %int1_23215, %int128_23216 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25621 = torch.aten.view %23770, %25620 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25621, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_23217 = torch.constant.int 32
    %25622 = torch.aten.mul.int %3041, %int32_23217 : !torch.int, !torch.int -> !torch.int
    %int2_23218 = torch.constant.int 2
    %25623 = torch.aten.mul.int %25622, %int2_23218 : !torch.int, !torch.int -> !torch.int
    %int16_23219 = torch.constant.int 16
    %int1_23220 = torch.constant.int 1
    %int128_23221 = torch.constant.int 128
    %25624 = torch.prim.ListConstruct %25623, %int16_23219, %int1_23220, %int128_23221 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25625 = torch.aten.view %25621, %25624 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25625, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25626 = torch.prim.ListConstruct %25529 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_23222 = torch.constant.bool false
    %25627 = torch.aten.index_put %25625, %25626, %25545, %false_23222 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25627, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_23223 = torch.constant.int 32
    %int2_23224 = torch.constant.int 2
    %int16_23225 = torch.constant.int 16
    %int1_23226 = torch.constant.int 1
    %int128_23227 = torch.constant.int 128
    %25628 = torch.prim.ListConstruct %3041, %int32_23223, %int2_23224, %int16_23225, %int1_23226, %int128_23227 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25629 = torch.aten.view %25627, %25628 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25629, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_23228 = torch.constant.int 131072
    %25630 = torch.prim.ListConstruct %3041, %int131072_23228 : (!torch.int, !torch.int) -> !torch.list<int>
    %25631 = torch.aten.view %25629, %25630 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %25631, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_23229 = torch.constant.int 32
    %int2_23230 = torch.constant.int 2
    %int16_23231 = torch.constant.int 16
    %int1_23232 = torch.constant.int 1
    %int128_23233 = torch.constant.int 128
    %25632 = torch.prim.ListConstruct %3044, %int32_23229, %int2_23230, %int16_23231, %int1_23232, %int128_23233 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25633 = torch.aten.view %23782, %25632 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25633, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_23234 = torch.constant.int 32
    %25634 = torch.aten.mul.int %3044, %int32_23234 : !torch.int, !torch.int -> !torch.int
    %int2_23235 = torch.constant.int 2
    %25635 = torch.aten.mul.int %25634, %int2_23235 : !torch.int, !torch.int -> !torch.int
    %int16_23236 = torch.constant.int 16
    %int1_23237 = torch.constant.int 1
    %int128_23238 = torch.constant.int 128
    %25636 = torch.prim.ListConstruct %25635, %int16_23236, %int1_23237, %int128_23238 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25637 = torch.aten.view %25633, %25636 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25637, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %25638 = torch.prim.ListConstruct %25531 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_23239 = torch.constant.bool false
    %25639 = torch.aten.index_put %25637, %25638, %25547, %false_23239 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %25639, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_23240 = torch.constant.int 32
    %int2_23241 = torch.constant.int 2
    %int16_23242 = torch.constant.int 16
    %int1_23243 = torch.constant.int 1
    %int128_23244 = torch.constant.int 128
    %25640 = torch.prim.ListConstruct %3044, %int32_23240, %int2_23241, %int16_23242, %int1_23243, %int128_23244 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25641 = torch.aten.view %25639, %25640 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %25641, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_23245 = torch.constant.int 131072
    %25642 = torch.prim.ListConstruct %3044, %int131072_23245 : (!torch.int, !torch.int) -> !torch.list<int>
    %25643 = torch.aten.view %25641, %25642 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %25643, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
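    // With the cache slabs updated, the trace turns to the attention
    // computation. The unsqueezes below insert a broadcast axis at dim -2 into
    // each per-device key tensor [4,?,1,128], setting up the
    // grouped-query-attention expansion in which a single KV head serves four
    // query heads on every shard.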
    %int-2_23246 = torch.constant.int -2
    %25644 = torch.aten.unsqueeze %25258, %int-2_23246 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23247 = torch.constant.int -2
    %25645 = torch.aten.unsqueeze %25273, %int-2_23247 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23248 = torch.constant.int -2
    %25646 = torch.aten.unsqueeze %25288, %int-2_23248 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23249 = torch.constant.int -2
    %25647 = torch.aten.unsqueeze %25303, %int-2_23249 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23250 = torch.constant.int -2
    %25648 = torch.aten.unsqueeze %25318, %int-2_23250 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23251 = torch.constant.int -2
    %25649 = torch.aten.unsqueeze %25333, %int-2_23251 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23252 = torch.constant.int -2
    %25650 = torch.aten.unsqueeze %25348, %int-2_23252 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23253 = torch.constant.int -2
    %25651 = torch.aten.unsqueeze %25363, %int-2_23253 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
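    // Broadcast the singleton axis to 4 with aten.expand (implicit = false),
    // turning each [4,?,1,1,128] key tensor into [4,?,1,4,128].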
    %int4_23254 = torch.constant.int 4
    %int1_23255 = torch.constant.int 1
    %int4_23256 = torch.constant.int 4
    %int128_23257 = torch.constant.int 128
    %25652 = torch.prim.ListConstruct %int4_23254, %25244, %int1_23255, %int4_23256, %int128_23257 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23258 = torch.constant.bool false
    %25653 = torch.aten.expand %25644, %25652, %false_23258 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23259 = torch.constant.int 4
    %int1_23260 = torch.constant.int 1
    %int4_23261 = torch.constant.int 4
    %int128_23262 = torch.constant.int 128
    %25654 = torch.prim.ListConstruct %int4_23259, %25244, %int1_23260, %int4_23261, %int128_23262 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23263 = torch.constant.bool false
    %25655 = torch.aten.expand %25645, %25654, %false_23263 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23264 = torch.constant.int 4
    %int1_23265 = torch.constant.int 1
    %int4_23266 = torch.constant.int 4
    %int128_23267 = torch.constant.int 128
    %25656 = torch.prim.ListConstruct %int4_23264, %25244, %int1_23265, %int4_23266, %int128_23267 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23268 = torch.constant.bool false
    %25657 = torch.aten.expand %25646, %25656, %false_23268 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23269 = torch.constant.int 4
    %int1_23270 = torch.constant.int 1
    %int4_23271 = torch.constant.int 4
    %int128_23272 = torch.constant.int 128
    %25658 = torch.prim.ListConstruct %int4_23269, %25244, %int1_23270, %int4_23271, %int128_23272 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23273 = torch.constant.bool false
    %25659 = torch.aten.expand %25647, %25658, %false_23273 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23274 = torch.constant.int 4
    %int1_23275 = torch.constant.int 1
    %int4_23276 = torch.constant.int 4
    %int128_23277 = torch.constant.int 128
    %25660 = torch.prim.ListConstruct %int4_23274, %25244, %int1_23275, %int4_23276, %int128_23277 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23278 = torch.constant.bool false
    %25661 = torch.aten.expand %25648, %25660, %false_23278 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23279 = torch.constant.int 4
    %int1_23280 = torch.constant.int 1
    %int4_23281 = torch.constant.int 4
    %int128_23282 = torch.constant.int 128
    %25662 = torch.prim.ListConstruct %int4_23279, %25244, %int1_23280, %int4_23281, %int128_23282 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23283 = torch.constant.bool false
    %25663 = torch.aten.expand %25649, %25662, %false_23283 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23284 = torch.constant.int 4
    %int1_23285 = torch.constant.int 1
    %int4_23286 = torch.constant.int 4
    %int128_23287 = torch.constant.int 128
    %25664 = torch.prim.ListConstruct %int4_23284, %25244, %int1_23285, %int4_23286, %int128_23287 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23288 = torch.constant.bool false
    %25665 = torch.aten.expand %25650, %25664, %false_23288 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23289 = torch.constant.int 4
    %int1_23290 = torch.constant.int 1
    %int4_23291 = torch.constant.int 4
    %int128_23292 = torch.constant.int 128
    %25666 = torch.prim.ListConstruct %int4_23289, %25244, %int1_23290, %int4_23291, %int128_23292 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23293 = torch.constant.bool false
    %25667 = torch.aten.expand %25651, %25666, %false_23293 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
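    // Collapse the broadcast axis: [4,?,1,4,128] -> [4,?,4,128], i.e. four
    // effective key heads per device.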
    %int4_23294 = torch.constant.int 4
    %int4_23295 = torch.constant.int 4
    %int128_23296 = torch.constant.int 128
    %25668 = torch.prim.ListConstruct %int4_23294, %25244, %int4_23295, %int128_23296 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25669 = torch.aten.view %25653, %25668 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23297 = torch.constant.int 4
    %int4_23298 = torch.constant.int 4
    %int128_23299 = torch.constant.int 128
    %25670 = torch.prim.ListConstruct %int4_23297, %25244, %int4_23298, %int128_23299 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25671 = torch.aten.view %25655, %25670 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23300 = torch.constant.int 4
    %int4_23301 = torch.constant.int 4
    %int128_23302 = torch.constant.int 128
    %25672 = torch.prim.ListConstruct %int4_23300, %25244, %int4_23301, %int128_23302 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25673 = torch.aten.view %25657, %25672 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23303 = torch.constant.int 4
    %int4_23304 = torch.constant.int 4
    %int128_23305 = torch.constant.int 128
    %25674 = torch.prim.ListConstruct %int4_23303, %25244, %int4_23304, %int128_23305 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25675 = torch.aten.view %25659, %25674 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23306 = torch.constant.int 4
    %int4_23307 = torch.constant.int 4
    %int128_23308 = torch.constant.int 128
    %25676 = torch.prim.ListConstruct %int4_23306, %25244, %int4_23307, %int128_23308 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25677 = torch.aten.view %25661, %25676 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23309 = torch.constant.int 4
    %int4_23310 = torch.constant.int 4
    %int128_23311 = torch.constant.int 128
    %25678 = torch.prim.ListConstruct %int4_23309, %25244, %int4_23310, %int128_23311 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25679 = torch.aten.view %25663, %25678 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23312 = torch.constant.int 4
    %int4_23313 = torch.constant.int 4
    %int128_23314 = torch.constant.int 128
    %25680 = torch.prim.ListConstruct %int4_23312, %25244, %int4_23313, %int128_23314 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25681 = torch.aten.view %25665, %25680 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23315 = torch.constant.int 4
    %int4_23316 = torch.constant.int 4
    %int128_23317 = torch.constant.int 128
    %25682 = torch.prim.ListConstruct %int4_23315, %25244, %int4_23316, %int128_23317 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25683 = torch.aten.view %25667, %25682 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
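    // The same grouped-query expansion is now applied to the per-device value
    // tensors (%25033 ... %25047), starting with the dim -2 unsqueeze.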
    %int-2_23318 = torch.constant.int -2
    %25684 = torch.aten.unsqueeze %25033, %int-2_23318 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23319 = torch.constant.int -2
    %25685 = torch.aten.unsqueeze %25035, %int-2_23319 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23320 = torch.constant.int -2
    %25686 = torch.aten.unsqueeze %25037, %int-2_23320 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23321 = torch.constant.int -2
    %25687 = torch.aten.unsqueeze %25039, %int-2_23321 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23322 = torch.constant.int -2
    %25688 = torch.aten.unsqueeze %25041, %int-2_23322 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23323 = torch.constant.int -2
    %25689 = torch.aten.unsqueeze %25043, %int-2_23323 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23324 = torch.constant.int -2
    %25690 = torch.aten.unsqueeze %25045, %int-2_23324 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_23325 = torch.constant.int -2
    %25691 = torch.aten.unsqueeze %25047, %int-2_23325 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %25691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
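    // Re-read the dynamic sequence extent (dim 1 of %24957) and expand each
    // value tensor to [4,?,1,4,128], mirroring the key expansion above.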
    %int1_23326 = torch.constant.int 1
    %25692 = torch.aten.size.int %24957, %int1_23326 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_23327 = torch.constant.int 4
    %int1_23328 = torch.constant.int 1
    %int4_23329 = torch.constant.int 4
    %int128_23330 = torch.constant.int 128
    %25693 = torch.prim.ListConstruct %int4_23327, %25692, %int1_23328, %int4_23329, %int128_23330 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23331 = torch.constant.bool false
    %25694 = torch.aten.expand %25684, %25693, %false_23331 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23332 = torch.constant.int 4
    %int1_23333 = torch.constant.int 1
    %int4_23334 = torch.constant.int 4
    %int128_23335 = torch.constant.int 128
    %25695 = torch.prim.ListConstruct %int4_23332, %25692, %int1_23333, %int4_23334, %int128_23335 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23336 = torch.constant.bool false
    %25696 = torch.aten.expand %25685, %25695, %false_23336 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23337 = torch.constant.int 4
    %int1_23338 = torch.constant.int 1
    %int4_23339 = torch.constant.int 4
    %int128_23340 = torch.constant.int 128
    %25697 = torch.prim.ListConstruct %int4_23337, %25692, %int1_23338, %int4_23339, %int128_23340 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23341 = torch.constant.bool false
    %25698 = torch.aten.expand %25686, %25697, %false_23341 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23342 = torch.constant.int 4
    %int1_23343 = torch.constant.int 1
    %int4_23344 = torch.constant.int 4
    %int128_23345 = torch.constant.int 128
    %25699 = torch.prim.ListConstruct %int4_23342, %25692, %int1_23343, %int4_23344, %int128_23345 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23346 = torch.constant.bool false
    %25700 = torch.aten.expand %25687, %25699, %false_23346 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23347 = torch.constant.int 4
    %int1_23348 = torch.constant.int 1
    %int4_23349 = torch.constant.int 4
    %int128_23350 = torch.constant.int 128
    %25701 = torch.prim.ListConstruct %int4_23347, %25692, %int1_23348, %int4_23349, %int128_23350 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23351 = torch.constant.bool false
    %25702 = torch.aten.expand %25688, %25701, %false_23351 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23352 = torch.constant.int 4
    %int1_23353 = torch.constant.int 1
    %int4_23354 = torch.constant.int 4
    %int128_23355 = torch.constant.int 128
    %25703 = torch.prim.ListConstruct %int4_23352, %25692, %int1_23353, %int4_23354, %int128_23355 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23356 = torch.constant.bool false
    %25704 = torch.aten.expand %25689, %25703, %false_23356 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23357 = torch.constant.int 4
    %int1_23358 = torch.constant.int 1
    %int4_23359 = torch.constant.int 4
    %int128_23360 = torch.constant.int 128
    %25705 = torch.prim.ListConstruct %int4_23357, %25692, %int1_23358, %int4_23359, %int128_23360 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23361 = torch.constant.bool false
    %25706 = torch.aten.expand %25690, %25705, %false_23361 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_23362 = torch.constant.int 4
    %int1_23363 = torch.constant.int 1
    %int4_23364 = torch.constant.int 4
    %int128_23365 = torch.constant.int 128
    %25707 = torch.prim.ListConstruct %int4_23362, %25692, %int1_23363, %int4_23364, %int128_23365 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_23366 = torch.constant.bool false
    %25708 = torch.aten.expand %25691, %25707, %false_23366 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %25708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
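    // Collapse the value tensors likewise: [4,?,1,4,128] -> [4,?,4,128].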
    %int4_23367 = torch.constant.int 4
    %int4_23368 = torch.constant.int 4
    %int128_23369 = torch.constant.int 128
    %25709 = torch.prim.ListConstruct %int4_23367, %25692, %int4_23368, %int128_23369 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25710 = torch.aten.view %25694, %25709 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23370 = torch.constant.int 4
    %int4_23371 = torch.constant.int 4
    %int128_23372 = torch.constant.int 128
    %25711 = torch.prim.ListConstruct %int4_23370, %25692, %int4_23371, %int128_23372 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25712 = torch.aten.view %25696, %25711 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23373 = torch.constant.int 4
    %int4_23374 = torch.constant.int 4
    %int128_23375 = torch.constant.int 128
    %25713 = torch.prim.ListConstruct %int4_23373, %25692, %int4_23374, %int128_23375 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25714 = torch.aten.view %25698, %25713 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23376 = torch.constant.int 4
    %int4_23377 = torch.constant.int 4
    %int128_23378 = torch.constant.int 128
    %25715 = torch.prim.ListConstruct %int4_23376, %25692, %int4_23377, %int128_23378 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25716 = torch.aten.view %25700, %25715 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23379 = torch.constant.int 4
    %int4_23380 = torch.constant.int 4
    %int128_23381 = torch.constant.int 128
    %25717 = torch.prim.ListConstruct %int4_23379, %25692, %int4_23380, %int128_23381 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25718 = torch.aten.view %25702, %25717 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23382 = torch.constant.int 4
    %int4_23383 = torch.constant.int 4
    %int128_23384 = torch.constant.int 128
    %25719 = torch.prim.ListConstruct %int4_23382, %25692, %int4_23383, %int128_23384 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25720 = torch.aten.view %25704, %25719 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23385 = torch.constant.int 4
    %int4_23386 = torch.constant.int 4
    %int128_23387 = torch.constant.int 128
    %25721 = torch.prim.ListConstruct %int4_23385, %25692, %int4_23386, %int128_23387 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25722 = torch.aten.view %25706, %25721 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_23388 = torch.constant.int 4
    %int4_23389 = torch.constant.int 4
    %int128_23390 = torch.constant.int 128
    %25723 = torch.prim.ListConstruct %int4_23388, %25692, %int4_23389, %int128_23390 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25724 = torch.aten.view %25708, %25723 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
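    // Transpose the queries (%25100 ... %25205) and the expanded keys and
    // values from [batch, seq, heads, dim] = [4,?,4,128] to the
    // [batch, heads, seq, dim] = [4,4,?,128] layout that scaled dot-product
    // attention expects.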
    %int1_23391 = torch.constant.int 1
    %int2_23392 = torch.constant.int 2
    %25725 = torch.aten.transpose.int %25100, %int1_23391, %int2_23392 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25725, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23393 = torch.constant.int 1
    %int2_23394 = torch.constant.int 2
    %25726 = torch.aten.transpose.int %25115, %int1_23393, %int2_23394 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25726, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23395 = torch.constant.int 1
    %int2_23396 = torch.constant.int 2
    %25727 = torch.aten.transpose.int %25130, %int1_23395, %int2_23396 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25727, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23397 = torch.constant.int 1
    %int2_23398 = torch.constant.int 2
    %25728 = torch.aten.transpose.int %25145, %int1_23397, %int2_23398 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25728, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23399 = torch.constant.int 1
    %int2_23400 = torch.constant.int 2
    %25729 = torch.aten.transpose.int %25160, %int1_23399, %int2_23400 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25729, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23401 = torch.constant.int 1
    %int2_23402 = torch.constant.int 2
    %25730 = torch.aten.transpose.int %25175, %int1_23401, %int2_23402 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25730, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23403 = torch.constant.int 1
    %int2_23404 = torch.constant.int 2
    %25731 = torch.aten.transpose.int %25190, %int1_23403, %int2_23404 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25731, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23405 = torch.constant.int 1
    %int2_23406 = torch.constant.int 2
    %25732 = torch.aten.transpose.int %25205, %int1_23405, %int2_23406 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25732, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23407 = torch.constant.int 1
    %int2_23408 = torch.constant.int 2
    %25733 = torch.aten.transpose.int %25669, %int1_23407, %int2_23408 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25733, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23409 = torch.constant.int 1
    %int2_23410 = torch.constant.int 2
    %25734 = torch.aten.transpose.int %25671, %int1_23409, %int2_23410 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25734, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23411 = torch.constant.int 1
    %int2_23412 = torch.constant.int 2
    %25735 = torch.aten.transpose.int %25673, %int1_23411, %int2_23412 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25735, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23413 = torch.constant.int 1
    %int2_23414 = torch.constant.int 2
    %25736 = torch.aten.transpose.int %25675, %int1_23413, %int2_23414 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25736, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23415 = torch.constant.int 1
    %int2_23416 = torch.constant.int 2
    %25737 = torch.aten.transpose.int %25677, %int1_23415, %int2_23416 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25737, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23417 = torch.constant.int 1
    %int2_23418 = torch.constant.int 2
    %25738 = torch.aten.transpose.int %25679, %int1_23417, %int2_23418 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25738, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23419 = torch.constant.int 1
    %int2_23420 = torch.constant.int 2
    %25739 = torch.aten.transpose.int %25681, %int1_23419, %int2_23420 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25739, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23421 = torch.constant.int 1
    %int2_23422 = torch.constant.int 2
    %25740 = torch.aten.transpose.int %25683, %int1_23421, %int2_23422 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25740, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23423 = torch.constant.int 1
    %int2_23424 = torch.constant.int 2
    %25741 = torch.aten.transpose.int %25710, %int1_23423, %int2_23424 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25741, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23425 = torch.constant.int 1
    %int2_23426 = torch.constant.int 2
    %25742 = torch.aten.transpose.int %25712, %int1_23425, %int2_23426 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25742, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23427 = torch.constant.int 1
    %int2_23428 = torch.constant.int 2
    %25743 = torch.aten.transpose.int %25714, %int1_23427, %int2_23428 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25743, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23429 = torch.constant.int 1
    %int2_23430 = torch.constant.int 2
    %25744 = torch.aten.transpose.int %25716, %int1_23429, %int2_23430 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25744, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23431 = torch.constant.int 1
    %int2_23432 = torch.constant.int 2
    %25745 = torch.aten.transpose.int %25718, %int1_23431, %int2_23432 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25745, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23433 = torch.constant.int 1
    %int2_23434 = torch.constant.int 2
    %25746 = torch.aten.transpose.int %25720, %int1_23433, %int2_23434 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25746, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23435 = torch.constant.int 1
    %int2_23436 = torch.constant.int 2
    %25747 = torch.aten.transpose.int %25722, %int1_23435, %int2_23436 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25747, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_23437 = torch.constant.int 1
    %int2_23438 = torch.constant.int 2
    %25748 = torch.aten.transpose.int %25724, %int1_23437, %int2_23438 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %25748, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
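    // One causal flash-attention call per device shard. Operand order is
    // (query, key, value, dropout_p, is_causal, attn_mask, scale): dropout 0.0,
    // is_causal = true, no mask and no scale override. The second result
    // ([4,4,?] f32) is most likely the per-row logsumexp; only result #0 is
    // consumed below.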
    %float0.000000e00_23439 = torch.constant.float 0.000000e+00
    %true_23440 = torch.constant.bool true
    %none_23441 = torch.constant.none
    %none_23442 = torch.constant.none
    %25749:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%25725, %25733, %25741, %float0.000000e00_23439, %true_23440, %none_23441, %none_23442) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %25749#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_23443 = torch.constant.float 0.000000e+00
    %true_23444 = torch.constant.bool true
    %none_23445 = torch.constant.none
    %none_23446 = torch.constant.none
    %25750:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%25726, %25734, %25742, %float0.000000e00_23443, %true_23444, %none_23445, %none_23446) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %25750#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_23447 = torch.constant.float 0.000000e+00
    %true_23448 = torch.constant.bool true
    %none_23449 = torch.constant.none
    %none_23450 = torch.constant.none
    %25751:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%25727, %25735, %25743, %float0.000000e00_23447, %true_23448, %none_23449, %none_23450) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %25751#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_23451 = torch.constant.float 0.000000e+00
    %true_23452 = torch.constant.bool true
    %none_23453 = torch.constant.none
    %none_23454 = torch.constant.none
    %25752:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%25728, %25736, %25744, %float0.000000e00_23451, %true_23452, %none_23453, %none_23454) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %25752#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_23455 = torch.constant.float 0.000000e+00
    %true_23456 = torch.constant.bool true
    %none_23457 = torch.constant.none
    %none_23458 = torch.constant.none
    %25753:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%25729, %25737, %25745, %float0.000000e00_23455, %true_23456, %none_23457, %none_23458) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %25753#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_23459 = torch.constant.float 0.000000e+00
    %true_23460 = torch.constant.bool true
    %none_23461 = torch.constant.none
    %none_23462 = torch.constant.none
    %25754:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%25730, %25738, %25746, %float0.000000e00_23459, %true_23460, %none_23461, %none_23462) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %25754#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_23463 = torch.constant.float 0.000000e+00
    %true_23464 = torch.constant.bool true
    %none_23465 = torch.constant.none
    %none_23466 = torch.constant.none
    %25755:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%25731, %25739, %25747, %float0.000000e00_23463, %true_23464, %none_23465, %none_23466) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %25755#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_23467 = torch.constant.float 0.000000e+00
    %true_23468 = torch.constant.bool true
    %none_23469 = torch.constant.none
    %none_23470 = torch.constant.none
    %25756:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%25732, %25740, %25748, %float0.000000e00_23467, %true_23468, %none_23469, %none_23470) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %25756#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
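    // Transpose each attention output back to [batch, seq, heads, dim].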
    %int1_23471 = torch.constant.int 1
    %int2_23472 = torch.constant.int 2
    %25757 = torch.aten.transpose.int %25749#0, %int1_23471, %int2_23472 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_23473 = torch.constant.int 1
    %int2_23474 = torch.constant.int 2
    %25758 = torch.aten.transpose.int %25750#0, %int1_23473, %int2_23474 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_23475 = torch.constant.int 1
    %int2_23476 = torch.constant.int 2
    %25759 = torch.aten.transpose.int %25751#0, %int1_23475, %int2_23476 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_23477 = torch.constant.int 1
    %int2_23478 = torch.constant.int 2
    %25760 = torch.aten.transpose.int %25752#0, %int1_23477, %int2_23478 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_23479 = torch.constant.int 1
    %int2_23480 = torch.constant.int 2
    %25761 = torch.aten.transpose.int %25753#0, %int1_23479, %int2_23480 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_23481 = torch.constant.int 1
    %int2_23482 = torch.constant.int 2
    %25762 = torch.aten.transpose.int %25754#0, %int1_23481, %int2_23482 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_23483 = torch.constant.int 1
    %int2_23484 = torch.constant.int 2
    %25763 = torch.aten.transpose.int %25755#0, %int1_23483, %int2_23484 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_23485 = torch.constant.int 1
    %int2_23486 = torch.constant.int 2
    %25764 = torch.aten.transpose.int %25756#0, %int1_23485, %int2_23486 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %25764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
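    // Merge the head and feature axes: [4,?,4,128] -> [4,?,512] per shard.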
    %int4_23487 = torch.constant.int 4
    %int512_23488 = torch.constant.int 512
    %25765 = torch.prim.ListConstruct %int4_23487, %25086, %int512_23488 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25766 = torch.aten.view %25757, %25765 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %25766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_23489 = torch.constant.int 4
    %int512_23490 = torch.constant.int 512
    %25767 = torch.prim.ListConstruct %int4_23489, %25101, %int512_23490 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25768 = torch.aten.view %25758, %25767 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %25768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_23491 = torch.constant.int 4
    %int512_23492 = torch.constant.int 512
    %25769 = torch.prim.ListConstruct %int4_23491, %25116, %int512_23492 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25770 = torch.aten.view %25759, %25769 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %25770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_23493 = torch.constant.int 4
    %int512_23494 = torch.constant.int 512
    %25771 = torch.prim.ListConstruct %int4_23493, %25131, %int512_23494 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25772 = torch.aten.view %25760, %25771 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %25772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_23495 = torch.constant.int 4
    %int512_23496 = torch.constant.int 512
    %25773 = torch.prim.ListConstruct %int4_23495, %25146, %int512_23496 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25774 = torch.aten.view %25761, %25773 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %25774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_23497 = torch.constant.int 4
    %int512_23498 = torch.constant.int 512
    %25775 = torch.prim.ListConstruct %int4_23497, %25161, %int512_23498 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25776 = torch.aten.view %25762, %25775 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %25776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_23499 = torch.constant.int 4
    %int512_23500 = torch.constant.int 512
    %25777 = torch.prim.ListConstruct %int4_23499, %25176, %int512_23500 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25778 = torch.aten.view %25763, %25777 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %25778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_23501 = torch.constant.int 4
    %int512_23502 = torch.constant.int 512
    %25779 = torch.prim.ListConstruct %int4_23501, %25191, %int512_23502 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25780 = torch.aten.view %25764, %25779 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %25780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
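    // Transpose the projection weight shards [4096,512] -> [512,4096] ahead of
    // the matmuls. %904 ... %911 are plausibly the eight attention output
    // (attn_output) weight shards, one per device; the IR itself does not name
    // them here.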
    %int1_23503 = torch.constant.int 1
    %int0_23504 = torch.constant.int 0
    %25781 = torch.prim.ListConstruct %int1_23503, %int0_23504 : (!torch.int, !torch.int) -> !torch.list<int>
    %25782 = torch.aten.permute %904, %25781 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_23505 = torch.constant.int 1
    %int0_23506 = torch.constant.int 0
    %25783 = torch.prim.ListConstruct %int1_23505, %int0_23506 : (!torch.int, !torch.int) -> !torch.list<int>
    %25784 = torch.aten.permute %905, %25783 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_23507 = torch.constant.int 1
    %int0_23508 = torch.constant.int 0
    %25785 = torch.prim.ListConstruct %int1_23507, %int0_23508 : (!torch.int, !torch.int) -> !torch.list<int>
    %25786 = torch.aten.permute %906, %25785 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_23509 = torch.constant.int 1
    %int0_23510 = torch.constant.int 0
    %25787 = torch.prim.ListConstruct %int1_23509, %int0_23510 : (!torch.int, !torch.int) -> !torch.list<int>
    %25788 = torch.aten.permute %907, %25787 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_23511 = torch.constant.int 1
    %int0_23512 = torch.constant.int 0
    %25789 = torch.prim.ListConstruct %int1_23511, %int0_23512 : (!torch.int, !torch.int) -> !torch.list<int>
    %25790 = torch.aten.permute %908, %25789 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_23513 = torch.constant.int 1
    %int0_23514 = torch.constant.int 0
    %25791 = torch.prim.ListConstruct %int1_23513, %int0_23514 : (!torch.int, !torch.int) -> !torch.list<int>
    %25792 = torch.aten.permute %909, %25791 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_23515 = torch.constant.int 1
    %int0_23516 = torch.constant.int 0
    %25793 = torch.prim.ListConstruct %int1_23515, %int0_23516 : (!torch.int, !torch.int) -> !torch.list<int>
    %25794 = torch.aten.permute %910, %25793 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_23517 = torch.constant.int 1
    %int0_23518 = torch.constant.int 0
    %25795 = torch.prim.ListConstruct %int1_23517, %int0_23518 : (!torch.int, !torch.int) -> !torch.list<int>
    %25796 = torch.aten.permute %911, %25795 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
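    // Per-shard output projection: flatten each [4,?,512] attention result to
    // [?,512] (batch x seq rows), multiply by the permuted [512,4096] weight,
    // and reshape back to [4,?,4096].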
    %int4_23519 = torch.constant.int 4
    %25797 = torch.aten.mul.int %int4_23519, %25086 : !torch.int, !torch.int -> !torch.int
    %int512_23520 = torch.constant.int 512
    %25798 = torch.prim.ListConstruct %25797, %int512_23520 : (!torch.int, !torch.int) -> !torch.list<int>
    %25799 = torch.aten.view %25766, %25798 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %25799, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %25800 = torch.aten.mm %25799, %25782 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %25800, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23521 = torch.constant.int 4
    %int4096_23522 = torch.constant.int 4096
    %25801 = torch.prim.ListConstruct %int4_23521, %25086, %int4096_23522 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25802 = torch.aten.view %25800, %25801 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_23523 = torch.constant.int 4
    %25803 = torch.aten.mul.int %int4_23523, %25101 : !torch.int, !torch.int -> !torch.int
    %int512_23524 = torch.constant.int 512
    %25804 = torch.prim.ListConstruct %25803, %int512_23524 : (!torch.int, !torch.int) -> !torch.list<int>
    %25805 = torch.aten.view %25768, %25804 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %25805, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %25806 = torch.aten.mm %25805, %25784 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %25806, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23525 = torch.constant.int 4
    %int4096_23526 = torch.constant.int 4096
    %25807 = torch.prim.ListConstruct %int4_23525, %25101, %int4096_23526 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25808 = torch.aten.view %25806, %25807 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_23527 = torch.constant.int 4
    %25809 = torch.aten.mul.int %int4_23527, %25116 : !torch.int, !torch.int -> !torch.int
    %int512_23528 = torch.constant.int 512
    %25810 = torch.prim.ListConstruct %25809, %int512_23528 : (!torch.int, !torch.int) -> !torch.list<int>
    %25811 = torch.aten.view %25770, %25810 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %25811, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %25812 = torch.aten.mm %25811, %25786 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %25812, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23529 = torch.constant.int 4
    %int4096_23530 = torch.constant.int 4096
    %25813 = torch.prim.ListConstruct %int4_23529, %25116, %int4096_23530 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25814 = torch.aten.view %25812, %25813 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_23531 = torch.constant.int 4
    %25815 = torch.aten.mul.int %int4_23531, %25131 : !torch.int, !torch.int -> !torch.int
    %int512_23532 = torch.constant.int 512
    %25816 = torch.prim.ListConstruct %25815, %int512_23532 : (!torch.int, !torch.int) -> !torch.list<int>
    %25817 = torch.aten.view %25772, %25816 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %25817, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %25818 = torch.aten.mm %25817, %25788 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %25818, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23533 = torch.constant.int 4
    %int4096_23534 = torch.constant.int 4096
    %25819 = torch.prim.ListConstruct %int4_23533, %25131, %int4096_23534 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25820 = torch.aten.view %25818, %25819 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_23535 = torch.constant.int 4
    %25821 = torch.aten.mul.int %int4_23535, %25146 : !torch.int, !torch.int -> !torch.int
    %int512_23536 = torch.constant.int 512
    %25822 = torch.prim.ListConstruct %25821, %int512_23536 : (!torch.int, !torch.int) -> !torch.list<int>
    %25823 = torch.aten.view %25774, %25822 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %25823, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %25824 = torch.aten.mm %25823, %25790 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %25824, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23537 = torch.constant.int 4
    %int4096_23538 = torch.constant.int 4096
    %25825 = torch.prim.ListConstruct %int4_23537, %25146, %int4096_23538 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25826 = torch.aten.view %25824, %25825 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_23539 = torch.constant.int 4
    %25827 = torch.aten.mul.int %int4_23539, %25161 : !torch.int, !torch.int -> !torch.int
    %int512_23540 = torch.constant.int 512
    %25828 = torch.prim.ListConstruct %25827, %int512_23540 : (!torch.int, !torch.int) -> !torch.list<int>
    %25829 = torch.aten.view %25776, %25828 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %25829, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %25830 = torch.aten.mm %25829, %25792 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %25830, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23541 = torch.constant.int 4
    %int4096_23542 = torch.constant.int 4096
    %25831 = torch.prim.ListConstruct %int4_23541, %25161, %int4096_23542 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25832 = torch.aten.view %25830, %25831 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_23543 = torch.constant.int 4
    %25833 = torch.aten.mul.int %int4_23543, %25176 : !torch.int, !torch.int -> !torch.int
    %int512_23544 = torch.constant.int 512
    %25834 = torch.prim.ListConstruct %25833, %int512_23544 : (!torch.int, !torch.int) -> !torch.list<int>
    %25835 = torch.aten.view %25778, %25834 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %25835, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %25836 = torch.aten.mm %25835, %25794 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %25836, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23545 = torch.constant.int 4
    %int4096_23546 = torch.constant.int 4096
    %25837 = torch.prim.ListConstruct %int4_23545, %25176, %int4096_23546 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25838 = torch.aten.view %25836, %25837 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_23547 = torch.constant.int 4
    %25839 = torch.aten.mul.int %int4_23547, %25191 : !torch.int, !torch.int -> !torch.int
    %int512_23548 = torch.constant.int 512
    %25840 = torch.prim.ListConstruct %25839, %int512_23548 : (!torch.int, !torch.int) -> !torch.list<int>
    %25841 = torch.aten.view %25780, %25840 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %25841, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %25842 = torch.aten.mm %25841, %25796 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %25842, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23549 = torch.constant.int 4
    %int4096_23550 = torch.constant.int 4096
    %25843 = torch.prim.ListConstruct %int4_23549, %25191, %int4096_23550 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %25844 = torch.aten.view %25842, %25843 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
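    // Unrolled all-reduce over the eight partial products (%25802, %25808, %25814,
    // %25820, %25826, %25832, %25838, %25844). For each destination device, the
    // seven remote partials are moved with flow.tensor.transfer (round-tripping
    // through builtin tensors via the torch_c casts) and accumulated with a chain
    // of torch.aten.add.Tensor ops, while the device-local partial is used in place:
    //   result_d = partial_d + sum over r != d of transfer(partial_r -> device_d)
    // First destination @__device_0: local partial %25802, reduced result %25872.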
    %25845 = torch_c.to_builtin_tensor %25808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23551 = arith.constant 1 : index
    %dim_23552 = tensor.dim %25845, %c1_23551 : tensor<4x?x4096xf16>
    %25846 = flow.tensor.transfer %25845 : tensor<4x?x4096xf16>{%dim_23552} to #hal.device.promise<@__device_0>
    %25847 = torch_c.from_builtin_tensor %25846 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25848 = torch_c.to_builtin_tensor %25814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23553 = arith.constant 1 : index
    %dim_23554 = tensor.dim %25848, %c1_23553 : tensor<4x?x4096xf16>
    %25849 = flow.tensor.transfer %25848 : tensor<4x?x4096xf16>{%dim_23554} to #hal.device.promise<@__device_0>
    %25850 = torch_c.from_builtin_tensor %25849 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25851 = torch_c.to_builtin_tensor %25820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23555 = arith.constant 1 : index
    %dim_23556 = tensor.dim %25851, %c1_23555 : tensor<4x?x4096xf16>
    %25852 = flow.tensor.transfer %25851 : tensor<4x?x4096xf16>{%dim_23556} to #hal.device.promise<@__device_0>
    %25853 = torch_c.from_builtin_tensor %25852 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25854 = torch_c.to_builtin_tensor %25826 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23557 = arith.constant 1 : index
    %dim_23558 = tensor.dim %25854, %c1_23557 : tensor<4x?x4096xf16>
    %25855 = flow.tensor.transfer %25854 : tensor<4x?x4096xf16>{%dim_23558} to #hal.device.promise<@__device_0>
    %25856 = torch_c.from_builtin_tensor %25855 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25857 = torch_c.to_builtin_tensor %25832 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23559 = arith.constant 1 : index
    %dim_23560 = tensor.dim %25857, %c1_23559 : tensor<4x?x4096xf16>
    %25858 = flow.tensor.transfer %25857 : tensor<4x?x4096xf16>{%dim_23560} to #hal.device.promise<@__device_0>
    %25859 = torch_c.from_builtin_tensor %25858 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25860 = torch_c.to_builtin_tensor %25838 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23561 = arith.constant 1 : index
    %dim_23562 = tensor.dim %25860, %c1_23561 : tensor<4x?x4096xf16>
    %25861 = flow.tensor.transfer %25860 : tensor<4x?x4096xf16>{%dim_23562} to #hal.device.promise<@__device_0>
    %25862 = torch_c.from_builtin_tensor %25861 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25863 = torch_c.to_builtin_tensor %25844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23563 = arith.constant 1 : index
    %dim_23564 = tensor.dim %25863, %c1_23563 : tensor<4x?x4096xf16>
    %25864 = flow.tensor.transfer %25863 : tensor<4x?x4096xf16>{%dim_23564} to #hal.device.promise<@__device_0>
    %25865 = torch_c.from_builtin_tensor %25864 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23565 = torch.constant.int 1
    %25866 = torch.aten.add.Tensor %25802, %25847, %int1_23565 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23566 = torch.constant.int 1
    %25867 = torch.aten.add.Tensor %25866, %25850, %int1_23566 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23567 = torch.constant.int 1
    %25868 = torch.aten.add.Tensor %25867, %25853, %int1_23567 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23568 = torch.constant.int 1
    %25869 = torch.aten.add.Tensor %25868, %25856, %int1_23568 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23569 = torch.constant.int 1
    %25870 = torch.aten.add.Tensor %25869, %25859, %int1_23569 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23570 = torch.constant.int 1
    %25871 = torch.aten.add.Tensor %25870, %25862, %int1_23570 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23571 = torch.constant.int 1
    %25872 = torch.aten.add.Tensor %25871, %25865, %int1_23571 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
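    // Same reduction targeting @__device_1: local partial %25808, result %25900.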
    %25873 = torch_c.to_builtin_tensor %25802 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23572 = arith.constant 1 : index
    %dim_23573 = tensor.dim %25873, %c1_23572 : tensor<4x?x4096xf16>
    %25874 = flow.tensor.transfer %25873 : tensor<4x?x4096xf16>{%dim_23573} to #hal.device.promise<@__device_1>
    %25875 = torch_c.from_builtin_tensor %25874 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25876 = torch_c.to_builtin_tensor %25814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23574 = arith.constant 1 : index
    %dim_23575 = tensor.dim %25876, %c1_23574 : tensor<4x?x4096xf16>
    %25877 = flow.tensor.transfer %25876 : tensor<4x?x4096xf16>{%dim_23575} to #hal.device.promise<@__device_1>
    %25878 = torch_c.from_builtin_tensor %25877 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25879 = torch_c.to_builtin_tensor %25820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23576 = arith.constant 1 : index
    %dim_23577 = tensor.dim %25879, %c1_23576 : tensor<4x?x4096xf16>
    %25880 = flow.tensor.transfer %25879 : tensor<4x?x4096xf16>{%dim_23577} to #hal.device.promise<@__device_1>
    %25881 = torch_c.from_builtin_tensor %25880 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25882 = torch_c.to_builtin_tensor %25826 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23578 = arith.constant 1 : index
    %dim_23579 = tensor.dim %25882, %c1_23578 : tensor<4x?x4096xf16>
    %25883 = flow.tensor.transfer %25882 : tensor<4x?x4096xf16>{%dim_23579} to #hal.device.promise<@__device_1>
    %25884 = torch_c.from_builtin_tensor %25883 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25885 = torch_c.to_builtin_tensor %25832 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23580 = arith.constant 1 : index
    %dim_23581 = tensor.dim %25885, %c1_23580 : tensor<4x?x4096xf16>
    %25886 = flow.tensor.transfer %25885 : tensor<4x?x4096xf16>{%dim_23581} to #hal.device.promise<@__device_1>
    %25887 = torch_c.from_builtin_tensor %25886 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25888 = torch_c.to_builtin_tensor %25838 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23582 = arith.constant 1 : index
    %dim_23583 = tensor.dim %25888, %c1_23582 : tensor<4x?x4096xf16>
    %25889 = flow.tensor.transfer %25888 : tensor<4x?x4096xf16>{%dim_23583} to #hal.device.promise<@__device_1>
    %25890 = torch_c.from_builtin_tensor %25889 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25891 = torch_c.to_builtin_tensor %25844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23584 = arith.constant 1 : index
    %dim_23585 = tensor.dim %25891, %c1_23584 : tensor<4x?x4096xf16>
    %25892 = flow.tensor.transfer %25891 : tensor<4x?x4096xf16>{%dim_23585} to #hal.device.promise<@__device_1>
    %25893 = torch_c.from_builtin_tensor %25892 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23586 = torch.constant.int 1
    %25894 = torch.aten.add.Tensor %25875, %25808, %int1_23586 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23587 = torch.constant.int 1
    %25895 = torch.aten.add.Tensor %25894, %25878, %int1_23587 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23588 = torch.constant.int 1
    %25896 = torch.aten.add.Tensor %25895, %25881, %int1_23588 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23589 = torch.constant.int 1
    %25897 = torch.aten.add.Tensor %25896, %25884, %int1_23589 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23590 = torch.constant.int 1
    %25898 = torch.aten.add.Tensor %25897, %25887, %int1_23590 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23591 = torch.constant.int 1
    %25899 = torch.aten.add.Tensor %25898, %25890, %int1_23591 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23592 = torch.constant.int 1
    %25900 = torch.aten.add.Tensor %25899, %25893, %int1_23592 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
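    // Same reduction targeting @__device_2: local partial %25814, result %25928.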
    %25901 = torch_c.to_builtin_tensor %25802 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23593 = arith.constant 1 : index
    %dim_23594 = tensor.dim %25901, %c1_23593 : tensor<4x?x4096xf16>
    %25902 = flow.tensor.transfer %25901 : tensor<4x?x4096xf16>{%dim_23594} to #hal.device.promise<@__device_2>
    %25903 = torch_c.from_builtin_tensor %25902 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25904 = torch_c.to_builtin_tensor %25808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23595 = arith.constant 1 : index
    %dim_23596 = tensor.dim %25904, %c1_23595 : tensor<4x?x4096xf16>
    %25905 = flow.tensor.transfer %25904 : tensor<4x?x4096xf16>{%dim_23596} to #hal.device.promise<@__device_2>
    %25906 = torch_c.from_builtin_tensor %25905 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25907 = torch_c.to_builtin_tensor %25820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23597 = arith.constant 1 : index
    %dim_23598 = tensor.dim %25907, %c1_23597 : tensor<4x?x4096xf16>
    %25908 = flow.tensor.transfer %25907 : tensor<4x?x4096xf16>{%dim_23598} to #hal.device.promise<@__device_2>
    %25909 = torch_c.from_builtin_tensor %25908 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25910 = torch_c.to_builtin_tensor %25826 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23599 = arith.constant 1 : index
    %dim_23600 = tensor.dim %25910, %c1_23599 : tensor<4x?x4096xf16>
    %25911 = flow.tensor.transfer %25910 : tensor<4x?x4096xf16>{%dim_23600} to #hal.device.promise<@__device_2>
    %25912 = torch_c.from_builtin_tensor %25911 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25913 = torch_c.to_builtin_tensor %25832 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23601 = arith.constant 1 : index
    %dim_23602 = tensor.dim %25913, %c1_23601 : tensor<4x?x4096xf16>
    %25914 = flow.tensor.transfer %25913 : tensor<4x?x4096xf16>{%dim_23602} to #hal.device.promise<@__device_2>
    %25915 = torch_c.from_builtin_tensor %25914 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25916 = torch_c.to_builtin_tensor %25838 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23603 = arith.constant 1 : index
    %dim_23604 = tensor.dim %25916, %c1_23603 : tensor<4x?x4096xf16>
    %25917 = flow.tensor.transfer %25916 : tensor<4x?x4096xf16>{%dim_23604} to #hal.device.promise<@__device_2>
    %25918 = torch_c.from_builtin_tensor %25917 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25919 = torch_c.to_builtin_tensor %25844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23605 = arith.constant 1 : index
    %dim_23606 = tensor.dim %25919, %c1_23605 : tensor<4x?x4096xf16>
    %25920 = flow.tensor.transfer %25919 : tensor<4x?x4096xf16>{%dim_23606} to #hal.device.promise<@__device_2>
    %25921 = torch_c.from_builtin_tensor %25920 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23607 = torch.constant.int 1
    %25922 = torch.aten.add.Tensor %25903, %25906, %int1_23607 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23608 = torch.constant.int 1
    %25923 = torch.aten.add.Tensor %25922, %25814, %int1_23608 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23609 = torch.constant.int 1
    %25924 = torch.aten.add.Tensor %25923, %25909, %int1_23609 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23610 = torch.constant.int 1
    %25925 = torch.aten.add.Tensor %25924, %25912, %int1_23610 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23611 = torch.constant.int 1
    %25926 = torch.aten.add.Tensor %25925, %25915, %int1_23611 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23612 = torch.constant.int 1
    %25927 = torch.aten.add.Tensor %25926, %25918, %int1_23612 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23613 = torch.constant.int 1
    %25928 = torch.aten.add.Tensor %25927, %25921, %int1_23613 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
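    // Same reduction targeting @__device_3: local partial %25820, result %25956.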
    %25929 = torch_c.to_builtin_tensor %25802 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23614 = arith.constant 1 : index
    %dim_23615 = tensor.dim %25929, %c1_23614 : tensor<4x?x4096xf16>
    %25930 = flow.tensor.transfer %25929 : tensor<4x?x4096xf16>{%dim_23615} to #hal.device.promise<@__device_3>
    %25931 = torch_c.from_builtin_tensor %25930 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25932 = torch_c.to_builtin_tensor %25808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23616 = arith.constant 1 : index
    %dim_23617 = tensor.dim %25932, %c1_23616 : tensor<4x?x4096xf16>
    %25933 = flow.tensor.transfer %25932 : tensor<4x?x4096xf16>{%dim_23617} to #hal.device.promise<@__device_3>
    %25934 = torch_c.from_builtin_tensor %25933 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25935 = torch_c.to_builtin_tensor %25814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23618 = arith.constant 1 : index
    %dim_23619 = tensor.dim %25935, %c1_23618 : tensor<4x?x4096xf16>
    %25936 = flow.tensor.transfer %25935 : tensor<4x?x4096xf16>{%dim_23619} to #hal.device.promise<@__device_3>
    %25937 = torch_c.from_builtin_tensor %25936 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25938 = torch_c.to_builtin_tensor %25826 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23620 = arith.constant 1 : index
    %dim_23621 = tensor.dim %25938, %c1_23620 : tensor<4x?x4096xf16>
    %25939 = flow.tensor.transfer %25938 : tensor<4x?x4096xf16>{%dim_23621} to #hal.device.promise<@__device_3>
    %25940 = torch_c.from_builtin_tensor %25939 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25941 = torch_c.to_builtin_tensor %25832 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23622 = arith.constant 1 : index
    %dim_23623 = tensor.dim %25941, %c1_23622 : tensor<4x?x4096xf16>
    %25942 = flow.tensor.transfer %25941 : tensor<4x?x4096xf16>{%dim_23623} to #hal.device.promise<@__device_3>
    %25943 = torch_c.from_builtin_tensor %25942 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25944 = torch_c.to_builtin_tensor %25838 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23624 = arith.constant 1 : index
    %dim_23625 = tensor.dim %25944, %c1_23624 : tensor<4x?x4096xf16>
    %25945 = flow.tensor.transfer %25944 : tensor<4x?x4096xf16>{%dim_23625} to #hal.device.promise<@__device_3>
    %25946 = torch_c.from_builtin_tensor %25945 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25947 = torch_c.to_builtin_tensor %25844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23626 = arith.constant 1 : index
    %dim_23627 = tensor.dim %25947, %c1_23626 : tensor<4x?x4096xf16>
    %25948 = flow.tensor.transfer %25947 : tensor<4x?x4096xf16>{%dim_23627} to #hal.device.promise<@__device_3>
    %25949 = torch_c.from_builtin_tensor %25948 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23628 = torch.constant.int 1
    %25950 = torch.aten.add.Tensor %25931, %25934, %int1_23628 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23629 = torch.constant.int 1
    %25951 = torch.aten.add.Tensor %25950, %25937, %int1_23629 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23630 = torch.constant.int 1
    %25952 = torch.aten.add.Tensor %25951, %25820, %int1_23630 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23631 = torch.constant.int 1
    %25953 = torch.aten.add.Tensor %25952, %25940, %int1_23631 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23632 = torch.constant.int 1
    %25954 = torch.aten.add.Tensor %25953, %25943, %int1_23632 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23633 = torch.constant.int 1
    %25955 = torch.aten.add.Tensor %25954, %25946, %int1_23633 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23634 = torch.constant.int 1
    %25956 = torch.aten.add.Tensor %25955, %25949, %int1_23634 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
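    // Same reduction targeting @__device_4: local partial %25826, result %25984.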
    %25957 = torch_c.to_builtin_tensor %25802 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23635 = arith.constant 1 : index
    %dim_23636 = tensor.dim %25957, %c1_23635 : tensor<4x?x4096xf16>
    %25958 = flow.tensor.transfer %25957 : tensor<4x?x4096xf16>{%dim_23636} to #hal.device.promise<@__device_4>
    %25959 = torch_c.from_builtin_tensor %25958 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25960 = torch_c.to_builtin_tensor %25808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23637 = arith.constant 1 : index
    %dim_23638 = tensor.dim %25960, %c1_23637 : tensor<4x?x4096xf16>
    %25961 = flow.tensor.transfer %25960 : tensor<4x?x4096xf16>{%dim_23638} to #hal.device.promise<@__device_4>
    %25962 = torch_c.from_builtin_tensor %25961 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25963 = torch_c.to_builtin_tensor %25814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23639 = arith.constant 1 : index
    %dim_23640 = tensor.dim %25963, %c1_23639 : tensor<4x?x4096xf16>
    %25964 = flow.tensor.transfer %25963 : tensor<4x?x4096xf16>{%dim_23640} to #hal.device.promise<@__device_4>
    %25965 = torch_c.from_builtin_tensor %25964 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25966 = torch_c.to_builtin_tensor %25820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23641 = arith.constant 1 : index
    %dim_23642 = tensor.dim %25966, %c1_23641 : tensor<4x?x4096xf16>
    %25967 = flow.tensor.transfer %25966 : tensor<4x?x4096xf16>{%dim_23642} to #hal.device.promise<@__device_4>
    %25968 = torch_c.from_builtin_tensor %25967 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25969 = torch_c.to_builtin_tensor %25832 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23643 = arith.constant 1 : index
    %dim_23644 = tensor.dim %25969, %c1_23643 : tensor<4x?x4096xf16>
    %25970 = flow.tensor.transfer %25969 : tensor<4x?x4096xf16>{%dim_23644} to #hal.device.promise<@__device_4>
    %25971 = torch_c.from_builtin_tensor %25970 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25972 = torch_c.to_builtin_tensor %25838 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23645 = arith.constant 1 : index
    %dim_23646 = tensor.dim %25972, %c1_23645 : tensor<4x?x4096xf16>
    %25973 = flow.tensor.transfer %25972 : tensor<4x?x4096xf16>{%dim_23646} to #hal.device.promise<@__device_4>
    %25974 = torch_c.from_builtin_tensor %25973 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25975 = torch_c.to_builtin_tensor %25844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23647 = arith.constant 1 : index
    %dim_23648 = tensor.dim %25975, %c1_23647 : tensor<4x?x4096xf16>
    %25976 = flow.tensor.transfer %25975 : tensor<4x?x4096xf16>{%dim_23648} to #hal.device.promise<@__device_4>
    %25977 = torch_c.from_builtin_tensor %25976 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23649 = torch.constant.int 1
    %25978 = torch.aten.add.Tensor %25959, %25962, %int1_23649 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23650 = torch.constant.int 1
    %25979 = torch.aten.add.Tensor %25978, %25965, %int1_23650 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23651 = torch.constant.int 1
    %25980 = torch.aten.add.Tensor %25979, %25968, %int1_23651 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23652 = torch.constant.int 1
    %25981 = torch.aten.add.Tensor %25980, %25826, %int1_23652 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23653 = torch.constant.int 1
    %25982 = torch.aten.add.Tensor %25981, %25971, %int1_23653 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23654 = torch.constant.int 1
    %25983 = torch.aten.add.Tensor %25982, %25974, %int1_23654 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23655 = torch.constant.int 1
    %25984 = torch.aten.add.Tensor %25983, %25977, %int1_23655 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
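    // Same reduction targeting @__device_5: local partial %25832, result %26012.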
    %25985 = torch_c.to_builtin_tensor %25802 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23656 = arith.constant 1 : index
    %dim_23657 = tensor.dim %25985, %c1_23656 : tensor<4x?x4096xf16>
    %25986 = flow.tensor.transfer %25985 : tensor<4x?x4096xf16>{%dim_23657} to #hal.device.promise<@__device_5>
    %25987 = torch_c.from_builtin_tensor %25986 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25988 = torch_c.to_builtin_tensor %25808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23658 = arith.constant 1 : index
    %dim_23659 = tensor.dim %25988, %c1_23658 : tensor<4x?x4096xf16>
    %25989 = flow.tensor.transfer %25988 : tensor<4x?x4096xf16>{%dim_23659} to #hal.device.promise<@__device_5>
    %25990 = torch_c.from_builtin_tensor %25989 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25991 = torch_c.to_builtin_tensor %25814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23660 = arith.constant 1 : index
    %dim_23661 = tensor.dim %25991, %c1_23660 : tensor<4x?x4096xf16>
    %25992 = flow.tensor.transfer %25991 : tensor<4x?x4096xf16>{%dim_23661} to #hal.device.promise<@__device_5>
    %25993 = torch_c.from_builtin_tensor %25992 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25994 = torch_c.to_builtin_tensor %25820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23662 = arith.constant 1 : index
    %dim_23663 = tensor.dim %25994, %c1_23662 : tensor<4x?x4096xf16>
    %25995 = flow.tensor.transfer %25994 : tensor<4x?x4096xf16>{%dim_23663} to #hal.device.promise<@__device_5>
    %25996 = torch_c.from_builtin_tensor %25995 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %25997 = torch_c.to_builtin_tensor %25826 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23664 = arith.constant 1 : index
    %dim_23665 = tensor.dim %25997, %c1_23664 : tensor<4x?x4096xf16>
    %25998 = flow.tensor.transfer %25997 : tensor<4x?x4096xf16>{%dim_23665} to #hal.device.promise<@__device_5>
    %25999 = torch_c.from_builtin_tensor %25998 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %25999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26000 = torch_c.to_builtin_tensor %25838 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23666 = arith.constant 1 : index
    %dim_23667 = tensor.dim %26000, %c1_23666 : tensor<4x?x4096xf16>
    %26001 = flow.tensor.transfer %26000 : tensor<4x?x4096xf16>{%dim_23667} to #hal.device.promise<@__device_5>
    %26002 = torch_c.from_builtin_tensor %26001 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26003 = torch_c.to_builtin_tensor %25844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23668 = arith.constant 1 : index
    %dim_23669 = tensor.dim %26003, %c1_23668 : tensor<4x?x4096xf16>
    %26004 = flow.tensor.transfer %26003 : tensor<4x?x4096xf16>{%dim_23669} to #hal.device.promise<@__device_5>
    %26005 = torch_c.from_builtin_tensor %26004 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23670 = torch.constant.int 1
    %26006 = torch.aten.add.Tensor %25987, %25990, %int1_23670 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23671 = torch.constant.int 1
    %26007 = torch.aten.add.Tensor %26006, %25993, %int1_23671 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23672 = torch.constant.int 1
    %26008 = torch.aten.add.Tensor %26007, %25996, %int1_23672 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23673 = torch.constant.int 1
    %26009 = torch.aten.add.Tensor %26008, %25999, %int1_23673 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23674 = torch.constant.int 1
    %26010 = torch.aten.add.Tensor %26009, %25832, %int1_23674 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23675 = torch.constant.int 1
    %26011 = torch.aten.add.Tensor %26010, %26002, %int1_23675 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23676 = torch.constant.int 1
    %26012 = torch.aten.add.Tensor %26011, %26005, %int1_23676 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
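    // Same reduction targeting @__device_6: local partial %25838, result %26040.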
    %26013 = torch_c.to_builtin_tensor %25802 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23677 = arith.constant 1 : index
    %dim_23678 = tensor.dim %26013, %c1_23677 : tensor<4x?x4096xf16>
    %26014 = flow.tensor.transfer %26013 : tensor<4x?x4096xf16>{%dim_23678} to #hal.device.promise<@__device_6>
    %26015 = torch_c.from_builtin_tensor %26014 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26016 = torch_c.to_builtin_tensor %25808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23679 = arith.constant 1 : index
    %dim_23680 = tensor.dim %26016, %c1_23679 : tensor<4x?x4096xf16>
    %26017 = flow.tensor.transfer %26016 : tensor<4x?x4096xf16>{%dim_23680} to #hal.device.promise<@__device_6>
    %26018 = torch_c.from_builtin_tensor %26017 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26019 = torch_c.to_builtin_tensor %25814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23681 = arith.constant 1 : index
    %dim_23682 = tensor.dim %26019, %c1_23681 : tensor<4x?x4096xf16>
    %26020 = flow.tensor.transfer %26019 : tensor<4x?x4096xf16>{%dim_23682} to #hal.device.promise<@__device_6>
    %26021 = torch_c.from_builtin_tensor %26020 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26022 = torch_c.to_builtin_tensor %25820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23683 = arith.constant 1 : index
    %dim_23684 = tensor.dim %26022, %c1_23683 : tensor<4x?x4096xf16>
    %26023 = flow.tensor.transfer %26022 : tensor<4x?x4096xf16>{%dim_23684} to #hal.device.promise<@__device_6>
    %26024 = torch_c.from_builtin_tensor %26023 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26025 = torch_c.to_builtin_tensor %25826 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23685 = arith.constant 1 : index
    %dim_23686 = tensor.dim %26025, %c1_23685 : tensor<4x?x4096xf16>
    %26026 = flow.tensor.transfer %26025 : tensor<4x?x4096xf16>{%dim_23686} to #hal.device.promise<@__device_6>
    %26027 = torch_c.from_builtin_tensor %26026 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26028 = torch_c.to_builtin_tensor %25832 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23687 = arith.constant 1 : index
    %dim_23688 = tensor.dim %26028, %c1_23687 : tensor<4x?x4096xf16>
    %26029 = flow.tensor.transfer %26028 : tensor<4x?x4096xf16>{%dim_23688} to #hal.device.promise<@__device_6>
    %26030 = torch_c.from_builtin_tensor %26029 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26031 = torch_c.to_builtin_tensor %25844 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23689 = arith.constant 1 : index
    %dim_23690 = tensor.dim %26031, %c1_23689 : tensor<4x?x4096xf16>
    %26032 = flow.tensor.transfer %26031 : tensor<4x?x4096xf16>{%dim_23690} to #hal.device.promise<@__device_6>
    %26033 = torch_c.from_builtin_tensor %26032 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23691 = torch.constant.int 1
    %26034 = torch.aten.add.Tensor %26015, %26018, %int1_23691 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23692 = torch.constant.int 1
    %26035 = torch.aten.add.Tensor %26034, %26021, %int1_23692 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23693 = torch.constant.int 1
    %26036 = torch.aten.add.Tensor %26035, %26024, %int1_23693 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23694 = torch.constant.int 1
    %26037 = torch.aten.add.Tensor %26036, %26027, %int1_23694 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23695 = torch.constant.int 1
    %26038 = torch.aten.add.Tensor %26037, %26030, %int1_23695 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23696 = torch.constant.int 1
    %26039 = torch.aten.add.Tensor %26038, %25838, %int1_23696 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23697 = torch.constant.int 1
    %26040 = torch.aten.add.Tensor %26039, %26033, %int1_23697 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
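    // Same reduction targeting @__device_7: local partial %25844, result %26068.
    // After this block, every device holds the same fully reduced [4,?,4096] tensor.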
    %26041 = torch_c.to_builtin_tensor %25802 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23698 = arith.constant 1 : index
    %dim_23699 = tensor.dim %26041, %c1_23698 : tensor<4x?x4096xf16>
    %26042 = flow.tensor.transfer %26041 : tensor<4x?x4096xf16>{%dim_23699} to #hal.device.promise<@__device_7>
    %26043 = torch_c.from_builtin_tensor %26042 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26044 = torch_c.to_builtin_tensor %25808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23700 = arith.constant 1 : index
    %dim_23701 = tensor.dim %26044, %c1_23700 : tensor<4x?x4096xf16>
    %26045 = flow.tensor.transfer %26044 : tensor<4x?x4096xf16>{%dim_23701} to #hal.device.promise<@__device_7>
    %26046 = torch_c.from_builtin_tensor %26045 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26047 = torch_c.to_builtin_tensor %25814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23702 = arith.constant 1 : index
    %dim_23703 = tensor.dim %26047, %c1_23702 : tensor<4x?x4096xf16>
    %26048 = flow.tensor.transfer %26047 : tensor<4x?x4096xf16>{%dim_23703} to #hal.device.promise<@__device_7>
    %26049 = torch_c.from_builtin_tensor %26048 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26050 = torch_c.to_builtin_tensor %25820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23704 = arith.constant 1 : index
    %dim_23705 = tensor.dim %26050, %c1_23704 : tensor<4x?x4096xf16>
    %26051 = flow.tensor.transfer %26050 : tensor<4x?x4096xf16>{%dim_23705} to #hal.device.promise<@__device_7>
    %26052 = torch_c.from_builtin_tensor %26051 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26053 = torch_c.to_builtin_tensor %25826 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23706 = arith.constant 1 : index
    %dim_23707 = tensor.dim %26053, %c1_23706 : tensor<4x?x4096xf16>
    %26054 = flow.tensor.transfer %26053 : tensor<4x?x4096xf16>{%dim_23707} to #hal.device.promise<@__device_7>
    %26055 = torch_c.from_builtin_tensor %26054 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26056 = torch_c.to_builtin_tensor %25832 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23708 = arith.constant 1 : index
    %dim_23709 = tensor.dim %26056, %c1_23708 : tensor<4x?x4096xf16>
    %26057 = flow.tensor.transfer %26056 : tensor<4x?x4096xf16>{%dim_23709} to #hal.device.promise<@__device_7>
    %26058 = torch_c.from_builtin_tensor %26057 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26059 = torch_c.to_builtin_tensor %25838 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23710 = arith.constant 1 : index
    %dim_23711 = tensor.dim %26059, %c1_23710 : tensor<4x?x4096xf16>
    %26060 = flow.tensor.transfer %26059 : tensor<4x?x4096xf16>{%dim_23711} to #hal.device.promise<@__device_7>
    %26061 = torch_c.from_builtin_tensor %26060 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23712 = torch.constant.int 1
    %26062 = torch.aten.add.Tensor %26043, %26046, %int1_23712 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23713 = torch.constant.int 1
    %26063 = torch.aten.add.Tensor %26062, %26049, %int1_23713 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23714 = torch.constant.int 1
    %26064 = torch.aten.add.Tensor %26063, %26052, %int1_23714 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23715 = torch.constant.int 1
    %26065 = torch.aten.add.Tensor %26064, %26055, %int1_23715 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23716 = torch.constant.int 1
    %26066 = torch.aten.add.Tensor %26065, %26058, %int1_23716 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23717 = torch.constant.int 1
    %26067 = torch.aten.add.Tensor %26066, %26061, %int1_23717 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23718 = torch.constant.int 1
    %26068 = torch.aten.add.Tensor %26067, %25844, %int1_23718 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
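    // Residual connection: each device adds its fully reduced projection output
    // (%25872 ... %26068) to its copy of the incoming hidden state
    // (%24728 ... %24735, defined earlier in the module), producing %26069 ... %26076.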
    %int1_23719 = torch.constant.int 1
    %26069 = torch.aten.add.Tensor %24728, %25872, %int1_23719 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23720 = torch.constant.int 1
    %26070 = torch.aten.add.Tensor %24729, %25900, %int1_23720 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23721 = torch.constant.int 1
    %26071 = torch.aten.add.Tensor %24730, %25928, %int1_23721 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23722 = torch.constant.int 1
    %26072 = torch.aten.add.Tensor %24731, %25956, %int1_23722 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23723 = torch.constant.int 1
    %26073 = torch.aten.add.Tensor %24732, %25984, %int1_23723 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23724 = torch.constant.int 1
    %26074 = torch.aten.add.Tensor %24733, %26012, %int1_23724 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23725 = torch.constant.int 1
    %26075 = torch.aten.add.Tensor %24734, %26040, %int1_23725 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23726 = torch.constant.int 1
    %26076 = torch.aten.add.Tensor %24735, %26068, %int1_23726 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
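    // The ops from here to the f16 downcast match an RMSNorm, unrolled eight
    // ways: upcast to f32, square, mean over the hidden dim, add eps, rsqrt,
    // rescale, apply the norm weight, downcast. First step:
    // convert_element_type with dtype code 6 (torch float32).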
    %int6_23727 = torch.constant.int 6
    %26077 = torch.prims.convert_element_type %26069, %int6_23727 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_23728 = torch.constant.int 6
    %26078 = torch.prims.convert_element_type %26070, %int6_23728 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_23729 = torch.constant.int 6
    %26079 = torch.prims.convert_element_type %26071, %int6_23729 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_23730 = torch.constant.int 6
    %26080 = torch.prims.convert_element_type %26072, %int6_23730 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_23731 = torch.constant.int 6
    %26081 = torch.prims.convert_element_type %26073, %int6_23731 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_23732 = torch.constant.int 6
    %26082 = torch.prims.convert_element_type %26074, %int6_23732 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_23733 = torch.constant.int 6
    %26083 = torch.prims.convert_element_type %26075, %int6_23733 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_23734 = torch.constant.int 6
    %26084 = torch.prims.convert_element_type %26076, %int6_23734 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
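    // Square each element (pow 2) on every device.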
    %int2_23735 = torch.constant.int 2
    %26085 = torch.aten.pow.Tensor_Scalar %26077, %int2_23735 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_23736 = torch.constant.int 2
    %26086 = torch.aten.pow.Tensor_Scalar %26078, %int2_23736 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_23737 = torch.constant.int 2
    %26087 = torch.aten.pow.Tensor_Scalar %26079, %int2_23737 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_23738 = torch.constant.int 2
    %26088 = torch.aten.pow.Tensor_Scalar %26080, %int2_23738 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_23739 = torch.constant.int 2
    %26089 = torch.aten.pow.Tensor_Scalar %26081, %int2_23739 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_23740 = torch.constant.int 2
    %26090 = torch.aten.pow.Tensor_Scalar %26082, %int2_23740 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_23741 = torch.constant.int 2
    %26091 = torch.aten.pow.Tensor_Scalar %26083, %int2_23741 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_23742 = torch.constant.int 2
    %26092 = torch.aten.pow.Tensor_Scalar %26084, %int2_23742 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
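    // Mean of the squares over the last (hidden) dimension, keepdim = true,
    // reducing [4,?,4096] to [4,?,1] per device.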
    %int-1_23743 = torch.constant.int -1
    %26093 = torch.prim.ListConstruct %int-1_23743 : (!torch.int) -> !torch.list<int>
    %true_23744 = torch.constant.bool true
    %none_23745 = torch.constant.none
    %26094 = torch.aten.mean.dim %26085, %26093, %true_23744, %none_23745 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_23746 = torch.constant.int -1
    %26095 = torch.prim.ListConstruct %int-1_23746 : (!torch.int) -> !torch.list<int>
    %true_23747 = torch.constant.bool true
    %none_23748 = torch.constant.none
    %26096 = torch.aten.mean.dim %26086, %26095, %true_23747, %none_23748 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_23749 = torch.constant.int -1
    %26097 = torch.prim.ListConstruct %int-1_23749 : (!torch.int) -> !torch.list<int>
    %true_23750 = torch.constant.bool true
    %none_23751 = torch.constant.none
    %26098 = torch.aten.mean.dim %26087, %26097, %true_23750, %none_23751 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_23752 = torch.constant.int -1
    %26099 = torch.prim.ListConstruct %int-1_23752 : (!torch.int) -> !torch.list<int>
    %true_23753 = torch.constant.bool true
    %none_23754 = torch.constant.none
    %26100 = torch.aten.mean.dim %26088, %26099, %true_23753, %none_23754 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_23755 = torch.constant.int -1
    %26101 = torch.prim.ListConstruct %int-1_23755 : (!torch.int) -> !torch.list<int>
    %true_23756 = torch.constant.bool true
    %none_23757 = torch.constant.none
    %26102 = torch.aten.mean.dim %26089, %26101, %true_23756, %none_23757 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_23758 = torch.constant.int -1
    %26103 = torch.prim.ListConstruct %int-1_23758 : (!torch.int) -> !torch.list<int>
    %true_23759 = torch.constant.bool true
    %none_23760 = torch.constant.none
    %26104 = torch.aten.mean.dim %26090, %26103, %true_23759, %none_23760 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_23761 = torch.constant.int -1
    %26105 = torch.prim.ListConstruct %int-1_23761 : (!torch.int) -> !torch.list<int>
    %true_23762 = torch.constant.bool true
    %none_23763 = torch.constant.none
    %26106 = torch.aten.mean.dim %26091, %26105, %true_23762, %none_23763 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_23764 = torch.constant.int -1
    %26107 = torch.prim.ListConstruct %int-1_23764 : (!torch.int) -> !torch.list<int>
    %true_23765 = torch.constant.bool true
    %none_23766 = torch.constant.none
    %26108 = torch.aten.mean.dim %26092, %26107, %true_23765, %none_23766 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
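    // Add the normalization epsilon (9.9999997e-06, the f32 rounding of 1e-5)
    // to each per-device mean of squares.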
    %float9.999990e-06_23767 = torch.constant.float 9.9999997473787516E-6
    %int1_23768 = torch.constant.int 1
    %26109 = torch.aten.add.Scalar %26094, %float9.999990e-06_23767, %int1_23768 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_23769 = torch.constant.float 9.9999997473787516E-6
    %int1_23770 = torch.constant.int 1
    %26110 = torch.aten.add.Scalar %26096, %float9.999990e-06_23769, %int1_23770 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_23771 = torch.constant.float 9.9999997473787516E-6
    %int1_23772 = torch.constant.int 1
    %26111 = torch.aten.add.Scalar %26098, %float9.999990e-06_23771, %int1_23772 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_23773 = torch.constant.float 9.9999997473787516E-6
    %int1_23774 = torch.constant.int 1
    %26112 = torch.aten.add.Scalar %26100, %float9.999990e-06_23773, %int1_23774 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_23775 = torch.constant.float 9.9999997473787516E-6
    %int1_23776 = torch.constant.int 1
    %26113 = torch.aten.add.Scalar %26102, %float9.999990e-06_23775, %int1_23776 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_23777 = torch.constant.float 9.9999997473787516E-6
    %int1_23778 = torch.constant.int 1
    %26114 = torch.aten.add.Scalar %26104, %float9.999990e-06_23777, %int1_23778 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_23779 = torch.constant.float 9.9999997473787516E-6
    %int1_23780 = torch.constant.int 1
    %26115 = torch.aten.add.Scalar %26106, %float9.999990e-06_23779, %int1_23780 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_23781 = torch.constant.float 9.9999997473787516E-6
    %int1_23782 = torch.constant.int 1
    %26116 = torch.aten.add.Scalar %26108, %float9.999990e-06_23781, %int1_23782 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
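    // rsqrt yields 1 / sqrt(mean(x^2) + eps) per device.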
    %26117 = torch.aten.rsqrt %26109 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26118 = torch.aten.rsqrt %26110 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26119 = torch.aten.rsqrt %26111 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26120 = torch.aten.rsqrt %26112 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26121 = torch.aten.rsqrt %26113 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26122 = torch.aten.rsqrt %26114 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26123 = torch.aten.rsqrt %26115 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26124 = torch.aten.rsqrt %26116 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26124, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
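    // Rescale the f32 activations by the reciprocal RMS, broadcasting the
    // [4,?,1] factor across the 4096-wide hidden dim.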
    %26125 = torch.aten.mul.Tensor %26077, %26117 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26126 = torch.aten.mul.Tensor %26078, %26118 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26127 = torch.aten.mul.Tensor %26079, %26119 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26128 = torch.aten.mul.Tensor %26080, %26120 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26129 = torch.aten.mul.Tensor %26081, %26121 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26130 = torch.aten.mul.Tensor %26082, %26122 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26131 = torch.aten.mul.Tensor %26083, %26123 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26132 = torch.aten.mul.Tensor %26084, %26124 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
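    // Apply the learned norm weight: %912 ... %919 are per-device replicas of
    // one 4096-element f32 vector (presumably this block's ffn_norm weight,
    // given the FFN projections that follow).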
    %26133 = torch.aten.mul.Tensor %912, %26125 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26134 = torch.aten.mul.Tensor %913, %26126 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26135 = torch.aten.mul.Tensor %914, %26127 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26136 = torch.aten.mul.Tensor %915, %26128 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26137 = torch.aten.mul.Tensor %916, %26129 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26138 = torch.aten.mul.Tensor %917, %26130 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26139 = torch.aten.mul.Tensor %918, %26131 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26140 = torch.aten.mul.Tensor %919, %26132 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
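    // Downcast the normalized activations back to f16 (dtype code 5, torch
    // float16) before the FFN matmuls.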
    %int5_23783 = torch.constant.int 5
    %26141 = torch.prims.convert_element_type %26133, %int5_23783 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_23784 = torch.constant.int 5
    %26142 = torch.prims.convert_element_type %26134, %int5_23784 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_23785 = torch.constant.int 5
    %26143 = torch.prims.convert_element_type %26135, %int5_23785 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_23786 = torch.constant.int 5
    %26144 = torch.prims.convert_element_type %26136, %int5_23786 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_23787 = torch.constant.int 5
    %26145 = torch.prims.convert_element_type %26137, %int5_23787 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_23788 = torch.constant.int 5
    %26146 = torch.prims.convert_element_type %26138, %int5_23788 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_23789 = torch.constant.int 5
    %26147 = torch.prims.convert_element_type %26139, %int5_23789 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_23790 = torch.constant.int 5
    %26148 = torch.prims.convert_element_type %26140, %int5_23790 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
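    // FFN first projection: transpose each device's [1792,4096] weight shard
    // (%920 ... %927) to [4096,1792]. 1792 = 14336/8, consistent with an FFN
    // inner dim of 14336 split column-parallel across eight devices; the SiLU
    // applied below suggests these are the gate-projection shards.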
    %int1_23791 = torch.constant.int 1
    %int0_23792 = torch.constant.int 0
    %26149 = torch.prim.ListConstruct %int1_23791, %int0_23792 : (!torch.int, !torch.int) -> !torch.list<int>
    %26150 = torch.aten.permute %920, %26149 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23793 = torch.constant.int 1
    %int0_23794 = torch.constant.int 0
    %26151 = torch.prim.ListConstruct %int1_23793, %int0_23794 : (!torch.int, !torch.int) -> !torch.list<int>
    %26152 = torch.aten.permute %921, %26151 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23795 = torch.constant.int 1
    %int0_23796 = torch.constant.int 0
    %26153 = torch.prim.ListConstruct %int1_23795, %int0_23796 : (!torch.int, !torch.int) -> !torch.list<int>
    %26154 = torch.aten.permute %922, %26153 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23797 = torch.constant.int 1
    %int0_23798 = torch.constant.int 0
    %26155 = torch.prim.ListConstruct %int1_23797, %int0_23798 : (!torch.int, !torch.int) -> !torch.list<int>
    %26156 = torch.aten.permute %923, %26155 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23799 = torch.constant.int 1
    %int0_23800 = torch.constant.int 0
    %26157 = torch.prim.ListConstruct %int1_23799, %int0_23800 : (!torch.int, !torch.int) -> !torch.list<int>
    %26158 = torch.aten.permute %924, %26157 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23801 = torch.constant.int 1
    %int0_23802 = torch.constant.int 0
    %26159 = torch.prim.ListConstruct %int1_23801, %int0_23802 : (!torch.int, !torch.int) -> !torch.list<int>
    %26160 = torch.aten.permute %925, %26159 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23803 = torch.constant.int 1
    %int0_23804 = torch.constant.int 0
    %26161 = torch.prim.ListConstruct %int1_23803, %int0_23804 : (!torch.int, !torch.int) -> !torch.list<int>
    %26162 = torch.aten.permute %926, %26161 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23805 = torch.constant.int 1
    %int0_23806 = torch.constant.int 0
    %26163 = torch.prim.ListConstruct %int1_23805, %int0_23806 : (!torch.int, !torch.int) -> !torch.list<int>
    %26164 = torch.aten.permute %927, %26163 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
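    // Per-device gate matmul: flatten [4,?,4096] to [4*?,4096] (4*? = s0*64),
    // multiply by the transposed shard, then reshape to [4,?,1792].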
    %int4_23807 = torch.constant.int 4
    %26165 = torch.aten.mul.int %int4_23807, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23808 = torch.constant.int 4096
    %26166 = torch.prim.ListConstruct %26165, %int4096_23808 : (!torch.int, !torch.int) -> !torch.list<int>
    %26167 = torch.aten.view %26141, %26166 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26167, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26168 = torch.aten.mm %26167, %26150 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26168, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23809 = torch.constant.int 4
    %int1792_23810 = torch.constant.int 1792
    %26169 = torch.prim.ListConstruct %int4_23809, %2482, %int1792_23810 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26170 = torch.aten.view %26168, %26169 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23811 = torch.constant.int 4
    %26171 = torch.aten.mul.int %int4_23811, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23812 = torch.constant.int 4096
    %26172 = torch.prim.ListConstruct %26171, %int4096_23812 : (!torch.int, !torch.int) -> !torch.list<int>
    %26173 = torch.aten.view %26142, %26172 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26173, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26174 = torch.aten.mm %26173, %26152 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26174, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23813 = torch.constant.int 4
    %int1792_23814 = torch.constant.int 1792
    %26175 = torch.prim.ListConstruct %int4_23813, %2482, %int1792_23814 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26176 = torch.aten.view %26174, %26175 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23815 = torch.constant.int 4
    %26177 = torch.aten.mul.int %int4_23815, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23816 = torch.constant.int 4096
    %26178 = torch.prim.ListConstruct %26177, %int4096_23816 : (!torch.int, !torch.int) -> !torch.list<int>
    %26179 = torch.aten.view %26143, %26178 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26179, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26180 = torch.aten.mm %26179, %26154 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26180, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23817 = torch.constant.int 4
    %int1792_23818 = torch.constant.int 1792
    %26181 = torch.prim.ListConstruct %int4_23817, %2482, %int1792_23818 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26182 = torch.aten.view %26180, %26181 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23819 = torch.constant.int 4
    %26183 = torch.aten.mul.int %int4_23819, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23820 = torch.constant.int 4096
    %26184 = torch.prim.ListConstruct %26183, %int4096_23820 : (!torch.int, !torch.int) -> !torch.list<int>
    %26185 = torch.aten.view %26144, %26184 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26185, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26186 = torch.aten.mm %26185, %26156 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26186, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23821 = torch.constant.int 4
    %int1792_23822 = torch.constant.int 1792
    %26187 = torch.prim.ListConstruct %int4_23821, %2482, %int1792_23822 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26188 = torch.aten.view %26186, %26187 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23823 = torch.constant.int 4
    %26189 = torch.aten.mul.int %int4_23823, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23824 = torch.constant.int 4096
    %26190 = torch.prim.ListConstruct %26189, %int4096_23824 : (!torch.int, !torch.int) -> !torch.list<int>
    %26191 = torch.aten.view %26145, %26190 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26191, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26192 = torch.aten.mm %26191, %26158 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26192, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23825 = torch.constant.int 4
    %int1792_23826 = torch.constant.int 1792
    %26193 = torch.prim.ListConstruct %int4_23825, %2482, %int1792_23826 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26194 = torch.aten.view %26192, %26193 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23827 = torch.constant.int 4
    %26195 = torch.aten.mul.int %int4_23827, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23828 = torch.constant.int 4096
    %26196 = torch.prim.ListConstruct %26195, %int4096_23828 : (!torch.int, !torch.int) -> !torch.list<int>
    %26197 = torch.aten.view %26146, %26196 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26197, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26198 = torch.aten.mm %26197, %26160 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26198, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23829 = torch.constant.int 4
    %int1792_23830 = torch.constant.int 1792
    %26199 = torch.prim.ListConstruct %int4_23829, %2482, %int1792_23830 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26200 = torch.aten.view %26198, %26199 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23831 = torch.constant.int 4
    %26201 = torch.aten.mul.int %int4_23831, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23832 = torch.constant.int 4096
    %26202 = torch.prim.ListConstruct %26201, %int4096_23832 : (!torch.int, !torch.int) -> !torch.list<int>
    %26203 = torch.aten.view %26147, %26202 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26203, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26204 = torch.aten.mm %26203, %26162 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26204, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23833 = torch.constant.int 4
    %int1792_23834 = torch.constant.int 1792
    %26205 = torch.prim.ListConstruct %int4_23833, %2482, %int1792_23834 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26206 = torch.aten.view %26204, %26205 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23835 = torch.constant.int 4
    %26207 = torch.aten.mul.int %int4_23835, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23836 = torch.constant.int 4096
    %26208 = torch.prim.ListConstruct %26207, %int4096_23836 : (!torch.int, !torch.int) -> !torch.list<int>
    %26209 = torch.aten.view %26148, %26208 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26209, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26210 = torch.aten.mm %26209, %26164 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26210, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23837 = torch.constant.int 4
    %int1792_23838 = torch.constant.int 1792
    %26211 = torch.prim.ListConstruct %int4_23837, %2482, %int1792_23838 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26212 = torch.aten.view %26210, %26211 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
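    // SiLU activation on each device's gate output.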
    %26213 = torch.aten.silu %26170 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26214 = torch.aten.silu %26176 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26215 = torch.aten.silu %26182 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26216 = torch.aten.silu %26188 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26217 = torch.aten.silu %26194 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26218 = torch.aten.silu %26200 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26219 = torch.aten.silu %26206 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26220 = torch.aten.silu %26212 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
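    // Transpose the second set of [1792,4096] shards (%928 ... %935); these
    // feed the same flatten/matmul/reshape pattern and are presumably the
    // up-projection weights of a SwiGLU-style FFN.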
    %int1_23839 = torch.constant.int 1
    %int0_23840 = torch.constant.int 0
    %26221 = torch.prim.ListConstruct %int1_23839, %int0_23840 : (!torch.int, !torch.int) -> !torch.list<int>
    %26222 = torch.aten.permute %928, %26221 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23841 = torch.constant.int 1
    %int0_23842 = torch.constant.int 0
    %26223 = torch.prim.ListConstruct %int1_23841, %int0_23842 : (!torch.int, !torch.int) -> !torch.list<int>
    %26224 = torch.aten.permute %929, %26223 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23843 = torch.constant.int 1
    %int0_23844 = torch.constant.int 0
    %26225 = torch.prim.ListConstruct %int1_23843, %int0_23844 : (!torch.int, !torch.int) -> !torch.list<int>
    %26226 = torch.aten.permute %930, %26225 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23845 = torch.constant.int 1
    %int0_23846 = torch.constant.int 0
    %26227 = torch.prim.ListConstruct %int1_23845, %int0_23846 : (!torch.int, !torch.int) -> !torch.list<int>
    %26228 = torch.aten.permute %931, %26227 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23847 = torch.constant.int 1
    %int0_23848 = torch.constant.int 0
    %26229 = torch.prim.ListConstruct %int1_23847, %int0_23848 : (!torch.int, !torch.int) -> !torch.list<int>
    %26230 = torch.aten.permute %932, %26229 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23849 = torch.constant.int 1
    %int0_23850 = torch.constant.int 0
    %26231 = torch.prim.ListConstruct %int1_23849, %int0_23850 : (!torch.int, !torch.int) -> !torch.list<int>
    %26232 = torch.aten.permute %933, %26231 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23851 = torch.constant.int 1
    %int0_23852 = torch.constant.int 0
    %26233 = torch.prim.ListConstruct %int1_23851, %int0_23852 : (!torch.int, !torch.int) -> !torch.list<int>
    %26234 = torch.aten.permute %934, %26233 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_23853 = torch.constant.int 1
    %int0_23854 = torch.constant.int 0
    %26235 = torch.prim.ListConstruct %int1_23853, %int0_23854 : (!torch.int, !torch.int) -> !torch.list<int>
    %26236 = torch.aten.permute %935, %26235 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
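    // Per-device up matmul, identical flatten/matmul/reshape structure to the
    // gate matmul above.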
    %int4_23855 = torch.constant.int 4
    %26237 = torch.aten.mul.int %int4_23855, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23856 = torch.constant.int 4096
    %26238 = torch.prim.ListConstruct %26237, %int4096_23856 : (!torch.int, !torch.int) -> !torch.list<int>
    %26239 = torch.aten.view %26141, %26238 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26239, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26240 = torch.aten.mm %26239, %26222 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26240, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23857 = torch.constant.int 4
    %int1792_23858 = torch.constant.int 1792
    %26241 = torch.prim.ListConstruct %int4_23857, %2482, %int1792_23858 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26242 = torch.aten.view %26240, %26241 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23859 = torch.constant.int 4
    %26243 = torch.aten.mul.int %int4_23859, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23860 = torch.constant.int 4096
    %26244 = torch.prim.ListConstruct %26243, %int4096_23860 : (!torch.int, !torch.int) -> !torch.list<int>
    %26245 = torch.aten.view %26142, %26244 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26245, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26246 = torch.aten.mm %26245, %26224 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26246, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23861 = torch.constant.int 4
    %int1792_23862 = torch.constant.int 1792
    %26247 = torch.prim.ListConstruct %int4_23861, %2482, %int1792_23862 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26248 = torch.aten.view %26246, %26247 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23863 = torch.constant.int 4
    %26249 = torch.aten.mul.int %int4_23863, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23864 = torch.constant.int 4096
    %26250 = torch.prim.ListConstruct %26249, %int4096_23864 : (!torch.int, !torch.int) -> !torch.list<int>
    %26251 = torch.aten.view %26143, %26250 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26251, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26252 = torch.aten.mm %26251, %26226 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26252, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23865 = torch.constant.int 4
    %int1792_23866 = torch.constant.int 1792
    %26253 = torch.prim.ListConstruct %int4_23865, %2482, %int1792_23866 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26254 = torch.aten.view %26252, %26253 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23867 = torch.constant.int 4
    %26255 = torch.aten.mul.int %int4_23867, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23868 = torch.constant.int 4096
    %26256 = torch.prim.ListConstruct %26255, %int4096_23868 : (!torch.int, !torch.int) -> !torch.list<int>
    %26257 = torch.aten.view %26144, %26256 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26257, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26258 = torch.aten.mm %26257, %26228 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26258, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23869 = torch.constant.int 4
    %int1792_23870 = torch.constant.int 1792
    %26259 = torch.prim.ListConstruct %int4_23869, %2482, %int1792_23870 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26260 = torch.aten.view %26258, %26259 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23871 = torch.constant.int 4
    %26261 = torch.aten.mul.int %int4_23871, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23872 = torch.constant.int 4096
    %26262 = torch.prim.ListConstruct %26261, %int4096_23872 : (!torch.int, !torch.int) -> !torch.list<int>
    %26263 = torch.aten.view %26145, %26262 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26263, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26264 = torch.aten.mm %26263, %26230 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26264, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23873 = torch.constant.int 4
    %int1792_23874 = torch.constant.int 1792
    %26265 = torch.prim.ListConstruct %int4_23873, %2482, %int1792_23874 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26266 = torch.aten.view %26264, %26265 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23875 = torch.constant.int 4
    %26267 = torch.aten.mul.int %int4_23875, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23876 = torch.constant.int 4096
    %26268 = torch.prim.ListConstruct %26267, %int4096_23876 : (!torch.int, !torch.int) -> !torch.list<int>
    %26269 = torch.aten.view %26146, %26268 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26269, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26270 = torch.aten.mm %26269, %26232 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26270, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23877 = torch.constant.int 4
    %int1792_23878 = torch.constant.int 1792
    %26271 = torch.prim.ListConstruct %int4_23877, %2482, %int1792_23878 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26272 = torch.aten.view %26270, %26271 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23879 = torch.constant.int 4
    %26273 = torch.aten.mul.int %int4_23879, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23880 = torch.constant.int 4096
    %26274 = torch.prim.ListConstruct %26273, %int4096_23880 : (!torch.int, !torch.int) -> !torch.list<int>
    %26275 = torch.aten.view %26147, %26274 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26275, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26276 = torch.aten.mm %26275, %26234 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26276, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23881 = torch.constant.int 4
    %int1792_23882 = torch.constant.int 1792
    %26277 = torch.prim.ListConstruct %int4_23881, %2482, %int1792_23882 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26278 = torch.aten.view %26276, %26277 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_23883 = torch.constant.int 4
    %26279 = torch.aten.mul.int %int4_23883, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_23884 = torch.constant.int 4096
    %26280 = torch.prim.ListConstruct %26279, %int4096_23884 : (!torch.int, !torch.int) -> !torch.list<int>
    %26281 = torch.aten.view %26148, %26280 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26281, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26282 = torch.aten.mm %26281, %26236 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26282, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_23885 = torch.constant.int 4
    %int1792_23886 = torch.constant.int 1792
    %26283 = torch.prim.ListConstruct %int4_23885, %2482, %int1792_23886 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26284 = torch.aten.view %26282, %26283 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
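    // Elementwise gating per device: silu(gate) * up, the SwiGLU combination.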
    %26285 = torch.aten.mul.Tensor %26213, %26242 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26286 = torch.aten.mul.Tensor %26214, %26248 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26287 = torch.aten.mul.Tensor %26215, %26254 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26288 = torch.aten.mul.Tensor %26216, %26260 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26289 = torch.aten.mul.Tensor %26217, %26266 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26290 = torch.aten.mul.Tensor %26218, %26272 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26291 = torch.aten.mul.Tensor %26219, %26278 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %26292 = torch.aten.mul.Tensor %26220, %26284 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %26292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
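    // Down projection: transpose each [4096,1792] shard (%936 ... %943) to
    // [1792,4096]. Row-parallel, so each device's matmul below yields a
    // partial [4,?,4096] sum; presumably these are reduced across devices
    // further on, as with the partial sums at the top of this section.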
    %int1_23887 = torch.constant.int 1
    %int0_23888 = torch.constant.int 0
    %26293 = torch.prim.ListConstruct %int1_23887, %int0_23888 : (!torch.int, !torch.int) -> !torch.list<int>
    %26294 = torch.aten.permute %936, %26293 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_23889 = torch.constant.int 1
    %int0_23890 = torch.constant.int 0
    %26295 = torch.prim.ListConstruct %int1_23889, %int0_23890 : (!torch.int, !torch.int) -> !torch.list<int>
    %26296 = torch.aten.permute %937, %26295 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_23891 = torch.constant.int 1
    %int0_23892 = torch.constant.int 0
    %26297 = torch.prim.ListConstruct %int1_23891, %int0_23892 : (!torch.int, !torch.int) -> !torch.list<int>
    %26298 = torch.aten.permute %938, %26297 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_23893 = torch.constant.int 1
    %int0_23894 = torch.constant.int 0
    %26299 = torch.prim.ListConstruct %int1_23893, %int0_23894 : (!torch.int, !torch.int) -> !torch.list<int>
    %26300 = torch.aten.permute %939, %26299 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_23895 = torch.constant.int 1
    %int0_23896 = torch.constant.int 0
    %26301 = torch.prim.ListConstruct %int1_23895, %int0_23896 : (!torch.int, !torch.int) -> !torch.list<int>
    %26302 = torch.aten.permute %940, %26301 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_23897 = torch.constant.int 1
    %int0_23898 = torch.constant.int 0
    %26303 = torch.prim.ListConstruct %int1_23897, %int0_23898 : (!torch.int, !torch.int) -> !torch.list<int>
    %26304 = torch.aten.permute %941, %26303 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_23899 = torch.constant.int 1
    %int0_23900 = torch.constant.int 0
    %26305 = torch.prim.ListConstruct %int1_23899, %int0_23900 : (!torch.int, !torch.int) -> !torch.list<int>
    %26306 = torch.aten.permute %942, %26305 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_23901 = torch.constant.int 1
    %int0_23902 = torch.constant.int 0
    %26307 = torch.prim.ListConstruct %int1_23901, %int0_23902 : (!torch.int, !torch.int) -> !torch.list<int>
    %26308 = torch.aten.permute %943, %26307 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
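    // Per-device down matmul: re-derive the dynamic sequence size via
    // aten.size.int (rather than reusing %2482), flatten the gated [4,?,1792]
    // activations to [4*?,1792], multiply by the transposed shard, and
    // reshape back to [4,?,4096].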
    %int1_23903 = torch.constant.int 1
    %26309 = torch.aten.size.int %26170, %int1_23903 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_23904 = torch.constant.int 4
    %26310 = torch.aten.mul.int %int4_23904, %26309 : !torch.int, !torch.int -> !torch.int
    %int1792_23905 = torch.constant.int 1792
    %26311 = torch.prim.ListConstruct %26310, %int1792_23905 : (!torch.int, !torch.int) -> !torch.list<int>
    %26312 = torch.aten.view %26285, %26311 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26312, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %26313 = torch.aten.mm %26312, %26294 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26313, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23906 = torch.constant.int 4
    %int4096_23907 = torch.constant.int 4096
    %26314 = torch.prim.ListConstruct %int4_23906, %26309, %int4096_23907 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26315 = torch.aten.view %26313, %26314 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23908 = torch.constant.int 1
    %26316 = torch.aten.size.int %26176, %int1_23908 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_23909 = torch.constant.int 4
    %26317 = torch.aten.mul.int %int4_23909, %26316 : !torch.int, !torch.int -> !torch.int
    %int1792_23910 = torch.constant.int 1792
    %26318 = torch.prim.ListConstruct %26317, %int1792_23910 : (!torch.int, !torch.int) -> !torch.list<int>
    %26319 = torch.aten.view %26286, %26318 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26319, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %26320 = torch.aten.mm %26319, %26296 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26320, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23911 = torch.constant.int 4
    %int4096_23912 = torch.constant.int 4096
    %26321 = torch.prim.ListConstruct %int4_23911, %26316, %int4096_23912 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26322 = torch.aten.view %26320, %26321 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23913 = torch.constant.int 1
    %26323 = torch.aten.size.int %26182, %int1_23913 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_23914 = torch.constant.int 4
    %26324 = torch.aten.mul.int %int4_23914, %26323 : !torch.int, !torch.int -> !torch.int
    %int1792_23915 = torch.constant.int 1792
    %26325 = torch.prim.ListConstruct %26324, %int1792_23915 : (!torch.int, !torch.int) -> !torch.list<int>
    %26326 = torch.aten.view %26287, %26325 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26326, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %26327 = torch.aten.mm %26326, %26298 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26327, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23916 = torch.constant.int 4
    %int4096_23917 = torch.constant.int 4096
    %26328 = torch.prim.ListConstruct %int4_23916, %26323, %int4096_23917 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26329 = torch.aten.view %26327, %26328 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23918 = torch.constant.int 1
    %26330 = torch.aten.size.int %26188, %int1_23918 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_23919 = torch.constant.int 4
    %26331 = torch.aten.mul.int %int4_23919, %26330 : !torch.int, !torch.int -> !torch.int
    %int1792_23920 = torch.constant.int 1792
    %26332 = torch.prim.ListConstruct %26331, %int1792_23920 : (!torch.int, !torch.int) -> !torch.list<int>
    %26333 = torch.aten.view %26288, %26332 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26333, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %26334 = torch.aten.mm %26333, %26300 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26334, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23921 = torch.constant.int 4
    %int4096_23922 = torch.constant.int 4096
    %26335 = torch.prim.ListConstruct %int4_23921, %26330, %int4096_23922 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26336 = torch.aten.view %26334, %26335 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23923 = torch.constant.int 1
    %26337 = torch.aten.size.int %26194, %int1_23923 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_23924 = torch.constant.int 4
    %26338 = torch.aten.mul.int %int4_23924, %26337 : !torch.int, !torch.int -> !torch.int
    %int1792_23925 = torch.constant.int 1792
    %26339 = torch.prim.ListConstruct %26338, %int1792_23925 : (!torch.int, !torch.int) -> !torch.list<int>
    %26340 = torch.aten.view %26289, %26339 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26340, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %26341 = torch.aten.mm %26340, %26302 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26341, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23926 = torch.constant.int 4
    %int4096_23927 = torch.constant.int 4096
    %26342 = torch.prim.ListConstruct %int4_23926, %26337, %int4096_23927 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26343 = torch.aten.view %26341, %26342 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23928 = torch.constant.int 1
    %26344 = torch.aten.size.int %26200, %int1_23928 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_23929 = torch.constant.int 4
    %26345 = torch.aten.mul.int %int4_23929, %26344 : !torch.int, !torch.int -> !torch.int
    %int1792_23930 = torch.constant.int 1792
    %26346 = torch.prim.ListConstruct %26345, %int1792_23930 : (!torch.int, !torch.int) -> !torch.list<int>
    %26347 = torch.aten.view %26290, %26346 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26347, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %26348 = torch.aten.mm %26347, %26304 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26348, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23931 = torch.constant.int 4
    %int4096_23932 = torch.constant.int 4096
    %26349 = torch.prim.ListConstruct %int4_23931, %26344, %int4096_23932 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26350 = torch.aten.view %26348, %26349 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26350, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23933 = torch.constant.int 1
    %26351 = torch.aten.size.int %26206, %int1_23933 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_23934 = torch.constant.int 4
    %26352 = torch.aten.mul.int %int4_23934, %26351 : !torch.int, !torch.int -> !torch.int
    %int1792_23935 = torch.constant.int 1792
    %26353 = torch.prim.ListConstruct %26352, %int1792_23935 : (!torch.int, !torch.int) -> !torch.list<int>
    %26354 = torch.aten.view %26291, %26353 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26354, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %26355 = torch.aten.mm %26354, %26306 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26355, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23936 = torch.constant.int 4
    %int4096_23937 = torch.constant.int 4096
    %26356 = torch.prim.ListConstruct %int4_23936, %26351, %int4096_23937 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26357 = torch.aten.view %26355, %26356 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23938 = torch.constant.int 1
    %26358 = torch.aten.size.int %26212, %int1_23938 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_23939 = torch.constant.int 4
    %26359 = torch.aten.mul.int %int4_23939, %26358 : !torch.int, !torch.int -> !torch.int
    %int1792_23940 = torch.constant.int 1792
    %26360 = torch.prim.ListConstruct %26359, %int1792_23940 : (!torch.int, !torch.int) -> !torch.list<int>
    %26361 = torch.aten.view %26292, %26360 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %26361, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %26362 = torch.aten.mm %26361, %26308 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26362, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_23941 = torch.constant.int 4
    %int4096_23942 = torch.constant.int 4096
    %26363 = torch.prim.ListConstruct %int4_23941, %26358, %int4096_23942 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26364 = torch.aten.view %26362, %26363 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
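    // All-reduce of the eight partial products, written out as explicit device-to-device
    // transfers followed by chained adds. First: gather the seven remote partials onto
    // @__device_0 and sum them with the local result %26315.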
    %26365 = torch_c.to_builtin_tensor %26322 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23943 = arith.constant 1 : index
    %dim_23944 = tensor.dim %26365, %c1_23943 : tensor<4x?x4096xf16>
    %26366 = flow.tensor.transfer %26365 : tensor<4x?x4096xf16>{%dim_23944} to #hal.device.promise<@__device_0>
    %26367 = torch_c.from_builtin_tensor %26366 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26368 = torch_c.to_builtin_tensor %26329 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23945 = arith.constant 1 : index
    %dim_23946 = tensor.dim %26368, %c1_23945 : tensor<4x?x4096xf16>
    %26369 = flow.tensor.transfer %26368 : tensor<4x?x4096xf16>{%dim_23946} to #hal.device.promise<@__device_0>
    %26370 = torch_c.from_builtin_tensor %26369 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26371 = torch_c.to_builtin_tensor %26336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23947 = arith.constant 1 : index
    %dim_23948 = tensor.dim %26371, %c1_23947 : tensor<4x?x4096xf16>
    %26372 = flow.tensor.transfer %26371 : tensor<4x?x4096xf16>{%dim_23948} to #hal.device.promise<@__device_0>
    %26373 = torch_c.from_builtin_tensor %26372 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26374 = torch_c.to_builtin_tensor %26343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23949 = arith.constant 1 : index
    %dim_23950 = tensor.dim %26374, %c1_23949 : tensor<4x?x4096xf16>
    %26375 = flow.tensor.transfer %26374 : tensor<4x?x4096xf16>{%dim_23950} to #hal.device.promise<@__device_0>
    %26376 = torch_c.from_builtin_tensor %26375 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26377 = torch_c.to_builtin_tensor %26350 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23951 = arith.constant 1 : index
    %dim_23952 = tensor.dim %26377, %c1_23951 : tensor<4x?x4096xf16>
    %26378 = flow.tensor.transfer %26377 : tensor<4x?x4096xf16>{%dim_23952} to #hal.device.promise<@__device_0>
    %26379 = torch_c.from_builtin_tensor %26378 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26380 = torch_c.to_builtin_tensor %26357 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23953 = arith.constant 1 : index
    %dim_23954 = tensor.dim %26380, %c1_23953 : tensor<4x?x4096xf16>
    %26381 = flow.tensor.transfer %26380 : tensor<4x?x4096xf16>{%dim_23954} to #hal.device.promise<@__device_0>
    %26382 = torch_c.from_builtin_tensor %26381 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26383 = torch_c.to_builtin_tensor %26364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23955 = arith.constant 1 : index
    %dim_23956 = tensor.dim %26383, %c1_23955 : tensor<4x?x4096xf16>
    %26384 = flow.tensor.transfer %26383 : tensor<4x?x4096xf16>{%dim_23956} to #hal.device.promise<@__device_0>
    %26385 = torch_c.from_builtin_tensor %26384 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23957 = torch.constant.int 1
    %26386 = torch.aten.add.Tensor %26315, %26367, %int1_23957 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23958 = torch.constant.int 1
    %26387 = torch.aten.add.Tensor %26386, %26370, %int1_23958 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23959 = torch.constant.int 1
    %26388 = torch.aten.add.Tensor %26387, %26373, %int1_23959 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23960 = torch.constant.int 1
    %26389 = torch.aten.add.Tensor %26388, %26376, %int1_23960 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23961 = torch.constant.int 1
    %26390 = torch.aten.add.Tensor %26389, %26379, %int1_23961 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23962 = torch.constant.int 1
    %26391 = torch.aten.add.Tensor %26390, %26382, %int1_23962 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23963 = torch.constant.int 1
    %26392 = torch.aten.add.Tensor %26391, %26385, %int1_23963 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
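    // Same reduction for @__device_1: transfer the seven remote partials and
    // accumulate them with the local result %26322.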
    %26393 = torch_c.to_builtin_tensor %26315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23964 = arith.constant 1 : index
    %dim_23965 = tensor.dim %26393, %c1_23964 : tensor<4x?x4096xf16>
    %26394 = flow.tensor.transfer %26393 : tensor<4x?x4096xf16>{%dim_23965} to #hal.device.promise<@__device_1>
    %26395 = torch_c.from_builtin_tensor %26394 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26396 = torch_c.to_builtin_tensor %26329 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23966 = arith.constant 1 : index
    %dim_23967 = tensor.dim %26396, %c1_23966 : tensor<4x?x4096xf16>
    %26397 = flow.tensor.transfer %26396 : tensor<4x?x4096xf16>{%dim_23967} to #hal.device.promise<@__device_1>
    %26398 = torch_c.from_builtin_tensor %26397 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26399 = torch_c.to_builtin_tensor %26336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23968 = arith.constant 1 : index
    %dim_23969 = tensor.dim %26399, %c1_23968 : tensor<4x?x4096xf16>
    %26400 = flow.tensor.transfer %26399 : tensor<4x?x4096xf16>{%dim_23969} to #hal.device.promise<@__device_1>
    %26401 = torch_c.from_builtin_tensor %26400 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26402 = torch_c.to_builtin_tensor %26343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23970 = arith.constant 1 : index
    %dim_23971 = tensor.dim %26402, %c1_23970 : tensor<4x?x4096xf16>
    %26403 = flow.tensor.transfer %26402 : tensor<4x?x4096xf16>{%dim_23971} to #hal.device.promise<@__device_1>
    %26404 = torch_c.from_builtin_tensor %26403 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26405 = torch_c.to_builtin_tensor %26350 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23972 = arith.constant 1 : index
    %dim_23973 = tensor.dim %26405, %c1_23972 : tensor<4x?x4096xf16>
    %26406 = flow.tensor.transfer %26405 : tensor<4x?x4096xf16>{%dim_23973} to #hal.device.promise<@__device_1>
    %26407 = torch_c.from_builtin_tensor %26406 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26408 = torch_c.to_builtin_tensor %26357 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23974 = arith.constant 1 : index
    %dim_23975 = tensor.dim %26408, %c1_23974 : tensor<4x?x4096xf16>
    %26409 = flow.tensor.transfer %26408 : tensor<4x?x4096xf16>{%dim_23975} to #hal.device.promise<@__device_1>
    %26410 = torch_c.from_builtin_tensor %26409 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26411 = torch_c.to_builtin_tensor %26364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23976 = arith.constant 1 : index
    %dim_23977 = tensor.dim %26411, %c1_23976 : tensor<4x?x4096xf16>
    %26412 = flow.tensor.transfer %26411 : tensor<4x?x4096xf16>{%dim_23977} to #hal.device.promise<@__device_1>
    %26413 = torch_c.from_builtin_tensor %26412 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23978 = torch.constant.int 1
    %26414 = torch.aten.add.Tensor %26395, %26322, %int1_23978 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23979 = torch.constant.int 1
    %26415 = torch.aten.add.Tensor %26414, %26398, %int1_23979 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23980 = torch.constant.int 1
    %26416 = torch.aten.add.Tensor %26415, %26401, %int1_23980 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23981 = torch.constant.int 1
    %26417 = torch.aten.add.Tensor %26416, %26404, %int1_23981 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23982 = torch.constant.int 1
    %26418 = torch.aten.add.Tensor %26417, %26407, %int1_23982 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23983 = torch.constant.int 1
    %26419 = torch.aten.add.Tensor %26418, %26410, %int1_23983 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23984 = torch.constant.int 1
    %26420 = torch.aten.add.Tensor %26419, %26413, %int1_23984 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
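    // Reduction on @__device_2 (local partial %26329).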
    %26421 = torch_c.to_builtin_tensor %26315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23985 = arith.constant 1 : index
    %dim_23986 = tensor.dim %26421, %c1_23985 : tensor<4x?x4096xf16>
    %26422 = flow.tensor.transfer %26421 : tensor<4x?x4096xf16>{%dim_23986} to #hal.device.promise<@__device_2>
    %26423 = torch_c.from_builtin_tensor %26422 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26424 = torch_c.to_builtin_tensor %26322 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23987 = arith.constant 1 : index
    %dim_23988 = tensor.dim %26424, %c1_23987 : tensor<4x?x4096xf16>
    %26425 = flow.tensor.transfer %26424 : tensor<4x?x4096xf16>{%dim_23988} to #hal.device.promise<@__device_2>
    %26426 = torch_c.from_builtin_tensor %26425 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26427 = torch_c.to_builtin_tensor %26336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23989 = arith.constant 1 : index
    %dim_23990 = tensor.dim %26427, %c1_23989 : tensor<4x?x4096xf16>
    %26428 = flow.tensor.transfer %26427 : tensor<4x?x4096xf16>{%dim_23990} to #hal.device.promise<@__device_2>
    %26429 = torch_c.from_builtin_tensor %26428 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26430 = torch_c.to_builtin_tensor %26343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23991 = arith.constant 1 : index
    %dim_23992 = tensor.dim %26430, %c1_23991 : tensor<4x?x4096xf16>
    %26431 = flow.tensor.transfer %26430 : tensor<4x?x4096xf16>{%dim_23992} to #hal.device.promise<@__device_2>
    %26432 = torch_c.from_builtin_tensor %26431 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26433 = torch_c.to_builtin_tensor %26350 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23993 = arith.constant 1 : index
    %dim_23994 = tensor.dim %26433, %c1_23993 : tensor<4x?x4096xf16>
    %26434 = flow.tensor.transfer %26433 : tensor<4x?x4096xf16>{%dim_23994} to #hal.device.promise<@__device_2>
    %26435 = torch_c.from_builtin_tensor %26434 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26436 = torch_c.to_builtin_tensor %26357 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23995 = arith.constant 1 : index
    %dim_23996 = tensor.dim %26436, %c1_23995 : tensor<4x?x4096xf16>
    %26437 = flow.tensor.transfer %26436 : tensor<4x?x4096xf16>{%dim_23996} to #hal.device.promise<@__device_2>
    %26438 = torch_c.from_builtin_tensor %26437 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26439 = torch_c.to_builtin_tensor %26364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_23997 = arith.constant 1 : index
    %dim_23998 = tensor.dim %26439, %c1_23997 : tensor<4x?x4096xf16>
    %26440 = flow.tensor.transfer %26439 : tensor<4x?x4096xf16>{%dim_23998} to #hal.device.promise<@__device_2>
    %26441 = torch_c.from_builtin_tensor %26440 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_23999 = torch.constant.int 1
    %26442 = torch.aten.add.Tensor %26423, %26426, %int1_23999 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24000 = torch.constant.int 1
    %26443 = torch.aten.add.Tensor %26442, %26329, %int1_24000 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24001 = torch.constant.int 1
    %26444 = torch.aten.add.Tensor %26443, %26429, %int1_24001 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24002 = torch.constant.int 1
    %26445 = torch.aten.add.Tensor %26444, %26432, %int1_24002 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24003 = torch.constant.int 1
    %26446 = torch.aten.add.Tensor %26445, %26435, %int1_24003 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24004 = torch.constant.int 1
    %26447 = torch.aten.add.Tensor %26446, %26438, %int1_24004 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24005 = torch.constant.int 1
    %26448 = torch.aten.add.Tensor %26447, %26441, %int1_24005 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
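    // Reduction on @__device_3 (local partial %26336).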
    %26449 = torch_c.to_builtin_tensor %26315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24006 = arith.constant 1 : index
    %dim_24007 = tensor.dim %26449, %c1_24006 : tensor<4x?x4096xf16>
    %26450 = flow.tensor.transfer %26449 : tensor<4x?x4096xf16>{%dim_24007} to #hal.device.promise<@__device_3>
    %26451 = torch_c.from_builtin_tensor %26450 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26452 = torch_c.to_builtin_tensor %26322 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24008 = arith.constant 1 : index
    %dim_24009 = tensor.dim %26452, %c1_24008 : tensor<4x?x4096xf16>
    %26453 = flow.tensor.transfer %26452 : tensor<4x?x4096xf16>{%dim_24009} to #hal.device.promise<@__device_3>
    %26454 = torch_c.from_builtin_tensor %26453 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26455 = torch_c.to_builtin_tensor %26329 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24010 = arith.constant 1 : index
    %dim_24011 = tensor.dim %26455, %c1_24010 : tensor<4x?x4096xf16>
    %26456 = flow.tensor.transfer %26455 : tensor<4x?x4096xf16>{%dim_24011} to #hal.device.promise<@__device_3>
    %26457 = torch_c.from_builtin_tensor %26456 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26458 = torch_c.to_builtin_tensor %26343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24012 = arith.constant 1 : index
    %dim_24013 = tensor.dim %26458, %c1_24012 : tensor<4x?x4096xf16>
    %26459 = flow.tensor.transfer %26458 : tensor<4x?x4096xf16>{%dim_24013} to #hal.device.promise<@__device_3>
    %26460 = torch_c.from_builtin_tensor %26459 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26461 = torch_c.to_builtin_tensor %26350 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24014 = arith.constant 1 : index
    %dim_24015 = tensor.dim %26461, %c1_24014 : tensor<4x?x4096xf16>
    %26462 = flow.tensor.transfer %26461 : tensor<4x?x4096xf16>{%dim_24015} to #hal.device.promise<@__device_3>
    %26463 = torch_c.from_builtin_tensor %26462 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26464 = torch_c.to_builtin_tensor %26357 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24016 = arith.constant 1 : index
    %dim_24017 = tensor.dim %26464, %c1_24016 : tensor<4x?x4096xf16>
    %26465 = flow.tensor.transfer %26464 : tensor<4x?x4096xf16>{%dim_24017} to #hal.device.promise<@__device_3>
    %26466 = torch_c.from_builtin_tensor %26465 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26467 = torch_c.to_builtin_tensor %26364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24018 = arith.constant 1 : index
    %dim_24019 = tensor.dim %26467, %c1_24018 : tensor<4x?x4096xf16>
    %26468 = flow.tensor.transfer %26467 : tensor<4x?x4096xf16>{%dim_24019} to #hal.device.promise<@__device_3>
    %26469 = torch_c.from_builtin_tensor %26468 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24020 = torch.constant.int 1
    %26470 = torch.aten.add.Tensor %26451, %26454, %int1_24020 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24021 = torch.constant.int 1
    %26471 = torch.aten.add.Tensor %26470, %26457, %int1_24021 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24022 = torch.constant.int 1
    %26472 = torch.aten.add.Tensor %26471, %26336, %int1_24022 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24023 = torch.constant.int 1
    %26473 = torch.aten.add.Tensor %26472, %26460, %int1_24023 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24024 = torch.constant.int 1
    %26474 = torch.aten.add.Tensor %26473, %26463, %int1_24024 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24025 = torch.constant.int 1
    %26475 = torch.aten.add.Tensor %26474, %26466, %int1_24025 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24026 = torch.constant.int 1
    %26476 = torch.aten.add.Tensor %26475, %26469, %int1_24026 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
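    // Reduction on @__device_4 (local partial %26343).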
    %26477 = torch_c.to_builtin_tensor %26315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24027 = arith.constant 1 : index
    %dim_24028 = tensor.dim %26477, %c1_24027 : tensor<4x?x4096xf16>
    %26478 = flow.tensor.transfer %26477 : tensor<4x?x4096xf16>{%dim_24028} to #hal.device.promise<@__device_4>
    %26479 = torch_c.from_builtin_tensor %26478 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26480 = torch_c.to_builtin_tensor %26322 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24029 = arith.constant 1 : index
    %dim_24030 = tensor.dim %26480, %c1_24029 : tensor<4x?x4096xf16>
    %26481 = flow.tensor.transfer %26480 : tensor<4x?x4096xf16>{%dim_24030} to #hal.device.promise<@__device_4>
    %26482 = torch_c.from_builtin_tensor %26481 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26483 = torch_c.to_builtin_tensor %26329 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24031 = arith.constant 1 : index
    %dim_24032 = tensor.dim %26483, %c1_24031 : tensor<4x?x4096xf16>
    %26484 = flow.tensor.transfer %26483 : tensor<4x?x4096xf16>{%dim_24032} to #hal.device.promise<@__device_4>
    %26485 = torch_c.from_builtin_tensor %26484 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26486 = torch_c.to_builtin_tensor %26336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24033 = arith.constant 1 : index
    %dim_24034 = tensor.dim %26486, %c1_24033 : tensor<4x?x4096xf16>
    %26487 = flow.tensor.transfer %26486 : tensor<4x?x4096xf16>{%dim_24034} to #hal.device.promise<@__device_4>
    %26488 = torch_c.from_builtin_tensor %26487 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26489 = torch_c.to_builtin_tensor %26350 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24035 = arith.constant 1 : index
    %dim_24036 = tensor.dim %26489, %c1_24035 : tensor<4x?x4096xf16>
    %26490 = flow.tensor.transfer %26489 : tensor<4x?x4096xf16>{%dim_24036} to #hal.device.promise<@__device_4>
    %26491 = torch_c.from_builtin_tensor %26490 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26492 = torch_c.to_builtin_tensor %26357 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24037 = arith.constant 1 : index
    %dim_24038 = tensor.dim %26492, %c1_24037 : tensor<4x?x4096xf16>
    %26493 = flow.tensor.transfer %26492 : tensor<4x?x4096xf16>{%dim_24038} to #hal.device.promise<@__device_4>
    %26494 = torch_c.from_builtin_tensor %26493 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26495 = torch_c.to_builtin_tensor %26364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24039 = arith.constant 1 : index
    %dim_24040 = tensor.dim %26495, %c1_24039 : tensor<4x?x4096xf16>
    %26496 = flow.tensor.transfer %26495 : tensor<4x?x4096xf16>{%dim_24040} to #hal.device.promise<@__device_4>
    %26497 = torch_c.from_builtin_tensor %26496 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24041 = torch.constant.int 1
    %26498 = torch.aten.add.Tensor %26479, %26482, %int1_24041 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24042 = torch.constant.int 1
    %26499 = torch.aten.add.Tensor %26498, %26485, %int1_24042 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24043 = torch.constant.int 1
    %26500 = torch.aten.add.Tensor %26499, %26488, %int1_24043 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24044 = torch.constant.int 1
    %26501 = torch.aten.add.Tensor %26500, %26343, %int1_24044 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24045 = torch.constant.int 1
    %26502 = torch.aten.add.Tensor %26501, %26491, %int1_24045 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24046 = torch.constant.int 1
    %26503 = torch.aten.add.Tensor %26502, %26494, %int1_24046 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24047 = torch.constant.int 1
    %26504 = torch.aten.add.Tensor %26503, %26497, %int1_24047 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
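    // Reduction on @__device_5 (local partial %26350).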
    %26505 = torch_c.to_builtin_tensor %26315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24048 = arith.constant 1 : index
    %dim_24049 = tensor.dim %26505, %c1_24048 : tensor<4x?x4096xf16>
    %26506 = flow.tensor.transfer %26505 : tensor<4x?x4096xf16>{%dim_24049} to #hal.device.promise<@__device_5>
    %26507 = torch_c.from_builtin_tensor %26506 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26508 = torch_c.to_builtin_tensor %26322 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24050 = arith.constant 1 : index
    %dim_24051 = tensor.dim %26508, %c1_24050 : tensor<4x?x4096xf16>
    %26509 = flow.tensor.transfer %26508 : tensor<4x?x4096xf16>{%dim_24051} to #hal.device.promise<@__device_5>
    %26510 = torch_c.from_builtin_tensor %26509 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26511 = torch_c.to_builtin_tensor %26329 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24052 = arith.constant 1 : index
    %dim_24053 = tensor.dim %26511, %c1_24052 : tensor<4x?x4096xf16>
    %26512 = flow.tensor.transfer %26511 : tensor<4x?x4096xf16>{%dim_24053} to #hal.device.promise<@__device_5>
    %26513 = torch_c.from_builtin_tensor %26512 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26514 = torch_c.to_builtin_tensor %26336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24054 = arith.constant 1 : index
    %dim_24055 = tensor.dim %26514, %c1_24054 : tensor<4x?x4096xf16>
    %26515 = flow.tensor.transfer %26514 : tensor<4x?x4096xf16>{%dim_24055} to #hal.device.promise<@__device_5>
    %26516 = torch_c.from_builtin_tensor %26515 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26517 = torch_c.to_builtin_tensor %26343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24056 = arith.constant 1 : index
    %dim_24057 = tensor.dim %26517, %c1_24056 : tensor<4x?x4096xf16>
    %26518 = flow.tensor.transfer %26517 : tensor<4x?x4096xf16>{%dim_24057} to #hal.device.promise<@__device_5>
    %26519 = torch_c.from_builtin_tensor %26518 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26520 = torch_c.to_builtin_tensor %26357 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24058 = arith.constant 1 : index
    %dim_24059 = tensor.dim %26520, %c1_24058 : tensor<4x?x4096xf16>
    %26521 = flow.tensor.transfer %26520 : tensor<4x?x4096xf16>{%dim_24059} to #hal.device.promise<@__device_5>
    %26522 = torch_c.from_builtin_tensor %26521 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26523 = torch_c.to_builtin_tensor %26364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24060 = arith.constant 1 : index
    %dim_24061 = tensor.dim %26523, %c1_24060 : tensor<4x?x4096xf16>
    %26524 = flow.tensor.transfer %26523 : tensor<4x?x4096xf16>{%dim_24061} to #hal.device.promise<@__device_5>
    %26525 = torch_c.from_builtin_tensor %26524 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24062 = torch.constant.int 1
    %26526 = torch.aten.add.Tensor %26507, %26510, %int1_24062 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24063 = torch.constant.int 1
    %26527 = torch.aten.add.Tensor %26526, %26513, %int1_24063 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24064 = torch.constant.int 1
    %26528 = torch.aten.add.Tensor %26527, %26516, %int1_24064 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24065 = torch.constant.int 1
    %26529 = torch.aten.add.Tensor %26528, %26519, %int1_24065 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24066 = torch.constant.int 1
    %26530 = torch.aten.add.Tensor %26529, %26350, %int1_24066 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24067 = torch.constant.int 1
    %26531 = torch.aten.add.Tensor %26530, %26522, %int1_24067 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24068 = torch.constant.int 1
    %26532 = torch.aten.add.Tensor %26531, %26525, %int1_24068 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
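    // Reduction on @__device_6 (local partial %26357).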
    %26533 = torch_c.to_builtin_tensor %26315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24069 = arith.constant 1 : index
    %dim_24070 = tensor.dim %26533, %c1_24069 : tensor<4x?x4096xf16>
    %26534 = flow.tensor.transfer %26533 : tensor<4x?x4096xf16>{%dim_24070} to #hal.device.promise<@__device_6>
    %26535 = torch_c.from_builtin_tensor %26534 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26536 = torch_c.to_builtin_tensor %26322 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24071 = arith.constant 1 : index
    %dim_24072 = tensor.dim %26536, %c1_24071 : tensor<4x?x4096xf16>
    %26537 = flow.tensor.transfer %26536 : tensor<4x?x4096xf16>{%dim_24072} to #hal.device.promise<@__device_6>
    %26538 = torch_c.from_builtin_tensor %26537 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26539 = torch_c.to_builtin_tensor %26329 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24073 = arith.constant 1 : index
    %dim_24074 = tensor.dim %26539, %c1_24073 : tensor<4x?x4096xf16>
    %26540 = flow.tensor.transfer %26539 : tensor<4x?x4096xf16>{%dim_24074} to #hal.device.promise<@__device_6>
    %26541 = torch_c.from_builtin_tensor %26540 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26542 = torch_c.to_builtin_tensor %26336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24075 = arith.constant 1 : index
    %dim_24076 = tensor.dim %26542, %c1_24075 : tensor<4x?x4096xf16>
    %26543 = flow.tensor.transfer %26542 : tensor<4x?x4096xf16>{%dim_24076} to #hal.device.promise<@__device_6>
    %26544 = torch_c.from_builtin_tensor %26543 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26545 = torch_c.to_builtin_tensor %26343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24077 = arith.constant 1 : index
    %dim_24078 = tensor.dim %26545, %c1_24077 : tensor<4x?x4096xf16>
    %26546 = flow.tensor.transfer %26545 : tensor<4x?x4096xf16>{%dim_24078} to #hal.device.promise<@__device_6>
    %26547 = torch_c.from_builtin_tensor %26546 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26548 = torch_c.to_builtin_tensor %26350 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24079 = arith.constant 1 : index
    %dim_24080 = tensor.dim %26548, %c1_24079 : tensor<4x?x4096xf16>
    %26549 = flow.tensor.transfer %26548 : tensor<4x?x4096xf16>{%dim_24080} to #hal.device.promise<@__device_6>
    %26550 = torch_c.from_builtin_tensor %26549 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26551 = torch_c.to_builtin_tensor %26364 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24081 = arith.constant 1 : index
    %dim_24082 = tensor.dim %26551, %c1_24081 : tensor<4x?x4096xf16>
    %26552 = flow.tensor.transfer %26551 : tensor<4x?x4096xf16>{%dim_24082} to #hal.device.promise<@__device_6>
    %26553 = torch_c.from_builtin_tensor %26552 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24083 = torch.constant.int 1
    %26554 = torch.aten.add.Tensor %26535, %26538, %int1_24083 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24084 = torch.constant.int 1
    %26555 = torch.aten.add.Tensor %26554, %26541, %int1_24084 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24085 = torch.constant.int 1
    %26556 = torch.aten.add.Tensor %26555, %26544, %int1_24085 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24086 = torch.constant.int 1
    %26557 = torch.aten.add.Tensor %26556, %26547, %int1_24086 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24087 = torch.constant.int 1
    %26558 = torch.aten.add.Tensor %26557, %26550, %int1_24087 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24088 = torch.constant.int 1
    %26559 = torch.aten.add.Tensor %26558, %26357, %int1_24088 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24089 = torch.constant.int 1
    %26560 = torch.aten.add.Tensor %26559, %26553, %int1_24089 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
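    // Reduction on @__device_7 (local partial %26364).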
    %26561 = torch_c.to_builtin_tensor %26315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24090 = arith.constant 1 : index
    %dim_24091 = tensor.dim %26561, %c1_24090 : tensor<4x?x4096xf16>
    %26562 = flow.tensor.transfer %26561 : tensor<4x?x4096xf16>{%dim_24091} to #hal.device.promise<@__device_7>
    %26563 = torch_c.from_builtin_tensor %26562 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26564 = torch_c.to_builtin_tensor %26322 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24092 = arith.constant 1 : index
    %dim_24093 = tensor.dim %26564, %c1_24092 : tensor<4x?x4096xf16>
    %26565 = flow.tensor.transfer %26564 : tensor<4x?x4096xf16>{%dim_24093} to #hal.device.promise<@__device_7>
    %26566 = torch_c.from_builtin_tensor %26565 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26567 = torch_c.to_builtin_tensor %26329 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24094 = arith.constant 1 : index
    %dim_24095 = tensor.dim %26567, %c1_24094 : tensor<4x?x4096xf16>
    %26568 = flow.tensor.transfer %26567 : tensor<4x?x4096xf16>{%dim_24095} to #hal.device.promise<@__device_7>
    %26569 = torch_c.from_builtin_tensor %26568 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26570 = torch_c.to_builtin_tensor %26336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24096 = arith.constant 1 : index
    %dim_24097 = tensor.dim %26570, %c1_24096 : tensor<4x?x4096xf16>
    %26571 = flow.tensor.transfer %26570 : tensor<4x?x4096xf16>{%dim_24097} to #hal.device.promise<@__device_7>
    %26572 = torch_c.from_builtin_tensor %26571 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26573 = torch_c.to_builtin_tensor %26343 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24098 = arith.constant 1 : index
    %dim_24099 = tensor.dim %26573, %c1_24098 : tensor<4x?x4096xf16>
    %26574 = flow.tensor.transfer %26573 : tensor<4x?x4096xf16>{%dim_24099} to #hal.device.promise<@__device_7>
    %26575 = torch_c.from_builtin_tensor %26574 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26576 = torch_c.to_builtin_tensor %26350 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24100 = arith.constant 1 : index
    %dim_24101 = tensor.dim %26576, %c1_24100 : tensor<4x?x4096xf16>
    %26577 = flow.tensor.transfer %26576 : tensor<4x?x4096xf16>{%dim_24101} to #hal.device.promise<@__device_7>
    %26578 = torch_c.from_builtin_tensor %26577 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %26579 = torch_c.to_builtin_tensor %26357 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_24102 = arith.constant 1 : index
    %dim_24103 = tensor.dim %26579, %c1_24102 : tensor<4x?x4096xf16>
    %26580 = flow.tensor.transfer %26579 : tensor<4x?x4096xf16>{%dim_24103} to #hal.device.promise<@__device_7>
    %26581 = torch_c.from_builtin_tensor %26580 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24104 = torch.constant.int 1
    %26582 = torch.aten.add.Tensor %26563, %26566, %int1_24104 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24105 = torch.constant.int 1
    %26583 = torch.aten.add.Tensor %26582, %26569, %int1_24105 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24106 = torch.constant.int 1
    %26584 = torch.aten.add.Tensor %26583, %26572, %int1_24106 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24107 = torch.constant.int 1
    %26585 = torch.aten.add.Tensor %26584, %26575, %int1_24107 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24108 = torch.constant.int 1
    %26586 = torch.aten.add.Tensor %26585, %26578, %int1_24108 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24109 = torch.constant.int 1
    %26587 = torch.aten.add.Tensor %26586, %26581, %int1_24109 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24110 = torch.constant.int 1
    %26588 = torch.aten.add.Tensor %26587, %26364, %int1_24110 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
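    // %26588 now holds the reduced sum of all eight shards on @__device_7.
    // Matching reductions for devices 0..6 (%26392, %26420, ..., %26560) were
    // presumably produced by analogous chains earlier in the function.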
    %int1_24111 = torch.constant.int 1
    %26589 = torch.aten.add.Tensor %26069, %26392, %int1_24111 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24112 = torch.constant.int 1
    %26590 = torch.aten.add.Tensor %26070, %26420, %int1_24112 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24113 = torch.constant.int 1
    %26591 = torch.aten.add.Tensor %26071, %26448, %int1_24113 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24114 = torch.constant.int 1
    %26592 = torch.aten.add.Tensor %26072, %26476, %int1_24114 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24115 = torch.constant.int 1
    %26593 = torch.aten.add.Tensor %26073, %26504, %int1_24115 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24116 = torch.constant.int 1
    %26594 = torch.aten.add.Tensor %26074, %26532, %int1_24116 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24117 = torch.constant.int 1
    %26595 = torch.aten.add.Tensor %26075, %26560, %int1_24117 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_24118 = torch.constant.int 1
    %26596 = torch.aten.add.Tensor %26076, %26588, %int1_24118 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
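    // Residual connection, replicated per device: each new hidden state
    // %26589..%26596 = previous per-device hidden state (%26069..%26076)
    // + that device's reduced block output.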
    %int6_24119 = torch.constant.int 6
    %26597 = torch.prims.convert_element_type %26589, %int6_24119 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_24120 = torch.constant.int 6
    %26598 = torch.prims.convert_element_type %26590, %int6_24120 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_24121 = torch.constant.int 6
    %26599 = torch.prims.convert_element_type %26591, %int6_24121 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_24122 = torch.constant.int 6
    %26600 = torch.prims.convert_element_type %26592, %int6_24122 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_24123 = torch.constant.int 6
    %26601 = torch.prims.convert_element_type %26593, %int6_24123 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_24124 = torch.constant.int 6
    %26602 = torch.prims.convert_element_type %26594, %int6_24124 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_24125 = torch.constant.int 6
    %26603 = torch.prims.convert_element_type %26595, %int6_24125 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_24126 = torch.constant.int 6
    %26604 = torch.prims.convert_element_type %26596, %int6_24126 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
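    // Upcast to f32 (torch dtype code 6) so the normalization below is
    // computed at full precision before returning to f16.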
    %int2_24127 = torch.constant.int 2
    %26605 = torch.aten.pow.Tensor_Scalar %26597, %int2_24127 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_24128 = torch.constant.int 2
    %26606 = torch.aten.pow.Tensor_Scalar %26598, %int2_24128 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_24129 = torch.constant.int 2
    %26607 = torch.aten.pow.Tensor_Scalar %26599, %int2_24129 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_24130 = torch.constant.int 2
    %26608 = torch.aten.pow.Tensor_Scalar %26600, %int2_24130 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_24131 = torch.constant.int 2
    %26609 = torch.aten.pow.Tensor_Scalar %26601, %int2_24131 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_24132 = torch.constant.int 2
    %26610 = torch.aten.pow.Tensor_Scalar %26602, %int2_24132 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_24133 = torch.constant.int 2
    %26611 = torch.aten.pow.Tensor_Scalar %26603, %int2_24133 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_24134 = torch.constant.int 2
    %26612 = torch.aten.pow.Tensor_Scalar %26604, %int2_24134 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
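    // Elementwise square, x^2 -- the first step of what looks like RMSNorm.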
    %int-1_24135 = torch.constant.int -1
    %26613 = torch.prim.ListConstruct %int-1_24135 : (!torch.int) -> !torch.list<int>
    %true_24136 = torch.constant.bool true
    %none_24137 = torch.constant.none
    %26614 = torch.aten.mean.dim %26605, %26613, %true_24136, %none_24137 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_24138 = torch.constant.int -1
    %26615 = torch.prim.ListConstruct %int-1_24138 : (!torch.int) -> !torch.list<int>
    %true_24139 = torch.constant.bool true
    %none_24140 = torch.constant.none
    %26616 = torch.aten.mean.dim %26606, %26615, %true_24139, %none_24140 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_24141 = torch.constant.int -1
    %26617 = torch.prim.ListConstruct %int-1_24141 : (!torch.int) -> !torch.list<int>
    %true_24142 = torch.constant.bool true
    %none_24143 = torch.constant.none
    %26618 = torch.aten.mean.dim %26607, %26617, %true_24142, %none_24143 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_24144 = torch.constant.int -1
    %26619 = torch.prim.ListConstruct %int-1_24144 : (!torch.int) -> !torch.list<int>
    %true_24145 = torch.constant.bool true
    %none_24146 = torch.constant.none
    %26620 = torch.aten.mean.dim %26608, %26619, %true_24145, %none_24146 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_24147 = torch.constant.int -1
    %26621 = torch.prim.ListConstruct %int-1_24147 : (!torch.int) -> !torch.list<int>
    %true_24148 = torch.constant.bool true
    %none_24149 = torch.constant.none
    %26622 = torch.aten.mean.dim %26609, %26621, %true_24148, %none_24149 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_24150 = torch.constant.int -1
    %26623 = torch.prim.ListConstruct %int-1_24150 : (!torch.int) -> !torch.list<int>
    %true_24151 = torch.constant.bool true
    %none_24152 = torch.constant.none
    %26624 = torch.aten.mean.dim %26610, %26623, %true_24151, %none_24152 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_24153 = torch.constant.int -1
    %26625 = torch.prim.ListConstruct %int-1_24153 : (!torch.int) -> !torch.list<int>
    %true_24154 = torch.constant.bool true
    %none_24155 = torch.constant.none
    %26626 = torch.aten.mean.dim %26611, %26625, %true_24154, %none_24155 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_24156 = torch.constant.int -1
    %26627 = torch.prim.ListConstruct %int-1_24156 : (!torch.int) -> !torch.list<int>
    %true_24157 = torch.constant.bool true
    %none_24158 = torch.constant.none
    %26628 = torch.aten.mean.dim %26612, %26627, %true_24157, %none_24158 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
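    // mean(x^2) over the last (4096) dimension with keepdim=true -> [4,?,1].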
    %float9.999990e-06_24159 = torch.constant.float 9.9999997473787516E-6
    %int1_24160 = torch.constant.int 1
    %26629 = torch.aten.add.Scalar %26614, %float9.999990e-06_24159, %int1_24160 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_24161 = torch.constant.float 9.9999997473787516E-6
    %int1_24162 = torch.constant.int 1
    %26630 = torch.aten.add.Scalar %26616, %float9.999990e-06_24161, %int1_24162 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_24163 = torch.constant.float 9.9999997473787516E-6
    %int1_24164 = torch.constant.int 1
    %26631 = torch.aten.add.Scalar %26618, %float9.999990e-06_24163, %int1_24164 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_24165 = torch.constant.float 9.9999997473787516E-6
    %int1_24166 = torch.constant.int 1
    %26632 = torch.aten.add.Scalar %26620, %float9.999990e-06_24165, %int1_24166 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_24167 = torch.constant.float 9.9999997473787516E-6
    %int1_24168 = torch.constant.int 1
    %26633 = torch.aten.add.Scalar %26622, %float9.999990e-06_24167, %int1_24168 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_24169 = torch.constant.float 9.9999997473787516E-6
    %int1_24170 = torch.constant.int 1
    %26634 = torch.aten.add.Scalar %26624, %float9.999990e-06_24169, %int1_24170 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_24171 = torch.constant.float 9.9999997473787516E-6
    %int1_24172 = torch.constant.int 1
    %26635 = torch.aten.add.Scalar %26626, %float9.999990e-06_24171, %int1_24172 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_24173 = torch.constant.float 9.9999997473787516E-6
    %int1_24174 = torch.constant.int 1
    %26636 = torch.aten.add.Scalar %26628, %float9.999990e-06_24173, %int1_24174 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
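    // Add epsilon; 9.9999997473787516E-6 is the nearest f32 to 1.0e-5.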
    %26637 = torch.aten.rsqrt %26629 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26638 = torch.aten.rsqrt %26630 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26639 = torch.aten.rsqrt %26631 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26640 = torch.aten.rsqrt %26632 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26641 = torch.aten.rsqrt %26633 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26642 = torch.aten.rsqrt %26634 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26643 = torch.aten.rsqrt %26635 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %26644 = torch.aten.rsqrt %26636 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %26644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
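    // rsqrt -> 1 / sqrt(mean(x^2) + eps), the reciprocal RMS per token.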
    %26645 = torch.aten.mul.Tensor %26597, %26637 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26646 = torch.aten.mul.Tensor %26598, %26638 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26647 = torch.aten.mul.Tensor %26599, %26639 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26648 = torch.aten.mul.Tensor %26600, %26640 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26649 = torch.aten.mul.Tensor %26601, %26641 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26650 = torch.aten.mul.Tensor %26602, %26642 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26651 = torch.aten.mul.Tensor %26603, %26643 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26652 = torch.aten.mul.Tensor %26604, %26644 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
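    // Normalize: x * rsqrt(mean(x^2) + eps). Combined with the weight scaling
    // below, this matches RMSNorm: y = w * x / sqrt(mean(x^2) + eps).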
    %26653 = torch.aten.mul.Tensor %944, %26645 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26654 = torch.aten.mul.Tensor %945, %26646 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26655 = torch.aten.mul.Tensor %946, %26647 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26656 = torch.aten.mul.Tensor %947, %26648 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26657 = torch.aten.mul.Tensor %948, %26649 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26658 = torch.aten.mul.Tensor %949, %26650 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26659 = torch.aten.mul.Tensor %950, %26651 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %26660 = torch.aten.mul.Tensor %951, %26652 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %26660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
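    // Scale by the per-device [4096] norm weights (%944..%951), presumably
    // the replicated attn_norm globals for this block.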
    %int5_24175 = torch.constant.int 5
    %26661 = torch.prims.convert_element_type %26653, %int5_24175 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_24176 = torch.constant.int 5
    %26662 = torch.prims.convert_element_type %26654, %int5_24176 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_24177 = torch.constant.int 5
    %26663 = torch.prims.convert_element_type %26655, %int5_24177 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_24178 = torch.constant.int 5
    %26664 = torch.prims.convert_element_type %26656, %int5_24178 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_24179 = torch.constant.int 5
    %26665 = torch.prims.convert_element_type %26657, %int5_24179 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_24180 = torch.constant.int 5
    %26666 = torch.prims.convert_element_type %26658, %int5_24180 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_24181 = torch.constant.int 5
    %26667 = torch.prims.convert_element_type %26659, %int5_24181 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_24182 = torch.constant.int 5
    %26668 = torch.prims.convert_element_type %26660, %int5_24182 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %26668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
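    // Downcast back to f16 (torch dtype code 5) for the projection matmuls.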
    %int1_24183 = torch.constant.int 1
    %int0_24184 = torch.constant.int 0
    %26669 = torch.prim.ListConstruct %int1_24183, %int0_24184 : (!torch.int, !torch.int) -> !torch.list<int>
    %26670 = torch.aten.permute %952, %26669 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_24185 = torch.constant.int 1
    %int0_24186 = torch.constant.int 0
    %26671 = torch.prim.ListConstruct %int1_24185, %int0_24186 : (!torch.int, !torch.int) -> !torch.list<int>
    %26672 = torch.aten.permute %953, %26671 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_24187 = torch.constant.int 1
    %int0_24188 = torch.constant.int 0
    %26673 = torch.prim.ListConstruct %int1_24187, %int0_24188 : (!torch.int, !torch.int) -> !torch.list<int>
    %26674 = torch.aten.permute %954, %26673 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_24189 = torch.constant.int 1
    %int0_24190 = torch.constant.int 0
    %26675 = torch.prim.ListConstruct %int1_24189, %int0_24190 : (!torch.int, !torch.int) -> !torch.list<int>
    %26676 = torch.aten.permute %955, %26675 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_24191 = torch.constant.int 1
    %int0_24192 = torch.constant.int 0
    %26677 = torch.prim.ListConstruct %int1_24191, %int0_24192 : (!torch.int, !torch.int) -> !torch.list<int>
    %26678 = torch.aten.permute %956, %26677 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_24193 = torch.constant.int 1
    %int0_24194 = torch.constant.int 0
    %26679 = torch.prim.ListConstruct %int1_24193, %int0_24194 : (!torch.int, !torch.int) -> !torch.list<int>
    %26680 = torch.aten.permute %957, %26679 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_24195 = torch.constant.int 1
    %int0_24196 = torch.constant.int 0
    %26681 = torch.prim.ListConstruct %int1_24195, %int0_24196 : (!torch.int, !torch.int) -> !torch.list<int>
    %26682 = torch.aten.permute %958, %26681 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_24197 = torch.constant.int 1
    %int0_24198 = torch.constant.int 0
    %26683 = torch.prim.ListConstruct %int1_24197, %int0_24198 : (!torch.int, !torch.int) -> !torch.list<int>
    %26684 = torch.aten.permute %959, %26683 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
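    // Transpose the eight query-projection shards [512,4096] -> [4096,512];
    // 512 = 4 heads x head_dim 128 per device is one plausible reading.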
    %int4_24199 = torch.constant.int 4
    %26685 = torch.aten.mul.int %int4_24199, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24200 = torch.constant.int 4096
    %26686 = torch.prim.ListConstruct %26685, %int4096_24200 : (!torch.int, !torch.int) -> !torch.list<int>
    %26687 = torch.aten.view %26661, %26686 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26687, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26688 = torch.aten.mm %26687, %26670 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %26688, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_24201 = torch.constant.int 4
    %int512_24202 = torch.constant.int 512
    %26689 = torch.prim.ListConstruct %int4_24201, %2482, %int512_24202 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26690 = torch.aten.view %26688, %26689 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %26690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_24203 = torch.constant.int 4
    %26691 = torch.aten.mul.int %int4_24203, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24204 = torch.constant.int 4096
    %26692 = torch.prim.ListConstruct %26691, %int4096_24204 : (!torch.int, !torch.int) -> !torch.list<int>
    %26693 = torch.aten.view %26662, %26692 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26693, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26694 = torch.aten.mm %26693, %26672 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %26694, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_24205 = torch.constant.int 4
    %int512_24206 = torch.constant.int 512
    %26695 = torch.prim.ListConstruct %int4_24205, %2482, %int512_24206 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26696 = torch.aten.view %26694, %26695 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %26696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_24207 = torch.constant.int 4
    %26697 = torch.aten.mul.int %int4_24207, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24208 = torch.constant.int 4096
    %26698 = torch.prim.ListConstruct %26697, %int4096_24208 : (!torch.int, !torch.int) -> !torch.list<int>
    %26699 = torch.aten.view %26663, %26698 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26699, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26700 = torch.aten.mm %26699, %26674 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %26700, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_24209 = torch.constant.int 4
    %int512_24210 = torch.constant.int 512
    %26701 = torch.prim.ListConstruct %int4_24209, %2482, %int512_24210 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26702 = torch.aten.view %26700, %26701 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %26702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_24211 = torch.constant.int 4
    %26703 = torch.aten.mul.int %int4_24211, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24212 = torch.constant.int 4096
    %26704 = torch.prim.ListConstruct %26703, %int4096_24212 : (!torch.int, !torch.int) -> !torch.list<int>
    %26705 = torch.aten.view %26664, %26704 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26705, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26706 = torch.aten.mm %26705, %26676 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %26706, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_24213 = torch.constant.int 4
    %int512_24214 = torch.constant.int 512
    %26707 = torch.prim.ListConstruct %int4_24213, %2482, %int512_24214 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26708 = torch.aten.view %26706, %26707 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %26708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_24215 = torch.constant.int 4
    %26709 = torch.aten.mul.int %int4_24215, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24216 = torch.constant.int 4096
    %26710 = torch.prim.ListConstruct %26709, %int4096_24216 : (!torch.int, !torch.int) -> !torch.list<int>
    %26711 = torch.aten.view %26665, %26710 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26711, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26712 = torch.aten.mm %26711, %26678 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %26712, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_24217 = torch.constant.int 4
    %int512_24218 = torch.constant.int 512
    %26713 = torch.prim.ListConstruct %int4_24217, %2482, %int512_24218 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26714 = torch.aten.view %26712, %26713 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %26714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_24219 = torch.constant.int 4
    %26715 = torch.aten.mul.int %int4_24219, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24220 = torch.constant.int 4096
    %26716 = torch.prim.ListConstruct %26715, %int4096_24220 : (!torch.int, !torch.int) -> !torch.list<int>
    %26717 = torch.aten.view %26666, %26716 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26717, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26718 = torch.aten.mm %26717, %26680 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %26718, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_24221 = torch.constant.int 4
    %int512_24222 = torch.constant.int 512
    %26719 = torch.prim.ListConstruct %int4_24221, %2482, %int512_24222 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26720 = torch.aten.view %26718, %26719 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %26720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_24223 = torch.constant.int 4
    %26721 = torch.aten.mul.int %int4_24223, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24224 = torch.constant.int 4096
    %26722 = torch.prim.ListConstruct %26721, %int4096_24224 : (!torch.int, !torch.int) -> !torch.list<int>
    %26723 = torch.aten.view %26667, %26722 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26723, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26724 = torch.aten.mm %26723, %26682 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %26724, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_24225 = torch.constant.int 4
    %int512_24226 = torch.constant.int 512
    %26725 = torch.prim.ListConstruct %int4_24225, %2482, %int512_24226 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26726 = torch.aten.view %26724, %26725 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %26726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_24227 = torch.constant.int 4
    %26727 = torch.aten.mul.int %int4_24227, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24228 = torch.constant.int 4096
    %26728 = torch.prim.ListConstruct %26727, %int4096_24228 : (!torch.int, !torch.int) -> !torch.list<int>
    %26729 = torch.aten.view %26668, %26728 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26729, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26730 = torch.aten.mm %26729, %26684 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %26730, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_24229 = torch.constant.int 4
    %int512_24230 = torch.constant.int 512
    %26731 = torch.prim.ListConstruct %int4_24229, %2482, %int512_24230 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26732 = torch.aten.view %26730, %26731 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %26732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
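    // Per-shard Q projection: flatten [4,?,4096] -> [4*?,4096], mm with the
    // transposed shard, then reshape to [4,?,512] -- the standard lowering of
    // a batched linear layer through aten.view / aten.mm / aten.view.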
    %int1_24231 = torch.constant.int 1
    %int0_24232 = torch.constant.int 0
    %26733 = torch.prim.ListConstruct %int1_24231, %int0_24232 : (!torch.int, !torch.int) -> !torch.list<int>
    %26734 = torch.aten.permute %960, %26733 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24233 = torch.constant.int 1
    %int0_24234 = torch.constant.int 0
    %26735 = torch.prim.ListConstruct %int1_24233, %int0_24234 : (!torch.int, !torch.int) -> !torch.list<int>
    %26736 = torch.aten.permute %961, %26735 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24235 = torch.constant.int 1
    %int0_24236 = torch.constant.int 0
    %26737 = torch.prim.ListConstruct %int1_24235, %int0_24236 : (!torch.int, !torch.int) -> !torch.list<int>
    %26738 = torch.aten.permute %962, %26737 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24237 = torch.constant.int 1
    %int0_24238 = torch.constant.int 0
    %26739 = torch.prim.ListConstruct %int1_24237, %int0_24238 : (!torch.int, !torch.int) -> !torch.list<int>
    %26740 = torch.aten.permute %963, %26739 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24239 = torch.constant.int 1
    %int0_24240 = torch.constant.int 0
    %26741 = torch.prim.ListConstruct %int1_24239, %int0_24240 : (!torch.int, !torch.int) -> !torch.list<int>
    %26742 = torch.aten.permute %964, %26741 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24241 = torch.constant.int 1
    %int0_24242 = torch.constant.int 0
    %26743 = torch.prim.ListConstruct %int1_24241, %int0_24242 : (!torch.int, !torch.int) -> !torch.list<int>
    %26744 = torch.aten.permute %965, %26743 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24243 = torch.constant.int 1
    %int0_24244 = torch.constant.int 0
    %26745 = torch.prim.ListConstruct %int1_24243, %int0_24244 : (!torch.int, !torch.int) -> !torch.list<int>
    %26746 = torch.aten.permute %966, %26745 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24245 = torch.constant.int 1
    %int0_24246 = torch.constant.int 0
    %26747 = torch.prim.ListConstruct %int1_24245, %int0_24246 : (!torch.int, !torch.int) -> !torch.list<int>
    %26748 = torch.aten.permute %967, %26747 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
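    // Same transpose pattern for eight [128,4096] shards; the narrower 128
    // output is consistent with grouped-query attention (one 128-wide KV head
    // per device), so these are presumably the key-projection weights.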
    %int4_24247 = torch.constant.int 4
    %26749 = torch.aten.mul.int %int4_24247, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24248 = torch.constant.int 4096
    %26750 = torch.prim.ListConstruct %26749, %int4096_24248 : (!torch.int, !torch.int) -> !torch.list<int>
    %26751 = torch.aten.view %26661, %26750 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26751, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26752 = torch.aten.mm %26751, %26734 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26752, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24249 = torch.constant.int 4
    %int128_24250 = torch.constant.int 128
    %26753 = torch.prim.ListConstruct %int4_24249, %2482, %int128_24250 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26754 = torch.aten.view %26752, %26753 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24251 = torch.constant.int 4
    %26755 = torch.aten.mul.int %int4_24251, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24252 = torch.constant.int 4096
    %26756 = torch.prim.ListConstruct %26755, %int4096_24252 : (!torch.int, !torch.int) -> !torch.list<int>
    %26757 = torch.aten.view %26662, %26756 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26757, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26758 = torch.aten.mm %26757, %26736 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26758, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24253 = torch.constant.int 4
    %int128_24254 = torch.constant.int 128
    %26759 = torch.prim.ListConstruct %int4_24253, %2482, %int128_24254 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26760 = torch.aten.view %26758, %26759 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24255 = torch.constant.int 4
    %26761 = torch.aten.mul.int %int4_24255, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24256 = torch.constant.int 4096
    %26762 = torch.prim.ListConstruct %26761, %int4096_24256 : (!torch.int, !torch.int) -> !torch.list<int>
    %26763 = torch.aten.view %26663, %26762 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26763, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26764 = torch.aten.mm %26763, %26738 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26764, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24257 = torch.constant.int 4
    %int128_24258 = torch.constant.int 128
    %26765 = torch.prim.ListConstruct %int4_24257, %2482, %int128_24258 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26766 = torch.aten.view %26764, %26765 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24259 = torch.constant.int 4
    %26767 = torch.aten.mul.int %int4_24259, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24260 = torch.constant.int 4096
    %26768 = torch.prim.ListConstruct %26767, %int4096_24260 : (!torch.int, !torch.int) -> !torch.list<int>
    %26769 = torch.aten.view %26664, %26768 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26769, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26770 = torch.aten.mm %26769, %26740 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26770, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24261 = torch.constant.int 4
    %int128_24262 = torch.constant.int 128
    %26771 = torch.prim.ListConstruct %int4_24261, %2482, %int128_24262 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26772 = torch.aten.view %26770, %26771 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24263 = torch.constant.int 4
    %26773 = torch.aten.mul.int %int4_24263, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24264 = torch.constant.int 4096
    %26774 = torch.prim.ListConstruct %26773, %int4096_24264 : (!torch.int, !torch.int) -> !torch.list<int>
    %26775 = torch.aten.view %26665, %26774 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26775, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26776 = torch.aten.mm %26775, %26742 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26776, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24265 = torch.constant.int 4
    %int128_24266 = torch.constant.int 128
    %26777 = torch.prim.ListConstruct %int4_24265, %2482, %int128_24266 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26778 = torch.aten.view %26776, %26777 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24267 = torch.constant.int 4
    %26779 = torch.aten.mul.int %int4_24267, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24268 = torch.constant.int 4096
    %26780 = torch.prim.ListConstruct %26779, %int4096_24268 : (!torch.int, !torch.int) -> !torch.list<int>
    %26781 = torch.aten.view %26666, %26780 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26781, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26782 = torch.aten.mm %26781, %26744 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26782, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24269 = torch.constant.int 4
    %int128_24270 = torch.constant.int 128
    %26783 = torch.prim.ListConstruct %int4_24269, %2482, %int128_24270 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26784 = torch.aten.view %26782, %26783 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24271 = torch.constant.int 4
    %26785 = torch.aten.mul.int %int4_24271, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24272 = torch.constant.int 4096
    %26786 = torch.prim.ListConstruct %26785, %int4096_24272 : (!torch.int, !torch.int) -> !torch.list<int>
    %26787 = torch.aten.view %26667, %26786 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26787, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26788 = torch.aten.mm %26787, %26746 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26788, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24273 = torch.constant.int 4
    %int128_24274 = torch.constant.int 128
    %26789 = torch.prim.ListConstruct %int4_24273, %2482, %int128_24274 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26790 = torch.aten.view %26788, %26789 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24275 = torch.constant.int 4
    %26791 = torch.aten.mul.int %int4_24275, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24276 = torch.constant.int 4096
    %26792 = torch.prim.ListConstruct %26791, %int4096_24276 : (!torch.int, !torch.int) -> !torch.list<int>
    %26793 = torch.aten.view %26668, %26792 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26793, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26794 = torch.aten.mm %26793, %26748 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26794, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24277 = torch.constant.int 4
    %int128_24278 = torch.constant.int 128
    %26795 = torch.prim.ListConstruct %int4_24277, %2482, %int128_24278 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26796 = torch.aten.view %26794, %26795 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
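    // Per-shard projection onto the 128-wide output (K, under the reading
    // above), producing [4,?,128] on each device.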
    %int1_24279 = torch.constant.int 1
    %int0_24280 = torch.constant.int 0
    %26797 = torch.prim.ListConstruct %int1_24279, %int0_24280 : (!torch.int, !torch.int) -> !torch.list<int>
    %26798 = torch.aten.permute %968, %26797 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24281 = torch.constant.int 1
    %int0_24282 = torch.constant.int 0
    %26799 = torch.prim.ListConstruct %int1_24281, %int0_24282 : (!torch.int, !torch.int) -> !torch.list<int>
    %26800 = torch.aten.permute %969, %26799 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24283 = torch.constant.int 1
    %int0_24284 = torch.constant.int 0
    %26801 = torch.prim.ListConstruct %int1_24283, %int0_24284 : (!torch.int, !torch.int) -> !torch.list<int>
    %26802 = torch.aten.permute %970, %26801 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24285 = torch.constant.int 1
    %int0_24286 = torch.constant.int 0
    %26803 = torch.prim.ListConstruct %int1_24285, %int0_24286 : (!torch.int, !torch.int) -> !torch.list<int>
    %26804 = torch.aten.permute %971, %26803 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24287 = torch.constant.int 1
    %int0_24288 = torch.constant.int 0
    %26805 = torch.prim.ListConstruct %int1_24287, %int0_24288 : (!torch.int, !torch.int) -> !torch.list<int>
    %26806 = torch.aten.permute %972, %26805 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24289 = torch.constant.int 1
    %int0_24290 = torch.constant.int 0
    %26807 = torch.prim.ListConstruct %int1_24289, %int0_24290 : (!torch.int, !torch.int) -> !torch.list<int>
    %26808 = torch.aten.permute %973, %26807 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24291 = torch.constant.int 1
    %int0_24292 = torch.constant.int 0
    %26809 = torch.prim.ListConstruct %int1_24291, %int0_24292 : (!torch.int, !torch.int) -> !torch.list<int>
    %26810 = torch.aten.permute %974, %26809 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_24293 = torch.constant.int 1
    %int0_24294 = torch.constant.int 0
    %26811 = torch.prim.ListConstruct %int1_24293, %int0_24294 : (!torch.int, !torch.int) -> !torch.list<int>
    %26812 = torch.aten.permute %975, %26811 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
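    // A second group of [128,4096] shards transposed; given the Q, K order
    // above, these are presumably the value-projection weights.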
    %int4_24295 = torch.constant.int 4
    %26813 = torch.aten.mul.int %int4_24295, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24296 = torch.constant.int 4096
    %26814 = torch.prim.ListConstruct %26813, %int4096_24296 : (!torch.int, !torch.int) -> !torch.list<int>
    %26815 = torch.aten.view %26661, %26814 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26815, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26816 = torch.aten.mm %26815, %26798 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26816, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24297 = torch.constant.int 4
    %int128_24298 = torch.constant.int 128
    %26817 = torch.prim.ListConstruct %int4_24297, %2482, %int128_24298 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26818 = torch.aten.view %26816, %26817 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24299 = torch.constant.int 4
    %26819 = torch.aten.mul.int %int4_24299, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24300 = torch.constant.int 4096
    %26820 = torch.prim.ListConstruct %26819, %int4096_24300 : (!torch.int, !torch.int) -> !torch.list<int>
    %26821 = torch.aten.view %26662, %26820 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26821, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26822 = torch.aten.mm %26821, %26800 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26822, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24301 = torch.constant.int 4
    %int128_24302 = torch.constant.int 128
    %26823 = torch.prim.ListConstruct %int4_24301, %2482, %int128_24302 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26824 = torch.aten.view %26822, %26823 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24303 = torch.constant.int 4
    %26825 = torch.aten.mul.int %int4_24303, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24304 = torch.constant.int 4096
    %26826 = torch.prim.ListConstruct %26825, %int4096_24304 : (!torch.int, !torch.int) -> !torch.list<int>
    %26827 = torch.aten.view %26663, %26826 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26827, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26828 = torch.aten.mm %26827, %26802 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26828, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24305 = torch.constant.int 4
    %int128_24306 = torch.constant.int 128
    %26829 = torch.prim.ListConstruct %int4_24305, %2482, %int128_24306 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26830 = torch.aten.view %26828, %26829 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24307 = torch.constant.int 4
    %26831 = torch.aten.mul.int %int4_24307, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24308 = torch.constant.int 4096
    %26832 = torch.prim.ListConstruct %26831, %int4096_24308 : (!torch.int, !torch.int) -> !torch.list<int>
    %26833 = torch.aten.view %26664, %26832 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26833, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26834 = torch.aten.mm %26833, %26804 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26834, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24309 = torch.constant.int 4
    %int128_24310 = torch.constant.int 128
    %26835 = torch.prim.ListConstruct %int4_24309, %2482, %int128_24310 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26836 = torch.aten.view %26834, %26835 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24311 = torch.constant.int 4
    %26837 = torch.aten.mul.int %int4_24311, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24312 = torch.constant.int 4096
    %26838 = torch.prim.ListConstruct %26837, %int4096_24312 : (!torch.int, !torch.int) -> !torch.list<int>
    %26839 = torch.aten.view %26665, %26838 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26839, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26840 = torch.aten.mm %26839, %26806 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26840, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24313 = torch.constant.int 4
    %int128_24314 = torch.constant.int 128
    %26841 = torch.prim.ListConstruct %int4_24313, %2482, %int128_24314 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26842 = torch.aten.view %26840, %26841 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24315 = torch.constant.int 4
    %26843 = torch.aten.mul.int %int4_24315, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24316 = torch.constant.int 4096
    %26844 = torch.prim.ListConstruct %26843, %int4096_24316 : (!torch.int, !torch.int) -> !torch.list<int>
    %26845 = torch.aten.view %26666, %26844 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26845, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26846 = torch.aten.mm %26845, %26808 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26846, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24317 = torch.constant.int 4
    %int128_24318 = torch.constant.int 128
    %26847 = torch.prim.ListConstruct %int4_24317, %2482, %int128_24318 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26848 = torch.aten.view %26846, %26847 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24319 = torch.constant.int 4
    %26849 = torch.aten.mul.int %int4_24319, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24320 = torch.constant.int 4096
    %26850 = torch.prim.ListConstruct %26849, %int4096_24320 : (!torch.int, !torch.int) -> !torch.list<int>
    %26851 = torch.aten.view %26667, %26850 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26851, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26852 = torch.aten.mm %26851, %26810 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26852, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24321 = torch.constant.int 4
    %int128_24322 = torch.constant.int 128
    %26853 = torch.prim.ListConstruct %int4_24321, %2482, %int128_24322 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26854 = torch.aten.view %26852, %26853 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_24323 = torch.constant.int 4
    %26855 = torch.aten.mul.int %int4_24323, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_24324 = torch.constant.int 4096
    %26856 = torch.prim.ListConstruct %26855, %int4096_24324 : (!torch.int, !torch.int) -> !torch.list<int>
    %26857 = torch.aten.view %26668, %26856 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %26857, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %26858 = torch.aten.mm %26857, %26812 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %26858, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_24325 = torch.constant.int 4
    %int128_24326 = torch.constant.int 128
    %26859 = torch.prim.ListConstruct %int4_24325, %2482, %int128_24326 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26860 = torch.aten.view %26858, %26859 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %26860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
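    // Annotation (hedged): the view/mm/view triplets above flatten each device's
    // [4,?,4096] activation to [?,4096], multiply by a [4096,128] weight shard,
    // and restore the batch dimension as [4,?,128] -- one 128-wide output column
    // block per device. The eight [4,?,512] tensors reshaped below
    // (%26690 ... %26732) then expose four 128-dim heads per device, a layout
    // consistent with (though not confirmed by this dump alone) an 8-way
    // tensor-parallel grouped-query attention split.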
    %int4_24327 = torch.constant.int 4
    %int4_24328 = torch.constant.int 4
    %int128_24329 = torch.constant.int 128
    %26861 = torch.prim.ListConstruct %int4_24327, %2482, %int4_24328, %int128_24329 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26862 = torch.aten.view %26690, %26861 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %26862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_24330 = torch.constant.int 4
    %int4_24331 = torch.constant.int 4
    %int128_24332 = torch.constant.int 128
    %26863 = torch.prim.ListConstruct %int4_24330, %2482, %int4_24331, %int128_24332 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26864 = torch.aten.view %26696, %26863 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %26864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_24333 = torch.constant.int 4
    %int4_24334 = torch.constant.int 4
    %int128_24335 = torch.constant.int 128
    %26865 = torch.prim.ListConstruct %int4_24333, %2482, %int4_24334, %int128_24335 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26866 = torch.aten.view %26702, %26865 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %26866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_24336 = torch.constant.int 4
    %int4_24337 = torch.constant.int 4
    %int128_24338 = torch.constant.int 128
    %26867 = torch.prim.ListConstruct %int4_24336, %2482, %int4_24337, %int128_24338 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26868 = torch.aten.view %26708, %26867 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %26868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_24339 = torch.constant.int 4
    %int4_24340 = torch.constant.int 4
    %int128_24341 = torch.constant.int 128
    %26869 = torch.prim.ListConstruct %int4_24339, %2482, %int4_24340, %int128_24341 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26870 = torch.aten.view %26714, %26869 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %26870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_24342 = torch.constant.int 4
    %int4_24343 = torch.constant.int 4
    %int128_24344 = torch.constant.int 128
    %26871 = torch.prim.ListConstruct %int4_24342, %2482, %int4_24343, %int128_24344 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26872 = torch.aten.view %26720, %26871 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %26872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_24345 = torch.constant.int 4
    %int4_24346 = torch.constant.int 4
    %int128_24347 = torch.constant.int 128
    %26873 = torch.prim.ListConstruct %int4_24345, %2482, %int4_24346, %int128_24347 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26874 = torch.aten.view %26726, %26873 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %26874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_24348 = torch.constant.int 4
    %int4_24349 = torch.constant.int 4
    %int128_24350 = torch.constant.int 128
    %26875 = torch.prim.ListConstruct %int4_24348, %2482, %int4_24349, %int128_24350 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26876 = torch.aten.view %26732, %26875 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %26876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
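    // Annotation (hedged): the sixteen [4,?,128] tensors below each gain a
    // singleton head dimension, becoming [4,?,1,128] -- presumably the
    // per-device key projections (%26754 ... %26796) followed by the per-device
    // value projections (%26818 ... %26860), i.e. one KV head per device.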
    %int4_24351 = torch.constant.int 4
    %int1_24352 = torch.constant.int 1
    %int128_24353 = torch.constant.int 128
    %26877 = torch.prim.ListConstruct %int4_24351, %2482, %int1_24352, %int128_24353 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26878 = torch.aten.view %26754, %26877 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24354 = torch.constant.int 4
    %int1_24355 = torch.constant.int 1
    %int128_24356 = torch.constant.int 128
    %26879 = torch.prim.ListConstruct %int4_24354, %2482, %int1_24355, %int128_24356 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26880 = torch.aten.view %26760, %26879 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24357 = torch.constant.int 4
    %int1_24358 = torch.constant.int 1
    %int128_24359 = torch.constant.int 128
    %26881 = torch.prim.ListConstruct %int4_24357, %2482, %int1_24358, %int128_24359 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26882 = torch.aten.view %26766, %26881 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24360 = torch.constant.int 4
    %int1_24361 = torch.constant.int 1
    %int128_24362 = torch.constant.int 128
    %26883 = torch.prim.ListConstruct %int4_24360, %2482, %int1_24361, %int128_24362 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26884 = torch.aten.view %26772, %26883 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24363 = torch.constant.int 4
    %int1_24364 = torch.constant.int 1
    %int128_24365 = torch.constant.int 128
    %26885 = torch.prim.ListConstruct %int4_24363, %2482, %int1_24364, %int128_24365 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26886 = torch.aten.view %26778, %26885 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24366 = torch.constant.int 4
    %int1_24367 = torch.constant.int 1
    %int128_24368 = torch.constant.int 128
    %26887 = torch.prim.ListConstruct %int4_24366, %2482, %int1_24367, %int128_24368 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26888 = torch.aten.view %26784, %26887 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24369 = torch.constant.int 4
    %int1_24370 = torch.constant.int 1
    %int128_24371 = torch.constant.int 128
    %26889 = torch.prim.ListConstruct %int4_24369, %2482, %int1_24370, %int128_24371 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26890 = torch.aten.view %26790, %26889 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24372 = torch.constant.int 4
    %int1_24373 = torch.constant.int 1
    %int128_24374 = torch.constant.int 128
    %26891 = torch.prim.ListConstruct %int4_24372, %2482, %int1_24373, %int128_24374 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26892 = torch.aten.view %26796, %26891 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24375 = torch.constant.int 4
    %int1_24376 = torch.constant.int 1
    %int128_24377 = torch.constant.int 128
    %26893 = torch.prim.ListConstruct %int4_24375, %2482, %int1_24376, %int128_24377 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26894 = torch.aten.view %26818, %26893 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24378 = torch.constant.int 4
    %int1_24379 = torch.constant.int 1
    %int128_24380 = torch.constant.int 128
    %26895 = torch.prim.ListConstruct %int4_24378, %2482, %int1_24379, %int128_24380 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26896 = torch.aten.view %26824, %26895 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24381 = torch.constant.int 4
    %int1_24382 = torch.constant.int 1
    %int128_24383 = torch.constant.int 128
    %26897 = torch.prim.ListConstruct %int4_24381, %2482, %int1_24382, %int128_24383 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26898 = torch.aten.view %26830, %26897 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24384 = torch.constant.int 4
    %int1_24385 = torch.constant.int 1
    %int128_24386 = torch.constant.int 128
    %26899 = torch.prim.ListConstruct %int4_24384, %2482, %int1_24385, %int128_24386 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26900 = torch.aten.view %26836, %26899 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24387 = torch.constant.int 4
    %int1_24388 = torch.constant.int 1
    %int128_24389 = torch.constant.int 128
    %26901 = torch.prim.ListConstruct %int4_24387, %2482, %int1_24388, %int128_24389 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26902 = torch.aten.view %26842, %26901 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24390 = torch.constant.int 4
    %int1_24391 = torch.constant.int 1
    %int128_24392 = torch.constant.int 128
    %26903 = torch.prim.ListConstruct %int4_24390, %2482, %int1_24391, %int128_24392 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26904 = torch.aten.view %26848, %26903 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24393 = torch.constant.int 4
    %int1_24394 = torch.constant.int 1
    %int128_24395 = torch.constant.int 128
    %26905 = torch.prim.ListConstruct %int4_24393, %2482, %int1_24394, %int128_24395 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26906 = torch.aten.view %26854, %26905 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_24396 = torch.constant.int 4
    %int1_24397 = torch.constant.int 1
    %int128_24398 = torch.constant.int 128
    %26907 = torch.prim.ListConstruct %int4_24396, %2482, %int1_24397, %int128_24398 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %26908 = torch.aten.view %26860, %26907 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %26908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
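    // Annotation: rotary position embedding (RoPE) table, built on the host
    // ("cpu") and broadcast to all eight devices further below. In scalar terms
    // the ops that follow compute, for head dimension d = 128 and base
    // theta = 5.0e5:
    //   inv_freq[i] = 1 / theta^(2i/d),   i = 0..63
    //   angle[p][i] = p * inv_freq[i],    p = 0..131071
    //   table[p][i] = cos(angle[p][i]) + j*sin(angle[p][i])
    // yielding a [131072,64] complex<f32> table covering the maximum context.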
    %int131072_24399 = torch.constant.int 131072
    %none_24400 = torch.constant.none
    %none_24401 = torch.constant.none
    %cpu_24402 = torch.constant.device "cpu"
    %false_24403 = torch.constant.bool false
    %26909 = torch.aten.arange %int131072_24399, %none_24400, %none_24401, %cpu_24402, %false_24403 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_24404 = torch.constant.int 0
    %int128_24405 = torch.constant.int 128
    %int2_24406 = torch.constant.int 2
    %none_24407 = torch.constant.none
    %none_24408 = torch.constant.none
    %cpu_24409 = torch.constant.device "cpu"
    %false_24410 = torch.constant.bool false
    %26910 = torch.aten.arange.start_step %int0_24404, %int128_24405, %int2_24406, %none_24407, %none_24408, %cpu_24409, %false_24410 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_24411 = torch.constant.int 0
    %int0_24412 = torch.constant.int 0
    %int64_24413 = torch.constant.int 64
    %int1_24414 = torch.constant.int 1
    %26911 = torch.aten.slice.Tensor %26910, %int0_24411, %int0_24412, %int64_24413, %int1_24414 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_24415 = torch.constant.int 6
    %26912 = torch.prims.convert_element_type %26911, %int6_24415 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_24416 = torch.constant.int 128
    %26913 = torch.aten.div.Scalar %26912, %int128_24416 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_24417 = torch.constant.float 5.000000e+05
    %26914 = torch.aten.pow.Scalar %float5.000000e05_24417, %26913 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %26915 = torch.aten.reciprocal %26914 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_24418 = torch.constant.float 1.000000e+00
    %26916 = torch.aten.mul.Scalar %26915, %float1.000000e00_24418 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_24419 = torch.constant.int 131072
    %int1_24420 = torch.constant.int 1
    %26917 = torch.prim.ListConstruct %int131072_24419, %int1_24420 : (!torch.int, !torch.int) -> !torch.list<int>
    %26918 = torch.aten.view %26909, %26917 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %26919 = torch.aten.mul.Tensor %26918, %26916 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %26920 = torch.aten.cos %26919 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %26921 = torch.aten.sin %26919 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %26922 = torch.aten.complex %26920, %26921 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
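    // Annotation: the single host-side table is materialized once per device via
    // flow.tensor.transfer (@__device_0 ... @__device_7). Each transfer
    // round-trips through torch_c.to_builtin_tensor / from_builtin_tensor
    // because flow ops consume builtin tensors, not !torch.vtensor values.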
    %26923 = torch_c.to_builtin_tensor %26922 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %26924 = flow.tensor.transfer %26923 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %26925 = torch_c.from_builtin_tensor %26924 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %26926 = torch_c.to_builtin_tensor %26922 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %26927 = flow.tensor.transfer %26926 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %26928 = torch_c.from_builtin_tensor %26927 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %26929 = torch_c.to_builtin_tensor %26922 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %26930 = flow.tensor.transfer %26929 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %26931 = torch_c.from_builtin_tensor %26930 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %26932 = torch_c.to_builtin_tensor %26922 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %26933 = flow.tensor.transfer %26932 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %26934 = torch_c.from_builtin_tensor %26933 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %26935 = torch_c.to_builtin_tensor %26922 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %26936 = flow.tensor.transfer %26935 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %26937 = torch_c.from_builtin_tensor %26936 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %26938 = torch_c.to_builtin_tensor %26922 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %26939 = flow.tensor.transfer %26938 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %26940 = torch_c.from_builtin_tensor %26939 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %26941 = torch_c.to_builtin_tensor %26922 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %26942 = flow.tensor.transfer %26941 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %26943 = torch_c.from_builtin_tensor %26942 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %26944 = torch_c.to_builtin_tensor %26922 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %26945 = flow.tensor.transfer %26944 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %26946 = torch_c.from_builtin_tensor %26945 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
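    // Annotation: per-device rotary application for the query shards. Each of
    // the eight structurally identical blocks that follow (1) slices the
    // device-local table down to the current sequence length, (2) unsqueezes it
    // to [1,?,1,64] so it broadcasts over batch and heads, (3) bitcasts the
    // [4,?,4,128] f16 query shard to [4,?,4,64] complex<f16>, (4) multiplies by
    // the table (a complex multiply rotates each even/odd element pair), and
    // (5) bitcasts back to [4,?,4,128] f32 and truncates to f16.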
    %int1_24421 = torch.constant.int 1
    %26947 = torch.aten.size.int %26690, %int1_24421 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_24422 = torch.constant.int 0
    %26948 = torch.aten.add.int %int0_24422, %26947 : !torch.int, !torch.int -> !torch.int
    %int0_24423 = torch.constant.int 0
    %int0_24424 = torch.constant.int 0
    %int1_24425 = torch.constant.int 1
    %26949 = torch.aten.slice.Tensor %26925, %int0_24423, %int0_24424, %26948, %int1_24425 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %26949, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24426 = torch.constant.int 1
    %int0_24427 = torch.constant.int 0
    %int9223372036854775807_24428 = torch.constant.int 9223372036854775807
    %int1_24429 = torch.constant.int 1
    %26950 = torch.aten.slice.Tensor %26949, %int1_24426, %int0_24427, %int9223372036854775807_24428, %int1_24429 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %26950, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24430 = torch.constant.int 0
    %26951 = torch.aten.unsqueeze %26950, %int0_24430 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %26951, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24431 = torch.constant.int 2
    %26952 = torch.aten.unsqueeze %26951, %int2_24431 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %26952, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24432 = torch.constant.int 3
    %int0_24433 = torch.constant.int 0
    %int9223372036854775807_24434 = torch.constant.int 9223372036854775807
    %int1_24435 = torch.constant.int 1
    %26953 = torch.aten.slice.Tensor %26952, %int3_24432, %int0_24433, %int9223372036854775807_24434, %int1_24435 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %26953, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %26954 = torch_c.to_builtin_tensor %26862 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_24436 = arith.constant 1 : index
    %dim_24437 = tensor.dim %26954, %c1_24436 : tensor<4x?x4x128xf16>
    %26955 = flow.tensor.bitcast %26954 : tensor<4x?x4x128xf16>{%dim_24437} -> tensor<4x?x4x64xcomplex<f16>>{%dim_24437}
    %26956 = torch_c.from_builtin_tensor %26955 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %26956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %26957 = torch.aten.mul.Tensor %26956, %26953 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %26957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %26958 = torch_c.to_builtin_tensor %26957 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_24438 = arith.constant 1 : index
    %dim_24439 = tensor.dim %26958, %c1_24438 : tensor<4x?x4x64xcomplex<f32>>
    %26959 = flow.tensor.bitcast %26958 : tensor<4x?x4x64xcomplex<f32>>{%dim_24439} -> tensor<4x?x4x128xf32>{%dim_24439}
    %26960 = torch_c.from_builtin_tensor %26959 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %26960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_24440 = torch.constant.int 5
    %26961 = torch.prims.convert_element_type %26960, %int5_24440 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %26961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
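    // Annotation: end of the @__device_0 block; the same
    // slice/unsqueeze/bitcast/multiply sequence repeats below for @__device_1
    // through @__device_7, consuming the transferred tables %26928 ... %26946
    // and the corresponding query shards %26864 ... %26876.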
    %int1_24441 = torch.constant.int 1
    %26962 = torch.aten.size.int %26696, %int1_24441 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_24442 = torch.constant.int 0
    %26963 = torch.aten.add.int %int0_24442, %26962 : !torch.int, !torch.int -> !torch.int
    %int0_24443 = torch.constant.int 0
    %int0_24444 = torch.constant.int 0
    %int1_24445 = torch.constant.int 1
    %26964 = torch.aten.slice.Tensor %26928, %int0_24443, %int0_24444, %26963, %int1_24445 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %26964, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24446 = torch.constant.int 1
    %int0_24447 = torch.constant.int 0
    %int9223372036854775807_24448 = torch.constant.int 9223372036854775807
    %int1_24449 = torch.constant.int 1
    %26965 = torch.aten.slice.Tensor %26964, %int1_24446, %int0_24447, %int9223372036854775807_24448, %int1_24449 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %26965, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24450 = torch.constant.int 0
    %26966 = torch.aten.unsqueeze %26965, %int0_24450 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %26966, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24451 = torch.constant.int 2
    %26967 = torch.aten.unsqueeze %26966, %int2_24451 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %26967, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24452 = torch.constant.int 3
    %int0_24453 = torch.constant.int 0
    %int9223372036854775807_24454 = torch.constant.int 9223372036854775807
    %int1_24455 = torch.constant.int 1
    %26968 = torch.aten.slice.Tensor %26967, %int3_24452, %int0_24453, %int9223372036854775807_24454, %int1_24455 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %26968, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %26969 = torch_c.to_builtin_tensor %26864 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_24456 = arith.constant 1 : index
    %dim_24457 = tensor.dim %26969, %c1_24456 : tensor<4x?x4x128xf16>
    %26970 = flow.tensor.bitcast %26969 : tensor<4x?x4x128xf16>{%dim_24457} -> tensor<4x?x4x64xcomplex<f16>>{%dim_24457}
    %26971 = torch_c.from_builtin_tensor %26970 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %26971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %26972 = torch.aten.mul.Tensor %26971, %26968 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %26972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %26973 = torch_c.to_builtin_tensor %26972 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_24458 = arith.constant 1 : index
    %dim_24459 = tensor.dim %26973, %c1_24458 : tensor<4x?x4x64xcomplex<f32>>
    %26974 = flow.tensor.bitcast %26973 : tensor<4x?x4x64xcomplex<f32>>{%dim_24459} -> tensor<4x?x4x128xf32>{%dim_24459}
    %26975 = torch_c.from_builtin_tensor %26974 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %26975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_24460 = torch.constant.int 5
    %26976 = torch.prims.convert_element_type %26975, %int5_24460 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %26976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_24461 = torch.constant.int 1
    %26977 = torch.aten.size.int %26702, %int1_24461 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_24462 = torch.constant.int 0
    %26978 = torch.aten.add.int %int0_24462, %26977 : !torch.int, !torch.int -> !torch.int
    %int0_24463 = torch.constant.int 0
    %int0_24464 = torch.constant.int 0
    %int1_24465 = torch.constant.int 1
    %26979 = torch.aten.slice.Tensor %26931, %int0_24463, %int0_24464, %26978, %int1_24465 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %26979, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24466 = torch.constant.int 1
    %int0_24467 = torch.constant.int 0
    %int9223372036854775807_24468 = torch.constant.int 9223372036854775807
    %int1_24469 = torch.constant.int 1
    %26980 = torch.aten.slice.Tensor %26979, %int1_24466, %int0_24467, %int9223372036854775807_24468, %int1_24469 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %26980, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24470 = torch.constant.int 0
    %26981 = torch.aten.unsqueeze %26980, %int0_24470 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %26981, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24471 = torch.constant.int 2
    %26982 = torch.aten.unsqueeze %26981, %int2_24471 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %26982, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24472 = torch.constant.int 3
    %int0_24473 = torch.constant.int 0
    %int9223372036854775807_24474 = torch.constant.int 9223372036854775807
    %int1_24475 = torch.constant.int 1
    %26983 = torch.aten.slice.Tensor %26982, %int3_24472, %int0_24473, %int9223372036854775807_24474, %int1_24475 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %26983, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %26984 = torch_c.to_builtin_tensor %26866 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_24476 = arith.constant 1 : index
    %dim_24477 = tensor.dim %26984, %c1_24476 : tensor<4x?x4x128xf16>
    %26985 = flow.tensor.bitcast %26984 : tensor<4x?x4x128xf16>{%dim_24477} -> tensor<4x?x4x64xcomplex<f16>>{%dim_24477}
    %26986 = torch_c.from_builtin_tensor %26985 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %26986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %26987 = torch.aten.mul.Tensor %26986, %26983 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %26987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %26988 = torch_c.to_builtin_tensor %26987 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_24478 = arith.constant 1 : index
    %dim_24479 = tensor.dim %26988, %c1_24478 : tensor<4x?x4x64xcomplex<f32>>
    %26989 = flow.tensor.bitcast %26988 : tensor<4x?x4x64xcomplex<f32>>{%dim_24479} -> tensor<4x?x4x128xf32>{%dim_24479}
    %26990 = torch_c.from_builtin_tensor %26989 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %26990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_24480 = torch.constant.int 5
    %26991 = torch.prims.convert_element_type %26990, %int5_24480 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %26991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_24481 = torch.constant.int 1
    %26992 = torch.aten.size.int %26708, %int1_24481 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_24482 = torch.constant.int 0
    %26993 = torch.aten.add.int %int0_24482, %26992 : !torch.int, !torch.int -> !torch.int
    %int0_24483 = torch.constant.int 0
    %int0_24484 = torch.constant.int 0
    %int1_24485 = torch.constant.int 1
    %26994 = torch.aten.slice.Tensor %26934, %int0_24483, %int0_24484, %26993, %int1_24485 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %26994, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24486 = torch.constant.int 1
    %int0_24487 = torch.constant.int 0
    %int9223372036854775807_24488 = torch.constant.int 9223372036854775807
    %int1_24489 = torch.constant.int 1
    %26995 = torch.aten.slice.Tensor %26994, %int1_24486, %int0_24487, %int9223372036854775807_24488, %int1_24489 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %26995, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24490 = torch.constant.int 0
    %26996 = torch.aten.unsqueeze %26995, %int0_24490 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %26996, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24491 = torch.constant.int 2
    %26997 = torch.aten.unsqueeze %26996, %int2_24491 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %26997, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24492 = torch.constant.int 3
    %int0_24493 = torch.constant.int 0
    %int9223372036854775807_24494 = torch.constant.int 9223372036854775807
    %int1_24495 = torch.constant.int 1
    %26998 = torch.aten.slice.Tensor %26997, %int3_24492, %int0_24493, %int9223372036854775807_24494, %int1_24495 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %26998, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %26999 = torch_c.to_builtin_tensor %26868 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_24496 = arith.constant 1 : index
    %dim_24497 = tensor.dim %26999, %c1_24496 : tensor<4x?x4x128xf16>
    %27000 = flow.tensor.bitcast %26999 : tensor<4x?x4x128xf16>{%dim_24497} -> tensor<4x?x4x64xcomplex<f16>>{%dim_24497}
    %27001 = torch_c.from_builtin_tensor %27000 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %27001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %27002 = torch.aten.mul.Tensor %27001, %26998 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %27002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %27003 = torch_c.to_builtin_tensor %27002 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_24498 = arith.constant 1 : index
    %dim_24499 = tensor.dim %27003, %c1_24498 : tensor<4x?x4x64xcomplex<f32>>
    %27004 = flow.tensor.bitcast %27003 : tensor<4x?x4x64xcomplex<f32>>{%dim_24499} -> tensor<4x?x4x128xf32>{%dim_24499}
    %27005 = torch_c.from_builtin_tensor %27004 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %27005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_24500 = torch.constant.int 5
    %27006 = torch.prims.convert_element_type %27005, %int5_24500 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_24501 = torch.constant.int 1
    %27007 = torch.aten.size.int %26714, %int1_24501 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_24502 = torch.constant.int 0
    %27008 = torch.aten.add.int %int0_24502, %27007 : !torch.int, !torch.int -> !torch.int
    %int0_24503 = torch.constant.int 0
    %int0_24504 = torch.constant.int 0
    %int1_24505 = torch.constant.int 1
    %27009 = torch.aten.slice.Tensor %26937, %int0_24503, %int0_24504, %27008, %int1_24505 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27009, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24506 = torch.constant.int 1
    %int0_24507 = torch.constant.int 0
    %int9223372036854775807_24508 = torch.constant.int 9223372036854775807
    %int1_24509 = torch.constant.int 1
    %27010 = torch.aten.slice.Tensor %27009, %int1_24506, %int0_24507, %int9223372036854775807_24508, %int1_24509 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27010, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24510 = torch.constant.int 0
    %27011 = torch.aten.unsqueeze %27010, %int0_24510 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %27011, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24511 = torch.constant.int 2
    %27012 = torch.aten.unsqueeze %27011, %int2_24511 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27012, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24512 = torch.constant.int 3
    %int0_24513 = torch.constant.int 0
    %int9223372036854775807_24514 = torch.constant.int 9223372036854775807
    %int1_24515 = torch.constant.int 1
    %27013 = torch.aten.slice.Tensor %27012, %int3_24512, %int0_24513, %int9223372036854775807_24514, %int1_24515 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27013, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %27014 = torch_c.to_builtin_tensor %26870 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_24516 = arith.constant 1 : index
    %dim_24517 = tensor.dim %27014, %c1_24516 : tensor<4x?x4x128xf16>
    %27015 = flow.tensor.bitcast %27014 : tensor<4x?x4x128xf16>{%dim_24517} -> tensor<4x?x4x64xcomplex<f16>>{%dim_24517}
    %27016 = torch_c.from_builtin_tensor %27015 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %27016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %27017 = torch.aten.mul.Tensor %27016, %27013 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %27017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %27018 = torch_c.to_builtin_tensor %27017 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_24518 = arith.constant 1 : index
    %dim_24519 = tensor.dim %27018, %c1_24518 : tensor<4x?x4x64xcomplex<f32>>
    %27019 = flow.tensor.bitcast %27018 : tensor<4x?x4x64xcomplex<f32>>{%dim_24519} -> tensor<4x?x4x128xf32>{%dim_24519}
    %27020 = torch_c.from_builtin_tensor %27019 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %27020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_24520 = torch.constant.int 5
    %27021 = torch.prims.convert_element_type %27020, %int5_24520 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_24521 = torch.constant.int 1
    %27022 = torch.aten.size.int %26720, %int1_24521 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_24522 = torch.constant.int 0
    %27023 = torch.aten.add.int %int0_24522, %27022 : !torch.int, !torch.int -> !torch.int
    %int0_24523 = torch.constant.int 0
    %int0_24524 = torch.constant.int 0
    %int1_24525 = torch.constant.int 1
    %27024 = torch.aten.slice.Tensor %26940, %int0_24523, %int0_24524, %27023, %int1_24525 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27024, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24526 = torch.constant.int 1
    %int0_24527 = torch.constant.int 0
    %int9223372036854775807_24528 = torch.constant.int 9223372036854775807
    %int1_24529 = torch.constant.int 1
    %27025 = torch.aten.slice.Tensor %27024, %int1_24526, %int0_24527, %int9223372036854775807_24528, %int1_24529 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27025, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24530 = torch.constant.int 0
    %27026 = torch.aten.unsqueeze %27025, %int0_24530 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %27026, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24531 = torch.constant.int 2
    %27027 = torch.aten.unsqueeze %27026, %int2_24531 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27027, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24532 = torch.constant.int 3
    %int0_24533 = torch.constant.int 0
    %int9223372036854775807_24534 = torch.constant.int 9223372036854775807
    %int1_24535 = torch.constant.int 1
    %27028 = torch.aten.slice.Tensor %27027, %int3_24532, %int0_24533, %int9223372036854775807_24534, %int1_24535 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27028, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %27029 = torch_c.to_builtin_tensor %26872 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_24536 = arith.constant 1 : index
    %dim_24537 = tensor.dim %27029, %c1_24536 : tensor<4x?x4x128xf16>
    %27030 = flow.tensor.bitcast %27029 : tensor<4x?x4x128xf16>{%dim_24537} -> tensor<4x?x4x64xcomplex<f16>>{%dim_24537}
    %27031 = torch_c.from_builtin_tensor %27030 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %27031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %27032 = torch.aten.mul.Tensor %27031, %27028 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %27032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %27033 = torch_c.to_builtin_tensor %27032 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_24538 = arith.constant 1 : index
    %dim_24539 = tensor.dim %27033, %c1_24538 : tensor<4x?x4x64xcomplex<f32>>
    %27034 = flow.tensor.bitcast %27033 : tensor<4x?x4x64xcomplex<f32>>{%dim_24539} -> tensor<4x?x4x128xf32>{%dim_24539}
    %27035 = torch_c.from_builtin_tensor %27034 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %27035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_24540 = torch.constant.int 5
    %27036 = torch.prims.convert_element_type %27035, %int5_24540 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_24541 = torch.constant.int 1
    %27037 = torch.aten.size.int %26726, %int1_24541 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_24542 = torch.constant.int 0
    %27038 = torch.aten.add.int %int0_24542, %27037 : !torch.int, !torch.int -> !torch.int
    %int0_24543 = torch.constant.int 0
    %int0_24544 = torch.constant.int 0
    %int1_24545 = torch.constant.int 1
    %27039 = torch.aten.slice.Tensor %26943, %int0_24543, %int0_24544, %27038, %int1_24545 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27039, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24546 = torch.constant.int 1
    %int0_24547 = torch.constant.int 0
    %int9223372036854775807_24548 = torch.constant.int 9223372036854775807
    %int1_24549 = torch.constant.int 1
    %27040 = torch.aten.slice.Tensor %27039, %int1_24546, %int0_24547, %int9223372036854775807_24548, %int1_24549 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27040, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24550 = torch.constant.int 0
    %27041 = torch.aten.unsqueeze %27040, %int0_24550 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %27041, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24551 = torch.constant.int 2
    %27042 = torch.aten.unsqueeze %27041, %int2_24551 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27042, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24552 = torch.constant.int 3
    %int0_24553 = torch.constant.int 0
    %int9223372036854775807_24554 = torch.constant.int 9223372036854775807
    %int1_24555 = torch.constant.int 1
    %27043 = torch.aten.slice.Tensor %27042, %int3_24552, %int0_24553, %int9223372036854775807_24554, %int1_24555 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27043, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %27044 = torch_c.to_builtin_tensor %26874 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_24556 = arith.constant 1 : index
    %dim_24557 = tensor.dim %27044, %c1_24556 : tensor<4x?x4x128xf16>
    %27045 = flow.tensor.bitcast %27044 : tensor<4x?x4x128xf16>{%dim_24557} -> tensor<4x?x4x64xcomplex<f16>>{%dim_24557}
    %27046 = torch_c.from_builtin_tensor %27045 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %27046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %27047 = torch.aten.mul.Tensor %27046, %27043 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %27047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %27048 = torch_c.to_builtin_tensor %27047 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_24558 = arith.constant 1 : index
    %dim_24559 = tensor.dim %27048, %c1_24558 : tensor<4x?x4x64xcomplex<f32>>
    %27049 = flow.tensor.bitcast %27048 : tensor<4x?x4x64xcomplex<f32>>{%dim_24559} -> tensor<4x?x4x128xf32>{%dim_24559}
    %27050 = torch_c.from_builtin_tensor %27049 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %27050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_24560 = torch.constant.int 5
    %27051 = torch.prims.convert_element_type %27050, %int5_24560 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_24561 = torch.constant.int 1
    %27052 = torch.aten.size.int %26732, %int1_24561 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_24562 = torch.constant.int 0
    %27053 = torch.aten.add.int %int0_24562, %27052 : !torch.int, !torch.int -> !torch.int
    %int0_24563 = torch.constant.int 0
    %int0_24564 = torch.constant.int 0
    %int1_24565 = torch.constant.int 1
    %27054 = torch.aten.slice.Tensor %26946, %int0_24563, %int0_24564, %27053, %int1_24565 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27054, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24566 = torch.constant.int 1
    %int0_24567 = torch.constant.int 0
    %int9223372036854775807_24568 = torch.constant.int 9223372036854775807
    %int1_24569 = torch.constant.int 1
    %27055 = torch.aten.slice.Tensor %27054, %int1_24566, %int0_24567, %int9223372036854775807_24568, %int1_24569 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27055, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24570 = torch.constant.int 0
    %27056 = torch.aten.unsqueeze %27055, %int0_24570 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %27056, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24571 = torch.constant.int 2
    %27057 = torch.aten.unsqueeze %27056, %int2_24571 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27057, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24572 = torch.constant.int 3
    %int0_24573 = torch.constant.int 0
    %int9223372036854775807_24574 = torch.constant.int 9223372036854775807
    %int1_24575 = torch.constant.int 1
    %27058 = torch.aten.slice.Tensor %27057, %int3_24572, %int0_24573, %int9223372036854775807_24574, %int1_24575 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27058, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %27059 = torch_c.to_builtin_tensor %26876 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_24576 = arith.constant 1 : index
    %dim_24577 = tensor.dim %27059, %c1_24576 : tensor<4x?x4x128xf16>
    %27060 = flow.tensor.bitcast %27059 : tensor<4x?x4x128xf16>{%dim_24577} -> tensor<4x?x4x64xcomplex<f16>>{%dim_24577}
    %27061 = torch_c.from_builtin_tensor %27060 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %27061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %27062 = torch.aten.mul.Tensor %27061, %27058 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %27062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %27063 = torch_c.to_builtin_tensor %27062 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_24578 = arith.constant 1 : index
    %dim_24579 = tensor.dim %27063, %c1_24578 : tensor<4x?x4x64xcomplex<f32>>
    %27064 = flow.tensor.bitcast %27063 : tensor<4x?x4x64xcomplex<f32>>{%dim_24579} -> tensor<4x?x4x128xf32>{%dim_24579}
    %27065 = torch_c.from_builtin_tensor %27064 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %27065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_24580 = torch.constant.int 5
    %27066 = torch.prims.convert_element_type %27065, %int5_24580 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
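    // Annotation (hedged): a second RoPE table, numerically identical to the one
    // above, is rebuilt from scratch here rather than reused -- presumably
    // emitted once per use-site by the exporter and left for CSE/dedup later in
    // the pipeline. Judging by the surrounding structure, it feeds the key-side
    // rotary application for the [4,?,1,128] key shards.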
    %int131072_24581 = torch.constant.int 131072
    %none_24582 = torch.constant.none
    %none_24583 = torch.constant.none
    %cpu_24584 = torch.constant.device "cpu"
    %false_24585 = torch.constant.bool false
    %27067 = torch.aten.arange %int131072_24581, %none_24582, %none_24583, %cpu_24584, %false_24585 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_24586 = torch.constant.int 0
    %int128_24587 = torch.constant.int 128
    %int2_24588 = torch.constant.int 2
    %none_24589 = torch.constant.none
    %none_24590 = torch.constant.none
    %cpu_24591 = torch.constant.device "cpu"
    %false_24592 = torch.constant.bool false
    %27068 = torch.aten.arange.start_step %int0_24586, %int128_24587, %int2_24588, %none_24589, %none_24590, %cpu_24591, %false_24592 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_24593 = torch.constant.int 0
    %int0_24594 = torch.constant.int 0
    %int64_24595 = torch.constant.int 64
    %int1_24596 = torch.constant.int 1
    %27069 = torch.aten.slice.Tensor %27068, %int0_24593, %int0_24594, %int64_24595, %int1_24596 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_24597 = torch.constant.int 6
    %27070 = torch.prims.convert_element_type %27069, %int6_24597 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_24598 = torch.constant.int 128
    %27071 = torch.aten.div.Scalar %27070, %int128_24598 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_24599 = torch.constant.float 5.000000e+05
    %27072 = torch.aten.pow.Scalar %float5.000000e05_24599, %27071 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %27073 = torch.aten.reciprocal %27072 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_24600 = torch.constant.float 1.000000e+00
    %27074 = torch.aten.mul.Scalar %27073, %float1.000000e00_24600 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_24601 = torch.constant.int 131072
    %int1_24602 = torch.constant.int 1
    %27075 = torch.prim.ListConstruct %int131072_24601, %int1_24602 : (!torch.int, !torch.int) -> !torch.list<int>
    %27076 = torch.aten.view %27067, %27075 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %27077 = torch.aten.mul.Tensor %27076, %27074 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %27078 = torch.aten.cos %27077 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %27079 = torch.aten.sin %27077 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %27080 = torch.aten.complex %27078, %27079 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
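    // Replicate the host-computed table to all eight devices; each
    // @__device_N receives its own copy via flow.tensor.transfer.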
    %27081 = torch_c.to_builtin_tensor %27080 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %27082 = flow.tensor.transfer %27081 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %27083 = torch_c.from_builtin_tensor %27082 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %27084 = torch_c.to_builtin_tensor %27080 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %27085 = flow.tensor.transfer %27084 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %27086 = torch_c.from_builtin_tensor %27085 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %27087 = torch_c.to_builtin_tensor %27080 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %27088 = flow.tensor.transfer %27087 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %27089 = torch_c.from_builtin_tensor %27088 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %27090 = torch_c.to_builtin_tensor %27080 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %27091 = flow.tensor.transfer %27090 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %27092 = torch_c.from_builtin_tensor %27091 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %27093 = torch_c.to_builtin_tensor %27080 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %27094 = flow.tensor.transfer %27093 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %27095 = torch_c.from_builtin_tensor %27094 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %27096 = torch_c.to_builtin_tensor %27080 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %27097 = flow.tensor.transfer %27096 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %27098 = torch_c.from_builtin_tensor %27097 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %27099 = torch_c.to_builtin_tensor %27080 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %27100 = flow.tensor.transfer %27099 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %27101 = torch_c.from_builtin_tensor %27100 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %27102 = torch_c.to_builtin_tensor %27080 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %27103 = flow.tensor.transfer %27102 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %27104 = torch_c.from_builtin_tensor %27103 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
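    // Per-device RoPE application to the single-head [4,?,1,128] shard (1 KV
    // head per device; the rotation implies these are likely the key
    // projections). For each device: read the current sequence length, slice
    // that many rows from the device-local table, reshape to a broadcastable
    // [1,?,1,64], and repeat the bitcast-multiply-bitcast pattern. The block
    // below runs on @__device_0 and is repeated verbatim for @__device_1
    // through @__device_7.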
    %int1_24603 = torch.constant.int 1
    %27105 = torch.aten.size.int %26754, %int1_24603 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_24604 = torch.constant.int 0
    %27106 = torch.aten.add.int %int0_24604, %27105 : !torch.int, !torch.int -> !torch.int
    %int0_24605 = torch.constant.int 0
    %int0_24606 = torch.constant.int 0
    %int1_24607 = torch.constant.int 1
    %27107 = torch.aten.slice.Tensor %27083, %int0_24605, %int0_24606, %27106, %int1_24607 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27107, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24608 = torch.constant.int 1
    %int0_24609 = torch.constant.int 0
    %int9223372036854775807_24610 = torch.constant.int 9223372036854775807
    %int1_24611 = torch.constant.int 1
    %27108 = torch.aten.slice.Tensor %27107, %int1_24608, %int0_24609, %int9223372036854775807_24610, %int1_24611 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27108, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24612 = torch.constant.int 0
    %27109 = torch.aten.unsqueeze %27108, %int0_24612 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %27109, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24613 = torch.constant.int 2
    %27110 = torch.aten.unsqueeze %27109, %int2_24613 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27110, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24614 = torch.constant.int 3
    %int0_24615 = torch.constant.int 0
    %int9223372036854775807_24616 = torch.constant.int 9223372036854775807
    %int1_24617 = torch.constant.int 1
    %27111 = torch.aten.slice.Tensor %27110, %int3_24614, %int0_24615, %int9223372036854775807_24616, %int1_24617 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27111, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %27112 = torch_c.to_builtin_tensor %26878 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_24618 = arith.constant 1 : index
    %dim_24619 = tensor.dim %27112, %c1_24618 : tensor<4x?x1x128xf16>
    %27113 = flow.tensor.bitcast %27112 : tensor<4x?x1x128xf16>{%dim_24619} -> tensor<4x?x1x64xcomplex<f16>>{%dim_24619}
    %27114 = torch_c.from_builtin_tensor %27113 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %27114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %27115 = torch.aten.mul.Tensor %27114, %27111 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %27116 = torch_c.to_builtin_tensor %27115 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_24620 = arith.constant 1 : index
    %dim_24621 = tensor.dim %27116, %c1_24620 : tensor<4x?x1x64xcomplex<f32>>
    %27117 = flow.tensor.bitcast %27116 : tensor<4x?x1x64xcomplex<f32>>{%dim_24621} -> tensor<4x?x1x128xf32>{%dim_24621}
    %27118 = torch_c.from_builtin_tensor %27117 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %27118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_24622 = torch.constant.int 5
    %27119 = torch.prims.convert_element_type %27118, %int5_24622 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %27119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_24623 = torch.constant.int 1
    %27120 = torch.aten.size.int %26760, %int1_24623 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_24624 = torch.constant.int 0
    %27121 = torch.aten.add.int %int0_24624, %27120 : !torch.int, !torch.int -> !torch.int
    %int0_24625 = torch.constant.int 0
    %int0_24626 = torch.constant.int 0
    %int1_24627 = torch.constant.int 1
    %27122 = torch.aten.slice.Tensor %27086, %int0_24625, %int0_24626, %27121, %int1_24627 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27122, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24628 = torch.constant.int 1
    %int0_24629 = torch.constant.int 0
    %int9223372036854775807_24630 = torch.constant.int 9223372036854775807
    %int1_24631 = torch.constant.int 1
    %27123 = torch.aten.slice.Tensor %27122, %int1_24628, %int0_24629, %int9223372036854775807_24630, %int1_24631 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27123, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24632 = torch.constant.int 0
    %27124 = torch.aten.unsqueeze %27123, %int0_24632 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %27124, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24633 = torch.constant.int 2
    %27125 = torch.aten.unsqueeze %27124, %int2_24633 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27125, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24634 = torch.constant.int 3
    %int0_24635 = torch.constant.int 0
    %int9223372036854775807_24636 = torch.constant.int 9223372036854775807
    %int1_24637 = torch.constant.int 1
    %27126 = torch.aten.slice.Tensor %27125, %int3_24634, %int0_24635, %int9223372036854775807_24636, %int1_24637 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27126, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %27127 = torch_c.to_builtin_tensor %26880 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_24638 = arith.constant 1 : index
    %dim_24639 = tensor.dim %27127, %c1_24638 : tensor<4x?x1x128xf16>
    %27128 = flow.tensor.bitcast %27127 : tensor<4x?x1x128xf16>{%dim_24639} -> tensor<4x?x1x64xcomplex<f16>>{%dim_24639}
    %27129 = torch_c.from_builtin_tensor %27128 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %27129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %27130 = torch.aten.mul.Tensor %27129, %27126 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %27131 = torch_c.to_builtin_tensor %27130 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_24640 = arith.constant 1 : index
    %dim_24641 = tensor.dim %27131, %c1_24640 : tensor<4x?x1x64xcomplex<f32>>
    %27132 = flow.tensor.bitcast %27131 : tensor<4x?x1x64xcomplex<f32>>{%dim_24641} -> tensor<4x?x1x128xf32>{%dim_24641}
    %27133 = torch_c.from_builtin_tensor %27132 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %27133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_24642 = torch.constant.int 5
    %27134 = torch.prims.convert_element_type %27133, %int5_24642 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %27134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_24643 = torch.constant.int 1
    %27135 = torch.aten.size.int %26766, %int1_24643 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_24644 = torch.constant.int 0
    %27136 = torch.aten.add.int %int0_24644, %27135 : !torch.int, !torch.int -> !torch.int
    %int0_24645 = torch.constant.int 0
    %int0_24646 = torch.constant.int 0
    %int1_24647 = torch.constant.int 1
    %27137 = torch.aten.slice.Tensor %27089, %int0_24645, %int0_24646, %27136, %int1_24647 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27137, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24648 = torch.constant.int 1
    %int0_24649 = torch.constant.int 0
    %int9223372036854775807_24650 = torch.constant.int 9223372036854775807
    %int1_24651 = torch.constant.int 1
    %27138 = torch.aten.slice.Tensor %27137, %int1_24648, %int0_24649, %int9223372036854775807_24650, %int1_24651 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27138, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24652 = torch.constant.int 0
    %27139 = torch.aten.unsqueeze %27138, %int0_24652 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %27139, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24653 = torch.constant.int 2
    %27140 = torch.aten.unsqueeze %27139, %int2_24653 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27140, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24654 = torch.constant.int 3
    %int0_24655 = torch.constant.int 0
    %int9223372036854775807_24656 = torch.constant.int 9223372036854775807
    %int1_24657 = torch.constant.int 1
    %27141 = torch.aten.slice.Tensor %27140, %int3_24654, %int0_24655, %int9223372036854775807_24656, %int1_24657 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27141, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %27142 = torch_c.to_builtin_tensor %26882 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_24658 = arith.constant 1 : index
    %dim_24659 = tensor.dim %27142, %c1_24658 : tensor<4x?x1x128xf16>
    %27143 = flow.tensor.bitcast %27142 : tensor<4x?x1x128xf16>{%dim_24659} -> tensor<4x?x1x64xcomplex<f16>>{%dim_24659}
    %27144 = torch_c.from_builtin_tensor %27143 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %27144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %27145 = torch.aten.mul.Tensor %27144, %27141 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %27146 = torch_c.to_builtin_tensor %27145 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_24660 = arith.constant 1 : index
    %dim_24661 = tensor.dim %27146, %c1_24660 : tensor<4x?x1x64xcomplex<f32>>
    %27147 = flow.tensor.bitcast %27146 : tensor<4x?x1x64xcomplex<f32>>{%dim_24661} -> tensor<4x?x1x128xf32>{%dim_24661}
    %27148 = torch_c.from_builtin_tensor %27147 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %27148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_24662 = torch.constant.int 5
    %27149 = torch.prims.convert_element_type %27148, %int5_24662 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %27149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_24663 = torch.constant.int 1
    %27150 = torch.aten.size.int %26772, %int1_24663 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_24664 = torch.constant.int 0
    %27151 = torch.aten.add.int %int0_24664, %27150 : !torch.int, !torch.int -> !torch.int
    %int0_24665 = torch.constant.int 0
    %int0_24666 = torch.constant.int 0
    %int1_24667 = torch.constant.int 1
    %27152 = torch.aten.slice.Tensor %27092, %int0_24665, %int0_24666, %27151, %int1_24667 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27152, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24668 = torch.constant.int 1
    %int0_24669 = torch.constant.int 0
    %int9223372036854775807_24670 = torch.constant.int 9223372036854775807
    %int1_24671 = torch.constant.int 1
    %27153 = torch.aten.slice.Tensor %27152, %int1_24668, %int0_24669, %int9223372036854775807_24670, %int1_24671 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27153, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24672 = torch.constant.int 0
    %27154 = torch.aten.unsqueeze %27153, %int0_24672 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %27154, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24673 = torch.constant.int 2
    %27155 = torch.aten.unsqueeze %27154, %int2_24673 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27155, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24674 = torch.constant.int 3
    %int0_24675 = torch.constant.int 0
    %int9223372036854775807_24676 = torch.constant.int 9223372036854775807
    %int1_24677 = torch.constant.int 1
    %27156 = torch.aten.slice.Tensor %27155, %int3_24674, %int0_24675, %int9223372036854775807_24676, %int1_24677 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27156, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %27157 = torch_c.to_builtin_tensor %26884 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_24678 = arith.constant 1 : index
    %dim_24679 = tensor.dim %27157, %c1_24678 : tensor<4x?x1x128xf16>
    %27158 = flow.tensor.bitcast %27157 : tensor<4x?x1x128xf16>{%dim_24679} -> tensor<4x?x1x64xcomplex<f16>>{%dim_24679}
    %27159 = torch_c.from_builtin_tensor %27158 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %27159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %27160 = torch.aten.mul.Tensor %27159, %27156 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %27161 = torch_c.to_builtin_tensor %27160 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_24680 = arith.constant 1 : index
    %dim_24681 = tensor.dim %27161, %c1_24680 : tensor<4x?x1x64xcomplex<f32>>
    %27162 = flow.tensor.bitcast %27161 : tensor<4x?x1x64xcomplex<f32>>{%dim_24681} -> tensor<4x?x1x128xf32>{%dim_24681}
    %27163 = torch_c.from_builtin_tensor %27162 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %27163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_24682 = torch.constant.int 5
    %27164 = torch.prims.convert_element_type %27163, %int5_24682 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %27164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_24683 = torch.constant.int 1
    %27165 = torch.aten.size.int %26778, %int1_24683 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_24684 = torch.constant.int 0
    %27166 = torch.aten.add.int %int0_24684, %27165 : !torch.int, !torch.int -> !torch.int
    %int0_24685 = torch.constant.int 0
    %int0_24686 = torch.constant.int 0
    %int1_24687 = torch.constant.int 1
    %27167 = torch.aten.slice.Tensor %27095, %int0_24685, %int0_24686, %27166, %int1_24687 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27167, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24688 = torch.constant.int 1
    %int0_24689 = torch.constant.int 0
    %int9223372036854775807_24690 = torch.constant.int 9223372036854775807
    %int1_24691 = torch.constant.int 1
    %27168 = torch.aten.slice.Tensor %27167, %int1_24688, %int0_24689, %int9223372036854775807_24690, %int1_24691 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27168, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24692 = torch.constant.int 0
    %27169 = torch.aten.unsqueeze %27168, %int0_24692 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %27169, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24693 = torch.constant.int 2
    %27170 = torch.aten.unsqueeze %27169, %int2_24693 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27170, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24694 = torch.constant.int 3
    %int0_24695 = torch.constant.int 0
    %int9223372036854775807_24696 = torch.constant.int 9223372036854775807
    %int1_24697 = torch.constant.int 1
    %27171 = torch.aten.slice.Tensor %27170, %int3_24694, %int0_24695, %int9223372036854775807_24696, %int1_24697 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27171, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %27172 = torch_c.to_builtin_tensor %26886 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_24698 = arith.constant 1 : index
    %dim_24699 = tensor.dim %27172, %c1_24698 : tensor<4x?x1x128xf16>
    %27173 = flow.tensor.bitcast %27172 : tensor<4x?x1x128xf16>{%dim_24699} -> tensor<4x?x1x64xcomplex<f16>>{%dim_24699}
    %27174 = torch_c.from_builtin_tensor %27173 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %27174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %27175 = torch.aten.mul.Tensor %27174, %27171 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %27176 = torch_c.to_builtin_tensor %27175 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_24700 = arith.constant 1 : index
    %dim_24701 = tensor.dim %27176, %c1_24700 : tensor<4x?x1x64xcomplex<f32>>
    %27177 = flow.tensor.bitcast %27176 : tensor<4x?x1x64xcomplex<f32>>{%dim_24701} -> tensor<4x?x1x128xf32>{%dim_24701}
    %27178 = torch_c.from_builtin_tensor %27177 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %27178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_24702 = torch.constant.int 5
    %27179 = torch.prims.convert_element_type %27178, %int5_24702 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %27179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_24703 = torch.constant.int 1
    %27180 = torch.aten.size.int %26784, %int1_24703 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_24704 = torch.constant.int 0
    %27181 = torch.aten.add.int %int0_24704, %27180 : !torch.int, !torch.int -> !torch.int
    %int0_24705 = torch.constant.int 0
    %int0_24706 = torch.constant.int 0
    %int1_24707 = torch.constant.int 1
    %27182 = torch.aten.slice.Tensor %27098, %int0_24705, %int0_24706, %27181, %int1_24707 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27182, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24708 = torch.constant.int 1
    %int0_24709 = torch.constant.int 0
    %int9223372036854775807_24710 = torch.constant.int 9223372036854775807
    %int1_24711 = torch.constant.int 1
    %27183 = torch.aten.slice.Tensor %27182, %int1_24708, %int0_24709, %int9223372036854775807_24710, %int1_24711 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27183, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24712 = torch.constant.int 0
    %27184 = torch.aten.unsqueeze %27183, %int0_24712 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %27184, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24713 = torch.constant.int 2
    %27185 = torch.aten.unsqueeze %27184, %int2_24713 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27185, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24714 = torch.constant.int 3
    %int0_24715 = torch.constant.int 0
    %int9223372036854775807_24716 = torch.constant.int 9223372036854775807
    %int1_24717 = torch.constant.int 1
    %27186 = torch.aten.slice.Tensor %27185, %int3_24714, %int0_24715, %int9223372036854775807_24716, %int1_24717 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27186, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %27187 = torch_c.to_builtin_tensor %26888 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_24718 = arith.constant 1 : index
    %dim_24719 = tensor.dim %27187, %c1_24718 : tensor<4x?x1x128xf16>
    %27188 = flow.tensor.bitcast %27187 : tensor<4x?x1x128xf16>{%dim_24719} -> tensor<4x?x1x64xcomplex<f16>>{%dim_24719}
    %27189 = torch_c.from_builtin_tensor %27188 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %27189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %27190 = torch.aten.mul.Tensor %27189, %27186 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %27191 = torch_c.to_builtin_tensor %27190 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_24720 = arith.constant 1 : index
    %dim_24721 = tensor.dim %27191, %c1_24720 : tensor<4x?x1x64xcomplex<f32>>
    %27192 = flow.tensor.bitcast %27191 : tensor<4x?x1x64xcomplex<f32>>{%dim_24721} -> tensor<4x?x1x128xf32>{%dim_24721}
    %27193 = torch_c.from_builtin_tensor %27192 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %27193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_24722 = torch.constant.int 5
    %27194 = torch.prims.convert_element_type %27193, %int5_24722 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %27194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_24723 = torch.constant.int 1
    %27195 = torch.aten.size.int %26790, %int1_24723 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_24724 = torch.constant.int 0
    %27196 = torch.aten.add.int %int0_24724, %27195 : !torch.int, !torch.int -> !torch.int
    %int0_24725 = torch.constant.int 0
    %int0_24726 = torch.constant.int 0
    %int1_24727 = torch.constant.int 1
    %27197 = torch.aten.slice.Tensor %27101, %int0_24725, %int0_24726, %27196, %int1_24727 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27197, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24728 = torch.constant.int 1
    %int0_24729 = torch.constant.int 0
    %int9223372036854775807_24730 = torch.constant.int 9223372036854775807
    %int1_24731 = torch.constant.int 1
    %27198 = torch.aten.slice.Tensor %27197, %int1_24728, %int0_24729, %int9223372036854775807_24730, %int1_24731 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27198, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24732 = torch.constant.int 0
    %27199 = torch.aten.unsqueeze %27198, %int0_24732 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %27199, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24733 = torch.constant.int 2
    %27200 = torch.aten.unsqueeze %27199, %int2_24733 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27200, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24734 = torch.constant.int 3
    %int0_24735 = torch.constant.int 0
    %int9223372036854775807_24736 = torch.constant.int 9223372036854775807
    %int1_24737 = torch.constant.int 1
    %27201 = torch.aten.slice.Tensor %27200, %int3_24734, %int0_24735, %int9223372036854775807_24736, %int1_24737 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27201, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %27202 = torch_c.to_builtin_tensor %26890 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_24738 = arith.constant 1 : index
    %dim_24739 = tensor.dim %27202, %c1_24738 : tensor<4x?x1x128xf16>
    %27203 = flow.tensor.bitcast %27202 : tensor<4x?x1x128xf16>{%dim_24739} -> tensor<4x?x1x64xcomplex<f16>>{%dim_24739}
    %27204 = torch_c.from_builtin_tensor %27203 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %27204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %27205 = torch.aten.mul.Tensor %27204, %27201 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %27206 = torch_c.to_builtin_tensor %27205 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_24740 = arith.constant 1 : index
    %dim_24741 = tensor.dim %27206, %c1_24740 : tensor<4x?x1x64xcomplex<f32>>
    %27207 = flow.tensor.bitcast %27206 : tensor<4x?x1x64xcomplex<f32>>{%dim_24741} -> tensor<4x?x1x128xf32>{%dim_24741}
    %27208 = torch_c.from_builtin_tensor %27207 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %27208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_24742 = torch.constant.int 5
    %27209 = torch.prims.convert_element_type %27208, %int5_24742 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %27209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_24743 = torch.constant.int 1
    %27210 = torch.aten.size.int %26796, %int1_24743 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_24744 = torch.constant.int 0
    %27211 = torch.aten.add.int %int0_24744, %27210 : !torch.int, !torch.int -> !torch.int
    %int0_24745 = torch.constant.int 0
    %int0_24746 = torch.constant.int 0
    %int1_24747 = torch.constant.int 1
    %27212 = torch.aten.slice.Tensor %27104, %int0_24745, %int0_24746, %27211, %int1_24747 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27212, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_24748 = torch.constant.int 1
    %int0_24749 = torch.constant.int 0
    %int9223372036854775807_24750 = torch.constant.int 9223372036854775807
    %int1_24751 = torch.constant.int 1
    %27213 = torch.aten.slice.Tensor %27212, %int1_24748, %int0_24749, %int9223372036854775807_24750, %int1_24751 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %27213, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_24752 = torch.constant.int 0
    %27214 = torch.aten.unsqueeze %27213, %int0_24752 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %27214, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_24753 = torch.constant.int 2
    %27215 = torch.aten.unsqueeze %27214, %int2_24753 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27215, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_24754 = torch.constant.int 3
    %int0_24755 = torch.constant.int 0
    %int9223372036854775807_24756 = torch.constant.int 9223372036854775807
    %int1_24757 = torch.constant.int 1
    %27216 = torch.aten.slice.Tensor %27215, %int3_24754, %int0_24755, %int9223372036854775807_24756, %int1_24757 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27216, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %27217 = torch_c.to_builtin_tensor %26892 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_24758 = arith.constant 1 : index
    %dim_24759 = tensor.dim %27217, %c1_24758 : tensor<4x?x1x128xf16>
    %27218 = flow.tensor.bitcast %27217 : tensor<4x?x1x128xf16>{%dim_24759} -> tensor<4x?x1x64xcomplex<f16>>{%dim_24759}
    %27219 = torch_c.from_builtin_tensor %27218 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %27219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %27220 = torch.aten.mul.Tensor %27219, %27216 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %27220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %27221 = torch_c.to_builtin_tensor %27220 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_24760 = arith.constant 1 : index
    %dim_24761 = tensor.dim %27221, %c1_24760 : tensor<4x?x1x64xcomplex<f32>>
    %27222 = flow.tensor.bitcast %27221 : tensor<4x?x1x64xcomplex<f32>>{%dim_24761} -> tensor<4x?x1x128xf32>{%dim_24761}
    %27223 = torch_c.from_builtin_tensor %27222 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %27223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_24762 = torch.constant.int 5
    %27224 = torch.prims.convert_element_type %27223, %int5_24762 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %27224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
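    // Start building flattened KV-cache write indices: scale each device's
    // [4,?] page-id tensor by 64, evidently the number of slots per cache page.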
    %int64_24763 = torch.constant.int 64
    %27225 = torch.aten.mul.Scalar %2364, %int64_24763 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27225, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_24764 = torch.constant.int 64
    %27226 = torch.aten.mul.Scalar %2367, %int64_24764 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27226, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_24765 = torch.constant.int 64
    %27227 = torch.aten.mul.Scalar %2370, %int64_24765 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27227, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_24766 = torch.constant.int 64
    %27228 = torch.aten.mul.Scalar %2373, %int64_24766 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27228, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_24767 = torch.constant.int 64
    %27229 = torch.aten.mul.Scalar %2376, %int64_24767 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27229, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_24768 = torch.constant.int 64
    %27230 = torch.aten.mul.Scalar %2379, %int64_24768 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27230, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_24769 = torch.constant.int 64
    %27231 = torch.aten.mul.Scalar %2382, %int64_24769 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27231, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_24770 = torch.constant.int 64
    %27232 = torch.aten.mul.Scalar %2385, %int64_24770 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27232, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
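    // Offset the scaled indices by 26, plausibly this transformer block's slot
    // within each 64-entry page (e.g. 32 blocks x {K,V} = 64 slots).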
    %int26 = torch.constant.int 26
    %int1_24771 = torch.constant.int 1
    %27233 = torch.aten.add.Scalar %27225, %int26, %int1_24771 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27233, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int26_24772 = torch.constant.int 26
    %int1_24773 = torch.constant.int 1
    %27234 = torch.aten.add.Scalar %27226, %int26_24772, %int1_24773 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27234, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int26_24774 = torch.constant.int 26
    %int1_24775 = torch.constant.int 1
    %27235 = torch.aten.add.Scalar %27227, %int26_24774, %int1_24775 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27235, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int26_24776 = torch.constant.int 26
    %int1_24777 = torch.constant.int 1
    %27236 = torch.aten.add.Scalar %27228, %int26_24776, %int1_24777 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27236, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int26_24778 = torch.constant.int 26
    %int1_24779 = torch.constant.int 1
    %27237 = torch.aten.add.Scalar %27229, %int26_24778, %int1_24779 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27237, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int26_24780 = torch.constant.int 26
    %int1_24781 = torch.constant.int 1
    %27238 = torch.aten.add.Scalar %27230, %int26_24780, %int1_24781 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27238, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int26_24782 = torch.constant.int 26
    %int1_24783 = torch.constant.int 1
    %27239 = torch.aten.add.Scalar %27231, %int26_24782, %int1_24783 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27239, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int26_24784 = torch.constant.int 26
    %int1_24785 = torch.constant.int 1
    %27240 = torch.aten.add.Scalar %27232, %int26_24784, %int1_24785 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27240, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
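    // Regroup each rotated K shard into cache pages: view the [4, s0*16, 1, 128]
    // tensor as [4, s0, 16, 1, 128], i.e. 16 tokens per page.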
    %int4_24786 = torch.constant.int 4
    %int16_24787 = torch.constant.int 16
    %int1_24788 = torch.constant.int 1
    %int128_24789 = torch.constant.int 128
    %27241 = torch.prim.ListConstruct %int4_24786, %3095, %int16_24787, %int1_24788, %int128_24789 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27242 = torch.aten.view %27119, %27241 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27242, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24790 = torch.constant.int 4
    %int16_24791 = torch.constant.int 16
    %int1_24792 = torch.constant.int 1
    %int128_24793 = torch.constant.int 128
    %27243 = torch.prim.ListConstruct %int4_24790, %3095, %int16_24791, %int1_24792, %int128_24793 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27244 = torch.aten.view %27134, %27243 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27244, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24794 = torch.constant.int 4
    %int16_24795 = torch.constant.int 16
    %int1_24796 = torch.constant.int 1
    %int128_24797 = torch.constant.int 128
    %27245 = torch.prim.ListConstruct %int4_24794, %3095, %int16_24795, %int1_24796, %int128_24797 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27246 = torch.aten.view %27149, %27245 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27246, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24798 = torch.constant.int 4
    %int16_24799 = torch.constant.int 16
    %int1_24800 = torch.constant.int 1
    %int128_24801 = torch.constant.int 128
    %27247 = torch.prim.ListConstruct %int4_24798, %3095, %int16_24799, %int1_24800, %int128_24801 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27248 = torch.aten.view %27164, %27247 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27248, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24802 = torch.constant.int 4
    %int16_24803 = torch.constant.int 16
    %int1_24804 = torch.constant.int 1
    %int128_24805 = torch.constant.int 128
    %27249 = torch.prim.ListConstruct %int4_24802, %3095, %int16_24803, %int1_24804, %int128_24805 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27250 = torch.aten.view %27179, %27249 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27250, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24806 = torch.constant.int 4
    %int16_24807 = torch.constant.int 16
    %int1_24808 = torch.constant.int 1
    %int128_24809 = torch.constant.int 128
    %27251 = torch.prim.ListConstruct %int4_24806, %3095, %int16_24807, %int1_24808, %int128_24809 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27252 = torch.aten.view %27194, %27251 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27252, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24810 = torch.constant.int 4
    %int16_24811 = torch.constant.int 16
    %int1_24812 = torch.constant.int 1
    %int128_24813 = torch.constant.int 128
    %27253 = torch.prim.ListConstruct %int4_24810, %3095, %int16_24811, %int1_24812, %int128_24813 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27254 = torch.aten.view %27209, %27253 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27254, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24814 = torch.constant.int 4
    %int16_24815 = torch.constant.int 16
    %int1_24816 = torch.constant.int 1
    %int128_24817 = torch.constant.int 128
    %27255 = torch.prim.ListConstruct %int4_24814, %3095, %int16_24815, %int1_24816, %int128_24817 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27256 = torch.aten.view %27224, %27255 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27256, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
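    // Collapse batch and page dimensions, giving flat [4*s0, 16, 1, 128]
    // tensors whose leading dimension matches the flattened write indices.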
    %int4_24818 = torch.constant.int 4
    %27257 = torch.aten.mul.int %int4_24818, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24819 = torch.constant.int 16
    %int1_24820 = torch.constant.int 1
    %int128_24821 = torch.constant.int 128
    %27258 = torch.prim.ListConstruct %27257, %int16_24819, %int1_24820, %int128_24821 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27259 = torch.aten.view %27242, %27258 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27259, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24822 = torch.constant.int 4
    %27260 = torch.aten.mul.int %int4_24822, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24823 = torch.constant.int 16
    %int1_24824 = torch.constant.int 1
    %int128_24825 = torch.constant.int 128
    %27261 = torch.prim.ListConstruct %27260, %int16_24823, %int1_24824, %int128_24825 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27262 = torch.aten.view %27244, %27261 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27262, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24826 = torch.constant.int 4
    %27263 = torch.aten.mul.int %int4_24826, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24827 = torch.constant.int 16
    %int1_24828 = torch.constant.int 1
    %int128_24829 = torch.constant.int 128
    %27264 = torch.prim.ListConstruct %27263, %int16_24827, %int1_24828, %int128_24829 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27265 = torch.aten.view %27246, %27264 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27265, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24830 = torch.constant.int 4
    %27266 = torch.aten.mul.int %int4_24830, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24831 = torch.constant.int 16
    %int1_24832 = torch.constant.int 1
    %int128_24833 = torch.constant.int 128
    %27267 = torch.prim.ListConstruct %27266, %int16_24831, %int1_24832, %int128_24833 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27268 = torch.aten.view %27248, %27267 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27268, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24834 = torch.constant.int 4
    %27269 = torch.aten.mul.int %int4_24834, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24835 = torch.constant.int 16
    %int1_24836 = torch.constant.int 1
    %int128_24837 = torch.constant.int 128
    %27270 = torch.prim.ListConstruct %27269, %int16_24835, %int1_24836, %int128_24837 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27271 = torch.aten.view %27250, %27270 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27271, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24838 = torch.constant.int 4
    %27272 = torch.aten.mul.int %int4_24838, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24839 = torch.constant.int 16
    %int1_24840 = torch.constant.int 1
    %int128_24841 = torch.constant.int 128
    %27273 = torch.prim.ListConstruct %27272, %int16_24839, %int1_24840, %int128_24841 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27274 = torch.aten.view %27252, %27273 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27274, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24842 = torch.constant.int 4
    %27275 = torch.aten.mul.int %int4_24842, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24843 = torch.constant.int 16
    %int1_24844 = torch.constant.int 1
    %int128_24845 = torch.constant.int 128
    %27276 = torch.prim.ListConstruct %27275, %int16_24843, %int1_24844, %int128_24845 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27277 = torch.aten.view %27254, %27276 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27277, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24846 = torch.constant.int 4
    %27278 = torch.aten.mul.int %int4_24846, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24847 = torch.constant.int 16
    %int1_24848 = torch.constant.int 1
    %int128_24849 = torch.constant.int 128
    %27279 = torch.prim.ListConstruct %27278, %int16_24847, %int1_24848, %int128_24849 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27280 = torch.aten.view %27256, %27279 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27280, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
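    // Flatten the corresponding [4, s0] index tensors to [4*s0] so each entry
    // addresses exactly one page row in the tensors flattened above.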
    %int4_24850 = torch.constant.int 4
    %27281 = torch.aten.mul.int %int4_24850, %3095 : !torch.int, !torch.int -> !torch.int
    %27282 = torch.prim.ListConstruct %27281 : (!torch.int) -> !torch.list<int>
    %27283 = torch.aten.view %27233, %27282 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27283, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24851 = torch.constant.int 4
    %27284 = torch.aten.mul.int %int4_24851, %3095 : !torch.int, !torch.int -> !torch.int
    %27285 = torch.prim.ListConstruct %27284 : (!torch.int) -> !torch.list<int>
    %27286 = torch.aten.view %27234, %27285 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27286, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24852 = torch.constant.int 4
    %27287 = torch.aten.mul.int %int4_24852, %3095 : !torch.int, !torch.int -> !torch.int
    %27288 = torch.prim.ListConstruct %27287 : (!torch.int) -> !torch.list<int>
    %27289 = torch.aten.view %27235, %27288 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27289, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24853 = torch.constant.int 4
    %27290 = torch.aten.mul.int %int4_24853, %3095 : !torch.int, !torch.int -> !torch.int
    %27291 = torch.prim.ListConstruct %27290 : (!torch.int) -> !torch.list<int>
    %27292 = torch.aten.view %27236, %27291 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27292, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24854 = torch.constant.int 4
    %27293 = torch.aten.mul.int %int4_24854, %3095 : !torch.int, !torch.int -> !torch.int
    %27294 = torch.prim.ListConstruct %27293 : (!torch.int) -> !torch.list<int>
    %27295 = torch.aten.view %27237, %27294 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27295, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24855 = torch.constant.int 4
    %27296 = torch.aten.mul.int %int4_24855, %3095 : !torch.int, !torch.int -> !torch.int
    %27297 = torch.prim.ListConstruct %27296 : (!torch.int) -> !torch.list<int>
    %27298 = torch.aten.view %27238, %27297 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27298, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24856 = torch.constant.int 4
    %27299 = torch.aten.mul.int %int4_24856, %3095 : !torch.int, !torch.int -> !torch.int
    %27300 = torch.prim.ListConstruct %27299 : (!torch.int) -> !torch.list<int>
    %27301 = torch.aten.view %27239, %27300 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27301, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24857 = torch.constant.int 4
    %27302 = torch.aten.mul.int %int4_24857, %3095 : !torch.int, !torch.int -> !torch.int
    %27303 = torch.prim.ListConstruct %27302 : (!torch.int) -> !torch.list<int>
    %27304 = torch.aten.view %27240, %27303 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27304, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
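    // Next, the [4,?,1,128] f16 tensors %26894..%26908 are re-viewed as
    // [4,?,16,1,128], splitting the dynamic dimension by a factor of 16,
    // plausibly 16 tokens per cache sub-block, though that is an assumption
    // based on the shapes alone.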
    %int4_24858 = torch.constant.int 4
    %int16_24859 = torch.constant.int 16
    %int1_24860 = torch.constant.int 1
    %int128_24861 = torch.constant.int 128
    %27305 = torch.prim.ListConstruct %int4_24858, %3095, %int16_24859, %int1_24860, %int128_24861 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27306 = torch.aten.view %26894, %27305 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27306, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24862 = torch.constant.int 4
    %int16_24863 = torch.constant.int 16
    %int1_24864 = torch.constant.int 1
    %int128_24865 = torch.constant.int 128
    %27307 = torch.prim.ListConstruct %int4_24862, %3095, %int16_24863, %int1_24864, %int128_24865 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27308 = torch.aten.view %26896, %27307 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27308, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24866 = torch.constant.int 4
    %int16_24867 = torch.constant.int 16
    %int1_24868 = torch.constant.int 1
    %int128_24869 = torch.constant.int 128
    %27309 = torch.prim.ListConstruct %int4_24866, %3095, %int16_24867, %int1_24868, %int128_24869 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27310 = torch.aten.view %26898, %27309 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27310, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24870 = torch.constant.int 4
    %int16_24871 = torch.constant.int 16
    %int1_24872 = torch.constant.int 1
    %int128_24873 = torch.constant.int 128
    %27311 = torch.prim.ListConstruct %int4_24870, %3095, %int16_24871, %int1_24872, %int128_24873 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27312 = torch.aten.view %26900, %27311 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27312, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24874 = torch.constant.int 4
    %int16_24875 = torch.constant.int 16
    %int1_24876 = torch.constant.int 1
    %int128_24877 = torch.constant.int 128
    %27313 = torch.prim.ListConstruct %int4_24874, %3095, %int16_24875, %int1_24876, %int128_24877 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27314 = torch.aten.view %26902, %27313 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27314, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24878 = torch.constant.int 4
    %int16_24879 = torch.constant.int 16
    %int1_24880 = torch.constant.int 1
    %int128_24881 = torch.constant.int 128
    %27315 = torch.prim.ListConstruct %int4_24878, %3095, %int16_24879, %int1_24880, %int128_24881 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27316 = torch.aten.view %26904, %27315 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27316, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24882 = torch.constant.int 4
    %int16_24883 = torch.constant.int 16
    %int1_24884 = torch.constant.int 1
    %int128_24885 = torch.constant.int 128
    %27317 = torch.prim.ListConstruct %int4_24882, %3095, %int16_24883, %int1_24884, %int128_24885 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27318 = torch.aten.view %26906, %27317 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27318, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_24886 = torch.constant.int 4
    %int16_24887 = torch.constant.int 16
    %int1_24888 = torch.constant.int 1
    %int128_24889 = torch.constant.int 128
    %27319 = torch.prim.ListConstruct %int4_24886, %3095, %int16_24887, %int1_24888, %int128_24889 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27320 = torch.aten.view %26908, %27319 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %27320, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
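    // The leading batch dimension of 4 is then folded into the dynamic
    // dimension, yielding [?,16,1,128] tensors with s0 * 4 rows, the same row
    // layout used by the flattened index vectors above.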
    %int4_24890 = torch.constant.int 4
    %27321 = torch.aten.mul.int %int4_24890, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24891 = torch.constant.int 16
    %int1_24892 = torch.constant.int 1
    %int128_24893 = torch.constant.int 128
    %27322 = torch.prim.ListConstruct %27321, %int16_24891, %int1_24892, %int128_24893 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27323 = torch.aten.view %27306, %27322 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27323, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24894 = torch.constant.int 4
    %27324 = torch.aten.mul.int %int4_24894, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24895 = torch.constant.int 16
    %int1_24896 = torch.constant.int 1
    %int128_24897 = torch.constant.int 128
    %27325 = torch.prim.ListConstruct %27324, %int16_24895, %int1_24896, %int128_24897 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27326 = torch.aten.view %27308, %27325 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27326, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24898 = torch.constant.int 4
    %27327 = torch.aten.mul.int %int4_24898, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24899 = torch.constant.int 16
    %int1_24900 = torch.constant.int 1
    %int128_24901 = torch.constant.int 128
    %27328 = torch.prim.ListConstruct %27327, %int16_24899, %int1_24900, %int128_24901 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27329 = torch.aten.view %27310, %27328 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27329, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24902 = torch.constant.int 4
    %27330 = torch.aten.mul.int %int4_24902, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24903 = torch.constant.int 16
    %int1_24904 = torch.constant.int 1
    %int128_24905 = torch.constant.int 128
    %27331 = torch.prim.ListConstruct %27330, %int16_24903, %int1_24904, %int128_24905 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27332 = torch.aten.view %27312, %27331 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27332, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24906 = torch.constant.int 4
    %27333 = torch.aten.mul.int %int4_24906, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24907 = torch.constant.int 16
    %int1_24908 = torch.constant.int 1
    %int128_24909 = torch.constant.int 128
    %27334 = torch.prim.ListConstruct %27333, %int16_24907, %int1_24908, %int128_24909 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27335 = torch.aten.view %27314, %27334 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27335, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24910 = torch.constant.int 4
    %27336 = torch.aten.mul.int %int4_24910, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24911 = torch.constant.int 16
    %int1_24912 = torch.constant.int 1
    %int128_24913 = torch.constant.int 128
    %27337 = torch.prim.ListConstruct %27336, %int16_24911, %int1_24912, %int128_24913 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27338 = torch.aten.view %27316, %27337 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27338, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24914 = torch.constant.int 4
    %27339 = torch.aten.mul.int %int4_24914, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24915 = torch.constant.int 16
    %int1_24916 = torch.constant.int 1
    %int128_24917 = torch.constant.int 128
    %27340 = torch.prim.ListConstruct %27339, %int16_24915, %int1_24916, %int128_24917 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27341 = torch.aten.view %27318, %27340 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27341, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_24918 = torch.constant.int 4
    %27342 = torch.aten.mul.int %int4_24918, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_24919 = torch.constant.int 16
    %int1_24920 = torch.constant.int 1
    %int128_24921 = torch.constant.int 128
    %27343 = torch.prim.ListConstruct %27342, %int16_24919, %int1_24920, %int128_24921 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27344 = torch.aten.view %27320, %27343 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27344, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
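    // Each index tensor is offset by +1 (add.Scalar with alpha 1) and flattened
    // again. Given the [?,32,2,16,1,128] cache views below, the unshifted
    // indices appear to address the first of the paired planes (plausibly K)
    // and the +1 indices the second (plausibly V); the IR itself does not name
    // them, so this is an inference.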
    %int1_24922 = torch.constant.int 1
    %int1_24923 = torch.constant.int 1
    %27345 = torch.aten.add.Scalar %27233, %int1_24922, %int1_24923 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27345, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_24924 = torch.constant.int 1
    %int1_24925 = torch.constant.int 1
    %27346 = torch.aten.add.Scalar %27234, %int1_24924, %int1_24925 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27346, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_24926 = torch.constant.int 1
    %int1_24927 = torch.constant.int 1
    %27347 = torch.aten.add.Scalar %27235, %int1_24926, %int1_24927 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27347, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_24928 = torch.constant.int 1
    %int1_24929 = torch.constant.int 1
    %27348 = torch.aten.add.Scalar %27236, %int1_24928, %int1_24929 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27348, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_24930 = torch.constant.int 1
    %int1_24931 = torch.constant.int 1
    %27349 = torch.aten.add.Scalar %27237, %int1_24930, %int1_24931 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27349, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_24932 = torch.constant.int 1
    %int1_24933 = torch.constant.int 1
    %27350 = torch.aten.add.Scalar %27238, %int1_24932, %int1_24933 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27350, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_24934 = torch.constant.int 1
    %int1_24935 = torch.constant.int 1
    %27351 = torch.aten.add.Scalar %27239, %int1_24934, %int1_24935 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27351, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_24936 = torch.constant.int 1
    %int1_24937 = torch.constant.int 1
    %27352 = torch.aten.add.Scalar %27240, %int1_24936, %int1_24937 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %27352, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_24938 = torch.constant.int 4
    %27353 = torch.aten.mul.int %int4_24938, %3095 : !torch.int, !torch.int -> !torch.int
    %27354 = torch.prim.ListConstruct %27353 : (!torch.int) -> !torch.list<int>
    %27355 = torch.aten.view %27345, %27354 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27355, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24939 = torch.constant.int 4
    %27356 = torch.aten.mul.int %int4_24939, %3095 : !torch.int, !torch.int -> !torch.int
    %27357 = torch.prim.ListConstruct %27356 : (!torch.int) -> !torch.list<int>
    %27358 = torch.aten.view %27346, %27357 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27358, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24940 = torch.constant.int 4
    %27359 = torch.aten.mul.int %int4_24940, %3095 : !torch.int, !torch.int -> !torch.int
    %27360 = torch.prim.ListConstruct %27359 : (!torch.int) -> !torch.list<int>
    %27361 = torch.aten.view %27347, %27360 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27361, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24941 = torch.constant.int 4
    %27362 = torch.aten.mul.int %int4_24941, %3095 : !torch.int, !torch.int -> !torch.int
    %27363 = torch.prim.ListConstruct %27362 : (!torch.int) -> !torch.list<int>
    %27364 = torch.aten.view %27348, %27363 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27364, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24942 = torch.constant.int 4
    %27365 = torch.aten.mul.int %int4_24942, %3095 : !torch.int, !torch.int -> !torch.int
    %27366 = torch.prim.ListConstruct %27365 : (!torch.int) -> !torch.list<int>
    %27367 = torch.aten.view %27349, %27366 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27367, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24943 = torch.constant.int 4
    %27368 = torch.aten.mul.int %int4_24943, %3095 : !torch.int, !torch.int -> !torch.int
    %27369 = torch.prim.ListConstruct %27368 : (!torch.int) -> !torch.list<int>
    %27370 = torch.aten.view %27350, %27369 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27370, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24944 = torch.constant.int 4
    %27371 = torch.aten.mul.int %int4_24944, %3095 : !torch.int, !torch.int -> !torch.int
    %27372 = torch.prim.ListConstruct %27371 : (!torch.int) -> !torch.list<int>
    %27373 = torch.aten.view %27351, %27372 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27373, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_24945 = torch.constant.int 4
    %27374 = torch.aten.mul.int %int4_24945, %3095 : !torch.int, !torch.int -> !torch.int
    %27375 = torch.prim.ListConstruct %27374 : (!torch.int) -> !torch.list<int>
    %27376 = torch.aten.view %27352, %27375 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27376, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
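    // Per shard, the original and +1-offset index vectors are concatenated into
    // a single [?] vector of length s0 * 8 (%27378..%27392).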
    %27377 = torch.prim.ListConstruct %27283, %27355 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_24946 = torch.constant.int 0
    %27378 = torch.aten.cat %27377, %int0_24946 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27378, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %27379 = torch.prim.ListConstruct %27286, %27358 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_24947 = torch.constant.int 0
    %27380 = torch.aten.cat %27379, %int0_24947 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27380, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %27381 = torch.prim.ListConstruct %27289, %27361 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_24948 = torch.constant.int 0
    %27382 = torch.aten.cat %27381, %int0_24948 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27382, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %27383 = torch.prim.ListConstruct %27292, %27364 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_24949 = torch.constant.int 0
    %27384 = torch.aten.cat %27383, %int0_24949 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27384, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %27385 = torch.prim.ListConstruct %27295, %27367 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_24950 = torch.constant.int 0
    %27386 = torch.aten.cat %27385, %int0_24950 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27386, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %27387 = torch.prim.ListConstruct %27298, %27370 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_24951 = torch.constant.int 0
    %27388 = torch.aten.cat %27387, %int0_24951 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27388, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %27389 = torch.prim.ListConstruct %27301, %27373 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_24952 = torch.constant.int 0
    %27390 = torch.aten.cat %27389, %int0_24952 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27390, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %27391 = torch.prim.ListConstruct %27304, %27376 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_24953 = torch.constant.int 0
    %27392 = torch.aten.cat %27391, %int0_24953 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %27392, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
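    // The matching payload tensors are concatenated in the same order
    // (%27394..%27408), so row i of each combined index vector addresses row i
    // of the corresponding combined [?,16,1,128] payload.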
    %27393 = torch.prim.ListConstruct %27259, %27323 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_24954 = torch.constant.int 0
    %27394 = torch.aten.cat %27393, %int0_24954 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27394, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27395 = torch.prim.ListConstruct %27262, %27326 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_24955 = torch.constant.int 0
    %27396 = torch.aten.cat %27395, %int0_24955 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27396, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27397 = torch.prim.ListConstruct %27265, %27329 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_24956 = torch.constant.int 0
    %27398 = torch.aten.cat %27397, %int0_24956 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27398, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27399 = torch.prim.ListConstruct %27268, %27332 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_24957 = torch.constant.int 0
    %27400 = torch.aten.cat %27399, %int0_24957 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27400, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27401 = torch.prim.ListConstruct %27271, %27335 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_24958 = torch.constant.int 0
    %27402 = torch.aten.cat %27401, %int0_24958 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27402, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27403 = torch.prim.ListConstruct %27274, %27338 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_24959 = torch.constant.int 0
    %27404 = torch.aten.cat %27403, %int0_24959 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27404, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27405 = torch.prim.ListConstruct %27277, %27341 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_24960 = torch.constant.int 0
    %27406 = torch.aten.cat %27405, %int0_24960 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27406, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27407 = torch.prim.ListConstruct %27280, %27344 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_24961 = torch.constant.int 0
    %27408 = torch.aten.cat %27407, %int0_24961 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27408, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
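    // The block below repeats once per device: the flat [?,131072] cache slab
    // is viewed as [?,32,2,16,1,128] (note 32 * 2 * 16 * 1 * 128 = 131072),
    // collapsed to [?,16,1,128] with s0 * 64 rows, updated in place via
    // index_put with accumulate = false, then restored to [?,131072]. The
    // 32 x 2 grouping is consistent with per-layer K/V planes of a paged KV
    // cache, but the dump does not state that explicitly.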
    %int32_24962 = torch.constant.int 32
    %int2_24963 = torch.constant.int 2
    %int16_24964 = torch.constant.int 16
    %int1_24965 = torch.constant.int 1
    %int128_24966 = torch.constant.int 128
    %27409 = torch.prim.ListConstruct %3023, %int32_24962, %int2_24963, %int16_24964, %int1_24965, %int128_24966 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27410 = torch.aten.view %25559, %27409 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27410, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_24967 = torch.constant.int 32
    %27411 = torch.aten.mul.int %3023, %int32_24967 : !torch.int, !torch.int -> !torch.int
    %int2_24968 = torch.constant.int 2
    %27412 = torch.aten.mul.int %27411, %int2_24968 : !torch.int, !torch.int -> !torch.int
    %int16_24969 = torch.constant.int 16
    %int1_24970 = torch.constant.int 1
    %int128_24971 = torch.constant.int 128
    %27413 = torch.prim.ListConstruct %27412, %int16_24969, %int1_24970, %int128_24971 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27414 = torch.aten.view %27410, %27413 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27414, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27415 = torch.prim.ListConstruct %27378 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_24972 = torch.constant.bool false
    %27416 = torch.aten.index_put %27414, %27415, %27394, %false_24972 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27416, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_24973 = torch.constant.int 32
    %int2_24974 = torch.constant.int 2
    %int16_24975 = torch.constant.int 16
    %int1_24976 = torch.constant.int 1
    %int128_24977 = torch.constant.int 128
    %27417 = torch.prim.ListConstruct %3023, %int32_24973, %int2_24974, %int16_24975, %int1_24976, %int128_24977 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27418 = torch.aten.view %27416, %27417 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27418, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_24978 = torch.constant.int 131072
    %27419 = torch.prim.ListConstruct %3023, %int131072_24978 : (!torch.int, !torch.int) -> !torch.list<int>
    %27420 = torch.aten.view %27418, %27419 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %27420, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_24979 = torch.constant.int 32
    %int2_24980 = torch.constant.int 2
    %int16_24981 = torch.constant.int 16
    %int1_24982 = torch.constant.int 1
    %int128_24983 = torch.constant.int 128
    %27421 = torch.prim.ListConstruct %3026, %int32_24979, %int2_24980, %int16_24981, %int1_24982, %int128_24983 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27422 = torch.aten.view %25571, %27421 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27422, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_24984 = torch.constant.int 32
    %27423 = torch.aten.mul.int %3026, %int32_24984 : !torch.int, !torch.int -> !torch.int
    %int2_24985 = torch.constant.int 2
    %27424 = torch.aten.mul.int %27423, %int2_24985 : !torch.int, !torch.int -> !torch.int
    %int16_24986 = torch.constant.int 16
    %int1_24987 = torch.constant.int 1
    %int128_24988 = torch.constant.int 128
    %27425 = torch.prim.ListConstruct %27424, %int16_24986, %int1_24987, %int128_24988 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27426 = torch.aten.view %27422, %27425 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27426, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27427 = torch.prim.ListConstruct %27380 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_24989 = torch.constant.bool false
    %27428 = torch.aten.index_put %27426, %27427, %27396, %false_24989 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27428, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_24990 = torch.constant.int 32
    %int2_24991 = torch.constant.int 2
    %int16_24992 = torch.constant.int 16
    %int1_24993 = torch.constant.int 1
    %int128_24994 = torch.constant.int 128
    %27429 = torch.prim.ListConstruct %3026, %int32_24990, %int2_24991, %int16_24992, %int1_24993, %int128_24994 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27430 = torch.aten.view %27428, %27429 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27430, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_24995 = torch.constant.int 131072
    %27431 = torch.prim.ListConstruct %3026, %int131072_24995 : (!torch.int, !torch.int) -> !torch.list<int>
    %27432 = torch.aten.view %27430, %27431 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %27432, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_24996 = torch.constant.int 32
    %int2_24997 = torch.constant.int 2
    %int16_24998 = torch.constant.int 16
    %int1_24999 = torch.constant.int 1
    %int128_25000 = torch.constant.int 128
    %27433 = torch.prim.ListConstruct %3029, %int32_24996, %int2_24997, %int16_24998, %int1_24999, %int128_25000 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27434 = torch.aten.view %25583, %27433 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27434, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_25001 = torch.constant.int 32
    %27435 = torch.aten.mul.int %3029, %int32_25001 : !torch.int, !torch.int -> !torch.int
    %int2_25002 = torch.constant.int 2
    %27436 = torch.aten.mul.int %27435, %int2_25002 : !torch.int, !torch.int -> !torch.int
    %int16_25003 = torch.constant.int 16
    %int1_25004 = torch.constant.int 1
    %int128_25005 = torch.constant.int 128
    %27437 = torch.prim.ListConstruct %27436, %int16_25003, %int1_25004, %int128_25005 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27438 = torch.aten.view %27434, %27437 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27438, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27439 = torch.prim.ListConstruct %27382 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_25006 = torch.constant.bool false
    %27440 = torch.aten.index_put %27438, %27439, %27398, %false_25006 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27440, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_25007 = torch.constant.int 32
    %int2_25008 = torch.constant.int 2
    %int16_25009 = torch.constant.int 16
    %int1_25010 = torch.constant.int 1
    %int128_25011 = torch.constant.int 128
    %27441 = torch.prim.ListConstruct %3029, %int32_25007, %int2_25008, %int16_25009, %int1_25010, %int128_25011 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27442 = torch.aten.view %27440, %27441 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27442, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_25012 = torch.constant.int 131072
    %27443 = torch.prim.ListConstruct %3029, %int131072_25012 : (!torch.int, !torch.int) -> !torch.list<int>
    %27444 = torch.aten.view %27442, %27443 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %27444, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_25013 = torch.constant.int 32
    %int2_25014 = torch.constant.int 2
    %int16_25015 = torch.constant.int 16
    %int1_25016 = torch.constant.int 1
    %int128_25017 = torch.constant.int 128
    %27445 = torch.prim.ListConstruct %3032, %int32_25013, %int2_25014, %int16_25015, %int1_25016, %int128_25017 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27446 = torch.aten.view %25595, %27445 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27446, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_25018 = torch.constant.int 32
    %27447 = torch.aten.mul.int %3032, %int32_25018 : !torch.int, !torch.int -> !torch.int
    %int2_25019 = torch.constant.int 2
    %27448 = torch.aten.mul.int %27447, %int2_25019 : !torch.int, !torch.int -> !torch.int
    %int16_25020 = torch.constant.int 16
    %int1_25021 = torch.constant.int 1
    %int128_25022 = torch.constant.int 128
    %27449 = torch.prim.ListConstruct %27448, %int16_25020, %int1_25021, %int128_25022 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27450 = torch.aten.view %27446, %27449 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27450, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27451 = torch.prim.ListConstruct %27384 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_25023 = torch.constant.bool false
    %27452 = torch.aten.index_put %27450, %27451, %27400, %false_25023 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27452, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_25024 = torch.constant.int 32
    %int2_25025 = torch.constant.int 2
    %int16_25026 = torch.constant.int 16
    %int1_25027 = torch.constant.int 1
    %int128_25028 = torch.constant.int 128
    %27453 = torch.prim.ListConstruct %3032, %int32_25024, %int2_25025, %int16_25026, %int1_25027, %int128_25028 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27454 = torch.aten.view %27452, %27453 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27454, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_25029 = torch.constant.int 131072
    %27455 = torch.prim.ListConstruct %3032, %int131072_25029 : (!torch.int, !torch.int) -> !torch.list<int>
    %27456 = torch.aten.view %27454, %27455 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %27456, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_25030 = torch.constant.int 32
    %int2_25031 = torch.constant.int 2
    %int16_25032 = torch.constant.int 16
    %int1_25033 = torch.constant.int 1
    %int128_25034 = torch.constant.int 128
    %27457 = torch.prim.ListConstruct %3035, %int32_25030, %int2_25031, %int16_25032, %int1_25033, %int128_25034 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27458 = torch.aten.view %25607, %27457 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27458, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_25035 = torch.constant.int 32
    %27459 = torch.aten.mul.int %3035, %int32_25035 : !torch.int, !torch.int -> !torch.int
    %int2_25036 = torch.constant.int 2
    %27460 = torch.aten.mul.int %27459, %int2_25036 : !torch.int, !torch.int -> !torch.int
    %int16_25037 = torch.constant.int 16
    %int1_25038 = torch.constant.int 1
    %int128_25039 = torch.constant.int 128
    %27461 = torch.prim.ListConstruct %27460, %int16_25037, %int1_25038, %int128_25039 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27462 = torch.aten.view %27458, %27461 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27462, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27463 = torch.prim.ListConstruct %27386 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_25040 = torch.constant.bool false
    %27464 = torch.aten.index_put %27462, %27463, %27402, %false_25040 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27464, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_25041 = torch.constant.int 32
    %int2_25042 = torch.constant.int 2
    %int16_25043 = torch.constant.int 16
    %int1_25044 = torch.constant.int 1
    %int128_25045 = torch.constant.int 128
    %27465 = torch.prim.ListConstruct %3035, %int32_25041, %int2_25042, %int16_25043, %int1_25044, %int128_25045 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27466 = torch.aten.view %27464, %27465 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27466, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_25046 = torch.constant.int 131072
    %27467 = torch.prim.ListConstruct %3035, %int131072_25046 : (!torch.int, !torch.int) -> !torch.list<int>
    %27468 = torch.aten.view %27466, %27467 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %27468, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_25047 = torch.constant.int 32
    %int2_25048 = torch.constant.int 2
    %int16_25049 = torch.constant.int 16
    %int1_25050 = torch.constant.int 1
    %int128_25051 = torch.constant.int 128
    %27469 = torch.prim.ListConstruct %3038, %int32_25047, %int2_25048, %int16_25049, %int1_25050, %int128_25051 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27470 = torch.aten.view %25619, %27469 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27470, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_25052 = torch.constant.int 32
    %27471 = torch.aten.mul.int %3038, %int32_25052 : !torch.int, !torch.int -> !torch.int
    %int2_25053 = torch.constant.int 2
    %27472 = torch.aten.mul.int %27471, %int2_25053 : !torch.int, !torch.int -> !torch.int
    %int16_25054 = torch.constant.int 16
    %int1_25055 = torch.constant.int 1
    %int128_25056 = torch.constant.int 128
    %27473 = torch.prim.ListConstruct %27472, %int16_25054, %int1_25055, %int128_25056 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27474 = torch.aten.view %27470, %27473 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27474, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27475 = torch.prim.ListConstruct %27388 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_25057 = torch.constant.bool false
    %27476 = torch.aten.index_put %27474, %27475, %27404, %false_25057 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27476, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_25058 = torch.constant.int 32
    %int2_25059 = torch.constant.int 2
    %int16_25060 = torch.constant.int 16
    %int1_25061 = torch.constant.int 1
    %int128_25062 = torch.constant.int 128
    %27477 = torch.prim.ListConstruct %3038, %int32_25058, %int2_25059, %int16_25060, %int1_25061, %int128_25062 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27478 = torch.aten.view %27476, %27477 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27478, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_25063 = torch.constant.int 131072
    %27479 = torch.prim.ListConstruct %3038, %int131072_25063 : (!torch.int, !torch.int) -> !torch.list<int>
    %27480 = torch.aten.view %27478, %27479 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %27480, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_25064 = torch.constant.int 32
    %int2_25065 = torch.constant.int 2
    %int16_25066 = torch.constant.int 16
    %int1_25067 = torch.constant.int 1
    %int128_25068 = torch.constant.int 128
    %27481 = torch.prim.ListConstruct %3041, %int32_25064, %int2_25065, %int16_25066, %int1_25067, %int128_25068 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27482 = torch.aten.view %25631, %27481 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27482, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_25069 = torch.constant.int 32
    %27483 = torch.aten.mul.int %3041, %int32_25069 : !torch.int, !torch.int -> !torch.int
    %int2_25070 = torch.constant.int 2
    %27484 = torch.aten.mul.int %27483, %int2_25070 : !torch.int, !torch.int -> !torch.int
    %int16_25071 = torch.constant.int 16
    %int1_25072 = torch.constant.int 1
    %int128_25073 = torch.constant.int 128
    %27485 = torch.prim.ListConstruct %27484, %int16_25071, %int1_25072, %int128_25073 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27486 = torch.aten.view %27482, %27485 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27486, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27487 = torch.prim.ListConstruct %27390 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_25074 = torch.constant.bool false
    %27488 = torch.aten.index_put %27486, %27487, %27406, %false_25074 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27488, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_25075 = torch.constant.int 32
    %int2_25076 = torch.constant.int 2
    %int16_25077 = torch.constant.int 16
    %int1_25078 = torch.constant.int 1
    %int128_25079 = torch.constant.int 128
    %27489 = torch.prim.ListConstruct %3041, %int32_25075, %int2_25076, %int16_25077, %int1_25078, %int128_25079 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27490 = torch.aten.view %27488, %27489 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27490, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_25080 = torch.constant.int 131072
    %27491 = torch.prim.ListConstruct %3041, %int131072_25080 : (!torch.int, !torch.int) -> !torch.list<int>
    %27492 = torch.aten.view %27490, %27491 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %27492, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_25081 = torch.constant.int 32
    %int2_25082 = torch.constant.int 2
    %int16_25083 = torch.constant.int 16
    %int1_25084 = torch.constant.int 1
    %int128_25085 = torch.constant.int 128
    %27493 = torch.prim.ListConstruct %3044, %int32_25081, %int2_25082, %int16_25083, %int1_25084, %int128_25085 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27494 = torch.aten.view %25643, %27493 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27494, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_25086 = torch.constant.int 32
    %27495 = torch.aten.mul.int %3044, %int32_25086 : !torch.int, !torch.int -> !torch.int
    %int2_25087 = torch.constant.int 2
    %27496 = torch.aten.mul.int %27495, %int2_25087 : !torch.int, !torch.int -> !torch.int
    %int16_25088 = torch.constant.int 16
    %int1_25089 = torch.constant.int 1
    %int128_25090 = torch.constant.int 128
    %27497 = torch.prim.ListConstruct %27496, %int16_25088, %int1_25089, %int128_25090 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27498 = torch.aten.view %27494, %27497 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27498, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %27499 = torch.prim.ListConstruct %27392 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_25091 = torch.constant.bool false
    %27500 = torch.aten.index_put %27498, %27499, %27408, %false_25091 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %27500, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_25092 = torch.constant.int 32
    %int2_25093 = torch.constant.int 2
    %int16_25094 = torch.constant.int 16
    %int1_25095 = torch.constant.int 1
    %int128_25096 = torch.constant.int 128
    %27501 = torch.prim.ListConstruct %3044, %int32_25092, %int2_25093, %int16_25094, %int1_25095, %int128_25096 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27502 = torch.aten.view %27500, %27501 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %27502, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_25097 = torch.constant.int 131072
    %27503 = torch.prim.ListConstruct %3044, %int131072_25097 : (!torch.int, !torch.int) -> !torch.list<int>
    %27504 = torch.aten.view %27502, %27503 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %27504, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
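    // With the cache written back, the per-shard current K/V tensors
    // (%27119..%27224) get a broadcast axis inserted at dim -2
    // ([4,?,1,128] to [4,?,1,1,128]) and are then expanded to [4,?,1,4,128],
    // which looks like grouped-query-attention replication of one kv head
    // across 4 query heads per shard.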
    %int-2_25098 = torch.constant.int -2
    %27505 = torch.aten.unsqueeze %27119, %int-2_25098 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25099 = torch.constant.int -2
    %27506 = torch.aten.unsqueeze %27134, %int-2_25099 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25100 = torch.constant.int -2
    %27507 = torch.aten.unsqueeze %27149, %int-2_25100 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25101 = torch.constant.int -2
    %27508 = torch.aten.unsqueeze %27164, %int-2_25101 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25102 = torch.constant.int -2
    %27509 = torch.aten.unsqueeze %27179, %int-2_25102 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25103 = torch.constant.int -2
    %27510 = torch.aten.unsqueeze %27194, %int-2_25103 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25104 = torch.constant.int -2
    %27511 = torch.aten.unsqueeze %27209, %int-2_25104 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25105 = torch.constant.int -2
    %27512 = torch.aten.unsqueeze %27224, %int-2_25105 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int4_25106 = torch.constant.int 4
    %int1_25107 = torch.constant.int 1
    %int4_25108 = torch.constant.int 4
    %int128_25109 = torch.constant.int 128
    %27513 = torch.prim.ListConstruct %int4_25106, %27105, %int1_25107, %int4_25108, %int128_25109 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25110 = torch.constant.bool false
    %27514 = torch.aten.expand %27505, %27513, %false_25110 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25111 = torch.constant.int 4
    %int1_25112 = torch.constant.int 1
    %int4_25113 = torch.constant.int 4
    %int128_25114 = torch.constant.int 128
    %27515 = torch.prim.ListConstruct %int4_25111, %27105, %int1_25112, %int4_25113, %int128_25114 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25115 = torch.constant.bool false
    %27516 = torch.aten.expand %27506, %27515, %false_25115 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25116 = torch.constant.int 4
    %int1_25117 = torch.constant.int 1
    %int4_25118 = torch.constant.int 4
    %int128_25119 = torch.constant.int 128
    %27517 = torch.prim.ListConstruct %int4_25116, %27105, %int1_25117, %int4_25118, %int128_25119 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25120 = torch.constant.bool false
    %27518 = torch.aten.expand %27507, %27517, %false_25120 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25121 = torch.constant.int 4
    %int1_25122 = torch.constant.int 1
    %int4_25123 = torch.constant.int 4
    %int128_25124 = torch.constant.int 128
    %27519 = torch.prim.ListConstruct %int4_25121, %27105, %int1_25122, %int4_25123, %int128_25124 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25125 = torch.constant.bool false
    %27520 = torch.aten.expand %27508, %27519, %false_25125 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25126 = torch.constant.int 4
    %int1_25127 = torch.constant.int 1
    %int4_25128 = torch.constant.int 4
    %int128_25129 = torch.constant.int 128
    %27521 = torch.prim.ListConstruct %int4_25126, %27105, %int1_25127, %int4_25128, %int128_25129 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25130 = torch.constant.bool false
    %27522 = torch.aten.expand %27509, %27521, %false_25130 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25131 = torch.constant.int 4
    %int1_25132 = torch.constant.int 1
    %int4_25133 = torch.constant.int 4
    %int128_25134 = torch.constant.int 128
    %27523 = torch.prim.ListConstruct %int4_25131, %27105, %int1_25132, %int4_25133, %int128_25134 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25135 = torch.constant.bool false
    %27524 = torch.aten.expand %27510, %27523, %false_25135 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25136 = torch.constant.int 4
    %int1_25137 = torch.constant.int 1
    %int4_25138 = torch.constant.int 4
    %int128_25139 = torch.constant.int 128
    %27525 = torch.prim.ListConstruct %int4_25136, %27105, %int1_25137, %int4_25138, %int128_25139 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25140 = torch.constant.bool false
    %27526 = torch.aten.expand %27511, %27525, %false_25140 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25141 = torch.constant.int 4
    %int1_25142 = torch.constant.int 1
    %int4_25143 = torch.constant.int 4
    %int128_25144 = torch.constant.int 128
    %27527 = torch.prim.ListConstruct %int4_25141, %27105, %int1_25142, %int4_25143, %int128_25144 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25145 = torch.constant.bool false
    %27528 = torch.aten.expand %27512, %27527, %false_25145 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
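    // Finally the replicated tensors are collapsed to [4,?,4,128], merging the
    // kv-head and replication axes into 4 effective heads per shard; again this
    // is inferred from the shapes, consistent with an 8-way-sharded GQA layout.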
    %int4_25146 = torch.constant.int 4
    %int4_25147 = torch.constant.int 4
    %int128_25148 = torch.constant.int 128
    %27529 = torch.prim.ListConstruct %int4_25146, %27105, %int4_25147, %int128_25148 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27530 = torch.aten.view %27514, %27529 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25149 = torch.constant.int 4
    %int4_25150 = torch.constant.int 4
    %int128_25151 = torch.constant.int 128
    %27531 = torch.prim.ListConstruct %int4_25149, %27105, %int4_25150, %int128_25151 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27532 = torch.aten.view %27516, %27531 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25152 = torch.constant.int 4
    %int4_25153 = torch.constant.int 4
    %int128_25154 = torch.constant.int 128
    %27533 = torch.prim.ListConstruct %int4_25152, %27105, %int4_25153, %int128_25154 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27534 = torch.aten.view %27518, %27533 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25155 = torch.constant.int 4
    %int4_25156 = torch.constant.int 4
    %int128_25157 = torch.constant.int 128
    %27535 = torch.prim.ListConstruct %int4_25155, %27105, %int4_25156, %int128_25157 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27536 = torch.aten.view %27520, %27535 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25158 = torch.constant.int 4
    %int4_25159 = torch.constant.int 4
    %int128_25160 = torch.constant.int 128
    %27537 = torch.prim.ListConstruct %int4_25158, %27105, %int4_25159, %int128_25160 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27538 = torch.aten.view %27522, %27537 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25161 = torch.constant.int 4
    %int4_25162 = torch.constant.int 4
    %int128_25163 = torch.constant.int 128
    %27539 = torch.prim.ListConstruct %int4_25161, %27105, %int4_25162, %int128_25163 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27540 = torch.aten.view %27524, %27539 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25164 = torch.constant.int 4
    %int4_25165 = torch.constant.int 4
    %int128_25166 = torch.constant.int 128
    %27541 = torch.prim.ListConstruct %int4_25164, %27105, %int4_25165, %int128_25166 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27542 = torch.aten.view %27526, %27541 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25167 = torch.constant.int 4
    %int4_25168 = torch.constant.int 4
    %int128_25169 = torch.constant.int 128
    %27543 = torch.prim.ListConstruct %int4_25167, %27105, %int4_25168, %int128_25169 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27544 = torch.aten.view %27528, %27543 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
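    // Value path: the same replication is applied to the per-device value
    // tensors %26894..%26908 below, starting with an unsqueeze at dim -2 to
    // [4,?,1,1,128].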
    %int-2_25170 = torch.constant.int -2
    %27545 = torch.aten.unsqueeze %26894, %int-2_25170 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25171 = torch.constant.int -2
    %27546 = torch.aten.unsqueeze %26896, %int-2_25171 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25172 = torch.constant.int -2
    %27547 = torch.aten.unsqueeze %26898, %int-2_25172 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25173 = torch.constant.int -2
    %27548 = torch.aten.unsqueeze %26900, %int-2_25173 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25174 = torch.constant.int -2
    %27549 = torch.aten.unsqueeze %26902, %int-2_25174 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25175 = torch.constant.int -2
    %27550 = torch.aten.unsqueeze %26904, %int-2_25175 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25176 = torch.constant.int -2
    %27551 = torch.aten.unsqueeze %26906, %int-2_25176 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_25177 = torch.constant.int -2
    %27552 = torch.aten.unsqueeze %26908, %int-2_25177 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %27552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
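    // Recover the dynamic sequence length from %26818 (dim 1) as %27553, then
    // broadcast each unsqueezed value tensor to [4,?,1,4,128], mirroring the
    // key path above.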
    %int1_25178 = torch.constant.int 1
    %27553 = torch.aten.size.int %26818, %int1_25178 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_25179 = torch.constant.int 4
    %int1_25180 = torch.constant.int 1
    %int4_25181 = torch.constant.int 4
    %int128_25182 = torch.constant.int 128
    %27554 = torch.prim.ListConstruct %int4_25179, %27553, %int1_25180, %int4_25181, %int128_25182 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25183 = torch.constant.bool false
    %27555 = torch.aten.expand %27545, %27554, %false_25183 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25184 = torch.constant.int 4
    %int1_25185 = torch.constant.int 1
    %int4_25186 = torch.constant.int 4
    %int128_25187 = torch.constant.int 128
    %27556 = torch.prim.ListConstruct %int4_25184, %27553, %int1_25185, %int4_25186, %int128_25187 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25188 = torch.constant.bool false
    %27557 = torch.aten.expand %27546, %27556, %false_25188 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25189 = torch.constant.int 4
    %int1_25190 = torch.constant.int 1
    %int4_25191 = torch.constant.int 4
    %int128_25192 = torch.constant.int 128
    %27558 = torch.prim.ListConstruct %int4_25189, %27553, %int1_25190, %int4_25191, %int128_25192 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25193 = torch.constant.bool false
    %27559 = torch.aten.expand %27547, %27558, %false_25193 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25194 = torch.constant.int 4
    %int1_25195 = torch.constant.int 1
    %int4_25196 = torch.constant.int 4
    %int128_25197 = torch.constant.int 128
    %27560 = torch.prim.ListConstruct %int4_25194, %27553, %int1_25195, %int4_25196, %int128_25197 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25198 = torch.constant.bool false
    %27561 = torch.aten.expand %27548, %27560, %false_25198 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25199 = torch.constant.int 4
    %int1_25200 = torch.constant.int 1
    %int4_25201 = torch.constant.int 4
    %int128_25202 = torch.constant.int 128
    %27562 = torch.prim.ListConstruct %int4_25199, %27553, %int1_25200, %int4_25201, %int128_25202 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25203 = torch.constant.bool false
    %27563 = torch.aten.expand %27549, %27562, %false_25203 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25204 = torch.constant.int 4
    %int1_25205 = torch.constant.int 1
    %int4_25206 = torch.constant.int 4
    %int128_25207 = torch.constant.int 128
    %27564 = torch.prim.ListConstruct %int4_25204, %27553, %int1_25205, %int4_25206, %int128_25207 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25208 = torch.constant.bool false
    %27565 = torch.aten.expand %27550, %27564, %false_25208 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25209 = torch.constant.int 4
    %int1_25210 = torch.constant.int 1
    %int4_25211 = torch.constant.int 4
    %int128_25212 = torch.constant.int 128
    %27566 = torch.prim.ListConstruct %int4_25209, %27553, %int1_25210, %int4_25211, %int128_25212 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25213 = torch.constant.bool false
    %27567 = torch.aten.expand %27551, %27566, %false_25213 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_25214 = torch.constant.int 4
    %int1_25215 = torch.constant.int 1
    %int4_25216 = torch.constant.int 4
    %int128_25217 = torch.constant.int 128
    %27568 = torch.prim.ListConstruct %int4_25214, %27553, %int1_25215, %int4_25216, %int128_25217 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_25218 = torch.constant.bool false
    %27569 = torch.aten.expand %27552, %27568, %false_25218 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %27569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
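    // Collapse the broadcast singleton dim of the value tensors, giving
    // [4,?,4,128] per device.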
    %int4_25219 = torch.constant.int 4
    %int4_25220 = torch.constant.int 4
    %int128_25221 = torch.constant.int 128
    %27570 = torch.prim.ListConstruct %int4_25219, %27553, %int4_25220, %int128_25221 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27571 = torch.aten.view %27555, %27570 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25222 = torch.constant.int 4
    %int4_25223 = torch.constant.int 4
    %int128_25224 = torch.constant.int 128
    %27572 = torch.prim.ListConstruct %int4_25222, %27553, %int4_25223, %int128_25224 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27573 = torch.aten.view %27557, %27572 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25225 = torch.constant.int 4
    %int4_25226 = torch.constant.int 4
    %int128_25227 = torch.constant.int 128
    %27574 = torch.prim.ListConstruct %int4_25225, %27553, %int4_25226, %int128_25227 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27575 = torch.aten.view %27559, %27574 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25228 = torch.constant.int 4
    %int4_25229 = torch.constant.int 4
    %int128_25230 = torch.constant.int 128
    %27576 = torch.prim.ListConstruct %int4_25228, %27553, %int4_25229, %int128_25230 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27577 = torch.aten.view %27561, %27576 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25231 = torch.constant.int 4
    %int4_25232 = torch.constant.int 4
    %int128_25233 = torch.constant.int 128
    %27578 = torch.prim.ListConstruct %int4_25231, %27553, %int4_25232, %int128_25233 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27579 = torch.aten.view %27563, %27578 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25234 = torch.constant.int 4
    %int4_25235 = torch.constant.int 4
    %int128_25236 = torch.constant.int 128
    %27580 = torch.prim.ListConstruct %int4_25234, %27553, %int4_25235, %int128_25236 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27581 = torch.aten.view %27565, %27580 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25237 = torch.constant.int 4
    %int4_25238 = torch.constant.int 4
    %int128_25239 = torch.constant.int 128
    %27582 = torch.prim.ListConstruct %int4_25237, %27553, %int4_25238, %int128_25239 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27583 = torch.aten.view %27567, %27582 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_25240 = torch.constant.int 4
    %int4_25241 = torch.constant.int 4
    %int128_25242 = torch.constant.int 128
    %27584 = torch.prim.ListConstruct %int4_25240, %27553, %int4_25241, %int128_25242 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27585 = torch.aten.view %27569, %27584 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
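    // Transpose dims 1 and 2 of the per-device q (%26961..%27066),
    // k (%27530..%27544), and v (%27571..%27585) tensors from
    // [batch, seq, heads, head_dim] to the [batch, heads, seq, head_dim]
    // layout expected by scaled dot-product attention (roles inferred from
    // the (query, key, value) operand order of the calls below).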
    %int1_25243 = torch.constant.int 1
    %int2_25244 = torch.constant.int 2
    %27586 = torch.aten.transpose.int %26961, %int1_25243, %int2_25244 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27586, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25245 = torch.constant.int 1
    %int2_25246 = torch.constant.int 2
    %27587 = torch.aten.transpose.int %26976, %int1_25245, %int2_25246 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27587, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25247 = torch.constant.int 1
    %int2_25248 = torch.constant.int 2
    %27588 = torch.aten.transpose.int %26991, %int1_25247, %int2_25248 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27588, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25249 = torch.constant.int 1
    %int2_25250 = torch.constant.int 2
    %27589 = torch.aten.transpose.int %27006, %int1_25249, %int2_25250 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27589, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25251 = torch.constant.int 1
    %int2_25252 = torch.constant.int 2
    %27590 = torch.aten.transpose.int %27021, %int1_25251, %int2_25252 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27590, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25253 = torch.constant.int 1
    %int2_25254 = torch.constant.int 2
    %27591 = torch.aten.transpose.int %27036, %int1_25253, %int2_25254 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27591, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25255 = torch.constant.int 1
    %int2_25256 = torch.constant.int 2
    %27592 = torch.aten.transpose.int %27051, %int1_25255, %int2_25256 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27592, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25257 = torch.constant.int 1
    %int2_25258 = torch.constant.int 2
    %27593 = torch.aten.transpose.int %27066, %int1_25257, %int2_25258 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27593, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25259 = torch.constant.int 1
    %int2_25260 = torch.constant.int 2
    %27594 = torch.aten.transpose.int %27530, %int1_25259, %int2_25260 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27594, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25261 = torch.constant.int 1
    %int2_25262 = torch.constant.int 2
    %27595 = torch.aten.transpose.int %27532, %int1_25261, %int2_25262 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27595, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25263 = torch.constant.int 1
    %int2_25264 = torch.constant.int 2
    %27596 = torch.aten.transpose.int %27534, %int1_25263, %int2_25264 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27596, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25265 = torch.constant.int 1
    %int2_25266 = torch.constant.int 2
    %27597 = torch.aten.transpose.int %27536, %int1_25265, %int2_25266 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27597, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25267 = torch.constant.int 1
    %int2_25268 = torch.constant.int 2
    %27598 = torch.aten.transpose.int %27538, %int1_25267, %int2_25268 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27598, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25269 = torch.constant.int 1
    %int2_25270 = torch.constant.int 2
    %27599 = torch.aten.transpose.int %27540, %int1_25269, %int2_25270 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27599, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25271 = torch.constant.int 1
    %int2_25272 = torch.constant.int 2
    %27600 = torch.aten.transpose.int %27542, %int1_25271, %int2_25272 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27600, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25273 = torch.constant.int 1
    %int2_25274 = torch.constant.int 2
    %27601 = torch.aten.transpose.int %27544, %int1_25273, %int2_25274 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27601, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25275 = torch.constant.int 1
    %int2_25276 = torch.constant.int 2
    %27602 = torch.aten.transpose.int %27571, %int1_25275, %int2_25276 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27602, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25277 = torch.constant.int 1
    %int2_25278 = torch.constant.int 2
    %27603 = torch.aten.transpose.int %27573, %int1_25277, %int2_25278 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27603, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25279 = torch.constant.int 1
    %int2_25280 = torch.constant.int 2
    %27604 = torch.aten.transpose.int %27575, %int1_25279, %int2_25280 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27604, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25281 = torch.constant.int 1
    %int2_25282 = torch.constant.int 2
    %27605 = torch.aten.transpose.int %27577, %int1_25281, %int2_25282 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27605, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25283 = torch.constant.int 1
    %int2_25284 = torch.constant.int 2
    %27606 = torch.aten.transpose.int %27579, %int1_25283, %int2_25284 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27606, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25285 = torch.constant.int 1
    %int2_25286 = torch.constant.int 2
    %27607 = torch.aten.transpose.int %27581, %int1_25285, %int2_25286 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27607, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25287 = torch.constant.int 1
    %int2_25288 = torch.constant.int 2
    %27608 = torch.aten.transpose.int %27583, %int1_25287, %int2_25288 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27608, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_25289 = torch.constant.int 1
    %int2_25290 = torch.constant.int 2
    %27609 = torch.aten.transpose.int %27585, %int1_25289, %int2_25290 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %27609, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
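    // One causal flash-attention call per device shard (dropout_p = 0.0,
    // is_causal = true, no explicit mask or scale). Only result #0 (the
    // attention output) is consumed below; result #1 (the logsumexp) appears
    // unused in this block.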
    %float0.000000e00_25291 = torch.constant.float 0.000000e+00
    %true_25292 = torch.constant.bool true
    %none_25293 = torch.constant.none
    %none_25294 = torch.constant.none
    %27610:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%27586, %27594, %27602, %float0.000000e00_25291, %true_25292, %none_25293, %none_25294) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %27610#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_25295 = torch.constant.float 0.000000e+00
    %true_25296 = torch.constant.bool true
    %none_25297 = torch.constant.none
    %none_25298 = torch.constant.none
    %27611:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%27587, %27595, %27603, %float0.000000e00_25295, %true_25296, %none_25297, %none_25298) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %27611#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_25299 = torch.constant.float 0.000000e+00
    %true_25300 = torch.constant.bool true
    %none_25301 = torch.constant.none
    %none_25302 = torch.constant.none
    %27612:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%27588, %27596, %27604, %float0.000000e00_25299, %true_25300, %none_25301, %none_25302) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %27612#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_25303 = torch.constant.float 0.000000e+00
    %true_25304 = torch.constant.bool true
    %none_25305 = torch.constant.none
    %none_25306 = torch.constant.none
    %27613:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%27589, %27597, %27605, %float0.000000e00_25303, %true_25304, %none_25305, %none_25306) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %27613#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_25307 = torch.constant.float 0.000000e+00
    %true_25308 = torch.constant.bool true
    %none_25309 = torch.constant.none
    %none_25310 = torch.constant.none
    %27614:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%27590, %27598, %27606, %float0.000000e00_25307, %true_25308, %none_25309, %none_25310) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %27614#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_25311 = torch.constant.float 0.000000e+00
    %true_25312 = torch.constant.bool true
    %none_25313 = torch.constant.none
    %none_25314 = torch.constant.none
    %27615:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%27591, %27599, %27607, %float0.000000e00_25311, %true_25312, %none_25313, %none_25314) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %27615#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_25315 = torch.constant.float 0.000000e+00
    %true_25316 = torch.constant.bool true
    %none_25317 = torch.constant.none
    %none_25318 = torch.constant.none
    %27616:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%27592, %27600, %27608, %float0.000000e00_25315, %true_25316, %none_25317, %none_25318) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %27616#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_25319 = torch.constant.float 0.000000e+00
    %true_25320 = torch.constant.bool true
    %none_25321 = torch.constant.none
    %none_25322 = torch.constant.none
    %27617:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%27593, %27601, %27609, %float0.000000e00_25319, %true_25320, %none_25321, %none_25322) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %27617#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
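    // Transpose each attention output back to [batch, seq, heads, head_dim].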
    %int1_25323 = torch.constant.int 1
    %int2_25324 = torch.constant.int 2
    %27618 = torch.aten.transpose.int %27610#0, %int1_25323, %int2_25324 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_25325 = torch.constant.int 1
    %int2_25326 = torch.constant.int 2
    %27619 = torch.aten.transpose.int %27611#0, %int1_25325, %int2_25326 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_25327 = torch.constant.int 1
    %int2_25328 = torch.constant.int 2
    %27620 = torch.aten.transpose.int %27612#0, %int1_25327, %int2_25328 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_25329 = torch.constant.int 1
    %int2_25330 = torch.constant.int 2
    %27621 = torch.aten.transpose.int %27613#0, %int1_25329, %int2_25330 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_25331 = torch.constant.int 1
    %int2_25332 = torch.constant.int 2
    %27622 = torch.aten.transpose.int %27614#0, %int1_25331, %int2_25332 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_25333 = torch.constant.int 1
    %int2_25334 = torch.constant.int 2
    %27623 = torch.aten.transpose.int %27615#0, %int1_25333, %int2_25334 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_25335 = torch.constant.int 1
    %int2_25336 = torch.constant.int 2
    %27624 = torch.aten.transpose.int %27616#0, %int1_25335, %int2_25336 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_25337 = torch.constant.int 1
    %int2_25338 = torch.constant.int 2
    %27625 = torch.aten.transpose.int %27617#0, %int1_25337, %int2_25338 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %27625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
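    // Merge the 4 heads x 128 head_dim into a single 512-wide feature dim:
    // [4,?,4,128] -> [4,?,512] per device.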
    %int4_25339 = torch.constant.int 4
    %int512_25340 = torch.constant.int 512
    %27626 = torch.prim.ListConstruct %int4_25339, %26947, %int512_25340 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27627 = torch.aten.view %27618, %27626 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %27627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_25341 = torch.constant.int 4
    %int512_25342 = torch.constant.int 512
    %27628 = torch.prim.ListConstruct %int4_25341, %26962, %int512_25342 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27629 = torch.aten.view %27619, %27628 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %27629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_25343 = torch.constant.int 4
    %int512_25344 = torch.constant.int 512
    %27630 = torch.prim.ListConstruct %int4_25343, %26977, %int512_25344 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27631 = torch.aten.view %27620, %27630 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %27631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_25345 = torch.constant.int 4
    %int512_25346 = torch.constant.int 512
    %27632 = torch.prim.ListConstruct %int4_25345, %26992, %int512_25346 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27633 = torch.aten.view %27621, %27632 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %27633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_25347 = torch.constant.int 4
    %int512_25348 = torch.constant.int 512
    %27634 = torch.prim.ListConstruct %int4_25347, %27007, %int512_25348 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27635 = torch.aten.view %27622, %27634 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %27635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_25349 = torch.constant.int 4
    %int512_25350 = torch.constant.int 512
    %27636 = torch.prim.ListConstruct %int4_25349, %27022, %int512_25350 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27637 = torch.aten.view %27623, %27636 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %27637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_25351 = torch.constant.int 4
    %int512_25352 = torch.constant.int 512
    %27638 = torch.prim.ListConstruct %int4_25351, %27037, %int512_25352 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27639 = torch.aten.view %27624, %27638 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %27639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_25353 = torch.constant.int 4
    %int512_25354 = torch.constant.int 512
    %27640 = torch.prim.ListConstruct %int4_25353, %27052, %int512_25354 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27641 = torch.aten.view %27625, %27640 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %27641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
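    // Transpose the per-device [4096,512] weight shards %976..%983 (likely the
    // attn_output projection shards) to [512,4096] for the matmuls below.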
    %int1_25355 = torch.constant.int 1
    %int0_25356 = torch.constant.int 0
    %27642 = torch.prim.ListConstruct %int1_25355, %int0_25356 : (!torch.int, !torch.int) -> !torch.list<int>
    %27643 = torch.aten.permute %976, %27642 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_25357 = torch.constant.int 1
    %int0_25358 = torch.constant.int 0
    %27644 = torch.prim.ListConstruct %int1_25357, %int0_25358 : (!torch.int, !torch.int) -> !torch.list<int>
    %27645 = torch.aten.permute %977, %27644 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_25359 = torch.constant.int 1
    %int0_25360 = torch.constant.int 0
    %27646 = torch.prim.ListConstruct %int1_25359, %int0_25360 : (!torch.int, !torch.int) -> !torch.list<int>
    %27647 = torch.aten.permute %978, %27646 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_25361 = torch.constant.int 1
    %int0_25362 = torch.constant.int 0
    %27648 = torch.prim.ListConstruct %int1_25361, %int0_25362 : (!torch.int, !torch.int) -> !torch.list<int>
    %27649 = torch.aten.permute %979, %27648 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_25363 = torch.constant.int 1
    %int0_25364 = torch.constant.int 0
    %27650 = torch.prim.ListConstruct %int1_25363, %int0_25364 : (!torch.int, !torch.int) -> !torch.list<int>
    %27651 = torch.aten.permute %980, %27650 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_25365 = torch.constant.int 1
    %int0_25366 = torch.constant.int 0
    %27652 = torch.prim.ListConstruct %int1_25365, %int0_25366 : (!torch.int, !torch.int) -> !torch.list<int>
    %27653 = torch.aten.permute %981, %27652 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_25367 = torch.constant.int 1
    %int0_25368 = torch.constant.int 0
    %27654 = torch.prim.ListConstruct %int1_25367, %int0_25368 : (!torch.int, !torch.int) -> !torch.list<int>
    %27655 = torch.aten.permute %982, %27654 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_25369 = torch.constant.int 1
    %int0_25370 = torch.constant.int 0
    %27656 = torch.prim.ListConstruct %int1_25369, %int0_25370 : (!torch.int, !torch.int) -> !torch.list<int>
    %27657 = torch.aten.permute %983, %27656 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
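    // Per-device output projection: flatten each shard's output to [4*s, 512],
    // matmul with its [512,4096] weight shard, and reshape to [4,?,4096]. Each
    // result is a partial sum over the 512-wide slice of the contraction dim
    // held by that shard.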
    %int4_25371 = torch.constant.int 4
    %27658 = torch.aten.mul.int %int4_25371, %26947 : !torch.int, !torch.int -> !torch.int
    %int512_25372 = torch.constant.int 512
    %27659 = torch.prim.ListConstruct %27658, %int512_25372 : (!torch.int, !torch.int) -> !torch.list<int>
    %27660 = torch.aten.view %27627, %27659 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %27660, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %27661 = torch.aten.mm %27660, %27643 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %27661, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25373 = torch.constant.int 4
    %int4096_25374 = torch.constant.int 4096
    %27662 = torch.prim.ListConstruct %int4_25373, %26947, %int4096_25374 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27663 = torch.aten.view %27661, %27662 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_25375 = torch.constant.int 4
    %27664 = torch.aten.mul.int %int4_25375, %26962 : !torch.int, !torch.int -> !torch.int
    %int512_25376 = torch.constant.int 512
    %27665 = torch.prim.ListConstruct %27664, %int512_25376 : (!torch.int, !torch.int) -> !torch.list<int>
    %27666 = torch.aten.view %27629, %27665 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %27666, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %27667 = torch.aten.mm %27666, %27645 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %27667, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25377 = torch.constant.int 4
    %int4096_25378 = torch.constant.int 4096
    %27668 = torch.prim.ListConstruct %int4_25377, %26962, %int4096_25378 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27669 = torch.aten.view %27667, %27668 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_25379 = torch.constant.int 4
    %27670 = torch.aten.mul.int %int4_25379, %26977 : !torch.int, !torch.int -> !torch.int
    %int512_25380 = torch.constant.int 512
    %27671 = torch.prim.ListConstruct %27670, %int512_25380 : (!torch.int, !torch.int) -> !torch.list<int>
    %27672 = torch.aten.view %27631, %27671 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %27672, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %27673 = torch.aten.mm %27672, %27647 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %27673, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25381 = torch.constant.int 4
    %int4096_25382 = torch.constant.int 4096
    %27674 = torch.prim.ListConstruct %int4_25381, %26977, %int4096_25382 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27675 = torch.aten.view %27673, %27674 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_25383 = torch.constant.int 4
    %27676 = torch.aten.mul.int %int4_25383, %26992 : !torch.int, !torch.int -> !torch.int
    %int512_25384 = torch.constant.int 512
    %27677 = torch.prim.ListConstruct %27676, %int512_25384 : (!torch.int, !torch.int) -> !torch.list<int>
    %27678 = torch.aten.view %27633, %27677 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %27678, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %27679 = torch.aten.mm %27678, %27649 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %27679, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25385 = torch.constant.int 4
    %int4096_25386 = torch.constant.int 4096
    %27680 = torch.prim.ListConstruct %int4_25385, %26992, %int4096_25386 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27681 = torch.aten.view %27679, %27680 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_25387 = torch.constant.int 4
    %27682 = torch.aten.mul.int %int4_25387, %27007 : !torch.int, !torch.int -> !torch.int
    %int512_25388 = torch.constant.int 512
    %27683 = torch.prim.ListConstruct %27682, %int512_25388 : (!torch.int, !torch.int) -> !torch.list<int>
    %27684 = torch.aten.view %27635, %27683 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %27684, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %27685 = torch.aten.mm %27684, %27651 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %27685, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25389 = torch.constant.int 4
    %int4096_25390 = torch.constant.int 4096
    %27686 = torch.prim.ListConstruct %int4_25389, %27007, %int4096_25390 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27687 = torch.aten.view %27685, %27686 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_25391 = torch.constant.int 4
    %27688 = torch.aten.mul.int %int4_25391, %27022 : !torch.int, !torch.int -> !torch.int
    %int512_25392 = torch.constant.int 512
    %27689 = torch.prim.ListConstruct %27688, %int512_25392 : (!torch.int, !torch.int) -> !torch.list<int>
    %27690 = torch.aten.view %27637, %27689 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %27690, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %27691 = torch.aten.mm %27690, %27653 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %27691, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25393 = torch.constant.int 4
    %int4096_25394 = torch.constant.int 4096
    %27692 = torch.prim.ListConstruct %int4_25393, %27022, %int4096_25394 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27693 = torch.aten.view %27691, %27692 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_25395 = torch.constant.int 4
    %27694 = torch.aten.mul.int %int4_25395, %27037 : !torch.int, !torch.int -> !torch.int
    %int512_25396 = torch.constant.int 512
    %27695 = torch.prim.ListConstruct %27694, %int512_25396 : (!torch.int, !torch.int) -> !torch.list<int>
    %27696 = torch.aten.view %27639, %27695 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %27696, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %27697 = torch.aten.mm %27696, %27655 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %27697, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25397 = torch.constant.int 4
    %int4096_25398 = torch.constant.int 4096
    %27698 = torch.prim.ListConstruct %int4_25397, %27037, %int4096_25398 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27699 = torch.aten.view %27697, %27698 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_25399 = torch.constant.int 4
    %27700 = torch.aten.mul.int %int4_25399, %27052 : !torch.int, !torch.int -> !torch.int
    %int512_25400 = torch.constant.int 512
    %27701 = torch.prim.ListConstruct %27700, %int512_25400 : (!torch.int, !torch.int) -> !torch.list<int>
    %27702 = torch.aten.view %27641, %27701 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %27702, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %27703 = torch.aten.mm %27702, %27657 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %27703, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25401 = torch.constant.int 4
    %int4096_25402 = torch.constant.int 4096
    %27704 = torch.prim.ListConstruct %int4_25401, %27052, %int4096_25402 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %27705 = torch.aten.view %27703, %27704 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
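    // Reduction for @__device_0: transfer the seven remote partial results to
    // device 0 and accumulate them into %27663 with chained adds; together
    // with the per-device copies that follow, this is effectively an
    // all-reduce of the row-parallel projection across the eight shards.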
    %27706 = torch_c.to_builtin_tensor %27669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25403 = arith.constant 1 : index
    %dim_25404 = tensor.dim %27706, %c1_25403 : tensor<4x?x4096xf16>
    %27707 = flow.tensor.transfer %27706 : tensor<4x?x4096xf16>{%dim_25404} to #hal.device.promise<@__device_0>
    %27708 = torch_c.from_builtin_tensor %27707 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27709 = torch_c.to_builtin_tensor %27675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25405 = arith.constant 1 : index
    %dim_25406 = tensor.dim %27709, %c1_25405 : tensor<4x?x4096xf16>
    %27710 = flow.tensor.transfer %27709 : tensor<4x?x4096xf16>{%dim_25406} to #hal.device.promise<@__device_0>
    %27711 = torch_c.from_builtin_tensor %27710 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27712 = torch_c.to_builtin_tensor %27681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25407 = arith.constant 1 : index
    %dim_25408 = tensor.dim %27712, %c1_25407 : tensor<4x?x4096xf16>
    %27713 = flow.tensor.transfer %27712 : tensor<4x?x4096xf16>{%dim_25408} to #hal.device.promise<@__device_0>
    %27714 = torch_c.from_builtin_tensor %27713 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27715 = torch_c.to_builtin_tensor %27687 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25409 = arith.constant 1 : index
    %dim_25410 = tensor.dim %27715, %c1_25409 : tensor<4x?x4096xf16>
    %27716 = flow.tensor.transfer %27715 : tensor<4x?x4096xf16>{%dim_25410} to #hal.device.promise<@__device_0>
    %27717 = torch_c.from_builtin_tensor %27716 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27718 = torch_c.to_builtin_tensor %27693 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25411 = arith.constant 1 : index
    %dim_25412 = tensor.dim %27718, %c1_25411 : tensor<4x?x4096xf16>
    %27719 = flow.tensor.transfer %27718 : tensor<4x?x4096xf16>{%dim_25412} to #hal.device.promise<@__device_0>
    %27720 = torch_c.from_builtin_tensor %27719 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27721 = torch_c.to_builtin_tensor %27699 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25413 = arith.constant 1 : index
    %dim_25414 = tensor.dim %27721, %c1_25413 : tensor<4x?x4096xf16>
    %27722 = flow.tensor.transfer %27721 : tensor<4x?x4096xf16>{%dim_25414} to #hal.device.promise<@__device_0>
    %27723 = torch_c.from_builtin_tensor %27722 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27724 = torch_c.to_builtin_tensor %27705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25415 = arith.constant 1 : index
    %dim_25416 = tensor.dim %27724, %c1_25415 : tensor<4x?x4096xf16>
    %27725 = flow.tensor.transfer %27724 : tensor<4x?x4096xf16>{%dim_25416} to #hal.device.promise<@__device_0>
    %27726 = torch_c.from_builtin_tensor %27725 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25417 = torch.constant.int 1
    %27727 = torch.aten.add.Tensor %27663, %27708, %int1_25417 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25418 = torch.constant.int 1
    %27728 = torch.aten.add.Tensor %27727, %27711, %int1_25418 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25419 = torch.constant.int 1
    %27729 = torch.aten.add.Tensor %27728, %27714, %int1_25419 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25420 = torch.constant.int 1
    %27730 = torch.aten.add.Tensor %27729, %27717, %int1_25420 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25421 = torch.constant.int 1
    %27731 = torch.aten.add.Tensor %27730, %27720, %int1_25421 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25422 = torch.constant.int 1
    %27732 = torch.aten.add.Tensor %27731, %27723, %int1_25422 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25423 = torch.constant.int 1
    %27733 = torch.aten.add.Tensor %27732, %27726, %int1_25423 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
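    // The same gather-and-accumulate pattern is repeated for @__device_1 below
    // (and presumably for each remaining device): copy the other shards'
    // partial outputs locally and sum them.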
    %27734 = torch_c.to_builtin_tensor %27663 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25424 = arith.constant 1 : index
    %dim_25425 = tensor.dim %27734, %c1_25424 : tensor<4x?x4096xf16>
    %27735 = flow.tensor.transfer %27734 : tensor<4x?x4096xf16>{%dim_25425} to #hal.device.promise<@__device_1>
    %27736 = torch_c.from_builtin_tensor %27735 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27737 = torch_c.to_builtin_tensor %27675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25426 = arith.constant 1 : index
    %dim_25427 = tensor.dim %27737, %c1_25426 : tensor<4x?x4096xf16>
    %27738 = flow.tensor.transfer %27737 : tensor<4x?x4096xf16>{%dim_25427} to #hal.device.promise<@__device_1>
    %27739 = torch_c.from_builtin_tensor %27738 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27740 = torch_c.to_builtin_tensor %27681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25428 = arith.constant 1 : index
    %dim_25429 = tensor.dim %27740, %c1_25428 : tensor<4x?x4096xf16>
    %27741 = flow.tensor.transfer %27740 : tensor<4x?x4096xf16>{%dim_25429} to #hal.device.promise<@__device_1>
    %27742 = torch_c.from_builtin_tensor %27741 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27743 = torch_c.to_builtin_tensor %27687 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25430 = arith.constant 1 : index
    %dim_25431 = tensor.dim %27743, %c1_25430 : tensor<4x?x4096xf16>
    %27744 = flow.tensor.transfer %27743 : tensor<4x?x4096xf16>{%dim_25431} to #hal.device.promise<@__device_1>
    %27745 = torch_c.from_builtin_tensor %27744 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27746 = torch_c.to_builtin_tensor %27693 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25432 = arith.constant 1 : index
    %dim_25433 = tensor.dim %27746, %c1_25432 : tensor<4x?x4096xf16>
    %27747 = flow.tensor.transfer %27746 : tensor<4x?x4096xf16>{%dim_25433} to #hal.device.promise<@__device_1>
    %27748 = torch_c.from_builtin_tensor %27747 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27749 = torch_c.to_builtin_tensor %27699 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25434 = arith.constant 1 : index
    %dim_25435 = tensor.dim %27749, %c1_25434 : tensor<4x?x4096xf16>
    %27750 = flow.tensor.transfer %27749 : tensor<4x?x4096xf16>{%dim_25435} to #hal.device.promise<@__device_1>
    %27751 = torch_c.from_builtin_tensor %27750 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27752 = torch_c.to_builtin_tensor %27705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25436 = arith.constant 1 : index
    %dim_25437 = tensor.dim %27752, %c1_25436 : tensor<4x?x4096xf16>
    %27753 = flow.tensor.transfer %27752 : tensor<4x?x4096xf16>{%dim_25437} to #hal.device.promise<@__device_1>
    %27754 = torch_c.from_builtin_tensor %27753 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25438 = torch.constant.int 1
    %27755 = torch.aten.add.Tensor %27736, %27669, %int1_25438 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25439 = torch.constant.int 1
    %27756 = torch.aten.add.Tensor %27755, %27739, %int1_25439 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25440 = torch.constant.int 1
    %27757 = torch.aten.add.Tensor %27756, %27742, %int1_25440 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25441 = torch.constant.int 1
    %27758 = torch.aten.add.Tensor %27757, %27745, %int1_25441 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25442 = torch.constant.int 1
    %27759 = torch.aten.add.Tensor %27758, %27748, %int1_25442 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25443 = torch.constant.int 1
    %27760 = torch.aten.add.Tensor %27759, %27751, %int1_25443 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25444 = torch.constant.int 1
    %27761 = torch.aten.add.Tensor %27760, %27754, %int1_25444 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
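    // @__device_2: transfer seven partials (%27675 used in place), accumulate into %27789.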
    %27762 = torch_c.to_builtin_tensor %27663 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25445 = arith.constant 1 : index
    %dim_25446 = tensor.dim %27762, %c1_25445 : tensor<4x?x4096xf16>
    %27763 = flow.tensor.transfer %27762 : tensor<4x?x4096xf16>{%dim_25446} to #hal.device.promise<@__device_2>
    %27764 = torch_c.from_builtin_tensor %27763 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27765 = torch_c.to_builtin_tensor %27669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25447 = arith.constant 1 : index
    %dim_25448 = tensor.dim %27765, %c1_25447 : tensor<4x?x4096xf16>
    %27766 = flow.tensor.transfer %27765 : tensor<4x?x4096xf16>{%dim_25448} to #hal.device.promise<@__device_2>
    %27767 = torch_c.from_builtin_tensor %27766 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27768 = torch_c.to_builtin_tensor %27681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25449 = arith.constant 1 : index
    %dim_25450 = tensor.dim %27768, %c1_25449 : tensor<4x?x4096xf16>
    %27769 = flow.tensor.transfer %27768 : tensor<4x?x4096xf16>{%dim_25450} to #hal.device.promise<@__device_2>
    %27770 = torch_c.from_builtin_tensor %27769 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27771 = torch_c.to_builtin_tensor %27687 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25451 = arith.constant 1 : index
    %dim_25452 = tensor.dim %27771, %c1_25451 : tensor<4x?x4096xf16>
    %27772 = flow.tensor.transfer %27771 : tensor<4x?x4096xf16>{%dim_25452} to #hal.device.promise<@__device_2>
    %27773 = torch_c.from_builtin_tensor %27772 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27774 = torch_c.to_builtin_tensor %27693 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25453 = arith.constant 1 : index
    %dim_25454 = tensor.dim %27774, %c1_25453 : tensor<4x?x4096xf16>
    %27775 = flow.tensor.transfer %27774 : tensor<4x?x4096xf16>{%dim_25454} to #hal.device.promise<@__device_2>
    %27776 = torch_c.from_builtin_tensor %27775 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27777 = torch_c.to_builtin_tensor %27699 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25455 = arith.constant 1 : index
    %dim_25456 = tensor.dim %27777, %c1_25455 : tensor<4x?x4096xf16>
    %27778 = flow.tensor.transfer %27777 : tensor<4x?x4096xf16>{%dim_25456} to #hal.device.promise<@__device_2>
    %27779 = torch_c.from_builtin_tensor %27778 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27780 = torch_c.to_builtin_tensor %27705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25457 = arith.constant 1 : index
    %dim_25458 = tensor.dim %27780, %c1_25457 : tensor<4x?x4096xf16>
    %27781 = flow.tensor.transfer %27780 : tensor<4x?x4096xf16>{%dim_25458} to #hal.device.promise<@__device_2>
    %27782 = torch_c.from_builtin_tensor %27781 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25459 = torch.constant.int 1
    %27783 = torch.aten.add.Tensor %27764, %27767, %int1_25459 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25460 = torch.constant.int 1
    %27784 = torch.aten.add.Tensor %27783, %27675, %int1_25460 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25461 = torch.constant.int 1
    %27785 = torch.aten.add.Tensor %27784, %27770, %int1_25461 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25462 = torch.constant.int 1
    %27786 = torch.aten.add.Tensor %27785, %27773, %int1_25462 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25463 = torch.constant.int 1
    %27787 = torch.aten.add.Tensor %27786, %27776, %int1_25463 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25464 = torch.constant.int 1
    %27788 = torch.aten.add.Tensor %27787, %27779, %int1_25464 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25465 = torch.constant.int 1
    %27789 = torch.aten.add.Tensor %27788, %27782, %int1_25465 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
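    // @__device_3: transfer seven partials (%27681 used in place), accumulate into %27817.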
    %27790 = torch_c.to_builtin_tensor %27663 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25466 = arith.constant 1 : index
    %dim_25467 = tensor.dim %27790, %c1_25466 : tensor<4x?x4096xf16>
    %27791 = flow.tensor.transfer %27790 : tensor<4x?x4096xf16>{%dim_25467} to #hal.device.promise<@__device_3>
    %27792 = torch_c.from_builtin_tensor %27791 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27793 = torch_c.to_builtin_tensor %27669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25468 = arith.constant 1 : index
    %dim_25469 = tensor.dim %27793, %c1_25468 : tensor<4x?x4096xf16>
    %27794 = flow.tensor.transfer %27793 : tensor<4x?x4096xf16>{%dim_25469} to #hal.device.promise<@__device_3>
    %27795 = torch_c.from_builtin_tensor %27794 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27796 = torch_c.to_builtin_tensor %27675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25470 = arith.constant 1 : index
    %dim_25471 = tensor.dim %27796, %c1_25470 : tensor<4x?x4096xf16>
    %27797 = flow.tensor.transfer %27796 : tensor<4x?x4096xf16>{%dim_25471} to #hal.device.promise<@__device_3>
    %27798 = torch_c.from_builtin_tensor %27797 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27799 = torch_c.to_builtin_tensor %27687 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25472 = arith.constant 1 : index
    %dim_25473 = tensor.dim %27799, %c1_25472 : tensor<4x?x4096xf16>
    %27800 = flow.tensor.transfer %27799 : tensor<4x?x4096xf16>{%dim_25473} to #hal.device.promise<@__device_3>
    %27801 = torch_c.from_builtin_tensor %27800 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27802 = torch_c.to_builtin_tensor %27693 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25474 = arith.constant 1 : index
    %dim_25475 = tensor.dim %27802, %c1_25474 : tensor<4x?x4096xf16>
    %27803 = flow.tensor.transfer %27802 : tensor<4x?x4096xf16>{%dim_25475} to #hal.device.promise<@__device_3>
    %27804 = torch_c.from_builtin_tensor %27803 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27805 = torch_c.to_builtin_tensor %27699 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25476 = arith.constant 1 : index
    %dim_25477 = tensor.dim %27805, %c1_25476 : tensor<4x?x4096xf16>
    %27806 = flow.tensor.transfer %27805 : tensor<4x?x4096xf16>{%dim_25477} to #hal.device.promise<@__device_3>
    %27807 = torch_c.from_builtin_tensor %27806 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27808 = torch_c.to_builtin_tensor %27705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25478 = arith.constant 1 : index
    %dim_25479 = tensor.dim %27808, %c1_25478 : tensor<4x?x4096xf16>
    %27809 = flow.tensor.transfer %27808 : tensor<4x?x4096xf16>{%dim_25479} to #hal.device.promise<@__device_3>
    %27810 = torch_c.from_builtin_tensor %27809 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25480 = torch.constant.int 1
    %27811 = torch.aten.add.Tensor %27792, %27795, %int1_25480 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25481 = torch.constant.int 1
    %27812 = torch.aten.add.Tensor %27811, %27798, %int1_25481 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25482 = torch.constant.int 1
    %27813 = torch.aten.add.Tensor %27812, %27681, %int1_25482 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25483 = torch.constant.int 1
    %27814 = torch.aten.add.Tensor %27813, %27801, %int1_25483 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25484 = torch.constant.int 1
    %27815 = torch.aten.add.Tensor %27814, %27804, %int1_25484 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25485 = torch.constant.int 1
    %27816 = torch.aten.add.Tensor %27815, %27807, %int1_25485 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25486 = torch.constant.int 1
    %27817 = torch.aten.add.Tensor %27816, %27810, %int1_25486 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
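    // @__device_4: transfer seven partials (%27687 used in place), accumulate into %27845.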
    %27818 = torch_c.to_builtin_tensor %27663 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25487 = arith.constant 1 : index
    %dim_25488 = tensor.dim %27818, %c1_25487 : tensor<4x?x4096xf16>
    %27819 = flow.tensor.transfer %27818 : tensor<4x?x4096xf16>{%dim_25488} to #hal.device.promise<@__device_4>
    %27820 = torch_c.from_builtin_tensor %27819 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27821 = torch_c.to_builtin_tensor %27669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25489 = arith.constant 1 : index
    %dim_25490 = tensor.dim %27821, %c1_25489 : tensor<4x?x4096xf16>
    %27822 = flow.tensor.transfer %27821 : tensor<4x?x4096xf16>{%dim_25490} to #hal.device.promise<@__device_4>
    %27823 = torch_c.from_builtin_tensor %27822 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27824 = torch_c.to_builtin_tensor %27675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25491 = arith.constant 1 : index
    %dim_25492 = tensor.dim %27824, %c1_25491 : tensor<4x?x4096xf16>
    %27825 = flow.tensor.transfer %27824 : tensor<4x?x4096xf16>{%dim_25492} to #hal.device.promise<@__device_4>
    %27826 = torch_c.from_builtin_tensor %27825 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27827 = torch_c.to_builtin_tensor %27681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25493 = arith.constant 1 : index
    %dim_25494 = tensor.dim %27827, %c1_25493 : tensor<4x?x4096xf16>
    %27828 = flow.tensor.transfer %27827 : tensor<4x?x4096xf16>{%dim_25494} to #hal.device.promise<@__device_4>
    %27829 = torch_c.from_builtin_tensor %27828 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27830 = torch_c.to_builtin_tensor %27693 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25495 = arith.constant 1 : index
    %dim_25496 = tensor.dim %27830, %c1_25495 : tensor<4x?x4096xf16>
    %27831 = flow.tensor.transfer %27830 : tensor<4x?x4096xf16>{%dim_25496} to #hal.device.promise<@__device_4>
    %27832 = torch_c.from_builtin_tensor %27831 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27833 = torch_c.to_builtin_tensor %27699 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25497 = arith.constant 1 : index
    %dim_25498 = tensor.dim %27833, %c1_25497 : tensor<4x?x4096xf16>
    %27834 = flow.tensor.transfer %27833 : tensor<4x?x4096xf16>{%dim_25498} to #hal.device.promise<@__device_4>
    %27835 = torch_c.from_builtin_tensor %27834 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27836 = torch_c.to_builtin_tensor %27705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25499 = arith.constant 1 : index
    %dim_25500 = tensor.dim %27836, %c1_25499 : tensor<4x?x4096xf16>
    %27837 = flow.tensor.transfer %27836 : tensor<4x?x4096xf16>{%dim_25500} to #hal.device.promise<@__device_4>
    %27838 = torch_c.from_builtin_tensor %27837 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25501 = torch.constant.int 1
    %27839 = torch.aten.add.Tensor %27820, %27823, %int1_25501 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25502 = torch.constant.int 1
    %27840 = torch.aten.add.Tensor %27839, %27826, %int1_25502 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25503 = torch.constant.int 1
    %27841 = torch.aten.add.Tensor %27840, %27829, %int1_25503 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25504 = torch.constant.int 1
    %27842 = torch.aten.add.Tensor %27841, %27687, %int1_25504 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25505 = torch.constant.int 1
    %27843 = torch.aten.add.Tensor %27842, %27832, %int1_25505 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25506 = torch.constant.int 1
    %27844 = torch.aten.add.Tensor %27843, %27835, %int1_25506 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25507 = torch.constant.int 1
    %27845 = torch.aten.add.Tensor %27844, %27838, %int1_25507 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
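    // @__device_5: transfer seven partials (%27693 used in place), accumulate into %27873.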
    %27846 = torch_c.to_builtin_tensor %27663 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25508 = arith.constant 1 : index
    %dim_25509 = tensor.dim %27846, %c1_25508 : tensor<4x?x4096xf16>
    %27847 = flow.tensor.transfer %27846 : tensor<4x?x4096xf16>{%dim_25509} to #hal.device.promise<@__device_5>
    %27848 = torch_c.from_builtin_tensor %27847 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27849 = torch_c.to_builtin_tensor %27669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25510 = arith.constant 1 : index
    %dim_25511 = tensor.dim %27849, %c1_25510 : tensor<4x?x4096xf16>
    %27850 = flow.tensor.transfer %27849 : tensor<4x?x4096xf16>{%dim_25511} to #hal.device.promise<@__device_5>
    %27851 = torch_c.from_builtin_tensor %27850 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27852 = torch_c.to_builtin_tensor %27675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25512 = arith.constant 1 : index
    %dim_25513 = tensor.dim %27852, %c1_25512 : tensor<4x?x4096xf16>
    %27853 = flow.tensor.transfer %27852 : tensor<4x?x4096xf16>{%dim_25513} to #hal.device.promise<@__device_5>
    %27854 = torch_c.from_builtin_tensor %27853 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27855 = torch_c.to_builtin_tensor %27681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25514 = arith.constant 1 : index
    %dim_25515 = tensor.dim %27855, %c1_25514 : tensor<4x?x4096xf16>
    %27856 = flow.tensor.transfer %27855 : tensor<4x?x4096xf16>{%dim_25515} to #hal.device.promise<@__device_5>
    %27857 = torch_c.from_builtin_tensor %27856 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27858 = torch_c.to_builtin_tensor %27687 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25516 = arith.constant 1 : index
    %dim_25517 = tensor.dim %27858, %c1_25516 : tensor<4x?x4096xf16>
    %27859 = flow.tensor.transfer %27858 : tensor<4x?x4096xf16>{%dim_25517} to #hal.device.promise<@__device_5>
    %27860 = torch_c.from_builtin_tensor %27859 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27861 = torch_c.to_builtin_tensor %27699 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25518 = arith.constant 1 : index
    %dim_25519 = tensor.dim %27861, %c1_25518 : tensor<4x?x4096xf16>
    %27862 = flow.tensor.transfer %27861 : tensor<4x?x4096xf16>{%dim_25519} to #hal.device.promise<@__device_5>
    %27863 = torch_c.from_builtin_tensor %27862 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27864 = torch_c.to_builtin_tensor %27705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25520 = arith.constant 1 : index
    %dim_25521 = tensor.dim %27864, %c1_25520 : tensor<4x?x4096xf16>
    %27865 = flow.tensor.transfer %27864 : tensor<4x?x4096xf16>{%dim_25521} to #hal.device.promise<@__device_5>
    %27866 = torch_c.from_builtin_tensor %27865 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25522 = torch.constant.int 1
    %27867 = torch.aten.add.Tensor %27848, %27851, %int1_25522 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25523 = torch.constant.int 1
    %27868 = torch.aten.add.Tensor %27867, %27854, %int1_25523 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25524 = torch.constant.int 1
    %27869 = torch.aten.add.Tensor %27868, %27857, %int1_25524 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25525 = torch.constant.int 1
    %27870 = torch.aten.add.Tensor %27869, %27860, %int1_25525 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25526 = torch.constant.int 1
    %27871 = torch.aten.add.Tensor %27870, %27693, %int1_25526 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25527 = torch.constant.int 1
    %27872 = torch.aten.add.Tensor %27871, %27863, %int1_25527 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25528 = torch.constant.int 1
    %27873 = torch.aten.add.Tensor %27872, %27866, %int1_25528 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
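    // @__device_6: transfer seven partials (%27699 used in place), accumulate into %27901.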
    %27874 = torch_c.to_builtin_tensor %27663 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25529 = arith.constant 1 : index
    %dim_25530 = tensor.dim %27874, %c1_25529 : tensor<4x?x4096xf16>
    %27875 = flow.tensor.transfer %27874 : tensor<4x?x4096xf16>{%dim_25530} to #hal.device.promise<@__device_6>
    %27876 = torch_c.from_builtin_tensor %27875 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27877 = torch_c.to_builtin_tensor %27669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25531 = arith.constant 1 : index
    %dim_25532 = tensor.dim %27877, %c1_25531 : tensor<4x?x4096xf16>
    %27878 = flow.tensor.transfer %27877 : tensor<4x?x4096xf16>{%dim_25532} to #hal.device.promise<@__device_6>
    %27879 = torch_c.from_builtin_tensor %27878 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27880 = torch_c.to_builtin_tensor %27675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25533 = arith.constant 1 : index
    %dim_25534 = tensor.dim %27880, %c1_25533 : tensor<4x?x4096xf16>
    %27881 = flow.tensor.transfer %27880 : tensor<4x?x4096xf16>{%dim_25534} to #hal.device.promise<@__device_6>
    %27882 = torch_c.from_builtin_tensor %27881 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27883 = torch_c.to_builtin_tensor %27681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25535 = arith.constant 1 : index
    %dim_25536 = tensor.dim %27883, %c1_25535 : tensor<4x?x4096xf16>
    %27884 = flow.tensor.transfer %27883 : tensor<4x?x4096xf16>{%dim_25536} to #hal.device.promise<@__device_6>
    %27885 = torch_c.from_builtin_tensor %27884 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27886 = torch_c.to_builtin_tensor %27687 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25537 = arith.constant 1 : index
    %dim_25538 = tensor.dim %27886, %c1_25537 : tensor<4x?x4096xf16>
    %27887 = flow.tensor.transfer %27886 : tensor<4x?x4096xf16>{%dim_25538} to #hal.device.promise<@__device_6>
    %27888 = torch_c.from_builtin_tensor %27887 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27889 = torch_c.to_builtin_tensor %27693 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25539 = arith.constant 1 : index
    %dim_25540 = tensor.dim %27889, %c1_25539 : tensor<4x?x4096xf16>
    %27890 = flow.tensor.transfer %27889 : tensor<4x?x4096xf16>{%dim_25540} to #hal.device.promise<@__device_6>
    %27891 = torch_c.from_builtin_tensor %27890 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27892 = torch_c.to_builtin_tensor %27705 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25541 = arith.constant 1 : index
    %dim_25542 = tensor.dim %27892, %c1_25541 : tensor<4x?x4096xf16>
    %27893 = flow.tensor.transfer %27892 : tensor<4x?x4096xf16>{%dim_25542} to #hal.device.promise<@__device_6>
    %27894 = torch_c.from_builtin_tensor %27893 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25543 = torch.constant.int 1
    %27895 = torch.aten.add.Tensor %27876, %27879, %int1_25543 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25544 = torch.constant.int 1
    %27896 = torch.aten.add.Tensor %27895, %27882, %int1_25544 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25545 = torch.constant.int 1
    %27897 = torch.aten.add.Tensor %27896, %27885, %int1_25545 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25546 = torch.constant.int 1
    %27898 = torch.aten.add.Tensor %27897, %27888, %int1_25546 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25547 = torch.constant.int 1
    %27899 = torch.aten.add.Tensor %27898, %27891, %int1_25547 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25548 = torch.constant.int 1
    %27900 = torch.aten.add.Tensor %27899, %27699, %int1_25548 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25549 = torch.constant.int 1
    %27901 = torch.aten.add.Tensor %27900, %27894, %int1_25549 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
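    // @__device_7: transfer seven partials (%27705 used in place), accumulate into %27929.
    // Every device now holds the identical eight-way sum (a replicated all-reduce).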
    %27902 = torch_c.to_builtin_tensor %27663 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25550 = arith.constant 1 : index
    %dim_25551 = tensor.dim %27902, %c1_25550 : tensor<4x?x4096xf16>
    %27903 = flow.tensor.transfer %27902 : tensor<4x?x4096xf16>{%dim_25551} to #hal.device.promise<@__device_7>
    %27904 = torch_c.from_builtin_tensor %27903 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27905 = torch_c.to_builtin_tensor %27669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25552 = arith.constant 1 : index
    %dim_25553 = tensor.dim %27905, %c1_25552 : tensor<4x?x4096xf16>
    %27906 = flow.tensor.transfer %27905 : tensor<4x?x4096xf16>{%dim_25553} to #hal.device.promise<@__device_7>
    %27907 = torch_c.from_builtin_tensor %27906 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27908 = torch_c.to_builtin_tensor %27675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25554 = arith.constant 1 : index
    %dim_25555 = tensor.dim %27908, %c1_25554 : tensor<4x?x4096xf16>
    %27909 = flow.tensor.transfer %27908 : tensor<4x?x4096xf16>{%dim_25555} to #hal.device.promise<@__device_7>
    %27910 = torch_c.from_builtin_tensor %27909 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27911 = torch_c.to_builtin_tensor %27681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25556 = arith.constant 1 : index
    %dim_25557 = tensor.dim %27911, %c1_25556 : tensor<4x?x4096xf16>
    %27912 = flow.tensor.transfer %27911 : tensor<4x?x4096xf16>{%dim_25557} to #hal.device.promise<@__device_7>
    %27913 = torch_c.from_builtin_tensor %27912 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27914 = torch_c.to_builtin_tensor %27687 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25558 = arith.constant 1 : index
    %dim_25559 = tensor.dim %27914, %c1_25558 : tensor<4x?x4096xf16>
    %27915 = flow.tensor.transfer %27914 : tensor<4x?x4096xf16>{%dim_25559} to #hal.device.promise<@__device_7>
    %27916 = torch_c.from_builtin_tensor %27915 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27917 = torch_c.to_builtin_tensor %27693 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25560 = arith.constant 1 : index
    %dim_25561 = tensor.dim %27917, %c1_25560 : tensor<4x?x4096xf16>
    %27918 = flow.tensor.transfer %27917 : tensor<4x?x4096xf16>{%dim_25561} to #hal.device.promise<@__device_7>
    %27919 = torch_c.from_builtin_tensor %27918 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %27920 = torch_c.to_builtin_tensor %27699 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25562 = arith.constant 1 : index
    %dim_25563 = tensor.dim %27920, %c1_25562 : tensor<4x?x4096xf16>
    %27921 = flow.tensor.transfer %27920 : tensor<4x?x4096xf16>{%dim_25563} to #hal.device.promise<@__device_7>
    %27922 = torch_c.from_builtin_tensor %27921 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25564 = torch.constant.int 1
    %27923 = torch.aten.add.Tensor %27904, %27907, %int1_25564 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25565 = torch.constant.int 1
    %27924 = torch.aten.add.Tensor %27923, %27910, %int1_25565 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25566 = torch.constant.int 1
    %27925 = torch.aten.add.Tensor %27924, %27913, %int1_25566 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25567 = torch.constant.int 1
    %27926 = torch.aten.add.Tensor %27925, %27916, %int1_25567 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25568 = torch.constant.int 1
    %27927 = torch.aten.add.Tensor %27926, %27919, %int1_25568 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25569 = torch.constant.int 1
    %27928 = torch.aten.add.Tensor %27927, %27922, %int1_25569 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25570 = torch.constant.int 1
    %27929 = torch.aten.add.Tensor %27928, %27705, %int1_25570 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
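    // Fold each device's reduced sum into its copy of the running hidden state
    // (%26589..%26596) — the usual residual add.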
    %int1_25571 = torch.constant.int 1
    %27930 = torch.aten.add.Tensor %26589, %27733, %int1_25571 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25572 = torch.constant.int 1
    %27931 = torch.aten.add.Tensor %26590, %27761, %int1_25572 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25573 = torch.constant.int 1
    %27932 = torch.aten.add.Tensor %26591, %27789, %int1_25573 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25574 = torch.constant.int 1
    %27933 = torch.aten.add.Tensor %26592, %27817, %int1_25574 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25575 = torch.constant.int 1
    %27934 = torch.aten.add.Tensor %26593, %27845, %int1_25575 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25576 = torch.constant.int 1
    %27935 = torch.aten.add.Tensor %26594, %27873, %int1_25576 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25577 = torch.constant.int 1
    %27936 = torch.aten.add.Tensor %26595, %27901, %int1_25577 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25578 = torch.constant.int 1
    %27937 = torch.aten.add.Tensor %26596, %27929, %int1_25578 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %27937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
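    // The op sequence below (square, mean, +eps, rsqrt, scale) is RMS normalization,
    // replicated per device. Step 1: upcast f16 -> f32 (dtype code 6) for the reduction.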
    %int6_25579 = torch.constant.int 6
    %27938 = torch.prims.convert_element_type %27930, %int6_25579 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25580 = torch.constant.int 6
    %27939 = torch.prims.convert_element_type %27931, %int6_25580 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25581 = torch.constant.int 6
    %27940 = torch.prims.convert_element_type %27932, %int6_25581 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25582 = torch.constant.int 6
    %27941 = torch.prims.convert_element_type %27933, %int6_25582 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25583 = torch.constant.int 6
    %27942 = torch.prims.convert_element_type %27934, %int6_25583 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25584 = torch.constant.int 6
    %27943 = torch.prims.convert_element_type %27935, %int6_25584 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25585 = torch.constant.int 6
    %27944 = torch.prims.convert_element_type %27936, %int6_25585 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25586 = torch.constant.int 6
    %27945 = torch.prims.convert_element_type %27937, %int6_25586 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
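    // Step 2: square elementwise (x^2).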
    %int2_25587 = torch.constant.int 2
    %27946 = torch.aten.pow.Tensor_Scalar %27938, %int2_25587 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25588 = torch.constant.int 2
    %27947 = torch.aten.pow.Tensor_Scalar %27939, %int2_25588 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25589 = torch.constant.int 2
    %27948 = torch.aten.pow.Tensor_Scalar %27940, %int2_25589 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25590 = torch.constant.int 2
    %27949 = torch.aten.pow.Tensor_Scalar %27941, %int2_25590 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25591 = torch.constant.int 2
    %27950 = torch.aten.pow.Tensor_Scalar %27942, %int2_25591 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25592 = torch.constant.int 2
    %27951 = torch.aten.pow.Tensor_Scalar %27943, %int2_25592 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25593 = torch.constant.int 2
    %27952 = torch.aten.pow.Tensor_Scalar %27944, %int2_25593 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25594 = torch.constant.int 2
    %27953 = torch.aten.pow.Tensor_Scalar %27945, %int2_25594 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
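    // Step 3: mean of x^2 over the hidden dim (-1), keepdim=true -> [4,?,1].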
    %int-1_25595 = torch.constant.int -1
    %27954 = torch.prim.ListConstruct %int-1_25595 : (!torch.int) -> !torch.list<int>
    %true_25596 = torch.constant.bool true
    %none_25597 = torch.constant.none
    %27955 = torch.aten.mean.dim %27946, %27954, %true_25596, %none_25597 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_25598 = torch.constant.int -1
    %27956 = torch.prim.ListConstruct %int-1_25598 : (!torch.int) -> !torch.list<int>
    %true_25599 = torch.constant.bool true
    %none_25600 = torch.constant.none
    %27957 = torch.aten.mean.dim %27947, %27956, %true_25599, %none_25600 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_25601 = torch.constant.int -1
    %27958 = torch.prim.ListConstruct %int-1_25601 : (!torch.int) -> !torch.list<int>
    %true_25602 = torch.constant.bool true
    %none_25603 = torch.constant.none
    %27959 = torch.aten.mean.dim %27948, %27958, %true_25602, %none_25603 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_25604 = torch.constant.int -1
    %27960 = torch.prim.ListConstruct %int-1_25604 : (!torch.int) -> !torch.list<int>
    %true_25605 = torch.constant.bool true
    %none_25606 = torch.constant.none
    %27961 = torch.aten.mean.dim %27949, %27960, %true_25605, %none_25606 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_25607 = torch.constant.int -1
    %27962 = torch.prim.ListConstruct %int-1_25607 : (!torch.int) -> !torch.list<int>
    %true_25608 = torch.constant.bool true
    %none_25609 = torch.constant.none
    %27963 = torch.aten.mean.dim %27950, %27962, %true_25608, %none_25609 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_25610 = torch.constant.int -1
    %27964 = torch.prim.ListConstruct %int-1_25610 : (!torch.int) -> !torch.list<int>
    %true_25611 = torch.constant.bool true
    %none_25612 = torch.constant.none
    %27965 = torch.aten.mean.dim %27951, %27964, %true_25611, %none_25612 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_25613 = torch.constant.int -1
    %27966 = torch.prim.ListConstruct %int-1_25613 : (!torch.int) -> !torch.list<int>
    %true_25614 = torch.constant.bool true
    %none_25615 = torch.constant.none
    %27967 = torch.aten.mean.dim %27952, %27966, %true_25614, %none_25615 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_25616 = torch.constant.int -1
    %27968 = torch.prim.ListConstruct %int-1_25616 : (!torch.int) -> !torch.list<int>
    %true_25617 = torch.constant.bool true
    %none_25618 = torch.constant.none
    %27969 = torch.aten.mean.dim %27953, %27968, %true_25617, %none_25618 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
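    // Step 4: add eps to the mean (the constant is the f32 rounding of 1e-5).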
    %float9.999990e-06_25619 = torch.constant.float 9.9999997473787516E-6
    %int1_25620 = torch.constant.int 1
    %27970 = torch.aten.add.Scalar %27955, %float9.999990e-06_25619, %int1_25620 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_25621 = torch.constant.float 9.9999997473787516E-6
    %int1_25622 = torch.constant.int 1
    %27971 = torch.aten.add.Scalar %27957, %float9.999990e-06_25621, %int1_25622 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_25623 = torch.constant.float 9.9999997473787516E-6
    %int1_25624 = torch.constant.int 1
    %27972 = torch.aten.add.Scalar %27959, %float9.999990e-06_25623, %int1_25624 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_25625 = torch.constant.float 9.9999997473787516E-6
    %int1_25626 = torch.constant.int 1
    %27973 = torch.aten.add.Scalar %27961, %float9.999990e-06_25625, %int1_25626 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_25627 = torch.constant.float 9.9999997473787516E-6
    %int1_25628 = torch.constant.int 1
    %27974 = torch.aten.add.Scalar %27963, %float9.999990e-06_25627, %int1_25628 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_25629 = torch.constant.float 9.9999997473787516E-6
    %int1_25630 = torch.constant.int 1
    %27975 = torch.aten.add.Scalar %27965, %float9.999990e-06_25629, %int1_25630 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_25631 = torch.constant.float 9.9999997473787516E-6
    %int1_25632 = torch.constant.int 1
    %27976 = torch.aten.add.Scalar %27967, %float9.999990e-06_25631, %int1_25632 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_25633 = torch.constant.float 9.9999997473787516E-6
    %int1_25634 = torch.constant.int 1
    %27977 = torch.aten.add.Scalar %27969, %float9.999990e-06_25633, %int1_25634 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
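    // Reciprocal square root of (mean + eps), once per device shard; this is the
    // 1/rms scale factor of the RMSNorm pattern.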
    %27978 = torch.aten.rsqrt %27970 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %27979 = torch.aten.rsqrt %27971 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %27980 = torch.aten.rsqrt %27972 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %27981 = torch.aten.rsqrt %27973 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %27982 = torch.aten.rsqrt %27974 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %27983 = torch.aten.rsqrt %27975 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %27984 = torch.aten.rsqrt %27976 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %27985 = torch.aten.rsqrt %27977 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %27985, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
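    // Scale each shard's [4,?,4096]f32 activations (%27938..%27945) by its
    // broadcast [4,?,1] 1/rms factor.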
    %27986 = torch.aten.mul.Tensor %27938, %27978 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %27987 = torch.aten.mul.Tensor %27939, %27979 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %27988 = torch.aten.mul.Tensor %27940, %27980 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %27989 = torch.aten.mul.Tensor %27941, %27981 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %27990 = torch.aten.mul.Tensor %27942, %27982 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %27991 = torch.aten.mul.Tensor %27943, %27983 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %27992 = torch.aten.mul.Tensor %27944, %27984 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %27993 = torch.aten.mul.Tensor %27945, %27985 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
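    // Multiply by the replicated per-channel norm weight (%984..%991, one
    // [4096]f32 copy per device); given the feed-forward ops that follow, this is
    // likely the block's ffn_norm weight.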
    %27994 = torch.aten.mul.Tensor %984, %27986 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %27995 = torch.aten.mul.Tensor %985, %27987 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %27996 = torch.aten.mul.Tensor %986, %27988 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %27997 = torch.aten.mul.Tensor %987, %27989 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %27998 = torch.aten.mul.Tensor %988, %27990 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %27999 = torch.aten.mul.Tensor %989, %27991 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %27999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28000 = torch.aten.mul.Tensor %990, %27992 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28001 = torch.aten.mul.Tensor %991, %27993 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
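    // Cast the normalized activations back to f16 (torch dtype code 5) before
    // the feed-forward matmuls.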
    %int5_25635 = torch.constant.int 5
    %28002 = torch.prims.convert_element_type %27994, %int5_25635 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_25636 = torch.constant.int 5
    %28003 = torch.prims.convert_element_type %27995, %int5_25636 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_25637 = torch.constant.int 5
    %28004 = torch.prims.convert_element_type %27996, %int5_25637 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_25638 = torch.constant.int 5
    %28005 = torch.prims.convert_element_type %27997, %int5_25638 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_25639 = torch.constant.int 5
    %28006 = torch.prims.convert_element_type %27998, %int5_25639 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_25640 = torch.constant.int 5
    %28007 = torch.prims.convert_element_type %27999, %int5_25640 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_25641 = torch.constant.int 5
    %28008 = torch.prims.convert_element_type %28000, %int5_25641 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_25642 = torch.constant.int 5
    %28009 = torch.prims.convert_element_type %28001, %int5_25642 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
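    // Transpose the eight [1792,4096]f16 weight shards %992..%999 so the matmuls
    // below run as x @ W^T; 1792 * 8 = 14336 is consistent with an FFN dimension
    // column-split across the eight devices.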
    %int1_25643 = torch.constant.int 1
    %int0_25644 = torch.constant.int 0
    %28010 = torch.prim.ListConstruct %int1_25643, %int0_25644 : (!torch.int, !torch.int) -> !torch.list<int>
    %28011 = torch.aten.permute %992, %28010 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25645 = torch.constant.int 1
    %int0_25646 = torch.constant.int 0
    %28012 = torch.prim.ListConstruct %int1_25645, %int0_25646 : (!torch.int, !torch.int) -> !torch.list<int>
    %28013 = torch.aten.permute %993, %28012 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25647 = torch.constant.int 1
    %int0_25648 = torch.constant.int 0
    %28014 = torch.prim.ListConstruct %int1_25647, %int0_25648 : (!torch.int, !torch.int) -> !torch.list<int>
    %28015 = torch.aten.permute %994, %28014 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25649 = torch.constant.int 1
    %int0_25650 = torch.constant.int 0
    %28016 = torch.prim.ListConstruct %int1_25649, %int0_25650 : (!torch.int, !torch.int) -> !torch.list<int>
    %28017 = torch.aten.permute %995, %28016 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25651 = torch.constant.int 1
    %int0_25652 = torch.constant.int 0
    %28018 = torch.prim.ListConstruct %int1_25651, %int0_25652 : (!torch.int, !torch.int) -> !torch.list<int>
    %28019 = torch.aten.permute %996, %28018 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25653 = torch.constant.int 1
    %int0_25654 = torch.constant.int 0
    %28020 = torch.prim.ListConstruct %int1_25653, %int0_25654 : (!torch.int, !torch.int) -> !torch.list<int>
    %28021 = torch.aten.permute %997, %28020 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25655 = torch.constant.int 1
    %int0_25656 = torch.constant.int 0
    %28022 = torch.prim.ListConstruct %int1_25655, %int0_25656 : (!torch.int, !torch.int) -> !torch.list<int>
    %28023 = torch.aten.permute %998, %28022 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25657 = torch.constant.int 1
    %int0_25658 = torch.constant.int 0
    %28024 = torch.prim.ListConstruct %int1_25657, %int0_25658 : (!torch.int, !torch.int) -> !torch.list<int>
    %28025 = torch.aten.permute %999, %28024 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
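    // First projection per shard: flatten [4,?,4096] to [4*?,4096], matmul with
    // the transposed shard weight to get [4*?,1792], then reshape back to
    // [4,?,1792].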
    %int4_25659 = torch.constant.int 4
    %28026 = torch.aten.mul.int %int4_25659, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25660 = torch.constant.int 4096
    %28027 = torch.prim.ListConstruct %28026, %int4096_25660 : (!torch.int, !torch.int) -> !torch.list<int>
    %28028 = torch.aten.view %28002, %28027 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28028, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28029 = torch.aten.mm %28028, %28011 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28029, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25661 = torch.constant.int 4
    %int1792_25662 = torch.constant.int 1792
    %28030 = torch.prim.ListConstruct %int4_25661, %2482, %int1792_25662 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28031 = torch.aten.view %28029, %28030 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25663 = torch.constant.int 4
    %28032 = torch.aten.mul.int %int4_25663, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25664 = torch.constant.int 4096
    %28033 = torch.prim.ListConstruct %28032, %int4096_25664 : (!torch.int, !torch.int) -> !torch.list<int>
    %28034 = torch.aten.view %28003, %28033 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28034, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28035 = torch.aten.mm %28034, %28013 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28035, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25665 = torch.constant.int 4
    %int1792_25666 = torch.constant.int 1792
    %28036 = torch.prim.ListConstruct %int4_25665, %2482, %int1792_25666 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28037 = torch.aten.view %28035, %28036 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25667 = torch.constant.int 4
    %28038 = torch.aten.mul.int %int4_25667, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25668 = torch.constant.int 4096
    %28039 = torch.prim.ListConstruct %28038, %int4096_25668 : (!torch.int, !torch.int) -> !torch.list<int>
    %28040 = torch.aten.view %28004, %28039 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28040, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28041 = torch.aten.mm %28040, %28015 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28041, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25669 = torch.constant.int 4
    %int1792_25670 = torch.constant.int 1792
    %28042 = torch.prim.ListConstruct %int4_25669, %2482, %int1792_25670 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28043 = torch.aten.view %28041, %28042 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25671 = torch.constant.int 4
    %28044 = torch.aten.mul.int %int4_25671, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25672 = torch.constant.int 4096
    %28045 = torch.prim.ListConstruct %28044, %int4096_25672 : (!torch.int, !torch.int) -> !torch.list<int>
    %28046 = torch.aten.view %28005, %28045 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28046, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28047 = torch.aten.mm %28046, %28017 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28047, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25673 = torch.constant.int 4
    %int1792_25674 = torch.constant.int 1792
    %28048 = torch.prim.ListConstruct %int4_25673, %2482, %int1792_25674 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28049 = torch.aten.view %28047, %28048 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25675 = torch.constant.int 4
    %28050 = torch.aten.mul.int %int4_25675, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25676 = torch.constant.int 4096
    %28051 = torch.prim.ListConstruct %28050, %int4096_25676 : (!torch.int, !torch.int) -> !torch.list<int>
    %28052 = torch.aten.view %28006, %28051 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28052, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28053 = torch.aten.mm %28052, %28019 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28053, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25677 = torch.constant.int 4
    %int1792_25678 = torch.constant.int 1792
    %28054 = torch.prim.ListConstruct %int4_25677, %2482, %int1792_25678 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28055 = torch.aten.view %28053, %28054 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25679 = torch.constant.int 4
    %28056 = torch.aten.mul.int %int4_25679, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25680 = torch.constant.int 4096
    %28057 = torch.prim.ListConstruct %28056, %int4096_25680 : (!torch.int, !torch.int) -> !torch.list<int>
    %28058 = torch.aten.view %28007, %28057 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28058, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28059 = torch.aten.mm %28058, %28021 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28059, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25681 = torch.constant.int 4
    %int1792_25682 = torch.constant.int 1792
    %28060 = torch.prim.ListConstruct %int4_25681, %2482, %int1792_25682 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28061 = torch.aten.view %28059, %28060 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25683 = torch.constant.int 4
    %28062 = torch.aten.mul.int %int4_25683, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25684 = torch.constant.int 4096
    %28063 = torch.prim.ListConstruct %28062, %int4096_25684 : (!torch.int, !torch.int) -> !torch.list<int>
    %28064 = torch.aten.view %28008, %28063 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28064, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28065 = torch.aten.mm %28064, %28023 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28065, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25685 = torch.constant.int 4
    %int1792_25686 = torch.constant.int 1792
    %28066 = torch.prim.ListConstruct %int4_25685, %2482, %int1792_25686 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28067 = torch.aten.view %28065, %28066 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25687 = torch.constant.int 4
    %28068 = torch.aten.mul.int %int4_25687, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25688 = torch.constant.int 4096
    %28069 = torch.prim.ListConstruct %28068, %int4096_25688 : (!torch.int, !torch.int) -> !torch.list<int>
    %28070 = torch.aten.view %28009, %28069 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28070, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28071 = torch.aten.mm %28070, %28025 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28071, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25689 = torch.constant.int 4
    %int1792_25690 = torch.constant.int 1792
    %28072 = torch.prim.ListConstruct %int4_25689, %2482, %int1792_25690 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28073 = torch.aten.view %28071, %28072 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
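    // SiLU activation on each shard's projection, i.e. the gate half of a
    // SwiGLU-style feed-forward block.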
    %28074 = torch.aten.silu %28031 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28075 = torch.aten.silu %28037 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28076 = torch.aten.silu %28043 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28077 = torch.aten.silu %28049 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28078 = torch.aten.silu %28055 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28079 = torch.aten.silu %28061 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28080 = torch.aten.silu %28067 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28081 = torch.aten.silu %28073 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
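    // Transpose the eight up-projection weight shards %1000..%1007 for the
    // second set of matmuls.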
    %int1_25691 = torch.constant.int 1
    %int0_25692 = torch.constant.int 0
    %28082 = torch.prim.ListConstruct %int1_25691, %int0_25692 : (!torch.int, !torch.int) -> !torch.list<int>
    %28083 = torch.aten.permute %1000, %28082 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25693 = torch.constant.int 1
    %int0_25694 = torch.constant.int 0
    %28084 = torch.prim.ListConstruct %int1_25693, %int0_25694 : (!torch.int, !torch.int) -> !torch.list<int>
    %28085 = torch.aten.permute %1001, %28084 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25695 = torch.constant.int 1
    %int0_25696 = torch.constant.int 0
    %28086 = torch.prim.ListConstruct %int1_25695, %int0_25696 : (!torch.int, !torch.int) -> !torch.list<int>
    %28087 = torch.aten.permute %1002, %28086 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25697 = torch.constant.int 1
    %int0_25698 = torch.constant.int 0
    %28088 = torch.prim.ListConstruct %int1_25697, %int0_25698 : (!torch.int, !torch.int) -> !torch.list<int>
    %28089 = torch.aten.permute %1003, %28088 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25699 = torch.constant.int 1
    %int0_25700 = torch.constant.int 0
    %28090 = torch.prim.ListConstruct %int1_25699, %int0_25700 : (!torch.int, !torch.int) -> !torch.list<int>
    %28091 = torch.aten.permute %1004, %28090 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25701 = torch.constant.int 1
    %int0_25702 = torch.constant.int 0
    %28092 = torch.prim.ListConstruct %int1_25701, %int0_25702 : (!torch.int, !torch.int) -> !torch.list<int>
    %28093 = torch.aten.permute %1005, %28092 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25703 = torch.constant.int 1
    %int0_25704 = torch.constant.int 0
    %28094 = torch.prim.ListConstruct %int1_25703, %int0_25704 : (!torch.int, !torch.int) -> !torch.list<int>
    %28095 = torch.aten.permute %1006, %28094 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_25705 = torch.constant.int 1
    %int0_25706 = torch.constant.int 0
    %28096 = torch.prim.ListConstruct %int1_25705, %int0_25706 : (!torch.int, !torch.int) -> !torch.list<int>
    %28097 = torch.aten.permute %1007, %28096 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
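    // Up projection per shard: the same flatten/matmul/reshape pattern as above,
    // reusing the f16 activations %28002..%28009.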
    %int4_25707 = torch.constant.int 4
    %28098 = torch.aten.mul.int %int4_25707, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25708 = torch.constant.int 4096
    %28099 = torch.prim.ListConstruct %28098, %int4096_25708 : (!torch.int, !torch.int) -> !torch.list<int>
    %28100 = torch.aten.view %28002, %28099 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28100, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28101 = torch.aten.mm %28100, %28083 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28101, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25709 = torch.constant.int 4
    %int1792_25710 = torch.constant.int 1792
    %28102 = torch.prim.ListConstruct %int4_25709, %2482, %int1792_25710 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28103 = torch.aten.view %28101, %28102 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25711 = torch.constant.int 4
    %28104 = torch.aten.mul.int %int4_25711, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25712 = torch.constant.int 4096
    %28105 = torch.prim.ListConstruct %28104, %int4096_25712 : (!torch.int, !torch.int) -> !torch.list<int>
    %28106 = torch.aten.view %28003, %28105 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28106, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28107 = torch.aten.mm %28106, %28085 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28107, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25713 = torch.constant.int 4
    %int1792_25714 = torch.constant.int 1792
    %28108 = torch.prim.ListConstruct %int4_25713, %2482, %int1792_25714 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28109 = torch.aten.view %28107, %28108 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25715 = torch.constant.int 4
    %28110 = torch.aten.mul.int %int4_25715, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25716 = torch.constant.int 4096
    %28111 = torch.prim.ListConstruct %28110, %int4096_25716 : (!torch.int, !torch.int) -> !torch.list<int>
    %28112 = torch.aten.view %28004, %28111 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28112, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28113 = torch.aten.mm %28112, %28087 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28113, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25717 = torch.constant.int 4
    %int1792_25718 = torch.constant.int 1792
    %28114 = torch.prim.ListConstruct %int4_25717, %2482, %int1792_25718 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28115 = torch.aten.view %28113, %28114 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25719 = torch.constant.int 4
    %28116 = torch.aten.mul.int %int4_25719, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25720 = torch.constant.int 4096
    %28117 = torch.prim.ListConstruct %28116, %int4096_25720 : (!torch.int, !torch.int) -> !torch.list<int>
    %28118 = torch.aten.view %28005, %28117 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28118, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28119 = torch.aten.mm %28118, %28089 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28119, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25721 = torch.constant.int 4
    %int1792_25722 = torch.constant.int 1792
    %28120 = torch.prim.ListConstruct %int4_25721, %2482, %int1792_25722 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28121 = torch.aten.view %28119, %28120 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25723 = torch.constant.int 4
    %28122 = torch.aten.mul.int %int4_25723, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25724 = torch.constant.int 4096
    %28123 = torch.prim.ListConstruct %28122, %int4096_25724 : (!torch.int, !torch.int) -> !torch.list<int>
    %28124 = torch.aten.view %28006, %28123 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28124, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28125 = torch.aten.mm %28124, %28091 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28125, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25725 = torch.constant.int 4
    %int1792_25726 = torch.constant.int 1792
    %28126 = torch.prim.ListConstruct %int4_25725, %2482, %int1792_25726 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28127 = torch.aten.view %28125, %28126 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25727 = torch.constant.int 4
    %28128 = torch.aten.mul.int %int4_25727, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25728 = torch.constant.int 4096
    %28129 = torch.prim.ListConstruct %28128, %int4096_25728 : (!torch.int, !torch.int) -> !torch.list<int>
    %28130 = torch.aten.view %28007, %28129 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28130, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28131 = torch.aten.mm %28130, %28093 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28131, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25729 = torch.constant.int 4
    %int1792_25730 = torch.constant.int 1792
    %28132 = torch.prim.ListConstruct %int4_25729, %2482, %int1792_25730 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28133 = torch.aten.view %28131, %28132 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25731 = torch.constant.int 4
    %28134 = torch.aten.mul.int %int4_25731, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25732 = torch.constant.int 4096
    %28135 = torch.prim.ListConstruct %28134, %int4096_25732 : (!torch.int, !torch.int) -> !torch.list<int>
    %28136 = torch.aten.view %28008, %28135 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28136, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28137 = torch.aten.mm %28136, %28095 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28137, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25733 = torch.constant.int 4
    %int1792_25734 = torch.constant.int 1792
    %28138 = torch.prim.ListConstruct %int4_25733, %2482, %int1792_25734 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28139 = torch.aten.view %28137, %28138 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_25735 = torch.constant.int 4
    %28140 = torch.aten.mul.int %int4_25735, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_25736 = torch.constant.int 4096
    %28141 = torch.prim.ListConstruct %28140, %int4096_25736 : (!torch.int, !torch.int) -> !torch.list<int>
    %28142 = torch.aten.view %28009, %28141 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28142, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28143 = torch.aten.mm %28142, %28097 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28143, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_25737 = torch.constant.int 4
    %int1792_25738 = torch.constant.int 1792
    %28144 = torch.prim.ListConstruct %int4_25737, %2482, %int1792_25738 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28145 = torch.aten.view %28143, %28144 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
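    // Elementwise silu(gate) * up per shard, completing the SwiGLU combination.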
    %28146 = torch.aten.mul.Tensor %28074, %28103 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28147 = torch.aten.mul.Tensor %28075, %28109 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28148 = torch.aten.mul.Tensor %28076, %28115 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28149 = torch.aten.mul.Tensor %28077, %28121 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28150 = torch.aten.mul.Tensor %28078, %28127 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28151 = torch.aten.mul.Tensor %28079, %28133 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28152 = torch.aten.mul.Tensor %28080, %28139 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %28153 = torch.aten.mul.Tensor %28081, %28145 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %28153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
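    // Transpose the eight down-projection weight shards %1008..%1015 from
    // [4096,1792]f16 to [1792,4096] for the output matmuls.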
    %int1_25739 = torch.constant.int 1
    %int0_25740 = torch.constant.int 0
    %28154 = torch.prim.ListConstruct %int1_25739, %int0_25740 : (!torch.int, !torch.int) -> !torch.list<int>
    %28155 = torch.aten.permute %1008, %28154 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_25741 = torch.constant.int 1
    %int0_25742 = torch.constant.int 0
    %28156 = torch.prim.ListConstruct %int1_25741, %int0_25742 : (!torch.int, !torch.int) -> !torch.list<int>
    %28157 = torch.aten.permute %1009, %28156 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_25743 = torch.constant.int 1
    %int0_25744 = torch.constant.int 0
    %28158 = torch.prim.ListConstruct %int1_25743, %int0_25744 : (!torch.int, !torch.int) -> !torch.list<int>
    %28159 = torch.aten.permute %1010, %28158 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_25745 = torch.constant.int 1
    %int0_25746 = torch.constant.int 0
    %28160 = torch.prim.ListConstruct %int1_25745, %int0_25746 : (!torch.int, !torch.int) -> !torch.list<int>
    %28161 = torch.aten.permute %1011, %28160 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_25747 = torch.constant.int 1
    %int0_25748 = torch.constant.int 0
    %28162 = torch.prim.ListConstruct %int1_25747, %int0_25748 : (!torch.int, !torch.int) -> !torch.list<int>
    %28163 = torch.aten.permute %1012, %28162 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_25749 = torch.constant.int 1
    %int0_25750 = torch.constant.int 0
    %28164 = torch.prim.ListConstruct %int1_25749, %int0_25750 : (!torch.int, !torch.int) -> !torch.list<int>
    %28165 = torch.aten.permute %1013, %28164 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_25751 = torch.constant.int 1
    %int0_25752 = torch.constant.int 0
    %28166 = torch.prim.ListConstruct %int1_25751, %int0_25752 : (!torch.int, !torch.int) -> !torch.list<int>
    %28167 = torch.aten.permute %1014, %28166 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_25753 = torch.constant.int 1
    %int0_25754 = torch.constant.int 0
    %28168 = torch.prim.ListConstruct %int1_25753, %int0_25754 : (!torch.int, !torch.int) -> !torch.list<int>
    %28169 = torch.aten.permute %1015, %28168 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
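    // Down projection per shard: the dynamic sequence dimension is re-read from
    // the gate tensors via aten.size.int, then each [4,?,1792] product is
    // flattened, multiplied by the [1792,4096] shard weight, and reshaped to a
    // [4,?,4096] partial output.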
    %int1_25755 = torch.constant.int 1
    %28170 = torch.aten.size.int %28031, %int1_25755 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_25756 = torch.constant.int 4
    %28171 = torch.aten.mul.int %int4_25756, %28170 : !torch.int, !torch.int -> !torch.int
    %int1792_25757 = torch.constant.int 1792
    %28172 = torch.prim.ListConstruct %28171, %int1792_25757 : (!torch.int, !torch.int) -> !torch.list<int>
    %28173 = torch.aten.view %28146, %28172 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28173, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %28174 = torch.aten.mm %28173, %28155 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28174, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25758 = torch.constant.int 4
    %int4096_25759 = torch.constant.int 4096
    %28175 = torch.prim.ListConstruct %int4_25758, %28170, %int4096_25759 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28176 = torch.aten.view %28174, %28175 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25760 = torch.constant.int 1
    %28177 = torch.aten.size.int %28037, %int1_25760 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_25761 = torch.constant.int 4
    %28178 = torch.aten.mul.int %int4_25761, %28177 : !torch.int, !torch.int -> !torch.int
    %int1792_25762 = torch.constant.int 1792
    %28179 = torch.prim.ListConstruct %28178, %int1792_25762 : (!torch.int, !torch.int) -> !torch.list<int>
    %28180 = torch.aten.view %28147, %28179 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28180, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %28181 = torch.aten.mm %28180, %28157 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28181, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25763 = torch.constant.int 4
    %int4096_25764 = torch.constant.int 4096
    %28182 = torch.prim.ListConstruct %int4_25763, %28177, %int4096_25764 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28183 = torch.aten.view %28181, %28182 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25765 = torch.constant.int 1
    %28184 = torch.aten.size.int %28043, %int1_25765 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_25766 = torch.constant.int 4
    %28185 = torch.aten.mul.int %int4_25766, %28184 : !torch.int, !torch.int -> !torch.int
    %int1792_25767 = torch.constant.int 1792
    %28186 = torch.prim.ListConstruct %28185, %int1792_25767 : (!torch.int, !torch.int) -> !torch.list<int>
    %28187 = torch.aten.view %28148, %28186 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28187, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %28188 = torch.aten.mm %28187, %28159 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28188, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25768 = torch.constant.int 4
    %int4096_25769 = torch.constant.int 4096
    %28189 = torch.prim.ListConstruct %int4_25768, %28184, %int4096_25769 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28190 = torch.aten.view %28188, %28189 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25770 = torch.constant.int 1
    %28191 = torch.aten.size.int %28049, %int1_25770 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_25771 = torch.constant.int 4
    %28192 = torch.aten.mul.int %int4_25771, %28191 : !torch.int, !torch.int -> !torch.int
    %int1792_25772 = torch.constant.int 1792
    %28193 = torch.prim.ListConstruct %28192, %int1792_25772 : (!torch.int, !torch.int) -> !torch.list<int>
    %28194 = torch.aten.view %28149, %28193 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28194, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %28195 = torch.aten.mm %28194, %28161 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28195, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25773 = torch.constant.int 4
    %int4096_25774 = torch.constant.int 4096
    %28196 = torch.prim.ListConstruct %int4_25773, %28191, %int4096_25774 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28197 = torch.aten.view %28195, %28196 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25775 = torch.constant.int 1
    %28198 = torch.aten.size.int %28055, %int1_25775 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_25776 = torch.constant.int 4
    %28199 = torch.aten.mul.int %int4_25776, %28198 : !torch.int, !torch.int -> !torch.int
    %int1792_25777 = torch.constant.int 1792
    %28200 = torch.prim.ListConstruct %28199, %int1792_25777 : (!torch.int, !torch.int) -> !torch.list<int>
    %28201 = torch.aten.view %28150, %28200 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28201, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %28202 = torch.aten.mm %28201, %28163 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28202, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25778 = torch.constant.int 4
    %int4096_25779 = torch.constant.int 4096
    %28203 = torch.prim.ListConstruct %int4_25778, %28198, %int4096_25779 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28204 = torch.aten.view %28202, %28203 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25780 = torch.constant.int 1
    %28205 = torch.aten.size.int %28061, %int1_25780 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_25781 = torch.constant.int 4
    %28206 = torch.aten.mul.int %int4_25781, %28205 : !torch.int, !torch.int -> !torch.int
    %int1792_25782 = torch.constant.int 1792
    %28207 = torch.prim.ListConstruct %28206, %int1792_25782 : (!torch.int, !torch.int) -> !torch.list<int>
    %28208 = torch.aten.view %28151, %28207 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28208, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %28209 = torch.aten.mm %28208, %28165 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28209, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25783 = torch.constant.int 4
    %int4096_25784 = torch.constant.int 4096
    %28210 = torch.prim.ListConstruct %int4_25783, %28205, %int4096_25784 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28211 = torch.aten.view %28209, %28210 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28211, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25785 = torch.constant.int 1
    %28212 = torch.aten.size.int %28067, %int1_25785 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_25786 = torch.constant.int 4
    %28213 = torch.aten.mul.int %int4_25786, %28212 : !torch.int, !torch.int -> !torch.int
    %int1792_25787 = torch.constant.int 1792
    %28214 = torch.prim.ListConstruct %28213, %int1792_25787 : (!torch.int, !torch.int) -> !torch.list<int>
    %28215 = torch.aten.view %28152, %28214 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28215, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %28216 = torch.aten.mm %28215, %28167 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28216, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25788 = torch.constant.int 4
    %int4096_25789 = torch.constant.int 4096
    %28217 = torch.prim.ListConstruct %int4_25788, %28212, %int4096_25789 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28218 = torch.aten.view %28216, %28217 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25790 = torch.constant.int 1
    %28219 = torch.aten.size.int %28073, %int1_25790 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_25791 = torch.constant.int 4
    %28220 = torch.aten.mul.int %int4_25791, %28219 : !torch.int, !torch.int -> !torch.int
    %int1792_25792 = torch.constant.int 1792
    %28221 = torch.prim.ListConstruct %28220, %int1792_25792 : (!torch.int, !torch.int) -> !torch.list<int>
    %28222 = torch.aten.view %28153, %28221 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %28222, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %28223 = torch.aten.mm %28222, %28169 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28223, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_25793 = torch.constant.int 4
    %int4096_25794 = torch.constant.int 4096
    %28224 = torch.prim.ListConstruct %int4_25793, %28219, %int4096_25794 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28225 = torch.aten.view %28223, %28224 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
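    // All-reduce, gather step for @__device_0: transfer the partial FFN outputs
    // produced on the other seven devices onto @__device_0 (device 0's own
    // partial, %28176, stays in place).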
    %28226 = torch_c.to_builtin_tensor %28183 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25795 = arith.constant 1 : index
    %dim_25796 = tensor.dim %28226, %c1_25795 : tensor<4x?x4096xf16>
    %28227 = flow.tensor.transfer %28226 : tensor<4x?x4096xf16>{%dim_25796} to #hal.device.promise<@__device_0>
    %28228 = torch_c.from_builtin_tensor %28227 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28229 = torch_c.to_builtin_tensor %28190 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25797 = arith.constant 1 : index
    %dim_25798 = tensor.dim %28229, %c1_25797 : tensor<4x?x4096xf16>
    %28230 = flow.tensor.transfer %28229 : tensor<4x?x4096xf16>{%dim_25798} to #hal.device.promise<@__device_0>
    %28231 = torch_c.from_builtin_tensor %28230 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28232 = torch_c.to_builtin_tensor %28197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25799 = arith.constant 1 : index
    %dim_25800 = tensor.dim %28232, %c1_25799 : tensor<4x?x4096xf16>
    %28233 = flow.tensor.transfer %28232 : tensor<4x?x4096xf16>{%dim_25800} to #hal.device.promise<@__device_0>
    %28234 = torch_c.from_builtin_tensor %28233 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28235 = torch_c.to_builtin_tensor %28204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25801 = arith.constant 1 : index
    %dim_25802 = tensor.dim %28235, %c1_25801 : tensor<4x?x4096xf16>
    %28236 = flow.tensor.transfer %28235 : tensor<4x?x4096xf16>{%dim_25802} to #hal.device.promise<@__device_0>
    %28237 = torch_c.from_builtin_tensor %28236 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28238 = torch_c.to_builtin_tensor %28211 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25803 = arith.constant 1 : index
    %dim_25804 = tensor.dim %28238, %c1_25803 : tensor<4x?x4096xf16>
    %28239 = flow.tensor.transfer %28238 : tensor<4x?x4096xf16>{%dim_25804} to #hal.device.promise<@__device_0>
    %28240 = torch_c.from_builtin_tensor %28239 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28241 = torch_c.to_builtin_tensor %28218 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25805 = arith.constant 1 : index
    %dim_25806 = tensor.dim %28241, %c1_25805 : tensor<4x?x4096xf16>
    %28242 = flow.tensor.transfer %28241 : tensor<4x?x4096xf16>{%dim_25806} to #hal.device.promise<@__device_0>
    %28243 = torch_c.from_builtin_tensor %28242 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28244 = torch_c.to_builtin_tensor %28225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25807 = arith.constant 1 : index
    %dim_25808 = tensor.dim %28244, %c1_25807 : tensor<4x?x4096xf16>
    %28245 = flow.tensor.transfer %28244 : tensor<4x?x4096xf16>{%dim_25808} to #hal.device.promise<@__device_0>
    %28246 = torch_c.from_builtin_tensor %28245 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
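    // All-reduce, sum step for @__device_0: chain of adds over the eight
    // partials, yielding this device's full FFN output %28253.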
    %int1_25809 = torch.constant.int 1
    %28247 = torch.aten.add.Tensor %28176, %28228, %int1_25809 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25810 = torch.constant.int 1
    %28248 = torch.aten.add.Tensor %28247, %28231, %int1_25810 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25811 = torch.constant.int 1
    %28249 = torch.aten.add.Tensor %28248, %28234, %int1_25811 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25812 = torch.constant.int 1
    %28250 = torch.aten.add.Tensor %28249, %28237, %int1_25812 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25813 = torch.constant.int 1
    %28251 = torch.aten.add.Tensor %28250, %28240, %int1_25813 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25814 = torch.constant.int 1
    %28252 = torch.aten.add.Tensor %28251, %28243, %int1_25814 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25815 = torch.constant.int 1
    %28253 = torch.aten.add.Tensor %28252, %28246, %int1_25815 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
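    // Cross-device reduction of the eight FFN partial sums, materialized on
    // @__device_0. Each remote partial is moved with the same five-op idiom:
    // torch_c.to_builtin_tensor, tensor.dim on the dynamic sequence dimension,
    // flow.tensor.transfer to the destination, torch_c.from_builtin_tensor,
    // then torch.bind_symbolic_shape to reattach the (4, s0 * 16, 4096) shape.
    // The transferred replicas are folded left-to-right with
    // torch.aten.add.Tensor into %28253; the partial already resident on
    // device 0 (%28176) is used in place, so only seven transfers are issued.
    // In effect this is an all-reduce unrolled once per destination device;
    // the same pattern repeats below for @__device_1 through @__device_7.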
    %28254 = torch_c.to_builtin_tensor %28176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25816 = arith.constant 1 : index
    %dim_25817 = tensor.dim %28254, %c1_25816 : tensor<4x?x4096xf16>
    %28255 = flow.tensor.transfer %28254 : tensor<4x?x4096xf16>{%dim_25817} to #hal.device.promise<@__device_1>
    %28256 = torch_c.from_builtin_tensor %28255 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28257 = torch_c.to_builtin_tensor %28190 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25818 = arith.constant 1 : index
    %dim_25819 = tensor.dim %28257, %c1_25818 : tensor<4x?x4096xf16>
    %28258 = flow.tensor.transfer %28257 : tensor<4x?x4096xf16>{%dim_25819} to #hal.device.promise<@__device_1>
    %28259 = torch_c.from_builtin_tensor %28258 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28260 = torch_c.to_builtin_tensor %28197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25820 = arith.constant 1 : index
    %dim_25821 = tensor.dim %28260, %c1_25820 : tensor<4x?x4096xf16>
    %28261 = flow.tensor.transfer %28260 : tensor<4x?x4096xf16>{%dim_25821} to #hal.device.promise<@__device_1>
    %28262 = torch_c.from_builtin_tensor %28261 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28263 = torch_c.to_builtin_tensor %28204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25822 = arith.constant 1 : index
    %dim_25823 = tensor.dim %28263, %c1_25822 : tensor<4x?x4096xf16>
    %28264 = flow.tensor.transfer %28263 : tensor<4x?x4096xf16>{%dim_25823} to #hal.device.promise<@__device_1>
    %28265 = torch_c.from_builtin_tensor %28264 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28266 = torch_c.to_builtin_tensor %28211 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25824 = arith.constant 1 : index
    %dim_25825 = tensor.dim %28266, %c1_25824 : tensor<4x?x4096xf16>
    %28267 = flow.tensor.transfer %28266 : tensor<4x?x4096xf16>{%dim_25825} to #hal.device.promise<@__device_1>
    %28268 = torch_c.from_builtin_tensor %28267 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28269 = torch_c.to_builtin_tensor %28218 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25826 = arith.constant 1 : index
    %dim_25827 = tensor.dim %28269, %c1_25826 : tensor<4x?x4096xf16>
    %28270 = flow.tensor.transfer %28269 : tensor<4x?x4096xf16>{%dim_25827} to #hal.device.promise<@__device_1>
    %28271 = torch_c.from_builtin_tensor %28270 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28272 = torch_c.to_builtin_tensor %28225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25828 = arith.constant 1 : index
    %dim_25829 = tensor.dim %28272, %c1_25828 : tensor<4x?x4096xf16>
    %28273 = flow.tensor.transfer %28272 : tensor<4x?x4096xf16>{%dim_25829} to #hal.device.promise<@__device_1>
    %28274 = torch_c.from_builtin_tensor %28273 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25830 = torch.constant.int 1
    %28275 = torch.aten.add.Tensor %28256, %28183, %int1_25830 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25831 = torch.constant.int 1
    %28276 = torch.aten.add.Tensor %28275, %28259, %int1_25831 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25832 = torch.constant.int 1
    %28277 = torch.aten.add.Tensor %28276, %28262, %int1_25832 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25833 = torch.constant.int 1
    %28278 = torch.aten.add.Tensor %28277, %28265, %int1_25833 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25834 = torch.constant.int 1
    %28279 = torch.aten.add.Tensor %28278, %28268, %int1_25834 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25835 = torch.constant.int 1
    %28280 = torch.aten.add.Tensor %28279, %28271, %int1_25835 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25836 = torch.constant.int 1
    %28281 = torch.aten.add.Tensor %28280, %28274, %int1_25836 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
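    // Same unrolled reduction, destination @__device_1: the local partial
    // %28183 is added without a transfer; the chain yields %28281.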
    %28282 = torch_c.to_builtin_tensor %28176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25837 = arith.constant 1 : index
    %dim_25838 = tensor.dim %28282, %c1_25837 : tensor<4x?x4096xf16>
    %28283 = flow.tensor.transfer %28282 : tensor<4x?x4096xf16>{%dim_25838} to #hal.device.promise<@__device_2>
    %28284 = torch_c.from_builtin_tensor %28283 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28285 = torch_c.to_builtin_tensor %28183 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25839 = arith.constant 1 : index
    %dim_25840 = tensor.dim %28285, %c1_25839 : tensor<4x?x4096xf16>
    %28286 = flow.tensor.transfer %28285 : tensor<4x?x4096xf16>{%dim_25840} to #hal.device.promise<@__device_2>
    %28287 = torch_c.from_builtin_tensor %28286 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28288 = torch_c.to_builtin_tensor %28197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25841 = arith.constant 1 : index
    %dim_25842 = tensor.dim %28288, %c1_25841 : tensor<4x?x4096xf16>
    %28289 = flow.tensor.transfer %28288 : tensor<4x?x4096xf16>{%dim_25842} to #hal.device.promise<@__device_2>
    %28290 = torch_c.from_builtin_tensor %28289 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28291 = torch_c.to_builtin_tensor %28204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25843 = arith.constant 1 : index
    %dim_25844 = tensor.dim %28291, %c1_25843 : tensor<4x?x4096xf16>
    %28292 = flow.tensor.transfer %28291 : tensor<4x?x4096xf16>{%dim_25844} to #hal.device.promise<@__device_2>
    %28293 = torch_c.from_builtin_tensor %28292 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28294 = torch_c.to_builtin_tensor %28211 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25845 = arith.constant 1 : index
    %dim_25846 = tensor.dim %28294, %c1_25845 : tensor<4x?x4096xf16>
    %28295 = flow.tensor.transfer %28294 : tensor<4x?x4096xf16>{%dim_25846} to #hal.device.promise<@__device_2>
    %28296 = torch_c.from_builtin_tensor %28295 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28297 = torch_c.to_builtin_tensor %28218 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25847 = arith.constant 1 : index
    %dim_25848 = tensor.dim %28297, %c1_25847 : tensor<4x?x4096xf16>
    %28298 = flow.tensor.transfer %28297 : tensor<4x?x4096xf16>{%dim_25848} to #hal.device.promise<@__device_2>
    %28299 = torch_c.from_builtin_tensor %28298 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28300 = torch_c.to_builtin_tensor %28225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25849 = arith.constant 1 : index
    %dim_25850 = tensor.dim %28300, %c1_25849 : tensor<4x?x4096xf16>
    %28301 = flow.tensor.transfer %28300 : tensor<4x?x4096xf16>{%dim_25850} to #hal.device.promise<@__device_2>
    %28302 = torch_c.from_builtin_tensor %28301 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25851 = torch.constant.int 1
    %28303 = torch.aten.add.Tensor %28284, %28287, %int1_25851 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25852 = torch.constant.int 1
    %28304 = torch.aten.add.Tensor %28303, %28190, %int1_25852 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25853 = torch.constant.int 1
    %28305 = torch.aten.add.Tensor %28304, %28290, %int1_25853 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25854 = torch.constant.int 1
    %28306 = torch.aten.add.Tensor %28305, %28293, %int1_25854 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25855 = torch.constant.int 1
    %28307 = torch.aten.add.Tensor %28306, %28296, %int1_25855 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25856 = torch.constant.int 1
    %28308 = torch.aten.add.Tensor %28307, %28299, %int1_25856 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25857 = torch.constant.int 1
    %28309 = torch.aten.add.Tensor %28308, %28302, %int1_25857 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
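    // Unrolled reduction, destination @__device_2 (local partial %28190 used
    // in place); result %28309.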
    %28310 = torch_c.to_builtin_tensor %28176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25858 = arith.constant 1 : index
    %dim_25859 = tensor.dim %28310, %c1_25858 : tensor<4x?x4096xf16>
    %28311 = flow.tensor.transfer %28310 : tensor<4x?x4096xf16>{%dim_25859} to #hal.device.promise<@__device_3>
    %28312 = torch_c.from_builtin_tensor %28311 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28313 = torch_c.to_builtin_tensor %28183 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25860 = arith.constant 1 : index
    %dim_25861 = tensor.dim %28313, %c1_25860 : tensor<4x?x4096xf16>
    %28314 = flow.tensor.transfer %28313 : tensor<4x?x4096xf16>{%dim_25861} to #hal.device.promise<@__device_3>
    %28315 = torch_c.from_builtin_tensor %28314 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28316 = torch_c.to_builtin_tensor %28190 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25862 = arith.constant 1 : index
    %dim_25863 = tensor.dim %28316, %c1_25862 : tensor<4x?x4096xf16>
    %28317 = flow.tensor.transfer %28316 : tensor<4x?x4096xf16>{%dim_25863} to #hal.device.promise<@__device_3>
    %28318 = torch_c.from_builtin_tensor %28317 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28319 = torch_c.to_builtin_tensor %28204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25864 = arith.constant 1 : index
    %dim_25865 = tensor.dim %28319, %c1_25864 : tensor<4x?x4096xf16>
    %28320 = flow.tensor.transfer %28319 : tensor<4x?x4096xf16>{%dim_25865} to #hal.device.promise<@__device_3>
    %28321 = torch_c.from_builtin_tensor %28320 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28322 = torch_c.to_builtin_tensor %28211 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25866 = arith.constant 1 : index
    %dim_25867 = tensor.dim %28322, %c1_25866 : tensor<4x?x4096xf16>
    %28323 = flow.tensor.transfer %28322 : tensor<4x?x4096xf16>{%dim_25867} to #hal.device.promise<@__device_3>
    %28324 = torch_c.from_builtin_tensor %28323 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28325 = torch_c.to_builtin_tensor %28218 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25868 = arith.constant 1 : index
    %dim_25869 = tensor.dim %28325, %c1_25868 : tensor<4x?x4096xf16>
    %28326 = flow.tensor.transfer %28325 : tensor<4x?x4096xf16>{%dim_25869} to #hal.device.promise<@__device_3>
    %28327 = torch_c.from_builtin_tensor %28326 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28328 = torch_c.to_builtin_tensor %28225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25870 = arith.constant 1 : index
    %dim_25871 = tensor.dim %28328, %c1_25870 : tensor<4x?x4096xf16>
    %28329 = flow.tensor.transfer %28328 : tensor<4x?x4096xf16>{%dim_25871} to #hal.device.promise<@__device_3>
    %28330 = torch_c.from_builtin_tensor %28329 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25872 = torch.constant.int 1
    %28331 = torch.aten.add.Tensor %28312, %28315, %int1_25872 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25873 = torch.constant.int 1
    %28332 = torch.aten.add.Tensor %28331, %28318, %int1_25873 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25874 = torch.constant.int 1
    %28333 = torch.aten.add.Tensor %28332, %28197, %int1_25874 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25875 = torch.constant.int 1
    %28334 = torch.aten.add.Tensor %28333, %28321, %int1_25875 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25876 = torch.constant.int 1
    %28335 = torch.aten.add.Tensor %28334, %28324, %int1_25876 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25877 = torch.constant.int 1
    %28336 = torch.aten.add.Tensor %28335, %28327, %int1_25877 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25878 = torch.constant.int 1
    %28337 = torch.aten.add.Tensor %28336, %28330, %int1_25878 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
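    // Unrolled reduction, destination @__device_3 (local partial %28197);
    // result %28337.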
    %28338 = torch_c.to_builtin_tensor %28176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25879 = arith.constant 1 : index
    %dim_25880 = tensor.dim %28338, %c1_25879 : tensor<4x?x4096xf16>
    %28339 = flow.tensor.transfer %28338 : tensor<4x?x4096xf16>{%dim_25880} to #hal.device.promise<@__device_4>
    %28340 = torch_c.from_builtin_tensor %28339 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28341 = torch_c.to_builtin_tensor %28183 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25881 = arith.constant 1 : index
    %dim_25882 = tensor.dim %28341, %c1_25881 : tensor<4x?x4096xf16>
    %28342 = flow.tensor.transfer %28341 : tensor<4x?x4096xf16>{%dim_25882} to #hal.device.promise<@__device_4>
    %28343 = torch_c.from_builtin_tensor %28342 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28344 = torch_c.to_builtin_tensor %28190 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25883 = arith.constant 1 : index
    %dim_25884 = tensor.dim %28344, %c1_25883 : tensor<4x?x4096xf16>
    %28345 = flow.tensor.transfer %28344 : tensor<4x?x4096xf16>{%dim_25884} to #hal.device.promise<@__device_4>
    %28346 = torch_c.from_builtin_tensor %28345 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28347 = torch_c.to_builtin_tensor %28197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25885 = arith.constant 1 : index
    %dim_25886 = tensor.dim %28347, %c1_25885 : tensor<4x?x4096xf16>
    %28348 = flow.tensor.transfer %28347 : tensor<4x?x4096xf16>{%dim_25886} to #hal.device.promise<@__device_4>
    %28349 = torch_c.from_builtin_tensor %28348 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28350 = torch_c.to_builtin_tensor %28211 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25887 = arith.constant 1 : index
    %dim_25888 = tensor.dim %28350, %c1_25887 : tensor<4x?x4096xf16>
    %28351 = flow.tensor.transfer %28350 : tensor<4x?x4096xf16>{%dim_25888} to #hal.device.promise<@__device_4>
    %28352 = torch_c.from_builtin_tensor %28351 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28353 = torch_c.to_builtin_tensor %28218 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25889 = arith.constant 1 : index
    %dim_25890 = tensor.dim %28353, %c1_25889 : tensor<4x?x4096xf16>
    %28354 = flow.tensor.transfer %28353 : tensor<4x?x4096xf16>{%dim_25890} to #hal.device.promise<@__device_4>
    %28355 = torch_c.from_builtin_tensor %28354 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28356 = torch_c.to_builtin_tensor %28225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25891 = arith.constant 1 : index
    %dim_25892 = tensor.dim %28356, %c1_25891 : tensor<4x?x4096xf16>
    %28357 = flow.tensor.transfer %28356 : tensor<4x?x4096xf16>{%dim_25892} to #hal.device.promise<@__device_4>
    %28358 = torch_c.from_builtin_tensor %28357 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25893 = torch.constant.int 1
    %28359 = torch.aten.add.Tensor %28340, %28343, %int1_25893 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25894 = torch.constant.int 1
    %28360 = torch.aten.add.Tensor %28359, %28346, %int1_25894 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25895 = torch.constant.int 1
    %28361 = torch.aten.add.Tensor %28360, %28349, %int1_25895 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25896 = torch.constant.int 1
    %28362 = torch.aten.add.Tensor %28361, %28204, %int1_25896 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25897 = torch.constant.int 1
    %28363 = torch.aten.add.Tensor %28362, %28352, %int1_25897 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25898 = torch.constant.int 1
    %28364 = torch.aten.add.Tensor %28363, %28355, %int1_25898 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25899 = torch.constant.int 1
    %28365 = torch.aten.add.Tensor %28364, %28358, %int1_25899 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
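    // Unrolled reduction, destination @__device_4 (local partial %28204);
    // result %28365.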
    %28366 = torch_c.to_builtin_tensor %28176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25900 = arith.constant 1 : index
    %dim_25901 = tensor.dim %28366, %c1_25900 : tensor<4x?x4096xf16>
    %28367 = flow.tensor.transfer %28366 : tensor<4x?x4096xf16>{%dim_25901} to #hal.device.promise<@__device_5>
    %28368 = torch_c.from_builtin_tensor %28367 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28369 = torch_c.to_builtin_tensor %28183 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25902 = arith.constant 1 : index
    %dim_25903 = tensor.dim %28369, %c1_25902 : tensor<4x?x4096xf16>
    %28370 = flow.tensor.transfer %28369 : tensor<4x?x4096xf16>{%dim_25903} to #hal.device.promise<@__device_5>
    %28371 = torch_c.from_builtin_tensor %28370 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28372 = torch_c.to_builtin_tensor %28190 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25904 = arith.constant 1 : index
    %dim_25905 = tensor.dim %28372, %c1_25904 : tensor<4x?x4096xf16>
    %28373 = flow.tensor.transfer %28372 : tensor<4x?x4096xf16>{%dim_25905} to #hal.device.promise<@__device_5>
    %28374 = torch_c.from_builtin_tensor %28373 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28375 = torch_c.to_builtin_tensor %28197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25906 = arith.constant 1 : index
    %dim_25907 = tensor.dim %28375, %c1_25906 : tensor<4x?x4096xf16>
    %28376 = flow.tensor.transfer %28375 : tensor<4x?x4096xf16>{%dim_25907} to #hal.device.promise<@__device_5>
    %28377 = torch_c.from_builtin_tensor %28376 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28378 = torch_c.to_builtin_tensor %28204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25908 = arith.constant 1 : index
    %dim_25909 = tensor.dim %28378, %c1_25908 : tensor<4x?x4096xf16>
    %28379 = flow.tensor.transfer %28378 : tensor<4x?x4096xf16>{%dim_25909} to #hal.device.promise<@__device_5>
    %28380 = torch_c.from_builtin_tensor %28379 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28381 = torch_c.to_builtin_tensor %28218 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25910 = arith.constant 1 : index
    %dim_25911 = tensor.dim %28381, %c1_25910 : tensor<4x?x4096xf16>
    %28382 = flow.tensor.transfer %28381 : tensor<4x?x4096xf16>{%dim_25911} to #hal.device.promise<@__device_5>
    %28383 = torch_c.from_builtin_tensor %28382 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28384 = torch_c.to_builtin_tensor %28225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25912 = arith.constant 1 : index
    %dim_25913 = tensor.dim %28384, %c1_25912 : tensor<4x?x4096xf16>
    %28385 = flow.tensor.transfer %28384 : tensor<4x?x4096xf16>{%dim_25913} to #hal.device.promise<@__device_5>
    %28386 = torch_c.from_builtin_tensor %28385 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25914 = torch.constant.int 1
    %28387 = torch.aten.add.Tensor %28368, %28371, %int1_25914 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25915 = torch.constant.int 1
    %28388 = torch.aten.add.Tensor %28387, %28374, %int1_25915 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25916 = torch.constant.int 1
    %28389 = torch.aten.add.Tensor %28388, %28377, %int1_25916 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25917 = torch.constant.int 1
    %28390 = torch.aten.add.Tensor %28389, %28380, %int1_25917 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25918 = torch.constant.int 1
    %28391 = torch.aten.add.Tensor %28390, %28211, %int1_25918 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25919 = torch.constant.int 1
    %28392 = torch.aten.add.Tensor %28391, %28383, %int1_25919 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25920 = torch.constant.int 1
    %28393 = torch.aten.add.Tensor %28392, %28386, %int1_25920 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
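    // Unrolled reduction, destination @__device_5 (local partial %28211);
    // result %28393.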
    %28394 = torch_c.to_builtin_tensor %28176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25921 = arith.constant 1 : index
    %dim_25922 = tensor.dim %28394, %c1_25921 : tensor<4x?x4096xf16>
    %28395 = flow.tensor.transfer %28394 : tensor<4x?x4096xf16>{%dim_25922} to #hal.device.promise<@__device_6>
    %28396 = torch_c.from_builtin_tensor %28395 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28397 = torch_c.to_builtin_tensor %28183 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25923 = arith.constant 1 : index
    %dim_25924 = tensor.dim %28397, %c1_25923 : tensor<4x?x4096xf16>
    %28398 = flow.tensor.transfer %28397 : tensor<4x?x4096xf16>{%dim_25924} to #hal.device.promise<@__device_6>
    %28399 = torch_c.from_builtin_tensor %28398 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28400 = torch_c.to_builtin_tensor %28190 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25925 = arith.constant 1 : index
    %dim_25926 = tensor.dim %28400, %c1_25925 : tensor<4x?x4096xf16>
    %28401 = flow.tensor.transfer %28400 : tensor<4x?x4096xf16>{%dim_25926} to #hal.device.promise<@__device_6>
    %28402 = torch_c.from_builtin_tensor %28401 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28403 = torch_c.to_builtin_tensor %28197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25927 = arith.constant 1 : index
    %dim_25928 = tensor.dim %28403, %c1_25927 : tensor<4x?x4096xf16>
    %28404 = flow.tensor.transfer %28403 : tensor<4x?x4096xf16>{%dim_25928} to #hal.device.promise<@__device_6>
    %28405 = torch_c.from_builtin_tensor %28404 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28406 = torch_c.to_builtin_tensor %28204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25929 = arith.constant 1 : index
    %dim_25930 = tensor.dim %28406, %c1_25929 : tensor<4x?x4096xf16>
    %28407 = flow.tensor.transfer %28406 : tensor<4x?x4096xf16>{%dim_25930} to #hal.device.promise<@__device_6>
    %28408 = torch_c.from_builtin_tensor %28407 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28409 = torch_c.to_builtin_tensor %28211 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25931 = arith.constant 1 : index
    %dim_25932 = tensor.dim %28409, %c1_25931 : tensor<4x?x4096xf16>
    %28410 = flow.tensor.transfer %28409 : tensor<4x?x4096xf16>{%dim_25932} to #hal.device.promise<@__device_6>
    %28411 = torch_c.from_builtin_tensor %28410 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28412 = torch_c.to_builtin_tensor %28225 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25933 = arith.constant 1 : index
    %dim_25934 = tensor.dim %28412, %c1_25933 : tensor<4x?x4096xf16>
    %28413 = flow.tensor.transfer %28412 : tensor<4x?x4096xf16>{%dim_25934} to #hal.device.promise<@__device_6>
    %28414 = torch_c.from_builtin_tensor %28413 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25935 = torch.constant.int 1
    %28415 = torch.aten.add.Tensor %28396, %28399, %int1_25935 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25936 = torch.constant.int 1
    %28416 = torch.aten.add.Tensor %28415, %28402, %int1_25936 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25937 = torch.constant.int 1
    %28417 = torch.aten.add.Tensor %28416, %28405, %int1_25937 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25938 = torch.constant.int 1
    %28418 = torch.aten.add.Tensor %28417, %28408, %int1_25938 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25939 = torch.constant.int 1
    %28419 = torch.aten.add.Tensor %28418, %28411, %int1_25939 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25940 = torch.constant.int 1
    %28420 = torch.aten.add.Tensor %28419, %28218, %int1_25940 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25941 = torch.constant.int 1
    %28421 = torch.aten.add.Tensor %28420, %28414, %int1_25941 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
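    // Unrolled reduction, destination @__device_6 (local partial %28218);
    // result %28421.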
    %28422 = torch_c.to_builtin_tensor %28176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25942 = arith.constant 1 : index
    %dim_25943 = tensor.dim %28422, %c1_25942 : tensor<4x?x4096xf16>
    %28423 = flow.tensor.transfer %28422 : tensor<4x?x4096xf16>{%dim_25943} to #hal.device.promise<@__device_7>
    %28424 = torch_c.from_builtin_tensor %28423 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28425 = torch_c.to_builtin_tensor %28183 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25944 = arith.constant 1 : index
    %dim_25945 = tensor.dim %28425, %c1_25944 : tensor<4x?x4096xf16>
    %28426 = flow.tensor.transfer %28425 : tensor<4x?x4096xf16>{%dim_25945} to #hal.device.promise<@__device_7>
    %28427 = torch_c.from_builtin_tensor %28426 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28428 = torch_c.to_builtin_tensor %28190 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25946 = arith.constant 1 : index
    %dim_25947 = tensor.dim %28428, %c1_25946 : tensor<4x?x4096xf16>
    %28429 = flow.tensor.transfer %28428 : tensor<4x?x4096xf16>{%dim_25947} to #hal.device.promise<@__device_7>
    %28430 = torch_c.from_builtin_tensor %28429 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28431 = torch_c.to_builtin_tensor %28197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25948 = arith.constant 1 : index
    %dim_25949 = tensor.dim %28431, %c1_25948 : tensor<4x?x4096xf16>
    %28432 = flow.tensor.transfer %28431 : tensor<4x?x4096xf16>{%dim_25949} to #hal.device.promise<@__device_7>
    %28433 = torch_c.from_builtin_tensor %28432 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28434 = torch_c.to_builtin_tensor %28204 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25950 = arith.constant 1 : index
    %dim_25951 = tensor.dim %28434, %c1_25950 : tensor<4x?x4096xf16>
    %28435 = flow.tensor.transfer %28434 : tensor<4x?x4096xf16>{%dim_25951} to #hal.device.promise<@__device_7>
    %28436 = torch_c.from_builtin_tensor %28435 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28437 = torch_c.to_builtin_tensor %28211 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25952 = arith.constant 1 : index
    %dim_25953 = tensor.dim %28437, %c1_25952 : tensor<4x?x4096xf16>
    %28438 = flow.tensor.transfer %28437 : tensor<4x?x4096xf16>{%dim_25953} to #hal.device.promise<@__device_7>
    %28439 = torch_c.from_builtin_tensor %28438 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %28440 = torch_c.to_builtin_tensor %28218 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_25954 = arith.constant 1 : index
    %dim_25955 = tensor.dim %28440, %c1_25954 : tensor<4x?x4096xf16>
    %28441 = flow.tensor.transfer %28440 : tensor<4x?x4096xf16>{%dim_25955} to #hal.device.promise<@__device_7>
    %28442 = torch_c.from_builtin_tensor %28441 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25956 = torch.constant.int 1
    %28443 = torch.aten.add.Tensor %28424, %28427, %int1_25956 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25957 = torch.constant.int 1
    %28444 = torch.aten.add.Tensor %28443, %28430, %int1_25957 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25958 = torch.constant.int 1
    %28445 = torch.aten.add.Tensor %28444, %28433, %int1_25958 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25959 = torch.constant.int 1
    %28446 = torch.aten.add.Tensor %28445, %28436, %int1_25959 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25960 = torch.constant.int 1
    %28447 = torch.aten.add.Tensor %28446, %28439, %int1_25960 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25961 = torch.constant.int 1
    %28448 = torch.aten.add.Tensor %28447, %28442, %int1_25961 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25962 = torch.constant.int 1
    %28449 = torch.aten.add.Tensor %28448, %28225, %int1_25962 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
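    // Final chain, destination @__device_7 (local partial %28225); result
    // %28449. Every device now holds its own copy of the reduced FFN output
    // (%28253, %28281, %28309, %28337, %28365, %28393, %28421, %28449). Note
    // that the summation order differs per destination, so in f16 the
    // replicas agree only up to rounding.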
    %int1_25963 = torch.constant.int 1
    %28450 = torch.aten.add.Tensor %27930, %28253, %int1_25963 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25964 = torch.constant.int 1
    %28451 = torch.aten.add.Tensor %27931, %28281, %int1_25964 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25965 = torch.constant.int 1
    %28452 = torch.aten.add.Tensor %27932, %28309, %int1_25965 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25966 = torch.constant.int 1
    %28453 = torch.aten.add.Tensor %27933, %28337, %int1_25966 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25967 = torch.constant.int 1
    %28454 = torch.aten.add.Tensor %27934, %28365, %int1_25967 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25968 = torch.constant.int 1
    %28455 = torch.aten.add.Tensor %27935, %28393, %int1_25968 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25969 = torch.constant.int 1
    %28456 = torch.aten.add.Tensor %27936, %28421, %int1_25969 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_25970 = torch.constant.int 1
    %28457 = torch.aten.add.Tensor %27937, %28449, %int1_25970 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
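    // Per-device residual add: the reduced FFN output is accumulated onto the
    // incoming hidden state (%27930 .. %27937, defined earlier), producing
    // the block outputs %28450 .. %28457, one per device.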
    %int6_25971 = torch.constant.int 6
    %28458 = torch.prims.convert_element_type %28450, %int6_25971 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25972 = torch.constant.int 6
    %28459 = torch.prims.convert_element_type %28451, %int6_25972 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25973 = torch.constant.int 6
    %28460 = torch.prims.convert_element_type %28452, %int6_25973 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25974 = torch.constant.int 6
    %28461 = torch.prims.convert_element_type %28453, %int6_25974 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25975 = torch.constant.int 6
    %28462 = torch.prims.convert_element_type %28454, %int6_25975 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25976 = torch.constant.int 6
    %28463 = torch.prims.convert_element_type %28455, %int6_25976 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25977 = torch.constant.int 6
    %28464 = torch.prims.convert_element_type %28456, %int6_25977 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_25978 = torch.constant.int 6
    %28465 = torch.prims.convert_element_type %28457, %int6_25978 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
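    // RMSNorm epilogue, step 1: upcast each f16 replica to f32 (torch dtype
    // 6) so the normalization statistics are computed in full precision.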
    %int2_25979 = torch.constant.int 2
    %28466 = torch.aten.pow.Tensor_Scalar %28458, %int2_25979 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25980 = torch.constant.int 2
    %28467 = torch.aten.pow.Tensor_Scalar %28459, %int2_25980 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25981 = torch.constant.int 2
    %28468 = torch.aten.pow.Tensor_Scalar %28460, %int2_25981 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25982 = torch.constant.int 2
    %28469 = torch.aten.pow.Tensor_Scalar %28461, %int2_25982 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25983 = torch.constant.int 2
    %28470 = torch.aten.pow.Tensor_Scalar %28462, %int2_25983 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25984 = torch.constant.int 2
    %28471 = torch.aten.pow.Tensor_Scalar %28463, %int2_25984 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25985 = torch.constant.int 2
    %28472 = torch.aten.pow.Tensor_Scalar %28464, %int2_25985 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_25986 = torch.constant.int 2
    %28473 = torch.aten.pow.Tensor_Scalar %28465, %int2_25986 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
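    // Step 2: element-wise square of each upcast replica.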
    %int-1_25987 = torch.constant.int -1
    %28474 = torch.prim.ListConstruct %int-1_25987 : (!torch.int) -> !torch.list<int>
    %true_25988 = torch.constant.bool true
    %none_25989 = torch.constant.none
    %28475 = torch.aten.mean.dim %28466, %28474, %true_25988, %none_25989 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_25990 = torch.constant.int -1
    %28476 = torch.prim.ListConstruct %int-1_25990 : (!torch.int) -> !torch.list<int>
    %true_25991 = torch.constant.bool true
    %none_25992 = torch.constant.none
    %28477 = torch.aten.mean.dim %28467, %28476, %true_25991, %none_25992 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_25993 = torch.constant.int -1
    %28478 = torch.prim.ListConstruct %int-1_25993 : (!torch.int) -> !torch.list<int>
    %true_25994 = torch.constant.bool true
    %none_25995 = torch.constant.none
    %28479 = torch.aten.mean.dim %28468, %28478, %true_25994, %none_25995 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_25996 = torch.constant.int -1
    %28480 = torch.prim.ListConstruct %int-1_25996 : (!torch.int) -> !torch.list<int>
    %true_25997 = torch.constant.bool true
    %none_25998 = torch.constant.none
    %28481 = torch.aten.mean.dim %28469, %28480, %true_25997, %none_25998 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_25999 = torch.constant.int -1
    %28482 = torch.prim.ListConstruct %int-1_25999 : (!torch.int) -> !torch.list<int>
    %true_26000 = torch.constant.bool true
    %none_26001 = torch.constant.none
    %28483 = torch.aten.mean.dim %28470, %28482, %true_26000, %none_26001 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_26002 = torch.constant.int -1
    %28484 = torch.prim.ListConstruct %int-1_26002 : (!torch.int) -> !torch.list<int>
    %true_26003 = torch.constant.bool true
    %none_26004 = torch.constant.none
    %28485 = torch.aten.mean.dim %28471, %28484, %true_26003, %none_26004 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_26005 = torch.constant.int -1
    %28486 = torch.prim.ListConstruct %int-1_26005 : (!torch.int) -> !torch.list<int>
    %true_26006 = torch.constant.bool true
    %none_26007 = torch.constant.none
    %28487 = torch.aten.mean.dim %28472, %28486, %true_26006, %none_26007 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_26008 = torch.constant.int -1
    %28488 = torch.prim.ListConstruct %int-1_26008 : (!torch.int) -> !torch.list<int>
    %true_26009 = torch.constant.bool true
    %none_26010 = torch.constant.none
    %28489 = torch.aten.mean.dim %28473, %28488, %true_26009, %none_26010 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
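    // Step 3: mean over the hidden dimension (dims [-1], keepdim = true,
    // default accumulation dtype), reducing [4, ?, 4096] to the per-token
    // mean square E[x^2] of shape [4, ?, 1].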
    %float9.999990e-06_26011 = torch.constant.float 9.9999997473787516E-6
    %int1_26012 = torch.constant.int 1
    %28490 = torch.aten.add.Scalar %28475, %float9.999990e-06_26011, %int1_26012 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_26013 = torch.constant.float 9.9999997473787516E-6
    %int1_26014 = torch.constant.int 1
    %28491 = torch.aten.add.Scalar %28477, %float9.999990e-06_26013, %int1_26014 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_26015 = torch.constant.float 9.9999997473787516E-6
    %int1_26016 = torch.constant.int 1
    %28492 = torch.aten.add.Scalar %28479, %float9.999990e-06_26015, %int1_26016 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_26017 = torch.constant.float 9.9999997473787516E-6
    %int1_26018 = torch.constant.int 1
    %28493 = torch.aten.add.Scalar %28481, %float9.999990e-06_26017, %int1_26018 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_26019 = torch.constant.float 9.9999997473787516E-6
    %int1_26020 = torch.constant.int 1
    %28494 = torch.aten.add.Scalar %28483, %float9.999990e-06_26019, %int1_26020 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_26021 = torch.constant.float 9.9999997473787516E-6
    %int1_26022 = torch.constant.int 1
    %28495 = torch.aten.add.Scalar %28485, %float9.999990e-06_26021, %int1_26022 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_26023 = torch.constant.float 9.9999997473787516E-6
    %int1_26024 = torch.constant.int 1
    %28496 = torch.aten.add.Scalar %28487, %float9.999990e-06_26023, %int1_26024 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_26025 = torch.constant.float 9.9999997473787516E-6
    %int1_26026 = torch.constant.int 1
    %28497 = torch.aten.add.Scalar %28489, %float9.999990e-06_26025, %int1_26026 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
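    // Reciprocal square root of (mean + eps) per shard: the
    // 1/sqrt(mean(x^2) + eps) normalization factor.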
    %28498 = torch.aten.rsqrt %28490 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %28499 = torch.aten.rsqrt %28491 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %28500 = torch.aten.rsqrt %28492 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %28501 = torch.aten.rsqrt %28493 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %28502 = torch.aten.rsqrt %28494 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %28503 = torch.aten.rsqrt %28495 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %28504 = torch.aten.rsqrt %28496 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %28505 = torch.aten.rsqrt %28497 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %28505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
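    // Multiply each shard's f32 hidden states (%28458..%28465) by its
    // normalization factor to produce the normalized activations.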
    %28506 = torch.aten.mul.Tensor %28458, %28498 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28507 = torch.aten.mul.Tensor %28459, %28499 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28508 = torch.aten.mul.Tensor %28460, %28500 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28509 = torch.aten.mul.Tensor %28461, %28501 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28510 = torch.aten.mul.Tensor %28462, %28502 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28511 = torch.aten.mul.Tensor %28463, %28503 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28512 = torch.aten.mul.Tensor %28464, %28504 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28513 = torch.aten.mul.Tensor %28465, %28505 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
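    // Apply the elementwise [4096] f32 norm weight (%1016..%1023, presumably the
    // same gain replicated once per device) to each shard's normalized output.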
    %28514 = torch.aten.mul.Tensor %1016, %28506 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28515 = torch.aten.mul.Tensor %1017, %28507 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28516 = torch.aten.mul.Tensor %1018, %28508 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28517 = torch.aten.mul.Tensor %1019, %28509 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28518 = torch.aten.mul.Tensor %1020, %28510 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28519 = torch.aten.mul.Tensor %1021, %28511 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28520 = torch.aten.mul.Tensor %1022, %28512 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %28521 = torch.aten.mul.Tensor %1023, %28513 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %28521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
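    // Cast the normalized activations back to f16 (torch dtype code 5) before
    // the half-precision attention projections below.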
    %int5_26027 = torch.constant.int 5
    %28522 = torch.prims.convert_element_type %28514, %int5_26027 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_26028 = torch.constant.int 5
    %28523 = torch.prims.convert_element_type %28515, %int5_26028 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_26029 = torch.constant.int 5
    %28524 = torch.prims.convert_element_type %28516, %int5_26029 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_26030 = torch.constant.int 5
    %28525 = torch.prims.convert_element_type %28517, %int5_26030 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_26031 = torch.constant.int 5
    %28526 = torch.prims.convert_element_type %28518, %int5_26031 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_26032 = torch.constant.int 5
    %28527 = torch.prims.convert_element_type %28519, %int5_26032 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_26033 = torch.constant.int 5
    %28528 = torch.prims.convert_element_type %28520, %int5_26033 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_26034 = torch.constant.int 5
    %28529 = torch.prims.convert_element_type %28521, %int5_26034 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %28529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
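    // Transpose each shard's [512,4096] projection weight (%1024..%1031) to
    // [4096,512] for use as the right-hand matmul operand; from the shapes these
    // appear to be the per-device query weight shards.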
    %int1_26035 = torch.constant.int 1
    %int0_26036 = torch.constant.int 0
    %28530 = torch.prim.ListConstruct %int1_26035, %int0_26036 : (!torch.int, !torch.int) -> !torch.list<int>
    %28531 = torch.aten.permute %1024, %28530 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_26037 = torch.constant.int 1
    %int0_26038 = torch.constant.int 0
    %28532 = torch.prim.ListConstruct %int1_26037, %int0_26038 : (!torch.int, !torch.int) -> !torch.list<int>
    %28533 = torch.aten.permute %1025, %28532 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_26039 = torch.constant.int 1
    %int0_26040 = torch.constant.int 0
    %28534 = torch.prim.ListConstruct %int1_26039, %int0_26040 : (!torch.int, !torch.int) -> !torch.list<int>
    %28535 = torch.aten.permute %1026, %28534 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_26041 = torch.constant.int 1
    %int0_26042 = torch.constant.int 0
    %28536 = torch.prim.ListConstruct %int1_26041, %int0_26042 : (!torch.int, !torch.int) -> !torch.list<int>
    %28537 = torch.aten.permute %1027, %28536 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_26043 = torch.constant.int 1
    %int0_26044 = torch.constant.int 0
    %28538 = torch.prim.ListConstruct %int1_26043, %int0_26044 : (!torch.int, !torch.int) -> !torch.list<int>
    %28539 = torch.aten.permute %1028, %28538 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_26045 = torch.constant.int 1
    %int0_26046 = torch.constant.int 0
    %28540 = torch.prim.ListConstruct %int1_26045, %int0_26046 : (!torch.int, !torch.int) -> !torch.list<int>
    %28541 = torch.aten.permute %1029, %28540 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_26047 = torch.constant.int 1
    %int0_26048 = torch.constant.int 0
    %28542 = torch.prim.ListConstruct %int1_26047, %int0_26048 : (!torch.int, !torch.int) -> !torch.list<int>
    %28543 = torch.aten.permute %1030, %28542 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_26049 = torch.constant.int 1
    %int0_26050 = torch.constant.int 0
    %28544 = torch.prim.ListConstruct %int1_26049, %int0_26050 : (!torch.int, !torch.int) -> !torch.list<int>
    %28545 = torch.aten.permute %1031, %28544 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
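    // Per-shard query projection: flatten [4,?,4096] to [4*?,4096], matmul with
    // the transposed weight, then view the [?,512] result back to [4,?,512].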
    %int4_26051 = torch.constant.int 4
    %28546 = torch.aten.mul.int %int4_26051, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26052 = torch.constant.int 4096
    %28547 = torch.prim.ListConstruct %28546, %int4096_26052 : (!torch.int, !torch.int) -> !torch.list<int>
    %28548 = torch.aten.view %28522, %28547 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28548, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28549 = torch.aten.mm %28548, %28531 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %28549, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_26053 = torch.constant.int 4
    %int512_26054 = torch.constant.int 512
    %28550 = torch.prim.ListConstruct %int4_26053, %2482, %int512_26054 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28551 = torch.aten.view %28549, %28550 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %28551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_26055 = torch.constant.int 4
    %28552 = torch.aten.mul.int %int4_26055, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26056 = torch.constant.int 4096
    %28553 = torch.prim.ListConstruct %28552, %int4096_26056 : (!torch.int, !torch.int) -> !torch.list<int>
    %28554 = torch.aten.view %28523, %28553 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28554, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28555 = torch.aten.mm %28554, %28533 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %28555, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_26057 = torch.constant.int 4
    %int512_26058 = torch.constant.int 512
    %28556 = torch.prim.ListConstruct %int4_26057, %2482, %int512_26058 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28557 = torch.aten.view %28555, %28556 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %28557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_26059 = torch.constant.int 4
    %28558 = torch.aten.mul.int %int4_26059, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26060 = torch.constant.int 4096
    %28559 = torch.prim.ListConstruct %28558, %int4096_26060 : (!torch.int, !torch.int) -> !torch.list<int>
    %28560 = torch.aten.view %28524, %28559 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28560, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28561 = torch.aten.mm %28560, %28535 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %28561, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_26061 = torch.constant.int 4
    %int512_26062 = torch.constant.int 512
    %28562 = torch.prim.ListConstruct %int4_26061, %2482, %int512_26062 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28563 = torch.aten.view %28561, %28562 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %28563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_26063 = torch.constant.int 4
    %28564 = torch.aten.mul.int %int4_26063, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26064 = torch.constant.int 4096
    %28565 = torch.prim.ListConstruct %28564, %int4096_26064 : (!torch.int, !torch.int) -> !torch.list<int>
    %28566 = torch.aten.view %28525, %28565 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28566, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28567 = torch.aten.mm %28566, %28537 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %28567, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_26065 = torch.constant.int 4
    %int512_26066 = torch.constant.int 512
    %28568 = torch.prim.ListConstruct %int4_26065, %2482, %int512_26066 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28569 = torch.aten.view %28567, %28568 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %28569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_26067 = torch.constant.int 4
    %28570 = torch.aten.mul.int %int4_26067, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26068 = torch.constant.int 4096
    %28571 = torch.prim.ListConstruct %28570, %int4096_26068 : (!torch.int, !torch.int) -> !torch.list<int>
    %28572 = torch.aten.view %28526, %28571 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28572, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28573 = torch.aten.mm %28572, %28539 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %28573, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_26069 = torch.constant.int 4
    %int512_26070 = torch.constant.int 512
    %28574 = torch.prim.ListConstruct %int4_26069, %2482, %int512_26070 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28575 = torch.aten.view %28573, %28574 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %28575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_26071 = torch.constant.int 4
    %28576 = torch.aten.mul.int %int4_26071, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26072 = torch.constant.int 4096
    %28577 = torch.prim.ListConstruct %28576, %int4096_26072 : (!torch.int, !torch.int) -> !torch.list<int>
    %28578 = torch.aten.view %28527, %28577 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28578, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28579 = torch.aten.mm %28578, %28541 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %28579, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_26073 = torch.constant.int 4
    %int512_26074 = torch.constant.int 512
    %28580 = torch.prim.ListConstruct %int4_26073, %2482, %int512_26074 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28581 = torch.aten.view %28579, %28580 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %28581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_26075 = torch.constant.int 4
    %28582 = torch.aten.mul.int %int4_26075, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26076 = torch.constant.int 4096
    %28583 = torch.prim.ListConstruct %28582, %int4096_26076 : (!torch.int, !torch.int) -> !torch.list<int>
    %28584 = torch.aten.view %28528, %28583 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28584, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28585 = torch.aten.mm %28584, %28543 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %28585, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_26077 = torch.constant.int 4
    %int512_26078 = torch.constant.int 512
    %28586 = torch.prim.ListConstruct %int4_26077, %2482, %int512_26078 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28587 = torch.aten.view %28585, %28586 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %28587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_26079 = torch.constant.int 4
    %28588 = torch.aten.mul.int %int4_26079, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26080 = torch.constant.int 4096
    %28589 = torch.prim.ListConstruct %28588, %int4096_26080 : (!torch.int, !torch.int) -> !torch.list<int>
    %28590 = torch.aten.view %28529, %28589 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28590, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28591 = torch.aten.mm %28590, %28545 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %28591, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_26081 = torch.constant.int 4
    %int512_26082 = torch.constant.int 512
    %28592 = torch.prim.ListConstruct %int4_26081, %2482, %int512_26082 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28593 = torch.aten.view %28591, %28592 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %28593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
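    // Transpose the eight [128,4096] weight shards (%1032..%1039) to [4096,128];
    // the 128-wide outputs suggest these are the key projection shards.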
    %int1_26083 = torch.constant.int 1
    %int0_26084 = torch.constant.int 0
    %28594 = torch.prim.ListConstruct %int1_26083, %int0_26084 : (!torch.int, !torch.int) -> !torch.list<int>
    %28595 = torch.aten.permute %1032, %28594 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26085 = torch.constant.int 1
    %int0_26086 = torch.constant.int 0
    %28596 = torch.prim.ListConstruct %int1_26085, %int0_26086 : (!torch.int, !torch.int) -> !torch.list<int>
    %28597 = torch.aten.permute %1033, %28596 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26087 = torch.constant.int 1
    %int0_26088 = torch.constant.int 0
    %28598 = torch.prim.ListConstruct %int1_26087, %int0_26088 : (!torch.int, !torch.int) -> !torch.list<int>
    %28599 = torch.aten.permute %1034, %28598 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26089 = torch.constant.int 1
    %int0_26090 = torch.constant.int 0
    %28600 = torch.prim.ListConstruct %int1_26089, %int0_26090 : (!torch.int, !torch.int) -> !torch.list<int>
    %28601 = torch.aten.permute %1035, %28600 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26091 = torch.constant.int 1
    %int0_26092 = torch.constant.int 0
    %28602 = torch.prim.ListConstruct %int1_26091, %int0_26092 : (!torch.int, !torch.int) -> !torch.list<int>
    %28603 = torch.aten.permute %1036, %28602 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26093 = torch.constant.int 1
    %int0_26094 = torch.constant.int 0
    %28604 = torch.prim.ListConstruct %int1_26093, %int0_26094 : (!torch.int, !torch.int) -> !torch.list<int>
    %28605 = torch.aten.permute %1037, %28604 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26095 = torch.constant.int 1
    %int0_26096 = torch.constant.int 0
    %28606 = torch.prim.ListConstruct %int1_26095, %int0_26096 : (!torch.int, !torch.int) -> !torch.list<int>
    %28607 = torch.aten.permute %1038, %28606 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26097 = torch.constant.int 1
    %int0_26098 = torch.constant.int 0
    %28608 = torch.prim.ListConstruct %int1_26097, %int0_26098 : (!torch.int, !torch.int) -> !torch.list<int>
    %28609 = torch.aten.permute %1039, %28608 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
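    // Per-shard key projection: same flatten / matmul / reshape pattern,
    // producing a [4,?,128] tensor on each device.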
    %int4_26099 = torch.constant.int 4
    %28610 = torch.aten.mul.int %int4_26099, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26100 = torch.constant.int 4096
    %28611 = torch.prim.ListConstruct %28610, %int4096_26100 : (!torch.int, !torch.int) -> !torch.list<int>
    %28612 = torch.aten.view %28522, %28611 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28612, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28613 = torch.aten.mm %28612, %28595 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28613, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26101 = torch.constant.int 4
    %int128_26102 = torch.constant.int 128
    %28614 = torch.prim.ListConstruct %int4_26101, %2482, %int128_26102 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28615 = torch.aten.view %28613, %28614 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26103 = torch.constant.int 4
    %28616 = torch.aten.mul.int %int4_26103, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26104 = torch.constant.int 4096
    %28617 = torch.prim.ListConstruct %28616, %int4096_26104 : (!torch.int, !torch.int) -> !torch.list<int>
    %28618 = torch.aten.view %28523, %28617 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28618, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28619 = torch.aten.mm %28618, %28597 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28619, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26105 = torch.constant.int 4
    %int128_26106 = torch.constant.int 128
    %28620 = torch.prim.ListConstruct %int4_26105, %2482, %int128_26106 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28621 = torch.aten.view %28619, %28620 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26107 = torch.constant.int 4
    %28622 = torch.aten.mul.int %int4_26107, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26108 = torch.constant.int 4096
    %28623 = torch.prim.ListConstruct %28622, %int4096_26108 : (!torch.int, !torch.int) -> !torch.list<int>
    %28624 = torch.aten.view %28524, %28623 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28624, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28625 = torch.aten.mm %28624, %28599 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28625, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26109 = torch.constant.int 4
    %int128_26110 = torch.constant.int 128
    %28626 = torch.prim.ListConstruct %int4_26109, %2482, %int128_26110 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28627 = torch.aten.view %28625, %28626 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26111 = torch.constant.int 4
    %28628 = torch.aten.mul.int %int4_26111, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26112 = torch.constant.int 4096
    %28629 = torch.prim.ListConstruct %28628, %int4096_26112 : (!torch.int, !torch.int) -> !torch.list<int>
    %28630 = torch.aten.view %28525, %28629 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28630, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28631 = torch.aten.mm %28630, %28601 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28631, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26113 = torch.constant.int 4
    %int128_26114 = torch.constant.int 128
    %28632 = torch.prim.ListConstruct %int4_26113, %2482, %int128_26114 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28633 = torch.aten.view %28631, %28632 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26115 = torch.constant.int 4
    %28634 = torch.aten.mul.int %int4_26115, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26116 = torch.constant.int 4096
    %28635 = torch.prim.ListConstruct %28634, %int4096_26116 : (!torch.int, !torch.int) -> !torch.list<int>
    %28636 = torch.aten.view %28526, %28635 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28636, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28637 = torch.aten.mm %28636, %28603 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28637, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26117 = torch.constant.int 4
    %int128_26118 = torch.constant.int 128
    %28638 = torch.prim.ListConstruct %int4_26117, %2482, %int128_26118 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28639 = torch.aten.view %28637, %28638 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26119 = torch.constant.int 4
    %28640 = torch.aten.mul.int %int4_26119, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26120 = torch.constant.int 4096
    %28641 = torch.prim.ListConstruct %28640, %int4096_26120 : (!torch.int, !torch.int) -> !torch.list<int>
    %28642 = torch.aten.view %28527, %28641 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28642, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28643 = torch.aten.mm %28642, %28605 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28643, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26121 = torch.constant.int 4
    %int128_26122 = torch.constant.int 128
    %28644 = torch.prim.ListConstruct %int4_26121, %2482, %int128_26122 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28645 = torch.aten.view %28643, %28644 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26123 = torch.constant.int 4
    %28646 = torch.aten.mul.int %int4_26123, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26124 = torch.constant.int 4096
    %28647 = torch.prim.ListConstruct %28646, %int4096_26124 : (!torch.int, !torch.int) -> !torch.list<int>
    %28648 = torch.aten.view %28528, %28647 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28648, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28649 = torch.aten.mm %28648, %28607 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28649, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26125 = torch.constant.int 4
    %int128_26126 = torch.constant.int 128
    %28650 = torch.prim.ListConstruct %int4_26125, %2482, %int128_26126 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28651 = torch.aten.view %28649, %28650 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26127 = torch.constant.int 4
    %28652 = torch.aten.mul.int %int4_26127, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26128 = torch.constant.int 4096
    %28653 = torch.prim.ListConstruct %28652, %int4096_26128 : (!torch.int, !torch.int) -> !torch.list<int>
    %28654 = torch.aten.view %28529, %28653 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28654, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28655 = torch.aten.mm %28654, %28609 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28655, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26129 = torch.constant.int 4
    %int128_26130 = torch.constant.int 128
    %28656 = torch.prim.ListConstruct %int4_26129, %2482, %int128_26130 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28657 = torch.aten.view %28655, %28656 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
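    // Transpose the next eight [128,4096] weight shards (%1040..%1047) to
    // [4096,128]; these appear to be the value projection shards.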
    %int1_26131 = torch.constant.int 1
    %int0_26132 = torch.constant.int 0
    %28658 = torch.prim.ListConstruct %int1_26131, %int0_26132 : (!torch.int, !torch.int) -> !torch.list<int>
    %28659 = torch.aten.permute %1040, %28658 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26133 = torch.constant.int 1
    %int0_26134 = torch.constant.int 0
    %28660 = torch.prim.ListConstruct %int1_26133, %int0_26134 : (!torch.int, !torch.int) -> !torch.list<int>
    %28661 = torch.aten.permute %1041, %28660 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26135 = torch.constant.int 1
    %int0_26136 = torch.constant.int 0
    %28662 = torch.prim.ListConstruct %int1_26135, %int0_26136 : (!torch.int, !torch.int) -> !torch.list<int>
    %28663 = torch.aten.permute %1042, %28662 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26137 = torch.constant.int 1
    %int0_26138 = torch.constant.int 0
    %28664 = torch.prim.ListConstruct %int1_26137, %int0_26138 : (!torch.int, !torch.int) -> !torch.list<int>
    %28665 = torch.aten.permute %1043, %28664 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26139 = torch.constant.int 1
    %int0_26140 = torch.constant.int 0
    %28666 = torch.prim.ListConstruct %int1_26139, %int0_26140 : (!torch.int, !torch.int) -> !torch.list<int>
    %28667 = torch.aten.permute %1044, %28666 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26141 = torch.constant.int 1
    %int0_26142 = torch.constant.int 0
    %28668 = torch.prim.ListConstruct %int1_26141, %int0_26142 : (!torch.int, !torch.int) -> !torch.list<int>
    %28669 = torch.aten.permute %1045, %28668 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26143 = torch.constant.int 1
    %int0_26144 = torch.constant.int 0
    %28670 = torch.prim.ListConstruct %int1_26143, %int0_26144 : (!torch.int, !torch.int) -> !torch.list<int>
    %28671 = torch.aten.permute %1046, %28670 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_26145 = torch.constant.int 1
    %int0_26146 = torch.constant.int 0
    %28672 = torch.prim.ListConstruct %int1_26145, %int0_26146 : (!torch.int, !torch.int) -> !torch.list<int>
    %28673 = torch.aten.permute %1047, %28672 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
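    // Per-shard value projection, again flatten / matmul / reshape to [4,?,128].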
    %int4_26147 = torch.constant.int 4
    %28674 = torch.aten.mul.int %int4_26147, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26148 = torch.constant.int 4096
    %28675 = torch.prim.ListConstruct %28674, %int4096_26148 : (!torch.int, !torch.int) -> !torch.list<int>
    %28676 = torch.aten.view %28522, %28675 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28676, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28677 = torch.aten.mm %28676, %28659 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28677, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26149 = torch.constant.int 4
    %int128_26150 = torch.constant.int 128
    %28678 = torch.prim.ListConstruct %int4_26149, %2482, %int128_26150 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28679 = torch.aten.view %28677, %28678 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26151 = torch.constant.int 4
    %28680 = torch.aten.mul.int %int4_26151, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26152 = torch.constant.int 4096
    %28681 = torch.prim.ListConstruct %28680, %int4096_26152 : (!torch.int, !torch.int) -> !torch.list<int>
    %28682 = torch.aten.view %28523, %28681 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28682, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28683 = torch.aten.mm %28682, %28661 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28683, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26153 = torch.constant.int 4
    %int128_26154 = torch.constant.int 128
    %28684 = torch.prim.ListConstruct %int4_26153, %2482, %int128_26154 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28685 = torch.aten.view %28683, %28684 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26155 = torch.constant.int 4
    %28686 = torch.aten.mul.int %int4_26155, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26156 = torch.constant.int 4096
    %28687 = torch.prim.ListConstruct %28686, %int4096_26156 : (!torch.int, !torch.int) -> !torch.list<int>
    %28688 = torch.aten.view %28524, %28687 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28688, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28689 = torch.aten.mm %28688, %28663 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28689, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26157 = torch.constant.int 4
    %int128_26158 = torch.constant.int 128
    %28690 = torch.prim.ListConstruct %int4_26157, %2482, %int128_26158 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28691 = torch.aten.view %28689, %28690 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26159 = torch.constant.int 4
    %28692 = torch.aten.mul.int %int4_26159, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26160 = torch.constant.int 4096
    %28693 = torch.prim.ListConstruct %28692, %int4096_26160 : (!torch.int, !torch.int) -> !torch.list<int>
    %28694 = torch.aten.view %28525, %28693 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28694, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28695 = torch.aten.mm %28694, %28665 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28695, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26161 = torch.constant.int 4
    %int128_26162 = torch.constant.int 128
    %28696 = torch.prim.ListConstruct %int4_26161, %2482, %int128_26162 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28697 = torch.aten.view %28695, %28696 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26163 = torch.constant.int 4
    %28698 = torch.aten.mul.int %int4_26163, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26164 = torch.constant.int 4096
    %28699 = torch.prim.ListConstruct %28698, %int4096_26164 : (!torch.int, !torch.int) -> !torch.list<int>
    %28700 = torch.aten.view %28526, %28699 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28700, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28701 = torch.aten.mm %28700, %28667 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28701, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26165 = torch.constant.int 4
    %int128_26166 = torch.constant.int 128
    %28702 = torch.prim.ListConstruct %int4_26165, %2482, %int128_26166 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28703 = torch.aten.view %28701, %28702 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26167 = torch.constant.int 4
    %28704 = torch.aten.mul.int %int4_26167, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26168 = torch.constant.int 4096
    %28705 = torch.prim.ListConstruct %28704, %int4096_26168 : (!torch.int, !torch.int) -> !torch.list<int>
    %28706 = torch.aten.view %28527, %28705 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28706, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28707 = torch.aten.mm %28706, %28669 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28707, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26169 = torch.constant.int 4
    %int128_26170 = torch.constant.int 128
    %28708 = torch.prim.ListConstruct %int4_26169, %2482, %int128_26170 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28709 = torch.aten.view %28707, %28708 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26171 = torch.constant.int 4
    %28710 = torch.aten.mul.int %int4_26171, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26172 = torch.constant.int 4096
    %28711 = torch.prim.ListConstruct %28710, %int4096_26172 : (!torch.int, !torch.int) -> !torch.list<int>
    %28712 = torch.aten.view %28528, %28711 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28712, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28713 = torch.aten.mm %28712, %28671 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28713, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26173 = torch.constant.int 4
    %int128_26174 = torch.constant.int 128
    %28714 = torch.prim.ListConstruct %int4_26173, %2482, %int128_26174 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28715 = torch.aten.view %28713, %28714 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_26175 = torch.constant.int 4
    %28716 = torch.aten.mul.int %int4_26175, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_26176 = torch.constant.int 4096
    %28717 = torch.prim.ListConstruct %28716, %int4096_26176 : (!torch.int, !torch.int) -> !torch.list<int>
    %28718 = torch.aten.view %28529, %28717 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %28718, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %28719 = torch.aten.mm %28718, %28673 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %28719, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_26177 = torch.constant.int 4
    %int128_26178 = torch.constant.int 128
    %28720 = torch.prim.ListConstruct %int4_26177, %2482, %int128_26178 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28721 = torch.aten.view %28719, %28720 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %28721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
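    // Reshape the per-shard query results from [4,?,512] to [4,?,4,128]: four
    // 128-dim query heads per shard (32 heads in total across the 8 shards).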
    %int4_26179 = torch.constant.int 4
    %int4_26180 = torch.constant.int 4
    %int128_26181 = torch.constant.int 128
    %28722 = torch.prim.ListConstruct %int4_26179, %2482, %int4_26180, %int128_26181 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28723 = torch.aten.view %28551, %28722 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_26182 = torch.constant.int 4
    %int4_26183 = torch.constant.int 4
    %int128_26184 = torch.constant.int 128
    %28724 = torch.prim.ListConstruct %int4_26182, %2482, %int4_26183, %int128_26184 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28725 = torch.aten.view %28557, %28724 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_26185 = torch.constant.int 4
    %int4_26186 = torch.constant.int 4
    %int128_26187 = torch.constant.int 128
    %28726 = torch.prim.ListConstruct %int4_26185, %2482, %int4_26186, %int128_26187 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28727 = torch.aten.view %28563, %28726 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_26188 = torch.constant.int 4
    %int4_26189 = torch.constant.int 4
    %int128_26190 = torch.constant.int 128
    %28728 = torch.prim.ListConstruct %int4_26188, %2482, %int4_26189, %int128_26190 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28729 = torch.aten.view %28569, %28728 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_26191 = torch.constant.int 4
    %int4_26192 = torch.constant.int 4
    %int128_26193 = torch.constant.int 128
    %28730 = torch.prim.ListConstruct %int4_26191, %2482, %int4_26192, %int128_26193 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28731 = torch.aten.view %28575, %28730 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_26194 = torch.constant.int 4
    %int4_26195 = torch.constant.int 4
    %int128_26196 = torch.constant.int 128
    %28732 = torch.prim.ListConstruct %int4_26194, %2482, %int4_26195, %int128_26196 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28733 = torch.aten.view %28581, %28732 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_26197 = torch.constant.int 4
    %int4_26198 = torch.constant.int 4
    %int128_26199 = torch.constant.int 128
    %28734 = torch.prim.ListConstruct %int4_26197, %2482, %int4_26198, %int128_26199 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28735 = torch.aten.view %28587, %28734 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_26200 = torch.constant.int 4
    %int4_26201 = torch.constant.int 4
    %int128_26202 = torch.constant.int 128
    %28736 = torch.prim.ListConstruct %int4_26200, %2482, %int4_26201, %int128_26202 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28737 = torch.aten.view %28593, %28736 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
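    // Reshape the per-shard key results from [4,?,128] to [4,?,1,128]: one key
    // head per shard, consistent with grouped-query attention (8 KV heads total).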
    %int4_26203 = torch.constant.int 4
    %int1_26204 = torch.constant.int 1
    %int128_26205 = torch.constant.int 128
    %28738 = torch.prim.ListConstruct %int4_26203, %2482, %int1_26204, %int128_26205 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28739 = torch.aten.view %28615, %28738 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26206 = torch.constant.int 4
    %int1_26207 = torch.constant.int 1
    %int128_26208 = torch.constant.int 128
    %28740 = torch.prim.ListConstruct %int4_26206, %2482, %int1_26207, %int128_26208 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28741 = torch.aten.view %28621, %28740 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26209 = torch.constant.int 4
    %int1_26210 = torch.constant.int 1
    %int128_26211 = torch.constant.int 128
    %28742 = torch.prim.ListConstruct %int4_26209, %2482, %int1_26210, %int128_26211 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28743 = torch.aten.view %28627, %28742 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26212 = torch.constant.int 4
    %int1_26213 = torch.constant.int 1
    %int128_26214 = torch.constant.int 128
    %28744 = torch.prim.ListConstruct %int4_26212, %2482, %int1_26213, %int128_26214 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28745 = torch.aten.view %28633, %28744 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26215 = torch.constant.int 4
    %int1_26216 = torch.constant.int 1
    %int128_26217 = torch.constant.int 128
    %28746 = torch.prim.ListConstruct %int4_26215, %2482, %int1_26216, %int128_26217 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28747 = torch.aten.view %28639, %28746 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26218 = torch.constant.int 4
    %int1_26219 = torch.constant.int 1
    %int128_26220 = torch.constant.int 128
    %28748 = torch.prim.ListConstruct %int4_26218, %2482, %int1_26219, %int128_26220 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28749 = torch.aten.view %28645, %28748 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26221 = torch.constant.int 4
    %int1_26222 = torch.constant.int 1
    %int128_26223 = torch.constant.int 128
    %28750 = torch.prim.ListConstruct %int4_26221, %2482, %int1_26222, %int128_26223 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28751 = torch.aten.view %28651, %28750 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26224 = torch.constant.int 4
    %int1_26225 = torch.constant.int 1
    %int128_26226 = torch.constant.int 128
    %28752 = torch.prim.ListConstruct %int4_26224, %2482, %int1_26225, %int128_26226 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28753 = torch.aten.view %28657, %28752 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
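    // Same reshape for the per-shard value results: [4,?,128] -> [4,?,1,128].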
    %int4_26227 = torch.constant.int 4
    %int1_26228 = torch.constant.int 1
    %int128_26229 = torch.constant.int 128
    %28754 = torch.prim.ListConstruct %int4_26227, %2482, %int1_26228, %int128_26229 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28755 = torch.aten.view %28679, %28754 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26230 = torch.constant.int 4
    %int1_26231 = torch.constant.int 1
    %int128_26232 = torch.constant.int 128
    %28756 = torch.prim.ListConstruct %int4_26230, %2482, %int1_26231, %int128_26232 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28757 = torch.aten.view %28685, %28756 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26233 = torch.constant.int 4
    %int1_26234 = torch.constant.int 1
    %int128_26235 = torch.constant.int 128
    %28758 = torch.prim.ListConstruct %int4_26233, %2482, %int1_26234, %int128_26235 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28759 = torch.aten.view %28691, %28758 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26236 = torch.constant.int 4
    %int1_26237 = torch.constant.int 1
    %int128_26238 = torch.constant.int 128
    %28760 = torch.prim.ListConstruct %int4_26236, %2482, %int1_26237, %int128_26238 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28761 = torch.aten.view %28697, %28760 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26239 = torch.constant.int 4
    %int1_26240 = torch.constant.int 1
    %int128_26241 = torch.constant.int 128
    %28762 = torch.prim.ListConstruct %int4_26239, %2482, %int1_26240, %int128_26241 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28763 = torch.aten.view %28703, %28762 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26242 = torch.constant.int 4
    %int1_26243 = torch.constant.int 1
    %int128_26244 = torch.constant.int 128
    %28764 = torch.prim.ListConstruct %int4_26242, %2482, %int1_26243, %int128_26244 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28765 = torch.aten.view %28709, %28764 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26245 = torch.constant.int 4
    %int1_26246 = torch.constant.int 1
    %int128_26247 = torch.constant.int 128
    %28766 = torch.prim.ListConstruct %int4_26245, %2482, %int1_26246, %int128_26247 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28767 = torch.aten.view %28715, %28766 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_26248 = torch.constant.int 4
    %int1_26249 = torch.constant.int 1
    %int128_26250 = torch.constant.int 128
    %28768 = torch.prim.ListConstruct %int4_26248, %2482, %int1_26249, %int128_26250 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %28769 = torch.aten.view %28721, %28768 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
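    // Rotary-embedding (RoPE) frequency table, recomputed inline:
    //   positions  = arange(131072)                       (max context length)
    //   inv_freq_i = 1 / 500000^(2i/128), i = 0..63       (rope theta = 5.0e5)
    //   table      = cos(positions[:,None] * inv_freq)
    //              + j*sin(positions[:,None] * inv_freq)
    // yielding a [131072,64] complex<f32> tensor (%28783 below).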
    %int131072_26251 = torch.constant.int 131072
    %none_26252 = torch.constant.none
    %none_26253 = torch.constant.none
    %cpu_26254 = torch.constant.device "cpu"
    %false_26255 = torch.constant.bool false
    %28770 = torch.aten.arange %int131072_26251, %none_26252, %none_26253, %cpu_26254, %false_26255 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_26256 = torch.constant.int 0
    %int128_26257 = torch.constant.int 128
    %int2_26258 = torch.constant.int 2
    %none_26259 = torch.constant.none
    %none_26260 = torch.constant.none
    %cpu_26261 = torch.constant.device "cpu"
    %false_26262 = torch.constant.bool false
    %28771 = torch.aten.arange.start_step %int0_26256, %int128_26257, %int2_26258, %none_26259, %none_26260, %cpu_26261, %false_26262 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_26263 = torch.constant.int 0
    %int0_26264 = torch.constant.int 0
    %int64_26265 = torch.constant.int 64
    %int1_26266 = torch.constant.int 1
    %28772 = torch.aten.slice.Tensor %28771, %int0_26263, %int0_26264, %int64_26265, %int1_26266 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_26267 = torch.constant.int 6
    %28773 = torch.prims.convert_element_type %28772, %int6_26267 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_26268 = torch.constant.int 128
    %28774 = torch.aten.div.Scalar %28773, %int128_26268 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_26269 = torch.constant.float 5.000000e+05
    %28775 = torch.aten.pow.Scalar %float5.000000e05_26269, %28774 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %28776 = torch.aten.reciprocal %28775 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_26270 = torch.constant.float 1.000000e+00
    %28777 = torch.aten.mul.Scalar %28776, %float1.000000e00_26270 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_26271 = torch.constant.int 131072
    %int1_26272 = torch.constant.int 1
    %28778 = torch.prim.ListConstruct %int131072_26271, %int1_26272 : (!torch.int, !torch.int) -> !torch.list<int>
    %28779 = torch.aten.view %28770, %28778 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %28780 = torch.aten.mul.Tensor %28779, %28777 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %28781 = torch.aten.cos %28780 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %28782 = torch.aten.sin %28780 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %28783 = torch.aten.complex %28781, %28782 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
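    // Replicate the complex table to every device: each triple below is
    // torch_c.to_builtin_tensor -> flow.tensor.transfer (to @__device_N) ->
    // torch_c.from_builtin_tensor, for N = 0..7.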
    %28784 = torch_c.to_builtin_tensor %28783 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28785 = flow.tensor.transfer %28784 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %28786 = torch_c.from_builtin_tensor %28785 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28787 = torch_c.to_builtin_tensor %28783 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28788 = flow.tensor.transfer %28787 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %28789 = torch_c.from_builtin_tensor %28788 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28790 = torch_c.to_builtin_tensor %28783 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28791 = flow.tensor.transfer %28790 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %28792 = torch_c.from_builtin_tensor %28791 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28793 = torch_c.to_builtin_tensor %28783 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28794 = flow.tensor.transfer %28793 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %28795 = torch_c.from_builtin_tensor %28794 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28796 = torch_c.to_builtin_tensor %28783 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28797 = flow.tensor.transfer %28796 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %28798 = torch_c.from_builtin_tensor %28797 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28799 = torch_c.to_builtin_tensor %28783 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28800 = flow.tensor.transfer %28799 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %28801 = torch_c.from_builtin_tensor %28800 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28802 = torch_c.to_builtin_tensor %28783 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28803 = flow.tensor.transfer %28802 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %28804 = torch_c.from_builtin_tensor %28803 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28805 = torch_c.to_builtin_tensor %28783 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28806 = flow.tensor.transfer %28805 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %28807 = torch_c.from_builtin_tensor %28806 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
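    // Apply RoPE to what appears to be the query shard (4 heads per device),
    // starting with @__device_0:
    //   1. slice the device-local table to the current sequence length,
    //   2. reshape it to [1, s, 1, 64] for broadcasting,
    //   3. bitcast the f16 activations [4,?,4,128] to complex<f16> [4,?,4,64],
    //   4. complex-multiply by the table (promoting to complex<f32>),
    //   5. bitcast back to [4,?,4,128] f32 and truncate to f16.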
    %int1_26273 = torch.constant.int 1
    %28808 = torch.aten.size.int %28551, %int1_26273 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_26274 = torch.constant.int 0
    %28809 = torch.aten.add.int %int0_26274, %28808 : !torch.int, !torch.int -> !torch.int
    %int0_26275 = torch.constant.int 0
    %int0_26276 = torch.constant.int 0
    %int1_26277 = torch.constant.int 1
    %28810 = torch.aten.slice.Tensor %28786, %int0_26275, %int0_26276, %28809, %int1_26277 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28810, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26278 = torch.constant.int 1
    %int0_26279 = torch.constant.int 0
    %int9223372036854775807_26280 = torch.constant.int 9223372036854775807
    %int1_26281 = torch.constant.int 1
    %28811 = torch.aten.slice.Tensor %28810, %int1_26278, %int0_26279, %int9223372036854775807_26280, %int1_26281 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28811, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26282 = torch.constant.int 0
    %28812 = torch.aten.unsqueeze %28811, %int0_26282 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %28812, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26283 = torch.constant.int 2
    %28813 = torch.aten.unsqueeze %28812, %int2_26283 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28813, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26284 = torch.constant.int 3
    %int0_26285 = torch.constant.int 0
    %int9223372036854775807_26286 = torch.constant.int 9223372036854775807
    %int1_26287 = torch.constant.int 1
    %28814 = torch.aten.slice.Tensor %28813, %int3_26284, %int0_26285, %int9223372036854775807_26286, %int1_26287 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28814, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %28815 = torch_c.to_builtin_tensor %28723 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_26288 = arith.constant 1 : index
    %dim_26289 = tensor.dim %28815, %c1_26288 : tensor<4x?x4x128xf16>
    %28816 = flow.tensor.bitcast %28815 : tensor<4x?x4x128xf16>{%dim_26289} -> tensor<4x?x4x64xcomplex<f16>>{%dim_26289}
    %28817 = torch_c.from_builtin_tensor %28816 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %28817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %28818 = torch.aten.mul.Tensor %28817, %28814 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %28818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %28819 = torch_c.to_builtin_tensor %28818 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_26290 = arith.constant 1 : index
    %dim_26291 = tensor.dim %28819, %c1_26290 : tensor<4x?x4x64xcomplex<f32>>
    %28820 = flow.tensor.bitcast %28819 : tensor<4x?x4x64xcomplex<f32>>{%dim_26291} -> tensor<4x?x4x128xf32>{%dim_26291}
    %28821 = torch_c.from_builtin_tensor %28820 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %28821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_26292 = torch.constant.int 5
    %28822 = torch.prims.convert_element_type %28821, %int5_26292 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
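    // The same slice/unsqueeze/bitcast/complex-multiply sequence repeats for
    // @__device_1 through @__device_7 below, each reading its own replicated
    // table copy and its own activation shard.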
    %int1_26293 = torch.constant.int 1
    %28823 = torch.aten.size.int %28557, %int1_26293 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_26294 = torch.constant.int 0
    %28824 = torch.aten.add.int %int0_26294, %28823 : !torch.int, !torch.int -> !torch.int
    %int0_26295 = torch.constant.int 0
    %int0_26296 = torch.constant.int 0
    %int1_26297 = torch.constant.int 1
    %28825 = torch.aten.slice.Tensor %28789, %int0_26295, %int0_26296, %28824, %int1_26297 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28825, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26298 = torch.constant.int 1
    %int0_26299 = torch.constant.int 0
    %int9223372036854775807_26300 = torch.constant.int 9223372036854775807
    %int1_26301 = torch.constant.int 1
    %28826 = torch.aten.slice.Tensor %28825, %int1_26298, %int0_26299, %int9223372036854775807_26300, %int1_26301 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28826, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26302 = torch.constant.int 0
    %28827 = torch.aten.unsqueeze %28826, %int0_26302 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %28827, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26303 = torch.constant.int 2
    %28828 = torch.aten.unsqueeze %28827, %int2_26303 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28828, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26304 = torch.constant.int 3
    %int0_26305 = torch.constant.int 0
    %int9223372036854775807_26306 = torch.constant.int 9223372036854775807
    %int1_26307 = torch.constant.int 1
    %28829 = torch.aten.slice.Tensor %28828, %int3_26304, %int0_26305, %int9223372036854775807_26306, %int1_26307 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28829, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %28830 = torch_c.to_builtin_tensor %28725 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_26308 = arith.constant 1 : index
    %dim_26309 = tensor.dim %28830, %c1_26308 : tensor<4x?x4x128xf16>
    %28831 = flow.tensor.bitcast %28830 : tensor<4x?x4x128xf16>{%dim_26309} -> tensor<4x?x4x64xcomplex<f16>>{%dim_26309}
    %28832 = torch_c.from_builtin_tensor %28831 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %28832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %28833 = torch.aten.mul.Tensor %28832, %28829 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %28833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %28834 = torch_c.to_builtin_tensor %28833 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_26310 = arith.constant 1 : index
    %dim_26311 = tensor.dim %28834, %c1_26310 : tensor<4x?x4x64xcomplex<f32>>
    %28835 = flow.tensor.bitcast %28834 : tensor<4x?x4x64xcomplex<f32>>{%dim_26311} -> tensor<4x?x4x128xf32>{%dim_26311}
    %28836 = torch_c.from_builtin_tensor %28835 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %28836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_26312 = torch.constant.int 5
    %28837 = torch.prims.convert_element_type %28836, %int5_26312 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_26313 = torch.constant.int 1
    %28838 = torch.aten.size.int %28563, %int1_26313 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_26314 = torch.constant.int 0
    %28839 = torch.aten.add.int %int0_26314, %28838 : !torch.int, !torch.int -> !torch.int
    %int0_26315 = torch.constant.int 0
    %int0_26316 = torch.constant.int 0
    %int1_26317 = torch.constant.int 1
    %28840 = torch.aten.slice.Tensor %28792, %int0_26315, %int0_26316, %28839, %int1_26317 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28840, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26318 = torch.constant.int 1
    %int0_26319 = torch.constant.int 0
    %int9223372036854775807_26320 = torch.constant.int 9223372036854775807
    %int1_26321 = torch.constant.int 1
    %28841 = torch.aten.slice.Tensor %28840, %int1_26318, %int0_26319, %int9223372036854775807_26320, %int1_26321 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28841, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26322 = torch.constant.int 0
    %28842 = torch.aten.unsqueeze %28841, %int0_26322 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %28842, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26323 = torch.constant.int 2
    %28843 = torch.aten.unsqueeze %28842, %int2_26323 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28843, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26324 = torch.constant.int 3
    %int0_26325 = torch.constant.int 0
    %int9223372036854775807_26326 = torch.constant.int 9223372036854775807
    %int1_26327 = torch.constant.int 1
    %28844 = torch.aten.slice.Tensor %28843, %int3_26324, %int0_26325, %int9223372036854775807_26326, %int1_26327 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28844, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %28845 = torch_c.to_builtin_tensor %28727 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_26328 = arith.constant 1 : index
    %dim_26329 = tensor.dim %28845, %c1_26328 : tensor<4x?x4x128xf16>
    %28846 = flow.tensor.bitcast %28845 : tensor<4x?x4x128xf16>{%dim_26329} -> tensor<4x?x4x64xcomplex<f16>>{%dim_26329}
    %28847 = torch_c.from_builtin_tensor %28846 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %28847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %28848 = torch.aten.mul.Tensor %28847, %28844 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %28848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %28849 = torch_c.to_builtin_tensor %28848 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_26330 = arith.constant 1 : index
    %dim_26331 = tensor.dim %28849, %c1_26330 : tensor<4x?x4x64xcomplex<f32>>
    %28850 = flow.tensor.bitcast %28849 : tensor<4x?x4x64xcomplex<f32>>{%dim_26331} -> tensor<4x?x4x128xf32>{%dim_26331}
    %28851 = torch_c.from_builtin_tensor %28850 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %28851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_26332 = torch.constant.int 5
    %28852 = torch.prims.convert_element_type %28851, %int5_26332 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_26333 = torch.constant.int 1
    %28853 = torch.aten.size.int %28569, %int1_26333 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_26334 = torch.constant.int 0
    %28854 = torch.aten.add.int %int0_26334, %28853 : !torch.int, !torch.int -> !torch.int
    %int0_26335 = torch.constant.int 0
    %int0_26336 = torch.constant.int 0
    %int1_26337 = torch.constant.int 1
    %28855 = torch.aten.slice.Tensor %28795, %int0_26335, %int0_26336, %28854, %int1_26337 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28855, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26338 = torch.constant.int 1
    %int0_26339 = torch.constant.int 0
    %int9223372036854775807_26340 = torch.constant.int 9223372036854775807
    %int1_26341 = torch.constant.int 1
    %28856 = torch.aten.slice.Tensor %28855, %int1_26338, %int0_26339, %int9223372036854775807_26340, %int1_26341 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28856, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26342 = torch.constant.int 0
    %28857 = torch.aten.unsqueeze %28856, %int0_26342 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %28857, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26343 = torch.constant.int 2
    %28858 = torch.aten.unsqueeze %28857, %int2_26343 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28858, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26344 = torch.constant.int 3
    %int0_26345 = torch.constant.int 0
    %int9223372036854775807_26346 = torch.constant.int 9223372036854775807
    %int1_26347 = torch.constant.int 1
    %28859 = torch.aten.slice.Tensor %28858, %int3_26344, %int0_26345, %int9223372036854775807_26346, %int1_26347 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28859, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %28860 = torch_c.to_builtin_tensor %28729 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_26348 = arith.constant 1 : index
    %dim_26349 = tensor.dim %28860, %c1_26348 : tensor<4x?x4x128xf16>
    %28861 = flow.tensor.bitcast %28860 : tensor<4x?x4x128xf16>{%dim_26349} -> tensor<4x?x4x64xcomplex<f16>>{%dim_26349}
    %28862 = torch_c.from_builtin_tensor %28861 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %28862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %28863 = torch.aten.mul.Tensor %28862, %28859 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %28863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %28864 = torch_c.to_builtin_tensor %28863 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_26350 = arith.constant 1 : index
    %dim_26351 = tensor.dim %28864, %c1_26350 : tensor<4x?x4x64xcomplex<f32>>
    %28865 = flow.tensor.bitcast %28864 : tensor<4x?x4x64xcomplex<f32>>{%dim_26351} -> tensor<4x?x4x128xf32>{%dim_26351}
    %28866 = torch_c.from_builtin_tensor %28865 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %28866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_26352 = torch.constant.int 5
    %28867 = torch.prims.convert_element_type %28866, %int5_26352 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_26353 = torch.constant.int 1
    %28868 = torch.aten.size.int %28575, %int1_26353 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_26354 = torch.constant.int 0
    %28869 = torch.aten.add.int %int0_26354, %28868 : !torch.int, !torch.int -> !torch.int
    %int0_26355 = torch.constant.int 0
    %int0_26356 = torch.constant.int 0
    %int1_26357 = torch.constant.int 1
    %28870 = torch.aten.slice.Tensor %28798, %int0_26355, %int0_26356, %28869, %int1_26357 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28870, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26358 = torch.constant.int 1
    %int0_26359 = torch.constant.int 0
    %int9223372036854775807_26360 = torch.constant.int 9223372036854775807
    %int1_26361 = torch.constant.int 1
    %28871 = torch.aten.slice.Tensor %28870, %int1_26358, %int0_26359, %int9223372036854775807_26360, %int1_26361 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28871, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26362 = torch.constant.int 0
    %28872 = torch.aten.unsqueeze %28871, %int0_26362 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %28872, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26363 = torch.constant.int 2
    %28873 = torch.aten.unsqueeze %28872, %int2_26363 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28873, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26364 = torch.constant.int 3
    %int0_26365 = torch.constant.int 0
    %int9223372036854775807_26366 = torch.constant.int 9223372036854775807
    %int1_26367 = torch.constant.int 1
    %28874 = torch.aten.slice.Tensor %28873, %int3_26364, %int0_26365, %int9223372036854775807_26366, %int1_26367 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28874, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %28875 = torch_c.to_builtin_tensor %28731 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_26368 = arith.constant 1 : index
    %dim_26369 = tensor.dim %28875, %c1_26368 : tensor<4x?x4x128xf16>
    %28876 = flow.tensor.bitcast %28875 : tensor<4x?x4x128xf16>{%dim_26369} -> tensor<4x?x4x64xcomplex<f16>>{%dim_26369}
    %28877 = torch_c.from_builtin_tensor %28876 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %28877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %28878 = torch.aten.mul.Tensor %28877, %28874 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %28878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %28879 = torch_c.to_builtin_tensor %28878 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_26370 = arith.constant 1 : index
    %dim_26371 = tensor.dim %28879, %c1_26370 : tensor<4x?x4x64xcomplex<f32>>
    %28880 = flow.tensor.bitcast %28879 : tensor<4x?x4x64xcomplex<f32>>{%dim_26371} -> tensor<4x?x4x128xf32>{%dim_26371}
    %28881 = torch_c.from_builtin_tensor %28880 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %28881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_26372 = torch.constant.int 5
    %28882 = torch.prims.convert_element_type %28881, %int5_26372 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_26373 = torch.constant.int 1
    %28883 = torch.aten.size.int %28581, %int1_26373 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_26374 = torch.constant.int 0
    %28884 = torch.aten.add.int %int0_26374, %28883 : !torch.int, !torch.int -> !torch.int
    %int0_26375 = torch.constant.int 0
    %int0_26376 = torch.constant.int 0
    %int1_26377 = torch.constant.int 1
    %28885 = torch.aten.slice.Tensor %28801, %int0_26375, %int0_26376, %28884, %int1_26377 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28885, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26378 = torch.constant.int 1
    %int0_26379 = torch.constant.int 0
    %int9223372036854775807_26380 = torch.constant.int 9223372036854775807
    %int1_26381 = torch.constant.int 1
    %28886 = torch.aten.slice.Tensor %28885, %int1_26378, %int0_26379, %int9223372036854775807_26380, %int1_26381 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28886, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26382 = torch.constant.int 0
    %28887 = torch.aten.unsqueeze %28886, %int0_26382 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %28887, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26383 = torch.constant.int 2
    %28888 = torch.aten.unsqueeze %28887, %int2_26383 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28888, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26384 = torch.constant.int 3
    %int0_26385 = torch.constant.int 0
    %int9223372036854775807_26386 = torch.constant.int 9223372036854775807
    %int1_26387 = torch.constant.int 1
    %28889 = torch.aten.slice.Tensor %28888, %int3_26384, %int0_26385, %int9223372036854775807_26386, %int1_26387 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28889, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %28890 = torch_c.to_builtin_tensor %28733 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_26388 = arith.constant 1 : index
    %dim_26389 = tensor.dim %28890, %c1_26388 : tensor<4x?x4x128xf16>
    %28891 = flow.tensor.bitcast %28890 : tensor<4x?x4x128xf16>{%dim_26389} -> tensor<4x?x4x64xcomplex<f16>>{%dim_26389}
    %28892 = torch_c.from_builtin_tensor %28891 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %28892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %28893 = torch.aten.mul.Tensor %28892, %28889 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %28893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %28894 = torch_c.to_builtin_tensor %28893 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_26390 = arith.constant 1 : index
    %dim_26391 = tensor.dim %28894, %c1_26390 : tensor<4x?x4x64xcomplex<f32>>
    %28895 = flow.tensor.bitcast %28894 : tensor<4x?x4x64xcomplex<f32>>{%dim_26391} -> tensor<4x?x4x128xf32>{%dim_26391}
    %28896 = torch_c.from_builtin_tensor %28895 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %28896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_26392 = torch.constant.int 5
    %28897 = torch.prims.convert_element_type %28896, %int5_26392 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_26393 = torch.constant.int 1
    %28898 = torch.aten.size.int %28587, %int1_26393 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_26394 = torch.constant.int 0
    %28899 = torch.aten.add.int %int0_26394, %28898 : !torch.int, !torch.int -> !torch.int
    %int0_26395 = torch.constant.int 0
    %int0_26396 = torch.constant.int 0
    %int1_26397 = torch.constant.int 1
    %28900 = torch.aten.slice.Tensor %28804, %int0_26395, %int0_26396, %28899, %int1_26397 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28900, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26398 = torch.constant.int 1
    %int0_26399 = torch.constant.int 0
    %int9223372036854775807_26400 = torch.constant.int 9223372036854775807
    %int1_26401 = torch.constant.int 1
    %28901 = torch.aten.slice.Tensor %28900, %int1_26398, %int0_26399, %int9223372036854775807_26400, %int1_26401 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28901, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26402 = torch.constant.int 0
    %28902 = torch.aten.unsqueeze %28901, %int0_26402 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %28902, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26403 = torch.constant.int 2
    %28903 = torch.aten.unsqueeze %28902, %int2_26403 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28903, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26404 = torch.constant.int 3
    %int0_26405 = torch.constant.int 0
    %int9223372036854775807_26406 = torch.constant.int 9223372036854775807
    %int1_26407 = torch.constant.int 1
    %28904 = torch.aten.slice.Tensor %28903, %int3_26404, %int0_26405, %int9223372036854775807_26406, %int1_26407 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28904, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %28905 = torch_c.to_builtin_tensor %28735 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_26408 = arith.constant 1 : index
    %dim_26409 = tensor.dim %28905, %c1_26408 : tensor<4x?x4x128xf16>
    %28906 = flow.tensor.bitcast %28905 : tensor<4x?x4x128xf16>{%dim_26409} -> tensor<4x?x4x64xcomplex<f16>>{%dim_26409}
    %28907 = torch_c.from_builtin_tensor %28906 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %28907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %28908 = torch.aten.mul.Tensor %28907, %28904 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %28908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %28909 = torch_c.to_builtin_tensor %28908 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_26410 = arith.constant 1 : index
    %dim_26411 = tensor.dim %28909, %c1_26410 : tensor<4x?x4x64xcomplex<f32>>
    %28910 = flow.tensor.bitcast %28909 : tensor<4x?x4x64xcomplex<f32>>{%dim_26411} -> tensor<4x?x4x128xf32>{%dim_26411}
    %28911 = torch_c.from_builtin_tensor %28910 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %28911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_26412 = torch.constant.int 5
    %28912 = torch.prims.convert_element_type %28911, %int5_26412 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_26413 = torch.constant.int 1
    %28913 = torch.aten.size.int %28593, %int1_26413 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_26414 = torch.constant.int 0
    %28914 = torch.aten.add.int %int0_26414, %28913 : !torch.int, !torch.int -> !torch.int
    %int0_26415 = torch.constant.int 0
    %int0_26416 = torch.constant.int 0
    %int1_26417 = torch.constant.int 1
    %28915 = torch.aten.slice.Tensor %28807, %int0_26415, %int0_26416, %28914, %int1_26417 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28915, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26418 = torch.constant.int 1
    %int0_26419 = torch.constant.int 0
    %int9223372036854775807_26420 = torch.constant.int 9223372036854775807
    %int1_26421 = torch.constant.int 1
    %28916 = torch.aten.slice.Tensor %28915, %int1_26418, %int0_26419, %int9223372036854775807_26420, %int1_26421 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28916, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26422 = torch.constant.int 0
    %28917 = torch.aten.unsqueeze %28916, %int0_26422 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %28917, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26423 = torch.constant.int 2
    %28918 = torch.aten.unsqueeze %28917, %int2_26423 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28918, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26424 = torch.constant.int 3
    %int0_26425 = torch.constant.int 0
    %int9223372036854775807_26426 = torch.constant.int 9223372036854775807
    %int1_26427 = torch.constant.int 1
    %28919 = torch.aten.slice.Tensor %28918, %int3_26424, %int0_26425, %int9223372036854775807_26426, %int1_26427 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28919, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %28920 = torch_c.to_builtin_tensor %28737 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_26428 = arith.constant 1 : index
    %dim_26429 = tensor.dim %28920, %c1_26428 : tensor<4x?x4x128xf16>
    %28921 = flow.tensor.bitcast %28920 : tensor<4x?x4x128xf16>{%dim_26429} -> tensor<4x?x4x64xcomplex<f16>>{%dim_26429}
    %28922 = torch_c.from_builtin_tensor %28921 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %28922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %28923 = torch.aten.mul.Tensor %28922, %28919 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %28923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %28924 = torch_c.to_builtin_tensor %28923 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_26430 = arith.constant 1 : index
    %dim_26431 = tensor.dim %28924, %c1_26430 : tensor<4x?x4x64xcomplex<f32>>
    %28925 = flow.tensor.bitcast %28924 : tensor<4x?x4x64xcomplex<f32>>{%dim_26431} -> tensor<4x?x4x128xf32>{%dim_26431}
    %28926 = torch_c.from_builtin_tensor %28925 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %28926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_26432 = torch.constant.int 5
    %28927 = torch.prims.convert_element_type %28926, %int5_26432 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %28927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
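    // An identical RoPE table is rebuilt from scratch (the exporter does not
    // reuse %28783) and broadcast to all eight devices again; this second
    // copy feeds the single-head [4,?,1,128] shards below.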
    %int131072_26433 = torch.constant.int 131072
    %none_26434 = torch.constant.none
    %none_26435 = torch.constant.none
    %cpu_26436 = torch.constant.device "cpu"
    %false_26437 = torch.constant.bool false
    %28928 = torch.aten.arange %int131072_26433, %none_26434, %none_26435, %cpu_26436, %false_26437 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_26438 = torch.constant.int 0
    %int128_26439 = torch.constant.int 128
    %int2_26440 = torch.constant.int 2
    %none_26441 = torch.constant.none
    %none_26442 = torch.constant.none
    %cpu_26443 = torch.constant.device "cpu"
    %false_26444 = torch.constant.bool false
    %28929 = torch.aten.arange.start_step %int0_26438, %int128_26439, %int2_26440, %none_26441, %none_26442, %cpu_26443, %false_26444 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_26445 = torch.constant.int 0
    %int0_26446 = torch.constant.int 0
    %int64_26447 = torch.constant.int 64
    %int1_26448 = torch.constant.int 1
    %28930 = torch.aten.slice.Tensor %28929, %int0_26445, %int0_26446, %int64_26447, %int1_26448 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_26449 = torch.constant.int 6
    %28931 = torch.prims.convert_element_type %28930, %int6_26449 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_26450 = torch.constant.int 128
    %28932 = torch.aten.div.Scalar %28931, %int128_26450 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_26451 = torch.constant.float 5.000000e+05
    %28933 = torch.aten.pow.Scalar %float5.000000e05_26451, %28932 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %28934 = torch.aten.reciprocal %28933 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_26452 = torch.constant.float 1.000000e+00
    %28935 = torch.aten.mul.Scalar %28934, %float1.000000e00_26452 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_26453 = torch.constant.int 131072
    %int1_26454 = torch.constant.int 1
    %28936 = torch.prim.ListConstruct %int131072_26453, %int1_26454 : (!torch.int, !torch.int) -> !torch.list<int>
    %28937 = torch.aten.view %28928, %28936 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %28938 = torch.aten.mul.Tensor %28937, %28935 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %28939 = torch.aten.cos %28938 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %28940 = torch.aten.sin %28938 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %28941 = torch.aten.complex %28939, %28940 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
    %28942 = torch_c.to_builtin_tensor %28941 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28943 = flow.tensor.transfer %28942 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %28944 = torch_c.from_builtin_tensor %28943 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28945 = torch_c.to_builtin_tensor %28941 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28946 = flow.tensor.transfer %28945 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %28947 = torch_c.from_builtin_tensor %28946 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28948 = torch_c.to_builtin_tensor %28941 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28949 = flow.tensor.transfer %28948 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %28950 = torch_c.from_builtin_tensor %28949 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28951 = torch_c.to_builtin_tensor %28941 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28952 = flow.tensor.transfer %28951 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %28953 = torch_c.from_builtin_tensor %28952 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28954 = torch_c.to_builtin_tensor %28941 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28955 = flow.tensor.transfer %28954 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %28956 = torch_c.from_builtin_tensor %28955 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28957 = torch_c.to_builtin_tensor %28941 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28958 = flow.tensor.transfer %28957 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %28959 = torch_c.from_builtin_tensor %28958 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28960 = torch_c.to_builtin_tensor %28941 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28961 = flow.tensor.transfer %28960 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %28962 = torch_c.from_builtin_tensor %28961 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %28963 = torch_c.to_builtin_tensor %28941 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %28964 = flow.tensor.transfer %28963 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %28965 = torch_c.from_builtin_tensor %28964 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
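    // RoPE application to the 1-head shards (apparently the key projections):
    // the same pattern as above, but on [4,?,1,128] f16 tensors bitcast to
    // [4,?,1,64] complex<f16>. @__device_0 comes first.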
    %int1_26455 = torch.constant.int 1
    %28966 = torch.aten.size.int %28615, %int1_26455 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_26456 = torch.constant.int 0
    %28967 = torch.aten.add.int %int0_26456, %28966 : !torch.int, !torch.int -> !torch.int
    %int0_26457 = torch.constant.int 0
    %int0_26458 = torch.constant.int 0
    %int1_26459 = torch.constant.int 1
    %28968 = torch.aten.slice.Tensor %28944, %int0_26457, %int0_26458, %28967, %int1_26459 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28968, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26460 = torch.constant.int 1
    %int0_26461 = torch.constant.int 0
    %int9223372036854775807_26462 = torch.constant.int 9223372036854775807
    %int1_26463 = torch.constant.int 1
    %28969 = torch.aten.slice.Tensor %28968, %int1_26460, %int0_26461, %int9223372036854775807_26462, %int1_26463 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28969, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26464 = torch.constant.int 0
    %28970 = torch.aten.unsqueeze %28969, %int0_26464 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %28970, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26465 = torch.constant.int 2
    %28971 = torch.aten.unsqueeze %28970, %int2_26465 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28971, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26466 = torch.constant.int 3
    %int0_26467 = torch.constant.int 0
    %int9223372036854775807_26468 = torch.constant.int 9223372036854775807
    %int1_26469 = torch.constant.int 1
    %28972 = torch.aten.slice.Tensor %28971, %int3_26466, %int0_26467, %int9223372036854775807_26468, %int1_26469 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28972, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %28973 = torch_c.to_builtin_tensor %28739 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_26470 = arith.constant 1 : index
    %dim_26471 = tensor.dim %28973, %c1_26470 : tensor<4x?x1x128xf16>
    %28974 = flow.tensor.bitcast %28973 : tensor<4x?x1x128xf16>{%dim_26471} -> tensor<4x?x1x64xcomplex<f16>>{%dim_26471}
    %28975 = torch_c.from_builtin_tensor %28974 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %28975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %28976 = torch.aten.mul.Tensor %28975, %28972 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %28977 = torch_c.to_builtin_tensor %28976 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_26472 = arith.constant 1 : index
    %dim_26473 = tensor.dim %28977, %c1_26472 : tensor<4x?x1x64xcomplex<f32>>
    %28978 = flow.tensor.bitcast %28977 : tensor<4x?x1x64xcomplex<f32>>{%dim_26473} -> tensor<4x?x1x128xf32>{%dim_26473}
    %28979 = torch_c.from_builtin_tensor %28978 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %28979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_26474 = torch.constant.int 5
    %28980 = torch.prims.convert_element_type %28979, %int5_26474 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
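    // The remaining devices (@__device_1 onward) repeat the same block, one
    // per device.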
    %int1_26475 = torch.constant.int 1
    %28981 = torch.aten.size.int %28621, %int1_26475 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_26476 = torch.constant.int 0
    %28982 = torch.aten.add.int %int0_26476, %28981 : !torch.int, !torch.int -> !torch.int
    %int0_26477 = torch.constant.int 0
    %int0_26478 = torch.constant.int 0
    %int1_26479 = torch.constant.int 1
    %28983 = torch.aten.slice.Tensor %28947, %int0_26477, %int0_26478, %28982, %int1_26479 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28983, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26480 = torch.constant.int 1
    %int0_26481 = torch.constant.int 0
    %int9223372036854775807_26482 = torch.constant.int 9223372036854775807
    %int1_26483 = torch.constant.int 1
    %28984 = torch.aten.slice.Tensor %28983, %int1_26480, %int0_26481, %int9223372036854775807_26482, %int1_26483 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28984, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26484 = torch.constant.int 0
    %28985 = torch.aten.unsqueeze %28984, %int0_26484 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %28985, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26485 = torch.constant.int 2
    %28986 = torch.aten.unsqueeze %28985, %int2_26485 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28986, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26486 = torch.constant.int 3
    %int0_26487 = torch.constant.int 0
    %int9223372036854775807_26488 = torch.constant.int 9223372036854775807
    %int1_26489 = torch.constant.int 1
    %28987 = torch.aten.slice.Tensor %28986, %int3_26486, %int0_26487, %int9223372036854775807_26488, %int1_26489 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28987, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %28988 = torch_c.to_builtin_tensor %28741 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_26490 = arith.constant 1 : index
    %dim_26491 = tensor.dim %28988, %c1_26490 : tensor<4x?x1x128xf16>
    %28989 = flow.tensor.bitcast %28988 : tensor<4x?x1x128xf16>{%dim_26491} -> tensor<4x?x1x64xcomplex<f16>>{%dim_26491}
    %28990 = torch_c.from_builtin_tensor %28989 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %28990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %28991 = torch.aten.mul.Tensor %28990, %28987 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %28991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %28992 = torch_c.to_builtin_tensor %28991 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_26492 = arith.constant 1 : index
    %dim_26493 = tensor.dim %28992, %c1_26492 : tensor<4x?x1x64xcomplex<f32>>
    %28993 = flow.tensor.bitcast %28992 : tensor<4x?x1x64xcomplex<f32>>{%dim_26493} -> tensor<4x?x1x128xf32>{%dim_26493}
    %28994 = torch_c.from_builtin_tensor %28993 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %28994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_26494 = torch.constant.int 5
    %28995 = torch.prims.convert_element_type %28994, %int5_26494 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %28995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_26495 = torch.constant.int 1
    %28996 = torch.aten.size.int %28627, %int1_26495 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_26496 = torch.constant.int 0
    %28997 = torch.aten.add.int %int0_26496, %28996 : !torch.int, !torch.int -> !torch.int
    %int0_26497 = torch.constant.int 0
    %int0_26498 = torch.constant.int 0
    %int1_26499 = torch.constant.int 1
    %28998 = torch.aten.slice.Tensor %28950, %int0_26497, %int0_26498, %28997, %int1_26499 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28998, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26500 = torch.constant.int 1
    %int0_26501 = torch.constant.int 0
    %int9223372036854775807_26502 = torch.constant.int 9223372036854775807
    %int1_26503 = torch.constant.int 1
    %28999 = torch.aten.slice.Tensor %28998, %int1_26500, %int0_26501, %int9223372036854775807_26502, %int1_26503 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %28999, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26504 = torch.constant.int 0
    %29000 = torch.aten.unsqueeze %28999, %int0_26504 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %29000, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26505 = torch.constant.int 2
    %29001 = torch.aten.unsqueeze %29000, %int2_26505 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29001, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26506 = torch.constant.int 3
    %int0_26507 = torch.constant.int 0
    %int9223372036854775807_26508 = torch.constant.int 9223372036854775807
    %int1_26509 = torch.constant.int 1
    %29002 = torch.aten.slice.Tensor %29001, %int3_26506, %int0_26507, %int9223372036854775807_26508, %int1_26509 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29002, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %29003 = torch_c.to_builtin_tensor %28743 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_26510 = arith.constant 1 : index
    %dim_26511 = tensor.dim %29003, %c1_26510 : tensor<4x?x1x128xf16>
    %29004 = flow.tensor.bitcast %29003 : tensor<4x?x1x128xf16>{%dim_26511} -> tensor<4x?x1x64xcomplex<f16>>{%dim_26511}
    %29005 = torch_c.from_builtin_tensor %29004 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %29005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %29006 = torch.aten.mul.Tensor %29005, %29002 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %29007 = torch_c.to_builtin_tensor %29006 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_26512 = arith.constant 1 : index
    %dim_26513 = tensor.dim %29007, %c1_26512 : tensor<4x?x1x64xcomplex<f32>>
    %29008 = flow.tensor.bitcast %29007 : tensor<4x?x1x64xcomplex<f32>>{%dim_26513} -> tensor<4x?x1x128xf32>{%dim_26513}
    %29009 = torch_c.from_builtin_tensor %29008 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %29009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_26514 = torch.constant.int 5
    %29010 = torch.prims.convert_element_type %29009, %int5_26514 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %29010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_26515 = torch.constant.int 1
    %29011 = torch.aten.size.int %28633, %int1_26515 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_26516 = torch.constant.int 0
    %29012 = torch.aten.add.int %int0_26516, %29011 : !torch.int, !torch.int -> !torch.int
    %int0_26517 = torch.constant.int 0
    %int0_26518 = torch.constant.int 0
    %int1_26519 = torch.constant.int 1
    %29013 = torch.aten.slice.Tensor %28953, %int0_26517, %int0_26518, %29012, %int1_26519 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %29013, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26520 = torch.constant.int 1
    %int0_26521 = torch.constant.int 0
    %int9223372036854775807_26522 = torch.constant.int 9223372036854775807
    %int1_26523 = torch.constant.int 1
    %29014 = torch.aten.slice.Tensor %29013, %int1_26520, %int0_26521, %int9223372036854775807_26522, %int1_26523 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %29014, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26524 = torch.constant.int 0
    %29015 = torch.aten.unsqueeze %29014, %int0_26524 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %29015, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26525 = torch.constant.int 2
    %29016 = torch.aten.unsqueeze %29015, %int2_26525 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29016, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26526 = torch.constant.int 3
    %int0_26527 = torch.constant.int 0
    %int9223372036854775807_26528 = torch.constant.int 9223372036854775807
    %int1_26529 = torch.constant.int 1
    %29017 = torch.aten.slice.Tensor %29016, %int3_26526, %int0_26527, %int9223372036854775807_26528, %int1_26529 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29017, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %29018 = torch_c.to_builtin_tensor %28745 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_26530 = arith.constant 1 : index
    %dim_26531 = tensor.dim %29018, %c1_26530 : tensor<4x?x1x128xf16>
    %29019 = flow.tensor.bitcast %29018 : tensor<4x?x1x128xf16>{%dim_26531} -> tensor<4x?x1x64xcomplex<f16>>{%dim_26531}
    %29020 = torch_c.from_builtin_tensor %29019 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %29020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %29021 = torch.aten.mul.Tensor %29020, %29017 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %29022 = torch_c.to_builtin_tensor %29021 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_26532 = arith.constant 1 : index
    %dim_26533 = tensor.dim %29022, %c1_26532 : tensor<4x?x1x64xcomplex<f32>>
    %29023 = flow.tensor.bitcast %29022 : tensor<4x?x1x64xcomplex<f32>>{%dim_26533} -> tensor<4x?x1x128xf32>{%dim_26533}
    %29024 = torch_c.from_builtin_tensor %29023 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %29024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_26534 = torch.constant.int 5
    %29025 = torch.prims.convert_element_type %29024, %int5_26534 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %29025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_26535 = torch.constant.int 1
    %29026 = torch.aten.size.int %28639, %int1_26535 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_26536 = torch.constant.int 0
    %29027 = torch.aten.add.int %int0_26536, %29026 : !torch.int, !torch.int -> !torch.int
    %int0_26537 = torch.constant.int 0
    %int0_26538 = torch.constant.int 0
    %int1_26539 = torch.constant.int 1
    %29028 = torch.aten.slice.Tensor %28956, %int0_26537, %int0_26538, %29027, %int1_26539 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %29028, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26540 = torch.constant.int 1
    %int0_26541 = torch.constant.int 0
    %int9223372036854775807_26542 = torch.constant.int 9223372036854775807
    %int1_26543 = torch.constant.int 1
    %29029 = torch.aten.slice.Tensor %29028, %int1_26540, %int0_26541, %int9223372036854775807_26542, %int1_26543 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %29029, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26544 = torch.constant.int 0
    %29030 = torch.aten.unsqueeze %29029, %int0_26544 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %29030, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26545 = torch.constant.int 2
    %29031 = torch.aten.unsqueeze %29030, %int2_26545 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29031, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26546 = torch.constant.int 3
    %int0_26547 = torch.constant.int 0
    %int9223372036854775807_26548 = torch.constant.int 9223372036854775807
    %int1_26549 = torch.constant.int 1
    %29032 = torch.aten.slice.Tensor %29031, %int3_26546, %int0_26547, %int9223372036854775807_26548, %int1_26549 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29032, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %29033 = torch_c.to_builtin_tensor %28747 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_26550 = arith.constant 1 : index
    %dim_26551 = tensor.dim %29033, %c1_26550 : tensor<4x?x1x128xf16>
    %29034 = flow.tensor.bitcast %29033 : tensor<4x?x1x128xf16>{%dim_26551} -> tensor<4x?x1x64xcomplex<f16>>{%dim_26551}
    %29035 = torch_c.from_builtin_tensor %29034 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %29035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %29036 = torch.aten.mul.Tensor %29035, %29032 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %29037 = torch_c.to_builtin_tensor %29036 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_26552 = arith.constant 1 : index
    %dim_26553 = tensor.dim %29037, %c1_26552 : tensor<4x?x1x64xcomplex<f32>>
    %29038 = flow.tensor.bitcast %29037 : tensor<4x?x1x64xcomplex<f32>>{%dim_26553} -> tensor<4x?x1x128xf32>{%dim_26553}
    %29039 = torch_c.from_builtin_tensor %29038 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %29039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_26554 = torch.constant.int 5
    %29040 = torch.prims.convert_element_type %29039, %int5_26554 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %29040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_26555 = torch.constant.int 1
    %29041 = torch.aten.size.int %28645, %int1_26555 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_26556 = torch.constant.int 0
    %29042 = torch.aten.add.int %int0_26556, %29041 : !torch.int, !torch.int -> !torch.int
    %int0_26557 = torch.constant.int 0
    %int0_26558 = torch.constant.int 0
    %int1_26559 = torch.constant.int 1
    %29043 = torch.aten.slice.Tensor %28959, %int0_26557, %int0_26558, %29042, %int1_26559 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %29043, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26560 = torch.constant.int 1
    %int0_26561 = torch.constant.int 0
    %int9223372036854775807_26562 = torch.constant.int 9223372036854775807
    %int1_26563 = torch.constant.int 1
    %29044 = torch.aten.slice.Tensor %29043, %int1_26560, %int0_26561, %int9223372036854775807_26562, %int1_26563 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %29044, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26564 = torch.constant.int 0
    %29045 = torch.aten.unsqueeze %29044, %int0_26564 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %29045, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26565 = torch.constant.int 2
    %29046 = torch.aten.unsqueeze %29045, %int2_26565 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29046, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26566 = torch.constant.int 3
    %int0_26567 = torch.constant.int 0
    %int9223372036854775807_26568 = torch.constant.int 9223372036854775807
    %int1_26569 = torch.constant.int 1
    %29047 = torch.aten.slice.Tensor %29046, %int3_26566, %int0_26567, %int9223372036854775807_26568, %int1_26569 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29047, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %29048 = torch_c.to_builtin_tensor %28749 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_26570 = arith.constant 1 : index
    %dim_26571 = tensor.dim %29048, %c1_26570 : tensor<4x?x1x128xf16>
    %29049 = flow.tensor.bitcast %29048 : tensor<4x?x1x128xf16>{%dim_26571} -> tensor<4x?x1x64xcomplex<f16>>{%dim_26571}
    %29050 = torch_c.from_builtin_tensor %29049 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %29050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %29051 = torch.aten.mul.Tensor %29050, %29047 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %29052 = torch_c.to_builtin_tensor %29051 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_26572 = arith.constant 1 : index
    %dim_26573 = tensor.dim %29052, %c1_26572 : tensor<4x?x1x64xcomplex<f32>>
    %29053 = flow.tensor.bitcast %29052 : tensor<4x?x1x64xcomplex<f32>>{%dim_26573} -> tensor<4x?x1x128xf32>{%dim_26573}
    %29054 = torch_c.from_builtin_tensor %29053 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %29054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_26574 = torch.constant.int 5
    %29055 = torch.prims.convert_element_type %29054, %int5_26574 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %29055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_26575 = torch.constant.int 1
    %29056 = torch.aten.size.int %28651, %int1_26575 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_26576 = torch.constant.int 0
    %29057 = torch.aten.add.int %int0_26576, %29056 : !torch.int, !torch.int -> !torch.int
    %int0_26577 = torch.constant.int 0
    %int0_26578 = torch.constant.int 0
    %int1_26579 = torch.constant.int 1
    %29058 = torch.aten.slice.Tensor %28962, %int0_26577, %int0_26578, %29057, %int1_26579 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %29058, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26580 = torch.constant.int 1
    %int0_26581 = torch.constant.int 0
    %int9223372036854775807_26582 = torch.constant.int 9223372036854775807
    %int1_26583 = torch.constant.int 1
    %29059 = torch.aten.slice.Tensor %29058, %int1_26580, %int0_26581, %int9223372036854775807_26582, %int1_26583 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %29059, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26584 = torch.constant.int 0
    %29060 = torch.aten.unsqueeze %29059, %int0_26584 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %29060, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26585 = torch.constant.int 2
    %29061 = torch.aten.unsqueeze %29060, %int2_26585 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29061, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26586 = torch.constant.int 3
    %int0_26587 = torch.constant.int 0
    %int9223372036854775807_26588 = torch.constant.int 9223372036854775807
    %int1_26589 = torch.constant.int 1
    %29062 = torch.aten.slice.Tensor %29061, %int3_26586, %int0_26587, %int9223372036854775807_26588, %int1_26589 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29062, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %29063 = torch_c.to_builtin_tensor %28751 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_26590 = arith.constant 1 : index
    %dim_26591 = tensor.dim %29063, %c1_26590 : tensor<4x?x1x128xf16>
    %29064 = flow.tensor.bitcast %29063 : tensor<4x?x1x128xf16>{%dim_26591} -> tensor<4x?x1x64xcomplex<f16>>{%dim_26591}
    %29065 = torch_c.from_builtin_tensor %29064 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %29065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %29066 = torch.aten.mul.Tensor %29065, %29062 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %29067 = torch_c.to_builtin_tensor %29066 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_26592 = arith.constant 1 : index
    %dim_26593 = tensor.dim %29067, %c1_26592 : tensor<4x?x1x64xcomplex<f32>>
    %29068 = flow.tensor.bitcast %29067 : tensor<4x?x1x64xcomplex<f32>>{%dim_26593} -> tensor<4x?x1x128xf32>{%dim_26593}
    %29069 = torch_c.from_builtin_tensor %29068 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %29069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_26594 = torch.constant.int 5
    %29070 = torch.prims.convert_element_type %29069, %int5_26594 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %29070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_26595 = torch.constant.int 1
    %29071 = torch.aten.size.int %28657, %int1_26595 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_26596 = torch.constant.int 0
    %29072 = torch.aten.add.int %int0_26596, %29071 : !torch.int, !torch.int -> !torch.int
    %int0_26597 = torch.constant.int 0
    %int0_26598 = torch.constant.int 0
    %int1_26599 = torch.constant.int 1
    %29073 = torch.aten.slice.Tensor %28965, %int0_26597, %int0_26598, %29072, %int1_26599 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %29073, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_26600 = torch.constant.int 1
    %int0_26601 = torch.constant.int 0
    %int9223372036854775807_26602 = torch.constant.int 9223372036854775807
    %int1_26603 = torch.constant.int 1
    %29074 = torch.aten.slice.Tensor %29073, %int1_26600, %int0_26601, %int9223372036854775807_26602, %int1_26603 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %29074, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_26604 = torch.constant.int 0
    %29075 = torch.aten.unsqueeze %29074, %int0_26604 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %29075, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_26605 = torch.constant.int 2
    %29076 = torch.aten.unsqueeze %29075, %int2_26605 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29076, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_26606 = torch.constant.int 3
    %int0_26607 = torch.constant.int 0
    %int9223372036854775807_26608 = torch.constant.int 9223372036854775807
    %int1_26609 = torch.constant.int 1
    %29077 = torch.aten.slice.Tensor %29076, %int3_26606, %int0_26607, %int9223372036854775807_26608, %int1_26609 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29077, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %29078 = torch_c.to_builtin_tensor %28753 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_26610 = arith.constant 1 : index
    %dim_26611 = tensor.dim %29078, %c1_26610 : tensor<4x?x1x128xf16>
    %29079 = flow.tensor.bitcast %29078 : tensor<4x?x1x128xf16>{%dim_26611} -> tensor<4x?x1x64xcomplex<f16>>{%dim_26611}
    %29080 = torch_c.from_builtin_tensor %29079 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %29080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %29081 = torch.aten.mul.Tensor %29080, %29077 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %29081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %29082 = torch_c.to_builtin_tensor %29081 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_26612 = arith.constant 1 : index
    %dim_26613 = tensor.dim %29082, %c1_26612 : tensor<4x?x1x64xcomplex<f32>>
    %29083 = flow.tensor.bitcast %29082 : tensor<4x?x1x64xcomplex<f32>>{%dim_26613} -> tensor<4x?x1x128xf32>{%dim_26613}
    %29084 = torch_c.from_builtin_tensor %29083 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %29084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_26614 = torch.constant.int 5
    %29085 = torch.prims.convert_element_type %29084, %int5_26614 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %29085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
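    // Paged-KV-cache slot arithmetic: each of the eight [4, s] page-id tensors
    // is scaled by 64 (slots per page) and offset by 28. The constants suggest
    // two slots (K and V) per transformer block, which would put 28 at this
    // layer's key slot; the +1 applied further down would then select the
    // value slot. Hedged sketch of the computation (constants from the IR):
    //   key_slot = page_ids * 64 + 28
    //   val_slot = key_slot + 1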
    %int64_26615 = torch.constant.int 64
    %29086 = torch.aten.mul.Scalar %2364, %int64_26615 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29086, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_26616 = torch.constant.int 64
    %29087 = torch.aten.mul.Scalar %2367, %int64_26616 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29087, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_26617 = torch.constant.int 64
    %29088 = torch.aten.mul.Scalar %2370, %int64_26617 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29088, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_26618 = torch.constant.int 64
    %29089 = torch.aten.mul.Scalar %2373, %int64_26618 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29089, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_26619 = torch.constant.int 64
    %29090 = torch.aten.mul.Scalar %2376, %int64_26619 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29090, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_26620 = torch.constant.int 64
    %29091 = torch.aten.mul.Scalar %2379, %int64_26620 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29091, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_26621 = torch.constant.int 64
    %29092 = torch.aten.mul.Scalar %2382, %int64_26621 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29092, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_26622 = torch.constant.int 64
    %29093 = torch.aten.mul.Scalar %2385, %int64_26622 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29093, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int28 = torch.constant.int 28
    %int1_26623 = torch.constant.int 1
    %29094 = torch.aten.add.Scalar %29086, %int28, %int1_26623 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29094, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int28_26624 = torch.constant.int 28
    %int1_26625 = torch.constant.int 1
    %29095 = torch.aten.add.Scalar %29087, %int28_26624, %int1_26625 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29095, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int28_26626 = torch.constant.int 28
    %int1_26627 = torch.constant.int 1
    %29096 = torch.aten.add.Scalar %29088, %int28_26626, %int1_26627 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29096, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int28_26628 = torch.constant.int 28
    %int1_26629 = torch.constant.int 1
    %29097 = torch.aten.add.Scalar %29089, %int28_26628, %int1_26629 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29097, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int28_26630 = torch.constant.int 28
    %int1_26631 = torch.constant.int 1
    %29098 = torch.aten.add.Scalar %29090, %int28_26630, %int1_26631 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29098, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int28_26632 = torch.constant.int 28
    %int1_26633 = torch.constant.int 1
    %29099 = torch.aten.add.Scalar %29091, %int28_26632, %int1_26633 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29099, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int28_26634 = torch.constant.int 28
    %int1_26635 = torch.constant.int 1
    %29100 = torch.aten.add.Scalar %29092, %int28_26634, %int1_26635 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29100, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int28_26636 = torch.constant.int 28
    %int1_26637 = torch.constant.int 1
    %29101 = torch.aten.add.Scalar %29093, %int28_26636, %int1_26637 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29101, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
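    // Unflatten the token axis of each RoPE'd key into cache pages:
    // [4, s*16, 1, 128] -> [4, s, 16, 1, 128], i.e. s pages (%3095) of
    // 16 tokens each. Sketch: x.view(4, num_pages, 16, 1, 128).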
    %int4_26638 = torch.constant.int 4
    %int16_26639 = torch.constant.int 16
    %int1_26640 = torch.constant.int 1
    %int128_26641 = torch.constant.int 128
    %29102 = torch.prim.ListConstruct %int4_26638, %3095, %int16_26639, %int1_26640, %int128_26641 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29103 = torch.aten.view %28980, %29102 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29103, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26642 = torch.constant.int 4
    %int16_26643 = torch.constant.int 16
    %int1_26644 = torch.constant.int 1
    %int128_26645 = torch.constant.int 128
    %29104 = torch.prim.ListConstruct %int4_26642, %3095, %int16_26643, %int1_26644, %int128_26645 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29105 = torch.aten.view %28995, %29104 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29105, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26646 = torch.constant.int 4
    %int16_26647 = torch.constant.int 16
    %int1_26648 = torch.constant.int 1
    %int128_26649 = torch.constant.int 128
    %29106 = torch.prim.ListConstruct %int4_26646, %3095, %int16_26647, %int1_26648, %int128_26649 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29107 = torch.aten.view %29010, %29106 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29107, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26650 = torch.constant.int 4
    %int16_26651 = torch.constant.int 16
    %int1_26652 = torch.constant.int 1
    %int128_26653 = torch.constant.int 128
    %29108 = torch.prim.ListConstruct %int4_26650, %3095, %int16_26651, %int1_26652, %int128_26653 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29109 = torch.aten.view %29025, %29108 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29109, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26654 = torch.constant.int 4
    %int16_26655 = torch.constant.int 16
    %int1_26656 = torch.constant.int 1
    %int128_26657 = torch.constant.int 128
    %29110 = torch.prim.ListConstruct %int4_26654, %3095, %int16_26655, %int1_26656, %int128_26657 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29111 = torch.aten.view %29040, %29110 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29111, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26658 = torch.constant.int 4
    %int16_26659 = torch.constant.int 16
    %int1_26660 = torch.constant.int 1
    %int128_26661 = torch.constant.int 128
    %29112 = torch.prim.ListConstruct %int4_26658, %3095, %int16_26659, %int1_26660, %int128_26661 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29113 = torch.aten.view %29055, %29112 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29113, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26662 = torch.constant.int 4
    %int16_26663 = torch.constant.int 16
    %int1_26664 = torch.constant.int 1
    %int128_26665 = torch.constant.int 128
    %29114 = torch.prim.ListConstruct %int4_26662, %3095, %int16_26663, %int1_26664, %int128_26665 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29115 = torch.aten.view %29070, %29114 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29115, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26666 = torch.constant.int 4
    %int16_26667 = torch.constant.int 16
    %int1_26668 = torch.constant.int 1
    %int128_26669 = torch.constant.int 128
    %29116 = torch.prim.ListConstruct %int4_26666, %3095, %int16_26667, %int1_26668, %int128_26669 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29117 = torch.aten.view %29085, %29116 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29117, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
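    // Collapse the batch and page axes so each row is one page:
    // [4, s, 16, 1, 128] -> [4*s, 16, 1, 128].
    // Sketch: x.reshape(4 * num_pages, 16, 1, 128).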
    %int4_26670 = torch.constant.int 4
    %29118 = torch.aten.mul.int %int4_26670, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26671 = torch.constant.int 16
    %int1_26672 = torch.constant.int 1
    %int128_26673 = torch.constant.int 128
    %29119 = torch.prim.ListConstruct %29118, %int16_26671, %int1_26672, %int128_26673 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29120 = torch.aten.view %29103, %29119 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29120, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26674 = torch.constant.int 4
    %29121 = torch.aten.mul.int %int4_26674, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26675 = torch.constant.int 16
    %int1_26676 = torch.constant.int 1
    %int128_26677 = torch.constant.int 128
    %29122 = torch.prim.ListConstruct %29121, %int16_26675, %int1_26676, %int128_26677 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29123 = torch.aten.view %29105, %29122 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29123, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26678 = torch.constant.int 4
    %29124 = torch.aten.mul.int %int4_26678, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26679 = torch.constant.int 16
    %int1_26680 = torch.constant.int 1
    %int128_26681 = torch.constant.int 128
    %29125 = torch.prim.ListConstruct %29124, %int16_26679, %int1_26680, %int128_26681 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29126 = torch.aten.view %29107, %29125 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29126, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26682 = torch.constant.int 4
    %29127 = torch.aten.mul.int %int4_26682, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26683 = torch.constant.int 16
    %int1_26684 = torch.constant.int 1
    %int128_26685 = torch.constant.int 128
    %29128 = torch.prim.ListConstruct %29127, %int16_26683, %int1_26684, %int128_26685 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29129 = torch.aten.view %29109, %29128 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29129, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26686 = torch.constant.int 4
    %29130 = torch.aten.mul.int %int4_26686, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26687 = torch.constant.int 16
    %int1_26688 = torch.constant.int 1
    %int128_26689 = torch.constant.int 128
    %29131 = torch.prim.ListConstruct %29130, %int16_26687, %int1_26688, %int128_26689 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29132 = torch.aten.view %29111, %29131 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29132, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26690 = torch.constant.int 4
    %29133 = torch.aten.mul.int %int4_26690, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26691 = torch.constant.int 16
    %int1_26692 = torch.constant.int 1
    %int128_26693 = torch.constant.int 128
    %29134 = torch.prim.ListConstruct %29133, %int16_26691, %int1_26692, %int128_26693 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29135 = torch.aten.view %29113, %29134 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29135, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26694 = torch.constant.int 4
    %29136 = torch.aten.mul.int %int4_26694, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26695 = torch.constant.int 16
    %int1_26696 = torch.constant.int 1
    %int128_26697 = torch.constant.int 128
    %29137 = torch.prim.ListConstruct %29136, %int16_26695, %int1_26696, %int128_26697 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29138 = torch.aten.view %29115, %29137 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29138, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26698 = torch.constant.int 4
    %29139 = torch.aten.mul.int %int4_26698, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26699 = torch.constant.int 16
    %int1_26700 = torch.constant.int 1
    %int128_26701 = torch.constant.int 128
    %29140 = torch.prim.ListConstruct %29139, %int16_26699, %int1_26700, %int128_26701 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29141 = torch.aten.view %29117, %29140 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29141, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
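    // Flatten the [4, s] key-slot index tensors to [4*s] so that each entry
    // lines up with one row of the flattened page tensors above.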
    %int4_26702 = torch.constant.int 4
    %29142 = torch.aten.mul.int %int4_26702, %3095 : !torch.int, !torch.int -> !torch.int
    %29143 = torch.prim.ListConstruct %29142 : (!torch.int) -> !torch.list<int>
    %29144 = torch.aten.view %29094, %29143 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29144, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26703 = torch.constant.int 4
    %29145 = torch.aten.mul.int %int4_26703, %3095 : !torch.int, !torch.int -> !torch.int
    %29146 = torch.prim.ListConstruct %29145 : (!torch.int) -> !torch.list<int>
    %29147 = torch.aten.view %29095, %29146 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29147, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26704 = torch.constant.int 4
    %29148 = torch.aten.mul.int %int4_26704, %3095 : !torch.int, !torch.int -> !torch.int
    %29149 = torch.prim.ListConstruct %29148 : (!torch.int) -> !torch.list<int>
    %29150 = torch.aten.view %29096, %29149 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29150, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26705 = torch.constant.int 4
    %29151 = torch.aten.mul.int %int4_26705, %3095 : !torch.int, !torch.int -> !torch.int
    %29152 = torch.prim.ListConstruct %29151 : (!torch.int) -> !torch.list<int>
    %29153 = torch.aten.view %29097, %29152 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29153, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26706 = torch.constant.int 4
    %29154 = torch.aten.mul.int %int4_26706, %3095 : !torch.int, !torch.int -> !torch.int
    %29155 = torch.prim.ListConstruct %29154 : (!torch.int) -> !torch.list<int>
    %29156 = torch.aten.view %29098, %29155 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29156, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26707 = torch.constant.int 4
    %29157 = torch.aten.mul.int %int4_26707, %3095 : !torch.int, !torch.int -> !torch.int
    %29158 = torch.prim.ListConstruct %29157 : (!torch.int) -> !torch.list<int>
    %29159 = torch.aten.view %29099, %29158 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29159, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26708 = torch.constant.int 4
    %29160 = torch.aten.mul.int %int4_26708, %3095 : !torch.int, !torch.int -> !torch.int
    %29161 = torch.prim.ListConstruct %29160 : (!torch.int) -> !torch.list<int>
    %29162 = torch.aten.view %29100, %29161 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29162, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26709 = torch.constant.int 4
    %29163 = torch.aten.mul.int %int4_26709, %3095 : !torch.int, !torch.int -> !torch.int
    %29164 = torch.prim.ListConstruct %29163 : (!torch.int) -> !torch.list<int>
    %29165 = torch.aten.view %29101, %29164 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29165, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
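    // The same unflatten/flatten pair is now applied to the eight value
    // projections (%28755 ... %28769); values bypass the RoPE rotation above.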
    %int4_26710 = torch.constant.int 4
    %int16_26711 = torch.constant.int 16
    %int1_26712 = torch.constant.int 1
    %int128_26713 = torch.constant.int 128
    %29166 = torch.prim.ListConstruct %int4_26710, %3095, %int16_26711, %int1_26712, %int128_26713 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29167 = torch.aten.view %28755, %29166 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29167, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26714 = torch.constant.int 4
    %int16_26715 = torch.constant.int 16
    %int1_26716 = torch.constant.int 1
    %int128_26717 = torch.constant.int 128
    %29168 = torch.prim.ListConstruct %int4_26714, %3095, %int16_26715, %int1_26716, %int128_26717 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29169 = torch.aten.view %28757, %29168 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29169, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26718 = torch.constant.int 4
    %int16_26719 = torch.constant.int 16
    %int1_26720 = torch.constant.int 1
    %int128_26721 = torch.constant.int 128
    %29170 = torch.prim.ListConstruct %int4_26718, %3095, %int16_26719, %int1_26720, %int128_26721 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29171 = torch.aten.view %28759, %29170 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29171, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26722 = torch.constant.int 4
    %int16_26723 = torch.constant.int 16
    %int1_26724 = torch.constant.int 1
    %int128_26725 = torch.constant.int 128
    %29172 = torch.prim.ListConstruct %int4_26722, %3095, %int16_26723, %int1_26724, %int128_26725 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29173 = torch.aten.view %28761, %29172 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29173, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26726 = torch.constant.int 4
    %int16_26727 = torch.constant.int 16
    %int1_26728 = torch.constant.int 1
    %int128_26729 = torch.constant.int 128
    %29174 = torch.prim.ListConstruct %int4_26726, %3095, %int16_26727, %int1_26728, %int128_26729 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29175 = torch.aten.view %28763, %29174 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29175, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26730 = torch.constant.int 4
    %int16_26731 = torch.constant.int 16
    %int1_26732 = torch.constant.int 1
    %int128_26733 = torch.constant.int 128
    %29176 = torch.prim.ListConstruct %int4_26730, %3095, %int16_26731, %int1_26732, %int128_26733 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29177 = torch.aten.view %28765, %29176 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29177, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26734 = torch.constant.int 4
    %int16_26735 = torch.constant.int 16
    %int1_26736 = torch.constant.int 1
    %int128_26737 = torch.constant.int 128
    %29178 = torch.prim.ListConstruct %int4_26734, %3095, %int16_26735, %int1_26736, %int128_26737 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29179 = torch.aten.view %28767, %29178 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29179, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26738 = torch.constant.int 4
    %int16_26739 = torch.constant.int 16
    %int1_26740 = torch.constant.int 1
    %int128_26741 = torch.constant.int 128
    %29180 = torch.prim.ListConstruct %int4_26738, %3095, %int16_26739, %int1_26740, %int128_26741 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29181 = torch.aten.view %28769, %29180 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %29181, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_26742 = torch.constant.int 4
    %29182 = torch.aten.mul.int %int4_26742, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26743 = torch.constant.int 16
    %int1_26744 = torch.constant.int 1
    %int128_26745 = torch.constant.int 128
    %29183 = torch.prim.ListConstruct %29182, %int16_26743, %int1_26744, %int128_26745 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29184 = torch.aten.view %29167, %29183 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29184, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26746 = torch.constant.int 4
    %29185 = torch.aten.mul.int %int4_26746, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26747 = torch.constant.int 16
    %int1_26748 = torch.constant.int 1
    %int128_26749 = torch.constant.int 128
    %29186 = torch.prim.ListConstruct %29185, %int16_26747, %int1_26748, %int128_26749 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29187 = torch.aten.view %29169, %29186 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29187, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26750 = torch.constant.int 4
    %29188 = torch.aten.mul.int %int4_26750, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26751 = torch.constant.int 16
    %int1_26752 = torch.constant.int 1
    %int128_26753 = torch.constant.int 128
    %29189 = torch.prim.ListConstruct %29188, %int16_26751, %int1_26752, %int128_26753 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29190 = torch.aten.view %29171, %29189 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29190, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26754 = torch.constant.int 4
    %29191 = torch.aten.mul.int %int4_26754, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26755 = torch.constant.int 16
    %int1_26756 = torch.constant.int 1
    %int128_26757 = torch.constant.int 128
    %29192 = torch.prim.ListConstruct %29191, %int16_26755, %int1_26756, %int128_26757 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29193 = torch.aten.view %29173, %29192 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29193, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26758 = torch.constant.int 4
    %29194 = torch.aten.mul.int %int4_26758, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26759 = torch.constant.int 16
    %int1_26760 = torch.constant.int 1
    %int128_26761 = torch.constant.int 128
    %29195 = torch.prim.ListConstruct %29194, %int16_26759, %int1_26760, %int128_26761 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29196 = torch.aten.view %29175, %29195 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29196, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26762 = torch.constant.int 4
    %29197 = torch.aten.mul.int %int4_26762, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26763 = torch.constant.int 16
    %int1_26764 = torch.constant.int 1
    %int128_26765 = torch.constant.int 128
    %29198 = torch.prim.ListConstruct %29197, %int16_26763, %int1_26764, %int128_26765 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29199 = torch.aten.view %29177, %29198 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29199, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26766 = torch.constant.int 4
    %29200 = torch.aten.mul.int %int4_26766, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26767 = torch.constant.int 16
    %int1_26768 = torch.constant.int 1
    %int128_26769 = torch.constant.int 128
    %29201 = torch.prim.ListConstruct %29200, %int16_26767, %int1_26768, %int128_26769 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29202 = torch.aten.view %29179, %29201 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29202, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_26770 = torch.constant.int 4
    %29203 = torch.aten.mul.int %int4_26770, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_26771 = torch.constant.int 16
    %int1_26772 = torch.constant.int 1
    %int128_26773 = torch.constant.int 128
    %29204 = torch.prim.ListConstruct %29203, %int16_26771, %int1_26772, %int128_26773 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29205 = torch.aten.view %29181, %29204 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29205, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
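    // Shift each key-slot tensor by +1 to address the adjacent value slot
    // within the same cache page (see the slot arithmetic sketched earlier).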
    %int1_26774 = torch.constant.int 1
    %int1_26775 = torch.constant.int 1
    %29206 = torch.aten.add.Scalar %29094, %int1_26774, %int1_26775 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29206, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_26776 = torch.constant.int 1
    %int1_26777 = torch.constant.int 1
    %29207 = torch.aten.add.Scalar %29095, %int1_26776, %int1_26777 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29207, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_26778 = torch.constant.int 1
    %int1_26779 = torch.constant.int 1
    %29208 = torch.aten.add.Scalar %29096, %int1_26778, %int1_26779 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29208, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_26780 = torch.constant.int 1
    %int1_26781 = torch.constant.int 1
    %29209 = torch.aten.add.Scalar %29097, %int1_26780, %int1_26781 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29209, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_26782 = torch.constant.int 1
    %int1_26783 = torch.constant.int 1
    %29210 = torch.aten.add.Scalar %29098, %int1_26782, %int1_26783 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29210, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_26784 = torch.constant.int 1
    %int1_26785 = torch.constant.int 1
    %29211 = torch.aten.add.Scalar %29099, %int1_26784, %int1_26785 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29211, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_26786 = torch.constant.int 1
    %int1_26787 = torch.constant.int 1
    %29212 = torch.aten.add.Scalar %29100, %int1_26786, %int1_26787 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29212, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_26788 = torch.constant.int 1
    %int1_26789 = torch.constant.int 1
    %29213 = torch.aten.add.Scalar %29101, %int1_26788, %int1_26789 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %29213, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
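    // NOTE: the offset [4,?] index tensors are flattened to rank-1 [s0*4] vectors
    // so they can serve as scatter indices for the index_put ops below.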
    %int4_26790 = torch.constant.int 4
    %29214 = torch.aten.mul.int %int4_26790, %3095 : !torch.int, !torch.int -> !torch.int
    %29215 = torch.prim.ListConstruct %29214 : (!torch.int) -> !torch.list<int>
    %29216 = torch.aten.view %29206, %29215 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29216, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26791 = torch.constant.int 4
    %29217 = torch.aten.mul.int %int4_26791, %3095 : !torch.int, !torch.int -> !torch.int
    %29218 = torch.prim.ListConstruct %29217 : (!torch.int) -> !torch.list<int>
    %29219 = torch.aten.view %29207, %29218 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29219, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26792 = torch.constant.int 4
    %29220 = torch.aten.mul.int %int4_26792, %3095 : !torch.int, !torch.int -> !torch.int
    %29221 = torch.prim.ListConstruct %29220 : (!torch.int) -> !torch.list<int>
    %29222 = torch.aten.view %29208, %29221 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29222, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26793 = torch.constant.int 4
    %29223 = torch.aten.mul.int %int4_26793, %3095 : !torch.int, !torch.int -> !torch.int
    %29224 = torch.prim.ListConstruct %29223 : (!torch.int) -> !torch.list<int>
    %29225 = torch.aten.view %29209, %29224 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29225, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26794 = torch.constant.int 4
    %29226 = torch.aten.mul.int %int4_26794, %3095 : !torch.int, !torch.int -> !torch.int
    %29227 = torch.prim.ListConstruct %29226 : (!torch.int) -> !torch.list<int>
    %29228 = torch.aten.view %29210, %29227 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29228, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26795 = torch.constant.int 4
    %29229 = torch.aten.mul.int %int4_26795, %3095 : !torch.int, !torch.int -> !torch.int
    %29230 = torch.prim.ListConstruct %29229 : (!torch.int) -> !torch.list<int>
    %29231 = torch.aten.view %29211, %29230 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29231, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26796 = torch.constant.int 4
    %29232 = torch.aten.mul.int %int4_26796, %3095 : !torch.int, !torch.int -> !torch.int
    %29233 = torch.prim.ListConstruct %29232 : (!torch.int) -> !torch.list<int>
    %29234 = torch.aten.view %29212, %29233 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29234, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_26797 = torch.constant.int 4
    %29235 = torch.aten.mul.int %int4_26797, %3095 : !torch.int, !torch.int -> !torch.int
    %29236 = torch.prim.ListConstruct %29235 : (!torch.int) -> !torch.list<int>
    %29237 = torch.aten.view %29213, %29236 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29237, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
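    // NOTE: per shard, the previously computed index vector (%29144 ... %29165) is
    // concatenated with its +1-offset counterpart, yielding s0*8 scatter
    // destinations.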
    %29238 = torch.prim.ListConstruct %29144, %29216 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_26798 = torch.constant.int 0
    %29239 = torch.aten.cat %29238, %int0_26798 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29239, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %29240 = torch.prim.ListConstruct %29147, %29219 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_26799 = torch.constant.int 0
    %29241 = torch.aten.cat %29240, %int0_26799 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29241, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %29242 = torch.prim.ListConstruct %29150, %29222 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_26800 = torch.constant.int 0
    %29243 = torch.aten.cat %29242, %int0_26800 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29243, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %29244 = torch.prim.ListConstruct %29153, %29225 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_26801 = torch.constant.int 0
    %29245 = torch.aten.cat %29244, %int0_26801 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29245, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %29246 = torch.prim.ListConstruct %29156, %29228 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_26802 = torch.constant.int 0
    %29247 = torch.aten.cat %29246, %int0_26802 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29247, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %29248 = torch.prim.ListConstruct %29159, %29231 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_26803 = torch.constant.int 0
    %29249 = torch.aten.cat %29248, %int0_26803 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29249, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %29250 = torch.prim.ListConstruct %29162, %29234 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_26804 = torch.constant.int 0
    %29251 = torch.aten.cat %29250, %int0_26804 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29251, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %29252 = torch.prim.ListConstruct %29165, %29237 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_26805 = torch.constant.int 0
    %29253 = torch.aten.cat %29252, %int0_26805 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %29253, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
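    // NOTE: the matching value tensors are concatenated the same way, giving one
    // [s0*8,16,1,128] f16 update per shard to pair with the s0*8 indices above.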
    %29254 = torch.prim.ListConstruct %29120, %29184 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_26806 = torch.constant.int 0
    %29255 = torch.aten.cat %29254, %int0_26806 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29255, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29256 = torch.prim.ListConstruct %29123, %29187 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_26807 = torch.constant.int 0
    %29257 = torch.aten.cat %29256, %int0_26807 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29257, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29258 = torch.prim.ListConstruct %29126, %29190 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_26808 = torch.constant.int 0
    %29259 = torch.aten.cat %29258, %int0_26808 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29259, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29260 = torch.prim.ListConstruct %29129, %29193 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_26809 = torch.constant.int 0
    %29261 = torch.aten.cat %29260, %int0_26809 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29261, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29262 = torch.prim.ListConstruct %29132, %29196 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_26810 = torch.constant.int 0
    %29263 = torch.aten.cat %29262, %int0_26810 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29263, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29264 = torch.prim.ListConstruct %29135, %29199 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_26811 = torch.constant.int 0
    %29265 = torch.aten.cat %29264, %int0_26811 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29265, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29266 = torch.prim.ListConstruct %29138, %29202 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_26812 = torch.constant.int 0
    %29267 = torch.aten.cat %29266, %int0_26812 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29267, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29268 = torch.prim.ListConstruct %29141, %29205 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_26813 = torch.constant.int 0
    %29269 = torch.aten.cat %29268, %int0_26813 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29269, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
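    // NOTE: cache scatter, repeated once per device-local cache: view the flat
    // [?,131072] buffer as [?,32,2,16,1,128] (32*2*16*1*128 = 131072; plausibly
    // pages x blocks x K/V x kv-heads x 1 x head-dim), flatten to [s0*64,16,1,128],
    // index_put the update (accumulate = false), then view back to the flat layout.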
    %int32_26814 = torch.constant.int 32
    %int2_26815 = torch.constant.int 2
    %int16_26816 = torch.constant.int 16
    %int1_26817 = torch.constant.int 1
    %int128_26818 = torch.constant.int 128
    %29270 = torch.prim.ListConstruct %3023, %int32_26814, %int2_26815, %int16_26816, %int1_26817, %int128_26818 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29271 = torch.aten.view %27420, %29270 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29271, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_26819 = torch.constant.int 32
    %29272 = torch.aten.mul.int %3023, %int32_26819 : !torch.int, !torch.int -> !torch.int
    %int2_26820 = torch.constant.int 2
    %29273 = torch.aten.mul.int %29272, %int2_26820 : !torch.int, !torch.int -> !torch.int
    %int16_26821 = torch.constant.int 16
    %int1_26822 = torch.constant.int 1
    %int128_26823 = torch.constant.int 128
    %29274 = torch.prim.ListConstruct %29273, %int16_26821, %int1_26822, %int128_26823 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29275 = torch.aten.view %29271, %29274 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29275, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29276 = torch.prim.ListConstruct %29239 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_26824 = torch.constant.bool false
    %29277 = torch.aten.index_put %29275, %29276, %29255, %false_26824 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29277, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_26825 = torch.constant.int 32
    %int2_26826 = torch.constant.int 2
    %int16_26827 = torch.constant.int 16
    %int1_26828 = torch.constant.int 1
    %int128_26829 = torch.constant.int 128
    %29278 = torch.prim.ListConstruct %3023, %int32_26825, %int2_26826, %int16_26827, %int1_26828, %int128_26829 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29279 = torch.aten.view %29277, %29278 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29279, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_26830 = torch.constant.int 131072
    %29280 = torch.prim.ListConstruct %3023, %int131072_26830 : (!torch.int, !torch.int) -> !torch.list<int>
    %29281 = torch.aten.view %29279, %29280 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %29281, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
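    // NOTE: the same view / index_put / view round trip now repeats for the seven
    // remaining device-local caches (%27432, %27444, %27456, %27468, %27480,
    // %27492, %27504).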
    %int32_26831 = torch.constant.int 32
    %int2_26832 = torch.constant.int 2
    %int16_26833 = torch.constant.int 16
    %int1_26834 = torch.constant.int 1
    %int128_26835 = torch.constant.int 128
    %29282 = torch.prim.ListConstruct %3026, %int32_26831, %int2_26832, %int16_26833, %int1_26834, %int128_26835 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29283 = torch.aten.view %27432, %29282 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29283, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_26836 = torch.constant.int 32
    %29284 = torch.aten.mul.int %3026, %int32_26836 : !torch.int, !torch.int -> !torch.int
    %int2_26837 = torch.constant.int 2
    %29285 = torch.aten.mul.int %29284, %int2_26837 : !torch.int, !torch.int -> !torch.int
    %int16_26838 = torch.constant.int 16
    %int1_26839 = torch.constant.int 1
    %int128_26840 = torch.constant.int 128
    %29286 = torch.prim.ListConstruct %29285, %int16_26838, %int1_26839, %int128_26840 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29287 = torch.aten.view %29283, %29286 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29287, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29288 = torch.prim.ListConstruct %29241 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_26841 = torch.constant.bool false
    %29289 = torch.aten.index_put %29287, %29288, %29257, %false_26841 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29289, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_26842 = torch.constant.int 32
    %int2_26843 = torch.constant.int 2
    %int16_26844 = torch.constant.int 16
    %int1_26845 = torch.constant.int 1
    %int128_26846 = torch.constant.int 128
    %29290 = torch.prim.ListConstruct %3026, %int32_26842, %int2_26843, %int16_26844, %int1_26845, %int128_26846 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29291 = torch.aten.view %29289, %29290 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29291, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_26847 = torch.constant.int 131072
    %29292 = torch.prim.ListConstruct %3026, %int131072_26847 : (!torch.int, !torch.int) -> !torch.list<int>
    %29293 = torch.aten.view %29291, %29292 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %29293, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_26848 = torch.constant.int 32
    %int2_26849 = torch.constant.int 2
    %int16_26850 = torch.constant.int 16
    %int1_26851 = torch.constant.int 1
    %int128_26852 = torch.constant.int 128
    %29294 = torch.prim.ListConstruct %3029, %int32_26848, %int2_26849, %int16_26850, %int1_26851, %int128_26852 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29295 = torch.aten.view %27444, %29294 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29295, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_26853 = torch.constant.int 32
    %29296 = torch.aten.mul.int %3029, %int32_26853 : !torch.int, !torch.int -> !torch.int
    %int2_26854 = torch.constant.int 2
    %29297 = torch.aten.mul.int %29296, %int2_26854 : !torch.int, !torch.int -> !torch.int
    %int16_26855 = torch.constant.int 16
    %int1_26856 = torch.constant.int 1
    %int128_26857 = torch.constant.int 128
    %29298 = torch.prim.ListConstruct %29297, %int16_26855, %int1_26856, %int128_26857 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29299 = torch.aten.view %29295, %29298 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29299, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29300 = torch.prim.ListConstruct %29243 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_26858 = torch.constant.bool false
    %29301 = torch.aten.index_put %29299, %29300, %29259, %false_26858 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29301, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_26859 = torch.constant.int 32
    %int2_26860 = torch.constant.int 2
    %int16_26861 = torch.constant.int 16
    %int1_26862 = torch.constant.int 1
    %int128_26863 = torch.constant.int 128
    %29302 = torch.prim.ListConstruct %3029, %int32_26859, %int2_26860, %int16_26861, %int1_26862, %int128_26863 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29303 = torch.aten.view %29301, %29302 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29303, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_26864 = torch.constant.int 131072
    %29304 = torch.prim.ListConstruct %3029, %int131072_26864 : (!torch.int, !torch.int) -> !torch.list<int>
    %29305 = torch.aten.view %29303, %29304 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %29305, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_26865 = torch.constant.int 32
    %int2_26866 = torch.constant.int 2
    %int16_26867 = torch.constant.int 16
    %int1_26868 = torch.constant.int 1
    %int128_26869 = torch.constant.int 128
    %29306 = torch.prim.ListConstruct %3032, %int32_26865, %int2_26866, %int16_26867, %int1_26868, %int128_26869 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29307 = torch.aten.view %27456, %29306 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29307, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_26870 = torch.constant.int 32
    %29308 = torch.aten.mul.int %3032, %int32_26870 : !torch.int, !torch.int -> !torch.int
    %int2_26871 = torch.constant.int 2
    %29309 = torch.aten.mul.int %29308, %int2_26871 : !torch.int, !torch.int -> !torch.int
    %int16_26872 = torch.constant.int 16
    %int1_26873 = torch.constant.int 1
    %int128_26874 = torch.constant.int 128
    %29310 = torch.prim.ListConstruct %29309, %int16_26872, %int1_26873, %int128_26874 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29311 = torch.aten.view %29307, %29310 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29311, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29312 = torch.prim.ListConstruct %29245 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_26875 = torch.constant.bool false
    %29313 = torch.aten.index_put %29311, %29312, %29261, %false_26875 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29313, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_26876 = torch.constant.int 32
    %int2_26877 = torch.constant.int 2
    %int16_26878 = torch.constant.int 16
    %int1_26879 = torch.constant.int 1
    %int128_26880 = torch.constant.int 128
    %29314 = torch.prim.ListConstruct %3032, %int32_26876, %int2_26877, %int16_26878, %int1_26879, %int128_26880 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29315 = torch.aten.view %29313, %29314 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29315, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_26881 = torch.constant.int 131072
    %29316 = torch.prim.ListConstruct %3032, %int131072_26881 : (!torch.int, !torch.int) -> !torch.list<int>
    %29317 = torch.aten.view %29315, %29316 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %29317, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_26882 = torch.constant.int 32
    %int2_26883 = torch.constant.int 2
    %int16_26884 = torch.constant.int 16
    %int1_26885 = torch.constant.int 1
    %int128_26886 = torch.constant.int 128
    %29318 = torch.prim.ListConstruct %3035, %int32_26882, %int2_26883, %int16_26884, %int1_26885, %int128_26886 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29319 = torch.aten.view %27468, %29318 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29319, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_26887 = torch.constant.int 32
    %29320 = torch.aten.mul.int %3035, %int32_26887 : !torch.int, !torch.int -> !torch.int
    %int2_26888 = torch.constant.int 2
    %29321 = torch.aten.mul.int %29320, %int2_26888 : !torch.int, !torch.int -> !torch.int
    %int16_26889 = torch.constant.int 16
    %int1_26890 = torch.constant.int 1
    %int128_26891 = torch.constant.int 128
    %29322 = torch.prim.ListConstruct %29321, %int16_26889, %int1_26890, %int128_26891 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29323 = torch.aten.view %29319, %29322 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29323, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29324 = torch.prim.ListConstruct %29247 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_26892 = torch.constant.bool false
    %29325 = torch.aten.index_put %29323, %29324, %29263, %false_26892 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29325, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_26893 = torch.constant.int 32
    %int2_26894 = torch.constant.int 2
    %int16_26895 = torch.constant.int 16
    %int1_26896 = torch.constant.int 1
    %int128_26897 = torch.constant.int 128
    %29326 = torch.prim.ListConstruct %3035, %int32_26893, %int2_26894, %int16_26895, %int1_26896, %int128_26897 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29327 = torch.aten.view %29325, %29326 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29327, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_26898 = torch.constant.int 131072
    %29328 = torch.prim.ListConstruct %3035, %int131072_26898 : (!torch.int, !torch.int) -> !torch.list<int>
    %29329 = torch.aten.view %29327, %29328 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %29329, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_26899 = torch.constant.int 32
    %int2_26900 = torch.constant.int 2
    %int16_26901 = torch.constant.int 16
    %int1_26902 = torch.constant.int 1
    %int128_26903 = torch.constant.int 128
    %29330 = torch.prim.ListConstruct %3038, %int32_26899, %int2_26900, %int16_26901, %int1_26902, %int128_26903 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29331 = torch.aten.view %27480, %29330 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29331, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_26904 = torch.constant.int 32
    %29332 = torch.aten.mul.int %3038, %int32_26904 : !torch.int, !torch.int -> !torch.int
    %int2_26905 = torch.constant.int 2
    %29333 = torch.aten.mul.int %29332, %int2_26905 : !torch.int, !torch.int -> !torch.int
    %int16_26906 = torch.constant.int 16
    %int1_26907 = torch.constant.int 1
    %int128_26908 = torch.constant.int 128
    %29334 = torch.prim.ListConstruct %29333, %int16_26906, %int1_26907, %int128_26908 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29335 = torch.aten.view %29331, %29334 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29335, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29336 = torch.prim.ListConstruct %29249 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_26909 = torch.constant.bool false
    %29337 = torch.aten.index_put %29335, %29336, %29265, %false_26909 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29337, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_26910 = torch.constant.int 32
    %int2_26911 = torch.constant.int 2
    %int16_26912 = torch.constant.int 16
    %int1_26913 = torch.constant.int 1
    %int128_26914 = torch.constant.int 128
    %29338 = torch.prim.ListConstruct %3038, %int32_26910, %int2_26911, %int16_26912, %int1_26913, %int128_26914 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29339 = torch.aten.view %29337, %29338 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29339, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_26915 = torch.constant.int 131072
    %29340 = torch.prim.ListConstruct %3038, %int131072_26915 : (!torch.int, !torch.int) -> !torch.list<int>
    %29341 = torch.aten.view %29339, %29340 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %29341, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_26916 = torch.constant.int 32
    %int2_26917 = torch.constant.int 2
    %int16_26918 = torch.constant.int 16
    %int1_26919 = torch.constant.int 1
    %int128_26920 = torch.constant.int 128
    %29342 = torch.prim.ListConstruct %3041, %int32_26916, %int2_26917, %int16_26918, %int1_26919, %int128_26920 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29343 = torch.aten.view %27492, %29342 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29343, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_26921 = torch.constant.int 32
    %29344 = torch.aten.mul.int %3041, %int32_26921 : !torch.int, !torch.int -> !torch.int
    %int2_26922 = torch.constant.int 2
    %29345 = torch.aten.mul.int %29344, %int2_26922 : !torch.int, !torch.int -> !torch.int
    %int16_26923 = torch.constant.int 16
    %int1_26924 = torch.constant.int 1
    %int128_26925 = torch.constant.int 128
    %29346 = torch.prim.ListConstruct %29345, %int16_26923, %int1_26924, %int128_26925 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29347 = torch.aten.view %29343, %29346 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29347, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29348 = torch.prim.ListConstruct %29251 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_26926 = torch.constant.bool false
    %29349 = torch.aten.index_put %29347, %29348, %29267, %false_26926 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29349, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_26927 = torch.constant.int 32
    %int2_26928 = torch.constant.int 2
    %int16_26929 = torch.constant.int 16
    %int1_26930 = torch.constant.int 1
    %int128_26931 = torch.constant.int 128
    %29350 = torch.prim.ListConstruct %3041, %int32_26927, %int2_26928, %int16_26929, %int1_26930, %int128_26931 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29351 = torch.aten.view %29349, %29350 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29351, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_26932 = torch.constant.int 131072
    %29352 = torch.prim.ListConstruct %3041, %int131072_26932 : (!torch.int, !torch.int) -> !torch.list<int>
    %29353 = torch.aten.view %29351, %29352 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %29353, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_26933 = torch.constant.int 32
    %int2_26934 = torch.constant.int 2
    %int16_26935 = torch.constant.int 16
    %int1_26936 = torch.constant.int 1
    %int128_26937 = torch.constant.int 128
    %29354 = torch.prim.ListConstruct %3044, %int32_26933, %int2_26934, %int16_26935, %int1_26936, %int128_26937 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29355 = torch.aten.view %27504, %29354 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29355, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_26938 = torch.constant.int 32
    %29356 = torch.aten.mul.int %3044, %int32_26938 : !torch.int, !torch.int -> !torch.int
    %int2_26939 = torch.constant.int 2
    %29357 = torch.aten.mul.int %29356, %int2_26939 : !torch.int, !torch.int -> !torch.int
    %int16_26940 = torch.constant.int 16
    %int1_26941 = torch.constant.int 1
    %int128_26942 = torch.constant.int 128
    %29358 = torch.prim.ListConstruct %29357, %int16_26940, %int1_26941, %int128_26942 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29359 = torch.aten.view %29355, %29358 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29359, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %29360 = torch.prim.ListConstruct %29253 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_26943 = torch.constant.bool false
    %29361 = torch.aten.index_put %29359, %29360, %29269, %false_26943 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %29361, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_26944 = torch.constant.int 32
    %int2_26945 = torch.constant.int 2
    %int16_26946 = torch.constant.int 16
    %int1_26947 = torch.constant.int 1
    %int128_26948 = torch.constant.int 128
    %29362 = torch.prim.ListConstruct %3044, %int32_26944, %int2_26945, %int16_26946, %int1_26947, %int128_26948 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29363 = torch.aten.view %29361, %29362 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %29363, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_26949 = torch.constant.int 131072
    %29364 = torch.prim.ListConstruct %3044, %int131072_26949 : (!torch.int, !torch.int) -> !torch.list<int>
    %29365 = torch.aten.view %29363, %29364 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %29365, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
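    // NOTE: start of a grouped-query-attention style head broadcast for the first
    // set of [4,?,1,128] tensors: unsqueeze inserts a singleton dim at -2, giving
    // [4,?,1,1,128].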
    %int-2_26950 = torch.constant.int -2
    %29366 = torch.aten.unsqueeze %28980, %int-2_26950 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_26951 = torch.constant.int -2
    %29367 = torch.aten.unsqueeze %28995, %int-2_26951 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_26952 = torch.constant.int -2
    %29368 = torch.aten.unsqueeze %29010, %int-2_26952 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_26953 = torch.constant.int -2
    %29369 = torch.aten.unsqueeze %29025, %int-2_26953 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_26954 = torch.constant.int -2
    %29370 = torch.aten.unsqueeze %29040, %int-2_26954 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_26955 = torch.constant.int -2
    %29371 = torch.aten.unsqueeze %29055, %int-2_26955 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_26956 = torch.constant.int -2
    %29372 = torch.aten.unsqueeze %29070, %int-2_26956 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_26957 = torch.constant.int -2
    %29373 = torch.aten.unsqueeze %29085, %int-2_26957 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
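    // NOTE: expand broadcasts the singleton group dim to 4 (the trailing bool is
    // aten::expand's implicit flag, false here), sharing each single kv-head row
    // across what are plausibly 4 query heads: [4,?,1,1,128] -> [4,?,1,4,128].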
    %int4_26958 = torch.constant.int 4
    %int1_26959 = torch.constant.int 1
    %int4_26960 = torch.constant.int 4
    %int128_26961 = torch.constant.int 128
    %29374 = torch.prim.ListConstruct %int4_26958, %28966, %int1_26959, %int4_26960, %int128_26961 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_26962 = torch.constant.bool false
    %29375 = torch.aten.expand %29366, %29374, %false_26962 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_26963 = torch.constant.int 4
    %int1_26964 = torch.constant.int 1
    %int4_26965 = torch.constant.int 4
    %int128_26966 = torch.constant.int 128
    %29376 = torch.prim.ListConstruct %int4_26963, %28966, %int1_26964, %int4_26965, %int128_26966 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_26967 = torch.constant.bool false
    %29377 = torch.aten.expand %29367, %29376, %false_26967 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_26968 = torch.constant.int 4
    %int1_26969 = torch.constant.int 1
    %int4_26970 = torch.constant.int 4
    %int128_26971 = torch.constant.int 128
    %29378 = torch.prim.ListConstruct %int4_26968, %28966, %int1_26969, %int4_26970, %int128_26971 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_26972 = torch.constant.bool false
    %29379 = torch.aten.expand %29368, %29378, %false_26972 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_26973 = torch.constant.int 4
    %int1_26974 = torch.constant.int 1
    %int4_26975 = torch.constant.int 4
    %int128_26976 = torch.constant.int 128
    %29380 = torch.prim.ListConstruct %int4_26973, %28966, %int1_26974, %int4_26975, %int128_26976 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_26977 = torch.constant.bool false
    %29381 = torch.aten.expand %29369, %29380, %false_26977 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_26978 = torch.constant.int 4
    %int1_26979 = torch.constant.int 1
    %int4_26980 = torch.constant.int 4
    %int128_26981 = torch.constant.int 128
    %29382 = torch.prim.ListConstruct %int4_26978, %28966, %int1_26979, %int4_26980, %int128_26981 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_26982 = torch.constant.bool false
    %29383 = torch.aten.expand %29370, %29382, %false_26982 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_26983 = torch.constant.int 4
    %int1_26984 = torch.constant.int 1
    %int4_26985 = torch.constant.int 4
    %int128_26986 = torch.constant.int 128
    %29384 = torch.prim.ListConstruct %int4_26983, %28966, %int1_26984, %int4_26985, %int128_26986 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_26987 = torch.constant.bool false
    %29385 = torch.aten.expand %29371, %29384, %false_26987 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_26988 = torch.constant.int 4
    %int1_26989 = torch.constant.int 1
    %int4_26990 = torch.constant.int 4
    %int128_26991 = torch.constant.int 128
    %29386 = torch.prim.ListConstruct %int4_26988, %28966, %int1_26989, %int4_26990, %int128_26991 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_26992 = torch.constant.bool false
    %29387 = torch.aten.expand %29372, %29386, %false_26992 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_26993 = torch.constant.int 4
    %int1_26994 = torch.constant.int 1
    %int4_26995 = torch.constant.int 4
    %int128_26996 = torch.constant.int 128
    %29388 = torch.prim.ListConstruct %int4_26993, %28966, %int1_26994, %int4_26995, %int128_26996 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_26997 = torch.constant.bool false
    %29389 = torch.aten.expand %29373, %29388, %false_26997 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
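    // NOTE: the [4,?,1,4,128] expansions are then flattened to [4,?,4,128],
    // merging the singleton and group dims into a single heads dim.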
    %int4_26998 = torch.constant.int 4
    %int4_26999 = torch.constant.int 4
    %int128_27000 = torch.constant.int 128
    %29390 = torch.prim.ListConstruct %int4_26998, %28966, %int4_26999, %int128_27000 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29391 = torch.aten.view %29375, %29390 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27001 = torch.constant.int 4
    %int4_27002 = torch.constant.int 4
    %int128_27003 = torch.constant.int 128
    %29392 = torch.prim.ListConstruct %int4_27001, %28966, %int4_27002, %int128_27003 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29393 = torch.aten.view %29377, %29392 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27004 = torch.constant.int 4
    %int4_27005 = torch.constant.int 4
    %int128_27006 = torch.constant.int 128
    %29394 = torch.prim.ListConstruct %int4_27004, %28966, %int4_27005, %int128_27006 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29395 = torch.aten.view %29379, %29394 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27007 = torch.constant.int 4
    %int4_27008 = torch.constant.int 4
    %int128_27009 = torch.constant.int 128
    %29396 = torch.prim.ListConstruct %int4_27007, %28966, %int4_27008, %int128_27009 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29397 = torch.aten.view %29381, %29396 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27010 = torch.constant.int 4
    %int4_27011 = torch.constant.int 4
    %int128_27012 = torch.constant.int 128
    %29398 = torch.prim.ListConstruct %int4_27010, %28966, %int4_27011, %int128_27012 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29399 = torch.aten.view %29383, %29398 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27013 = torch.constant.int 4
    %int4_27014 = torch.constant.int 4
    %int128_27015 = torch.constant.int 128
    %29400 = torch.prim.ListConstruct %int4_27013, %28966, %int4_27014, %int128_27015 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29401 = torch.aten.view %29385, %29400 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27016 = torch.constant.int 4
    %int4_27017 = torch.constant.int 4
    %int128_27018 = torch.constant.int 128
    %29402 = torch.prim.ListConstruct %int4_27016, %28966, %int4_27017, %int128_27018 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29403 = torch.aten.view %29387, %29402 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27019 = torch.constant.int 4
    %int4_27020 = torch.constant.int 4
    %int128_27021 = torch.constant.int 128
    %29404 = torch.prim.ListConstruct %int4_27019, %28966, %int4_27020, %int128_27021 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29405 = torch.aten.view %29389, %29404 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
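    // NOTE: the same unsqueeze / expand / view broadcast is applied to a second
    // set of [4,?,1,128] tensors (%28755 ... %28769), presumably the V heads
    // paired with the K heads above.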
    %int-2_27022 = torch.constant.int -2
    %29406 = torch.aten.unsqueeze %28755, %int-2_27022 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_27023 = torch.constant.int -2
    %29407 = torch.aten.unsqueeze %28757, %int-2_27023 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_27024 = torch.constant.int -2
    %29408 = torch.aten.unsqueeze %28759, %int-2_27024 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_27025 = torch.constant.int -2
    %29409 = torch.aten.unsqueeze %28761, %int-2_27025 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_27026 = torch.constant.int -2
    %29410 = torch.aten.unsqueeze %28763, %int-2_27026 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_27027 = torch.constant.int -2
    %29411 = torch.aten.unsqueeze %28765, %int-2_27027 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_27028 = torch.constant.int -2
    %29412 = torch.aten.unsqueeze %28767, %int-2_27028 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_27029 = torch.constant.int -2
    %29413 = torch.aten.unsqueeze %28769, %int-2_27029 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %29413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_27030 = torch.constant.int 1
    %29414 = torch.aten.size.int %28679, %int1_27030 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
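    // NOTE: the dynamic extent for the expands below is re-read from dim 1 of
    // %28679 ([4,?,128]) rather than reusing %28966; both positions map to
    // s0 * 16 in the bound symbolic shapes.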
    %int4_27031 = torch.constant.int 4
    %int1_27032 = torch.constant.int 1
    %int4_27033 = torch.constant.int 4
    %int128_27034 = torch.constant.int 128
    %29415 = torch.prim.ListConstruct %int4_27031, %29414, %int1_27032, %int4_27033, %int128_27034 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_27035 = torch.constant.bool false
    %29416 = torch.aten.expand %29406, %29415, %false_27035 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_27036 = torch.constant.int 4
    %int1_27037 = torch.constant.int 1
    %int4_27038 = torch.constant.int 4
    %int128_27039 = torch.constant.int 128
    %29417 = torch.prim.ListConstruct %int4_27036, %29414, %int1_27037, %int4_27038, %int128_27039 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_27040 = torch.constant.bool false
    %29418 = torch.aten.expand %29407, %29417, %false_27040 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_27041 = torch.constant.int 4
    %int1_27042 = torch.constant.int 1
    %int4_27043 = torch.constant.int 4
    %int128_27044 = torch.constant.int 128
    %29419 = torch.prim.ListConstruct %int4_27041, %29414, %int1_27042, %int4_27043, %int128_27044 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_27045 = torch.constant.bool false
    %29420 = torch.aten.expand %29408, %29419, %false_27045 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_27046 = torch.constant.int 4
    %int1_27047 = torch.constant.int 1
    %int4_27048 = torch.constant.int 4
    %int128_27049 = torch.constant.int 128
    %29421 = torch.prim.ListConstruct %int4_27046, %29414, %int1_27047, %int4_27048, %int128_27049 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_27050 = torch.constant.bool false
    %29422 = torch.aten.expand %29409, %29421, %false_27050 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_27051 = torch.constant.int 4
    %int1_27052 = torch.constant.int 1
    %int4_27053 = torch.constant.int 4
    %int128_27054 = torch.constant.int 128
    %29423 = torch.prim.ListConstruct %int4_27051, %29414, %int1_27052, %int4_27053, %int128_27054 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_27055 = torch.constant.bool false
    %29424 = torch.aten.expand %29410, %29423, %false_27055 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_27056 = torch.constant.int 4
    %int1_27057 = torch.constant.int 1
    %int4_27058 = torch.constant.int 4
    %int128_27059 = torch.constant.int 128
    %29425 = torch.prim.ListConstruct %int4_27056, %29414, %int1_27057, %int4_27058, %int128_27059 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_27060 = torch.constant.bool false
    %29426 = torch.aten.expand %29411, %29425, %false_27060 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_27061 = torch.constant.int 4
    %int1_27062 = torch.constant.int 1
    %int4_27063 = torch.constant.int 4
    %int128_27064 = torch.constant.int 128
    %29427 = torch.prim.ListConstruct %int4_27061, %29414, %int1_27062, %int4_27063, %int128_27064 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_27065 = torch.constant.bool false
    %29428 = torch.aten.expand %29412, %29427, %false_27065 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_27066 = torch.constant.int 4
    %int1_27067 = torch.constant.int 1
    %int4_27068 = torch.constant.int 4
    %int128_27069 = torch.constant.int 128
    %29429 = torch.prim.ListConstruct %int4_27066, %29414, %int1_27067, %int4_27068, %int128_27069 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_27070 = torch.constant.bool false
    %29430 = torch.aten.expand %29413, %29429, %false_27070 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %29430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_27071 = torch.constant.int 4
    %int4_27072 = torch.constant.int 4
    %int128_27073 = torch.constant.int 128
    %29431 = torch.prim.ListConstruct %int4_27071, %29414, %int4_27072, %int128_27073 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29432 = torch.aten.view %29416, %29431 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27074 = torch.constant.int 4
    %int4_27075 = torch.constant.int 4
    %int128_27076 = torch.constant.int 128
    %29433 = torch.prim.ListConstruct %int4_27074, %29414, %int4_27075, %int128_27076 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29434 = torch.aten.view %29418, %29433 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27077 = torch.constant.int 4
    %int4_27078 = torch.constant.int 4
    %int128_27079 = torch.constant.int 128
    %29435 = torch.prim.ListConstruct %int4_27077, %29414, %int4_27078, %int128_27079 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29436 = torch.aten.view %29420, %29435 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27080 = torch.constant.int 4
    %int4_27081 = torch.constant.int 4
    %int128_27082 = torch.constant.int 128
    %29437 = torch.prim.ListConstruct %int4_27080, %29414, %int4_27081, %int128_27082 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29438 = torch.aten.view %29422, %29437 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27083 = torch.constant.int 4
    %int4_27084 = torch.constant.int 4
    %int128_27085 = torch.constant.int 128
    %29439 = torch.prim.ListConstruct %int4_27083, %29414, %int4_27084, %int128_27085 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29440 = torch.aten.view %29424, %29439 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27086 = torch.constant.int 4
    %int4_27087 = torch.constant.int 4
    %int128_27088 = torch.constant.int 128
    %29441 = torch.prim.ListConstruct %int4_27086, %29414, %int4_27087, %int128_27088 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29442 = torch.aten.view %29426, %29441 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27089 = torch.constant.int 4
    %int4_27090 = torch.constant.int 4
    %int128_27091 = torch.constant.int 128
    %29443 = torch.prim.ListConstruct %int4_27089, %29414, %int4_27090, %int128_27091 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29444 = torch.aten.view %29428, %29443 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_27092 = torch.constant.int 4
    %int4_27093 = torch.constant.int 4
    %int128_27094 = torch.constant.int 128
    %29445 = torch.prim.ListConstruct %int4_27092, %29414, %int4_27093, %int128_27094 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29446 = torch.aten.view %29430, %29445 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
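    // Transpose dims 1 and 2 on every attention operand, moving from
    // [batch, seq, heads, head_dim] to [batch, heads, seq, head_dim]
    // ([4,?,4,128] -> [4,4,?,128]) as the attention kernel expects.
    // 24 transposes = 8 shards x {query, key, value} (the three operand
    // positions of the attention calls below).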
    %int1_27095 = torch.constant.int 1
    %int2_27096 = torch.constant.int 2
    %29447 = torch.aten.transpose.int %28822, %int1_27095, %int2_27096 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29447, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27097 = torch.constant.int 1
    %int2_27098 = torch.constant.int 2
    %29448 = torch.aten.transpose.int %28837, %int1_27097, %int2_27098 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29448, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27099 = torch.constant.int 1
    %int2_27100 = torch.constant.int 2
    %29449 = torch.aten.transpose.int %28852, %int1_27099, %int2_27100 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29449, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27101 = torch.constant.int 1
    %int2_27102 = torch.constant.int 2
    %29450 = torch.aten.transpose.int %28867, %int1_27101, %int2_27102 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29450, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27103 = torch.constant.int 1
    %int2_27104 = torch.constant.int 2
    %29451 = torch.aten.transpose.int %28882, %int1_27103, %int2_27104 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29451, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27105 = torch.constant.int 1
    %int2_27106 = torch.constant.int 2
    %29452 = torch.aten.transpose.int %28897, %int1_27105, %int2_27106 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29452, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27107 = torch.constant.int 1
    %int2_27108 = torch.constant.int 2
    %29453 = torch.aten.transpose.int %28912, %int1_27107, %int2_27108 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29453, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27109 = torch.constant.int 1
    %int2_27110 = torch.constant.int 2
    %29454 = torch.aten.transpose.int %28927, %int1_27109, %int2_27110 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29454, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27111 = torch.constant.int 1
    %int2_27112 = torch.constant.int 2
    %29455 = torch.aten.transpose.int %29391, %int1_27111, %int2_27112 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29455, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27113 = torch.constant.int 1
    %int2_27114 = torch.constant.int 2
    %29456 = torch.aten.transpose.int %29393, %int1_27113, %int2_27114 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29456, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27115 = torch.constant.int 1
    %int2_27116 = torch.constant.int 2
    %29457 = torch.aten.transpose.int %29395, %int1_27115, %int2_27116 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29457, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27117 = torch.constant.int 1
    %int2_27118 = torch.constant.int 2
    %29458 = torch.aten.transpose.int %29397, %int1_27117, %int2_27118 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29458, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27119 = torch.constant.int 1
    %int2_27120 = torch.constant.int 2
    %29459 = torch.aten.transpose.int %29399, %int1_27119, %int2_27120 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29459, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27121 = torch.constant.int 1
    %int2_27122 = torch.constant.int 2
    %29460 = torch.aten.transpose.int %29401, %int1_27121, %int2_27122 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29460, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27123 = torch.constant.int 1
    %int2_27124 = torch.constant.int 2
    %29461 = torch.aten.transpose.int %29403, %int1_27123, %int2_27124 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29461, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27125 = torch.constant.int 1
    %int2_27126 = torch.constant.int 2
    %29462 = torch.aten.transpose.int %29405, %int1_27125, %int2_27126 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29462, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27127 = torch.constant.int 1
    %int2_27128 = torch.constant.int 2
    %29463 = torch.aten.transpose.int %29432, %int1_27127, %int2_27128 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29463, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27129 = torch.constant.int 1
    %int2_27130 = torch.constant.int 2
    %29464 = torch.aten.transpose.int %29434, %int1_27129, %int2_27130 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29464, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27131 = torch.constant.int 1
    %int2_27132 = torch.constant.int 2
    %29465 = torch.aten.transpose.int %29436, %int1_27131, %int2_27132 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29465, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27133 = torch.constant.int 1
    %int2_27134 = torch.constant.int 2
    %29466 = torch.aten.transpose.int %29438, %int1_27133, %int2_27134 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29466, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27135 = torch.constant.int 1
    %int2_27136 = torch.constant.int 2
    %29467 = torch.aten.transpose.int %29440, %int1_27135, %int2_27136 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29467, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27137 = torch.constant.int 1
    %int2_27138 = torch.constant.int 2
    %29468 = torch.aten.transpose.int %29442, %int1_27137, %int2_27138 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29468, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27139 = torch.constant.int 1
    %int2_27140 = torch.constant.int 2
    %29469 = torch.aten.transpose.int %29444, %int1_27139, %int2_27140 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29469, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_27141 = torch.constant.int 1
    %int2_27142 = torch.constant.int 2
    %29470 = torch.aten.transpose.int %29446, %int1_27141, %int2_27142 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %29470, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
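    // One scaled-dot-product flash attention per device shard: dropout 0.0,
    // is_causal = true, and no explicit mask or scale (the two none operands).
    // Each call yields the f16 attention output plus an auxiliary f32 tensor
    // (the flash-attention logsumexp, result #1, unused below).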
    %float0.000000e00_27143 = torch.constant.float 0.000000e+00
    %true_27144 = torch.constant.bool true
    %none_27145 = torch.constant.none
    %none_27146 = torch.constant.none
    %29471:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%29447, %29455, %29463, %float0.000000e00_27143, %true_27144, %none_27145, %none_27146) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %29471#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_27147 = torch.constant.float 0.000000e+00
    %true_27148 = torch.constant.bool true
    %none_27149 = torch.constant.none
    %none_27150 = torch.constant.none
    %29472:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%29448, %29456, %29464, %float0.000000e00_27147, %true_27148, %none_27149, %none_27150) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %29472#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_27151 = torch.constant.float 0.000000e+00
    %true_27152 = torch.constant.bool true
    %none_27153 = torch.constant.none
    %none_27154 = torch.constant.none
    %29473:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%29449, %29457, %29465, %float0.000000e00_27151, %true_27152, %none_27153, %none_27154) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %29473#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_27155 = torch.constant.float 0.000000e+00
    %true_27156 = torch.constant.bool true
    %none_27157 = torch.constant.none
    %none_27158 = torch.constant.none
    %29474:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%29450, %29458, %29466, %float0.000000e00_27155, %true_27156, %none_27157, %none_27158) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %29474#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_27159 = torch.constant.float 0.000000e+00
    %true_27160 = torch.constant.bool true
    %none_27161 = torch.constant.none
    %none_27162 = torch.constant.none
    %29475:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%29451, %29459, %29467, %float0.000000e00_27159, %true_27160, %none_27161, %none_27162) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %29475#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_27163 = torch.constant.float 0.000000e+00
    %true_27164 = torch.constant.bool true
    %none_27165 = torch.constant.none
    %none_27166 = torch.constant.none
    %29476:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%29452, %29460, %29468, %float0.000000e00_27163, %true_27164, %none_27165, %none_27166) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %29476#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_27167 = torch.constant.float 0.000000e+00
    %true_27168 = torch.constant.bool true
    %none_27169 = torch.constant.none
    %none_27170 = torch.constant.none
    %29477:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%29453, %29461, %29469, %float0.000000e00_27167, %true_27168, %none_27169, %none_27170) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %29477#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_27171 = torch.constant.float 0.000000e+00
    %true_27172 = torch.constant.bool true
    %none_27173 = torch.constant.none
    %none_27174 = torch.constant.none
    %29478:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%29454, %29462, %29470, %float0.000000e00_27171, %true_27172, %none_27173, %none_27174) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %29478#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
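    // Transpose each attention output back to [batch, seq, heads, head_dim]:
    // [4,4,?,128] -> [4,?,4,128].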
    %int1_27175 = torch.constant.int 1
    %int2_27176 = torch.constant.int 2
    %29479 = torch.aten.transpose.int %29471#0, %int1_27175, %int2_27176 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_27177 = torch.constant.int 1
    %int2_27178 = torch.constant.int 2
    %29480 = torch.aten.transpose.int %29472#0, %int1_27177, %int2_27178 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_27179 = torch.constant.int 1
    %int2_27180 = torch.constant.int 2
    %29481 = torch.aten.transpose.int %29473#0, %int1_27179, %int2_27180 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_27181 = torch.constant.int 1
    %int2_27182 = torch.constant.int 2
    %29482 = torch.aten.transpose.int %29474#0, %int1_27181, %int2_27182 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_27183 = torch.constant.int 1
    %int2_27184 = torch.constant.int 2
    %29483 = torch.aten.transpose.int %29475#0, %int1_27183, %int2_27184 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_27185 = torch.constant.int 1
    %int2_27186 = torch.constant.int 2
    %29484 = torch.aten.transpose.int %29476#0, %int1_27185, %int2_27186 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_27187 = torch.constant.int 1
    %int2_27188 = torch.constant.int 2
    %29485 = torch.aten.transpose.int %29477#0, %int1_27187, %int2_27188 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_27189 = torch.constant.int 1
    %int2_27190 = torch.constant.int 2
    %29486 = torch.aten.transpose.int %29478#0, %int1_27189, %int2_27190 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %29486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
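    // Flatten the per-shard head dimensions: [4,?,4,128] -> [4,?,512]
    // (4 heads x 128 head_dim = 512 columns per shard).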
    %int4_27191 = torch.constant.int 4
    %int512_27192 = torch.constant.int 512
    %29487 = torch.prim.ListConstruct %int4_27191, %28808, %int512_27192 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29488 = torch.aten.view %29479, %29487 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %29488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27193 = torch.constant.int 4
    %int512_27194 = torch.constant.int 512
    %29489 = torch.prim.ListConstruct %int4_27193, %28823, %int512_27194 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29490 = torch.aten.view %29480, %29489 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %29490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27195 = torch.constant.int 4
    %int512_27196 = torch.constant.int 512
    %29491 = torch.prim.ListConstruct %int4_27195, %28838, %int512_27196 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29492 = torch.aten.view %29481, %29491 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %29492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27197 = torch.constant.int 4
    %int512_27198 = torch.constant.int 512
    %29493 = torch.prim.ListConstruct %int4_27197, %28853, %int512_27198 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29494 = torch.aten.view %29482, %29493 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %29494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27199 = torch.constant.int 4
    %int512_27200 = torch.constant.int 512
    %29495 = torch.prim.ListConstruct %int4_27199, %28868, %int512_27200 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29496 = torch.aten.view %29483, %29495 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %29496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27201 = torch.constant.int 4
    %int512_27202 = torch.constant.int 512
    %29497 = torch.prim.ListConstruct %int4_27201, %28883, %int512_27202 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29498 = torch.aten.view %29484, %29497 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %29498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27203 = torch.constant.int 4
    %int512_27204 = torch.constant.int 512
    %29499 = torch.prim.ListConstruct %int4_27203, %28898, %int512_27204 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29500 = torch.aten.view %29485, %29499 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %29500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27205 = torch.constant.int 4
    %int512_27206 = torch.constant.int 512
    %29501 = torch.prim.ListConstruct %int4_27205, %28913, %int512_27206 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29502 = torch.aten.view %29486, %29501 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %29502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
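    // Permute each projection weight shard %1048..%1055 from [4096,512] to
    // [512,4096] so the projection can be written as a plain matmul; these are
    // likely the attn_output weight shards (one per device).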
    %int1_27207 = torch.constant.int 1
    %int0_27208 = torch.constant.int 0
    %29503 = torch.prim.ListConstruct %int1_27207, %int0_27208 : (!torch.int, !torch.int) -> !torch.list<int>
    %29504 = torch.aten.permute %1048, %29503 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_27209 = torch.constant.int 1
    %int0_27210 = torch.constant.int 0
    %29505 = torch.prim.ListConstruct %int1_27209, %int0_27210 : (!torch.int, !torch.int) -> !torch.list<int>
    %29506 = torch.aten.permute %1049, %29505 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_27211 = torch.constant.int 1
    %int0_27212 = torch.constant.int 0
    %29507 = torch.prim.ListConstruct %int1_27211, %int0_27212 : (!torch.int, !torch.int) -> !torch.list<int>
    %29508 = torch.aten.permute %1050, %29507 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_27213 = torch.constant.int 1
    %int0_27214 = torch.constant.int 0
    %29509 = torch.prim.ListConstruct %int1_27213, %int0_27214 : (!torch.int, !torch.int) -> !torch.list<int>
    %29510 = torch.aten.permute %1051, %29509 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_27215 = torch.constant.int 1
    %int0_27216 = torch.constant.int 0
    %29511 = torch.prim.ListConstruct %int1_27215, %int0_27216 : (!torch.int, !torch.int) -> !torch.list<int>
    %29512 = torch.aten.permute %1052, %29511 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_27217 = torch.constant.int 1
    %int0_27218 = torch.constant.int 0
    %29513 = torch.prim.ListConstruct %int1_27217, %int0_27218 : (!torch.int, !torch.int) -> !torch.list<int>
    %29514 = torch.aten.permute %1053, %29513 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_27219 = torch.constant.int 1
    %int0_27220 = torch.constant.int 0
    %29515 = torch.prim.ListConstruct %int1_27219, %int0_27220 : (!torch.int, !torch.int) -> !torch.list<int>
    %29516 = torch.aten.permute %1054, %29515 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_27221 = torch.constant.int 1
    %int0_27222 = torch.constant.int 0
    %29517 = torch.prim.ListConstruct %int1_27221, %int0_27222 : (!torch.int, !torch.int) -> !torch.list<int>
    %29518 = torch.aten.permute %1055, %29517 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
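    // Per-shard output projection: collapse batch and sequence
    // ([4,?,512] -> [?,512], where ? = 4 * seq via torch.aten.mul.int),
    // matmul against the [512,4096] weight shard, then restore [4,?,4096].
    // This produces eight partial projections, one per device.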
    %int4_27223 = torch.constant.int 4
    %29519 = torch.aten.mul.int %int4_27223, %28808 : !torch.int, !torch.int -> !torch.int
    %int512_27224 = torch.constant.int 512
    %29520 = torch.prim.ListConstruct %29519, %int512_27224 : (!torch.int, !torch.int) -> !torch.list<int>
    %29521 = torch.aten.view %29488, %29520 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %29521, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %29522 = torch.aten.mm %29521, %29504 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29522, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27225 = torch.constant.int 4
    %int4096_27226 = torch.constant.int 4096
    %29523 = torch.prim.ListConstruct %int4_27225, %28808, %int4096_27226 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29524 = torch.aten.view %29522, %29523 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_27227 = torch.constant.int 4
    %29525 = torch.aten.mul.int %int4_27227, %28823 : !torch.int, !torch.int -> !torch.int
    %int512_27228 = torch.constant.int 512
    %29526 = torch.prim.ListConstruct %29525, %int512_27228 : (!torch.int, !torch.int) -> !torch.list<int>
    %29527 = torch.aten.view %29490, %29526 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %29527, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %29528 = torch.aten.mm %29527, %29506 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29528, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27229 = torch.constant.int 4
    %int4096_27230 = torch.constant.int 4096
    %29529 = torch.prim.ListConstruct %int4_27229, %28823, %int4096_27230 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29530 = torch.aten.view %29528, %29529 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_27231 = torch.constant.int 4
    %29531 = torch.aten.mul.int %int4_27231, %28838 : !torch.int, !torch.int -> !torch.int
    %int512_27232 = torch.constant.int 512
    %29532 = torch.prim.ListConstruct %29531, %int512_27232 : (!torch.int, !torch.int) -> !torch.list<int>
    %29533 = torch.aten.view %29492, %29532 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %29533, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %29534 = torch.aten.mm %29533, %29508 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29534, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27233 = torch.constant.int 4
    %int4096_27234 = torch.constant.int 4096
    %29535 = torch.prim.ListConstruct %int4_27233, %28838, %int4096_27234 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29536 = torch.aten.view %29534, %29535 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_27235 = torch.constant.int 4
    %29537 = torch.aten.mul.int %int4_27235, %28853 : !torch.int, !torch.int -> !torch.int
    %int512_27236 = torch.constant.int 512
    %29538 = torch.prim.ListConstruct %29537, %int512_27236 : (!torch.int, !torch.int) -> !torch.list<int>
    %29539 = torch.aten.view %29494, %29538 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %29539, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %29540 = torch.aten.mm %29539, %29510 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29540, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27237 = torch.constant.int 4
    %int4096_27238 = torch.constant.int 4096
    %29541 = torch.prim.ListConstruct %int4_27237, %28853, %int4096_27238 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29542 = torch.aten.view %29540, %29541 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_27239 = torch.constant.int 4
    %29543 = torch.aten.mul.int %int4_27239, %28868 : !torch.int, !torch.int -> !torch.int
    %int512_27240 = torch.constant.int 512
    %29544 = torch.prim.ListConstruct %29543, %int512_27240 : (!torch.int, !torch.int) -> !torch.list<int>
    %29545 = torch.aten.view %29496, %29544 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %29545, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %29546 = torch.aten.mm %29545, %29512 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29546, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27241 = torch.constant.int 4
    %int4096_27242 = torch.constant.int 4096
    %29547 = torch.prim.ListConstruct %int4_27241, %28868, %int4096_27242 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29548 = torch.aten.view %29546, %29547 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_27243 = torch.constant.int 4
    %29549 = torch.aten.mul.int %int4_27243, %28883 : !torch.int, !torch.int -> !torch.int
    %int512_27244 = torch.constant.int 512
    %29550 = torch.prim.ListConstruct %29549, %int512_27244 : (!torch.int, !torch.int) -> !torch.list<int>
    %29551 = torch.aten.view %29498, %29550 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %29551, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %29552 = torch.aten.mm %29551, %29514 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29552, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27245 = torch.constant.int 4
    %int4096_27246 = torch.constant.int 4096
    %29553 = torch.prim.ListConstruct %int4_27245, %28883, %int4096_27246 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29554 = torch.aten.view %29552, %29553 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_27247 = torch.constant.int 4
    %29555 = torch.aten.mul.int %int4_27247, %28898 : !torch.int, !torch.int -> !torch.int
    %int512_27248 = torch.constant.int 512
    %29556 = torch.prim.ListConstruct %29555, %int512_27248 : (!torch.int, !torch.int) -> !torch.list<int>
    %29557 = torch.aten.view %29500, %29556 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %29557, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %29558 = torch.aten.mm %29557, %29516 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29558, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27249 = torch.constant.int 4
    %int4096_27250 = torch.constant.int 4096
    %29559 = torch.prim.ListConstruct %int4_27249, %28898, %int4096_27250 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29560 = torch.aten.view %29558, %29559 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_27251 = torch.constant.int 4
    %29561 = torch.aten.mul.int %int4_27251, %28913 : !torch.int, !torch.int -> !torch.int
    %int512_27252 = torch.constant.int 512
    %29562 = torch.prim.ListConstruct %29561, %int512_27252 : (!torch.int, !torch.int) -> !torch.list<int>
    %29563 = torch.aten.view %29502, %29562 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %29563, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %29564 = torch.aten.mm %29563, %29518 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29564, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27253 = torch.constant.int 4
    %int4096_27254 = torch.constant.int 4096
    %29565 = torch.prim.ListConstruct %int4_27253, %28913, %int4096_27254 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29566 = torch.aten.view %29564, %29565 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
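    // Sum-reduce the eight partials for @__device_0: the seven remote partials
    // (%29530..%29566) are transferred to device 0 and accumulated into the
    // local partial %29524 by a chain of torch.aten.add.Tensor ops --
    // effectively an all-reduce (sum), materialized separately per device.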
    %29567 = torch_c.to_builtin_tensor %29530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27255 = arith.constant 1 : index
    %dim_27256 = tensor.dim %29567, %c1_27255 : tensor<4x?x4096xf16>
    %29568 = flow.tensor.transfer %29567 : tensor<4x?x4096xf16>{%dim_27256} to #hal.device.promise<@__device_0>
    %29569 = torch_c.from_builtin_tensor %29568 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29570 = torch_c.to_builtin_tensor %29536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27257 = arith.constant 1 : index
    %dim_27258 = tensor.dim %29570, %c1_27257 : tensor<4x?x4096xf16>
    %29571 = flow.tensor.transfer %29570 : tensor<4x?x4096xf16>{%dim_27258} to #hal.device.promise<@__device_0>
    %29572 = torch_c.from_builtin_tensor %29571 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29573 = torch_c.to_builtin_tensor %29542 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27259 = arith.constant 1 : index
    %dim_27260 = tensor.dim %29573, %c1_27259 : tensor<4x?x4096xf16>
    %29574 = flow.tensor.transfer %29573 : tensor<4x?x4096xf16>{%dim_27260} to #hal.device.promise<@__device_0>
    %29575 = torch_c.from_builtin_tensor %29574 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29576 = torch_c.to_builtin_tensor %29548 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27261 = arith.constant 1 : index
    %dim_27262 = tensor.dim %29576, %c1_27261 : tensor<4x?x4096xf16>
    %29577 = flow.tensor.transfer %29576 : tensor<4x?x4096xf16>{%dim_27262} to #hal.device.promise<@__device_0>
    %29578 = torch_c.from_builtin_tensor %29577 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29579 = torch_c.to_builtin_tensor %29554 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27263 = arith.constant 1 : index
    %dim_27264 = tensor.dim %29579, %c1_27263 : tensor<4x?x4096xf16>
    %29580 = flow.tensor.transfer %29579 : tensor<4x?x4096xf16>{%dim_27264} to #hal.device.promise<@__device_0>
    %29581 = torch_c.from_builtin_tensor %29580 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29582 = torch_c.to_builtin_tensor %29560 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27265 = arith.constant 1 : index
    %dim_27266 = tensor.dim %29582, %c1_27265 : tensor<4x?x4096xf16>
    %29583 = flow.tensor.transfer %29582 : tensor<4x?x4096xf16>{%dim_27266} to #hal.device.promise<@__device_0>
    %29584 = torch_c.from_builtin_tensor %29583 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29585 = torch_c.to_builtin_tensor %29566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27267 = arith.constant 1 : index
    %dim_27268 = tensor.dim %29585, %c1_27267 : tensor<4x?x4096xf16>
    %29586 = flow.tensor.transfer %29585 : tensor<4x?x4096xf16>{%dim_27268} to #hal.device.promise<@__device_0>
    %29587 = torch_c.from_builtin_tensor %29586 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27269 = torch.constant.int 1
    %29588 = torch.aten.add.Tensor %29524, %29569, %int1_27269 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27270 = torch.constant.int 1
    %29589 = torch.aten.add.Tensor %29588, %29572, %int1_27270 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27271 = torch.constant.int 1
    %29590 = torch.aten.add.Tensor %29589, %29575, %int1_27271 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27272 = torch.constant.int 1
    %29591 = torch.aten.add.Tensor %29590, %29578, %int1_27272 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27273 = torch.constant.int 1
    %29592 = torch.aten.add.Tensor %29591, %29581, %int1_27273 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27274 = torch.constant.int 1
    %29593 = torch.aten.add.Tensor %29592, %29584, %int1_27274 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27275 = torch.constant.int 1
    %29594 = torch.aten.add.Tensor %29593, %29587, %int1_27275 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
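    // Same reduction replicated for @__device_1: transfer the other seven
    // partials (including device 0's %29524) and sum them with the local
    // partial %29530.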
    %29595 = torch_c.to_builtin_tensor %29524 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27276 = arith.constant 1 : index
    %dim_27277 = tensor.dim %29595, %c1_27276 : tensor<4x?x4096xf16>
    %29596 = flow.tensor.transfer %29595 : tensor<4x?x4096xf16>{%dim_27277} to #hal.device.promise<@__device_1>
    %29597 = torch_c.from_builtin_tensor %29596 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29598 = torch_c.to_builtin_tensor %29536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27278 = arith.constant 1 : index
    %dim_27279 = tensor.dim %29598, %c1_27278 : tensor<4x?x4096xf16>
    %29599 = flow.tensor.transfer %29598 : tensor<4x?x4096xf16>{%dim_27279} to #hal.device.promise<@__device_1>
    %29600 = torch_c.from_builtin_tensor %29599 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29601 = torch_c.to_builtin_tensor %29542 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27280 = arith.constant 1 : index
    %dim_27281 = tensor.dim %29601, %c1_27280 : tensor<4x?x4096xf16>
    %29602 = flow.tensor.transfer %29601 : tensor<4x?x4096xf16>{%dim_27281} to #hal.device.promise<@__device_1>
    %29603 = torch_c.from_builtin_tensor %29602 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29604 = torch_c.to_builtin_tensor %29548 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27282 = arith.constant 1 : index
    %dim_27283 = tensor.dim %29604, %c1_27282 : tensor<4x?x4096xf16>
    %29605 = flow.tensor.transfer %29604 : tensor<4x?x4096xf16>{%dim_27283} to #hal.device.promise<@__device_1>
    %29606 = torch_c.from_builtin_tensor %29605 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29607 = torch_c.to_builtin_tensor %29554 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27284 = arith.constant 1 : index
    %dim_27285 = tensor.dim %29607, %c1_27284 : tensor<4x?x4096xf16>
    %29608 = flow.tensor.transfer %29607 : tensor<4x?x4096xf16>{%dim_27285} to #hal.device.promise<@__device_1>
    %29609 = torch_c.from_builtin_tensor %29608 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29610 = torch_c.to_builtin_tensor %29560 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27286 = arith.constant 1 : index
    %dim_27287 = tensor.dim %29610, %c1_27286 : tensor<4x?x4096xf16>
    %29611 = flow.tensor.transfer %29610 : tensor<4x?x4096xf16>{%dim_27287} to #hal.device.promise<@__device_1>
    %29612 = torch_c.from_builtin_tensor %29611 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29613 = torch_c.to_builtin_tensor %29566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27288 = arith.constant 1 : index
    %dim_27289 = tensor.dim %29613, %c1_27288 : tensor<4x?x4096xf16>
    %29614 = flow.tensor.transfer %29613 : tensor<4x?x4096xf16>{%dim_27289} to #hal.device.promise<@__device_1>
    %29615 = torch_c.from_builtin_tensor %29614 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27290 = torch.constant.int 1
    %29616 = torch.aten.add.Tensor %29597, %29530, %int1_27290 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27291 = torch.constant.int 1
    %29617 = torch.aten.add.Tensor %29616, %29600, %int1_27291 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27292 = torch.constant.int 1
    %29618 = torch.aten.add.Tensor %29617, %29603, %int1_27292 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27293 = torch.constant.int 1
    %29619 = torch.aten.add.Tensor %29618, %29606, %int1_27293 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27294 = torch.constant.int 1
    %29620 = torch.aten.add.Tensor %29619, %29609, %int1_27294 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27295 = torch.constant.int 1
    %29621 = torch.aten.add.Tensor %29620, %29612, %int1_27295 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27296 = torch.constant.int 1
    %29622 = torch.aten.add.Tensor %29621, %29615, %int1_27296 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
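    // Reduction replicated for @__device_2 (local partial %29536, added
    // directly without a transfer).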
    %29623 = torch_c.to_builtin_tensor %29524 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27297 = arith.constant 1 : index
    %dim_27298 = tensor.dim %29623, %c1_27297 : tensor<4x?x4096xf16>
    %29624 = flow.tensor.transfer %29623 : tensor<4x?x4096xf16>{%dim_27298} to #hal.device.promise<@__device_2>
    %29625 = torch_c.from_builtin_tensor %29624 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29626 = torch_c.to_builtin_tensor %29530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27299 = arith.constant 1 : index
    %dim_27300 = tensor.dim %29626, %c1_27299 : tensor<4x?x4096xf16>
    %29627 = flow.tensor.transfer %29626 : tensor<4x?x4096xf16>{%dim_27300} to #hal.device.promise<@__device_2>
    %29628 = torch_c.from_builtin_tensor %29627 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29629 = torch_c.to_builtin_tensor %29542 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27301 = arith.constant 1 : index
    %dim_27302 = tensor.dim %29629, %c1_27301 : tensor<4x?x4096xf16>
    %29630 = flow.tensor.transfer %29629 : tensor<4x?x4096xf16>{%dim_27302} to #hal.device.promise<@__device_2>
    %29631 = torch_c.from_builtin_tensor %29630 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29632 = torch_c.to_builtin_tensor %29548 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27303 = arith.constant 1 : index
    %dim_27304 = tensor.dim %29632, %c1_27303 : tensor<4x?x4096xf16>
    %29633 = flow.tensor.transfer %29632 : tensor<4x?x4096xf16>{%dim_27304} to #hal.device.promise<@__device_2>
    %29634 = torch_c.from_builtin_tensor %29633 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29635 = torch_c.to_builtin_tensor %29554 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27305 = arith.constant 1 : index
    %dim_27306 = tensor.dim %29635, %c1_27305 : tensor<4x?x4096xf16>
    %29636 = flow.tensor.transfer %29635 : tensor<4x?x4096xf16>{%dim_27306} to #hal.device.promise<@__device_2>
    %29637 = torch_c.from_builtin_tensor %29636 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29638 = torch_c.to_builtin_tensor %29560 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27307 = arith.constant 1 : index
    %dim_27308 = tensor.dim %29638, %c1_27307 : tensor<4x?x4096xf16>
    %29639 = flow.tensor.transfer %29638 : tensor<4x?x4096xf16>{%dim_27308} to #hal.device.promise<@__device_2>
    %29640 = torch_c.from_builtin_tensor %29639 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29641 = torch_c.to_builtin_tensor %29566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27309 = arith.constant 1 : index
    %dim_27310 = tensor.dim %29641, %c1_27309 : tensor<4x?x4096xf16>
    %29642 = flow.tensor.transfer %29641 : tensor<4x?x4096xf16>{%dim_27310} to #hal.device.promise<@__device_2>
    %29643 = torch_c.from_builtin_tensor %29642 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27311 = torch.constant.int 1
    %29644 = torch.aten.add.Tensor %29625, %29628, %int1_27311 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27312 = torch.constant.int 1
    %29645 = torch.aten.add.Tensor %29644, %29536, %int1_27312 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27313 = torch.constant.int 1
    %29646 = torch.aten.add.Tensor %29645, %29631, %int1_27313 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27314 = torch.constant.int 1
    %29647 = torch.aten.add.Tensor %29646, %29634, %int1_27314 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27315 = torch.constant.int 1
    %29648 = torch.aten.add.Tensor %29647, %29637, %int1_27315 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27316 = torch.constant.int 1
    %29649 = torch.aten.add.Tensor %29648, %29640, %int1_27316 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27317 = torch.constant.int 1
    %29650 = torch.aten.add.Tensor %29649, %29643, %int1_27317 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
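    // Reduction replicated for @__device_3 (local partial %29542).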
    %29651 = torch_c.to_builtin_tensor %29524 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27318 = arith.constant 1 : index
    %dim_27319 = tensor.dim %29651, %c1_27318 : tensor<4x?x4096xf16>
    %29652 = flow.tensor.transfer %29651 : tensor<4x?x4096xf16>{%dim_27319} to #hal.device.promise<@__device_3>
    %29653 = torch_c.from_builtin_tensor %29652 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29654 = torch_c.to_builtin_tensor %29530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27320 = arith.constant 1 : index
    %dim_27321 = tensor.dim %29654, %c1_27320 : tensor<4x?x4096xf16>
    %29655 = flow.tensor.transfer %29654 : tensor<4x?x4096xf16>{%dim_27321} to #hal.device.promise<@__device_3>
    %29656 = torch_c.from_builtin_tensor %29655 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29657 = torch_c.to_builtin_tensor %29536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27322 = arith.constant 1 : index
    %dim_27323 = tensor.dim %29657, %c1_27322 : tensor<4x?x4096xf16>
    %29658 = flow.tensor.transfer %29657 : tensor<4x?x4096xf16>{%dim_27323} to #hal.device.promise<@__device_3>
    %29659 = torch_c.from_builtin_tensor %29658 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29660 = torch_c.to_builtin_tensor %29548 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27324 = arith.constant 1 : index
    %dim_27325 = tensor.dim %29660, %c1_27324 : tensor<4x?x4096xf16>
    %29661 = flow.tensor.transfer %29660 : tensor<4x?x4096xf16>{%dim_27325} to #hal.device.promise<@__device_3>
    %29662 = torch_c.from_builtin_tensor %29661 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29663 = torch_c.to_builtin_tensor %29554 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27326 = arith.constant 1 : index
    %dim_27327 = tensor.dim %29663, %c1_27326 : tensor<4x?x4096xf16>
    %29664 = flow.tensor.transfer %29663 : tensor<4x?x4096xf16>{%dim_27327} to #hal.device.promise<@__device_3>
    %29665 = torch_c.from_builtin_tensor %29664 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29666 = torch_c.to_builtin_tensor %29560 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27328 = arith.constant 1 : index
    %dim_27329 = tensor.dim %29666, %c1_27328 : tensor<4x?x4096xf16>
    %29667 = flow.tensor.transfer %29666 : tensor<4x?x4096xf16>{%dim_27329} to #hal.device.promise<@__device_3>
    %29668 = torch_c.from_builtin_tensor %29667 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29669 = torch_c.to_builtin_tensor %29566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27330 = arith.constant 1 : index
    %dim_27331 = tensor.dim %29669, %c1_27330 : tensor<4x?x4096xf16>
    %29670 = flow.tensor.transfer %29669 : tensor<4x?x4096xf16>{%dim_27331} to #hal.device.promise<@__device_3>
    %29671 = torch_c.from_builtin_tensor %29670 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27332 = torch.constant.int 1
    %29672 = torch.aten.add.Tensor %29653, %29656, %int1_27332 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27333 = torch.constant.int 1
    %29673 = torch.aten.add.Tensor %29672, %29659, %int1_27333 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27334 = torch.constant.int 1
    %29674 = torch.aten.add.Tensor %29673, %29542, %int1_27334 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27335 = torch.constant.int 1
    %29675 = torch.aten.add.Tensor %29674, %29662, %int1_27335 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27336 = torch.constant.int 1
    %29676 = torch.aten.add.Tensor %29675, %29665, %int1_27336 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27337 = torch.constant.int 1
    %29677 = torch.aten.add.Tensor %29676, %29668, %int1_27337 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27338 = torch.constant.int 1
    %29678 = torch.aten.add.Tensor %29677, %29671, %int1_27338 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
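    // Same gather-and-sum pattern, now accumulating the eight partials on
    // @__device_4; %29548 appears to be device-local, so it is added without
    // a transfer.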
    %29679 = torch_c.to_builtin_tensor %29524 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27339 = arith.constant 1 : index
    %dim_27340 = tensor.dim %29679, %c1_27339 : tensor<4x?x4096xf16>
    %29680 = flow.tensor.transfer %29679 : tensor<4x?x4096xf16>{%dim_27340} to #hal.device.promise<@__device_4>
    %29681 = torch_c.from_builtin_tensor %29680 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29682 = torch_c.to_builtin_tensor %29530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27341 = arith.constant 1 : index
    %dim_27342 = tensor.dim %29682, %c1_27341 : tensor<4x?x4096xf16>
    %29683 = flow.tensor.transfer %29682 : tensor<4x?x4096xf16>{%dim_27342} to #hal.device.promise<@__device_4>
    %29684 = torch_c.from_builtin_tensor %29683 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29685 = torch_c.to_builtin_tensor %29536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27343 = arith.constant 1 : index
    %dim_27344 = tensor.dim %29685, %c1_27343 : tensor<4x?x4096xf16>
    %29686 = flow.tensor.transfer %29685 : tensor<4x?x4096xf16>{%dim_27344} to #hal.device.promise<@__device_4>
    %29687 = torch_c.from_builtin_tensor %29686 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29688 = torch_c.to_builtin_tensor %29542 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27345 = arith.constant 1 : index
    %dim_27346 = tensor.dim %29688, %c1_27345 : tensor<4x?x4096xf16>
    %29689 = flow.tensor.transfer %29688 : tensor<4x?x4096xf16>{%dim_27346} to #hal.device.promise<@__device_4>
    %29690 = torch_c.from_builtin_tensor %29689 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29691 = torch_c.to_builtin_tensor %29554 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27347 = arith.constant 1 : index
    %dim_27348 = tensor.dim %29691, %c1_27347 : tensor<4x?x4096xf16>
    %29692 = flow.tensor.transfer %29691 : tensor<4x?x4096xf16>{%dim_27348} to #hal.device.promise<@__device_4>
    %29693 = torch_c.from_builtin_tensor %29692 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29694 = torch_c.to_builtin_tensor %29560 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27349 = arith.constant 1 : index
    %dim_27350 = tensor.dim %29694, %c1_27349 : tensor<4x?x4096xf16>
    %29695 = flow.tensor.transfer %29694 : tensor<4x?x4096xf16>{%dim_27350} to #hal.device.promise<@__device_4>
    %29696 = torch_c.from_builtin_tensor %29695 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29697 = torch_c.to_builtin_tensor %29566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27351 = arith.constant 1 : index
    %dim_27352 = tensor.dim %29697, %c1_27351 : tensor<4x?x4096xf16>
    %29698 = flow.tensor.transfer %29697 : tensor<4x?x4096xf16>{%dim_27352} to #hal.device.promise<@__device_4>
    %29699 = torch_c.from_builtin_tensor %29698 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27353 = torch.constant.int 1
    %29700 = torch.aten.add.Tensor %29681, %29684, %int1_27353 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27354 = torch.constant.int 1
    %29701 = torch.aten.add.Tensor %29700, %29687, %int1_27354 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27355 = torch.constant.int 1
    %29702 = torch.aten.add.Tensor %29701, %29690, %int1_27355 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27356 = torch.constant.int 1
    %29703 = torch.aten.add.Tensor %29702, %29548, %int1_27356 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27357 = torch.constant.int 1
    %29704 = torch.aten.add.Tensor %29703, %29693, %int1_27357 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27358 = torch.constant.int 1
    %29705 = torch.aten.add.Tensor %29704, %29696, %int1_27358 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27359 = torch.constant.int 1
    %29706 = torch.aten.add.Tensor %29705, %29699, %int1_27359 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
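    // Gather-and-sum on @__device_5; here %29554 is the untransferred,
    // device-local shard.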
    %29707 = torch_c.to_builtin_tensor %29524 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27360 = arith.constant 1 : index
    %dim_27361 = tensor.dim %29707, %c1_27360 : tensor<4x?x4096xf16>
    %29708 = flow.tensor.transfer %29707 : tensor<4x?x4096xf16>{%dim_27361} to #hal.device.promise<@__device_5>
    %29709 = torch_c.from_builtin_tensor %29708 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29710 = torch_c.to_builtin_tensor %29530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27362 = arith.constant 1 : index
    %dim_27363 = tensor.dim %29710, %c1_27362 : tensor<4x?x4096xf16>
    %29711 = flow.tensor.transfer %29710 : tensor<4x?x4096xf16>{%dim_27363} to #hal.device.promise<@__device_5>
    %29712 = torch_c.from_builtin_tensor %29711 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29713 = torch_c.to_builtin_tensor %29536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27364 = arith.constant 1 : index
    %dim_27365 = tensor.dim %29713, %c1_27364 : tensor<4x?x4096xf16>
    %29714 = flow.tensor.transfer %29713 : tensor<4x?x4096xf16>{%dim_27365} to #hal.device.promise<@__device_5>
    %29715 = torch_c.from_builtin_tensor %29714 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29716 = torch_c.to_builtin_tensor %29542 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27366 = arith.constant 1 : index
    %dim_27367 = tensor.dim %29716, %c1_27366 : tensor<4x?x4096xf16>
    %29717 = flow.tensor.transfer %29716 : tensor<4x?x4096xf16>{%dim_27367} to #hal.device.promise<@__device_5>
    %29718 = torch_c.from_builtin_tensor %29717 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29719 = torch_c.to_builtin_tensor %29548 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27368 = arith.constant 1 : index
    %dim_27369 = tensor.dim %29719, %c1_27368 : tensor<4x?x4096xf16>
    %29720 = flow.tensor.transfer %29719 : tensor<4x?x4096xf16>{%dim_27369} to #hal.device.promise<@__device_5>
    %29721 = torch_c.from_builtin_tensor %29720 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29722 = torch_c.to_builtin_tensor %29560 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27370 = arith.constant 1 : index
    %dim_27371 = tensor.dim %29722, %c1_27370 : tensor<4x?x4096xf16>
    %29723 = flow.tensor.transfer %29722 : tensor<4x?x4096xf16>{%dim_27371} to #hal.device.promise<@__device_5>
    %29724 = torch_c.from_builtin_tensor %29723 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29725 = torch_c.to_builtin_tensor %29566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27372 = arith.constant 1 : index
    %dim_27373 = tensor.dim %29725, %c1_27372 : tensor<4x?x4096xf16>
    %29726 = flow.tensor.transfer %29725 : tensor<4x?x4096xf16>{%dim_27373} to #hal.device.promise<@__device_5>
    %29727 = torch_c.from_builtin_tensor %29726 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27374 = torch.constant.int 1
    %29728 = torch.aten.add.Tensor %29709, %29712, %int1_27374 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27375 = torch.constant.int 1
    %29729 = torch.aten.add.Tensor %29728, %29715, %int1_27375 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27376 = torch.constant.int 1
    %29730 = torch.aten.add.Tensor %29729, %29718, %int1_27376 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27377 = torch.constant.int 1
    %29731 = torch.aten.add.Tensor %29730, %29721, %int1_27377 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27378 = torch.constant.int 1
    %29732 = torch.aten.add.Tensor %29731, %29554, %int1_27378 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27379 = torch.constant.int 1
    %29733 = torch.aten.add.Tensor %29732, %29724, %int1_27379 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27380 = torch.constant.int 1
    %29734 = torch.aten.add.Tensor %29733, %29727, %int1_27380 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
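    // Gather-and-sum on @__device_6; %29560 is the device-local shard.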
    %29735 = torch_c.to_builtin_tensor %29524 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27381 = arith.constant 1 : index
    %dim_27382 = tensor.dim %29735, %c1_27381 : tensor<4x?x4096xf16>
    %29736 = flow.tensor.transfer %29735 : tensor<4x?x4096xf16>{%dim_27382} to #hal.device.promise<@__device_6>
    %29737 = torch_c.from_builtin_tensor %29736 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29738 = torch_c.to_builtin_tensor %29530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27383 = arith.constant 1 : index
    %dim_27384 = tensor.dim %29738, %c1_27383 : tensor<4x?x4096xf16>
    %29739 = flow.tensor.transfer %29738 : tensor<4x?x4096xf16>{%dim_27384} to #hal.device.promise<@__device_6>
    %29740 = torch_c.from_builtin_tensor %29739 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29741 = torch_c.to_builtin_tensor %29536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27385 = arith.constant 1 : index
    %dim_27386 = tensor.dim %29741, %c1_27385 : tensor<4x?x4096xf16>
    %29742 = flow.tensor.transfer %29741 : tensor<4x?x4096xf16>{%dim_27386} to #hal.device.promise<@__device_6>
    %29743 = torch_c.from_builtin_tensor %29742 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29744 = torch_c.to_builtin_tensor %29542 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27387 = arith.constant 1 : index
    %dim_27388 = tensor.dim %29744, %c1_27387 : tensor<4x?x4096xf16>
    %29745 = flow.tensor.transfer %29744 : tensor<4x?x4096xf16>{%dim_27388} to #hal.device.promise<@__device_6>
    %29746 = torch_c.from_builtin_tensor %29745 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29747 = torch_c.to_builtin_tensor %29548 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27389 = arith.constant 1 : index
    %dim_27390 = tensor.dim %29747, %c1_27389 : tensor<4x?x4096xf16>
    %29748 = flow.tensor.transfer %29747 : tensor<4x?x4096xf16>{%dim_27390} to #hal.device.promise<@__device_6>
    %29749 = torch_c.from_builtin_tensor %29748 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29750 = torch_c.to_builtin_tensor %29554 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27391 = arith.constant 1 : index
    %dim_27392 = tensor.dim %29750, %c1_27391 : tensor<4x?x4096xf16>
    %29751 = flow.tensor.transfer %29750 : tensor<4x?x4096xf16>{%dim_27392} to #hal.device.promise<@__device_6>
    %29752 = torch_c.from_builtin_tensor %29751 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29753 = torch_c.to_builtin_tensor %29566 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27393 = arith.constant 1 : index
    %dim_27394 = tensor.dim %29753, %c1_27393 : tensor<4x?x4096xf16>
    %29754 = flow.tensor.transfer %29753 : tensor<4x?x4096xf16>{%dim_27394} to #hal.device.promise<@__device_6>
    %29755 = torch_c.from_builtin_tensor %29754 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27395 = torch.constant.int 1
    %29756 = torch.aten.add.Tensor %29737, %29740, %int1_27395 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27396 = torch.constant.int 1
    %29757 = torch.aten.add.Tensor %29756, %29743, %int1_27396 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27397 = torch.constant.int 1
    %29758 = torch.aten.add.Tensor %29757, %29746, %int1_27397 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27398 = torch.constant.int 1
    %29759 = torch.aten.add.Tensor %29758, %29749, %int1_27398 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27399 = torch.constant.int 1
    %29760 = torch.aten.add.Tensor %29759, %29752, %int1_27399 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27400 = torch.constant.int 1
    %29761 = torch.aten.add.Tensor %29760, %29560, %int1_27400 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27401 = torch.constant.int 1
    %29762 = torch.aten.add.Tensor %29761, %29755, %int1_27401 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
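    // Gather-and-sum on @__device_7; %29566 is the device-local shard. After
    // this block every device holds a full copy of the reduced result.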
    %29763 = torch_c.to_builtin_tensor %29524 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27402 = arith.constant 1 : index
    %dim_27403 = tensor.dim %29763, %c1_27402 : tensor<4x?x4096xf16>
    %29764 = flow.tensor.transfer %29763 : tensor<4x?x4096xf16>{%dim_27403} to #hal.device.promise<@__device_7>
    %29765 = torch_c.from_builtin_tensor %29764 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29766 = torch_c.to_builtin_tensor %29530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27404 = arith.constant 1 : index
    %dim_27405 = tensor.dim %29766, %c1_27404 : tensor<4x?x4096xf16>
    %29767 = flow.tensor.transfer %29766 : tensor<4x?x4096xf16>{%dim_27405} to #hal.device.promise<@__device_7>
    %29768 = torch_c.from_builtin_tensor %29767 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29769 = torch_c.to_builtin_tensor %29536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27406 = arith.constant 1 : index
    %dim_27407 = tensor.dim %29769, %c1_27406 : tensor<4x?x4096xf16>
    %29770 = flow.tensor.transfer %29769 : tensor<4x?x4096xf16>{%dim_27407} to #hal.device.promise<@__device_7>
    %29771 = torch_c.from_builtin_tensor %29770 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29772 = torch_c.to_builtin_tensor %29542 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27408 = arith.constant 1 : index
    %dim_27409 = tensor.dim %29772, %c1_27408 : tensor<4x?x4096xf16>
    %29773 = flow.tensor.transfer %29772 : tensor<4x?x4096xf16>{%dim_27409} to #hal.device.promise<@__device_7>
    %29774 = torch_c.from_builtin_tensor %29773 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29775 = torch_c.to_builtin_tensor %29548 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27410 = arith.constant 1 : index
    %dim_27411 = tensor.dim %29775, %c1_27410 : tensor<4x?x4096xf16>
    %29776 = flow.tensor.transfer %29775 : tensor<4x?x4096xf16>{%dim_27411} to #hal.device.promise<@__device_7>
    %29777 = torch_c.from_builtin_tensor %29776 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29778 = torch_c.to_builtin_tensor %29554 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27412 = arith.constant 1 : index
    %dim_27413 = tensor.dim %29778, %c1_27412 : tensor<4x?x4096xf16>
    %29779 = flow.tensor.transfer %29778 : tensor<4x?x4096xf16>{%dim_27413} to #hal.device.promise<@__device_7>
    %29780 = torch_c.from_builtin_tensor %29779 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %29781 = torch_c.to_builtin_tensor %29560 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27414 = arith.constant 1 : index
    %dim_27415 = tensor.dim %29781, %c1_27414 : tensor<4x?x4096xf16>
    %29782 = flow.tensor.transfer %29781 : tensor<4x?x4096xf16>{%dim_27415} to #hal.device.promise<@__device_7>
    %29783 = torch_c.from_builtin_tensor %29782 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27416 = torch.constant.int 1
    %29784 = torch.aten.add.Tensor %29765, %29768, %int1_27416 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27417 = torch.constant.int 1
    %29785 = torch.aten.add.Tensor %29784, %29771, %int1_27417 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27418 = torch.constant.int 1
    %29786 = torch.aten.add.Tensor %29785, %29774, %int1_27418 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27419 = torch.constant.int 1
    %29787 = torch.aten.add.Tensor %29786, %29777, %int1_27419 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27420 = torch.constant.int 1
    %29788 = torch.aten.add.Tensor %29787, %29780, %int1_27420 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27421 = torch.constant.int 1
    %29789 = torch.aten.add.Tensor %29788, %29783, %int1_27421 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27422 = torch.constant.int 1
    %29790 = torch.aten.add.Tensor %29789, %29566, %int1_27422 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
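    // With a reduced copy now resident on every device (%29594, %29622,
    // %29650, %29678, %29706, %29734, %29762, %29790), the adds below
    // presumably apply the residual connection: %28450..%28457 look like the
    // per-device activations carried forward from earlier in the layer.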
    %int1_27423 = torch.constant.int 1
    %29791 = torch.aten.add.Tensor %28450, %29594, %int1_27423 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27424 = torch.constant.int 1
    %29792 = torch.aten.add.Tensor %28451, %29622, %int1_27424 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27425 = torch.constant.int 1
    %29793 = torch.aten.add.Tensor %28452, %29650, %int1_27425 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27426 = torch.constant.int 1
    %29794 = torch.aten.add.Tensor %28453, %29678, %int1_27426 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27427 = torch.constant.int 1
    %29795 = torch.aten.add.Tensor %28454, %29706, %int1_27427 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27428 = torch.constant.int 1
    %29796 = torch.aten.add.Tensor %28455, %29734, %int1_27428 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27429 = torch.constant.int 1
    %29797 = torch.aten.add.Tensor %28456, %29762, %int1_27429 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27430 = torch.constant.int 1
    %29798 = torch.aten.add.Tensor %28457, %29790, %int1_27430 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
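    // The remainder of this section looks like a replicated RMSNorm, run once
    // per device: upcast to f32 (torch dtype code 6), square, mean over the
    // hidden dimension, add epsilon, rsqrt, scale, then downcast to f16
    // (dtype code 5). A Python sketch of one device's computation, where w_d
    // is a hypothetical name for that device's norm weight (e.g. %1056):
    //
    //   x32 = x.to(torch.float32)
    //   var = x32.pow(2).mean(-1, keepdim=True)
    //   y   = x32 * torch.rsqrt(var + 1e-5)
    //   out = (w_d * y).to(torch.float16)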
    %int6_27431 = torch.constant.int 6
    %29799 = torch.prims.convert_element_type %29791, %int6_27431 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27432 = torch.constant.int 6
    %29800 = torch.prims.convert_element_type %29792, %int6_27432 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27433 = torch.constant.int 6
    %29801 = torch.prims.convert_element_type %29793, %int6_27433 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27434 = torch.constant.int 6
    %29802 = torch.prims.convert_element_type %29794, %int6_27434 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27435 = torch.constant.int 6
    %29803 = torch.prims.convert_element_type %29795, %int6_27435 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27436 = torch.constant.int 6
    %29804 = torch.prims.convert_element_type %29796, %int6_27436 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27437 = torch.constant.int 6
    %29805 = torch.prims.convert_element_type %29797, %int6_27437 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27438 = torch.constant.int 6
    %29806 = torch.prims.convert_element_type %29798, %int6_27438 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
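    // Elementwise square of each f32 copy; the first step of the mean-square
    // estimate in the sketch above.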
    %int2_27439 = torch.constant.int 2
    %29807 = torch.aten.pow.Tensor_Scalar %29799, %int2_27439 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27440 = torch.constant.int 2
    %29808 = torch.aten.pow.Tensor_Scalar %29800, %int2_27440 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27441 = torch.constant.int 2
    %29809 = torch.aten.pow.Tensor_Scalar %29801, %int2_27441 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27442 = torch.constant.int 2
    %29810 = torch.aten.pow.Tensor_Scalar %29802, %int2_27442 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27443 = torch.constant.int 2
    %29811 = torch.aten.pow.Tensor_Scalar %29803, %int2_27443 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27444 = torch.constant.int 2
    %29812 = torch.aten.pow.Tensor_Scalar %29804, %int2_27444 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27445 = torch.constant.int 2
    %29813 = torch.aten.pow.Tensor_Scalar %29805, %int2_27445 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27446 = torch.constant.int 2
    %29814 = torch.aten.pow.Tensor_Scalar %29806, %int2_27446 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
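    // Mean over the last dimension (dims = [-1], keepdim = true), giving the
    // per-token mean square with shape [4, ?, 1].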
    %int-1_27447 = torch.constant.int -1
    %29815 = torch.prim.ListConstruct %int-1_27447 : (!torch.int) -> !torch.list<int>
    %true_27448 = torch.constant.bool true
    %none_27449 = torch.constant.none
    %29816 = torch.aten.mean.dim %29807, %29815, %true_27448, %none_27449 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27450 = torch.constant.int -1
    %29817 = torch.prim.ListConstruct %int-1_27450 : (!torch.int) -> !torch.list<int>
    %true_27451 = torch.constant.bool true
    %none_27452 = torch.constant.none
    %29818 = torch.aten.mean.dim %29808, %29817, %true_27451, %none_27452 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27453 = torch.constant.int -1
    %29819 = torch.prim.ListConstruct %int-1_27453 : (!torch.int) -> !torch.list<int>
    %true_27454 = torch.constant.bool true
    %none_27455 = torch.constant.none
    %29820 = torch.aten.mean.dim %29809, %29819, %true_27454, %none_27455 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27456 = torch.constant.int -1
    %29821 = torch.prim.ListConstruct %int-1_27456 : (!torch.int) -> !torch.list<int>
    %true_27457 = torch.constant.bool true
    %none_27458 = torch.constant.none
    %29822 = torch.aten.mean.dim %29810, %29821, %true_27457, %none_27458 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27459 = torch.constant.int -1
    %29823 = torch.prim.ListConstruct %int-1_27459 : (!torch.int) -> !torch.list<int>
    %true_27460 = torch.constant.bool true
    %none_27461 = torch.constant.none
    %29824 = torch.aten.mean.dim %29811, %29823, %true_27460, %none_27461 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27462 = torch.constant.int -1
    %29825 = torch.prim.ListConstruct %int-1_27462 : (!torch.int) -> !torch.list<int>
    %true_27463 = torch.constant.bool true
    %none_27464 = torch.constant.none
    %29826 = torch.aten.mean.dim %29812, %29825, %true_27463, %none_27464 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27465 = torch.constant.int -1
    %29827 = torch.prim.ListConstruct %int-1_27465 : (!torch.int) -> !torch.list<int>
    %true_27466 = torch.constant.bool true
    %none_27467 = torch.constant.none
    %29828 = torch.aten.mean.dim %29813, %29827, %true_27466, %none_27467 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27468 = torch.constant.int -1
    %29829 = torch.prim.ListConstruct %int-1_27468 : (!torch.int) -> !torch.list<int>
    %true_27469 = torch.constant.bool true
    %none_27470 = torch.constant.none
    %29830 = torch.aten.mean.dim %29814, %29829, %true_27469, %none_27470 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
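    // Epsilon add: 9.9999997473787516E-6 is 1e-5 rounded to f32, consistent
    // with an RMSNorm epsilon of 1e-5.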
    %float9.999990e-06_27471 = torch.constant.float 9.9999997473787516E-6
    %int1_27472 = torch.constant.int 1
    %29831 = torch.aten.add.Scalar %29816, %float9.999990e-06_27471, %int1_27472 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27473 = torch.constant.float 9.9999997473787516E-6
    %int1_27474 = torch.constant.int 1
    %29832 = torch.aten.add.Scalar %29818, %float9.999990e-06_27473, %int1_27474 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27475 = torch.constant.float 9.9999997473787516E-6
    %int1_27476 = torch.constant.int 1
    %29833 = torch.aten.add.Scalar %29820, %float9.999990e-06_27475, %int1_27476 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27477 = torch.constant.float 9.9999997473787516E-6
    %int1_27478 = torch.constant.int 1
    %29834 = torch.aten.add.Scalar %29822, %float9.999990e-06_27477, %int1_27478 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27479 = torch.constant.float 9.9999997473787516E-6
    %int1_27480 = torch.constant.int 1
    %29835 = torch.aten.add.Scalar %29824, %float9.999990e-06_27479, %int1_27480 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27481 = torch.constant.float 9.9999997473787516E-6
    %int1_27482 = torch.constant.int 1
    %29836 = torch.aten.add.Scalar %29826, %float9.999990e-06_27481, %int1_27482 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27483 = torch.constant.float 9.9999997473787516E-6
    %int1_27484 = torch.constant.int 1
    %29837 = torch.aten.add.Scalar %29828, %float9.999990e-06_27483, %int1_27484 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27485 = torch.constant.float 9.9999997473787516E-6
    %int1_27486 = torch.constant.int 1
    %29838 = torch.aten.add.Scalar %29830, %float9.999990e-06_27485, %int1_27486 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
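    // Reciprocal square root of (mean square + eps), i.e. 1 / rms(x), to be
    // broadcast against the activations in the multiplies that follow.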
    %29839 = torch.aten.rsqrt %29831 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %29840 = torch.aten.rsqrt %29832 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %29841 = torch.aten.rsqrt %29833 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %29842 = torch.aten.rsqrt %29834 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %29843 = torch.aten.rsqrt %29835 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %29844 = torch.aten.rsqrt %29836 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %29845 = torch.aten.rsqrt %29837 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %29846 = torch.aten.rsqrt %29838 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %29846, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
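    // Normalize: each f32 activation row is scaled by its 1/rms value.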
    %29847 = torch.aten.mul.Tensor %29799, %29839 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29848 = torch.aten.mul.Tensor %29800, %29840 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29849 = torch.aten.mul.Tensor %29801, %29841 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29850 = torch.aten.mul.Tensor %29802, %29842 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29851 = torch.aten.mul.Tensor %29803, %29843 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29852 = torch.aten.mul.Tensor %29804, %29844 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29853 = torch.aten.mul.Tensor %29805, %29845 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29854 = torch.aten.mul.Tensor %29806, %29846 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
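    // Scale by the learned per-channel norm weight; %1056..%1063 are
    // presumably the eight device-local copies of this block's ffn_norm
    // weight (tensor<4096xf32>).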
    %29855 = torch.aten.mul.Tensor %1056, %29847 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29856 = torch.aten.mul.Tensor %1057, %29848 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29857 = torch.aten.mul.Tensor %1058, %29849 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29858 = torch.aten.mul.Tensor %1059, %29850 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29859 = torch.aten.mul.Tensor %1060, %29851 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29860 = torch.aten.mul.Tensor %1061, %29852 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29861 = torch.aten.mul.Tensor %1062, %29853 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %29862 = torch.aten.mul.Tensor %1063, %29854 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %29862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
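    // Downcast the normalized activations back to f16 (dtype code 5) ahead of
    // the FFN matmuls.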
    %int5_27487 = torch.constant.int 5
    %29863 = torch.prims.convert_element_type %29855, %int5_27487 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27488 = torch.constant.int 5
    %29864 = torch.prims.convert_element_type %29856, %int5_27488 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27489 = torch.constant.int 5
    %29865 = torch.prims.convert_element_type %29857, %int5_27489 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27490 = torch.constant.int 5
    %29866 = torch.prims.convert_element_type %29858, %int5_27490 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27491 = torch.constant.int 5
    %29867 = torch.prims.convert_element_type %29859, %int5_27491 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27492 = torch.constant.int 5
    %29868 = torch.prims.convert_element_type %29860, %int5_27492 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27493 = torch.constant.int 5
    %29869 = torch.prims.convert_element_type %29861, %int5_27493 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27494 = torch.constant.int 5
    %29870 = torch.prims.convert_element_type %29862, %int5_27494 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %29870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
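    // FFN projection setup: %1064..%1071 are [1792,4096] f16 weights, which
    // matches a 14336-wide FFN split column-parallel across 8 devices
    // (14336 / 8 = 1792). Each shard is transposed, the [4, seq] batch dims
    // are flattened, and a plain mm is applied. A sketch per device, with
    // hypothetical names and w one [1792,4096] shard:
    //
    //   y = x.view(4 * seq, 4096) @ w.t()   # -> [4*seq, 1792]
    //   y = y.view(4, seq, 1792)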
    %int1_27495 = torch.constant.int 1
    %int0_27496 = torch.constant.int 0
    %29871 = torch.prim.ListConstruct %int1_27495, %int0_27496 : (!torch.int, !torch.int) -> !torch.list<int>
    %29872 = torch.aten.permute %1064, %29871 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27497 = torch.constant.int 1
    %int0_27498 = torch.constant.int 0
    %29873 = torch.prim.ListConstruct %int1_27497, %int0_27498 : (!torch.int, !torch.int) -> !torch.list<int>
    %29874 = torch.aten.permute %1065, %29873 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27499 = torch.constant.int 1
    %int0_27500 = torch.constant.int 0
    %29875 = torch.prim.ListConstruct %int1_27499, %int0_27500 : (!torch.int, !torch.int) -> !torch.list<int>
    %29876 = torch.aten.permute %1066, %29875 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27501 = torch.constant.int 1
    %int0_27502 = torch.constant.int 0
    %29877 = torch.prim.ListConstruct %int1_27501, %int0_27502 : (!torch.int, !torch.int) -> !torch.list<int>
    %29878 = torch.aten.permute %1067, %29877 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27503 = torch.constant.int 1
    %int0_27504 = torch.constant.int 0
    %29879 = torch.prim.ListConstruct %int1_27503, %int0_27504 : (!torch.int, !torch.int) -> !torch.list<int>
    %29880 = torch.aten.permute %1068, %29879 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27505 = torch.constant.int 1
    %int0_27506 = torch.constant.int 0
    %29881 = torch.prim.ListConstruct %int1_27505, %int0_27506 : (!torch.int, !torch.int) -> !torch.list<int>
    %29882 = torch.aten.permute %1069, %29881 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27507 = torch.constant.int 1
    %int0_27508 = torch.constant.int 0
    %29883 = torch.prim.ListConstruct %int1_27507, %int0_27508 : (!torch.int, !torch.int) -> !torch.list<int>
    %29884 = torch.aten.permute %1070, %29883 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27509 = torch.constant.int 1
    %int0_27510 = torch.constant.int 0
    %29885 = torch.prim.ListConstruct %int1_27509, %int0_27510 : (!torch.int, !torch.int) -> !torch.list<int>
    %29886 = torch.aten.permute %1071, %29885 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
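    // The flatten -> mm -> reshape pattern below repeats once per device
    // shard.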
    %int4_27511 = torch.constant.int 4
    %29887 = torch.aten.mul.int %int4_27511, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27512 = torch.constant.int 4096
    %29888 = torch.prim.ListConstruct %29887, %int4096_27512 : (!torch.int, !torch.int) -> !torch.list<int>
    %29889 = torch.aten.view %29863, %29888 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29889, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29890 = torch.aten.mm %29889, %29872 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29890, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27513 = torch.constant.int 4
    %int1792_27514 = torch.constant.int 1792
    %29891 = torch.prim.ListConstruct %int4_27513, %2482, %int1792_27514 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29892 = torch.aten.view %29890, %29891 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27515 = torch.constant.int 4
    %29893 = torch.aten.mul.int %int4_27515, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27516 = torch.constant.int 4096
    %29894 = torch.prim.ListConstruct %29893, %int4096_27516 : (!torch.int, !torch.int) -> !torch.list<int>
    %29895 = torch.aten.view %29864, %29894 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29895, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29896 = torch.aten.mm %29895, %29874 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29896, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27517 = torch.constant.int 4
    %int1792_27518 = torch.constant.int 1792
    %29897 = torch.prim.ListConstruct %int4_27517, %2482, %int1792_27518 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29898 = torch.aten.view %29896, %29897 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27519 = torch.constant.int 4
    %29899 = torch.aten.mul.int %int4_27519, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27520 = torch.constant.int 4096
    %29900 = torch.prim.ListConstruct %29899, %int4096_27520 : (!torch.int, !torch.int) -> !torch.list<int>
    %29901 = torch.aten.view %29865, %29900 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29901, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29902 = torch.aten.mm %29901, %29876 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29902, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27521 = torch.constant.int 4
    %int1792_27522 = torch.constant.int 1792
    %29903 = torch.prim.ListConstruct %int4_27521, %2482, %int1792_27522 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29904 = torch.aten.view %29902, %29903 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27523 = torch.constant.int 4
    %29905 = torch.aten.mul.int %int4_27523, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27524 = torch.constant.int 4096
    %29906 = torch.prim.ListConstruct %29905, %int4096_27524 : (!torch.int, !torch.int) -> !torch.list<int>
    %29907 = torch.aten.view %29866, %29906 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29907, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29908 = torch.aten.mm %29907, %29878 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29908, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27525 = torch.constant.int 4
    %int1792_27526 = torch.constant.int 1792
    %29909 = torch.prim.ListConstruct %int4_27525, %2482, %int1792_27526 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29910 = torch.aten.view %29908, %29909 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27527 = torch.constant.int 4
    %29911 = torch.aten.mul.int %int4_27527, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27528 = torch.constant.int 4096
    %29912 = torch.prim.ListConstruct %29911, %int4096_27528 : (!torch.int, !torch.int) -> !torch.list<int>
    %29913 = torch.aten.view %29867, %29912 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29913, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29914 = torch.aten.mm %29913, %29880 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29914, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27529 = torch.constant.int 4
    %int1792_27530 = torch.constant.int 1792
    %29915 = torch.prim.ListConstruct %int4_27529, %2482, %int1792_27530 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29916 = torch.aten.view %29914, %29915 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27531 = torch.constant.int 4
    %29917 = torch.aten.mul.int %int4_27531, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27532 = torch.constant.int 4096
    %29918 = torch.prim.ListConstruct %29917, %int4096_27532 : (!torch.int, !torch.int) -> !torch.list<int>
    %29919 = torch.aten.view %29868, %29918 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29919, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29920 = torch.aten.mm %29919, %29882 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29920, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27533 = torch.constant.int 4
    %int1792_27534 = torch.constant.int 1792
    %29921 = torch.prim.ListConstruct %int4_27533, %2482, %int1792_27534 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29922 = torch.aten.view %29920, %29921 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27535 = torch.constant.int 4
    %29923 = torch.aten.mul.int %int4_27535, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27536 = torch.constant.int 4096
    %29924 = torch.prim.ListConstruct %29923, %int4096_27536 : (!torch.int, !torch.int) -> !torch.list<int>
    %29925 = torch.aten.view %29869, %29924 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29925, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29926 = torch.aten.mm %29925, %29884 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29926, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27537 = torch.constant.int 4
    %int1792_27538 = torch.constant.int 1792
    %29927 = torch.prim.ListConstruct %int4_27537, %2482, %int1792_27538 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29928 = torch.aten.view %29926, %29927 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27539 = torch.constant.int 4
    %29929 = torch.aten.mul.int %int4_27539, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27540 = torch.constant.int 4096
    %29930 = torch.prim.ListConstruct %29929, %int4096_27540 : (!torch.int, !torch.int) -> !torch.list<int>
    %29931 = torch.aten.view %29870, %29930 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29931, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29932 = torch.aten.mm %29931, %29886 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29932, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27541 = torch.constant.int 4
    %int1792_27542 = torch.constant.int 1792
    %29933 = torch.prim.ListConstruct %int4_27541, %2482, %int1792_27542 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29934 = torch.aten.view %29932, %29933 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
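    // Gate path per shard: flatten [4,?,4096] to [?,4096], mm against the local [4096,1792]
    // shard, reshape back to [4,?,1792]. SiLU is applied next, shard-locally.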
    %29935 = torch.aten.silu %29892 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %29936 = torch.aten.silu %29898 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %29937 = torch.aten.silu %29904 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %29938 = torch.aten.silu %29910 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %29939 = torch.aten.silu %29916 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %29940 = torch.aten.silu %29922 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %29941 = torch.aten.silu %29928 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %29942 = torch.aten.silu %29934 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
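    // The following permutes mirror the gate-weight transposes; by position these look like
    // the up-projection shards (again inferred from the structure, not from symbol names).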
    %int1_27543 = torch.constant.int 1
    %int0_27544 = torch.constant.int 0
    %29943 = torch.prim.ListConstruct %int1_27543, %int0_27544 : (!torch.int, !torch.int) -> !torch.list<int>
    %29944 = torch.aten.permute %1072, %29943 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27545 = torch.constant.int 1
    %int0_27546 = torch.constant.int 0
    %29945 = torch.prim.ListConstruct %int1_27545, %int0_27546 : (!torch.int, !torch.int) -> !torch.list<int>
    %29946 = torch.aten.permute %1073, %29945 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27547 = torch.constant.int 1
    %int0_27548 = torch.constant.int 0
    %29947 = torch.prim.ListConstruct %int1_27547, %int0_27548 : (!torch.int, !torch.int) -> !torch.list<int>
    %29948 = torch.aten.permute %1074, %29947 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27549 = torch.constant.int 1
    %int0_27550 = torch.constant.int 0
    %29949 = torch.prim.ListConstruct %int1_27549, %int0_27550 : (!torch.int, !torch.int) -> !torch.list<int>
    %29950 = torch.aten.permute %1075, %29949 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27551 = torch.constant.int 1
    %int0_27552 = torch.constant.int 0
    %29951 = torch.prim.ListConstruct %int1_27551, %int0_27552 : (!torch.int, !torch.int) -> !torch.list<int>
    %29952 = torch.aten.permute %1076, %29951 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27553 = torch.constant.int 1
    %int0_27554 = torch.constant.int 0
    %29953 = torch.prim.ListConstruct %int1_27553, %int0_27554 : (!torch.int, !torch.int) -> !torch.list<int>
    %29954 = torch.aten.permute %1077, %29953 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27555 = torch.constant.int 1
    %int0_27556 = torch.constant.int 0
    %29955 = torch.prim.ListConstruct %int1_27555, %int0_27556 : (!torch.int, !torch.int) -> !torch.list<int>
    %29956 = torch.aten.permute %1078, %29955 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_27557 = torch.constant.int 1
    %int0_27558 = torch.constant.int 0
    %29957 = torch.prim.ListConstruct %int1_27557, %int0_27558 : (!torch.int, !torch.int) -> !torch.list<int>
    %29958 = torch.aten.permute %1079, %29957 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
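    // Up path per shard: the same flatten/mm/reshape sequence as the gate path, reusing
    // flattened views of the same eight input replicas (%29863..%29870).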
    %int4_27559 = torch.constant.int 4
    %29959 = torch.aten.mul.int %int4_27559, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27560 = torch.constant.int 4096
    %29960 = torch.prim.ListConstruct %29959, %int4096_27560 : (!torch.int, !torch.int) -> !torch.list<int>
    %29961 = torch.aten.view %29863, %29960 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29961, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29962 = torch.aten.mm %29961, %29944 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29962, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27561 = torch.constant.int 4
    %int1792_27562 = torch.constant.int 1792
    %29963 = torch.prim.ListConstruct %int4_27561, %2482, %int1792_27562 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29964 = torch.aten.view %29962, %29963 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27563 = torch.constant.int 4
    %29965 = torch.aten.mul.int %int4_27563, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27564 = torch.constant.int 4096
    %29966 = torch.prim.ListConstruct %29965, %int4096_27564 : (!torch.int, !torch.int) -> !torch.list<int>
    %29967 = torch.aten.view %29864, %29966 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29967, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29968 = torch.aten.mm %29967, %29946 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29968, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27565 = torch.constant.int 4
    %int1792_27566 = torch.constant.int 1792
    %29969 = torch.prim.ListConstruct %int4_27565, %2482, %int1792_27566 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29970 = torch.aten.view %29968, %29969 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27567 = torch.constant.int 4
    %29971 = torch.aten.mul.int %int4_27567, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27568 = torch.constant.int 4096
    %29972 = torch.prim.ListConstruct %29971, %int4096_27568 : (!torch.int, !torch.int) -> !torch.list<int>
    %29973 = torch.aten.view %29865, %29972 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29973, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29974 = torch.aten.mm %29973, %29948 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29974, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27569 = torch.constant.int 4
    %int1792_27570 = torch.constant.int 1792
    %29975 = torch.prim.ListConstruct %int4_27569, %2482, %int1792_27570 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29976 = torch.aten.view %29974, %29975 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27571 = torch.constant.int 4
    %29977 = torch.aten.mul.int %int4_27571, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27572 = torch.constant.int 4096
    %29978 = torch.prim.ListConstruct %29977, %int4096_27572 : (!torch.int, !torch.int) -> !torch.list<int>
    %29979 = torch.aten.view %29866, %29978 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29979, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29980 = torch.aten.mm %29979, %29950 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29980, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27573 = torch.constant.int 4
    %int1792_27574 = torch.constant.int 1792
    %29981 = torch.prim.ListConstruct %int4_27573, %2482, %int1792_27574 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29982 = torch.aten.view %29980, %29981 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27575 = torch.constant.int 4
    %29983 = torch.aten.mul.int %int4_27575, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27576 = torch.constant.int 4096
    %29984 = torch.prim.ListConstruct %29983, %int4096_27576 : (!torch.int, !torch.int) -> !torch.list<int>
    %29985 = torch.aten.view %29867, %29984 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29985, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29986 = torch.aten.mm %29985, %29952 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29986, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27577 = torch.constant.int 4
    %int1792_27578 = torch.constant.int 1792
    %29987 = torch.prim.ListConstruct %int4_27577, %2482, %int1792_27578 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29988 = torch.aten.view %29986, %29987 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27579 = torch.constant.int 4
    %29989 = torch.aten.mul.int %int4_27579, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27580 = torch.constant.int 4096
    %29990 = torch.prim.ListConstruct %29989, %int4096_27580 : (!torch.int, !torch.int) -> !torch.list<int>
    %29991 = torch.aten.view %29868, %29990 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29991, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29992 = torch.aten.mm %29991, %29954 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29992, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27581 = torch.constant.int 4
    %int1792_27582 = torch.constant.int 1792
    %29993 = torch.prim.ListConstruct %int4_27581, %2482, %int1792_27582 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %29994 = torch.aten.view %29992, %29993 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %29994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27583 = torch.constant.int 4
    %29995 = torch.aten.mul.int %int4_27583, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27584 = torch.constant.int 4096
    %29996 = torch.prim.ListConstruct %29995, %int4096_27584 : (!torch.int, !torch.int) -> !torch.list<int>
    %29997 = torch.aten.view %29869, %29996 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %29997, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %29998 = torch.aten.mm %29997, %29956 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %29998, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27585 = torch.constant.int 4
    %int1792_27586 = torch.constant.int 1792
    %29999 = torch.prim.ListConstruct %int4_27585, %2482, %int1792_27586 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30000 = torch.aten.view %29998, %29999 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %30000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_27587 = torch.constant.int 4
    %30001 = torch.aten.mul.int %int4_27587, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27588 = torch.constant.int 4096
    %30002 = torch.prim.ListConstruct %30001, %int4096_27588 : (!torch.int, !torch.int) -> !torch.list<int>
    %30003 = torch.aten.view %29870, %30002 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30003, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30004 = torch.aten.mm %30003, %29958 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %30004, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_27589 = torch.constant.int 4
    %int1792_27590 = torch.constant.int 1792
    %30005 = torch.prim.ListConstruct %int4_27589, %2482, %int1792_27590 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30006 = torch.aten.view %30004, %30005 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %30006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
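    // The multiplies below compute silu(gate) * up per shard -- the SwiGLU gating used by
    // LLaMA-style FFN blocks, assuming that is what this module implements.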
    %30007 = torch.aten.mul.Tensor %29935, %29964 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %30007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %30008 = torch.aten.mul.Tensor %29936, %29970 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %30008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %30009 = torch.aten.mul.Tensor %29937, %29976 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %30009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %30010 = torch.aten.mul.Tensor %29938, %29982 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %30010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %30011 = torch.aten.mul.Tensor %29939, %29988 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %30011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %30012 = torch.aten.mul.Tensor %29940, %29994 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %30012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %30013 = torch.aten.mul.Tensor %29941, %30000 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %30013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %30014 = torch.aten.mul.Tensor %29942, %30006 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %30014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
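    // Down-projection weights: %1080..%1087 are [4096,1792] and are transposed to [1792,4096]
    // so each gated [4,?,1792] shard can be projected back to the 4096 model dimension.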
    %int1_27591 = torch.constant.int 1
    %int0_27592 = torch.constant.int 0
    %30015 = torch.prim.ListConstruct %int1_27591, %int0_27592 : (!torch.int, !torch.int) -> !torch.list<int>
    %30016 = torch.aten.permute %1080, %30015 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_27593 = torch.constant.int 1
    %int0_27594 = torch.constant.int 0
    %30017 = torch.prim.ListConstruct %int1_27593, %int0_27594 : (!torch.int, !torch.int) -> !torch.list<int>
    %30018 = torch.aten.permute %1081, %30017 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_27595 = torch.constant.int 1
    %int0_27596 = torch.constant.int 0
    %30019 = torch.prim.ListConstruct %int1_27595, %int0_27596 : (!torch.int, !torch.int) -> !torch.list<int>
    %30020 = torch.aten.permute %1082, %30019 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_27597 = torch.constant.int 1
    %int0_27598 = torch.constant.int 0
    %30021 = torch.prim.ListConstruct %int1_27597, %int0_27598 : (!torch.int, !torch.int) -> !torch.list<int>
    %30022 = torch.aten.permute %1083, %30021 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_27599 = torch.constant.int 1
    %int0_27600 = torch.constant.int 0
    %30023 = torch.prim.ListConstruct %int1_27599, %int0_27600 : (!torch.int, !torch.int) -> !torch.list<int>
    %30024 = torch.aten.permute %1084, %30023 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_27601 = torch.constant.int 1
    %int0_27602 = torch.constant.int 0
    %30025 = torch.prim.ListConstruct %int1_27601, %int0_27602 : (!torch.int, !torch.int) -> !torch.list<int>
    %30026 = torch.aten.permute %1085, %30025 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_27603 = torch.constant.int 1
    %int0_27604 = torch.constant.int 0
    %30027 = torch.prim.ListConstruct %int1_27603, %int0_27604 : (!torch.int, !torch.int) -> !torch.list<int>
    %30028 = torch.aten.permute %1086, %30027 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_27605 = torch.constant.int 1
    %int0_27606 = torch.constant.int 0
    %30029 = torch.prim.ListConstruct %int1_27605, %int0_27606 : (!torch.int, !torch.int) -> !torch.list<int>
    %30030 = torch.aten.permute %1087, %30029 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
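    // Down path per shard: aten.size.int recovers the dynamic sequence length, the gated
    // tensor is flattened to [?,1792], projected to [?,4096], and reshaped to [4,?,4096].
    // The eight results (%30037..%30086, one per device) appear to be partial sums of the
    // full FFN output, which is why they are accumulated across devices below.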
    %int1_27607 = torch.constant.int 1
    %30031 = torch.aten.size.int %29892, %int1_27607 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_27608 = torch.constant.int 4
    %30032 = torch.aten.mul.int %int4_27608, %30031 : !torch.int, !torch.int -> !torch.int
    %int1792_27609 = torch.constant.int 1792
    %30033 = torch.prim.ListConstruct %30032, %int1792_27609 : (!torch.int, !torch.int) -> !torch.list<int>
    %30034 = torch.aten.view %30007, %30033 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %30034, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %30035 = torch.aten.mm %30034, %30016 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30035, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27610 = torch.constant.int 4
    %int4096_27611 = torch.constant.int 4096
    %30036 = torch.prim.ListConstruct %int4_27610, %30031, %int4096_27611 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30037 = torch.aten.view %30035, %30036 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27612 = torch.constant.int 1
    %30038 = torch.aten.size.int %29898, %int1_27612 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_27613 = torch.constant.int 4
    %30039 = torch.aten.mul.int %int4_27613, %30038 : !torch.int, !torch.int -> !torch.int
    %int1792_27614 = torch.constant.int 1792
    %30040 = torch.prim.ListConstruct %30039, %int1792_27614 : (!torch.int, !torch.int) -> !torch.list<int>
    %30041 = torch.aten.view %30008, %30040 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %30041, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %30042 = torch.aten.mm %30041, %30018 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30042, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27615 = torch.constant.int 4
    %int4096_27616 = torch.constant.int 4096
    %30043 = torch.prim.ListConstruct %int4_27615, %30038, %int4096_27616 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30044 = torch.aten.view %30042, %30043 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27617 = torch.constant.int 1
    %30045 = torch.aten.size.int %29904, %int1_27617 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_27618 = torch.constant.int 4
    %30046 = torch.aten.mul.int %int4_27618, %30045 : !torch.int, !torch.int -> !torch.int
    %int1792_27619 = torch.constant.int 1792
    %30047 = torch.prim.ListConstruct %30046, %int1792_27619 : (!torch.int, !torch.int) -> !torch.list<int>
    %30048 = torch.aten.view %30009, %30047 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %30048, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %30049 = torch.aten.mm %30048, %30020 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30049, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27620 = torch.constant.int 4
    %int4096_27621 = torch.constant.int 4096
    %30050 = torch.prim.ListConstruct %int4_27620, %30045, %int4096_27621 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30051 = torch.aten.view %30049, %30050 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27622 = torch.constant.int 1
    %30052 = torch.aten.size.int %29910, %int1_27622 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_27623 = torch.constant.int 4
    %30053 = torch.aten.mul.int %int4_27623, %30052 : !torch.int, !torch.int -> !torch.int
    %int1792_27624 = torch.constant.int 1792
    %30054 = torch.prim.ListConstruct %30053, %int1792_27624 : (!torch.int, !torch.int) -> !torch.list<int>
    %30055 = torch.aten.view %30010, %30054 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %30055, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %30056 = torch.aten.mm %30055, %30022 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30056, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27625 = torch.constant.int 4
    %int4096_27626 = torch.constant.int 4096
    %30057 = torch.prim.ListConstruct %int4_27625, %30052, %int4096_27626 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30058 = torch.aten.view %30056, %30057 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27627 = torch.constant.int 1
    %30059 = torch.aten.size.int %29916, %int1_27627 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_27628 = torch.constant.int 4
    %30060 = torch.aten.mul.int %int4_27628, %30059 : !torch.int, !torch.int -> !torch.int
    %int1792_27629 = torch.constant.int 1792
    %30061 = torch.prim.ListConstruct %30060, %int1792_27629 : (!torch.int, !torch.int) -> !torch.list<int>
    %30062 = torch.aten.view %30011, %30061 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %30062, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %30063 = torch.aten.mm %30062, %30024 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30063, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27630 = torch.constant.int 4
    %int4096_27631 = torch.constant.int 4096
    %30064 = torch.prim.ListConstruct %int4_27630, %30059, %int4096_27631 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30065 = torch.aten.view %30063, %30064 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27632 = torch.constant.int 1
    %30066 = torch.aten.size.int %29922, %int1_27632 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_27633 = torch.constant.int 4
    %30067 = torch.aten.mul.int %int4_27633, %30066 : !torch.int, !torch.int -> !torch.int
    %int1792_27634 = torch.constant.int 1792
    %30068 = torch.prim.ListConstruct %30067, %int1792_27634 : (!torch.int, !torch.int) -> !torch.list<int>
    %30069 = torch.aten.view %30012, %30068 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %30069, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %30070 = torch.aten.mm %30069, %30026 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30070, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27635 = torch.constant.int 4
    %int4096_27636 = torch.constant.int 4096
    %30071 = torch.prim.ListConstruct %int4_27635, %30066, %int4096_27636 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30072 = torch.aten.view %30070, %30071 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27637 = torch.constant.int 1
    %30073 = torch.aten.size.int %29928, %int1_27637 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_27638 = torch.constant.int 4
    %30074 = torch.aten.mul.int %int4_27638, %30073 : !torch.int, !torch.int -> !torch.int
    %int1792_27639 = torch.constant.int 1792
    %30075 = torch.prim.ListConstruct %30074, %int1792_27639 : (!torch.int, !torch.int) -> !torch.list<int>
    %30076 = torch.aten.view %30013, %30075 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %30076, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %30077 = torch.aten.mm %30076, %30028 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30077, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27640 = torch.constant.int 4
    %int4096_27641 = torch.constant.int 4096
    %30078 = torch.prim.ListConstruct %int4_27640, %30073, %int4096_27641 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30079 = torch.aten.view %30077, %30078 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27642 = torch.constant.int 1
    %30080 = torch.aten.size.int %29934, %int1_27642 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_27643 = torch.constant.int 4
    %30081 = torch.aten.mul.int %int4_27643, %30080 : !torch.int, !torch.int -> !torch.int
    %int1792_27644 = torch.constant.int 1792
    %30082 = torch.prim.ListConstruct %30081, %int1792_27644 : (!torch.int, !torch.int) -> !torch.list<int>
    %30083 = torch.aten.view %30014, %30082 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %30083, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %30084 = torch.aten.mm %30083, %30030 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30084, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_27645 = torch.constant.int 4
    %int4096_27646 = torch.constant.int 4096
    %30085 = torch.prim.ListConstruct %int4_27645, %30080, %int4096_27646 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30086 = torch.aten.view %30084, %30085 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
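    // What follows is an unrolled all-reduce over the eight partials: for @__device_0 the
    // seven remote partials are copied in with flow.tensor.transfer and accumulated onto the
    // local partial %30037 by a chain of aten.add.Tensor ops.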
    %30087 = torch_c.to_builtin_tensor %30044 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27647 = arith.constant 1 : index
    %dim_27648 = tensor.dim %30087, %c1_27647 : tensor<4x?x4096xf16>
    %30088 = flow.tensor.transfer %30087 : tensor<4x?x4096xf16>{%dim_27648} to #hal.device.promise<@__device_0>
    %30089 = torch_c.from_builtin_tensor %30088 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30090 = torch_c.to_builtin_tensor %30051 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27649 = arith.constant 1 : index
    %dim_27650 = tensor.dim %30090, %c1_27649 : tensor<4x?x4096xf16>
    %30091 = flow.tensor.transfer %30090 : tensor<4x?x4096xf16>{%dim_27650} to #hal.device.promise<@__device_0>
    %30092 = torch_c.from_builtin_tensor %30091 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30093 = torch_c.to_builtin_tensor %30058 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27651 = arith.constant 1 : index
    %dim_27652 = tensor.dim %30093, %c1_27651 : tensor<4x?x4096xf16>
    %30094 = flow.tensor.transfer %30093 : tensor<4x?x4096xf16>{%dim_27652} to #hal.device.promise<@__device_0>
    %30095 = torch_c.from_builtin_tensor %30094 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30096 = torch_c.to_builtin_tensor %30065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27653 = arith.constant 1 : index
    %dim_27654 = tensor.dim %30096, %c1_27653 : tensor<4x?x4096xf16>
    %30097 = flow.tensor.transfer %30096 : tensor<4x?x4096xf16>{%dim_27654} to #hal.device.promise<@__device_0>
    %30098 = torch_c.from_builtin_tensor %30097 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30099 = torch_c.to_builtin_tensor %30072 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27655 = arith.constant 1 : index
    %dim_27656 = tensor.dim %30099, %c1_27655 : tensor<4x?x4096xf16>
    %30100 = flow.tensor.transfer %30099 : tensor<4x?x4096xf16>{%dim_27656} to #hal.device.promise<@__device_0>
    %30101 = torch_c.from_builtin_tensor %30100 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30102 = torch_c.to_builtin_tensor %30079 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27657 = arith.constant 1 : index
    %dim_27658 = tensor.dim %30102, %c1_27657 : tensor<4x?x4096xf16>
    %30103 = flow.tensor.transfer %30102 : tensor<4x?x4096xf16>{%dim_27658} to #hal.device.promise<@__device_0>
    %30104 = torch_c.from_builtin_tensor %30103 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30105 = torch_c.to_builtin_tensor %30086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27659 = arith.constant 1 : index
    %dim_27660 = tensor.dim %30105, %c1_27659 : tensor<4x?x4096xf16>
    %30106 = flow.tensor.transfer %30105 : tensor<4x?x4096xf16>{%dim_27660} to #hal.device.promise<@__device_0>
    %30107 = torch_c.from_builtin_tensor %30106 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27661 = torch.constant.int 1
    %30108 = torch.aten.add.Tensor %30037, %30089, %int1_27661 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27662 = torch.constant.int 1
    %30109 = torch.aten.add.Tensor %30108, %30092, %int1_27662 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27663 = torch.constant.int 1
    %30110 = torch.aten.add.Tensor %30109, %30095, %int1_27663 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27664 = torch.constant.int 1
    %30111 = torch.aten.add.Tensor %30110, %30098, %int1_27664 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27665 = torch.constant.int 1
    %30112 = torch.aten.add.Tensor %30111, %30101, %int1_27665 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27666 = torch.constant.int 1
    %30113 = torch.aten.add.Tensor %30112, %30104, %int1_27666 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27667 = torch.constant.int 1
    %30114 = torch.aten.add.Tensor %30113, %30107, %int1_27667 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
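    // Same reduction replicated for @__device_1: %30044 is already local, so it is added
    // directly while the other seven partials are transferred in.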
    %30115 = torch_c.to_builtin_tensor %30037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27668 = arith.constant 1 : index
    %dim_27669 = tensor.dim %30115, %c1_27668 : tensor<4x?x4096xf16>
    %30116 = flow.tensor.transfer %30115 : tensor<4x?x4096xf16>{%dim_27669} to #hal.device.promise<@__device_1>
    %30117 = torch_c.from_builtin_tensor %30116 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30118 = torch_c.to_builtin_tensor %30051 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27670 = arith.constant 1 : index
    %dim_27671 = tensor.dim %30118, %c1_27670 : tensor<4x?x4096xf16>
    %30119 = flow.tensor.transfer %30118 : tensor<4x?x4096xf16>{%dim_27671} to #hal.device.promise<@__device_1>
    %30120 = torch_c.from_builtin_tensor %30119 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30121 = torch_c.to_builtin_tensor %30058 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27672 = arith.constant 1 : index
    %dim_27673 = tensor.dim %30121, %c1_27672 : tensor<4x?x4096xf16>
    %30122 = flow.tensor.transfer %30121 : tensor<4x?x4096xf16>{%dim_27673} to #hal.device.promise<@__device_1>
    %30123 = torch_c.from_builtin_tensor %30122 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30124 = torch_c.to_builtin_tensor %30065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27674 = arith.constant 1 : index
    %dim_27675 = tensor.dim %30124, %c1_27674 : tensor<4x?x4096xf16>
    %30125 = flow.tensor.transfer %30124 : tensor<4x?x4096xf16>{%dim_27675} to #hal.device.promise<@__device_1>
    %30126 = torch_c.from_builtin_tensor %30125 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30127 = torch_c.to_builtin_tensor %30072 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27676 = arith.constant 1 : index
    %dim_27677 = tensor.dim %30127, %c1_27676 : tensor<4x?x4096xf16>
    %30128 = flow.tensor.transfer %30127 : tensor<4x?x4096xf16>{%dim_27677} to #hal.device.promise<@__device_1>
    %30129 = torch_c.from_builtin_tensor %30128 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30130 = torch_c.to_builtin_tensor %30079 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27678 = arith.constant 1 : index
    %dim_27679 = tensor.dim %30130, %c1_27678 : tensor<4x?x4096xf16>
    %30131 = flow.tensor.transfer %30130 : tensor<4x?x4096xf16>{%dim_27679} to #hal.device.promise<@__device_1>
    %30132 = torch_c.from_builtin_tensor %30131 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30133 = torch_c.to_builtin_tensor %30086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27680 = arith.constant 1 : index
    %dim_27681 = tensor.dim %30133, %c1_27680 : tensor<4x?x4096xf16>
    %30134 = flow.tensor.transfer %30133 : tensor<4x?x4096xf16>{%dim_27681} to #hal.device.promise<@__device_1>
    %30135 = torch_c.from_builtin_tensor %30134 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27682 = torch.constant.int 1
    %30136 = torch.aten.add.Tensor %30117, %30044, %int1_27682 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27683 = torch.constant.int 1
    %30137 = torch.aten.add.Tensor %30136, %30120, %int1_27683 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27684 = torch.constant.int 1
    %30138 = torch.aten.add.Tensor %30137, %30123, %int1_27684 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27685 = torch.constant.int 1
    %30139 = torch.aten.add.Tensor %30138, %30126, %int1_27685 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27686 = torch.constant.int 1
    %30140 = torch.aten.add.Tensor %30139, %30129, %int1_27686 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27687 = torch.constant.int 1
    %30141 = torch.aten.add.Tensor %30140, %30132, %int1_27687 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27688 = torch.constant.int 1
    %30142 = torch.aten.add.Tensor %30141, %30135, %int1_27688 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
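    // Reduction replica for @__device_2 (local partial %30051, added without a transfer).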
    %30143 = torch_c.to_builtin_tensor %30037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27689 = arith.constant 1 : index
    %dim_27690 = tensor.dim %30143, %c1_27689 : tensor<4x?x4096xf16>
    %30144 = flow.tensor.transfer %30143 : tensor<4x?x4096xf16>{%dim_27690} to #hal.device.promise<@__device_2>
    %30145 = torch_c.from_builtin_tensor %30144 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30146 = torch_c.to_builtin_tensor %30044 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27691 = arith.constant 1 : index
    %dim_27692 = tensor.dim %30146, %c1_27691 : tensor<4x?x4096xf16>
    %30147 = flow.tensor.transfer %30146 : tensor<4x?x4096xf16>{%dim_27692} to #hal.device.promise<@__device_2>
    %30148 = torch_c.from_builtin_tensor %30147 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30149 = torch_c.to_builtin_tensor %30058 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27693 = arith.constant 1 : index
    %dim_27694 = tensor.dim %30149, %c1_27693 : tensor<4x?x4096xf16>
    %30150 = flow.tensor.transfer %30149 : tensor<4x?x4096xf16>{%dim_27694} to #hal.device.promise<@__device_2>
    %30151 = torch_c.from_builtin_tensor %30150 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30152 = torch_c.to_builtin_tensor %30065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27695 = arith.constant 1 : index
    %dim_27696 = tensor.dim %30152, %c1_27695 : tensor<4x?x4096xf16>
    %30153 = flow.tensor.transfer %30152 : tensor<4x?x4096xf16>{%dim_27696} to #hal.device.promise<@__device_2>
    %30154 = torch_c.from_builtin_tensor %30153 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30155 = torch_c.to_builtin_tensor %30072 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27697 = arith.constant 1 : index
    %dim_27698 = tensor.dim %30155, %c1_27697 : tensor<4x?x4096xf16>
    %30156 = flow.tensor.transfer %30155 : tensor<4x?x4096xf16>{%dim_27698} to #hal.device.promise<@__device_2>
    %30157 = torch_c.from_builtin_tensor %30156 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30158 = torch_c.to_builtin_tensor %30079 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27699 = arith.constant 1 : index
    %dim_27700 = tensor.dim %30158, %c1_27699 : tensor<4x?x4096xf16>
    %30159 = flow.tensor.transfer %30158 : tensor<4x?x4096xf16>{%dim_27700} to #hal.device.promise<@__device_2>
    %30160 = torch_c.from_builtin_tensor %30159 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30161 = torch_c.to_builtin_tensor %30086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27701 = arith.constant 1 : index
    %dim_27702 = tensor.dim %30161, %c1_27701 : tensor<4x?x4096xf16>
    %30162 = flow.tensor.transfer %30161 : tensor<4x?x4096xf16>{%dim_27702} to #hal.device.promise<@__device_2>
    %30163 = torch_c.from_builtin_tensor %30162 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27703 = torch.constant.int 1
    %30164 = torch.aten.add.Tensor %30145, %30148, %int1_27703 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27704 = torch.constant.int 1
    %30165 = torch.aten.add.Tensor %30164, %30051, %int1_27704 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27705 = torch.constant.int 1
    %30166 = torch.aten.add.Tensor %30165, %30151, %int1_27705 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27706 = torch.constant.int 1
    %30167 = torch.aten.add.Tensor %30166, %30154, %int1_27706 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27707 = torch.constant.int 1
    %30168 = torch.aten.add.Tensor %30167, %30157, %int1_27707 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27708 = torch.constant.int 1
    %30169 = torch.aten.add.Tensor %30168, %30160, %int1_27708 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27709 = torch.constant.int 1
    %30170 = torch.aten.add.Tensor %30169, %30163, %int1_27709 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
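    // Same pattern for @__device_3: transfer the seven remote partials (%30037, %30044,
    // %30051, %30065, %30072, %30079, %30086; %30058 is already device-local) and sum.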
    %30171 = torch_c.to_builtin_tensor %30037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27710 = arith.constant 1 : index
    %dim_27711 = tensor.dim %30171, %c1_27710 : tensor<4x?x4096xf16>
    %30172 = flow.tensor.transfer %30171 : tensor<4x?x4096xf16>{%dim_27711} to #hal.device.promise<@__device_3>
    %30173 = torch_c.from_builtin_tensor %30172 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30174 = torch_c.to_builtin_tensor %30044 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27712 = arith.constant 1 : index
    %dim_27713 = tensor.dim %30174, %c1_27712 : tensor<4x?x4096xf16>
    %30175 = flow.tensor.transfer %30174 : tensor<4x?x4096xf16>{%dim_27713} to #hal.device.promise<@__device_3>
    %30176 = torch_c.from_builtin_tensor %30175 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30177 = torch_c.to_builtin_tensor %30051 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27714 = arith.constant 1 : index
    %dim_27715 = tensor.dim %30177, %c1_27714 : tensor<4x?x4096xf16>
    %30178 = flow.tensor.transfer %30177 : tensor<4x?x4096xf16>{%dim_27715} to #hal.device.promise<@__device_3>
    %30179 = torch_c.from_builtin_tensor %30178 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30180 = torch_c.to_builtin_tensor %30065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27716 = arith.constant 1 : index
    %dim_27717 = tensor.dim %30180, %c1_27716 : tensor<4x?x4096xf16>
    %30181 = flow.tensor.transfer %30180 : tensor<4x?x4096xf16>{%dim_27717} to #hal.device.promise<@__device_3>
    %30182 = torch_c.from_builtin_tensor %30181 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30183 = torch_c.to_builtin_tensor %30072 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27718 = arith.constant 1 : index
    %dim_27719 = tensor.dim %30183, %c1_27718 : tensor<4x?x4096xf16>
    %30184 = flow.tensor.transfer %30183 : tensor<4x?x4096xf16>{%dim_27719} to #hal.device.promise<@__device_3>
    %30185 = torch_c.from_builtin_tensor %30184 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30186 = torch_c.to_builtin_tensor %30079 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27720 = arith.constant 1 : index
    %dim_27721 = tensor.dim %30186, %c1_27720 : tensor<4x?x4096xf16>
    %30187 = flow.tensor.transfer %30186 : tensor<4x?x4096xf16>{%dim_27721} to #hal.device.promise<@__device_3>
    %30188 = torch_c.from_builtin_tensor %30187 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30189 = torch_c.to_builtin_tensor %30086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27722 = arith.constant 1 : index
    %dim_27723 = tensor.dim %30189, %c1_27722 : tensor<4x?x4096xf16>
    %30190 = flow.tensor.transfer %30189 : tensor<4x?x4096xf16>{%dim_27723} to #hal.device.promise<@__device_3>
    %30191 = torch_c.from_builtin_tensor %30190 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27724 = torch.constant.int 1
    %30192 = torch.aten.add.Tensor %30173, %30176, %int1_27724 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27725 = torch.constant.int 1
    %30193 = torch.aten.add.Tensor %30192, %30179, %int1_27725 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27726 = torch.constant.int 1
    %30194 = torch.aten.add.Tensor %30193, %30058, %int1_27726 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27727 = torch.constant.int 1
    %30195 = torch.aten.add.Tensor %30194, %30182, %int1_27727 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27728 = torch.constant.int 1
    %30196 = torch.aten.add.Tensor %30195, %30185, %int1_27728 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27729 = torch.constant.int 1
    %30197 = torch.aten.add.Tensor %30196, %30188, %int1_27729 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27730 = torch.constant.int 1
    %30198 = torch.aten.add.Tensor %30197, %30191, %int1_27730 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
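    // @__device_4: gather the seven remote partials (%30065 is device-local) and sum.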
    %30199 = torch_c.to_builtin_tensor %30037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27731 = arith.constant 1 : index
    %dim_27732 = tensor.dim %30199, %c1_27731 : tensor<4x?x4096xf16>
    %30200 = flow.tensor.transfer %30199 : tensor<4x?x4096xf16>{%dim_27732} to #hal.device.promise<@__device_4>
    %30201 = torch_c.from_builtin_tensor %30200 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30202 = torch_c.to_builtin_tensor %30044 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27733 = arith.constant 1 : index
    %dim_27734 = tensor.dim %30202, %c1_27733 : tensor<4x?x4096xf16>
    %30203 = flow.tensor.transfer %30202 : tensor<4x?x4096xf16>{%dim_27734} to #hal.device.promise<@__device_4>
    %30204 = torch_c.from_builtin_tensor %30203 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30205 = torch_c.to_builtin_tensor %30051 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27735 = arith.constant 1 : index
    %dim_27736 = tensor.dim %30205, %c1_27735 : tensor<4x?x4096xf16>
    %30206 = flow.tensor.transfer %30205 : tensor<4x?x4096xf16>{%dim_27736} to #hal.device.promise<@__device_4>
    %30207 = torch_c.from_builtin_tensor %30206 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30208 = torch_c.to_builtin_tensor %30058 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27737 = arith.constant 1 : index
    %dim_27738 = tensor.dim %30208, %c1_27737 : tensor<4x?x4096xf16>
    %30209 = flow.tensor.transfer %30208 : tensor<4x?x4096xf16>{%dim_27738} to #hal.device.promise<@__device_4>
    %30210 = torch_c.from_builtin_tensor %30209 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30211 = torch_c.to_builtin_tensor %30072 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27739 = arith.constant 1 : index
    %dim_27740 = tensor.dim %30211, %c1_27739 : tensor<4x?x4096xf16>
    %30212 = flow.tensor.transfer %30211 : tensor<4x?x4096xf16>{%dim_27740} to #hal.device.promise<@__device_4>
    %30213 = torch_c.from_builtin_tensor %30212 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30214 = torch_c.to_builtin_tensor %30079 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27741 = arith.constant 1 : index
    %dim_27742 = tensor.dim %30214, %c1_27741 : tensor<4x?x4096xf16>
    %30215 = flow.tensor.transfer %30214 : tensor<4x?x4096xf16>{%dim_27742} to #hal.device.promise<@__device_4>
    %30216 = torch_c.from_builtin_tensor %30215 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30217 = torch_c.to_builtin_tensor %30086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27743 = arith.constant 1 : index
    %dim_27744 = tensor.dim %30217, %c1_27743 : tensor<4x?x4096xf16>
    %30218 = flow.tensor.transfer %30217 : tensor<4x?x4096xf16>{%dim_27744} to #hal.device.promise<@__device_4>
    %30219 = torch_c.from_builtin_tensor %30218 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27745 = torch.constant.int 1
    %30220 = torch.aten.add.Tensor %30201, %30204, %int1_27745 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27746 = torch.constant.int 1
    %30221 = torch.aten.add.Tensor %30220, %30207, %int1_27746 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27747 = torch.constant.int 1
    %30222 = torch.aten.add.Tensor %30221, %30210, %int1_27747 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27748 = torch.constant.int 1
    %30223 = torch.aten.add.Tensor %30222, %30065, %int1_27748 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27749 = torch.constant.int 1
    %30224 = torch.aten.add.Tensor %30223, %30213, %int1_27749 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27750 = torch.constant.int 1
    %30225 = torch.aten.add.Tensor %30224, %30216, %int1_27750 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27751 = torch.constant.int 1
    %30226 = torch.aten.add.Tensor %30225, %30219, %int1_27751 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
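    // @__device_5: gather the seven remote partials (%30072 is device-local) and sum.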
    %30227 = torch_c.to_builtin_tensor %30037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27752 = arith.constant 1 : index
    %dim_27753 = tensor.dim %30227, %c1_27752 : tensor<4x?x4096xf16>
    %30228 = flow.tensor.transfer %30227 : tensor<4x?x4096xf16>{%dim_27753} to #hal.device.promise<@__device_5>
    %30229 = torch_c.from_builtin_tensor %30228 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30230 = torch_c.to_builtin_tensor %30044 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27754 = arith.constant 1 : index
    %dim_27755 = tensor.dim %30230, %c1_27754 : tensor<4x?x4096xf16>
    %30231 = flow.tensor.transfer %30230 : tensor<4x?x4096xf16>{%dim_27755} to #hal.device.promise<@__device_5>
    %30232 = torch_c.from_builtin_tensor %30231 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30233 = torch_c.to_builtin_tensor %30051 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27756 = arith.constant 1 : index
    %dim_27757 = tensor.dim %30233, %c1_27756 : tensor<4x?x4096xf16>
    %30234 = flow.tensor.transfer %30233 : tensor<4x?x4096xf16>{%dim_27757} to #hal.device.promise<@__device_5>
    %30235 = torch_c.from_builtin_tensor %30234 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30236 = torch_c.to_builtin_tensor %30058 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27758 = arith.constant 1 : index
    %dim_27759 = tensor.dim %30236, %c1_27758 : tensor<4x?x4096xf16>
    %30237 = flow.tensor.transfer %30236 : tensor<4x?x4096xf16>{%dim_27759} to #hal.device.promise<@__device_5>
    %30238 = torch_c.from_builtin_tensor %30237 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30239 = torch_c.to_builtin_tensor %30065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27760 = arith.constant 1 : index
    %dim_27761 = tensor.dim %30239, %c1_27760 : tensor<4x?x4096xf16>
    %30240 = flow.tensor.transfer %30239 : tensor<4x?x4096xf16>{%dim_27761} to #hal.device.promise<@__device_5>
    %30241 = torch_c.from_builtin_tensor %30240 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30242 = torch_c.to_builtin_tensor %30079 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27762 = arith.constant 1 : index
    %dim_27763 = tensor.dim %30242, %c1_27762 : tensor<4x?x4096xf16>
    %30243 = flow.tensor.transfer %30242 : tensor<4x?x4096xf16>{%dim_27763} to #hal.device.promise<@__device_5>
    %30244 = torch_c.from_builtin_tensor %30243 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30245 = torch_c.to_builtin_tensor %30086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27764 = arith.constant 1 : index
    %dim_27765 = tensor.dim %30245, %c1_27764 : tensor<4x?x4096xf16>
    %30246 = flow.tensor.transfer %30245 : tensor<4x?x4096xf16>{%dim_27765} to #hal.device.promise<@__device_5>
    %30247 = torch_c.from_builtin_tensor %30246 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27766 = torch.constant.int 1
    %30248 = torch.aten.add.Tensor %30229, %30232, %int1_27766 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27767 = torch.constant.int 1
    %30249 = torch.aten.add.Tensor %30248, %30235, %int1_27767 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27768 = torch.constant.int 1
    %30250 = torch.aten.add.Tensor %30249, %30238, %int1_27768 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27769 = torch.constant.int 1
    %30251 = torch.aten.add.Tensor %30250, %30241, %int1_27769 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27770 = torch.constant.int 1
    %30252 = torch.aten.add.Tensor %30251, %30072, %int1_27770 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27771 = torch.constant.int 1
    %30253 = torch.aten.add.Tensor %30252, %30244, %int1_27771 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27772 = torch.constant.int 1
    %30254 = torch.aten.add.Tensor %30253, %30247, %int1_27772 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
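    // @__device_6: gather the seven remote partials (%30079 is device-local) and sum.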
    %30255 = torch_c.to_builtin_tensor %30037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27773 = arith.constant 1 : index
    %dim_27774 = tensor.dim %30255, %c1_27773 : tensor<4x?x4096xf16>
    %30256 = flow.tensor.transfer %30255 : tensor<4x?x4096xf16>{%dim_27774} to #hal.device.promise<@__device_6>
    %30257 = torch_c.from_builtin_tensor %30256 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30258 = torch_c.to_builtin_tensor %30044 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27775 = arith.constant 1 : index
    %dim_27776 = tensor.dim %30258, %c1_27775 : tensor<4x?x4096xf16>
    %30259 = flow.tensor.transfer %30258 : tensor<4x?x4096xf16>{%dim_27776} to #hal.device.promise<@__device_6>
    %30260 = torch_c.from_builtin_tensor %30259 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30261 = torch_c.to_builtin_tensor %30051 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27777 = arith.constant 1 : index
    %dim_27778 = tensor.dim %30261, %c1_27777 : tensor<4x?x4096xf16>
    %30262 = flow.tensor.transfer %30261 : tensor<4x?x4096xf16>{%dim_27778} to #hal.device.promise<@__device_6>
    %30263 = torch_c.from_builtin_tensor %30262 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30263, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30264 = torch_c.to_builtin_tensor %30058 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27779 = arith.constant 1 : index
    %dim_27780 = tensor.dim %30264, %c1_27779 : tensor<4x?x4096xf16>
    %30265 = flow.tensor.transfer %30264 : tensor<4x?x4096xf16>{%dim_27780} to #hal.device.promise<@__device_6>
    %30266 = torch_c.from_builtin_tensor %30265 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30267 = torch_c.to_builtin_tensor %30065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27781 = arith.constant 1 : index
    %dim_27782 = tensor.dim %30267, %c1_27781 : tensor<4x?x4096xf16>
    %30268 = flow.tensor.transfer %30267 : tensor<4x?x4096xf16>{%dim_27782} to #hal.device.promise<@__device_6>
    %30269 = torch_c.from_builtin_tensor %30268 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30270 = torch_c.to_builtin_tensor %30072 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27783 = arith.constant 1 : index
    %dim_27784 = tensor.dim %30270, %c1_27783 : tensor<4x?x4096xf16>
    %30271 = flow.tensor.transfer %30270 : tensor<4x?x4096xf16>{%dim_27784} to #hal.device.promise<@__device_6>
    %30272 = torch_c.from_builtin_tensor %30271 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30273 = torch_c.to_builtin_tensor %30086 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27785 = arith.constant 1 : index
    %dim_27786 = tensor.dim %30273, %c1_27785 : tensor<4x?x4096xf16>
    %30274 = flow.tensor.transfer %30273 : tensor<4x?x4096xf16>{%dim_27786} to #hal.device.promise<@__device_6>
    %30275 = torch_c.from_builtin_tensor %30274 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27787 = torch.constant.int 1
    %30276 = torch.aten.add.Tensor %30257, %30260, %int1_27787 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27788 = torch.constant.int 1
    %30277 = torch.aten.add.Tensor %30276, %30263, %int1_27788 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27789 = torch.constant.int 1
    %30278 = torch.aten.add.Tensor %30277, %30266, %int1_27789 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27790 = torch.constant.int 1
    %30279 = torch.aten.add.Tensor %30278, %30269, %int1_27790 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27791 = torch.constant.int 1
    %30280 = torch.aten.add.Tensor %30279, %30272, %int1_27791 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27792 = torch.constant.int 1
    %30281 = torch.aten.add.Tensor %30280, %30079, %int1_27792 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27793 = torch.constant.int 1
    %30282 = torch.aten.add.Tensor %30281, %30275, %int1_27793 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
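    // @__device_7: gather the seven remote partials (%30086 is device-local) and sum.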
    %30283 = torch_c.to_builtin_tensor %30037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27794 = arith.constant 1 : index
    %dim_27795 = tensor.dim %30283, %c1_27794 : tensor<4x?x4096xf16>
    %30284 = flow.tensor.transfer %30283 : tensor<4x?x4096xf16>{%dim_27795} to #hal.device.promise<@__device_7>
    %30285 = torch_c.from_builtin_tensor %30284 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30286 = torch_c.to_builtin_tensor %30044 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27796 = arith.constant 1 : index
    %dim_27797 = tensor.dim %30286, %c1_27796 : tensor<4x?x4096xf16>
    %30287 = flow.tensor.transfer %30286 : tensor<4x?x4096xf16>{%dim_27797} to #hal.device.promise<@__device_7>
    %30288 = torch_c.from_builtin_tensor %30287 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30289 = torch_c.to_builtin_tensor %30051 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27798 = arith.constant 1 : index
    %dim_27799 = tensor.dim %30289, %c1_27798 : tensor<4x?x4096xf16>
    %30290 = flow.tensor.transfer %30289 : tensor<4x?x4096xf16>{%dim_27799} to #hal.device.promise<@__device_7>
    %30291 = torch_c.from_builtin_tensor %30290 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30292 = torch_c.to_builtin_tensor %30058 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27800 = arith.constant 1 : index
    %dim_27801 = tensor.dim %30292, %c1_27800 : tensor<4x?x4096xf16>
    %30293 = flow.tensor.transfer %30292 : tensor<4x?x4096xf16>{%dim_27801} to #hal.device.promise<@__device_7>
    %30294 = torch_c.from_builtin_tensor %30293 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30295 = torch_c.to_builtin_tensor %30065 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27802 = arith.constant 1 : index
    %dim_27803 = tensor.dim %30295, %c1_27802 : tensor<4x?x4096xf16>
    %30296 = flow.tensor.transfer %30295 : tensor<4x?x4096xf16>{%dim_27803} to #hal.device.promise<@__device_7>
    %30297 = torch_c.from_builtin_tensor %30296 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30298 = torch_c.to_builtin_tensor %30072 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27804 = arith.constant 1 : index
    %dim_27805 = tensor.dim %30298, %c1_27804 : tensor<4x?x4096xf16>
    %30299 = flow.tensor.transfer %30298 : tensor<4x?x4096xf16>{%dim_27805} to #hal.device.promise<@__device_7>
    %30300 = torch_c.from_builtin_tensor %30299 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %30301 = torch_c.to_builtin_tensor %30079 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_27806 = arith.constant 1 : index
    %dim_27807 = tensor.dim %30301, %c1_27806 : tensor<4x?x4096xf16>
    %30302 = flow.tensor.transfer %30301 : tensor<4x?x4096xf16>{%dim_27807} to #hal.device.promise<@__device_7>
    %30303 = torch_c.from_builtin_tensor %30302 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27808 = torch.constant.int 1
    %30304 = torch.aten.add.Tensor %30285, %30288, %int1_27808 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27809 = torch.constant.int 1
    %30305 = torch.aten.add.Tensor %30304, %30291, %int1_27809 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27810 = torch.constant.int 1
    %30306 = torch.aten.add.Tensor %30305, %30294, %int1_27810 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27811 = torch.constant.int 1
    %30307 = torch.aten.add.Tensor %30306, %30297, %int1_27811 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27812 = torch.constant.int 1
    %30308 = torch.aten.add.Tensor %30307, %30300, %int1_27812 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27813 = torch.constant.int 1
    %30309 = torch.aten.add.Tensor %30308, %30303, %int1_27813 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27814 = torch.constant.int 1
    %30310 = torch.aten.add.Tensor %30309, %30086, %int1_27814 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
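    // Residual connection: add each device's fully reduced output (%30114, %30142, %30170,
    // %30198, %30226, %30254, %30282, %30310) to that device's copy of the residual stream
    // (%29791 .. %29798), one add per device.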
    %int1_27815 = torch.constant.int 1
    %30311 = torch.aten.add.Tensor %29791, %30114, %int1_27815 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27816 = torch.constant.int 1
    %30312 = torch.aten.add.Tensor %29792, %30142, %int1_27816 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27817 = torch.constant.int 1
    %30313 = torch.aten.add.Tensor %29793, %30170, %int1_27817 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27818 = torch.constant.int 1
    %30314 = torch.aten.add.Tensor %29794, %30198, %int1_27818 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27819 = torch.constant.int 1
    %30315 = torch.aten.add.Tensor %29795, %30226, %int1_27819 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27820 = torch.constant.int 1
    %30316 = torch.aten.add.Tensor %29796, %30254, %int1_27820 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27821 = torch.constant.int 1
    %30317 = torch.aten.add.Tensor %29797, %30282, %int1_27821 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_27822 = torch.constant.int 1
    %30318 = torch.aten.add.Tensor %29798, %30310, %int1_27822 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
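    // What follows is consistent with RMSNorm over the hidden dimension, computed
    // redundantly on all eight devices. PyTorch-style sketch of the whole sequence
    // (assuming eps = 1e-5 and a per-device weight replica w):
    //   y = (w * (x.float() * torch.rsqrt(x.float().pow(2).mean(-1, keepdim=True) + eps))).half()
    // Step 1: upcast f16 -> f32 (torch dtype code 6 = float32).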
    %int6_27823 = torch.constant.int 6
    %30319 = torch.prims.convert_element_type %30311, %int6_27823 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27824 = torch.constant.int 6
    %30320 = torch.prims.convert_element_type %30312, %int6_27824 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27825 = torch.constant.int 6
    %30321 = torch.prims.convert_element_type %30313, %int6_27825 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27826 = torch.constant.int 6
    %30322 = torch.prims.convert_element_type %30314, %int6_27826 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27827 = torch.constant.int 6
    %30323 = torch.prims.convert_element_type %30315, %int6_27827 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27828 = torch.constant.int 6
    %30324 = torch.prims.convert_element_type %30316, %int6_27828 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27829 = torch.constant.int 6
    %30325 = torch.prims.convert_element_type %30317, %int6_27829 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_27830 = torch.constant.int 6
    %30326 = torch.prims.convert_element_type %30318, %int6_27830 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
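    // Step 2: elementwise square, then mean over the last (4096) dim with keepdim,
    // yielding [4, ?, 1] per device.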
    %int2_27831 = torch.constant.int 2
    %30327 = torch.aten.pow.Tensor_Scalar %30319, %int2_27831 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27832 = torch.constant.int 2
    %30328 = torch.aten.pow.Tensor_Scalar %30320, %int2_27832 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27833 = torch.constant.int 2
    %30329 = torch.aten.pow.Tensor_Scalar %30321, %int2_27833 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27834 = torch.constant.int 2
    %30330 = torch.aten.pow.Tensor_Scalar %30322, %int2_27834 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27835 = torch.constant.int 2
    %30331 = torch.aten.pow.Tensor_Scalar %30323, %int2_27835 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27836 = torch.constant.int 2
    %30332 = torch.aten.pow.Tensor_Scalar %30324, %int2_27836 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27837 = torch.constant.int 2
    %30333 = torch.aten.pow.Tensor_Scalar %30325, %int2_27837 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_27838 = torch.constant.int 2
    %30334 = torch.aten.pow.Tensor_Scalar %30326, %int2_27838 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int-1_27839 = torch.constant.int -1
    %30335 = torch.prim.ListConstruct %int-1_27839 : (!torch.int) -> !torch.list<int>
    %true_27840 = torch.constant.bool true
    %none_27841 = torch.constant.none
    %30336 = torch.aten.mean.dim %30327, %30335, %true_27840, %none_27841 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27842 = torch.constant.int -1
    %30337 = torch.prim.ListConstruct %int-1_27842 : (!torch.int) -> !torch.list<int>
    %true_27843 = torch.constant.bool true
    %none_27844 = torch.constant.none
    %30338 = torch.aten.mean.dim %30328, %30337, %true_27843, %none_27844 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27845 = torch.constant.int -1
    %30339 = torch.prim.ListConstruct %int-1_27845 : (!torch.int) -> !torch.list<int>
    %true_27846 = torch.constant.bool true
    %none_27847 = torch.constant.none
    %30340 = torch.aten.mean.dim %30329, %30339, %true_27846, %none_27847 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27848 = torch.constant.int -1
    %30341 = torch.prim.ListConstruct %int-1_27848 : (!torch.int) -> !torch.list<int>
    %true_27849 = torch.constant.bool true
    %none_27850 = torch.constant.none
    %30342 = torch.aten.mean.dim %30330, %30341, %true_27849, %none_27850 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27851 = torch.constant.int -1
    %30343 = torch.prim.ListConstruct %int-1_27851 : (!torch.int) -> !torch.list<int>
    %true_27852 = torch.constant.bool true
    %none_27853 = torch.constant.none
    %30344 = torch.aten.mean.dim %30331, %30343, %true_27852, %none_27853 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27854 = torch.constant.int -1
    %30345 = torch.prim.ListConstruct %int-1_27854 : (!torch.int) -> !torch.list<int>
    %true_27855 = torch.constant.bool true
    %none_27856 = torch.constant.none
    %30346 = torch.aten.mean.dim %30332, %30345, %true_27855, %none_27856 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27857 = torch.constant.int -1
    %30347 = torch.prim.ListConstruct %int-1_27857 : (!torch.int) -> !torch.list<int>
    %true_27858 = torch.constant.bool true
    %none_27859 = torch.constant.none
    %30348 = torch.aten.mean.dim %30333, %30347, %true_27858, %none_27859 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_27860 = torch.constant.int -1
    %30349 = torch.prim.ListConstruct %int-1_27860 : (!torch.int) -> !torch.list<int>
    %true_27861 = torch.constant.bool true
    %none_27862 = torch.constant.none
    %30350 = torch.aten.mean.dim %30334, %30349, %true_27861, %none_27862 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30350, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
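    // Step 3: add eps = 1e-5 (materialized as its f32 rounding, 9.9999997473787516e-6),
    // then take the reciprocal square root.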
    %float9.999990e-06_27863 = torch.constant.float 9.9999997473787516E-6
    %int1_27864 = torch.constant.int 1
    %30351 = torch.aten.add.Scalar %30336, %float9.999990e-06_27863, %int1_27864 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27865 = torch.constant.float 9.9999997473787516E-6
    %int1_27866 = torch.constant.int 1
    %30352 = torch.aten.add.Scalar %30338, %float9.999990e-06_27865, %int1_27866 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27867 = torch.constant.float 9.9999997473787516E-6
    %int1_27868 = torch.constant.int 1
    %30353 = torch.aten.add.Scalar %30340, %float9.999990e-06_27867, %int1_27868 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27869 = torch.constant.float 9.9999997473787516E-6
    %int1_27870 = torch.constant.int 1
    %30354 = torch.aten.add.Scalar %30342, %float9.999990e-06_27869, %int1_27870 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27871 = torch.constant.float 9.9999997473787516E-6
    %int1_27872 = torch.constant.int 1
    %30355 = torch.aten.add.Scalar %30344, %float9.999990e-06_27871, %int1_27872 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27873 = torch.constant.float 9.9999997473787516E-6
    %int1_27874 = torch.constant.int 1
    %30356 = torch.aten.add.Scalar %30346, %float9.999990e-06_27873, %int1_27874 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27875 = torch.constant.float 9.9999997473787516E-6
    %int1_27876 = torch.constant.int 1
    %30357 = torch.aten.add.Scalar %30348, %float9.999990e-06_27875, %int1_27876 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_27877 = torch.constant.float 9.9999997473787516E-6
    %int1_27878 = torch.constant.int 1
    %30358 = torch.aten.add.Scalar %30350, %float9.999990e-06_27877, %int1_27878 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %30359 = torch.aten.rsqrt %30351 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %30360 = torch.aten.rsqrt %30352 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %30361 = torch.aten.rsqrt %30353 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %30362 = torch.aten.rsqrt %30354 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %30363 = torch.aten.rsqrt %30355 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %30364 = torch.aten.rsqrt %30356 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %30365 = torch.aten.rsqrt %30357 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %30366 = torch.aten.rsqrt %30358 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %30366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
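    // Step 4: normalize the upcast activations by the per-row rsqrt factor.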
    %30367 = torch.aten.mul.Tensor %30319, %30359 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30368 = torch.aten.mul.Tensor %30320, %30360 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30369 = torch.aten.mul.Tensor %30321, %30361 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30370 = torch.aten.mul.Tensor %30322, %30362 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30371 = torch.aten.mul.Tensor %30323, %30363 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30372 = torch.aten.mul.Tensor %30324, %30364 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30373 = torch.aten.mul.Tensor %30325, %30365 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30374 = torch.aten.mul.Tensor %30326, %30366 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
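    // Step 5: scale by the per-device replica of the norm weight
    // (%1088 .. %1095, tensor<4096xf32>).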
    %30375 = torch.aten.mul.Tensor %1088, %30367 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30376 = torch.aten.mul.Tensor %1089, %30368 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30377 = torch.aten.mul.Tensor %1090, %30369 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30378 = torch.aten.mul.Tensor %1091, %30370 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30379 = torch.aten.mul.Tensor %1092, %30371 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30380 = torch.aten.mul.Tensor %1093, %30372 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30381 = torch.aten.mul.Tensor %1094, %30373 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %30382 = torch.aten.mul.Tensor %1095, %30374 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %30382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
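    // Step 6: downcast the normalized result back to f16 (torch dtype code 5 = float16).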
    %int5_27879 = torch.constant.int 5
    %30383 = torch.prims.convert_element_type %30375, %int5_27879 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27880 = torch.constant.int 5
    %30384 = torch.prims.convert_element_type %30376, %int5_27880 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27881 = torch.constant.int 5
    %30385 = torch.prims.convert_element_type %30377, %int5_27881 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27882 = torch.constant.int 5
    %30386 = torch.prims.convert_element_type %30378, %int5_27882 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27883 = torch.constant.int 5
    %30387 = torch.prims.convert_element_type %30379, %int5_27883 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27884 = torch.constant.int 5
    %30388 = torch.prims.convert_element_type %30380, %int5_27884 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27885 = torch.constant.int 5
    %30389 = torch.prims.convert_element_type %30381, %int5_27885 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_27886 = torch.constant.int 5
    %30390 = torch.prims.convert_element_type %30382, %int5_27886 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %30390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
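    // Transpose the eight [512,4096] f16 weight shards (%1096 .. %1103) to [4096,512].
    // The shapes match a column-sharded attention Q projection (512 output columns per
    // device), mirroring the blk.*.attn_q.weight.shard.* parameter layout.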
    %int1_27887 = torch.constant.int 1
    %int0_27888 = torch.constant.int 0
    %30391 = torch.prim.ListConstruct %int1_27887, %int0_27888 : (!torch.int, !torch.int) -> !torch.list<int>
    %30392 = torch.aten.permute %1096, %30391 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_27889 = torch.constant.int 1
    %int0_27890 = torch.constant.int 0
    %30393 = torch.prim.ListConstruct %int1_27889, %int0_27890 : (!torch.int, !torch.int) -> !torch.list<int>
    %30394 = torch.aten.permute %1097, %30393 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_27891 = torch.constant.int 1
    %int0_27892 = torch.constant.int 0
    %30395 = torch.prim.ListConstruct %int1_27891, %int0_27892 : (!torch.int, !torch.int) -> !torch.list<int>
    %30396 = torch.aten.permute %1098, %30395 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_27893 = torch.constant.int 1
    %int0_27894 = torch.constant.int 0
    %30397 = torch.prim.ListConstruct %int1_27893, %int0_27894 : (!torch.int, !torch.int) -> !torch.list<int>
    %30398 = torch.aten.permute %1099, %30397 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_27895 = torch.constant.int 1
    %int0_27896 = torch.constant.int 0
    %30399 = torch.prim.ListConstruct %int1_27895, %int0_27896 : (!torch.int, !torch.int) -> !torch.list<int>
    %30400 = torch.aten.permute %1100, %30399 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_27897 = torch.constant.int 1
    %int0_27898 = torch.constant.int 0
    %30401 = torch.prim.ListConstruct %int1_27897, %int0_27898 : (!torch.int, !torch.int) -> !torch.list<int>
    %30402 = torch.aten.permute %1101, %30401 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_27899 = torch.constant.int 1
    %int0_27900 = torch.constant.int 0
    %30403 = torch.prim.ListConstruct %int1_27899, %int0_27900 : (!torch.int, !torch.int) -> !torch.list<int>
    %30404 = torch.aten.permute %1102, %30403 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_27901 = torch.constant.int 1
    %int0_27902 = torch.constant.int 0
    %30405 = torch.prim.ListConstruct %int1_27901, %int0_27902 : (!torch.int, !torch.int) -> !torch.list<int>
    %30406 = torch.aten.permute %1103, %30405 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
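    // Per-shard matmul: flatten [4,?,4096] -> [4*?,4096], multiply by the [4096,512]
    // shard, then reshape back to [4,?,512]. The same view/mm/view triple repeats once
    // per device below.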
    %int4_27903 = torch.constant.int 4
    %30407 = torch.aten.mul.int %int4_27903, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27904 = torch.constant.int 4096
    %30408 = torch.prim.ListConstruct %30407, %int4096_27904 : (!torch.int, !torch.int) -> !torch.list<int>
    %30409 = torch.aten.view %30383, %30408 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30409, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30410 = torch.aten.mm %30409, %30392 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %30410, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_27905 = torch.constant.int 4
    %int512_27906 = torch.constant.int 512
    %30411 = torch.prim.ListConstruct %int4_27905, %2482, %int512_27906 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30412 = torch.aten.view %30410, %30411 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %30412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27907 = torch.constant.int 4
    %30413 = torch.aten.mul.int %int4_27907, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27908 = torch.constant.int 4096
    %30414 = torch.prim.ListConstruct %30413, %int4096_27908 : (!torch.int, !torch.int) -> !torch.list<int>
    %30415 = torch.aten.view %30384, %30414 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30415, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30416 = torch.aten.mm %30415, %30394 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %30416, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_27909 = torch.constant.int 4
    %int512_27910 = torch.constant.int 512
    %30417 = torch.prim.ListConstruct %int4_27909, %2482, %int512_27910 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30418 = torch.aten.view %30416, %30417 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %30418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27911 = torch.constant.int 4
    %30419 = torch.aten.mul.int %int4_27911, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27912 = torch.constant.int 4096
    %30420 = torch.prim.ListConstruct %30419, %int4096_27912 : (!torch.int, !torch.int) -> !torch.list<int>
    %30421 = torch.aten.view %30385, %30420 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30421, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30422 = torch.aten.mm %30421, %30396 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %30422, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_27913 = torch.constant.int 4
    %int512_27914 = torch.constant.int 512
    %30423 = torch.prim.ListConstruct %int4_27913, %2482, %int512_27914 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30424 = torch.aten.view %30422, %30423 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %30424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27915 = torch.constant.int 4
    %30425 = torch.aten.mul.int %int4_27915, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27916 = torch.constant.int 4096
    %30426 = torch.prim.ListConstruct %30425, %int4096_27916 : (!torch.int, !torch.int) -> !torch.list<int>
    %30427 = torch.aten.view %30386, %30426 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30427, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30428 = torch.aten.mm %30427, %30398 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %30428, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_27917 = torch.constant.int 4
    %int512_27918 = torch.constant.int 512
    %30429 = torch.prim.ListConstruct %int4_27917, %2482, %int512_27918 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30430 = torch.aten.view %30428, %30429 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %30430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27919 = torch.constant.int 4
    %30431 = torch.aten.mul.int %int4_27919, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27920 = torch.constant.int 4096
    %30432 = torch.prim.ListConstruct %30431, %int4096_27920 : (!torch.int, !torch.int) -> !torch.list<int>
    %30433 = torch.aten.view %30387, %30432 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30433, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30434 = torch.aten.mm %30433, %30400 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %30434, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_27921 = torch.constant.int 4
    %int512_27922 = torch.constant.int 512
    %30435 = torch.prim.ListConstruct %int4_27921, %2482, %int512_27922 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30436 = torch.aten.view %30434, %30435 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %30436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27923 = torch.constant.int 4
    %30437 = torch.aten.mul.int %int4_27923, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27924 = torch.constant.int 4096
    %30438 = torch.prim.ListConstruct %30437, %int4096_27924 : (!torch.int, !torch.int) -> !torch.list<int>
    %30439 = torch.aten.view %30388, %30438 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30439, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30440 = torch.aten.mm %30439, %30402 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %30440, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_27925 = torch.constant.int 4
    %int512_27926 = torch.constant.int 512
    %30441 = torch.prim.ListConstruct %int4_27925, %2482, %int512_27926 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30442 = torch.aten.view %30440, %30441 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %30442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27927 = torch.constant.int 4
    %30443 = torch.aten.mul.int %int4_27927, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27928 = torch.constant.int 4096
    %30444 = torch.prim.ListConstruct %30443, %int4096_27928 : (!torch.int, !torch.int) -> !torch.list<int>
    %30445 = torch.aten.view %30389, %30444 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30445, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30446 = torch.aten.mm %30445, %30404 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %30446, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_27929 = torch.constant.int 4
    %int512_27930 = torch.constant.int 512
    %30447 = torch.prim.ListConstruct %int4_27929, %2482, %int512_27930 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30448 = torch.aten.view %30446, %30447 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %30448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_27931 = torch.constant.int 4
    %30449 = torch.aten.mul.int %int4_27931, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27932 = torch.constant.int 4096
    %30450 = torch.prim.ListConstruct %30449, %int4096_27932 : (!torch.int, !torch.int) -> !torch.list<int>
    %30451 = torch.aten.view %30390, %30450 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30451, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30452 = torch.aten.mm %30451, %30406 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %30452, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_27933 = torch.constant.int 4
    %int512_27934 = torch.constant.int 512
    %30453 = torch.prim.ListConstruct %int4_27933, %2482, %int512_27934 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30454 = torch.aten.view %30452, %30453 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %30454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
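    // Annotation: transpose the first group of eight 128x4096 weight shards (%1104..%1111) to
    // 4096x128; by the usual Q/K/V ordering these appear to be the sharded K-projection weights.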
    %int1_27935 = torch.constant.int 1
    %int0_27936 = torch.constant.int 0
    %30455 = torch.prim.ListConstruct %int1_27935, %int0_27936 : (!torch.int, !torch.int) -> !torch.list<int>
    %30456 = torch.aten.permute %1104, %30455 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27937 = torch.constant.int 1
    %int0_27938 = torch.constant.int 0
    %30457 = torch.prim.ListConstruct %int1_27937, %int0_27938 : (!torch.int, !torch.int) -> !torch.list<int>
    %30458 = torch.aten.permute %1105, %30457 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27939 = torch.constant.int 1
    %int0_27940 = torch.constant.int 0
    %30459 = torch.prim.ListConstruct %int1_27939, %int0_27940 : (!torch.int, !torch.int) -> !torch.list<int>
    %30460 = torch.aten.permute %1106, %30459 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27941 = torch.constant.int 1
    %int0_27942 = torch.constant.int 0
    %30461 = torch.prim.ListConstruct %int1_27941, %int0_27942 : (!torch.int, !torch.int) -> !torch.list<int>
    %30462 = torch.aten.permute %1107, %30461 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27943 = torch.constant.int 1
    %int0_27944 = torch.constant.int 0
    %30463 = torch.prim.ListConstruct %int1_27943, %int0_27944 : (!torch.int, !torch.int) -> !torch.list<int>
    %30464 = torch.aten.permute %1108, %30463 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27945 = torch.constant.int 1
    %int0_27946 = torch.constant.int 0
    %30465 = torch.prim.ListConstruct %int1_27945, %int0_27946 : (!torch.int, !torch.int) -> !torch.list<int>
    %30466 = torch.aten.permute %1109, %30465 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27947 = torch.constant.int 1
    %int0_27948 = torch.constant.int 0
    %30467 = torch.prim.ListConstruct %int1_27947, %int0_27948 : (!torch.int, !torch.int) -> !torch.list<int>
    %30468 = torch.aten.permute %1110, %30467 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27949 = torch.constant.int 1
    %int0_27950 = torch.constant.int 0
    %30469 = torch.prim.ListConstruct %int1_27949, %int0_27950 : (!torch.int, !torch.int) -> !torch.list<int>
    %30470 = torch.aten.permute %1111, %30469 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
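    // Annotation: per-shard K projection, using the same flatten / mm / reshape pattern as the
    // Q projection above, producing a [4,?,128] result per shard.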
    %int4_27951 = torch.constant.int 4
    %30471 = torch.aten.mul.int %int4_27951, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27952 = torch.constant.int 4096
    %30472 = torch.prim.ListConstruct %30471, %int4096_27952 : (!torch.int, !torch.int) -> !torch.list<int>
    %30473 = torch.aten.view %30383, %30472 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30473, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30474 = torch.aten.mm %30473, %30456 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30474, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_27953 = torch.constant.int 4
    %int128_27954 = torch.constant.int 128
    %30475 = torch.prim.ListConstruct %int4_27953, %2482, %int128_27954 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30476 = torch.aten.view %30474, %30475 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_27955 = torch.constant.int 4
    %30477 = torch.aten.mul.int %int4_27955, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27956 = torch.constant.int 4096
    %30478 = torch.prim.ListConstruct %30477, %int4096_27956 : (!torch.int, !torch.int) -> !torch.list<int>
    %30479 = torch.aten.view %30384, %30478 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30479, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30480 = torch.aten.mm %30479, %30458 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30480, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_27957 = torch.constant.int 4
    %int128_27958 = torch.constant.int 128
    %30481 = torch.prim.ListConstruct %int4_27957, %2482, %int128_27958 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30482 = torch.aten.view %30480, %30481 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_27959 = torch.constant.int 4
    %30483 = torch.aten.mul.int %int4_27959, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27960 = torch.constant.int 4096
    %30484 = torch.prim.ListConstruct %30483, %int4096_27960 : (!torch.int, !torch.int) -> !torch.list<int>
    %30485 = torch.aten.view %30385, %30484 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30485, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30486 = torch.aten.mm %30485, %30460 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30486, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_27961 = torch.constant.int 4
    %int128_27962 = torch.constant.int 128
    %30487 = torch.prim.ListConstruct %int4_27961, %2482, %int128_27962 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30488 = torch.aten.view %30486, %30487 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_27963 = torch.constant.int 4
    %30489 = torch.aten.mul.int %int4_27963, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27964 = torch.constant.int 4096
    %30490 = torch.prim.ListConstruct %30489, %int4096_27964 : (!torch.int, !torch.int) -> !torch.list<int>
    %30491 = torch.aten.view %30386, %30490 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30491, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30492 = torch.aten.mm %30491, %30462 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30492, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_27965 = torch.constant.int 4
    %int128_27966 = torch.constant.int 128
    %30493 = torch.prim.ListConstruct %int4_27965, %2482, %int128_27966 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30494 = torch.aten.view %30492, %30493 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_27967 = torch.constant.int 4
    %30495 = torch.aten.mul.int %int4_27967, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27968 = torch.constant.int 4096
    %30496 = torch.prim.ListConstruct %30495, %int4096_27968 : (!torch.int, !torch.int) -> !torch.list<int>
    %30497 = torch.aten.view %30387, %30496 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30497, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30498 = torch.aten.mm %30497, %30464 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30498, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_27969 = torch.constant.int 4
    %int128_27970 = torch.constant.int 128
    %30499 = torch.prim.ListConstruct %int4_27969, %2482, %int128_27970 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30500 = torch.aten.view %30498, %30499 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_27971 = torch.constant.int 4
    %30501 = torch.aten.mul.int %int4_27971, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27972 = torch.constant.int 4096
    %30502 = torch.prim.ListConstruct %30501, %int4096_27972 : (!torch.int, !torch.int) -> !torch.list<int>
    %30503 = torch.aten.view %30388, %30502 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30503, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30504 = torch.aten.mm %30503, %30466 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30504, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_27973 = torch.constant.int 4
    %int128_27974 = torch.constant.int 128
    %30505 = torch.prim.ListConstruct %int4_27973, %2482, %int128_27974 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30506 = torch.aten.view %30504, %30505 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_27975 = torch.constant.int 4
    %30507 = torch.aten.mul.int %int4_27975, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27976 = torch.constant.int 4096
    %30508 = torch.prim.ListConstruct %30507, %int4096_27976 : (!torch.int, !torch.int) -> !torch.list<int>
    %30509 = torch.aten.view %30389, %30508 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30509, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30510 = torch.aten.mm %30509, %30468 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30510, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_27977 = torch.constant.int 4
    %int128_27978 = torch.constant.int 128
    %30511 = torch.prim.ListConstruct %int4_27977, %2482, %int128_27978 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30512 = torch.aten.view %30510, %30511 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_27979 = torch.constant.int 4
    %30513 = torch.aten.mul.int %int4_27979, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_27980 = torch.constant.int 4096
    %30514 = torch.prim.ListConstruct %30513, %int4096_27980 : (!torch.int, !torch.int) -> !torch.list<int>
    %30515 = torch.aten.view %30390, %30514 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30515, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30516 = torch.aten.mm %30515, %30470 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30516, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_27981 = torch.constant.int 4
    %int128_27982 = torch.constant.int 128
    %30517 = torch.prim.ListConstruct %int4_27981, %2482, %int128_27982 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30518 = torch.aten.view %30516, %30517 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
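    // Annotation: transpose the second group of 128x4096 weight shards (%1112..%1119); these are
    // presumably the sharded V-projection weights, though the IR itself does not name them.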
    %int1_27983 = torch.constant.int 1
    %int0_27984 = torch.constant.int 0
    %30519 = torch.prim.ListConstruct %int1_27983, %int0_27984 : (!torch.int, !torch.int) -> !torch.list<int>
    %30520 = torch.aten.permute %1112, %30519 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27985 = torch.constant.int 1
    %int0_27986 = torch.constant.int 0
    %30521 = torch.prim.ListConstruct %int1_27985, %int0_27986 : (!torch.int, !torch.int) -> !torch.list<int>
    %30522 = torch.aten.permute %1113, %30521 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27987 = torch.constant.int 1
    %int0_27988 = torch.constant.int 0
    %30523 = torch.prim.ListConstruct %int1_27987, %int0_27988 : (!torch.int, !torch.int) -> !torch.list<int>
    %30524 = torch.aten.permute %1114, %30523 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27989 = torch.constant.int 1
    %int0_27990 = torch.constant.int 0
    %30525 = torch.prim.ListConstruct %int1_27989, %int0_27990 : (!torch.int, !torch.int) -> !torch.list<int>
    %30526 = torch.aten.permute %1115, %30525 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27991 = torch.constant.int 1
    %int0_27992 = torch.constant.int 0
    %30527 = torch.prim.ListConstruct %int1_27991, %int0_27992 : (!torch.int, !torch.int) -> !torch.list<int>
    %30528 = torch.aten.permute %1116, %30527 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27993 = torch.constant.int 1
    %int0_27994 = torch.constant.int 0
    %30529 = torch.prim.ListConstruct %int1_27993, %int0_27994 : (!torch.int, !torch.int) -> !torch.list<int>
    %30530 = torch.aten.permute %1117, %30529 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27995 = torch.constant.int 1
    %int0_27996 = torch.constant.int 0
    %30531 = torch.prim.ListConstruct %int1_27995, %int0_27996 : (!torch.int, !torch.int) -> !torch.list<int>
    %30532 = torch.aten.permute %1118, %30531 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_27997 = torch.constant.int 1
    %int0_27998 = torch.constant.int 0
    %30533 = torch.prim.ListConstruct %int1_27997, %int0_27998 : (!torch.int, !torch.int) -> !torch.list<int>
    %30534 = torch.aten.permute %1119, %30533 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
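    // Annotation: per-shard V projection, again yielding a [4,?,128] result per shard.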
    %int4_27999 = torch.constant.int 4
    %30535 = torch.aten.mul.int %int4_27999, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_28000 = torch.constant.int 4096
    %30536 = torch.prim.ListConstruct %30535, %int4096_28000 : (!torch.int, !torch.int) -> !torch.list<int>
    %30537 = torch.aten.view %30383, %30536 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30537, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30538 = torch.aten.mm %30537, %30520 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30538, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_28001 = torch.constant.int 4
    %int128_28002 = torch.constant.int 128
    %30539 = torch.prim.ListConstruct %int4_28001, %2482, %int128_28002 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30540 = torch.aten.view %30538, %30539 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_28003 = torch.constant.int 4
    %30541 = torch.aten.mul.int %int4_28003, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_28004 = torch.constant.int 4096
    %30542 = torch.prim.ListConstruct %30541, %int4096_28004 : (!torch.int, !torch.int) -> !torch.list<int>
    %30543 = torch.aten.view %30384, %30542 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30543, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30544 = torch.aten.mm %30543, %30522 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30544, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_28005 = torch.constant.int 4
    %int128_28006 = torch.constant.int 128
    %30545 = torch.prim.ListConstruct %int4_28005, %2482, %int128_28006 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30546 = torch.aten.view %30544, %30545 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_28007 = torch.constant.int 4
    %30547 = torch.aten.mul.int %int4_28007, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_28008 = torch.constant.int 4096
    %30548 = torch.prim.ListConstruct %30547, %int4096_28008 : (!torch.int, !torch.int) -> !torch.list<int>
    %30549 = torch.aten.view %30385, %30548 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30549, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30550 = torch.aten.mm %30549, %30524 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30550, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_28009 = torch.constant.int 4
    %int128_28010 = torch.constant.int 128
    %30551 = torch.prim.ListConstruct %int4_28009, %2482, %int128_28010 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30552 = torch.aten.view %30550, %30551 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_28011 = torch.constant.int 4
    %30553 = torch.aten.mul.int %int4_28011, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_28012 = torch.constant.int 4096
    %30554 = torch.prim.ListConstruct %30553, %int4096_28012 : (!torch.int, !torch.int) -> !torch.list<int>
    %30555 = torch.aten.view %30386, %30554 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30555, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30556 = torch.aten.mm %30555, %30526 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30556, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_28013 = torch.constant.int 4
    %int128_28014 = torch.constant.int 128
    %30557 = torch.prim.ListConstruct %int4_28013, %2482, %int128_28014 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30558 = torch.aten.view %30556, %30557 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_28015 = torch.constant.int 4
    %30559 = torch.aten.mul.int %int4_28015, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_28016 = torch.constant.int 4096
    %30560 = torch.prim.ListConstruct %30559, %int4096_28016 : (!torch.int, !torch.int) -> !torch.list<int>
    %30561 = torch.aten.view %30387, %30560 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30561, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30562 = torch.aten.mm %30561, %30528 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30562, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_28017 = torch.constant.int 4
    %int128_28018 = torch.constant.int 128
    %30563 = torch.prim.ListConstruct %int4_28017, %2482, %int128_28018 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30564 = torch.aten.view %30562, %30563 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_28019 = torch.constant.int 4
    %30565 = torch.aten.mul.int %int4_28019, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_28020 = torch.constant.int 4096
    %30566 = torch.prim.ListConstruct %30565, %int4096_28020 : (!torch.int, !torch.int) -> !torch.list<int>
    %30567 = torch.aten.view %30388, %30566 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30567, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30568 = torch.aten.mm %30567, %30530 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30568, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_28021 = torch.constant.int 4
    %int128_28022 = torch.constant.int 128
    %30569 = torch.prim.ListConstruct %int4_28021, %2482, %int128_28022 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30570 = torch.aten.view %30568, %30569 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_28023 = torch.constant.int 4
    %30571 = torch.aten.mul.int %int4_28023, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_28024 = torch.constant.int 4096
    %30572 = torch.prim.ListConstruct %30571, %int4096_28024 : (!torch.int, !torch.int) -> !torch.list<int>
    %30573 = torch.aten.view %30389, %30572 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30573, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30574 = torch.aten.mm %30573, %30532 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30574, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_28025 = torch.constant.int 4
    %int128_28026 = torch.constant.int 128
    %30575 = torch.prim.ListConstruct %int4_28025, %2482, %int128_28026 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30576 = torch.aten.view %30574, %30575 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_28027 = torch.constant.int 4
    %30577 = torch.aten.mul.int %int4_28027, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_28028 = torch.constant.int 4096
    %30578 = torch.prim.ListConstruct %30577, %int4096_28028 : (!torch.int, !torch.int) -> !torch.list<int>
    %30579 = torch.aten.view %30390, %30578 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %30579, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %30580 = torch.aten.mm %30579, %30534 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %30580, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_28029 = torch.constant.int 4
    %int128_28030 = torch.constant.int 128
    %30581 = torch.prim.ListConstruct %int4_28029, %2482, %int128_28030 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30582 = torch.aten.view %30580, %30581 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %30582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
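    // Annotation: reshape each Q shard from [4,?,512] to [4,?,4,128], splitting the projection
    // into four query heads of dimension 128 per shard.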
    %int4_28031 = torch.constant.int 4
    %int4_28032 = torch.constant.int 4
    %int128_28033 = torch.constant.int 128
    %30583 = torch.prim.ListConstruct %int4_28031, %2482, %int4_28032, %int128_28033 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30584 = torch.aten.view %30412, %30583 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28034 = torch.constant.int 4
    %int4_28035 = torch.constant.int 4
    %int128_28036 = torch.constant.int 128
    %30585 = torch.prim.ListConstruct %int4_28034, %2482, %int4_28035, %int128_28036 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30586 = torch.aten.view %30418, %30585 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28037 = torch.constant.int 4
    %int4_28038 = torch.constant.int 4
    %int128_28039 = torch.constant.int 128
    %30587 = torch.prim.ListConstruct %int4_28037, %2482, %int4_28038, %int128_28039 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30588 = torch.aten.view %30424, %30587 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28040 = torch.constant.int 4
    %int4_28041 = torch.constant.int 4
    %int128_28042 = torch.constant.int 128
    %30589 = torch.prim.ListConstruct %int4_28040, %2482, %int4_28041, %int128_28042 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30590 = torch.aten.view %30430, %30589 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28043 = torch.constant.int 4
    %int4_28044 = torch.constant.int 4
    %int128_28045 = torch.constant.int 128
    %30591 = torch.prim.ListConstruct %int4_28043, %2482, %int4_28044, %int128_28045 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30592 = torch.aten.view %30436, %30591 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28046 = torch.constant.int 4
    %int4_28047 = torch.constant.int 4
    %int128_28048 = torch.constant.int 128
    %30593 = torch.prim.ListConstruct %int4_28046, %2482, %int4_28047, %int128_28048 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30594 = torch.aten.view %30442, %30593 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28049 = torch.constant.int 4
    %int4_28050 = torch.constant.int 4
    %int128_28051 = torch.constant.int 128
    %30595 = torch.prim.ListConstruct %int4_28049, %2482, %int4_28050, %int128_28051 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30596 = torch.aten.view %30448, %30595 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28052 = torch.constant.int 4
    %int4_28053 = torch.constant.int 4
    %int128_28054 = torch.constant.int 128
    %30597 = torch.prim.ListConstruct %int4_28052, %2482, %int4_28053, %int128_28054 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30598 = torch.aten.view %30454, %30597 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
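    // Annotation: reshape each K shard from [4,?,128] to [4,?,1,128], i.e. a single KV head per
    // shard, consistent with grouped-query attention (4 query heads sharing 1 KV head per device).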
    %int4_28055 = torch.constant.int 4
    %int1_28056 = torch.constant.int 1
    %int128_28057 = torch.constant.int 128
    %30599 = torch.prim.ListConstruct %int4_28055, %2482, %int1_28056, %int128_28057 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30600 = torch.aten.view %30476, %30599 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28058 = torch.constant.int 4
    %int1_28059 = torch.constant.int 1
    %int128_28060 = torch.constant.int 128
    %30601 = torch.prim.ListConstruct %int4_28058, %2482, %int1_28059, %int128_28060 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30602 = torch.aten.view %30482, %30601 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28061 = torch.constant.int 4
    %int1_28062 = torch.constant.int 1
    %int128_28063 = torch.constant.int 128
    %30603 = torch.prim.ListConstruct %int4_28061, %2482, %int1_28062, %int128_28063 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30604 = torch.aten.view %30488, %30603 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28064 = torch.constant.int 4
    %int1_28065 = torch.constant.int 1
    %int128_28066 = torch.constant.int 128
    %30605 = torch.prim.ListConstruct %int4_28064, %2482, %int1_28065, %int128_28066 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30606 = torch.aten.view %30494, %30605 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28067 = torch.constant.int 4
    %int1_28068 = torch.constant.int 1
    %int128_28069 = torch.constant.int 128
    %30607 = torch.prim.ListConstruct %int4_28067, %2482, %int1_28068, %int128_28069 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30608 = torch.aten.view %30500, %30607 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28070 = torch.constant.int 4
    %int1_28071 = torch.constant.int 1
    %int128_28072 = torch.constant.int 128
    %30609 = torch.prim.ListConstruct %int4_28070, %2482, %int1_28071, %int128_28072 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30610 = torch.aten.view %30506, %30609 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28073 = torch.constant.int 4
    %int1_28074 = torch.constant.int 1
    %int128_28075 = torch.constant.int 128
    %30611 = torch.prim.ListConstruct %int4_28073, %2482, %int1_28074, %int128_28075 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30612 = torch.aten.view %30512, %30611 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28076 = torch.constant.int 4
    %int1_28077 = torch.constant.int 1
    %int128_28078 = torch.constant.int 128
    %30613 = torch.prim.ListConstruct %int4_28076, %2482, %int1_28077, %int128_28078 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30614 = torch.aten.view %30518, %30613 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
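    // Annotation: reshape each V shard from [4,?,128] to [4,?,1,128] in the same way.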
    %int4_28079 = torch.constant.int 4
    %int1_28080 = torch.constant.int 1
    %int128_28081 = torch.constant.int 128
    %30615 = torch.prim.ListConstruct %int4_28079, %2482, %int1_28080, %int128_28081 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30616 = torch.aten.view %30540, %30615 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28082 = torch.constant.int 4
    %int1_28083 = torch.constant.int 1
    %int128_28084 = torch.constant.int 128
    %30617 = torch.prim.ListConstruct %int4_28082, %2482, %int1_28083, %int128_28084 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30618 = torch.aten.view %30546, %30617 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28085 = torch.constant.int 4
    %int1_28086 = torch.constant.int 1
    %int128_28087 = torch.constant.int 128
    %30619 = torch.prim.ListConstruct %int4_28085, %2482, %int1_28086, %int128_28087 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30620 = torch.aten.view %30552, %30619 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28088 = torch.constant.int 4
    %int1_28089 = torch.constant.int 1
    %int128_28090 = torch.constant.int 128
    %30621 = torch.prim.ListConstruct %int4_28088, %2482, %int1_28089, %int128_28090 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30622 = torch.aten.view %30558, %30621 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28091 = torch.constant.int 4
    %int1_28092 = torch.constant.int 1
    %int128_28093 = torch.constant.int 128
    %30623 = torch.prim.ListConstruct %int4_28091, %2482, %int1_28092, %int128_28093 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30624 = torch.aten.view %30564, %30623 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28094 = torch.constant.int 4
    %int1_28095 = torch.constant.int 1
    %int128_28096 = torch.constant.int 128
    %30625 = torch.prim.ListConstruct %int4_28094, %2482, %int1_28095, %int128_28096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30626 = torch.aten.view %30570, %30625 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28097 = torch.constant.int 4
    %int1_28098 = torch.constant.int 1
    %int128_28099 = torch.constant.int 128
    %30627 = torch.prim.ListConstruct %int4_28097, %2482, %int1_28098, %int128_28099 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30628 = torch.aten.view %30576, %30627 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_28100 = torch.constant.int 4
    %int1_28101 = torch.constant.int 1
    %int128_28102 = torch.constant.int 128
    %30629 = torch.prim.ListConstruct %int4_28100, %2482, %int1_28101, %int128_28102 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30630 = torch.aten.view %30582, %30629 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
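    // Annotation: build what appears to be a rotary position embedding (RoPE) table on the host:
    // positions 0..131071 times inverse frequencies 1 / 500000^(2i/128) for i in 0..63, combined
    // as cos + i*sin into a [131072,64] complex<f32> tensor.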
    %int131072_28103 = torch.constant.int 131072
    %none_28104 = torch.constant.none
    %none_28105 = torch.constant.none
    %cpu_28106 = torch.constant.device "cpu"
    %false_28107 = torch.constant.bool false
    %30631 = torch.aten.arange %int131072_28103, %none_28104, %none_28105, %cpu_28106, %false_28107 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_28108 = torch.constant.int 0
    %int128_28109 = torch.constant.int 128
    %int2_28110 = torch.constant.int 2
    %none_28111 = torch.constant.none
    %none_28112 = torch.constant.none
    %cpu_28113 = torch.constant.device "cpu"
    %false_28114 = torch.constant.bool false
    %30632 = torch.aten.arange.start_step %int0_28108, %int128_28109, %int2_28110, %none_28111, %none_28112, %cpu_28113, %false_28114 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_28115 = torch.constant.int 0
    %int0_28116 = torch.constant.int 0
    %int64_28117 = torch.constant.int 64
    %int1_28118 = torch.constant.int 1
    %30633 = torch.aten.slice.Tensor %30632, %int0_28115, %int0_28116, %int64_28117, %int1_28118 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_28119 = torch.constant.int 6
    %30634 = torch.prims.convert_element_type %30633, %int6_28119 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_28120 = torch.constant.int 128
    %30635 = torch.aten.div.Scalar %30634, %int128_28120 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_28121 = torch.constant.float 5.000000e+05
    %30636 = torch.aten.pow.Scalar %float5.000000e05_28121, %30635 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %30637 = torch.aten.reciprocal %30636 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_28122 = torch.constant.float 1.000000e+00
    %30638 = torch.aten.mul.Scalar %30637, %float1.000000e00_28122 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_28123 = torch.constant.int 131072
    %int1_28124 = torch.constant.int 1
    %30639 = torch.prim.ListConstruct %int131072_28123, %int1_28124 : (!torch.int, !torch.int) -> !torch.list<int>
    %30640 = torch.aten.view %30631, %30639 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %30641 = torch.aten.mul.Tensor %30640, %30638 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %30642 = torch.aten.cos %30641 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %30643 = torch.aten.sin %30641 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %30644 = torch.aten.complex %30642, %30643 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
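    // Annotation: replicate the complex frequency table onto all eight devices via
    // flow.tensor.transfer.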
    %30645 = torch_c.to_builtin_tensor %30644 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30646 = flow.tensor.transfer %30645 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %30647 = torch_c.from_builtin_tensor %30646 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30648 = torch_c.to_builtin_tensor %30644 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30649 = flow.tensor.transfer %30648 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %30650 = torch_c.from_builtin_tensor %30649 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30651 = torch_c.to_builtin_tensor %30644 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30652 = flow.tensor.transfer %30651 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %30653 = torch_c.from_builtin_tensor %30652 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30654 = torch_c.to_builtin_tensor %30644 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30655 = flow.tensor.transfer %30654 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %30656 = torch_c.from_builtin_tensor %30655 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30657 = torch_c.to_builtin_tensor %30644 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30658 = flow.tensor.transfer %30657 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %30659 = torch_c.from_builtin_tensor %30658 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30660 = torch_c.to_builtin_tensor %30644 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30661 = flow.tensor.transfer %30660 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %30662 = torch_c.from_builtin_tensor %30661 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30663 = torch_c.to_builtin_tensor %30644 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30664 = flow.tensor.transfer %30663 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %30665 = torch_c.from_builtin_tensor %30664 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30666 = torch_c.to_builtin_tensor %30644 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30667 = flow.tensor.transfer %30666 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %30668 = torch_c.from_builtin_tensor %30667 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
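    // Annotation: slice the device-0 table down to the current sequence length (s0 * 16 positions,
    // taken from dim 1 of the Q shard) and unsqueeze it to [1,?,1,64] so it broadcasts over the
    // batch and head dimensions.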
    %int1_28125 = torch.constant.int 1
    %30669 = torch.aten.size.int %30412, %int1_28125 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_28126 = torch.constant.int 0
    %30670 = torch.aten.add.int %int0_28126, %30669 : !torch.int, !torch.int -> !torch.int
    %int0_28127 = torch.constant.int 0
    %int0_28128 = torch.constant.int 0
    %int1_28129 = torch.constant.int 1
    %30671 = torch.aten.slice.Tensor %30647, %int0_28127, %int0_28128, %30670, %int1_28129 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30671, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28130 = torch.constant.int 1
    %int0_28131 = torch.constant.int 0
    %int9223372036854775807_28132 = torch.constant.int 9223372036854775807
    %int1_28133 = torch.constant.int 1
    %30672 = torch.aten.slice.Tensor %30671, %int1_28130, %int0_28131, %int9223372036854775807_28132, %int1_28133 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30672, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28134 = torch.constant.int 0
    %30673 = torch.aten.unsqueeze %30672, %int0_28134 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30673, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28135 = torch.constant.int 2
    %30674 = torch.aten.unsqueeze %30673, %int2_28135 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30674, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28136 = torch.constant.int 3
    %int0_28137 = torch.constant.int 0
    %int9223372036854775807_28138 = torch.constant.int 9223372036854775807
    %int1_28139 = torch.constant.int 1
    %30675 = torch.aten.slice.Tensor %30674, %int3_28136, %int0_28137, %int9223372036854775807_28138, %int1_28139 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30675, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
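    // Annotation: apply the rotary embedding to Q shard 0 by bitcasting adjacent f16 pairs to
    // complex<f16>, multiplying by the complex frequencies (promoting to complex<f32>), bitcasting
    // back to [4,?,4,128] f32, and truncating to f16.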
    %30676 = torch_c.to_builtin_tensor %30584 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_28140 = arith.constant 1 : index
    %dim_28141 = tensor.dim %30676, %c1_28140 : tensor<4x?x4x128xf16>
    %30677 = flow.tensor.bitcast %30676 : tensor<4x?x4x128xf16>{%dim_28141} -> tensor<4x?x4x64xcomplex<f16>>{%dim_28141}
    %30678 = torch_c.from_builtin_tensor %30677 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %30678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %30679 = torch.aten.mul.Tensor %30678, %30675 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %30679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %30680 = torch_c.to_builtin_tensor %30679 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_28142 = arith.constant 1 : index
    %dim_28143 = tensor.dim %30680, %c1_28142 : tensor<4x?x4x64xcomplex<f32>>
    %30681 = flow.tensor.bitcast %30680 : tensor<4x?x4x64xcomplex<f32>>{%dim_28143} -> tensor<4x?x4x128xf32>{%dim_28143}
    %30682 = torch_c.from_builtin_tensor %30681 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %30682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_28144 = torch.constant.int 5
    %30683 = torch.prims.convert_element_type %30682, %int5_28144 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
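    // NOTE: the op sequence above (slice -> unsqueeze x2 -> bitcast f16 to
    // complex<f16> -> complex multiply -> bitcast complex<f32> back to f32 ->
    // truncate to f16) is consistent with applying rotary position embedding
    // (RoPE) to the shard-0 query activations on @__device_0: %30671 slices a
    // dynamic-length prefix (%30670 rows) of the precomputed [131072,64]
    // complex rotation table, and %30679 rotates each of the 64 complex lanes
    // of the [4,?,4,128] tensor. The bind_symbolic_shape ops tie the dynamic
    // sequence dimension to s0 * 16 throughout. The same block repeats below
    // once per device for shards 1..7.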
    %int1_28145 = torch.constant.int 1
    %30684 = torch.aten.size.int %30418, %int1_28145 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_28146 = torch.constant.int 0
    %30685 = torch.aten.add.int %int0_28146, %30684 : !torch.int, !torch.int -> !torch.int
    %int0_28147 = torch.constant.int 0
    %int0_28148 = torch.constant.int 0
    %int1_28149 = torch.constant.int 1
    %30686 = torch.aten.slice.Tensor %30650, %int0_28147, %int0_28148, %30685, %int1_28149 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30686, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28150 = torch.constant.int 1
    %int0_28151 = torch.constant.int 0
    %int9223372036854775807_28152 = torch.constant.int 9223372036854775807
    %int1_28153 = torch.constant.int 1
    %30687 = torch.aten.slice.Tensor %30686, %int1_28150, %int0_28151, %int9223372036854775807_28152, %int1_28153 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30687, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28154 = torch.constant.int 0
    %30688 = torch.aten.unsqueeze %30687, %int0_28154 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30688, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28155 = torch.constant.int 2
    %30689 = torch.aten.unsqueeze %30688, %int2_28155 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30689, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28156 = torch.constant.int 3
    %int0_28157 = torch.constant.int 0
    %int9223372036854775807_28158 = torch.constant.int 9223372036854775807
    %int1_28159 = torch.constant.int 1
    %30690 = torch.aten.slice.Tensor %30689, %int3_28156, %int0_28157, %int9223372036854775807_28158, %int1_28159 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30690, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30691 = torch_c.to_builtin_tensor %30586 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_28160 = arith.constant 1 : index
    %dim_28161 = tensor.dim %30691, %c1_28160 : tensor<4x?x4x128xf16>
    %30692 = flow.tensor.bitcast %30691 : tensor<4x?x4x128xf16>{%dim_28161} -> tensor<4x?x4x64xcomplex<f16>>{%dim_28161}
    %30693 = torch_c.from_builtin_tensor %30692 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %30693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %30694 = torch.aten.mul.Tensor %30693, %30690 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %30694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %30695 = torch_c.to_builtin_tensor %30694 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_28162 = arith.constant 1 : index
    %dim_28163 = tensor.dim %30695, %c1_28162 : tensor<4x?x4x64xcomplex<f32>>
    %30696 = flow.tensor.bitcast %30695 : tensor<4x?x4x64xcomplex<f32>>{%dim_28163} -> tensor<4x?x4x128xf32>{%dim_28163}
    %30697 = torch_c.from_builtin_tensor %30696 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %30697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_28164 = torch.constant.int 5
    %30698 = torch.prims.convert_element_type %30697, %int5_28164 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_28165 = torch.constant.int 1
    %30699 = torch.aten.size.int %30424, %int1_28165 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_28166 = torch.constant.int 0
    %30700 = torch.aten.add.int %int0_28166, %30699 : !torch.int, !torch.int -> !torch.int
    %int0_28167 = torch.constant.int 0
    %int0_28168 = torch.constant.int 0
    %int1_28169 = torch.constant.int 1
    %30701 = torch.aten.slice.Tensor %30653, %int0_28167, %int0_28168, %30700, %int1_28169 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30701, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28170 = torch.constant.int 1
    %int0_28171 = torch.constant.int 0
    %int9223372036854775807_28172 = torch.constant.int 9223372036854775807
    %int1_28173 = torch.constant.int 1
    %30702 = torch.aten.slice.Tensor %30701, %int1_28170, %int0_28171, %int9223372036854775807_28172, %int1_28173 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30702, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28174 = torch.constant.int 0
    %30703 = torch.aten.unsqueeze %30702, %int0_28174 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30703, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28175 = torch.constant.int 2
    %30704 = torch.aten.unsqueeze %30703, %int2_28175 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30704, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28176 = torch.constant.int 3
    %int0_28177 = torch.constant.int 0
    %int9223372036854775807_28178 = torch.constant.int 9223372036854775807
    %int1_28179 = torch.constant.int 1
    %30705 = torch.aten.slice.Tensor %30704, %int3_28176, %int0_28177, %int9223372036854775807_28178, %int1_28179 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30705, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30706 = torch_c.to_builtin_tensor %30588 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_28180 = arith.constant 1 : index
    %dim_28181 = tensor.dim %30706, %c1_28180 : tensor<4x?x4x128xf16>
    %30707 = flow.tensor.bitcast %30706 : tensor<4x?x4x128xf16>{%dim_28181} -> tensor<4x?x4x64xcomplex<f16>>{%dim_28181}
    %30708 = torch_c.from_builtin_tensor %30707 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %30708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %30709 = torch.aten.mul.Tensor %30708, %30705 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %30709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %30710 = torch_c.to_builtin_tensor %30709 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_28182 = arith.constant 1 : index
    %dim_28183 = tensor.dim %30710, %c1_28182 : tensor<4x?x4x64xcomplex<f32>>
    %30711 = flow.tensor.bitcast %30710 : tensor<4x?x4x64xcomplex<f32>>{%dim_28183} -> tensor<4x?x4x128xf32>{%dim_28183}
    %30712 = torch_c.from_builtin_tensor %30711 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %30712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_28184 = torch.constant.int 5
    %30713 = torch.prims.convert_element_type %30712, %int5_28184 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_28185 = torch.constant.int 1
    %30714 = torch.aten.size.int %30430, %int1_28185 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_28186 = torch.constant.int 0
    %30715 = torch.aten.add.int %int0_28186, %30714 : !torch.int, !torch.int -> !torch.int
    %int0_28187 = torch.constant.int 0
    %int0_28188 = torch.constant.int 0
    %int1_28189 = torch.constant.int 1
    %30716 = torch.aten.slice.Tensor %30656, %int0_28187, %int0_28188, %30715, %int1_28189 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30716, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28190 = torch.constant.int 1
    %int0_28191 = torch.constant.int 0
    %int9223372036854775807_28192 = torch.constant.int 9223372036854775807
    %int1_28193 = torch.constant.int 1
    %30717 = torch.aten.slice.Tensor %30716, %int1_28190, %int0_28191, %int9223372036854775807_28192, %int1_28193 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30717, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28194 = torch.constant.int 0
    %30718 = torch.aten.unsqueeze %30717, %int0_28194 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30718, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28195 = torch.constant.int 2
    %30719 = torch.aten.unsqueeze %30718, %int2_28195 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30719, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28196 = torch.constant.int 3
    %int0_28197 = torch.constant.int 0
    %int9223372036854775807_28198 = torch.constant.int 9223372036854775807
    %int1_28199 = torch.constant.int 1
    %30720 = torch.aten.slice.Tensor %30719, %int3_28196, %int0_28197, %int9223372036854775807_28198, %int1_28199 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30720, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30721 = torch_c.to_builtin_tensor %30590 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_28200 = arith.constant 1 : index
    %dim_28201 = tensor.dim %30721, %c1_28200 : tensor<4x?x4x128xf16>
    %30722 = flow.tensor.bitcast %30721 : tensor<4x?x4x128xf16>{%dim_28201} -> tensor<4x?x4x64xcomplex<f16>>{%dim_28201}
    %30723 = torch_c.from_builtin_tensor %30722 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %30723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %30724 = torch.aten.mul.Tensor %30723, %30720 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %30724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %30725 = torch_c.to_builtin_tensor %30724 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_28202 = arith.constant 1 : index
    %dim_28203 = tensor.dim %30725, %c1_28202 : tensor<4x?x4x64xcomplex<f32>>
    %30726 = flow.tensor.bitcast %30725 : tensor<4x?x4x64xcomplex<f32>>{%dim_28203} -> tensor<4x?x4x128xf32>{%dim_28203}
    %30727 = torch_c.from_builtin_tensor %30726 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %30727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_28204 = torch.constant.int 5
    %30728 = torch.prims.convert_element_type %30727, %int5_28204 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_28205 = torch.constant.int 1
    %30729 = torch.aten.size.int %30436, %int1_28205 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_28206 = torch.constant.int 0
    %30730 = torch.aten.add.int %int0_28206, %30729 : !torch.int, !torch.int -> !torch.int
    %int0_28207 = torch.constant.int 0
    %int0_28208 = torch.constant.int 0
    %int1_28209 = torch.constant.int 1
    %30731 = torch.aten.slice.Tensor %30659, %int0_28207, %int0_28208, %30730, %int1_28209 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30731, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28210 = torch.constant.int 1
    %int0_28211 = torch.constant.int 0
    %int9223372036854775807_28212 = torch.constant.int 9223372036854775807
    %int1_28213 = torch.constant.int 1
    %30732 = torch.aten.slice.Tensor %30731, %int1_28210, %int0_28211, %int9223372036854775807_28212, %int1_28213 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30732, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28214 = torch.constant.int 0
    %30733 = torch.aten.unsqueeze %30732, %int0_28214 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30733, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28215 = torch.constant.int 2
    %30734 = torch.aten.unsqueeze %30733, %int2_28215 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30734, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28216 = torch.constant.int 3
    %int0_28217 = torch.constant.int 0
    %int9223372036854775807_28218 = torch.constant.int 9223372036854775807
    %int1_28219 = torch.constant.int 1
    %30735 = torch.aten.slice.Tensor %30734, %int3_28216, %int0_28217, %int9223372036854775807_28218, %int1_28219 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30735, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30736 = torch_c.to_builtin_tensor %30592 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_28220 = arith.constant 1 : index
    %dim_28221 = tensor.dim %30736, %c1_28220 : tensor<4x?x4x128xf16>
    %30737 = flow.tensor.bitcast %30736 : tensor<4x?x4x128xf16>{%dim_28221} -> tensor<4x?x4x64xcomplex<f16>>{%dim_28221}
    %30738 = torch_c.from_builtin_tensor %30737 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %30738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %30739 = torch.aten.mul.Tensor %30738, %30735 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %30739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %30740 = torch_c.to_builtin_tensor %30739 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_28222 = arith.constant 1 : index
    %dim_28223 = tensor.dim %30740, %c1_28222 : tensor<4x?x4x64xcomplex<f32>>
    %30741 = flow.tensor.bitcast %30740 : tensor<4x?x4x64xcomplex<f32>>{%dim_28223} -> tensor<4x?x4x128xf32>{%dim_28223}
    %30742 = torch_c.from_builtin_tensor %30741 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %30742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_28224 = torch.constant.int 5
    %30743 = torch.prims.convert_element_type %30742, %int5_28224 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_28225 = torch.constant.int 1
    %30744 = torch.aten.size.int %30442, %int1_28225 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_28226 = torch.constant.int 0
    %30745 = torch.aten.add.int %int0_28226, %30744 : !torch.int, !torch.int -> !torch.int
    %int0_28227 = torch.constant.int 0
    %int0_28228 = torch.constant.int 0
    %int1_28229 = torch.constant.int 1
    %30746 = torch.aten.slice.Tensor %30662, %int0_28227, %int0_28228, %30745, %int1_28229 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30746, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28230 = torch.constant.int 1
    %int0_28231 = torch.constant.int 0
    %int9223372036854775807_28232 = torch.constant.int 9223372036854775807
    %int1_28233 = torch.constant.int 1
    %30747 = torch.aten.slice.Tensor %30746, %int1_28230, %int0_28231, %int9223372036854775807_28232, %int1_28233 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30747, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28234 = torch.constant.int 0
    %30748 = torch.aten.unsqueeze %30747, %int0_28234 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30748, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28235 = torch.constant.int 2
    %30749 = torch.aten.unsqueeze %30748, %int2_28235 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30749, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28236 = torch.constant.int 3
    %int0_28237 = torch.constant.int 0
    %int9223372036854775807_28238 = torch.constant.int 9223372036854775807
    %int1_28239 = torch.constant.int 1
    %30750 = torch.aten.slice.Tensor %30749, %int3_28236, %int0_28237, %int9223372036854775807_28238, %int1_28239 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30750, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30751 = torch_c.to_builtin_tensor %30594 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_28240 = arith.constant 1 : index
    %dim_28241 = tensor.dim %30751, %c1_28240 : tensor<4x?x4x128xf16>
    %30752 = flow.tensor.bitcast %30751 : tensor<4x?x4x128xf16>{%dim_28241} -> tensor<4x?x4x64xcomplex<f16>>{%dim_28241}
    %30753 = torch_c.from_builtin_tensor %30752 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %30753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %30754 = torch.aten.mul.Tensor %30753, %30750 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %30754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %30755 = torch_c.to_builtin_tensor %30754 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_28242 = arith.constant 1 : index
    %dim_28243 = tensor.dim %30755, %c1_28242 : tensor<4x?x4x64xcomplex<f32>>
    %30756 = flow.tensor.bitcast %30755 : tensor<4x?x4x64xcomplex<f32>>{%dim_28243} -> tensor<4x?x4x128xf32>{%dim_28243}
    %30757 = torch_c.from_builtin_tensor %30756 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %30757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_28244 = torch.constant.int 5
    %30758 = torch.prims.convert_element_type %30757, %int5_28244 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_28245 = torch.constant.int 1
    %30759 = torch.aten.size.int %30448, %int1_28245 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_28246 = torch.constant.int 0
    %30760 = torch.aten.add.int %int0_28246, %30759 : !torch.int, !torch.int -> !torch.int
    %int0_28247 = torch.constant.int 0
    %int0_28248 = torch.constant.int 0
    %int1_28249 = torch.constant.int 1
    %30761 = torch.aten.slice.Tensor %30665, %int0_28247, %int0_28248, %30760, %int1_28249 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30761, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28250 = torch.constant.int 1
    %int0_28251 = torch.constant.int 0
    %int9223372036854775807_28252 = torch.constant.int 9223372036854775807
    %int1_28253 = torch.constant.int 1
    %30762 = torch.aten.slice.Tensor %30761, %int1_28250, %int0_28251, %int9223372036854775807_28252, %int1_28253 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30762, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28254 = torch.constant.int 0
    %30763 = torch.aten.unsqueeze %30762, %int0_28254 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30763, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28255 = torch.constant.int 2
    %30764 = torch.aten.unsqueeze %30763, %int2_28255 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30764, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28256 = torch.constant.int 3
    %int0_28257 = torch.constant.int 0
    %int9223372036854775807_28258 = torch.constant.int 9223372036854775807
    %int1_28259 = torch.constant.int 1
    %30765 = torch.aten.slice.Tensor %30764, %int3_28256, %int0_28257, %int9223372036854775807_28258, %int1_28259 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30765, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30766 = torch_c.to_builtin_tensor %30596 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_28260 = arith.constant 1 : index
    %dim_28261 = tensor.dim %30766, %c1_28260 : tensor<4x?x4x128xf16>
    %30767 = flow.tensor.bitcast %30766 : tensor<4x?x4x128xf16>{%dim_28261} -> tensor<4x?x4x64xcomplex<f16>>{%dim_28261}
    %30768 = torch_c.from_builtin_tensor %30767 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %30768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %30769 = torch.aten.mul.Tensor %30768, %30765 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %30769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %30770 = torch_c.to_builtin_tensor %30769 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_28262 = arith.constant 1 : index
    %dim_28263 = tensor.dim %30770, %c1_28262 : tensor<4x?x4x64xcomplex<f32>>
    %30771 = flow.tensor.bitcast %30770 : tensor<4x?x4x64xcomplex<f32>>{%dim_28263} -> tensor<4x?x4x128xf32>{%dim_28263}
    %30772 = torch_c.from_builtin_tensor %30771 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %30772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_28264 = torch.constant.int 5
    %30773 = torch.prims.convert_element_type %30772, %int5_28264 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_28265 = torch.constant.int 1
    %30774 = torch.aten.size.int %30454, %int1_28265 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_28266 = torch.constant.int 0
    %30775 = torch.aten.add.int %int0_28266, %30774 : !torch.int, !torch.int -> !torch.int
    %int0_28267 = torch.constant.int 0
    %int0_28268 = torch.constant.int 0
    %int1_28269 = torch.constant.int 1
    %30776 = torch.aten.slice.Tensor %30668, %int0_28267, %int0_28268, %30775, %int1_28269 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30776, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28270 = torch.constant.int 1
    %int0_28271 = torch.constant.int 0
    %int9223372036854775807_28272 = torch.constant.int 9223372036854775807
    %int1_28273 = torch.constant.int 1
    %30777 = torch.aten.slice.Tensor %30776, %int1_28270, %int0_28271, %int9223372036854775807_28272, %int1_28273 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30777, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28274 = torch.constant.int 0
    %30778 = torch.aten.unsqueeze %30777, %int0_28274 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30778, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28275 = torch.constant.int 2
    %30779 = torch.aten.unsqueeze %30778, %int2_28275 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30779, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28276 = torch.constant.int 3
    %int0_28277 = torch.constant.int 0
    %int9223372036854775807_28278 = torch.constant.int 9223372036854775807
    %int1_28279 = torch.constant.int 1
    %30780 = torch.aten.slice.Tensor %30779, %int3_28276, %int0_28277, %int9223372036854775807_28278, %int1_28279 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30780, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30781 = torch_c.to_builtin_tensor %30598 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_28280 = arith.constant 1 : index
    %dim_28281 = tensor.dim %30781, %c1_28280 : tensor<4x?x4x128xf16>
    %30782 = flow.tensor.bitcast %30781 : tensor<4x?x4x128xf16>{%dim_28281} -> tensor<4x?x4x64xcomplex<f16>>{%dim_28281}
    %30783 = torch_c.from_builtin_tensor %30782 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %30783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %30784 = torch.aten.mul.Tensor %30783, %30780 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %30784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %30785 = torch_c.to_builtin_tensor %30784 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_28282 = arith.constant 1 : index
    %dim_28283 = tensor.dim %30785, %c1_28282 : tensor<4x?x4x64xcomplex<f32>>
    %30786 = flow.tensor.bitcast %30785 : tensor<4x?x4x64xcomplex<f32>>{%dim_28283} -> tensor<4x?x4x128xf32>{%dim_28283}
    %30787 = torch_c.from_builtin_tensor %30786 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %30787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_28284 = torch.constant.int 5
    %30788 = torch.prims.convert_element_type %30787, %int5_28284 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %30788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
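    // With all eight query shards rotated, the rotary table is rebuilt on the
    // host: positions p = arange(131072), even indices 2i = arange(0, 128, 2),
    // inv_freq_i = 1 / 500000^(2i/128), and the entry at (p, i) is
    // cos(p * inv_freq_i) + j*sin(p * inv_freq_i), yielding the [131072,64]
    // complex<f32> tensor %30802 below.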
    %int131072_28285 = torch.constant.int 131072
    %none_28286 = torch.constant.none
    %none_28287 = torch.constant.none
    %cpu_28288 = torch.constant.device "cpu"
    %false_28289 = torch.constant.bool false
    %30789 = torch.aten.arange %int131072_28285, %none_28286, %none_28287, %cpu_28288, %false_28289 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_28290 = torch.constant.int 0
    %int128_28291 = torch.constant.int 128
    %int2_28292 = torch.constant.int 2
    %none_28293 = torch.constant.none
    %none_28294 = torch.constant.none
    %cpu_28295 = torch.constant.device "cpu"
    %false_28296 = torch.constant.bool false
    %30790 = torch.aten.arange.start_step %int0_28290, %int128_28291, %int2_28292, %none_28293, %none_28294, %cpu_28295, %false_28296 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_28297 = torch.constant.int 0
    %int0_28298 = torch.constant.int 0
    %int64_28299 = torch.constant.int 64
    %int1_28300 = torch.constant.int 1
    %30791 = torch.aten.slice.Tensor %30790, %int0_28297, %int0_28298, %int64_28299, %int1_28300 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_28301 = torch.constant.int 6
    %30792 = torch.prims.convert_element_type %30791, %int6_28301 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_28302 = torch.constant.int 128
    %30793 = torch.aten.div.Scalar %30792, %int128_28302 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_28303 = torch.constant.float 5.000000e+05
    %30794 = torch.aten.pow.Scalar %float5.000000e05_28303, %30793 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %30795 = torch.aten.reciprocal %30794 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_28304 = torch.constant.float 1.000000e+00
    %30796 = torch.aten.mul.Scalar %30795, %float1.000000e00_28304 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_28305 = torch.constant.int 131072
    %int1_28306 = torch.constant.int 1
    %30797 = torch.prim.ListConstruct %int131072_28305, %int1_28306 : (!torch.int, !torch.int) -> !torch.list<int>
    %30798 = torch.aten.view %30789, %30797 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %30799 = torch.aten.mul.Tensor %30798, %30796 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %30800 = torch.aten.cos %30799 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %30801 = torch.aten.sin %30799 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %30802 = torch.aten.complex %30800, %30801 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
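    // %30802 is then replicated to every device via repeated
    // torch_c.to_builtin_tensor / flow.tensor.transfer /
    // torch_c.from_builtin_tensor triples, producing one per-device copy
    // (%30805, %30808, ..., %30826) for @__device_0 through @__device_7.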
    %30803 = torch_c.to_builtin_tensor %30802 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30804 = flow.tensor.transfer %30803 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %30805 = torch_c.from_builtin_tensor %30804 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30806 = torch_c.to_builtin_tensor %30802 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30807 = flow.tensor.transfer %30806 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %30808 = torch_c.from_builtin_tensor %30807 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30809 = torch_c.to_builtin_tensor %30802 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30810 = flow.tensor.transfer %30809 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %30811 = torch_c.from_builtin_tensor %30810 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30812 = torch_c.to_builtin_tensor %30802 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30813 = flow.tensor.transfer %30812 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %30814 = torch_c.from_builtin_tensor %30813 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30815 = torch_c.to_builtin_tensor %30802 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30816 = flow.tensor.transfer %30815 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %30817 = torch_c.from_builtin_tensor %30816 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30818 = torch_c.to_builtin_tensor %30802 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30819 = flow.tensor.transfer %30818 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %30820 = torch_c.from_builtin_tensor %30819 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30821 = torch_c.to_builtin_tensor %30802 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30822 = flow.tensor.transfer %30821 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %30823 = torch_c.from_builtin_tensor %30822 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %30824 = torch_c.to_builtin_tensor %30802 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %30825 = flow.tensor.transfer %30824 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %30826 = torch_c.from_builtin_tensor %30825 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %int1_28307 = torch.constant.int 1
    %30827 = torch.aten.size.int %30476, %int1_28307 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_28308 = torch.constant.int 0
    %30828 = torch.aten.add.int %int0_28308, %30827 : !torch.int, !torch.int -> !torch.int
    %int0_28309 = torch.constant.int 0
    %int0_28310 = torch.constant.int 0
    %int1_28311 = torch.constant.int 1
    %30829 = torch.aten.slice.Tensor %30805, %int0_28309, %int0_28310, %30828, %int1_28311 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30829, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28312 = torch.constant.int 1
    %int0_28313 = torch.constant.int 0
    %int9223372036854775807_28314 = torch.constant.int 9223372036854775807
    %int1_28315 = torch.constant.int 1
    %30830 = torch.aten.slice.Tensor %30829, %int1_28312, %int0_28313, %int9223372036854775807_28314, %int1_28315 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30830, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28316 = torch.constant.int 0
    %30831 = torch.aten.unsqueeze %30830, %int0_28316 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30831, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28317 = torch.constant.int 2
    %30832 = torch.aten.unsqueeze %30831, %int2_28317 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30832, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28318 = torch.constant.int 3
    %int0_28319 = torch.constant.int 0
    %int9223372036854775807_28320 = torch.constant.int 9223372036854775807
    %int1_28321 = torch.constant.int 1
    %30833 = torch.aten.slice.Tensor %30832, %int3_28318, %int0_28319, %int9223372036854775807_28320, %int1_28321 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30833, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30834 = torch_c.to_builtin_tensor %30600 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_28322 = arith.constant 1 : index
    %dim_28323 = tensor.dim %30834, %c1_28322 : tensor<4x?x1x128xf16>
    %30835 = flow.tensor.bitcast %30834 : tensor<4x?x1x128xf16>{%dim_28323} -> tensor<4x?x1x64xcomplex<f16>>{%dim_28323}
    %30836 = torch_c.from_builtin_tensor %30835 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %30836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %30837 = torch.aten.mul.Tensor %30836, %30833 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %30838 = torch_c.to_builtin_tensor %30837 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_28324 = arith.constant 1 : index
    %dim_28325 = tensor.dim %30838, %c1_28324 : tensor<4x?x1x64xcomplex<f32>>
    %30839 = flow.tensor.bitcast %30838 : tensor<4x?x1x64xcomplex<f32>>{%dim_28325} -> tensor<4x?x1x128xf32>{%dim_28325}
    %30840 = torch_c.from_builtin_tensor %30839 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %30840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_28326 = torch.constant.int 5
    %30841 = torch.prims.convert_element_type %30840, %int5_28326 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
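    // The same rotation pattern is now applied to what appears to be the key
    // path: each shard carries a single head ([4,?,1,128], bitcast to
    // [4,?,1,64] complex<f16>, consistent with grouped-query attention),
    // rotated against the device-local copy of the table and truncated back to
    // f16. This block likewise repeats once per device for shards 1..7.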
    %int1_28327 = torch.constant.int 1
    %30842 = torch.aten.size.int %30482, %int1_28327 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_28328 = torch.constant.int 0
    %30843 = torch.aten.add.int %int0_28328, %30842 : !torch.int, !torch.int -> !torch.int
    %int0_28329 = torch.constant.int 0
    %int0_28330 = torch.constant.int 0
    %int1_28331 = torch.constant.int 1
    %30844 = torch.aten.slice.Tensor %30808, %int0_28329, %int0_28330, %30843, %int1_28331 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30844, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28332 = torch.constant.int 1
    %int0_28333 = torch.constant.int 0
    %int9223372036854775807_28334 = torch.constant.int 9223372036854775807
    %int1_28335 = torch.constant.int 1
    %30845 = torch.aten.slice.Tensor %30844, %int1_28332, %int0_28333, %int9223372036854775807_28334, %int1_28335 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30845, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28336 = torch.constant.int 0
    %30846 = torch.aten.unsqueeze %30845, %int0_28336 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30846, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28337 = torch.constant.int 2
    %30847 = torch.aten.unsqueeze %30846, %int2_28337 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30847, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28338 = torch.constant.int 3
    %int0_28339 = torch.constant.int 0
    %int9223372036854775807_28340 = torch.constant.int 9223372036854775807
    %int1_28341 = torch.constant.int 1
    %30848 = torch.aten.slice.Tensor %30847, %int3_28338, %int0_28339, %int9223372036854775807_28340, %int1_28341 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30848, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30849 = torch_c.to_builtin_tensor %30602 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_28342 = arith.constant 1 : index
    %dim_28343 = tensor.dim %30849, %c1_28342 : tensor<4x?x1x128xf16>
    %30850 = flow.tensor.bitcast %30849 : tensor<4x?x1x128xf16>{%dim_28343} -> tensor<4x?x1x64xcomplex<f16>>{%dim_28343}
    %30851 = torch_c.from_builtin_tensor %30850 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %30851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %30852 = torch.aten.mul.Tensor %30851, %30848 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %30853 = torch_c.to_builtin_tensor %30852 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_28344 = arith.constant 1 : index
    %dim_28345 = tensor.dim %30853, %c1_28344 : tensor<4x?x1x64xcomplex<f32>>
    %30854 = flow.tensor.bitcast %30853 : tensor<4x?x1x64xcomplex<f32>>{%dim_28345} -> tensor<4x?x1x128xf32>{%dim_28345}
    %30855 = torch_c.from_builtin_tensor %30854 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %30855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_28346 = torch.constant.int 5
    %30856 = torch.prims.convert_element_type %30855, %int5_28346 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_28347 = torch.constant.int 1
    %30857 = torch.aten.size.int %30488, %int1_28347 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_28348 = torch.constant.int 0
    %30858 = torch.aten.add.int %int0_28348, %30857 : !torch.int, !torch.int -> !torch.int
    %int0_28349 = torch.constant.int 0
    %int0_28350 = torch.constant.int 0
    %int1_28351 = torch.constant.int 1
    %30859 = torch.aten.slice.Tensor %30811, %int0_28349, %int0_28350, %30858, %int1_28351 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30859, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28352 = torch.constant.int 1
    %int0_28353 = torch.constant.int 0
    %int9223372036854775807_28354 = torch.constant.int 9223372036854775807
    %int1_28355 = torch.constant.int 1
    %30860 = torch.aten.slice.Tensor %30859, %int1_28352, %int0_28353, %int9223372036854775807_28354, %int1_28355 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30860, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28356 = torch.constant.int 0
    %30861 = torch.aten.unsqueeze %30860, %int0_28356 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30861, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28357 = torch.constant.int 2
    %30862 = torch.aten.unsqueeze %30861, %int2_28357 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30862, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28358 = torch.constant.int 3
    %int0_28359 = torch.constant.int 0
    %int9223372036854775807_28360 = torch.constant.int 9223372036854775807
    %int1_28361 = torch.constant.int 1
    %30863 = torch.aten.slice.Tensor %30862, %int3_28358, %int0_28359, %int9223372036854775807_28360, %int1_28361 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30863, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30864 = torch_c.to_builtin_tensor %30604 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_28362 = arith.constant 1 : index
    %dim_28363 = tensor.dim %30864, %c1_28362 : tensor<4x?x1x128xf16>
    %30865 = flow.tensor.bitcast %30864 : tensor<4x?x1x128xf16>{%dim_28363} -> tensor<4x?x1x64xcomplex<f16>>{%dim_28363}
    %30866 = torch_c.from_builtin_tensor %30865 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %30866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %30867 = torch.aten.mul.Tensor %30866, %30863 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %30868 = torch_c.to_builtin_tensor %30867 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_28364 = arith.constant 1 : index
    %dim_28365 = tensor.dim %30868, %c1_28364 : tensor<4x?x1x64xcomplex<f32>>
    %30869 = flow.tensor.bitcast %30868 : tensor<4x?x1x64xcomplex<f32>>{%dim_28365} -> tensor<4x?x1x128xf32>{%dim_28365}
    %30870 = torch_c.from_builtin_tensor %30869 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %30870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_28366 = torch.constant.int 5
    %30871 = torch.prims.convert_element_type %30870, %int5_28366 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_28367 = torch.constant.int 1
    %30872 = torch.aten.size.int %30494, %int1_28367 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_28368 = torch.constant.int 0
    %30873 = torch.aten.add.int %int0_28368, %30872 : !torch.int, !torch.int -> !torch.int
    %int0_28369 = torch.constant.int 0
    %int0_28370 = torch.constant.int 0
    %int1_28371 = torch.constant.int 1
    %30874 = torch.aten.slice.Tensor %30814, %int0_28369, %int0_28370, %30873, %int1_28371 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30874, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28372 = torch.constant.int 1
    %int0_28373 = torch.constant.int 0
    %int9223372036854775807_28374 = torch.constant.int 9223372036854775807
    %int1_28375 = torch.constant.int 1
    %30875 = torch.aten.slice.Tensor %30874, %int1_28372, %int0_28373, %int9223372036854775807_28374, %int1_28375 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30875, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28376 = torch.constant.int 0
    %30876 = torch.aten.unsqueeze %30875, %int0_28376 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30876, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28377 = torch.constant.int 2
    %30877 = torch.aten.unsqueeze %30876, %int2_28377 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30877, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28378 = torch.constant.int 3
    %int0_28379 = torch.constant.int 0
    %int9223372036854775807_28380 = torch.constant.int 9223372036854775807
    %int1_28381 = torch.constant.int 1
    %30878 = torch.aten.slice.Tensor %30877, %int3_28378, %int0_28379, %int9223372036854775807_28380, %int1_28381 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30878, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30879 = torch_c.to_builtin_tensor %30606 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_28382 = arith.constant 1 : index
    %dim_28383 = tensor.dim %30879, %c1_28382 : tensor<4x?x1x128xf16>
    %30880 = flow.tensor.bitcast %30879 : tensor<4x?x1x128xf16>{%dim_28383} -> tensor<4x?x1x64xcomplex<f16>>{%dim_28383}
    %30881 = torch_c.from_builtin_tensor %30880 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %30881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %30882 = torch.aten.mul.Tensor %30881, %30878 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %30883 = torch_c.to_builtin_tensor %30882 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_28384 = arith.constant 1 : index
    %dim_28385 = tensor.dim %30883, %c1_28384 : tensor<4x?x1x64xcomplex<f32>>
    %30884 = flow.tensor.bitcast %30883 : tensor<4x?x1x64xcomplex<f32>>{%dim_28385} -> tensor<4x?x1x128xf32>{%dim_28385}
    %30885 = torch_c.from_builtin_tensor %30884 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %30885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_28386 = torch.constant.int 5
    %30886 = torch.prims.convert_element_type %30885, %int5_28386 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_28387 = torch.constant.int 1
    %30887 = torch.aten.size.int %30500, %int1_28387 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_28388 = torch.constant.int 0
    %30888 = torch.aten.add.int %int0_28388, %30887 : !torch.int, !torch.int -> !torch.int
    %int0_28389 = torch.constant.int 0
    %int0_28390 = torch.constant.int 0
    %int1_28391 = torch.constant.int 1
    %30889 = torch.aten.slice.Tensor %30817, %int0_28389, %int0_28390, %30888, %int1_28391 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30889, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28392 = torch.constant.int 1
    %int0_28393 = torch.constant.int 0
    %int9223372036854775807_28394 = torch.constant.int 9223372036854775807
    %int1_28395 = torch.constant.int 1
    %30890 = torch.aten.slice.Tensor %30889, %int1_28392, %int0_28393, %int9223372036854775807_28394, %int1_28395 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30890, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28396 = torch.constant.int 0
    %30891 = torch.aten.unsqueeze %30890, %int0_28396 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30891, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28397 = torch.constant.int 2
    %30892 = torch.aten.unsqueeze %30891, %int2_28397 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30892, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28398 = torch.constant.int 3
    %int0_28399 = torch.constant.int 0
    %int9223372036854775807_28400 = torch.constant.int 9223372036854775807
    %int1_28401 = torch.constant.int 1
    %30893 = torch.aten.slice.Tensor %30892, %int3_28398, %int0_28399, %int9223372036854775807_28400, %int1_28401 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30893, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30894 = torch_c.to_builtin_tensor %30608 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_28402 = arith.constant 1 : index
    %dim_28403 = tensor.dim %30894, %c1_28402 : tensor<4x?x1x128xf16>
    %30895 = flow.tensor.bitcast %30894 : tensor<4x?x1x128xf16>{%dim_28403} -> tensor<4x?x1x64xcomplex<f16>>{%dim_28403}
    %30896 = torch_c.from_builtin_tensor %30895 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %30896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %30897 = torch.aten.mul.Tensor %30896, %30893 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %30898 = torch_c.to_builtin_tensor %30897 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_28404 = arith.constant 1 : index
    %dim_28405 = tensor.dim %30898, %c1_28404 : tensor<4x?x1x64xcomplex<f32>>
    %30899 = flow.tensor.bitcast %30898 : tensor<4x?x1x64xcomplex<f32>>{%dim_28405} -> tensor<4x?x1x128xf32>{%dim_28405}
    %30900 = torch_c.from_builtin_tensor %30899 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %30900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_28406 = torch.constant.int 5
    %30901 = torch.prims.convert_element_type %30900, %int5_28406 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_28407 = torch.constant.int 1
    %30902 = torch.aten.size.int %30506, %int1_28407 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_28408 = torch.constant.int 0
    %30903 = torch.aten.add.int %int0_28408, %30902 : !torch.int, !torch.int -> !torch.int
    %int0_28409 = torch.constant.int 0
    %int0_28410 = torch.constant.int 0
    %int1_28411 = torch.constant.int 1
    %30904 = torch.aten.slice.Tensor %30820, %int0_28409, %int0_28410, %30903, %int1_28411 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30904, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28412 = torch.constant.int 1
    %int0_28413 = torch.constant.int 0
    %int9223372036854775807_28414 = torch.constant.int 9223372036854775807
    %int1_28415 = torch.constant.int 1
    %30905 = torch.aten.slice.Tensor %30904, %int1_28412, %int0_28413, %int9223372036854775807_28414, %int1_28415 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30905, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28416 = torch.constant.int 0
    %30906 = torch.aten.unsqueeze %30905, %int0_28416 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30906, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28417 = torch.constant.int 2
    %30907 = torch.aten.unsqueeze %30906, %int2_28417 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30907, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28418 = torch.constant.int 3
    %int0_28419 = torch.constant.int 0
    %int9223372036854775807_28420 = torch.constant.int 9223372036854775807
    %int1_28421 = torch.constant.int 1
    %30908 = torch.aten.slice.Tensor %30907, %int3_28418, %int0_28419, %int9223372036854775807_28420, %int1_28421 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30908, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30909 = torch_c.to_builtin_tensor %30610 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_28422 = arith.constant 1 : index
    %dim_28423 = tensor.dim %30909, %c1_28422 : tensor<4x?x1x128xf16>
    %30910 = flow.tensor.bitcast %30909 : tensor<4x?x1x128xf16>{%dim_28423} -> tensor<4x?x1x64xcomplex<f16>>{%dim_28423}
    %30911 = torch_c.from_builtin_tensor %30910 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %30911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %30912 = torch.aten.mul.Tensor %30911, %30908 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %30913 = torch_c.to_builtin_tensor %30912 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_28424 = arith.constant 1 : index
    %dim_28425 = tensor.dim %30913, %c1_28424 : tensor<4x?x1x64xcomplex<f32>>
    %30914 = flow.tensor.bitcast %30913 : tensor<4x?x1x64xcomplex<f32>>{%dim_28425} -> tensor<4x?x1x128xf32>{%dim_28425}
    %30915 = torch_c.from_builtin_tensor %30914 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %30915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_28426 = torch.constant.int 5
    %30916 = torch.prims.convert_element_type %30915, %int5_28426 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_28427 = torch.constant.int 1
    %30917 = torch.aten.size.int %30512, %int1_28427 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_28428 = torch.constant.int 0
    %30918 = torch.aten.add.int %int0_28428, %30917 : !torch.int, !torch.int -> !torch.int
    %int0_28429 = torch.constant.int 0
    %int0_28430 = torch.constant.int 0
    %int1_28431 = torch.constant.int 1
    %30919 = torch.aten.slice.Tensor %30823, %int0_28429, %int0_28430, %30918, %int1_28431 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30919, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28432 = torch.constant.int 1
    %int0_28433 = torch.constant.int 0
    %int9223372036854775807_28434 = torch.constant.int 9223372036854775807
    %int1_28435 = torch.constant.int 1
    %30920 = torch.aten.slice.Tensor %30919, %int1_28432, %int0_28433, %int9223372036854775807_28434, %int1_28435 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30920, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28436 = torch.constant.int 0
    %30921 = torch.aten.unsqueeze %30920, %int0_28436 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30921, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28437 = torch.constant.int 2
    %30922 = torch.aten.unsqueeze %30921, %int2_28437 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30922, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28438 = torch.constant.int 3
    %int0_28439 = torch.constant.int 0
    %int9223372036854775807_28440 = torch.constant.int 9223372036854775807
    %int1_28441 = torch.constant.int 1
    %30923 = torch.aten.slice.Tensor %30922, %int3_28438, %int0_28439, %int9223372036854775807_28440, %int1_28441 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30923, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30924 = torch_c.to_builtin_tensor %30612 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_28442 = arith.constant 1 : index
    %dim_28443 = tensor.dim %30924, %c1_28442 : tensor<4x?x1x128xf16>
    %30925 = flow.tensor.bitcast %30924 : tensor<4x?x1x128xf16>{%dim_28443} -> tensor<4x?x1x64xcomplex<f16>>{%dim_28443}
    %30926 = torch_c.from_builtin_tensor %30925 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %30926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %30927 = torch.aten.mul.Tensor %30926, %30923 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %30928 = torch_c.to_builtin_tensor %30927 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_28444 = arith.constant 1 : index
    %dim_28445 = tensor.dim %30928, %c1_28444 : tensor<4x?x1x64xcomplex<f32>>
    %30929 = flow.tensor.bitcast %30928 : tensor<4x?x1x64xcomplex<f32>>{%dim_28445} -> tensor<4x?x1x128xf32>{%dim_28445}
    %30930 = torch_c.from_builtin_tensor %30929 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %30930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_28446 = torch.constant.int 5
    %30931 = torch.prims.convert_element_type %30930, %int5_28446 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_28447 = torch.constant.int 1
    %30932 = torch.aten.size.int %30518, %int1_28447 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_28448 = torch.constant.int 0
    %30933 = torch.aten.add.int %int0_28448, %30932 : !torch.int, !torch.int -> !torch.int
    %int0_28449 = torch.constant.int 0
    %int0_28450 = torch.constant.int 0
    %int1_28451 = torch.constant.int 1
    %30934 = torch.aten.slice.Tensor %30826, %int0_28449, %int0_28450, %30933, %int1_28451 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30934, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_28452 = torch.constant.int 1
    %int0_28453 = torch.constant.int 0
    %int9223372036854775807_28454 = torch.constant.int 9223372036854775807
    %int1_28455 = torch.constant.int 1
    %30935 = torch.aten.slice.Tensor %30934, %int1_28452, %int0_28453, %int9223372036854775807_28454, %int1_28455 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %30935, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_28456 = torch.constant.int 0
    %30936 = torch.aten.unsqueeze %30935, %int0_28456 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %30936, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_28457 = torch.constant.int 2
    %30937 = torch.aten.unsqueeze %30936, %int2_28457 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30937, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_28458 = torch.constant.int 3
    %int0_28459 = torch.constant.int 0
    %int9223372036854775807_28460 = torch.constant.int 9223372036854775807
    %int1_28461 = torch.constant.int 1
    %30938 = torch.aten.slice.Tensor %30937, %int3_28458, %int0_28459, %int9223372036854775807_28460, %int1_28461 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30938, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %30939 = torch_c.to_builtin_tensor %30614 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_28462 = arith.constant 1 : index
    %dim_28463 = tensor.dim %30939, %c1_28462 : tensor<4x?x1x128xf16>
    %30940 = flow.tensor.bitcast %30939 : tensor<4x?x1x128xf16>{%dim_28463} -> tensor<4x?x1x64xcomplex<f16>>{%dim_28463}
    %30941 = torch_c.from_builtin_tensor %30940 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %30941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %30942 = torch.aten.mul.Tensor %30941, %30938 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %30942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %30943 = torch_c.to_builtin_tensor %30942 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_28464 = arith.constant 1 : index
    %dim_28465 = tensor.dim %30943, %c1_28464 : tensor<4x?x1x64xcomplex<f32>>
    %30944 = flow.tensor.bitcast %30943 : tensor<4x?x1x64xcomplex<f32>>{%dim_28465} -> tensor<4x?x1x128xf32>{%dim_28465}
    %30945 = torch_c.from_builtin_tensor %30944 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %30945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_28466 = torch.constant.int 5
    %30946 = torch.prims.convert_element_type %30945, %int5_28466 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %30946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
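    // Editorial note (assumption): the page-id tensors are scaled by 64, which
    // appears to be the per-page stride of 32 transformer blocks x 2 (K/V)
    // sub-slabs in the flattened paged KV cache; one scaled copy per device shard.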
    %int64_28467 = torch.constant.int 64
    %30947 = torch.aten.mul.Scalar %2364, %int64_28467 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30947, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_28468 = torch.constant.int 64
    %30948 = torch.aten.mul.Scalar %2367, %int64_28468 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30948, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_28469 = torch.constant.int 64
    %30949 = torch.aten.mul.Scalar %2370, %int64_28469 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30949, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_28470 = torch.constant.int 64
    %30950 = torch.aten.mul.Scalar %2373, %int64_28470 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30950, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_28471 = torch.constant.int 64
    %30951 = torch.aten.mul.Scalar %2376, %int64_28471 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30951, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_28472 = torch.constant.int 64
    %30952 = torch.aten.mul.Scalar %2379, %int64_28472 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30952, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_28473 = torch.constant.int 64
    %30953 = torch.aten.mul.Scalar %2382, %int64_28473 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30953, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_28474 = torch.constant.int 64
    %30954 = torch.aten.mul.Scalar %2385, %int64_28474 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30954, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
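    // Editorial note (assumption): the +30 offset appears to select this
    // transformer block's K sub-slab within each page (block_index * 2); the
    // matching V sub-slab is selected with a further +1 later in this section.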
    %int30 = torch.constant.int 30
    %int1_28475 = torch.constant.int 1
    %30955 = torch.aten.add.Scalar %30947, %int30, %int1_28475 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30955, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int30_28476 = torch.constant.int 30
    %int1_28477 = torch.constant.int 1
    %30956 = torch.aten.add.Scalar %30948, %int30_28476, %int1_28477 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30956, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int30_28478 = torch.constant.int 30
    %int1_28479 = torch.constant.int 1
    %30957 = torch.aten.add.Scalar %30949, %int30_28478, %int1_28479 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30957, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int30_28480 = torch.constant.int 30
    %int1_28481 = torch.constant.int 1
    %30958 = torch.aten.add.Scalar %30950, %int30_28480, %int1_28481 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30958, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int30_28482 = torch.constant.int 30
    %int1_28483 = torch.constant.int 1
    %30959 = torch.aten.add.Scalar %30951, %int30_28482, %int1_28483 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30959, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int30_28484 = torch.constant.int 30
    %int1_28485 = torch.constant.int 1
    %30960 = torch.aten.add.Scalar %30952, %int30_28484, %int1_28485 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30960, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int30_28486 = torch.constant.int 30
    %int1_28487 = torch.constant.int 1
    %30961 = torch.aten.add.Scalar %30953, %int30_28486, %int1_28487 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30961, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int30_28488 = torch.constant.int 30
    %int1_28489 = torch.constant.int 1
    %30962 = torch.aten.add.Scalar %30954, %int30_28488, %int1_28489 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %30962, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
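    // Editorial note: each rotated shard [4, s0*16, 1, 128] is viewed below as
    // [4, s0, 16, 1, 128], splitting the sequence dimension into s0 pages of 16
    // tokens each (assumption: 16 is the block_seq_stride of the paged cache).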
    %int4_28490 = torch.constant.int 4
    %int16_28491 = torch.constant.int 16
    %int1_28492 = torch.constant.int 1
    %int128_28493 = torch.constant.int 128
    %30963 = torch.prim.ListConstruct %int4_28490, %3095, %int16_28491, %int1_28492, %int128_28493 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30964 = torch.aten.view %30841, %30963 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %30964, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28494 = torch.constant.int 4
    %int16_28495 = torch.constant.int 16
    %int1_28496 = torch.constant.int 1
    %int128_28497 = torch.constant.int 128
    %30965 = torch.prim.ListConstruct %int4_28494, %3095, %int16_28495, %int1_28496, %int128_28497 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30966 = torch.aten.view %30856, %30965 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %30966, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28498 = torch.constant.int 4
    %int16_28499 = torch.constant.int 16
    %int1_28500 = torch.constant.int 1
    %int128_28501 = torch.constant.int 128
    %30967 = torch.prim.ListConstruct %int4_28498, %3095, %int16_28499, %int1_28500, %int128_28501 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30968 = torch.aten.view %30871, %30967 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %30968, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28502 = torch.constant.int 4
    %int16_28503 = torch.constant.int 16
    %int1_28504 = torch.constant.int 1
    %int128_28505 = torch.constant.int 128
    %30969 = torch.prim.ListConstruct %int4_28502, %3095, %int16_28503, %int1_28504, %int128_28505 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30970 = torch.aten.view %30886, %30969 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %30970, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28506 = torch.constant.int 4
    %int16_28507 = torch.constant.int 16
    %int1_28508 = torch.constant.int 1
    %int128_28509 = torch.constant.int 128
    %30971 = torch.prim.ListConstruct %int4_28506, %3095, %int16_28507, %int1_28508, %int128_28509 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30972 = torch.aten.view %30901, %30971 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %30972, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28510 = torch.constant.int 4
    %int16_28511 = torch.constant.int 16
    %int1_28512 = torch.constant.int 1
    %int128_28513 = torch.constant.int 128
    %30973 = torch.prim.ListConstruct %int4_28510, %3095, %int16_28511, %int1_28512, %int128_28513 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30974 = torch.aten.view %30916, %30973 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %30974, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28514 = torch.constant.int 4
    %int16_28515 = torch.constant.int 16
    %int1_28516 = torch.constant.int 1
    %int128_28517 = torch.constant.int 128
    %30975 = torch.prim.ListConstruct %int4_28514, %3095, %int16_28515, %int1_28516, %int128_28517 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30976 = torch.aten.view %30931, %30975 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %30976, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28518 = torch.constant.int 4
    %int16_28519 = torch.constant.int 16
    %int1_28520 = torch.constant.int 1
    %int128_28521 = torch.constant.int 128
    %30977 = torch.prim.ListConstruct %int4_28518, %3095, %int16_28519, %int1_28520, %int128_28521 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30978 = torch.aten.view %30946, %30977 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %30978, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
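    // Editorial note: the paged shards are then flattened from [4,?,16,1,128] to
    // [?,16,1,128], merging the batch-of-4 and page dimensions so they line up
    // with the flat cache indexing used by the scatter below (assumption).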
    %int4_28522 = torch.constant.int 4
    %30979 = torch.aten.mul.int %int4_28522, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28523 = torch.constant.int 16
    %int1_28524 = torch.constant.int 1
    %int128_28525 = torch.constant.int 128
    %30980 = torch.prim.ListConstruct %30979, %int16_28523, %int1_28524, %int128_28525 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30981 = torch.aten.view %30964, %30980 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %30981, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28526 = torch.constant.int 4
    %30982 = torch.aten.mul.int %int4_28526, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28527 = torch.constant.int 16
    %int1_28528 = torch.constant.int 1
    %int128_28529 = torch.constant.int 128
    %30983 = torch.prim.ListConstruct %30982, %int16_28527, %int1_28528, %int128_28529 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30984 = torch.aten.view %30966, %30983 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %30984, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28530 = torch.constant.int 4
    %30985 = torch.aten.mul.int %int4_28530, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28531 = torch.constant.int 16
    %int1_28532 = torch.constant.int 1
    %int128_28533 = torch.constant.int 128
    %30986 = torch.prim.ListConstruct %30985, %int16_28531, %int1_28532, %int128_28533 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30987 = torch.aten.view %30968, %30986 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %30987, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28534 = torch.constant.int 4
    %30988 = torch.aten.mul.int %int4_28534, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28535 = torch.constant.int 16
    %int1_28536 = torch.constant.int 1
    %int128_28537 = torch.constant.int 128
    %30989 = torch.prim.ListConstruct %30988, %int16_28535, %int1_28536, %int128_28537 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30990 = torch.aten.view %30970, %30989 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %30990, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28538 = torch.constant.int 4
    %30991 = torch.aten.mul.int %int4_28538, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28539 = torch.constant.int 16
    %int1_28540 = torch.constant.int 1
    %int128_28541 = torch.constant.int 128
    %30992 = torch.prim.ListConstruct %30991, %int16_28539, %int1_28540, %int128_28541 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30993 = torch.aten.view %30972, %30992 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %30993, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28542 = torch.constant.int 4
    %30994 = torch.aten.mul.int %int4_28542, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28543 = torch.constant.int 16
    %int1_28544 = torch.constant.int 1
    %int128_28545 = torch.constant.int 128
    %30995 = torch.prim.ListConstruct %30994, %int16_28543, %int1_28544, %int128_28545 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30996 = torch.aten.view %30974, %30995 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %30996, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28546 = torch.constant.int 4
    %30997 = torch.aten.mul.int %int4_28546, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28547 = torch.constant.int 16
    %int1_28548 = torch.constant.int 1
    %int128_28549 = torch.constant.int 128
    %30998 = torch.prim.ListConstruct %30997, %int16_28547, %int1_28548, %int128_28549 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %30999 = torch.aten.view %30976, %30998 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %30999, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28550 = torch.constant.int 4
    %31000 = torch.aten.mul.int %int4_28550, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28551 = torch.constant.int 16
    %int1_28552 = torch.constant.int 1
    %int128_28553 = torch.constant.int 128
    %31001 = torch.prim.ListConstruct %31000, %int16_28551, %int1_28552, %int128_28553 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31002 = torch.aten.view %30978, %31001 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31002, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
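    // Editorial note: the per-shard slot-index tensors [4,?] are flattened to
    // 1-D [?] so they can drive the torch.aten.index_put scatter below
    // (assumption).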
    %int4_28554 = torch.constant.int 4
    %31003 = torch.aten.mul.int %int4_28554, %3095 : !torch.int, !torch.int -> !torch.int
    %31004 = torch.prim.ListConstruct %31003 : (!torch.int) -> !torch.list<int>
    %31005 = torch.aten.view %30955, %31004 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31005, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28555 = torch.constant.int 4
    %31006 = torch.aten.mul.int %int4_28555, %3095 : !torch.int, !torch.int -> !torch.int
    %31007 = torch.prim.ListConstruct %31006 : (!torch.int) -> !torch.list<int>
    %31008 = torch.aten.view %30956, %31007 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31008, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28556 = torch.constant.int 4
    %31009 = torch.aten.mul.int %int4_28556, %3095 : !torch.int, !torch.int -> !torch.int
    %31010 = torch.prim.ListConstruct %31009 : (!torch.int) -> !torch.list<int>
    %31011 = torch.aten.view %30957, %31010 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31011, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28557 = torch.constant.int 4
    %31012 = torch.aten.mul.int %int4_28557, %3095 : !torch.int, !torch.int -> !torch.int
    %31013 = torch.prim.ListConstruct %31012 : (!torch.int) -> !torch.list<int>
    %31014 = torch.aten.view %30958, %31013 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31014, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28558 = torch.constant.int 4
    %31015 = torch.aten.mul.int %int4_28558, %3095 : !torch.int, !torch.int -> !torch.int
    %31016 = torch.prim.ListConstruct %31015 : (!torch.int) -> !torch.list<int>
    %31017 = torch.aten.view %30959, %31016 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31017, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28559 = torch.constant.int 4
    %31018 = torch.aten.mul.int %int4_28559, %3095 : !torch.int, !torch.int -> !torch.int
    %31019 = torch.prim.ListConstruct %31018 : (!torch.int) -> !torch.list<int>
    %31020 = torch.aten.view %30960, %31019 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31020, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28560 = torch.constant.int 4
    %31021 = torch.aten.mul.int %int4_28560, %3095 : !torch.int, !torch.int -> !torch.int
    %31022 = torch.prim.ListConstruct %31021 : (!torch.int) -> !torch.list<int>
    %31023 = torch.aten.view %30961, %31022 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31023, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28561 = torch.constant.int 4
    %31024 = torch.aten.mul.int %int4_28561, %3095 : !torch.int, !torch.int -> !torch.int
    %31025 = torch.prim.ListConstruct %31024 : (!torch.int) -> !torch.list<int>
    %31026 = torch.aten.view %30962, %31025 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31026, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
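    // Editorial note (assumption): the eight blocks below view the un-rotated
    // value shards [4,?,1,128] into the same paged layout [4,?,16,1,128];
    // values receive no RoPE, consistent with standard attention KV caching.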
    %int4_28562 = torch.constant.int 4
    %int16_28563 = torch.constant.int 16
    %int1_28564 = torch.constant.int 1
    %int128_28565 = torch.constant.int 128
    %31027 = torch.prim.ListConstruct %int4_28562, %3095, %int16_28563, %int1_28564, %int128_28565 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31028 = torch.aten.view %30616, %31027 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %31028, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28566 = torch.constant.int 4
    %int16_28567 = torch.constant.int 16
    %int1_28568 = torch.constant.int 1
    %int128_28569 = torch.constant.int 128
    %31029 = torch.prim.ListConstruct %int4_28566, %3095, %int16_28567, %int1_28568, %int128_28569 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31030 = torch.aten.view %30618, %31029 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %31030, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28570 = torch.constant.int 4
    %int16_28571 = torch.constant.int 16
    %int1_28572 = torch.constant.int 1
    %int128_28573 = torch.constant.int 128
    %31031 = torch.prim.ListConstruct %int4_28570, %3095, %int16_28571, %int1_28572, %int128_28573 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31032 = torch.aten.view %30620, %31031 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %31032, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28574 = torch.constant.int 4
    %int16_28575 = torch.constant.int 16
    %int1_28576 = torch.constant.int 1
    %int128_28577 = torch.constant.int 128
    %31033 = torch.prim.ListConstruct %int4_28574, %3095, %int16_28575, %int1_28576, %int128_28577 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31034 = torch.aten.view %30622, %31033 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %31034, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28578 = torch.constant.int 4
    %int16_28579 = torch.constant.int 16
    %int1_28580 = torch.constant.int 1
    %int128_28581 = torch.constant.int 128
    %31035 = torch.prim.ListConstruct %int4_28578, %3095, %int16_28579, %int1_28580, %int128_28581 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31036 = torch.aten.view %30624, %31035 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %31036, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28582 = torch.constant.int 4
    %int16_28583 = torch.constant.int 16
    %int1_28584 = torch.constant.int 1
    %int128_28585 = torch.constant.int 128
    %31037 = torch.prim.ListConstruct %int4_28582, %3095, %int16_28583, %int1_28584, %int128_28585 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31038 = torch.aten.view %30626, %31037 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %31038, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28586 = torch.constant.int 4
    %int16_28587 = torch.constant.int 16
    %int1_28588 = torch.constant.int 1
    %int128_28589 = torch.constant.int 128
    %31039 = torch.prim.ListConstruct %int4_28586, %3095, %int16_28587, %int1_28588, %int128_28589 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31040 = torch.aten.view %30628, %31039 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %31040, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_28590 = torch.constant.int 4
    %int16_28591 = torch.constant.int 16
    %int1_28592 = torch.constant.int 1
    %int128_28593 = torch.constant.int 128
    %31041 = torch.prim.ListConstruct %int4_28590, %3095, %int16_28591, %int1_28592, %int128_28593 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31042 = torch.aten.view %30630, %31041 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %31042, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
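    // Editorial note: as with the key shards, the paged value shards are
    // flattened to [?,16,1,128] in preparation for the scatter (assumption).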
    %int4_28594 = torch.constant.int 4
    %31043 = torch.aten.mul.int %int4_28594, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28595 = torch.constant.int 16
    %int1_28596 = torch.constant.int 1
    %int128_28597 = torch.constant.int 128
    %31044 = torch.prim.ListConstruct %31043, %int16_28595, %int1_28596, %int128_28597 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31045 = torch.aten.view %31028, %31044 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31045, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28598 = torch.constant.int 4
    %31046 = torch.aten.mul.int %int4_28598, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28599 = torch.constant.int 16
    %int1_28600 = torch.constant.int 1
    %int128_28601 = torch.constant.int 128
    %31047 = torch.prim.ListConstruct %31046, %int16_28599, %int1_28600, %int128_28601 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31048 = torch.aten.view %31030, %31047 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31048, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28602 = torch.constant.int 4
    %31049 = torch.aten.mul.int %int4_28602, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28603 = torch.constant.int 16
    %int1_28604 = torch.constant.int 1
    %int128_28605 = torch.constant.int 128
    %31050 = torch.prim.ListConstruct %31049, %int16_28603, %int1_28604, %int128_28605 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31051 = torch.aten.view %31032, %31050 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31051, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28606 = torch.constant.int 4
    %31052 = torch.aten.mul.int %int4_28606, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28607 = torch.constant.int 16
    %int1_28608 = torch.constant.int 1
    %int128_28609 = torch.constant.int 128
    %31053 = torch.prim.ListConstruct %31052, %int16_28607, %int1_28608, %int128_28609 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31054 = torch.aten.view %31034, %31053 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31054, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28610 = torch.constant.int 4
    %31055 = torch.aten.mul.int %int4_28610, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28611 = torch.constant.int 16
    %int1_28612 = torch.constant.int 1
    %int128_28613 = torch.constant.int 128
    %31056 = torch.prim.ListConstruct %31055, %int16_28611, %int1_28612, %int128_28613 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31057 = torch.aten.view %31036, %31056 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31057, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28614 = torch.constant.int 4
    %31058 = torch.aten.mul.int %int4_28614, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28615 = torch.constant.int 16
    %int1_28616 = torch.constant.int 1
    %int128_28617 = torch.constant.int 128
    %31059 = torch.prim.ListConstruct %31058, %int16_28615, %int1_28616, %int128_28617 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31060 = torch.aten.view %31038, %31059 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31060, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28618 = torch.constant.int 4
    %31061 = torch.aten.mul.int %int4_28618, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28619 = torch.constant.int 16
    %int1_28620 = torch.constant.int 1
    %int128_28621 = torch.constant.int 128
    %31062 = torch.prim.ListConstruct %31061, %int16_28619, %int1_28620, %int128_28621 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31063 = torch.aten.view %31040, %31062 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31063, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_28622 = torch.constant.int 4
    %31064 = torch.aten.mul.int %int4_28622, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_28623 = torch.constant.int 16
    %int1_28624 = torch.constant.int 1
    %int128_28625 = torch.constant.int 128
    %31065 = torch.prim.ListConstruct %31064, %int16_28623, %int1_28624, %int128_28625 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31066 = torch.aten.view %31042, %31065 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31066, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
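    // Editorial note (assumption): adding 1 to the K-slot indices selects the
    // adjacent V sub-slab of the same page; the resulting V-slot indices are
    // then flattened to 1-D exactly as the K-slot indices were above.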
    %int1_28626 = torch.constant.int 1
    %int1_28627 = torch.constant.int 1
    %31067 = torch.aten.add.Scalar %30955, %int1_28626, %int1_28627 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %31067, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_28628 = torch.constant.int 1
    %int1_28629 = torch.constant.int 1
    %31068 = torch.aten.add.Scalar %30956, %int1_28628, %int1_28629 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %31068, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_28630 = torch.constant.int 1
    %int1_28631 = torch.constant.int 1
    %31069 = torch.aten.add.Scalar %30957, %int1_28630, %int1_28631 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %31069, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_28632 = torch.constant.int 1
    %int1_28633 = torch.constant.int 1
    %31070 = torch.aten.add.Scalar %30958, %int1_28632, %int1_28633 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %31070, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_28634 = torch.constant.int 1
    %int1_28635 = torch.constant.int 1
    %31071 = torch.aten.add.Scalar %30959, %int1_28634, %int1_28635 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %31071, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_28636 = torch.constant.int 1
    %int1_28637 = torch.constant.int 1
    %31072 = torch.aten.add.Scalar %30960, %int1_28636, %int1_28637 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %31072, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_28638 = torch.constant.int 1
    %int1_28639 = torch.constant.int 1
    %31073 = torch.aten.add.Scalar %30961, %int1_28638, %int1_28639 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %31073, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_28640 = torch.constant.int 1
    %int1_28641 = torch.constant.int 1
    %31074 = torch.aten.add.Scalar %30962, %int1_28640, %int1_28641 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %31074, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_28642 = torch.constant.int 4
    %31075 = torch.aten.mul.int %int4_28642, %3095 : !torch.int, !torch.int -> !torch.int
    %31076 = torch.prim.ListConstruct %31075 : (!torch.int) -> !torch.list<int>
    %31077 = torch.aten.view %31067, %31076 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31077, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28643 = torch.constant.int 4
    %31078 = torch.aten.mul.int %int4_28643, %3095 : !torch.int, !torch.int -> !torch.int
    %31079 = torch.prim.ListConstruct %31078 : (!torch.int) -> !torch.list<int>
    %31080 = torch.aten.view %31068, %31079 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31080, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28644 = torch.constant.int 4
    %31081 = torch.aten.mul.int %int4_28644, %3095 : !torch.int, !torch.int -> !torch.int
    %31082 = torch.prim.ListConstruct %31081 : (!torch.int) -> !torch.list<int>
    %31083 = torch.aten.view %31069, %31082 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31083, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28645 = torch.constant.int 4
    %31084 = torch.aten.mul.int %int4_28645, %3095 : !torch.int, !torch.int -> !torch.int
    %31085 = torch.prim.ListConstruct %31084 : (!torch.int) -> !torch.list<int>
    %31086 = torch.aten.view %31070, %31085 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31086, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28646 = torch.constant.int 4
    %31087 = torch.aten.mul.int %int4_28646, %3095 : !torch.int, !torch.int -> !torch.int
    %31088 = torch.prim.ListConstruct %31087 : (!torch.int) -> !torch.list<int>
    %31089 = torch.aten.view %31071, %31088 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31089, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28647 = torch.constant.int 4
    %31090 = torch.aten.mul.int %int4_28647, %3095 : !torch.int, !torch.int -> !torch.int
    %31091 = torch.prim.ListConstruct %31090 : (!torch.int) -> !torch.list<int>
    %31092 = torch.aten.view %31072, %31091 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31092, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28648 = torch.constant.int 4
    %31093 = torch.aten.mul.int %int4_28648, %3095 : !torch.int, !torch.int -> !torch.int
    %31094 = torch.prim.ListConstruct %31093 : (!torch.int) -> !torch.list<int>
    %31095 = torch.aten.view %31073, %31094 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31095, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_28649 = torch.constant.int 4
    %31096 = torch.aten.mul.int %int4_28649, %3095 : !torch.int, !torch.int -> !torch.int
    %31097 = torch.prim.ListConstruct %31096 : (!torch.int) -> !torch.list<int>
    %31098 = torch.aten.view %31074, %31097 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31098, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
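    // Editorial note: the K-slot and V-slot index vectors are concatenated into
    // a single [s0*8] index list per device, so one scatter can write both the
    // key and value updates together (assumption).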
    %31099 = torch.prim.ListConstruct %31005, %31077 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_28650 = torch.constant.int 0
    %31100 = torch.aten.cat %31099, %int0_28650 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31100, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %31101 = torch.prim.ListConstruct %31008, %31080 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_28651 = torch.constant.int 0
    %31102 = torch.aten.cat %31101, %int0_28651 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31102, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %31103 = torch.prim.ListConstruct %31011, %31083 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_28652 = torch.constant.int 0
    %31104 = torch.aten.cat %31103, %int0_28652 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31104, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %31105 = torch.prim.ListConstruct %31014, %31086 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_28653 = torch.constant.int 0
    %31106 = torch.aten.cat %31105, %int0_28653 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31106, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %31107 = torch.prim.ListConstruct %31017, %31089 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_28654 = torch.constant.int 0
    %31108 = torch.aten.cat %31107, %int0_28654 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31108, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %31109 = torch.prim.ListConstruct %31020, %31092 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_28655 = torch.constant.int 0
    %31110 = torch.aten.cat %31109, %int0_28655 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31110, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %31111 = torch.prim.ListConstruct %31023, %31095 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_28656 = torch.constant.int 0
    %31112 = torch.aten.cat %31111, %int0_28656 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31112, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %31113 = torch.prim.ListConstruct %31026, %31098 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_28657 = torch.constant.int 0
    %31114 = torch.aten.cat %31113, %int0_28657 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %31114, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
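    // Editorial note: the paged K and V update tensors are concatenated along
    // dim 0, lining up one-to-one with the concatenated index list above
    // (assumption).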
    %31115 = torch.prim.ListConstruct %30981, %31045 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_28658 = torch.constant.int 0
    %31116 = torch.aten.cat %31115, %int0_28658 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31116, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31117 = torch.prim.ListConstruct %30984, %31048 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_28659 = torch.constant.int 0
    %31118 = torch.aten.cat %31117, %int0_28659 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31118, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31119 = torch.prim.ListConstruct %30987, %31051 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_28660 = torch.constant.int 0
    %31120 = torch.aten.cat %31119, %int0_28660 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31120, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31121 = torch.prim.ListConstruct %30990, %31054 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_28661 = torch.constant.int 0
    %31122 = torch.aten.cat %31121, %int0_28661 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31122, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31123 = torch.prim.ListConstruct %30993, %31057 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_28662 = torch.constant.int 0
    %31124 = torch.aten.cat %31123, %int0_28662 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31124, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31125 = torch.prim.ListConstruct %30996, %31060 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_28663 = torch.constant.int 0
    %31126 = torch.aten.cat %31125, %int0_28663 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31126, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31127 = torch.prim.ListConstruct %30999, %31063 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_28664 = torch.constant.int 0
    %31128 = torch.aten.cat %31127, %int0_28664 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31128, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31129 = torch.prim.ListConstruct %31002, %31066 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_28665 = torch.constant.int 0
    %31130 = torch.aten.cat %31129, %int0_28665 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31130, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
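    // Editorial note (assumption): per device, the flat KV cache slab
    // [?,131072] f16 is viewed as [pages, 32, 2, 16, 1, 128], flattened to
    // [pages*64, 16, 1, 128], updated in place via torch.aten.index_put with
    // the concatenated indices and values, then viewed back to [?,131072].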
    %int32_28666 = torch.constant.int 32
    %int2_28667 = torch.constant.int 2
    %int16_28668 = torch.constant.int 16
    %int1_28669 = torch.constant.int 1
    %int128_28670 = torch.constant.int 128
    %31131 = torch.prim.ListConstruct %3023, %int32_28666, %int2_28667, %int16_28668, %int1_28669, %int128_28670 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31132 = torch.aten.view %29281, %31131 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31132, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_28671 = torch.constant.int 32
    %31133 = torch.aten.mul.int %3023, %int32_28671 : !torch.int, !torch.int -> !torch.int
    %int2_28672 = torch.constant.int 2
    %31134 = torch.aten.mul.int %31133, %int2_28672 : !torch.int, !torch.int -> !torch.int
    %int16_28673 = torch.constant.int 16
    %int1_28674 = torch.constant.int 1
    %int128_28675 = torch.constant.int 128
    %31135 = torch.prim.ListConstruct %31134, %int16_28673, %int1_28674, %int128_28675 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31136 = torch.aten.view %31132, %31135 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31136, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31137 = torch.prim.ListConstruct %31100 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_28676 = torch.constant.bool false
    %31138 = torch.aten.index_put %31136, %31137, %31116, %false_28676 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31138, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_28677 = torch.constant.int 32
    %int2_28678 = torch.constant.int 2
    %int16_28679 = torch.constant.int 16
    %int1_28680 = torch.constant.int 1
    %int128_28681 = torch.constant.int 128
    %31139 = torch.prim.ListConstruct %3023, %int32_28677, %int2_28678, %int16_28679, %int1_28680, %int128_28681 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31140 = torch.aten.view %31138, %31139 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31140, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_28682 = torch.constant.int 131072
    %31141 = torch.prim.ListConstruct %3023, %int131072_28682 : (!torch.int, !torch.int) -> !torch.list<int>
    %31142 = torch.aten.view %31140, %31141 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %31142, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
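    // The same view -> index_put -> view round-trip now repeats for the seven
    // remaining cache views (%3026, %3029, %3032, %3035, %3038, %3041, %3044),
    // presumably one per device shard.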
    %int32_28683 = torch.constant.int 32
    %int2_28684 = torch.constant.int 2
    %int16_28685 = torch.constant.int 16
    %int1_28686 = torch.constant.int 1
    %int128_28687 = torch.constant.int 128
    %31143 = torch.prim.ListConstruct %3026, %int32_28683, %int2_28684, %int16_28685, %int1_28686, %int128_28687 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31144 = torch.aten.view %29293, %31143 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31144, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_28688 = torch.constant.int 32
    %31145 = torch.aten.mul.int %3026, %int32_28688 : !torch.int, !torch.int -> !torch.int
    %int2_28689 = torch.constant.int 2
    %31146 = torch.aten.mul.int %31145, %int2_28689 : !torch.int, !torch.int -> !torch.int
    %int16_28690 = torch.constant.int 16
    %int1_28691 = torch.constant.int 1
    %int128_28692 = torch.constant.int 128
    %31147 = torch.prim.ListConstruct %31146, %int16_28690, %int1_28691, %int128_28692 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31148 = torch.aten.view %31144, %31147 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31148, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31149 = torch.prim.ListConstruct %31102 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_28693 = torch.constant.bool false
    %31150 = torch.aten.index_put %31148, %31149, %31118, %false_28693 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31150, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_28694 = torch.constant.int 32
    %int2_28695 = torch.constant.int 2
    %int16_28696 = torch.constant.int 16
    %int1_28697 = torch.constant.int 1
    %int128_28698 = torch.constant.int 128
    %31151 = torch.prim.ListConstruct %3026, %int32_28694, %int2_28695, %int16_28696, %int1_28697, %int128_28698 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31152 = torch.aten.view %31150, %31151 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31152, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_28699 = torch.constant.int 131072
    %31153 = torch.prim.ListConstruct %3026, %int131072_28699 : (!torch.int, !torch.int) -> !torch.list<int>
    %31154 = torch.aten.view %31152, %31153 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %31154, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_28700 = torch.constant.int 32
    %int2_28701 = torch.constant.int 2
    %int16_28702 = torch.constant.int 16
    %int1_28703 = torch.constant.int 1
    %int128_28704 = torch.constant.int 128
    %31155 = torch.prim.ListConstruct %3029, %int32_28700, %int2_28701, %int16_28702, %int1_28703, %int128_28704 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31156 = torch.aten.view %29305, %31155 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31156, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_28705 = torch.constant.int 32
    %31157 = torch.aten.mul.int %3029, %int32_28705 : !torch.int, !torch.int -> !torch.int
    %int2_28706 = torch.constant.int 2
    %31158 = torch.aten.mul.int %31157, %int2_28706 : !torch.int, !torch.int -> !torch.int
    %int16_28707 = torch.constant.int 16
    %int1_28708 = torch.constant.int 1
    %int128_28709 = torch.constant.int 128
    %31159 = torch.prim.ListConstruct %31158, %int16_28707, %int1_28708, %int128_28709 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31160 = torch.aten.view %31156, %31159 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31160, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31161 = torch.prim.ListConstruct %31104 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_28710 = torch.constant.bool false
    %31162 = torch.aten.index_put %31160, %31161, %31120, %false_28710 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31162, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_28711 = torch.constant.int 32
    %int2_28712 = torch.constant.int 2
    %int16_28713 = torch.constant.int 16
    %int1_28714 = torch.constant.int 1
    %int128_28715 = torch.constant.int 128
    %31163 = torch.prim.ListConstruct %3029, %int32_28711, %int2_28712, %int16_28713, %int1_28714, %int128_28715 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31164 = torch.aten.view %31162, %31163 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31164, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_28716 = torch.constant.int 131072
    %31165 = torch.prim.ListConstruct %3029, %int131072_28716 : (!torch.int, !torch.int) -> !torch.list<int>
    %31166 = torch.aten.view %31164, %31165 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %31166, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_28717 = torch.constant.int 32
    %int2_28718 = torch.constant.int 2
    %int16_28719 = torch.constant.int 16
    %int1_28720 = torch.constant.int 1
    %int128_28721 = torch.constant.int 128
    %31167 = torch.prim.ListConstruct %3032, %int32_28717, %int2_28718, %int16_28719, %int1_28720, %int128_28721 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31168 = torch.aten.view %29317, %31167 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31168, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_28722 = torch.constant.int 32
    %31169 = torch.aten.mul.int %3032, %int32_28722 : !torch.int, !torch.int -> !torch.int
    %int2_28723 = torch.constant.int 2
    %31170 = torch.aten.mul.int %31169, %int2_28723 : !torch.int, !torch.int -> !torch.int
    %int16_28724 = torch.constant.int 16
    %int1_28725 = torch.constant.int 1
    %int128_28726 = torch.constant.int 128
    %31171 = torch.prim.ListConstruct %31170, %int16_28724, %int1_28725, %int128_28726 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31172 = torch.aten.view %31168, %31171 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31172, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31173 = torch.prim.ListConstruct %31106 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_28727 = torch.constant.bool false
    %31174 = torch.aten.index_put %31172, %31173, %31122, %false_28727 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31174, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_28728 = torch.constant.int 32
    %int2_28729 = torch.constant.int 2
    %int16_28730 = torch.constant.int 16
    %int1_28731 = torch.constant.int 1
    %int128_28732 = torch.constant.int 128
    %31175 = torch.prim.ListConstruct %3032, %int32_28728, %int2_28729, %int16_28730, %int1_28731, %int128_28732 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31176 = torch.aten.view %31174, %31175 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31176, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_28733 = torch.constant.int 131072
    %31177 = torch.prim.ListConstruct %3032, %int131072_28733 : (!torch.int, !torch.int) -> !torch.list<int>
    %31178 = torch.aten.view %31176, %31177 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %31178, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_28734 = torch.constant.int 32
    %int2_28735 = torch.constant.int 2
    %int16_28736 = torch.constant.int 16
    %int1_28737 = torch.constant.int 1
    %int128_28738 = torch.constant.int 128
    %31179 = torch.prim.ListConstruct %3035, %int32_28734, %int2_28735, %int16_28736, %int1_28737, %int128_28738 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31180 = torch.aten.view %29329, %31179 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31180, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_28739 = torch.constant.int 32
    %31181 = torch.aten.mul.int %3035, %int32_28739 : !torch.int, !torch.int -> !torch.int
    %int2_28740 = torch.constant.int 2
    %31182 = torch.aten.mul.int %31181, %int2_28740 : !torch.int, !torch.int -> !torch.int
    %int16_28741 = torch.constant.int 16
    %int1_28742 = torch.constant.int 1
    %int128_28743 = torch.constant.int 128
    %31183 = torch.prim.ListConstruct %31182, %int16_28741, %int1_28742, %int128_28743 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31184 = torch.aten.view %31180, %31183 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31184, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31185 = torch.prim.ListConstruct %31108 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_28744 = torch.constant.bool false
    %31186 = torch.aten.index_put %31184, %31185, %31124, %false_28744 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31186, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_28745 = torch.constant.int 32
    %int2_28746 = torch.constant.int 2
    %int16_28747 = torch.constant.int 16
    %int1_28748 = torch.constant.int 1
    %int128_28749 = torch.constant.int 128
    %31187 = torch.prim.ListConstruct %3035, %int32_28745, %int2_28746, %int16_28747, %int1_28748, %int128_28749 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31188 = torch.aten.view %31186, %31187 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31188, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_28750 = torch.constant.int 131072
    %31189 = torch.prim.ListConstruct %3035, %int131072_28750 : (!torch.int, !torch.int) -> !torch.list<int>
    %31190 = torch.aten.view %31188, %31189 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %31190, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_28751 = torch.constant.int 32
    %int2_28752 = torch.constant.int 2
    %int16_28753 = torch.constant.int 16
    %int1_28754 = torch.constant.int 1
    %int128_28755 = torch.constant.int 128
    %31191 = torch.prim.ListConstruct %3038, %int32_28751, %int2_28752, %int16_28753, %int1_28754, %int128_28755 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31192 = torch.aten.view %29341, %31191 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31192, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_28756 = torch.constant.int 32
    %31193 = torch.aten.mul.int %3038, %int32_28756 : !torch.int, !torch.int -> !torch.int
    %int2_28757 = torch.constant.int 2
    %31194 = torch.aten.mul.int %31193, %int2_28757 : !torch.int, !torch.int -> !torch.int
    %int16_28758 = torch.constant.int 16
    %int1_28759 = torch.constant.int 1
    %int128_28760 = torch.constant.int 128
    %31195 = torch.prim.ListConstruct %31194, %int16_28758, %int1_28759, %int128_28760 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31196 = torch.aten.view %31192, %31195 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31196, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31197 = torch.prim.ListConstruct %31110 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_28761 = torch.constant.bool false
    %31198 = torch.aten.index_put %31196, %31197, %31126, %false_28761 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31198, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_28762 = torch.constant.int 32
    %int2_28763 = torch.constant.int 2
    %int16_28764 = torch.constant.int 16
    %int1_28765 = torch.constant.int 1
    %int128_28766 = torch.constant.int 128
    %31199 = torch.prim.ListConstruct %3038, %int32_28762, %int2_28763, %int16_28764, %int1_28765, %int128_28766 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31200 = torch.aten.view %31198, %31199 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31200, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_28767 = torch.constant.int 131072
    %31201 = torch.prim.ListConstruct %3038, %int131072_28767 : (!torch.int, !torch.int) -> !torch.list<int>
    %31202 = torch.aten.view %31200, %31201 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %31202, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_28768 = torch.constant.int 32
    %int2_28769 = torch.constant.int 2
    %int16_28770 = torch.constant.int 16
    %int1_28771 = torch.constant.int 1
    %int128_28772 = torch.constant.int 128
    %31203 = torch.prim.ListConstruct %3041, %int32_28768, %int2_28769, %int16_28770, %int1_28771, %int128_28772 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31204 = torch.aten.view %29353, %31203 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31204, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_28773 = torch.constant.int 32
    %31205 = torch.aten.mul.int %3041, %int32_28773 : !torch.int, !torch.int -> !torch.int
    %int2_28774 = torch.constant.int 2
    %31206 = torch.aten.mul.int %31205, %int2_28774 : !torch.int, !torch.int -> !torch.int
    %int16_28775 = torch.constant.int 16
    %int1_28776 = torch.constant.int 1
    %int128_28777 = torch.constant.int 128
    %31207 = torch.prim.ListConstruct %31206, %int16_28775, %int1_28776, %int128_28777 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31208 = torch.aten.view %31204, %31207 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31208, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31209 = torch.prim.ListConstruct %31112 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_28778 = torch.constant.bool false
    %31210 = torch.aten.index_put %31208, %31209, %31128, %false_28778 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31210, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_28779 = torch.constant.int 32
    %int2_28780 = torch.constant.int 2
    %int16_28781 = torch.constant.int 16
    %int1_28782 = torch.constant.int 1
    %int128_28783 = torch.constant.int 128
    %31211 = torch.prim.ListConstruct %3041, %int32_28779, %int2_28780, %int16_28781, %int1_28782, %int128_28783 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31212 = torch.aten.view %31210, %31211 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31212, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_28784 = torch.constant.int 131072
    %31213 = torch.prim.ListConstruct %3041, %int131072_28784 : (!torch.int, !torch.int) -> !torch.list<int>
    %31214 = torch.aten.view %31212, %31213 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %31214, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_28785 = torch.constant.int 32
    %int2_28786 = torch.constant.int 2
    %int16_28787 = torch.constant.int 16
    %int1_28788 = torch.constant.int 1
    %int128_28789 = torch.constant.int 128
    %31215 = torch.prim.ListConstruct %3044, %int32_28785, %int2_28786, %int16_28787, %int1_28788, %int128_28789 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31216 = torch.aten.view %29365, %31215 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31216, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_28790 = torch.constant.int 32
    %31217 = torch.aten.mul.int %3044, %int32_28790 : !torch.int, !torch.int -> !torch.int
    %int2_28791 = torch.constant.int 2
    %31218 = torch.aten.mul.int %31217, %int2_28791 : !torch.int, !torch.int -> !torch.int
    %int16_28792 = torch.constant.int 16
    %int1_28793 = torch.constant.int 1
    %int128_28794 = torch.constant.int 128
    %31219 = torch.prim.ListConstruct %31218, %int16_28792, %int1_28793, %int128_28794 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31220 = torch.aten.view %31216, %31219 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31220, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %31221 = torch.prim.ListConstruct %31114 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_28795 = torch.constant.bool false
    %31222 = torch.aten.index_put %31220, %31221, %31130, %false_28795 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %31222, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_28796 = torch.constant.int 32
    %int2_28797 = torch.constant.int 2
    %int16_28798 = torch.constant.int 16
    %int1_28799 = torch.constant.int 1
    %int128_28800 = torch.constant.int 128
    %31223 = torch.prim.ListConstruct %3044, %int32_28796, %int2_28797, %int16_28798, %int1_28799, %int128_28800 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31224 = torch.aten.view %31222, %31223 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %31224, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_28801 = torch.constant.int 131072
    %31225 = torch.prim.ListConstruct %3044, %int131072_28801 : (!torch.int, !torch.int) -> !torch.list<int>
    %31226 = torch.aten.view %31224, %31225 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %31226, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
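    // --- Grouped-query KV broadcast (annotation) ---
    // Each per-shard key tensor of shape [4, s*16, 1, 128] is unsqueezed at
    // dim -2, expanded to 4 query heads, and merged back: the usual repeat_kv
    // pattern with n_rep = 4. Hedged PyTorch-style sketch of one instance:
    //   k = k.unsqueeze(-2)              # [4, s, 1, 128] -> [4, s, 1, 1, 128]
    //   k = k.expand(4, s, 1, 4, 128)    # broadcast view over the new axis
    //   k = k.reshape(4, s, 4, 128)      # [batch, seq, q_heads, depth]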
    %int-2_28802 = torch.constant.int -2
    %31227 = torch.aten.unsqueeze %30841, %int-2_28802 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28803 = torch.constant.int -2
    %31228 = torch.aten.unsqueeze %30856, %int-2_28803 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28804 = torch.constant.int -2
    %31229 = torch.aten.unsqueeze %30871, %int-2_28804 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28805 = torch.constant.int -2
    %31230 = torch.aten.unsqueeze %30886, %int-2_28805 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28806 = torch.constant.int -2
    %31231 = torch.aten.unsqueeze %30901, %int-2_28806 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28807 = torch.constant.int -2
    %31232 = torch.aten.unsqueeze %30916, %int-2_28807 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28808 = torch.constant.int -2
    %31233 = torch.aten.unsqueeze %30931, %int-2_28808 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28809 = torch.constant.int -2
    %31234 = torch.aten.unsqueeze %30946, %int-2_28809 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int4_28810 = torch.constant.int 4
    %int1_28811 = torch.constant.int 1
    %int4_28812 = torch.constant.int 4
    %int128_28813 = torch.constant.int 128
    %31235 = torch.prim.ListConstruct %int4_28810, %30827, %int1_28811, %int4_28812, %int128_28813 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28814 = torch.constant.bool false
    %31236 = torch.aten.expand %31227, %31235, %false_28814 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28815 = torch.constant.int 4
    %int1_28816 = torch.constant.int 1
    %int4_28817 = torch.constant.int 4
    %int128_28818 = torch.constant.int 128
    %31237 = torch.prim.ListConstruct %int4_28815, %30827, %int1_28816, %int4_28817, %int128_28818 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28819 = torch.constant.bool false
    %31238 = torch.aten.expand %31228, %31237, %false_28819 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28820 = torch.constant.int 4
    %int1_28821 = torch.constant.int 1
    %int4_28822 = torch.constant.int 4
    %int128_28823 = torch.constant.int 128
    %31239 = torch.prim.ListConstruct %int4_28820, %30827, %int1_28821, %int4_28822, %int128_28823 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28824 = torch.constant.bool false
    %31240 = torch.aten.expand %31229, %31239, %false_28824 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28825 = torch.constant.int 4
    %int1_28826 = torch.constant.int 1
    %int4_28827 = torch.constant.int 4
    %int128_28828 = torch.constant.int 128
    %31241 = torch.prim.ListConstruct %int4_28825, %30827, %int1_28826, %int4_28827, %int128_28828 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28829 = torch.constant.bool false
    %31242 = torch.aten.expand %31230, %31241, %false_28829 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28830 = torch.constant.int 4
    %int1_28831 = torch.constant.int 1
    %int4_28832 = torch.constant.int 4
    %int128_28833 = torch.constant.int 128
    %31243 = torch.prim.ListConstruct %int4_28830, %30827, %int1_28831, %int4_28832, %int128_28833 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28834 = torch.constant.bool false
    %31244 = torch.aten.expand %31231, %31243, %false_28834 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28835 = torch.constant.int 4
    %int1_28836 = torch.constant.int 1
    %int4_28837 = torch.constant.int 4
    %int128_28838 = torch.constant.int 128
    %31245 = torch.prim.ListConstruct %int4_28835, %30827, %int1_28836, %int4_28837, %int128_28838 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28839 = torch.constant.bool false
    %31246 = torch.aten.expand %31232, %31245, %false_28839 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28840 = torch.constant.int 4
    %int1_28841 = torch.constant.int 1
    %int4_28842 = torch.constant.int 4
    %int128_28843 = torch.constant.int 128
    %31247 = torch.prim.ListConstruct %int4_28840, %30827, %int1_28841, %int4_28842, %int128_28843 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28844 = torch.constant.bool false
    %31248 = torch.aten.expand %31233, %31247, %false_28844 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28845 = torch.constant.int 4
    %int1_28846 = torch.constant.int 1
    %int4_28847 = torch.constant.int 4
    %int128_28848 = torch.constant.int 128
    %31249 = torch.prim.ListConstruct %int4_28845, %30827, %int1_28846, %int4_28847, %int128_28848 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28849 = torch.constant.bool false
    %31250 = torch.aten.expand %31234, %31249, %false_28849 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28850 = torch.constant.int 4
    %int4_28851 = torch.constant.int 4
    %int128_28852 = torch.constant.int 128
    %31251 = torch.prim.ListConstruct %int4_28850, %30827, %int4_28851, %int128_28852 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31252 = torch.aten.view %31236, %31251 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28853 = torch.constant.int 4
    %int4_28854 = torch.constant.int 4
    %int128_28855 = torch.constant.int 128
    %31253 = torch.prim.ListConstruct %int4_28853, %30827, %int4_28854, %int128_28855 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31254 = torch.aten.view %31238, %31253 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28856 = torch.constant.int 4
    %int4_28857 = torch.constant.int 4
    %int128_28858 = torch.constant.int 128
    %31255 = torch.prim.ListConstruct %int4_28856, %30827, %int4_28857, %int128_28858 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31256 = torch.aten.view %31240, %31255 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28859 = torch.constant.int 4
    %int4_28860 = torch.constant.int 4
    %int128_28861 = torch.constant.int 128
    %31257 = torch.prim.ListConstruct %int4_28859, %30827, %int4_28860, %int128_28861 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31258 = torch.aten.view %31242, %31257 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28862 = torch.constant.int 4
    %int4_28863 = torch.constant.int 4
    %int128_28864 = torch.constant.int 128
    %31259 = torch.prim.ListConstruct %int4_28862, %30827, %int4_28863, %int128_28864 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31260 = torch.aten.view %31244, %31259 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28865 = torch.constant.int 4
    %int4_28866 = torch.constant.int 4
    %int128_28867 = torch.constant.int 128
    %31261 = torch.prim.ListConstruct %int4_28865, %30827, %int4_28866, %int128_28867 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31262 = torch.aten.view %31246, %31261 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28868 = torch.constant.int 4
    %int4_28869 = torch.constant.int 4
    %int128_28870 = torch.constant.int 128
    %31263 = torch.prim.ListConstruct %int4_28868, %30827, %int4_28869, %int128_28870 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31264 = torch.aten.view %31248, %31263 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28871 = torch.constant.int 4
    %int4_28872 = torch.constant.int 4
    %int128_28873 = torch.constant.int 128
    %31265 = torch.prim.ListConstruct %int4_28871, %30827, %int4_28872, %int128_28873 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31266 = torch.aten.view %31250, %31265 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
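    // The identical unsqueeze/expand/view broadcast is applied next to the
    // eight value tensors (%30616 through %30630).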
    %int-2_28874 = torch.constant.int -2
    %31267 = torch.aten.unsqueeze %30616, %int-2_28874 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28875 = torch.constant.int -2
    %31268 = torch.aten.unsqueeze %30618, %int-2_28875 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28876 = torch.constant.int -2
    %31269 = torch.aten.unsqueeze %30620, %int-2_28876 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28877 = torch.constant.int -2
    %31270 = torch.aten.unsqueeze %30622, %int-2_28877 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28878 = torch.constant.int -2
    %31271 = torch.aten.unsqueeze %30624, %int-2_28878 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28879 = torch.constant.int -2
    %31272 = torch.aten.unsqueeze %30626, %int-2_28879 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28880 = torch.constant.int -2
    %31273 = torch.aten.unsqueeze %30628, %int-2_28880 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_28881 = torch.constant.int -2
    %31274 = torch.aten.unsqueeze %30630, %int-2_28881 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %31274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_28882 = torch.constant.int 1
    %31275 = torch.aten.size.int %30540, %int1_28882 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
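    // %31275 re-reads the dynamic sequence length from %30540 (dim 1); the
    // bind_symbolic_shape ops tie the value-side expands below to the same
    // symbol %2336 used on the key side.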
    %int4_28883 = torch.constant.int 4
    %int1_28884 = torch.constant.int 1
    %int4_28885 = torch.constant.int 4
    %int128_28886 = torch.constant.int 128
    %31276 = torch.prim.ListConstruct %int4_28883, %31275, %int1_28884, %int4_28885, %int128_28886 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28887 = torch.constant.bool false
    %31277 = torch.aten.expand %31267, %31276, %false_28887 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28888 = torch.constant.int 4
    %int1_28889 = torch.constant.int 1
    %int4_28890 = torch.constant.int 4
    %int128_28891 = torch.constant.int 128
    %31278 = torch.prim.ListConstruct %int4_28888, %31275, %int1_28889, %int4_28890, %int128_28891 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28892 = torch.constant.bool false
    %31279 = torch.aten.expand %31268, %31278, %false_28892 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28893 = torch.constant.int 4
    %int1_28894 = torch.constant.int 1
    %int4_28895 = torch.constant.int 4
    %int128_28896 = torch.constant.int 128
    %31280 = torch.prim.ListConstruct %int4_28893, %31275, %int1_28894, %int4_28895, %int128_28896 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28897 = torch.constant.bool false
    %31281 = torch.aten.expand %31269, %31280, %false_28897 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28898 = torch.constant.int 4
    %int1_28899 = torch.constant.int 1
    %int4_28900 = torch.constant.int 4
    %int128_28901 = torch.constant.int 128
    %31282 = torch.prim.ListConstruct %int4_28898, %31275, %int1_28899, %int4_28900, %int128_28901 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28902 = torch.constant.bool false
    %31283 = torch.aten.expand %31270, %31282, %false_28902 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28903 = torch.constant.int 4
    %int1_28904 = torch.constant.int 1
    %int4_28905 = torch.constant.int 4
    %int128_28906 = torch.constant.int 128
    %31284 = torch.prim.ListConstruct %int4_28903, %31275, %int1_28904, %int4_28905, %int128_28906 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28907 = torch.constant.bool false
    %31285 = torch.aten.expand %31271, %31284, %false_28907 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28908 = torch.constant.int 4
    %int1_28909 = torch.constant.int 1
    %int4_28910 = torch.constant.int 4
    %int128_28911 = torch.constant.int 128
    %31286 = torch.prim.ListConstruct %int4_28908, %31275, %int1_28909, %int4_28910, %int128_28911 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28912 = torch.constant.bool false
    %31287 = torch.aten.expand %31272, %31286, %false_28912 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28913 = torch.constant.int 4
    %int1_28914 = torch.constant.int 1
    %int4_28915 = torch.constant.int 4
    %int128_28916 = torch.constant.int 128
    %31288 = torch.prim.ListConstruct %int4_28913, %31275, %int1_28914, %int4_28915, %int128_28916 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28917 = torch.constant.bool false
    %31289 = torch.aten.expand %31273, %31288, %false_28917 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28918 = torch.constant.int 4
    %int1_28919 = torch.constant.int 1
    %int4_28920 = torch.constant.int 4
    %int128_28921 = torch.constant.int 128
    %31290 = torch.prim.ListConstruct %int4_28918, %31275, %int1_28919, %int4_28920, %int128_28921 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_28922 = torch.constant.bool false
    %31291 = torch.aten.expand %31274, %31290, %false_28922 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %31291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_28923 = torch.constant.int 4
    %int4_28924 = torch.constant.int 4
    %int128_28925 = torch.constant.int 128
    %31292 = torch.prim.ListConstruct %int4_28923, %31275, %int4_28924, %int128_28925 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31293 = torch.aten.view %31277, %31292 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28926 = torch.constant.int 4
    %int4_28927 = torch.constant.int 4
    %int128_28928 = torch.constant.int 128
    %31294 = torch.prim.ListConstruct %int4_28926, %31275, %int4_28927, %int128_28928 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31295 = torch.aten.view %31279, %31294 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28929 = torch.constant.int 4
    %int4_28930 = torch.constant.int 4
    %int128_28931 = torch.constant.int 128
    %31296 = torch.prim.ListConstruct %int4_28929, %31275, %int4_28930, %int128_28931 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31297 = torch.aten.view %31281, %31296 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28932 = torch.constant.int 4
    %int4_28933 = torch.constant.int 4
    %int128_28934 = torch.constant.int 128
    %31298 = torch.prim.ListConstruct %int4_28932, %31275, %int4_28933, %int128_28934 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31299 = torch.aten.view %31283, %31298 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28935 = torch.constant.int 4
    %int4_28936 = torch.constant.int 4
    %int128_28937 = torch.constant.int 128
    %31300 = torch.prim.ListConstruct %int4_28935, %31275, %int4_28936, %int128_28937 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31301 = torch.aten.view %31285, %31300 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28938 = torch.constant.int 4
    %int4_28939 = torch.constant.int 4
    %int128_28940 = torch.constant.int 128
    %31302 = torch.prim.ListConstruct %int4_28938, %31275, %int4_28939, %int128_28940 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31303 = torch.aten.view %31287, %31302 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28941 = torch.constant.int 4
    %int4_28942 = torch.constant.int 4
    %int128_28943 = torch.constant.int 128
    %31304 = torch.prim.ListConstruct %int4_28941, %31275, %int4_28942, %int128_28943 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31305 = torch.aten.view %31289, %31304 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_28944 = torch.constant.int 4
    %int4_28945 = torch.constant.int 4
    %int128_28946 = torch.constant.int 128
    %31306 = torch.prim.ListConstruct %int4_28944, %31275, %int4_28945, %int128_28946 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31307 = torch.aten.view %31291, %31306 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
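    // --- Head-major transposes (annotation) ---
    // The transposes below swap dims 1 and 2, taking Q (%30683 through %30788),
    // K (%31252 through %31266), and V (%31293 through %31307) from
    // [batch, seq, heads, depth] to the [batch, heads, seq, depth] layout
    // consumed by the attention op, for all eight shards.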
    %int1_28947 = torch.constant.int 1
    %int2_28948 = torch.constant.int 2
    %31308 = torch.aten.transpose.int %30683, %int1_28947, %int2_28948 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31308, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28949 = torch.constant.int 1
    %int2_28950 = torch.constant.int 2
    %31309 = torch.aten.transpose.int %30698, %int1_28949, %int2_28950 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31309, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28951 = torch.constant.int 1
    %int2_28952 = torch.constant.int 2
    %31310 = torch.aten.transpose.int %30713, %int1_28951, %int2_28952 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31310, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28953 = torch.constant.int 1
    %int2_28954 = torch.constant.int 2
    %31311 = torch.aten.transpose.int %30728, %int1_28953, %int2_28954 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31311, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28955 = torch.constant.int 1
    %int2_28956 = torch.constant.int 2
    %31312 = torch.aten.transpose.int %30743, %int1_28955, %int2_28956 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31312, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28957 = torch.constant.int 1
    %int2_28958 = torch.constant.int 2
    %31313 = torch.aten.transpose.int %30758, %int1_28957, %int2_28958 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31313, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28959 = torch.constant.int 1
    %int2_28960 = torch.constant.int 2
    %31314 = torch.aten.transpose.int %30773, %int1_28959, %int2_28960 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31314, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28961 = torch.constant.int 1
    %int2_28962 = torch.constant.int 2
    %31315 = torch.aten.transpose.int %30788, %int1_28961, %int2_28962 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31315, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28963 = torch.constant.int 1
    %int2_28964 = torch.constant.int 2
    %31316 = torch.aten.transpose.int %31252, %int1_28963, %int2_28964 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31316, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28965 = torch.constant.int 1
    %int2_28966 = torch.constant.int 2
    %31317 = torch.aten.transpose.int %31254, %int1_28965, %int2_28966 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31317, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28967 = torch.constant.int 1
    %int2_28968 = torch.constant.int 2
    %31318 = torch.aten.transpose.int %31256, %int1_28967, %int2_28968 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31318, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28969 = torch.constant.int 1
    %int2_28970 = torch.constant.int 2
    %31319 = torch.aten.transpose.int %31258, %int1_28969, %int2_28970 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31319, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28971 = torch.constant.int 1
    %int2_28972 = torch.constant.int 2
    %31320 = torch.aten.transpose.int %31260, %int1_28971, %int2_28972 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31320, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28973 = torch.constant.int 1
    %int2_28974 = torch.constant.int 2
    %31321 = torch.aten.transpose.int %31262, %int1_28973, %int2_28974 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31321, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28975 = torch.constant.int 1
    %int2_28976 = torch.constant.int 2
    %31322 = torch.aten.transpose.int %31264, %int1_28975, %int2_28976 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31322, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28977 = torch.constant.int 1
    %int2_28978 = torch.constant.int 2
    %31323 = torch.aten.transpose.int %31266, %int1_28977, %int2_28978 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31323, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28979 = torch.constant.int 1
    %int2_28980 = torch.constant.int 2
    %31324 = torch.aten.transpose.int %31293, %int1_28979, %int2_28980 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31324, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28981 = torch.constant.int 1
    %int2_28982 = torch.constant.int 2
    %31325 = torch.aten.transpose.int %31295, %int1_28981, %int2_28982 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31325, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28983 = torch.constant.int 1
    %int2_28984 = torch.constant.int 2
    %31326 = torch.aten.transpose.int %31297, %int1_28983, %int2_28984 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31326, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28985 = torch.constant.int 1
    %int2_28986 = torch.constant.int 2
    %31327 = torch.aten.transpose.int %31299, %int1_28985, %int2_28986 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31327, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28987 = torch.constant.int 1
    %int2_28988 = torch.constant.int 2
    %31328 = torch.aten.transpose.int %31301, %int1_28987, %int2_28988 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31328, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28989 = torch.constant.int 1
    %int2_28990 = torch.constant.int 2
    %31329 = torch.aten.transpose.int %31303, %int1_28989, %int2_28990 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31329, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28991 = torch.constant.int 1
    %int2_28992 = torch.constant.int 2
    %31330 = torch.aten.transpose.int %31305, %int1_28991, %int2_28992 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31330, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_28993 = torch.constant.int 1
    %int2_28994 = torch.constant.int 2
    %31331 = torch.aten.transpose.int %31307, %int1_28993, %int2_28994 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %31331, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
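    // --- Per-shard attention (annotation) ---
    // One _scaled_dot_product_flash_attention_for_cpu call per shard, reading
    // the operands as (query, key, value, dropout_p = 0.0, is_causal = true,
    // attn_mask = none, scale = none); this operand naming is an assumption
    // from the upstream op signature, not stated in the IR. Result #0 is the
    // attention output; the f32 result #1 is presumably the auxiliary
    // logsumexp, and only result #0 is shape-bound here.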
    %float0.000000e00_28995 = torch.constant.float 0.000000e+00
    %true_28996 = torch.constant.bool true
    %none_28997 = torch.constant.none
    %none_28998 = torch.constant.none
    %31332:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%31308, %31316, %31324, %float0.000000e00_28995, %true_28996, %none_28997, %none_28998) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %31332#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_28999 = torch.constant.float 0.000000e+00
    %true_29000 = torch.constant.bool true
    %none_29001 = torch.constant.none
    %none_29002 = torch.constant.none
    %31333:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%31309, %31317, %31325, %float0.000000e00_28999, %true_29000, %none_29001, %none_29002) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %31333#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_29003 = torch.constant.float 0.000000e+00
    %true_29004 = torch.constant.bool true
    %none_29005 = torch.constant.none
    %none_29006 = torch.constant.none
    %31334:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%31310, %31318, %31326, %float0.000000e00_29003, %true_29004, %none_29005, %none_29006) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %31334#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_29007 = torch.constant.float 0.000000e+00
    %true_29008 = torch.constant.bool true
    %none_29009 = torch.constant.none
    %none_29010 = torch.constant.none
    %31335:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%31311, %31319, %31327, %float0.000000e00_29007, %true_29008, %none_29009, %none_29010) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %31335#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_29011 = torch.constant.float 0.000000e+00
    %true_29012 = torch.constant.bool true
    %none_29013 = torch.constant.none
    %none_29014 = torch.constant.none
    %31336:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%31312, %31320, %31328, %float0.000000e00_29011, %true_29012, %none_29013, %none_29014) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %31336#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_29015 = torch.constant.float 0.000000e+00
    %true_29016 = torch.constant.bool true
    %none_29017 = torch.constant.none
    %none_29018 = torch.constant.none
    %31337:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%31313, %31321, %31329, %float0.000000e00_29015, %true_29016, %none_29017, %none_29018) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %31337#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_29019 = torch.constant.float 0.000000e+00
    %true_29020 = torch.constant.bool true
    %none_29021 = torch.constant.none
    %none_29022 = torch.constant.none
    %31338:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%31314, %31322, %31330, %float0.000000e00_29019, %true_29020, %none_29021, %none_29022) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %31338#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_29023 = torch.constant.float 0.000000e+00
    %true_29024 = torch.constant.bool true
    %none_29025 = torch.constant.none
    %none_29026 = torch.constant.none
    %31339:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%31315, %31323, %31331, %float0.000000e00_29023, %true_29024, %none_29025, %none_29026) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %31339#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
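    // Swap dims 1 and 2 of each attention result: [batch=4, heads=4, seq, 128]
    // back to [batch=4, seq, heads=4, 128] ahead of the head-flattening views.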
    %int1_29027 = torch.constant.int 1
    %int2_29028 = torch.constant.int 2
    %31340 = torch.aten.transpose.int %31332#0, %int1_29027, %int2_29028 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_29029 = torch.constant.int 1
    %int2_29030 = torch.constant.int 2
    %31341 = torch.aten.transpose.int %31333#0, %int1_29029, %int2_29030 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_29031 = torch.constant.int 1
    %int2_29032 = torch.constant.int 2
    %31342 = torch.aten.transpose.int %31334#0, %int1_29031, %int2_29032 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_29033 = torch.constant.int 1
    %int2_29034 = torch.constant.int 2
    %31343 = torch.aten.transpose.int %31335#0, %int1_29033, %int2_29034 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_29035 = torch.constant.int 1
    %int2_29036 = torch.constant.int 2
    %31344 = torch.aten.transpose.int %31336#0, %int1_29035, %int2_29036 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_29037 = torch.constant.int 1
    %int2_29038 = torch.constant.int 2
    %31345 = torch.aten.transpose.int %31337#0, %int1_29037, %int2_29038 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_29039 = torch.constant.int 1
    %int2_29040 = torch.constant.int 2
    %31346 = torch.aten.transpose.int %31338#0, %int1_29039, %int2_29040 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_29041 = torch.constant.int 1
    %int2_29042 = torch.constant.int 2
    %31347 = torch.aten.transpose.int %31339#0, %int1_29041, %int2_29042 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %31347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
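    // Flatten the per-shard head dimensions: [4, seq, 4, 128] -> [4, seq, 512].
    // The dynamic seq extents (%30669, %30684, ...) all derive from the same
    // symbolic size %2336, as the bind_symbolic_shape ops record.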
    %int4_29043 = torch.constant.int 4
    %int512_29044 = torch.constant.int 512
    %31348 = torch.prim.ListConstruct %int4_29043, %30669, %int512_29044 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31349 = torch.aten.view %31340, %31348 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %31349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29045 = torch.constant.int 4
    %int512_29046 = torch.constant.int 512
    %31350 = torch.prim.ListConstruct %int4_29045, %30684, %int512_29046 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31351 = torch.aten.view %31341, %31350 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %31351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29047 = torch.constant.int 4
    %int512_29048 = torch.constant.int 512
    %31352 = torch.prim.ListConstruct %int4_29047, %30699, %int512_29048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31353 = torch.aten.view %31342, %31352 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %31353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29049 = torch.constant.int 4
    %int512_29050 = torch.constant.int 512
    %31354 = torch.prim.ListConstruct %int4_29049, %30714, %int512_29050 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31355 = torch.aten.view %31343, %31354 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %31355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29051 = torch.constant.int 4
    %int512_29052 = torch.constant.int 512
    %31356 = torch.prim.ListConstruct %int4_29051, %30729, %int512_29052 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31357 = torch.aten.view %31344, %31356 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %31357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29053 = torch.constant.int 4
    %int512_29054 = torch.constant.int 512
    %31358 = torch.prim.ListConstruct %int4_29053, %30744, %int512_29054 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31359 = torch.aten.view %31345, %31358 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %31359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29055 = torch.constant.int 4
    %int512_29056 = torch.constant.int 512
    %31360 = torch.prim.ListConstruct %int4_29055, %30759, %int512_29056 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31361 = torch.aten.view %31346, %31360 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %31361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29057 = torch.constant.int 4
    %int512_29058 = torch.constant.int 512
    %31362 = torch.prim.ListConstruct %int4_29057, %30774, %int512_29058 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31363 = torch.aten.view %31347, %31362 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %31363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
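    // Transpose the eight [4096, 512] shards of the attention output projection
    // weight (%1120-%1127, presumably blk attn_output shards) to [512, 4096] so
    // they can serve as the right-hand side of the matmuls below.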
    %int1_29059 = torch.constant.int 1
    %int0_29060 = torch.constant.int 0
    %31364 = torch.prim.ListConstruct %int1_29059, %int0_29060 : (!torch.int, !torch.int) -> !torch.list<int>
    %31365 = torch.aten.permute %1120, %31364 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_29061 = torch.constant.int 1
    %int0_29062 = torch.constant.int 0
    %31366 = torch.prim.ListConstruct %int1_29061, %int0_29062 : (!torch.int, !torch.int) -> !torch.list<int>
    %31367 = torch.aten.permute %1121, %31366 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_29063 = torch.constant.int 1
    %int0_29064 = torch.constant.int 0
    %31368 = torch.prim.ListConstruct %int1_29063, %int0_29064 : (!torch.int, !torch.int) -> !torch.list<int>
    %31369 = torch.aten.permute %1122, %31368 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_29065 = torch.constant.int 1
    %int0_29066 = torch.constant.int 0
    %31370 = torch.prim.ListConstruct %int1_29065, %int0_29066 : (!torch.int, !torch.int) -> !torch.list<int>
    %31371 = torch.aten.permute %1123, %31370 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_29067 = torch.constant.int 1
    %int0_29068 = torch.constant.int 0
    %31372 = torch.prim.ListConstruct %int1_29067, %int0_29068 : (!torch.int, !torch.int) -> !torch.list<int>
    %31373 = torch.aten.permute %1124, %31372 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_29069 = torch.constant.int 1
    %int0_29070 = torch.constant.int 0
    %31374 = torch.prim.ListConstruct %int1_29069, %int0_29070 : (!torch.int, !torch.int) -> !torch.list<int>
    %31375 = torch.aten.permute %1125, %31374 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_29071 = torch.constant.int 1
    %int0_29072 = torch.constant.int 0
    %31376 = torch.prim.ListConstruct %int1_29071, %int0_29072 : (!torch.int, !torch.int) -> !torch.list<int>
    %31377 = torch.aten.permute %1126, %31376 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_29073 = torch.constant.int 1
    %int0_29074 = torch.constant.int 0
    %31378 = torch.prim.ListConstruct %int1_29073, %int0_29074 : (!torch.int, !torch.int) -> !torch.list<int>
    %31379 = torch.aten.permute %1127, %31378 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
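    // Per-shard output projection: collapse [4, seq, 512] to [4*seq, 512],
    // multiply by the transposed [512, 4096] weight shard, and restore the
    // [4, seq, 4096] shape. Each device now holds one partial sum of the full
    // projection result.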
    %int4_29075 = torch.constant.int 4
    %31380 = torch.aten.mul.int %int4_29075, %30669 : !torch.int, !torch.int -> !torch.int
    %int512_29076 = torch.constant.int 512
    %31381 = torch.prim.ListConstruct %31380, %int512_29076 : (!torch.int, !torch.int) -> !torch.list<int>
    %31382 = torch.aten.view %31349, %31381 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %31382, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %31383 = torch.aten.mm %31382, %31365 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31383, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29077 = torch.constant.int 4
    %int4096_29078 = torch.constant.int 4096
    %31384 = torch.prim.ListConstruct %int4_29077, %30669, %int4096_29078 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31385 = torch.aten.view %31383, %31384 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_29079 = torch.constant.int 4
    %31386 = torch.aten.mul.int %int4_29079, %30684 : !torch.int, !torch.int -> !torch.int
    %int512_29080 = torch.constant.int 512
    %31387 = torch.prim.ListConstruct %31386, %int512_29080 : (!torch.int, !torch.int) -> !torch.list<int>
    %31388 = torch.aten.view %31351, %31387 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %31388, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %31389 = torch.aten.mm %31388, %31367 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31389, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29081 = torch.constant.int 4
    %int4096_29082 = torch.constant.int 4096
    %31390 = torch.prim.ListConstruct %int4_29081, %30684, %int4096_29082 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31391 = torch.aten.view %31389, %31390 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_29083 = torch.constant.int 4
    %31392 = torch.aten.mul.int %int4_29083, %30699 : !torch.int, !torch.int -> !torch.int
    %int512_29084 = torch.constant.int 512
    %31393 = torch.prim.ListConstruct %31392, %int512_29084 : (!torch.int, !torch.int) -> !torch.list<int>
    %31394 = torch.aten.view %31353, %31393 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %31394, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %31395 = torch.aten.mm %31394, %31369 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31395, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29085 = torch.constant.int 4
    %int4096_29086 = torch.constant.int 4096
    %31396 = torch.prim.ListConstruct %int4_29085, %30699, %int4096_29086 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31397 = torch.aten.view %31395, %31396 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_29087 = torch.constant.int 4
    %31398 = torch.aten.mul.int %int4_29087, %30714 : !torch.int, !torch.int -> !torch.int
    %int512_29088 = torch.constant.int 512
    %31399 = torch.prim.ListConstruct %31398, %int512_29088 : (!torch.int, !torch.int) -> !torch.list<int>
    %31400 = torch.aten.view %31355, %31399 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %31400, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %31401 = torch.aten.mm %31400, %31371 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31401, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29089 = torch.constant.int 4
    %int4096_29090 = torch.constant.int 4096
    %31402 = torch.prim.ListConstruct %int4_29089, %30714, %int4096_29090 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31403 = torch.aten.view %31401, %31402 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_29091 = torch.constant.int 4
    %31404 = torch.aten.mul.int %int4_29091, %30729 : !torch.int, !torch.int -> !torch.int
    %int512_29092 = torch.constant.int 512
    %31405 = torch.prim.ListConstruct %31404, %int512_29092 : (!torch.int, !torch.int) -> !torch.list<int>
    %31406 = torch.aten.view %31357, %31405 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %31406, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %31407 = torch.aten.mm %31406, %31373 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31407, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29093 = torch.constant.int 4
    %int4096_29094 = torch.constant.int 4096
    %31408 = torch.prim.ListConstruct %int4_29093, %30729, %int4096_29094 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31409 = torch.aten.view %31407, %31408 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_29095 = torch.constant.int 4
    %31410 = torch.aten.mul.int %int4_29095, %30744 : !torch.int, !torch.int -> !torch.int
    %int512_29096 = torch.constant.int 512
    %31411 = torch.prim.ListConstruct %31410, %int512_29096 : (!torch.int, !torch.int) -> !torch.list<int>
    %31412 = torch.aten.view %31359, %31411 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %31412, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %31413 = torch.aten.mm %31412, %31375 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31413, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29097 = torch.constant.int 4
    %int4096_29098 = torch.constant.int 4096
    %31414 = torch.prim.ListConstruct %int4_29097, %30744, %int4096_29098 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31415 = torch.aten.view %31413, %31414 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_29099 = torch.constant.int 4
    %31416 = torch.aten.mul.int %int4_29099, %30759 : !torch.int, !torch.int -> !torch.int
    %int512_29100 = torch.constant.int 512
    %31417 = torch.prim.ListConstruct %31416, %int512_29100 : (!torch.int, !torch.int) -> !torch.list<int>
    %31418 = torch.aten.view %31361, %31417 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %31418, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %31419 = torch.aten.mm %31418, %31377 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31419, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29101 = torch.constant.int 4
    %int4096_29102 = torch.constant.int 4096
    %31420 = torch.prim.ListConstruct %int4_29101, %30759, %int4096_29102 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31421 = torch.aten.view %31419, %31420 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_29103 = torch.constant.int 4
    %31422 = torch.aten.mul.int %int4_29103, %30774 : !torch.int, !torch.int -> !torch.int
    %int512_29104 = torch.constant.int 512
    %31423 = torch.prim.ListConstruct %31422, %int512_29104 : (!torch.int, !torch.int) -> !torch.list<int>
    %31424 = torch.aten.view %31363, %31423 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %31424, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %31425 = torch.aten.mm %31424, %31379 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31425, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29105 = torch.constant.int 4
    %int4096_29106 = torch.constant.int 4096
    %31426 = torch.prim.ListConstruct %int4_29105, %30774, %int4096_29106 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31427 = torch.aten.view %31425, %31426 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
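    // Reduce the eight partial projections. Each device pulls the seven remote
    // partials over flow.tensor.transfer and accumulates them with its local
    // tensor; effectively an all-reduce unrolled into explicit transfers and
    // adds. First the copy materialized on @__device_0 (local partial %31385):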
    %31428 = torch_c.to_builtin_tensor %31391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29107 = arith.constant 1 : index
    %dim_29108 = tensor.dim %31428, %c1_29107 : tensor<4x?x4096xf16>
    %31429 = flow.tensor.transfer %31428 : tensor<4x?x4096xf16>{%dim_29108} to #hal.device.promise<@__device_0>
    %31430 = torch_c.from_builtin_tensor %31429 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31431 = torch_c.to_builtin_tensor %31397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29109 = arith.constant 1 : index
    %dim_29110 = tensor.dim %31431, %c1_29109 : tensor<4x?x4096xf16>
    %31432 = flow.tensor.transfer %31431 : tensor<4x?x4096xf16>{%dim_29110} to #hal.device.promise<@__device_0>
    %31433 = torch_c.from_builtin_tensor %31432 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31434 = torch_c.to_builtin_tensor %31403 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29111 = arith.constant 1 : index
    %dim_29112 = tensor.dim %31434, %c1_29111 : tensor<4x?x4096xf16>
    %31435 = flow.tensor.transfer %31434 : tensor<4x?x4096xf16>{%dim_29112} to #hal.device.promise<@__device_0>
    %31436 = torch_c.from_builtin_tensor %31435 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31437 = torch_c.to_builtin_tensor %31409 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29113 = arith.constant 1 : index
    %dim_29114 = tensor.dim %31437, %c1_29113 : tensor<4x?x4096xf16>
    %31438 = flow.tensor.transfer %31437 : tensor<4x?x4096xf16>{%dim_29114} to #hal.device.promise<@__device_0>
    %31439 = torch_c.from_builtin_tensor %31438 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31440 = torch_c.to_builtin_tensor %31415 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29115 = arith.constant 1 : index
    %dim_29116 = tensor.dim %31440, %c1_29115 : tensor<4x?x4096xf16>
    %31441 = flow.tensor.transfer %31440 : tensor<4x?x4096xf16>{%dim_29116} to #hal.device.promise<@__device_0>
    %31442 = torch_c.from_builtin_tensor %31441 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31443 = torch_c.to_builtin_tensor %31421 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29117 = arith.constant 1 : index
    %dim_29118 = tensor.dim %31443, %c1_29117 : tensor<4x?x4096xf16>
    %31444 = flow.tensor.transfer %31443 : tensor<4x?x4096xf16>{%dim_29118} to #hal.device.promise<@__device_0>
    %31445 = torch_c.from_builtin_tensor %31444 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31446 = torch_c.to_builtin_tensor %31427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29119 = arith.constant 1 : index
    %dim_29120 = tensor.dim %31446, %c1_29119 : tensor<4x?x4096xf16>
    %31447 = flow.tensor.transfer %31446 : tensor<4x?x4096xf16>{%dim_29120} to #hal.device.promise<@__device_0>
    %31448 = torch_c.from_builtin_tensor %31447 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29121 = torch.constant.int 1
    %31449 = torch.aten.add.Tensor %31385, %31430, %int1_29121 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29122 = torch.constant.int 1
    %31450 = torch.aten.add.Tensor %31449, %31433, %int1_29122 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29123 = torch.constant.int 1
    %31451 = torch.aten.add.Tensor %31450, %31436, %int1_29123 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29124 = torch.constant.int 1
    %31452 = torch.aten.add.Tensor %31451, %31439, %int1_29124 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29125 = torch.constant.int 1
    %31453 = torch.aten.add.Tensor %31452, %31442, %int1_29125 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29126 = torch.constant.int 1
    %31454 = torch.aten.add.Tensor %31453, %31445, %int1_29126 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29127 = torch.constant.int 1
    %31455 = torch.aten.add.Tensor %31454, %31448, %int1_29127 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
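    // Same reduction, materialized on @__device_1 (local partial %31391).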
    %31456 = torch_c.to_builtin_tensor %31385 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29128 = arith.constant 1 : index
    %dim_29129 = tensor.dim %31456, %c1_29128 : tensor<4x?x4096xf16>
    %31457 = flow.tensor.transfer %31456 : tensor<4x?x4096xf16>{%dim_29129} to #hal.device.promise<@__device_1>
    %31458 = torch_c.from_builtin_tensor %31457 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31459 = torch_c.to_builtin_tensor %31397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29130 = arith.constant 1 : index
    %dim_29131 = tensor.dim %31459, %c1_29130 : tensor<4x?x4096xf16>
    %31460 = flow.tensor.transfer %31459 : tensor<4x?x4096xf16>{%dim_29131} to #hal.device.promise<@__device_1>
    %31461 = torch_c.from_builtin_tensor %31460 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31462 = torch_c.to_builtin_tensor %31403 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29132 = arith.constant 1 : index
    %dim_29133 = tensor.dim %31462, %c1_29132 : tensor<4x?x4096xf16>
    %31463 = flow.tensor.transfer %31462 : tensor<4x?x4096xf16>{%dim_29133} to #hal.device.promise<@__device_1>
    %31464 = torch_c.from_builtin_tensor %31463 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31465 = torch_c.to_builtin_tensor %31409 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29134 = arith.constant 1 : index
    %dim_29135 = tensor.dim %31465, %c1_29134 : tensor<4x?x4096xf16>
    %31466 = flow.tensor.transfer %31465 : tensor<4x?x4096xf16>{%dim_29135} to #hal.device.promise<@__device_1>
    %31467 = torch_c.from_builtin_tensor %31466 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31468 = torch_c.to_builtin_tensor %31415 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29136 = arith.constant 1 : index
    %dim_29137 = tensor.dim %31468, %c1_29136 : tensor<4x?x4096xf16>
    %31469 = flow.tensor.transfer %31468 : tensor<4x?x4096xf16>{%dim_29137} to #hal.device.promise<@__device_1>
    %31470 = torch_c.from_builtin_tensor %31469 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31471 = torch_c.to_builtin_tensor %31421 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29138 = arith.constant 1 : index
    %dim_29139 = tensor.dim %31471, %c1_29138 : tensor<4x?x4096xf16>
    %31472 = flow.tensor.transfer %31471 : tensor<4x?x4096xf16>{%dim_29139} to #hal.device.promise<@__device_1>
    %31473 = torch_c.from_builtin_tensor %31472 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31474 = torch_c.to_builtin_tensor %31427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29140 = arith.constant 1 : index
    %dim_29141 = tensor.dim %31474, %c1_29140 : tensor<4x?x4096xf16>
    %31475 = flow.tensor.transfer %31474 : tensor<4x?x4096xf16>{%dim_29141} to #hal.device.promise<@__device_1>
    %31476 = torch_c.from_builtin_tensor %31475 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29142 = torch.constant.int 1
    %31477 = torch.aten.add.Tensor %31458, %31391, %int1_29142 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29143 = torch.constant.int 1
    %31478 = torch.aten.add.Tensor %31477, %31461, %int1_29143 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29144 = torch.constant.int 1
    %31479 = torch.aten.add.Tensor %31478, %31464, %int1_29144 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29145 = torch.constant.int 1
    %31480 = torch.aten.add.Tensor %31479, %31467, %int1_29145 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29146 = torch.constant.int 1
    %31481 = torch.aten.add.Tensor %31480, %31470, %int1_29146 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29147 = torch.constant.int 1
    %31482 = torch.aten.add.Tensor %31481, %31473, %int1_29147 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29148 = torch.constant.int 1
    %31483 = torch.aten.add.Tensor %31482, %31476, %int1_29148 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
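    // Same reduction, materialized on @__device_2 (local partial %31397).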
    %31484 = torch_c.to_builtin_tensor %31385 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29149 = arith.constant 1 : index
    %dim_29150 = tensor.dim %31484, %c1_29149 : tensor<4x?x4096xf16>
    %31485 = flow.tensor.transfer %31484 : tensor<4x?x4096xf16>{%dim_29150} to #hal.device.promise<@__device_2>
    %31486 = torch_c.from_builtin_tensor %31485 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31487 = torch_c.to_builtin_tensor %31391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29151 = arith.constant 1 : index
    %dim_29152 = tensor.dim %31487, %c1_29151 : tensor<4x?x4096xf16>
    %31488 = flow.tensor.transfer %31487 : tensor<4x?x4096xf16>{%dim_29152} to #hal.device.promise<@__device_2>
    %31489 = torch_c.from_builtin_tensor %31488 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31490 = torch_c.to_builtin_tensor %31403 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29153 = arith.constant 1 : index
    %dim_29154 = tensor.dim %31490, %c1_29153 : tensor<4x?x4096xf16>
    %31491 = flow.tensor.transfer %31490 : tensor<4x?x4096xf16>{%dim_29154} to #hal.device.promise<@__device_2>
    %31492 = torch_c.from_builtin_tensor %31491 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31493 = torch_c.to_builtin_tensor %31409 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29155 = arith.constant 1 : index
    %dim_29156 = tensor.dim %31493, %c1_29155 : tensor<4x?x4096xf16>
    %31494 = flow.tensor.transfer %31493 : tensor<4x?x4096xf16>{%dim_29156} to #hal.device.promise<@__device_2>
    %31495 = torch_c.from_builtin_tensor %31494 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31496 = torch_c.to_builtin_tensor %31415 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29157 = arith.constant 1 : index
    %dim_29158 = tensor.dim %31496, %c1_29157 : tensor<4x?x4096xf16>
    %31497 = flow.tensor.transfer %31496 : tensor<4x?x4096xf16>{%dim_29158} to #hal.device.promise<@__device_2>
    %31498 = torch_c.from_builtin_tensor %31497 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31499 = torch_c.to_builtin_tensor %31421 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29159 = arith.constant 1 : index
    %dim_29160 = tensor.dim %31499, %c1_29159 : tensor<4x?x4096xf16>
    %31500 = flow.tensor.transfer %31499 : tensor<4x?x4096xf16>{%dim_29160} to #hal.device.promise<@__device_2>
    %31501 = torch_c.from_builtin_tensor %31500 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31502 = torch_c.to_builtin_tensor %31427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29161 = arith.constant 1 : index
    %dim_29162 = tensor.dim %31502, %c1_29161 : tensor<4x?x4096xf16>
    %31503 = flow.tensor.transfer %31502 : tensor<4x?x4096xf16>{%dim_29162} to #hal.device.promise<@__device_2>
    %31504 = torch_c.from_builtin_tensor %31503 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29163 = torch.constant.int 1
    %31505 = torch.aten.add.Tensor %31486, %31489, %int1_29163 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29164 = torch.constant.int 1
    %31506 = torch.aten.add.Tensor %31505, %31397, %int1_29164 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29165 = torch.constant.int 1
    %31507 = torch.aten.add.Tensor %31506, %31492, %int1_29165 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29166 = torch.constant.int 1
    %31508 = torch.aten.add.Tensor %31507, %31495, %int1_29166 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29167 = torch.constant.int 1
    %31509 = torch.aten.add.Tensor %31508, %31498, %int1_29167 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29168 = torch.constant.int 1
    %31510 = torch.aten.add.Tensor %31509, %31501, %int1_29168 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29169 = torch.constant.int 1
    %31511 = torch.aten.add.Tensor %31510, %31504, %int1_29169 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
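    // Same reduction, materialized on @__device_3 (local partial %31403).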
    %31512 = torch_c.to_builtin_tensor %31385 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29170 = arith.constant 1 : index
    %dim_29171 = tensor.dim %31512, %c1_29170 : tensor<4x?x4096xf16>
    %31513 = flow.tensor.transfer %31512 : tensor<4x?x4096xf16>{%dim_29171} to #hal.device.promise<@__device_3>
    %31514 = torch_c.from_builtin_tensor %31513 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31515 = torch_c.to_builtin_tensor %31391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29172 = arith.constant 1 : index
    %dim_29173 = tensor.dim %31515, %c1_29172 : tensor<4x?x4096xf16>
    %31516 = flow.tensor.transfer %31515 : tensor<4x?x4096xf16>{%dim_29173} to #hal.device.promise<@__device_3>
    %31517 = torch_c.from_builtin_tensor %31516 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31518 = torch_c.to_builtin_tensor %31397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29174 = arith.constant 1 : index
    %dim_29175 = tensor.dim %31518, %c1_29174 : tensor<4x?x4096xf16>
    %31519 = flow.tensor.transfer %31518 : tensor<4x?x4096xf16>{%dim_29175} to #hal.device.promise<@__device_3>
    %31520 = torch_c.from_builtin_tensor %31519 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31521 = torch_c.to_builtin_tensor %31409 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29176 = arith.constant 1 : index
    %dim_29177 = tensor.dim %31521, %c1_29176 : tensor<4x?x4096xf16>
    %31522 = flow.tensor.transfer %31521 : tensor<4x?x4096xf16>{%dim_29177} to #hal.device.promise<@__device_3>
    %31523 = torch_c.from_builtin_tensor %31522 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31524 = torch_c.to_builtin_tensor %31415 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29178 = arith.constant 1 : index
    %dim_29179 = tensor.dim %31524, %c1_29178 : tensor<4x?x4096xf16>
    %31525 = flow.tensor.transfer %31524 : tensor<4x?x4096xf16>{%dim_29179} to #hal.device.promise<@__device_3>
    %31526 = torch_c.from_builtin_tensor %31525 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31527 = torch_c.to_builtin_tensor %31421 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29180 = arith.constant 1 : index
    %dim_29181 = tensor.dim %31527, %c1_29180 : tensor<4x?x4096xf16>
    %31528 = flow.tensor.transfer %31527 : tensor<4x?x4096xf16>{%dim_29181} to #hal.device.promise<@__device_3>
    %31529 = torch_c.from_builtin_tensor %31528 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31530 = torch_c.to_builtin_tensor %31427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29182 = arith.constant 1 : index
    %dim_29183 = tensor.dim %31530, %c1_29182 : tensor<4x?x4096xf16>
    %31531 = flow.tensor.transfer %31530 : tensor<4x?x4096xf16>{%dim_29183} to #hal.device.promise<@__device_3>
    %31532 = torch_c.from_builtin_tensor %31531 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29184 = torch.constant.int 1
    %31533 = torch.aten.add.Tensor %31514, %31517, %int1_29184 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29185 = torch.constant.int 1
    %31534 = torch.aten.add.Tensor %31533, %31520, %int1_29185 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29186 = torch.constant.int 1
    %31535 = torch.aten.add.Tensor %31534, %31403, %int1_29186 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29187 = torch.constant.int 1
    %31536 = torch.aten.add.Tensor %31535, %31523, %int1_29187 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29188 = torch.constant.int 1
    %31537 = torch.aten.add.Tensor %31536, %31526, %int1_29188 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29189 = torch.constant.int 1
    %31538 = torch.aten.add.Tensor %31537, %31529, %int1_29189 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29190 = torch.constant.int 1
    %31539 = torch.aten.add.Tensor %31538, %31532, %int1_29190 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
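    // Same reduction, materialized on @__device_4 (local partial %31409).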
    %31540 = torch_c.to_builtin_tensor %31385 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29191 = arith.constant 1 : index
    %dim_29192 = tensor.dim %31540, %c1_29191 : tensor<4x?x4096xf16>
    %31541 = flow.tensor.transfer %31540 : tensor<4x?x4096xf16>{%dim_29192} to #hal.device.promise<@__device_4>
    %31542 = torch_c.from_builtin_tensor %31541 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31543 = torch_c.to_builtin_tensor %31391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29193 = arith.constant 1 : index
    %dim_29194 = tensor.dim %31543, %c1_29193 : tensor<4x?x4096xf16>
    %31544 = flow.tensor.transfer %31543 : tensor<4x?x4096xf16>{%dim_29194} to #hal.device.promise<@__device_4>
    %31545 = torch_c.from_builtin_tensor %31544 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31546 = torch_c.to_builtin_tensor %31397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29195 = arith.constant 1 : index
    %dim_29196 = tensor.dim %31546, %c1_29195 : tensor<4x?x4096xf16>
    %31547 = flow.tensor.transfer %31546 : tensor<4x?x4096xf16>{%dim_29196} to #hal.device.promise<@__device_4>
    %31548 = torch_c.from_builtin_tensor %31547 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31549 = torch_c.to_builtin_tensor %31403 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29197 = arith.constant 1 : index
    %dim_29198 = tensor.dim %31549, %c1_29197 : tensor<4x?x4096xf16>
    %31550 = flow.tensor.transfer %31549 : tensor<4x?x4096xf16>{%dim_29198} to #hal.device.promise<@__device_4>
    %31551 = torch_c.from_builtin_tensor %31550 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31552 = torch_c.to_builtin_tensor %31415 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29199 = arith.constant 1 : index
    %dim_29200 = tensor.dim %31552, %c1_29199 : tensor<4x?x4096xf16>
    %31553 = flow.tensor.transfer %31552 : tensor<4x?x4096xf16>{%dim_29200} to #hal.device.promise<@__device_4>
    %31554 = torch_c.from_builtin_tensor %31553 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31555 = torch_c.to_builtin_tensor %31421 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29201 = arith.constant 1 : index
    %dim_29202 = tensor.dim %31555, %c1_29201 : tensor<4x?x4096xf16>
    %31556 = flow.tensor.transfer %31555 : tensor<4x?x4096xf16>{%dim_29202} to #hal.device.promise<@__device_4>
    %31557 = torch_c.from_builtin_tensor %31556 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31558 = torch_c.to_builtin_tensor %31427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29203 = arith.constant 1 : index
    %dim_29204 = tensor.dim %31558, %c1_29203 : tensor<4x?x4096xf16>
    %31559 = flow.tensor.transfer %31558 : tensor<4x?x4096xf16>{%dim_29204} to #hal.device.promise<@__device_4>
    %31560 = torch_c.from_builtin_tensor %31559 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29205 = torch.constant.int 1
    %31561 = torch.aten.add.Tensor %31542, %31545, %int1_29205 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29206 = torch.constant.int 1
    %31562 = torch.aten.add.Tensor %31561, %31548, %int1_29206 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29207 = torch.constant.int 1
    %31563 = torch.aten.add.Tensor %31562, %31551, %int1_29207 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29208 = torch.constant.int 1
    %31564 = torch.aten.add.Tensor %31563, %31409, %int1_29208 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29209 = torch.constant.int 1
    %31565 = torch.aten.add.Tensor %31564, %31554, %int1_29209 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29210 = torch.constant.int 1
    %31566 = torch.aten.add.Tensor %31565, %31557, %int1_29210 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29211 = torch.constant.int 1
    %31567 = torch.aten.add.Tensor %31566, %31560, %int1_29211 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
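    // Same reduction, materialized on @__device_5 (local partial %31415).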
    %31568 = torch_c.to_builtin_tensor %31385 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29212 = arith.constant 1 : index
    %dim_29213 = tensor.dim %31568, %c1_29212 : tensor<4x?x4096xf16>
    %31569 = flow.tensor.transfer %31568 : tensor<4x?x4096xf16>{%dim_29213} to #hal.device.promise<@__device_5>
    %31570 = torch_c.from_builtin_tensor %31569 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31571 = torch_c.to_builtin_tensor %31391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29214 = arith.constant 1 : index
    %dim_29215 = tensor.dim %31571, %c1_29214 : tensor<4x?x4096xf16>
    %31572 = flow.tensor.transfer %31571 : tensor<4x?x4096xf16>{%dim_29215} to #hal.device.promise<@__device_5>
    %31573 = torch_c.from_builtin_tensor %31572 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31574 = torch_c.to_builtin_tensor %31397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29216 = arith.constant 1 : index
    %dim_29217 = tensor.dim %31574, %c1_29216 : tensor<4x?x4096xf16>
    %31575 = flow.tensor.transfer %31574 : tensor<4x?x4096xf16>{%dim_29217} to #hal.device.promise<@__device_5>
    %31576 = torch_c.from_builtin_tensor %31575 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31577 = torch_c.to_builtin_tensor %31403 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29218 = arith.constant 1 : index
    %dim_29219 = tensor.dim %31577, %c1_29218 : tensor<4x?x4096xf16>
    %31578 = flow.tensor.transfer %31577 : tensor<4x?x4096xf16>{%dim_29219} to #hal.device.promise<@__device_5>
    %31579 = torch_c.from_builtin_tensor %31578 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31580 = torch_c.to_builtin_tensor %31409 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29220 = arith.constant 1 : index
    %dim_29221 = tensor.dim %31580, %c1_29220 : tensor<4x?x4096xf16>
    %31581 = flow.tensor.transfer %31580 : tensor<4x?x4096xf16>{%dim_29221} to #hal.device.promise<@__device_5>
    %31582 = torch_c.from_builtin_tensor %31581 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31583 = torch_c.to_builtin_tensor %31421 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29222 = arith.constant 1 : index
    %dim_29223 = tensor.dim %31583, %c1_29222 : tensor<4x?x4096xf16>
    %31584 = flow.tensor.transfer %31583 : tensor<4x?x4096xf16>{%dim_29223} to #hal.device.promise<@__device_5>
    %31585 = torch_c.from_builtin_tensor %31584 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31586 = torch_c.to_builtin_tensor %31427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29224 = arith.constant 1 : index
    %dim_29225 = tensor.dim %31586, %c1_29224 : tensor<4x?x4096xf16>
    %31587 = flow.tensor.transfer %31586 : tensor<4x?x4096xf16>{%dim_29225} to #hal.device.promise<@__device_5>
    %31588 = torch_c.from_builtin_tensor %31587 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29226 = torch.constant.int 1
    %31589 = torch.aten.add.Tensor %31570, %31573, %int1_29226 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29227 = torch.constant.int 1
    %31590 = torch.aten.add.Tensor %31589, %31576, %int1_29227 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29228 = torch.constant.int 1
    %31591 = torch.aten.add.Tensor %31590, %31579, %int1_29228 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29229 = torch.constant.int 1
    %31592 = torch.aten.add.Tensor %31591, %31582, %int1_29229 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29230 = torch.constant.int 1
    %31593 = torch.aten.add.Tensor %31592, %31415, %int1_29230 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29231 = torch.constant.int 1
    %31594 = torch.aten.add.Tensor %31593, %31585, %int1_29231 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29232 = torch.constant.int 1
    %31595 = torch.aten.add.Tensor %31594, %31588, %int1_29232 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
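    // Same transfer-and-sum sequence, replicated for @__device_6 (its local
    // shard %31421 is folded in without a transfer).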
    %31596 = torch_c.to_builtin_tensor %31385 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29233 = arith.constant 1 : index
    %dim_29234 = tensor.dim %31596, %c1_29233 : tensor<4x?x4096xf16>
    %31597 = flow.tensor.transfer %31596 : tensor<4x?x4096xf16>{%dim_29234} to #hal.device.promise<@__device_6>
    %31598 = torch_c.from_builtin_tensor %31597 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31599 = torch_c.to_builtin_tensor %31391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29235 = arith.constant 1 : index
    %dim_29236 = tensor.dim %31599, %c1_29235 : tensor<4x?x4096xf16>
    %31600 = flow.tensor.transfer %31599 : tensor<4x?x4096xf16>{%dim_29236} to #hal.device.promise<@__device_6>
    %31601 = torch_c.from_builtin_tensor %31600 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31602 = torch_c.to_builtin_tensor %31397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29237 = arith.constant 1 : index
    %dim_29238 = tensor.dim %31602, %c1_29237 : tensor<4x?x4096xf16>
    %31603 = flow.tensor.transfer %31602 : tensor<4x?x4096xf16>{%dim_29238} to #hal.device.promise<@__device_6>
    %31604 = torch_c.from_builtin_tensor %31603 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31605 = torch_c.to_builtin_tensor %31403 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29239 = arith.constant 1 : index
    %dim_29240 = tensor.dim %31605, %c1_29239 : tensor<4x?x4096xf16>
    %31606 = flow.tensor.transfer %31605 : tensor<4x?x4096xf16>{%dim_29240} to #hal.device.promise<@__device_6>
    %31607 = torch_c.from_builtin_tensor %31606 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31608 = torch_c.to_builtin_tensor %31409 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29241 = arith.constant 1 : index
    %dim_29242 = tensor.dim %31608, %c1_29241 : tensor<4x?x4096xf16>
    %31609 = flow.tensor.transfer %31608 : tensor<4x?x4096xf16>{%dim_29242} to #hal.device.promise<@__device_6>
    %31610 = torch_c.from_builtin_tensor %31609 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31611 = torch_c.to_builtin_tensor %31415 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29243 = arith.constant 1 : index
    %dim_29244 = tensor.dim %31611, %c1_29243 : tensor<4x?x4096xf16>
    %31612 = flow.tensor.transfer %31611 : tensor<4x?x4096xf16>{%dim_29244} to #hal.device.promise<@__device_6>
    %31613 = torch_c.from_builtin_tensor %31612 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31614 = torch_c.to_builtin_tensor %31427 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29245 = arith.constant 1 : index
    %dim_29246 = tensor.dim %31614, %c1_29245 : tensor<4x?x4096xf16>
    %31615 = flow.tensor.transfer %31614 : tensor<4x?x4096xf16>{%dim_29246} to #hal.device.promise<@__device_6>
    %31616 = torch_c.from_builtin_tensor %31615 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29247 = torch.constant.int 1
    %31617 = torch.aten.add.Tensor %31598, %31601, %int1_29247 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29248 = torch.constant.int 1
    %31618 = torch.aten.add.Tensor %31617, %31604, %int1_29248 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29249 = torch.constant.int 1
    %31619 = torch.aten.add.Tensor %31618, %31607, %int1_29249 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29250 = torch.constant.int 1
    %31620 = torch.aten.add.Tensor %31619, %31610, %int1_29250 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29251 = torch.constant.int 1
    %31621 = torch.aten.add.Tensor %31620, %31613, %int1_29251 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29252 = torch.constant.int 1
    %31622 = torch.aten.add.Tensor %31621, %31421, %int1_29252 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29253 = torch.constant.int 1
    %31623 = torch.aten.add.Tensor %31622, %31616, %int1_29253 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
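    // And again for @__device_7 (local shard %31427), giving every device its
    // own copy of the full reduction.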
    %31624 = torch_c.to_builtin_tensor %31385 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29254 = arith.constant 1 : index
    %dim_29255 = tensor.dim %31624, %c1_29254 : tensor<4x?x4096xf16>
    %31625 = flow.tensor.transfer %31624 : tensor<4x?x4096xf16>{%dim_29255} to #hal.device.promise<@__device_7>
    %31626 = torch_c.from_builtin_tensor %31625 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31627 = torch_c.to_builtin_tensor %31391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29256 = arith.constant 1 : index
    %dim_29257 = tensor.dim %31627, %c1_29256 : tensor<4x?x4096xf16>
    %31628 = flow.tensor.transfer %31627 : tensor<4x?x4096xf16>{%dim_29257} to #hal.device.promise<@__device_7>
    %31629 = torch_c.from_builtin_tensor %31628 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31630 = torch_c.to_builtin_tensor %31397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29258 = arith.constant 1 : index
    %dim_29259 = tensor.dim %31630, %c1_29258 : tensor<4x?x4096xf16>
    %31631 = flow.tensor.transfer %31630 : tensor<4x?x4096xf16>{%dim_29259} to #hal.device.promise<@__device_7>
    %31632 = torch_c.from_builtin_tensor %31631 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31633 = torch_c.to_builtin_tensor %31403 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29260 = arith.constant 1 : index
    %dim_29261 = tensor.dim %31633, %c1_29260 : tensor<4x?x4096xf16>
    %31634 = flow.tensor.transfer %31633 : tensor<4x?x4096xf16>{%dim_29261} to #hal.device.promise<@__device_7>
    %31635 = torch_c.from_builtin_tensor %31634 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31636 = torch_c.to_builtin_tensor %31409 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29262 = arith.constant 1 : index
    %dim_29263 = tensor.dim %31636, %c1_29262 : tensor<4x?x4096xf16>
    %31637 = flow.tensor.transfer %31636 : tensor<4x?x4096xf16>{%dim_29263} to #hal.device.promise<@__device_7>
    %31638 = torch_c.from_builtin_tensor %31637 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31639 = torch_c.to_builtin_tensor %31415 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29264 = arith.constant 1 : index
    %dim_29265 = tensor.dim %31639, %c1_29264 : tensor<4x?x4096xf16>
    %31640 = flow.tensor.transfer %31639 : tensor<4x?x4096xf16>{%dim_29265} to #hal.device.promise<@__device_7>
    %31641 = torch_c.from_builtin_tensor %31640 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31642 = torch_c.to_builtin_tensor %31421 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29266 = arith.constant 1 : index
    %dim_29267 = tensor.dim %31642, %c1_29266 : tensor<4x?x4096xf16>
    %31643 = flow.tensor.transfer %31642 : tensor<4x?x4096xf16>{%dim_29267} to #hal.device.promise<@__device_7>
    %31644 = torch_c.from_builtin_tensor %31643 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29268 = torch.constant.int 1
    %31645 = torch.aten.add.Tensor %31626, %31629, %int1_29268 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29269 = torch.constant.int 1
    %31646 = torch.aten.add.Tensor %31645, %31632, %int1_29269 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29270 = torch.constant.int 1
    %31647 = torch.aten.add.Tensor %31646, %31635, %int1_29270 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29271 = torch.constant.int 1
    %31648 = torch.aten.add.Tensor %31647, %31638, %int1_29271 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29272 = torch.constant.int 1
    %31649 = torch.aten.add.Tensor %31648, %31641, %int1_29272 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29273 = torch.constant.int 1
    %31650 = torch.aten.add.Tensor %31649, %31644, %int1_29273 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29274 = torch.constant.int 1
    %31651 = torch.aten.add.Tensor %31650, %31427, %int1_29274 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
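    // Residual add: each device adds its replica of the incoming hidden state
    // (%30311..%30318) to its copy of the reduced result (%31455, %31483,
    // %31511, %31539, %31567, %31595, %31623, %31651).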
    %int1_29275 = torch.constant.int 1
    %31652 = torch.aten.add.Tensor %30311, %31455, %int1_29275 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29276 = torch.constant.int 1
    %31653 = torch.aten.add.Tensor %30312, %31483, %int1_29276 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29277 = torch.constant.int 1
    %31654 = torch.aten.add.Tensor %30313, %31511, %int1_29277 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29278 = torch.constant.int 1
    %31655 = torch.aten.add.Tensor %30314, %31539, %int1_29278 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29279 = torch.constant.int 1
    %31656 = torch.aten.add.Tensor %30315, %31567, %int1_29279 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29280 = torch.constant.int 1
    %31657 = torch.aten.add.Tensor %30316, %31595, %int1_29280 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29281 = torch.constant.int 1
    %31658 = torch.aten.add.Tensor %30317, %31623, %int1_29281 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29282 = torch.constant.int 1
    %31659 = torch.aten.add.Tensor %30318, %31651, %int1_29282 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
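    // RMSNorm over the residual stream (likely the block's ffn_norm), computed
    // independently on each device's replica. Step 1: upcast f16 -> f32
    // (torch dtype code 6 = float32).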
    %int6_29283 = torch.constant.int 6
    %31660 = torch.prims.convert_element_type %31652, %int6_29283 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29284 = torch.constant.int 6
    %31661 = torch.prims.convert_element_type %31653, %int6_29284 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29285 = torch.constant.int 6
    %31662 = torch.prims.convert_element_type %31654, %int6_29285 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29286 = torch.constant.int 6
    %31663 = torch.prims.convert_element_type %31655, %int6_29286 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29287 = torch.constant.int 6
    %31664 = torch.prims.convert_element_type %31656, %int6_29287 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29288 = torch.constant.int 6
    %31665 = torch.prims.convert_element_type %31657, %int6_29288 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29289 = torch.constant.int 6
    %31666 = torch.prims.convert_element_type %31658, %int6_29289 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29290 = torch.constant.int 6
    %31667 = torch.prims.convert_element_type %31659, %int6_29290 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
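    // Step 2: square each element, x^2.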
    %int2_29291 = torch.constant.int 2
    %31668 = torch.aten.pow.Tensor_Scalar %31660, %int2_29291 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29292 = torch.constant.int 2
    %31669 = torch.aten.pow.Tensor_Scalar %31661, %int2_29292 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29293 = torch.constant.int 2
    %31670 = torch.aten.pow.Tensor_Scalar %31662, %int2_29293 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29294 = torch.constant.int 2
    %31671 = torch.aten.pow.Tensor_Scalar %31663, %int2_29294 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29295 = torch.constant.int 2
    %31672 = torch.aten.pow.Tensor_Scalar %31664, %int2_29295 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29296 = torch.constant.int 2
    %31673 = torch.aten.pow.Tensor_Scalar %31665, %int2_29296 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29297 = torch.constant.int 2
    %31674 = torch.aten.pow.Tensor_Scalar %31666, %int2_29297 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29298 = torch.constant.int 2
    %31675 = torch.aten.pow.Tensor_Scalar %31667, %int2_29298 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
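    // Step 3: mean of x^2 over the hidden dimension (dim -1, keepdim = true).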
    %int-1_29299 = torch.constant.int -1
    %31676 = torch.prim.ListConstruct %int-1_29299 : (!torch.int) -> !torch.list<int>
    %true_29300 = torch.constant.bool true
    %none_29301 = torch.constant.none
    %31677 = torch.aten.mean.dim %31668, %31676, %true_29300, %none_29301 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29302 = torch.constant.int -1
    %31678 = torch.prim.ListConstruct %int-1_29302 : (!torch.int) -> !torch.list<int>
    %true_29303 = torch.constant.bool true
    %none_29304 = torch.constant.none
    %31679 = torch.aten.mean.dim %31669, %31678, %true_29303, %none_29304 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29305 = torch.constant.int -1
    %31680 = torch.prim.ListConstruct %int-1_29305 : (!torch.int) -> !torch.list<int>
    %true_29306 = torch.constant.bool true
    %none_29307 = torch.constant.none
    %31681 = torch.aten.mean.dim %31670, %31680, %true_29306, %none_29307 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29308 = torch.constant.int -1
    %31682 = torch.prim.ListConstruct %int-1_29308 : (!torch.int) -> !torch.list<int>
    %true_29309 = torch.constant.bool true
    %none_29310 = torch.constant.none
    %31683 = torch.aten.mean.dim %31671, %31682, %true_29309, %none_29310 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29311 = torch.constant.int -1
    %31684 = torch.prim.ListConstruct %int-1_29311 : (!torch.int) -> !torch.list<int>
    %true_29312 = torch.constant.bool true
    %none_29313 = torch.constant.none
    %31685 = torch.aten.mean.dim %31672, %31684, %true_29312, %none_29313 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29314 = torch.constant.int -1
    %31686 = torch.prim.ListConstruct %int-1_29314 : (!torch.int) -> !torch.list<int>
    %true_29315 = torch.constant.bool true
    %none_29316 = torch.constant.none
    %31687 = torch.aten.mean.dim %31673, %31686, %true_29315, %none_29316 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29317 = torch.constant.int -1
    %31688 = torch.prim.ListConstruct %int-1_29317 : (!torch.int) -> !torch.list<int>
    %true_29318 = torch.constant.bool true
    %none_29319 = torch.constant.none
    %31689 = torch.aten.mean.dim %31674, %31688, %true_29318, %none_29319 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29320 = torch.constant.int -1
    %31690 = torch.prim.ListConstruct %int-1_29320 : (!torch.int) -> !torch.list<int>
    %true_29321 = torch.constant.bool true
    %none_29322 = torch.constant.none
    %31691 = torch.aten.mean.dim %31675, %31690, %true_29321, %none_29322 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
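    // Step 4: add the epsilon (~1e-5) for numerical stability.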
    %float9.999990e-06_29323 = torch.constant.float 9.9999997473787516E-6
    %int1_29324 = torch.constant.int 1
    %31692 = torch.aten.add.Scalar %31677, %float9.999990e-06_29323, %int1_29324 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29325 = torch.constant.float 9.9999997473787516E-6
    %int1_29326 = torch.constant.int 1
    %31693 = torch.aten.add.Scalar %31679, %float9.999990e-06_29325, %int1_29326 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29327 = torch.constant.float 9.9999997473787516E-6
    %int1_29328 = torch.constant.int 1
    %31694 = torch.aten.add.Scalar %31681, %float9.999990e-06_29327, %int1_29328 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29329 = torch.constant.float 9.9999997473787516E-6
    %int1_29330 = torch.constant.int 1
    %31695 = torch.aten.add.Scalar %31683, %float9.999990e-06_29329, %int1_29330 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29331 = torch.constant.float 9.9999997473787516E-6
    %int1_29332 = torch.constant.int 1
    %31696 = torch.aten.add.Scalar %31685, %float9.999990e-06_29331, %int1_29332 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29333 = torch.constant.float 9.9999997473787516E-6
    %int1_29334 = torch.constant.int 1
    %31697 = torch.aten.add.Scalar %31687, %float9.999990e-06_29333, %int1_29334 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29335 = torch.constant.float 9.9999997473787516E-6
    %int1_29336 = torch.constant.int 1
    %31698 = torch.aten.add.Scalar %31689, %float9.999990e-06_29335, %int1_29336 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29337 = torch.constant.float 9.9999997473787516E-6
    %int1_29338 = torch.constant.int 1
    %31699 = torch.aten.add.Scalar %31691, %float9.999990e-06_29337, %int1_29338 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
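    // Step 5: rsqrt(mean(x^2) + eps).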
    %31700 = torch.aten.rsqrt %31692 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %31701 = torch.aten.rsqrt %31693 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %31702 = torch.aten.rsqrt %31694 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %31703 = torch.aten.rsqrt %31695 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %31704 = torch.aten.rsqrt %31696 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %31705 = torch.aten.rsqrt %31697 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %31706 = torch.aten.rsqrt %31698 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %31707 = torch.aten.rsqrt %31699 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %31707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
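    // Step 6: normalize, x * rsqrt(mean(x^2) + eps).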
    %31708 = torch.aten.mul.Tensor %31660, %31700 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31709 = torch.aten.mul.Tensor %31661, %31701 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31710 = torch.aten.mul.Tensor %31662, %31702 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31711 = torch.aten.mul.Tensor %31663, %31703 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31712 = torch.aten.mul.Tensor %31664, %31704 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31713 = torch.aten.mul.Tensor %31665, %31705 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31714 = torch.aten.mul.Tensor %31666, %31706 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31715 = torch.aten.mul.Tensor %31667, %31707 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
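    // Step 7: scale by each device's norm weight (%1128..%1135, shape [4096]).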
    %31716 = torch.aten.mul.Tensor %1128, %31708 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31717 = torch.aten.mul.Tensor %1129, %31709 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31718 = torch.aten.mul.Tensor %1130, %31710 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31719 = torch.aten.mul.Tensor %1131, %31711 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31720 = torch.aten.mul.Tensor %1132, %31712 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31721 = torch.aten.mul.Tensor %1133, %31713 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31722 = torch.aten.mul.Tensor %1134, %31714 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %31723 = torch.aten.mul.Tensor %1135, %31715 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %31723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
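    // Step 8: downcast the normalized activations back to f16 (dtype code 5).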
    %int5_29339 = torch.constant.int 5
    %31724 = torch.prims.convert_element_type %31716, %int5_29339 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29340 = torch.constant.int 5
    %31725 = torch.prims.convert_element_type %31717, %int5_29340 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29341 = torch.constant.int 5
    %31726 = torch.prims.convert_element_type %31718, %int5_29341 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29342 = torch.constant.int 5
    %31727 = torch.prims.convert_element_type %31719, %int5_29342 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29343 = torch.constant.int 5
    %31728 = torch.prims.convert_element_type %31720, %int5_29343 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29344 = torch.constant.int 5
    %31729 = torch.prims.convert_element_type %31721, %int5_29344 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29345 = torch.constant.int 5
    %31730 = torch.prims.convert_element_type %31722, %int5_29345 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29346 = torch.constant.int 5
    %31731 = torch.prims.convert_element_type %31723, %int5_29346 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
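    // FFN first projection: each device transposes its [1792,4096] weight
    // shard to [4096,1792]. 1792 = 14336/8, so this looks like the gate
    // projection of a SwiGLU MLP with the intermediate dimension split
    // column-parallel across the eight devices.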
    %int1_29347 = torch.constant.int 1
    %int0_29348 = torch.constant.int 0
    %31732 = torch.prim.ListConstruct %int1_29347, %int0_29348 : (!torch.int, !torch.int) -> !torch.list<int>
    %31733 = torch.aten.permute %1136, %31732 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29349 = torch.constant.int 1
    %int0_29350 = torch.constant.int 0
    %31734 = torch.prim.ListConstruct %int1_29349, %int0_29350 : (!torch.int, !torch.int) -> !torch.list<int>
    %31735 = torch.aten.permute %1137, %31734 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29351 = torch.constant.int 1
    %int0_29352 = torch.constant.int 0
    %31736 = torch.prim.ListConstruct %int1_29351, %int0_29352 : (!torch.int, !torch.int) -> !torch.list<int>
    %31737 = torch.aten.permute %1138, %31736 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29353 = torch.constant.int 1
    %int0_29354 = torch.constant.int 0
    %31738 = torch.prim.ListConstruct %int1_29353, %int0_29354 : (!torch.int, !torch.int) -> !torch.list<int>
    %31739 = torch.aten.permute %1139, %31738 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29355 = torch.constant.int 1
    %int0_29356 = torch.constant.int 0
    %31740 = torch.prim.ListConstruct %int1_29355, %int0_29356 : (!torch.int, !torch.int) -> !torch.list<int>
    %31741 = torch.aten.permute %1140, %31740 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29357 = torch.constant.int 1
    %int0_29358 = torch.constant.int 0
    %31742 = torch.prim.ListConstruct %int1_29357, %int0_29358 : (!torch.int, !torch.int) -> !torch.list<int>
    %31743 = torch.aten.permute %1141, %31742 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29359 = torch.constant.int 1
    %int0_29360 = torch.constant.int 0
    %31744 = torch.prim.ListConstruct %int1_29359, %int0_29360 : (!torch.int, !torch.int) -> !torch.list<int>
    %31745 = torch.aten.permute %1142, %31744 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29361 = torch.constant.int 1
    %int0_29362 = torch.constant.int 0
    %31746 = torch.prim.ListConstruct %int1_29361, %int0_29362 : (!torch.int, !torch.int) -> !torch.list<int>
    %31747 = torch.aten.permute %1143, %31746 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
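    // Per device: flatten [4,?,4096] to [4*?,4096], matmul against the
    // transposed shard, then reshape the [?,1792] result back to [4,?,1792].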
    %int4_29363 = torch.constant.int 4
    %31748 = torch.aten.mul.int %int4_29363, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29364 = torch.constant.int 4096
    %31749 = torch.prim.ListConstruct %31748, %int4096_29364 : (!torch.int, !torch.int) -> !torch.list<int>
    %31750 = torch.aten.view %31724, %31749 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31750, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31751 = torch.aten.mm %31750, %31733 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31751, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29365 = torch.constant.int 4
    %int1792_29366 = torch.constant.int 1792
    %31752 = torch.prim.ListConstruct %int4_29365, %2482, %int1792_29366 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31753 = torch.aten.view %31751, %31752 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29367 = torch.constant.int 4
    %31754 = torch.aten.mul.int %int4_29367, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29368 = torch.constant.int 4096
    %31755 = torch.prim.ListConstruct %31754, %int4096_29368 : (!torch.int, !torch.int) -> !torch.list<int>
    %31756 = torch.aten.view %31725, %31755 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31756, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31757 = torch.aten.mm %31756, %31735 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31757, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29369 = torch.constant.int 4
    %int1792_29370 = torch.constant.int 1792
    %31758 = torch.prim.ListConstruct %int4_29369, %2482, %int1792_29370 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31759 = torch.aten.view %31757, %31758 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29371 = torch.constant.int 4
    %31760 = torch.aten.mul.int %int4_29371, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29372 = torch.constant.int 4096
    %31761 = torch.prim.ListConstruct %31760, %int4096_29372 : (!torch.int, !torch.int) -> !torch.list<int>
    %31762 = torch.aten.view %31726, %31761 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31762, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31763 = torch.aten.mm %31762, %31737 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31763, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29373 = torch.constant.int 4
    %int1792_29374 = torch.constant.int 1792
    %31764 = torch.prim.ListConstruct %int4_29373, %2482, %int1792_29374 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31765 = torch.aten.view %31763, %31764 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29375 = torch.constant.int 4
    %31766 = torch.aten.mul.int %int4_29375, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29376 = torch.constant.int 4096
    %31767 = torch.prim.ListConstruct %31766, %int4096_29376 : (!torch.int, !torch.int) -> !torch.list<int>
    %31768 = torch.aten.view %31727, %31767 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31768, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31769 = torch.aten.mm %31768, %31739 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31769, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29377 = torch.constant.int 4
    %int1792_29378 = torch.constant.int 1792
    %31770 = torch.prim.ListConstruct %int4_29377, %2482, %int1792_29378 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31771 = torch.aten.view %31769, %31770 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29379 = torch.constant.int 4
    %31772 = torch.aten.mul.int %int4_29379, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29380 = torch.constant.int 4096
    %31773 = torch.prim.ListConstruct %31772, %int4096_29380 : (!torch.int, !torch.int) -> !torch.list<int>
    %31774 = torch.aten.view %31728, %31773 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31774, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31775 = torch.aten.mm %31774, %31741 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31775, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29381 = torch.constant.int 4
    %int1792_29382 = torch.constant.int 1792
    %31776 = torch.prim.ListConstruct %int4_29381, %2482, %int1792_29382 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31777 = torch.aten.view %31775, %31776 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29383 = torch.constant.int 4
    %31778 = torch.aten.mul.int %int4_29383, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29384 = torch.constant.int 4096
    %31779 = torch.prim.ListConstruct %31778, %int4096_29384 : (!torch.int, !torch.int) -> !torch.list<int>
    %31780 = torch.aten.view %31729, %31779 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31780, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31781 = torch.aten.mm %31780, %31743 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31781, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29385 = torch.constant.int 4
    %int1792_29386 = torch.constant.int 1792
    %31782 = torch.prim.ListConstruct %int4_29385, %2482, %int1792_29386 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31783 = torch.aten.view %31781, %31782 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29387 = torch.constant.int 4
    %31784 = torch.aten.mul.int %int4_29387, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29388 = torch.constant.int 4096
    %31785 = torch.prim.ListConstruct %31784, %int4096_29388 : (!torch.int, !torch.int) -> !torch.list<int>
    %31786 = torch.aten.view %31730, %31785 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31786, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31787 = torch.aten.mm %31786, %31745 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31787, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29389 = torch.constant.int 4
    %int1792_29390 = torch.constant.int 1792
    %31788 = torch.prim.ListConstruct %int4_29389, %2482, %int1792_29390 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31789 = torch.aten.view %31787, %31788 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29391 = torch.constant.int 4
    %31790 = torch.aten.mul.int %int4_29391, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29392 = torch.constant.int 4096
    %31791 = torch.prim.ListConstruct %31790, %int4096_29392 : (!torch.int, !torch.int) -> !torch.list<int>
    %31792 = torch.aten.view %31731, %31791 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31792, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31793 = torch.aten.mm %31792, %31747 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31793, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29393 = torch.constant.int 4
    %int1792_29394 = torch.constant.int 1792
    %31794 = torch.prim.ListConstruct %int4_29393, %2482, %int1792_29394 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31795 = torch.aten.view %31793, %31794 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
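    // SiLU activation on each device's projection output, i.e. the gate half
    // of the SwiGLU pair.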
    %31796 = torch.aten.silu %31753 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31797 = torch.aten.silu %31759 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31798 = torch.aten.silu %31765 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31799 = torch.aten.silu %31771 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31800 = torch.aten.silu %31777 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31801 = torch.aten.silu %31783 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31802 = torch.aten.silu %31789 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31803 = torch.aten.silu %31795 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
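    // A second set of [1792,4096] shards (%1144..%1151) is transposed the same
    // way; presumably the up projection that will be multiplied with the gated
    // activations.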
    %int1_29395 = torch.constant.int 1
    %int0_29396 = torch.constant.int 0
    %31804 = torch.prim.ListConstruct %int1_29395, %int0_29396 : (!torch.int, !torch.int) -> !torch.list<int>
    %31805 = torch.aten.permute %1144, %31804 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29397 = torch.constant.int 1
    %int0_29398 = torch.constant.int 0
    %31806 = torch.prim.ListConstruct %int1_29397, %int0_29398 : (!torch.int, !torch.int) -> !torch.list<int>
    %31807 = torch.aten.permute %1145, %31806 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29399 = torch.constant.int 1
    %int0_29400 = torch.constant.int 0
    %31808 = torch.prim.ListConstruct %int1_29399, %int0_29400 : (!torch.int, !torch.int) -> !torch.list<int>
    %31809 = torch.aten.permute %1146, %31808 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29401 = torch.constant.int 1
    %int0_29402 = torch.constant.int 0
    %31810 = torch.prim.ListConstruct %int1_29401, %int0_29402 : (!torch.int, !torch.int) -> !torch.list<int>
    %31811 = torch.aten.permute %1147, %31810 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29403 = torch.constant.int 1
    %int0_29404 = torch.constant.int 0
    %31812 = torch.prim.ListConstruct %int1_29403, %int0_29404 : (!torch.int, !torch.int) -> !torch.list<int>
    %31813 = torch.aten.permute %1148, %31812 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29405 = torch.constant.int 1
    %int0_29406 = torch.constant.int 0
    %31814 = torch.prim.ListConstruct %int1_29405, %int0_29406 : (!torch.int, !torch.int) -> !torch.list<int>
    %31815 = torch.aten.permute %1149, %31814 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29407 = torch.constant.int 1
    %int0_29408 = torch.constant.int 0
    %31816 = torch.prim.ListConstruct %int1_29407, %int0_29408 : (!torch.int, !torch.int) -> !torch.list<int>
    %31817 = torch.aten.permute %1150, %31816 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_29409 = torch.constant.int 1
    %int0_29410 = torch.constant.int 0
    %31818 = torch.prim.ListConstruct %int1_29409, %int0_29410 : (!torch.int, !torch.int) -> !torch.list<int>
    %31819 = torch.aten.permute %1151, %31818 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
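    // Second-projection matmuls: the same flatten / mm / reshape pattern,
    // applied per device to the same normalized inputs %31724..%31731.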
    %int4_29411 = torch.constant.int 4
    %31820 = torch.aten.mul.int %int4_29411, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29412 = torch.constant.int 4096
    %31821 = torch.prim.ListConstruct %31820, %int4096_29412 : (!torch.int, !torch.int) -> !torch.list<int>
    %31822 = torch.aten.view %31724, %31821 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31822, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31823 = torch.aten.mm %31822, %31805 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31823, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29413 = torch.constant.int 4
    %int1792_29414 = torch.constant.int 1792
    %31824 = torch.prim.ListConstruct %int4_29413, %2482, %int1792_29414 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31825 = torch.aten.view %31823, %31824 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29415 = torch.constant.int 4
    %31826 = torch.aten.mul.int %int4_29415, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29416 = torch.constant.int 4096
    %31827 = torch.prim.ListConstruct %31826, %int4096_29416 : (!torch.int, !torch.int) -> !torch.list<int>
    %31828 = torch.aten.view %31725, %31827 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31828, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31829 = torch.aten.mm %31828, %31807 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31829, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29417 = torch.constant.int 4
    %int1792_29418 = torch.constant.int 1792
    %31830 = torch.prim.ListConstruct %int4_29417, %2482, %int1792_29418 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31831 = torch.aten.view %31829, %31830 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29419 = torch.constant.int 4
    %31832 = torch.aten.mul.int %int4_29419, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29420 = torch.constant.int 4096
    %31833 = torch.prim.ListConstruct %31832, %int4096_29420 : (!torch.int, !torch.int) -> !torch.list<int>
    %31834 = torch.aten.view %31726, %31833 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31834, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31835 = torch.aten.mm %31834, %31809 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31835, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29421 = torch.constant.int 4
    %int1792_29422 = torch.constant.int 1792
    %31836 = torch.prim.ListConstruct %int4_29421, %2482, %int1792_29422 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31837 = torch.aten.view %31835, %31836 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29423 = torch.constant.int 4
    %31838 = torch.aten.mul.int %int4_29423, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29424 = torch.constant.int 4096
    %31839 = torch.prim.ListConstruct %31838, %int4096_29424 : (!torch.int, !torch.int) -> !torch.list<int>
    %31840 = torch.aten.view %31727, %31839 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31840, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31841 = torch.aten.mm %31840, %31811 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31841, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29425 = torch.constant.int 4
    %int1792_29426 = torch.constant.int 1792
    %31842 = torch.prim.ListConstruct %int4_29425, %2482, %int1792_29426 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31843 = torch.aten.view %31841, %31842 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29427 = torch.constant.int 4
    %31844 = torch.aten.mul.int %int4_29427, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29428 = torch.constant.int 4096
    %31845 = torch.prim.ListConstruct %31844, %int4096_29428 : (!torch.int, !torch.int) -> !torch.list<int>
    %31846 = torch.aten.view %31728, %31845 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31846, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31847 = torch.aten.mm %31846, %31813 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31847, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29429 = torch.constant.int 4
    %int1792_29430 = torch.constant.int 1792
    %31848 = torch.prim.ListConstruct %int4_29429, %2482, %int1792_29430 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31849 = torch.aten.view %31847, %31848 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29431 = torch.constant.int 4
    %31850 = torch.aten.mul.int %int4_29431, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29432 = torch.constant.int 4096
    %31851 = torch.prim.ListConstruct %31850, %int4096_29432 : (!torch.int, !torch.int) -> !torch.list<int>
    %31852 = torch.aten.view %31729, %31851 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31852, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31853 = torch.aten.mm %31852, %31815 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31853, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29433 = torch.constant.int 4
    %int1792_29434 = torch.constant.int 1792
    %31854 = torch.prim.ListConstruct %int4_29433, %2482, %int1792_29434 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31855 = torch.aten.view %31853, %31854 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29435 = torch.constant.int 4
    %31856 = torch.aten.mul.int %int4_29435, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29436 = torch.constant.int 4096
    %31857 = torch.prim.ListConstruct %31856, %int4096_29436 : (!torch.int, !torch.int) -> !torch.list<int>
    %31858 = torch.aten.view %31730, %31857 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31858, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31859 = torch.aten.mm %31858, %31817 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31859, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29437 = torch.constant.int 4
    %int1792_29438 = torch.constant.int 1792
    %31860 = torch.prim.ListConstruct %int4_29437, %2482, %int1792_29438 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31861 = torch.aten.view %31859, %31860 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_29439 = torch.constant.int 4
    %31862 = torch.aten.mul.int %int4_29439, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29440 = torch.constant.int 4096
    %31863 = torch.prim.ListConstruct %31862, %int4096_29440 : (!torch.int, !torch.int) -> !torch.list<int>
    %31864 = torch.aten.view %31731, %31863 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31864, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %31865 = torch.aten.mm %31864, %31819 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31865, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_29441 = torch.constant.int 4
    %int1792_29442 = torch.constant.int 1792
    %31866 = torch.prim.ListConstruct %int4_29441, %2482, %int1792_29442 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31867 = torch.aten.view %31865, %31866 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
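    // SwiGLU gating: elementwise silu(gate) * up on each device's [4,?,1792] shard.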
    %31868 = torch.aten.mul.Tensor %31796, %31825 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31869 = torch.aten.mul.Tensor %31797, %31831 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31870 = torch.aten.mul.Tensor %31798, %31837 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31871 = torch.aten.mul.Tensor %31799, %31843 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31872 = torch.aten.mul.Tensor %31800, %31849 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31873 = torch.aten.mul.Tensor %31801, %31855 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31874 = torch.aten.mul.Tensor %31802, %31861 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %31875 = torch.aten.mul.Tensor %31803, %31867 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %31875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
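    // Transpose the eight [4096,1792] down-projection shards (%1152..%1159)
    // to [1792,4096] for use as mm right-hand sides.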
    %int1_29443 = torch.constant.int 1
    %int0_29444 = torch.constant.int 0
    %31876 = torch.prim.ListConstruct %int1_29443, %int0_29444 : (!torch.int, !torch.int) -> !torch.list<int>
    %31877 = torch.aten.permute %1152, %31876 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_29445 = torch.constant.int 1
    %int0_29446 = torch.constant.int 0
    %31878 = torch.prim.ListConstruct %int1_29445, %int0_29446 : (!torch.int, !torch.int) -> !torch.list<int>
    %31879 = torch.aten.permute %1153, %31878 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_29447 = torch.constant.int 1
    %int0_29448 = torch.constant.int 0
    %31880 = torch.prim.ListConstruct %int1_29447, %int0_29448 : (!torch.int, !torch.int) -> !torch.list<int>
    %31881 = torch.aten.permute %1154, %31880 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_29449 = torch.constant.int 1
    %int0_29450 = torch.constant.int 0
    %31882 = torch.prim.ListConstruct %int1_29449, %int0_29450 : (!torch.int, !torch.int) -> !torch.list<int>
    %31883 = torch.aten.permute %1155, %31882 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_29451 = torch.constant.int 1
    %int0_29452 = torch.constant.int 0
    %31884 = torch.prim.ListConstruct %int1_29451, %int0_29452 : (!torch.int, !torch.int) -> !torch.list<int>
    %31885 = torch.aten.permute %1156, %31884 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_29453 = torch.constant.int 1
    %int0_29454 = torch.constant.int 0
    %31886 = torch.prim.ListConstruct %int1_29453, %int0_29454 : (!torch.int, !torch.int) -> !torch.list<int>
    %31887 = torch.aten.permute %1157, %31886 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_29455 = torch.constant.int 1
    %int0_29456 = torch.constant.int 0
    %31888 = torch.prim.ListConstruct %int1_29455, %int0_29456 : (!torch.int, !torch.int) -> !torch.list<int>
    %31889 = torch.aten.permute %1158, %31888 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_29457 = torch.constant.int 1
    %int0_29458 = torch.constant.int 0
    %31890 = torch.prim.ListConstruct %int1_29457, %int0_29458 : (!torch.int, !torch.int) -> !torch.list<int>
    %31891 = torch.aten.permute %1159, %31890 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
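    // Down projection per device: re-query the dynamic sequence dim with
    // torch.aten.size.int, flatten the gated activations to [?,1792], mm to
    // [?,4096], and view back to [4,?,4096]. Each device now holds a partial
    // sum of the full FFN output contributed by its 1792-column slice.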
    %int1_29459 = torch.constant.int 1
    %31892 = torch.aten.size.int %31753, %int1_29459 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_29460 = torch.constant.int 4
    %31893 = torch.aten.mul.int %int4_29460, %31892 : !torch.int, !torch.int -> !torch.int
    %int1792_29461 = torch.constant.int 1792
    %31894 = torch.prim.ListConstruct %31893, %int1792_29461 : (!torch.int, !torch.int) -> !torch.list<int>
    %31895 = torch.aten.view %31868, %31894 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31895, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %31896 = torch.aten.mm %31895, %31877 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31896, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29462 = torch.constant.int 4
    %int4096_29463 = torch.constant.int 4096
    %31897 = torch.prim.ListConstruct %int4_29462, %31892, %int4096_29463 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31898 = torch.aten.view %31896, %31897 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29464 = torch.constant.int 1
    %31899 = torch.aten.size.int %31759, %int1_29464 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_29465 = torch.constant.int 4
    %31900 = torch.aten.mul.int %int4_29465, %31899 : !torch.int, !torch.int -> !torch.int
    %int1792_29466 = torch.constant.int 1792
    %31901 = torch.prim.ListConstruct %31900, %int1792_29466 : (!torch.int, !torch.int) -> !torch.list<int>
    %31902 = torch.aten.view %31869, %31901 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31902, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %31903 = torch.aten.mm %31902, %31879 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31903, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29467 = torch.constant.int 4
    %int4096_29468 = torch.constant.int 4096
    %31904 = torch.prim.ListConstruct %int4_29467, %31899, %int4096_29468 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31905 = torch.aten.view %31903, %31904 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29469 = torch.constant.int 1
    %31906 = torch.aten.size.int %31765, %int1_29469 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_29470 = torch.constant.int 4
    %31907 = torch.aten.mul.int %int4_29470, %31906 : !torch.int, !torch.int -> !torch.int
    %int1792_29471 = torch.constant.int 1792
    %31908 = torch.prim.ListConstruct %31907, %int1792_29471 : (!torch.int, !torch.int) -> !torch.list<int>
    %31909 = torch.aten.view %31870, %31908 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31909, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %31910 = torch.aten.mm %31909, %31881 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31910, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29472 = torch.constant.int 4
    %int4096_29473 = torch.constant.int 4096
    %31911 = torch.prim.ListConstruct %int4_29472, %31906, %int4096_29473 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31912 = torch.aten.view %31910, %31911 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29474 = torch.constant.int 1
    %31913 = torch.aten.size.int %31771, %int1_29474 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_29475 = torch.constant.int 4
    %31914 = torch.aten.mul.int %int4_29475, %31913 : !torch.int, !torch.int -> !torch.int
    %int1792_29476 = torch.constant.int 1792
    %31915 = torch.prim.ListConstruct %31914, %int1792_29476 : (!torch.int, !torch.int) -> !torch.list<int>
    %31916 = torch.aten.view %31871, %31915 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31916, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %31917 = torch.aten.mm %31916, %31883 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31917, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29477 = torch.constant.int 4
    %int4096_29478 = torch.constant.int 4096
    %31918 = torch.prim.ListConstruct %int4_29477, %31913, %int4096_29478 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31919 = torch.aten.view %31917, %31918 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29479 = torch.constant.int 1
    %31920 = torch.aten.size.int %31777, %int1_29479 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_29480 = torch.constant.int 4
    %31921 = torch.aten.mul.int %int4_29480, %31920 : !torch.int, !torch.int -> !torch.int
    %int1792_29481 = torch.constant.int 1792
    %31922 = torch.prim.ListConstruct %31921, %int1792_29481 : (!torch.int, !torch.int) -> !torch.list<int>
    %31923 = torch.aten.view %31872, %31922 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31923, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %31924 = torch.aten.mm %31923, %31885 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31924, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29482 = torch.constant.int 4
    %int4096_29483 = torch.constant.int 4096
    %31925 = torch.prim.ListConstruct %int4_29482, %31920, %int4096_29483 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31926 = torch.aten.view %31924, %31925 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29484 = torch.constant.int 1
    %31927 = torch.aten.size.int %31783, %int1_29484 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_29485 = torch.constant.int 4
    %31928 = torch.aten.mul.int %int4_29485, %31927 : !torch.int, !torch.int -> !torch.int
    %int1792_29486 = torch.constant.int 1792
    %31929 = torch.prim.ListConstruct %31928, %int1792_29486 : (!torch.int, !torch.int) -> !torch.list<int>
    %31930 = torch.aten.view %31873, %31929 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31930, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %31931 = torch.aten.mm %31930, %31887 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31931, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29487 = torch.constant.int 4
    %int4096_29488 = torch.constant.int 4096
    %31932 = torch.prim.ListConstruct %int4_29487, %31927, %int4096_29488 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31933 = torch.aten.view %31931, %31932 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29489 = torch.constant.int 1
    %31934 = torch.aten.size.int %31789, %int1_29489 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_29490 = torch.constant.int 4
    %31935 = torch.aten.mul.int %int4_29490, %31934 : !torch.int, !torch.int -> !torch.int
    %int1792_29491 = torch.constant.int 1792
    %31936 = torch.prim.ListConstruct %31935, %int1792_29491 : (!torch.int, !torch.int) -> !torch.list<int>
    %31937 = torch.aten.view %31874, %31936 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31937, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %31938 = torch.aten.mm %31937, %31889 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31938, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29492 = torch.constant.int 4
    %int4096_29493 = torch.constant.int 4096
    %31939 = torch.prim.ListConstruct %int4_29492, %31934, %int4096_29493 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31940 = torch.aten.view %31938, %31939 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29494 = torch.constant.int 1
    %31941 = torch.aten.size.int %31795, %int1_29494 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_29495 = torch.constant.int 4
    %31942 = torch.aten.mul.int %int4_29495, %31941 : !torch.int, !torch.int -> !torch.int
    %int1792_29496 = torch.constant.int 1792
    %31943 = torch.prim.ListConstruct %31942, %int1792_29496 : (!torch.int, !torch.int) -> !torch.list<int>
    %31944 = torch.aten.view %31875, %31943 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %31944, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %31945 = torch.aten.mm %31944, %31891 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %31945, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_29497 = torch.constant.int 4
    %int4096_29498 = torch.constant.int 4096
    %31946 = torch.prim.ListConstruct %int4_29497, %31941, %int4096_29498 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %31947 = torch.aten.view %31945, %31946 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
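    // All-reduce spelled out as explicit point-to-point copies: the other seven
    // partial results are moved to @__device_0 with flow.tensor.transfer (the
    // dynamic dim captured via tensor.dim) and summed by a chain of
    // torch.aten.add.Tensor.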
    %31948 = torch_c.to_builtin_tensor %31905 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29499 = arith.constant 1 : index
    %dim_29500 = tensor.dim %31948, %c1_29499 : tensor<4x?x4096xf16>
    %31949 = flow.tensor.transfer %31948 : tensor<4x?x4096xf16>{%dim_29500} to #hal.device.promise<@__device_0>
    %31950 = torch_c.from_builtin_tensor %31949 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31951 = torch_c.to_builtin_tensor %31912 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29501 = arith.constant 1 : index
    %dim_29502 = tensor.dim %31951, %c1_29501 : tensor<4x?x4096xf16>
    %31952 = flow.tensor.transfer %31951 : tensor<4x?x4096xf16>{%dim_29502} to #hal.device.promise<@__device_0>
    %31953 = torch_c.from_builtin_tensor %31952 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31954 = torch_c.to_builtin_tensor %31919 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29503 = arith.constant 1 : index
    %dim_29504 = tensor.dim %31954, %c1_29503 : tensor<4x?x4096xf16>
    %31955 = flow.tensor.transfer %31954 : tensor<4x?x4096xf16>{%dim_29504} to #hal.device.promise<@__device_0>
    %31956 = torch_c.from_builtin_tensor %31955 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31957 = torch_c.to_builtin_tensor %31926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29505 = arith.constant 1 : index
    %dim_29506 = tensor.dim %31957, %c1_29505 : tensor<4x?x4096xf16>
    %31958 = flow.tensor.transfer %31957 : tensor<4x?x4096xf16>{%dim_29506} to #hal.device.promise<@__device_0>
    %31959 = torch_c.from_builtin_tensor %31958 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31960 = torch_c.to_builtin_tensor %31933 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29507 = arith.constant 1 : index
    %dim_29508 = tensor.dim %31960, %c1_29507 : tensor<4x?x4096xf16>
    %31961 = flow.tensor.transfer %31960 : tensor<4x?x4096xf16>{%dim_29508} to #hal.device.promise<@__device_0>
    %31962 = torch_c.from_builtin_tensor %31961 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31963 = torch_c.to_builtin_tensor %31940 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29509 = arith.constant 1 : index
    %dim_29510 = tensor.dim %31963, %c1_29509 : tensor<4x?x4096xf16>
    %31964 = flow.tensor.transfer %31963 : tensor<4x?x4096xf16>{%dim_29510} to #hal.device.promise<@__device_0>
    %31965 = torch_c.from_builtin_tensor %31964 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31966 = torch_c.to_builtin_tensor %31947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29511 = arith.constant 1 : index
    %dim_29512 = tensor.dim %31966, %c1_29511 : tensor<4x?x4096xf16>
    %31967 = flow.tensor.transfer %31966 : tensor<4x?x4096xf16>{%dim_29512} to #hal.device.promise<@__device_0>
    %31968 = torch_c.from_builtin_tensor %31967 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29513 = torch.constant.int 1
    %31969 = torch.aten.add.Tensor %31898, %31950, %int1_29513 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29514 = torch.constant.int 1
    %31970 = torch.aten.add.Tensor %31969, %31953, %int1_29514 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29515 = torch.constant.int 1
    %31971 = torch.aten.add.Tensor %31970, %31956, %int1_29515 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29516 = torch.constant.int 1
    %31972 = torch.aten.add.Tensor %31971, %31959, %int1_29516 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29517 = torch.constant.int 1
    %31973 = torch.aten.add.Tensor %31972, %31962, %int1_29517 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29518 = torch.constant.int 1
    %31974 = torch.aten.add.Tensor %31973, %31965, %int1_29518 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29519 = torch.constant.int 1
    %31975 = torch.aten.add.Tensor %31974, %31968, %int1_29519 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
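    // %31975 is the fully reduced FFN output resident on @__device_0. Rather than
    // broadcasting it, the same transfer-and-accumulate chain is re-materialized
    // on each remaining device so every device ends up with its own copy.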
    %31976 = torch_c.to_builtin_tensor %31898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29520 = arith.constant 1 : index
    %dim_29521 = tensor.dim %31976, %c1_29520 : tensor<4x?x4096xf16>
    %31977 = flow.tensor.transfer %31976 : tensor<4x?x4096xf16>{%dim_29521} to #hal.device.promise<@__device_1>
    %31978 = torch_c.from_builtin_tensor %31977 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31979 = torch_c.to_builtin_tensor %31912 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29522 = arith.constant 1 : index
    %dim_29523 = tensor.dim %31979, %c1_29522 : tensor<4x?x4096xf16>
    %31980 = flow.tensor.transfer %31979 : tensor<4x?x4096xf16>{%dim_29523} to #hal.device.promise<@__device_1>
    %31981 = torch_c.from_builtin_tensor %31980 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31982 = torch_c.to_builtin_tensor %31919 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29524 = arith.constant 1 : index
    %dim_29525 = tensor.dim %31982, %c1_29524 : tensor<4x?x4096xf16>
    %31983 = flow.tensor.transfer %31982 : tensor<4x?x4096xf16>{%dim_29525} to #hal.device.promise<@__device_1>
    %31984 = torch_c.from_builtin_tensor %31983 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31985 = torch_c.to_builtin_tensor %31926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29526 = arith.constant 1 : index
    %dim_29527 = tensor.dim %31985, %c1_29526 : tensor<4x?x4096xf16>
    %31986 = flow.tensor.transfer %31985 : tensor<4x?x4096xf16>{%dim_29527} to #hal.device.promise<@__device_1>
    %31987 = torch_c.from_builtin_tensor %31986 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31988 = torch_c.to_builtin_tensor %31933 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29528 = arith.constant 1 : index
    %dim_29529 = tensor.dim %31988, %c1_29528 : tensor<4x?x4096xf16>
    %31989 = flow.tensor.transfer %31988 : tensor<4x?x4096xf16>{%dim_29529} to #hal.device.promise<@__device_1>
    %31990 = torch_c.from_builtin_tensor %31989 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31991 = torch_c.to_builtin_tensor %31940 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29530 = arith.constant 1 : index
    %dim_29531 = tensor.dim %31991, %c1_29530 : tensor<4x?x4096xf16>
    %31992 = flow.tensor.transfer %31991 : tensor<4x?x4096xf16>{%dim_29531} to #hal.device.promise<@__device_1>
    %31993 = torch_c.from_builtin_tensor %31992 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %31994 = torch_c.to_builtin_tensor %31947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29532 = arith.constant 1 : index
    %dim_29533 = tensor.dim %31994, %c1_29532 : tensor<4x?x4096xf16>
    %31995 = flow.tensor.transfer %31994 : tensor<4x?x4096xf16>{%dim_29533} to #hal.device.promise<@__device_1>
    %31996 = torch_c.from_builtin_tensor %31995 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29534 = torch.constant.int 1
    %31997 = torch.aten.add.Tensor %31978, %31905, %int1_29534 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29535 = torch.constant.int 1
    %31998 = torch.aten.add.Tensor %31997, %31981, %int1_29535 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29536 = torch.constant.int 1
    %31999 = torch.aten.add.Tensor %31998, %31984, %int1_29536 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %31999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29537 = torch.constant.int 1
    %32000 = torch.aten.add.Tensor %31999, %31987, %int1_29537 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29538 = torch.constant.int 1
    %32001 = torch.aten.add.Tensor %32000, %31990, %int1_29538 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29539 = torch.constant.int 1
    %32002 = torch.aten.add.Tensor %32001, %31993, %int1_29539 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29540 = torch.constant.int 1
    %32003 = torch.aten.add.Tensor %32002, %31996, %int1_29540 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
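    // Reduction replica for @__device_2. Note each chain folds in the
    // device-resident partial directly (%31905 on @__device_1 above, %31912 here)
    // rather than round-tripping it through a transfer.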
    %32004 = torch_c.to_builtin_tensor %31898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29541 = arith.constant 1 : index
    %dim_29542 = tensor.dim %32004, %c1_29541 : tensor<4x?x4096xf16>
    %32005 = flow.tensor.transfer %32004 : tensor<4x?x4096xf16>{%dim_29542} to #hal.device.promise<@__device_2>
    %32006 = torch_c.from_builtin_tensor %32005 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32007 = torch_c.to_builtin_tensor %31905 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29543 = arith.constant 1 : index
    %dim_29544 = tensor.dim %32007, %c1_29543 : tensor<4x?x4096xf16>
    %32008 = flow.tensor.transfer %32007 : tensor<4x?x4096xf16>{%dim_29544} to #hal.device.promise<@__device_2>
    %32009 = torch_c.from_builtin_tensor %32008 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32010 = torch_c.to_builtin_tensor %31919 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29545 = arith.constant 1 : index
    %dim_29546 = tensor.dim %32010, %c1_29545 : tensor<4x?x4096xf16>
    %32011 = flow.tensor.transfer %32010 : tensor<4x?x4096xf16>{%dim_29546} to #hal.device.promise<@__device_2>
    %32012 = torch_c.from_builtin_tensor %32011 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32013 = torch_c.to_builtin_tensor %31926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29547 = arith.constant 1 : index
    %dim_29548 = tensor.dim %32013, %c1_29547 : tensor<4x?x4096xf16>
    %32014 = flow.tensor.transfer %32013 : tensor<4x?x4096xf16>{%dim_29548} to #hal.device.promise<@__device_2>
    %32015 = torch_c.from_builtin_tensor %32014 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32016 = torch_c.to_builtin_tensor %31933 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29549 = arith.constant 1 : index
    %dim_29550 = tensor.dim %32016, %c1_29549 : tensor<4x?x4096xf16>
    %32017 = flow.tensor.transfer %32016 : tensor<4x?x4096xf16>{%dim_29550} to #hal.device.promise<@__device_2>
    %32018 = torch_c.from_builtin_tensor %32017 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32019 = torch_c.to_builtin_tensor %31940 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29551 = arith.constant 1 : index
    %dim_29552 = tensor.dim %32019, %c1_29551 : tensor<4x?x4096xf16>
    %32020 = flow.tensor.transfer %32019 : tensor<4x?x4096xf16>{%dim_29552} to #hal.device.promise<@__device_2>
    %32021 = torch_c.from_builtin_tensor %32020 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32022 = torch_c.to_builtin_tensor %31947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29553 = arith.constant 1 : index
    %dim_29554 = tensor.dim %32022, %c1_29553 : tensor<4x?x4096xf16>
    %32023 = flow.tensor.transfer %32022 : tensor<4x?x4096xf16>{%dim_29554} to #hal.device.promise<@__device_2>
    %32024 = torch_c.from_builtin_tensor %32023 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29555 = torch.constant.int 1
    %32025 = torch.aten.add.Tensor %32006, %32009, %int1_29555 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29556 = torch.constant.int 1
    %32026 = torch.aten.add.Tensor %32025, %31912, %int1_29556 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29557 = torch.constant.int 1
    %32027 = torch.aten.add.Tensor %32026, %32012, %int1_29557 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29558 = torch.constant.int 1
    %32028 = torch.aten.add.Tensor %32027, %32015, %int1_29558 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29559 = torch.constant.int 1
    %32029 = torch.aten.add.Tensor %32028, %32018, %int1_29559 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29560 = torch.constant.int 1
    %32030 = torch.aten.add.Tensor %32029, %32021, %int1_29560 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29561 = torch.constant.int 1
    %32031 = torch.aten.add.Tensor %32030, %32024, %int1_29561 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
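    // Reduction replica for @__device_3 (resident partial %31919 added in place).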
    %32032 = torch_c.to_builtin_tensor %31898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29562 = arith.constant 1 : index
    %dim_29563 = tensor.dim %32032, %c1_29562 : tensor<4x?x4096xf16>
    %32033 = flow.tensor.transfer %32032 : tensor<4x?x4096xf16>{%dim_29563} to #hal.device.promise<@__device_3>
    %32034 = torch_c.from_builtin_tensor %32033 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32035 = torch_c.to_builtin_tensor %31905 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29564 = arith.constant 1 : index
    %dim_29565 = tensor.dim %32035, %c1_29564 : tensor<4x?x4096xf16>
    %32036 = flow.tensor.transfer %32035 : tensor<4x?x4096xf16>{%dim_29565} to #hal.device.promise<@__device_3>
    %32037 = torch_c.from_builtin_tensor %32036 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32038 = torch_c.to_builtin_tensor %31912 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29566 = arith.constant 1 : index
    %dim_29567 = tensor.dim %32038, %c1_29566 : tensor<4x?x4096xf16>
    %32039 = flow.tensor.transfer %32038 : tensor<4x?x4096xf16>{%dim_29567} to #hal.device.promise<@__device_3>
    %32040 = torch_c.from_builtin_tensor %32039 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32041 = torch_c.to_builtin_tensor %31926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29568 = arith.constant 1 : index
    %dim_29569 = tensor.dim %32041, %c1_29568 : tensor<4x?x4096xf16>
    %32042 = flow.tensor.transfer %32041 : tensor<4x?x4096xf16>{%dim_29569} to #hal.device.promise<@__device_3>
    %32043 = torch_c.from_builtin_tensor %32042 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32044 = torch_c.to_builtin_tensor %31933 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29570 = arith.constant 1 : index
    %dim_29571 = tensor.dim %32044, %c1_29570 : tensor<4x?x4096xf16>
    %32045 = flow.tensor.transfer %32044 : tensor<4x?x4096xf16>{%dim_29571} to #hal.device.promise<@__device_3>
    %32046 = torch_c.from_builtin_tensor %32045 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32047 = torch_c.to_builtin_tensor %31940 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29572 = arith.constant 1 : index
    %dim_29573 = tensor.dim %32047, %c1_29572 : tensor<4x?x4096xf16>
    %32048 = flow.tensor.transfer %32047 : tensor<4x?x4096xf16>{%dim_29573} to #hal.device.promise<@__device_3>
    %32049 = torch_c.from_builtin_tensor %32048 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32050 = torch_c.to_builtin_tensor %31947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29574 = arith.constant 1 : index
    %dim_29575 = tensor.dim %32050, %c1_29574 : tensor<4x?x4096xf16>
    %32051 = flow.tensor.transfer %32050 : tensor<4x?x4096xf16>{%dim_29575} to #hal.device.promise<@__device_3>
    %32052 = torch_c.from_builtin_tensor %32051 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29576 = torch.constant.int 1
    %32053 = torch.aten.add.Tensor %32034, %32037, %int1_29576 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29577 = torch.constant.int 1
    %32054 = torch.aten.add.Tensor %32053, %32040, %int1_29577 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29578 = torch.constant.int 1
    %32055 = torch.aten.add.Tensor %32054, %31919, %int1_29578 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29579 = torch.constant.int 1
    %32056 = torch.aten.add.Tensor %32055, %32043, %int1_29579 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29580 = torch.constant.int 1
    %32057 = torch.aten.add.Tensor %32056, %32046, %int1_29580 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29581 = torch.constant.int 1
    %32058 = torch.aten.add.Tensor %32057, %32049, %int1_29581 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29582 = torch.constant.int 1
    %32059 = torch.aten.add.Tensor %32058, %32052, %int1_29582 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
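    // Reduction replica for @__device_4 (resident partial %31926 added in place).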
    %32060 = torch_c.to_builtin_tensor %31898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29583 = arith.constant 1 : index
    %dim_29584 = tensor.dim %32060, %c1_29583 : tensor<4x?x4096xf16>
    %32061 = flow.tensor.transfer %32060 : tensor<4x?x4096xf16>{%dim_29584} to #hal.device.promise<@__device_4>
    %32062 = torch_c.from_builtin_tensor %32061 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32063 = torch_c.to_builtin_tensor %31905 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29585 = arith.constant 1 : index
    %dim_29586 = tensor.dim %32063, %c1_29585 : tensor<4x?x4096xf16>
    %32064 = flow.tensor.transfer %32063 : tensor<4x?x4096xf16>{%dim_29586} to #hal.device.promise<@__device_4>
    %32065 = torch_c.from_builtin_tensor %32064 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32066 = torch_c.to_builtin_tensor %31912 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29587 = arith.constant 1 : index
    %dim_29588 = tensor.dim %32066, %c1_29587 : tensor<4x?x4096xf16>
    %32067 = flow.tensor.transfer %32066 : tensor<4x?x4096xf16>{%dim_29588} to #hal.device.promise<@__device_4>
    %32068 = torch_c.from_builtin_tensor %32067 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32069 = torch_c.to_builtin_tensor %31919 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29589 = arith.constant 1 : index
    %dim_29590 = tensor.dim %32069, %c1_29589 : tensor<4x?x4096xf16>
    %32070 = flow.tensor.transfer %32069 : tensor<4x?x4096xf16>{%dim_29590} to #hal.device.promise<@__device_4>
    %32071 = torch_c.from_builtin_tensor %32070 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32072 = torch_c.to_builtin_tensor %31933 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29591 = arith.constant 1 : index
    %dim_29592 = tensor.dim %32072, %c1_29591 : tensor<4x?x4096xf16>
    %32073 = flow.tensor.transfer %32072 : tensor<4x?x4096xf16>{%dim_29592} to #hal.device.promise<@__device_4>
    %32074 = torch_c.from_builtin_tensor %32073 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32075 = torch_c.to_builtin_tensor %31940 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29593 = arith.constant 1 : index
    %dim_29594 = tensor.dim %32075, %c1_29593 : tensor<4x?x4096xf16>
    %32076 = flow.tensor.transfer %32075 : tensor<4x?x4096xf16>{%dim_29594} to #hal.device.promise<@__device_4>
    %32077 = torch_c.from_builtin_tensor %32076 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32078 = torch_c.to_builtin_tensor %31947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29595 = arith.constant 1 : index
    %dim_29596 = tensor.dim %32078, %c1_29595 : tensor<4x?x4096xf16>
    %32079 = flow.tensor.transfer %32078 : tensor<4x?x4096xf16>{%dim_29596} to #hal.device.promise<@__device_4>
    %32080 = torch_c.from_builtin_tensor %32079 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29597 = torch.constant.int 1
    %32081 = torch.aten.add.Tensor %32062, %32065, %int1_29597 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29598 = torch.constant.int 1
    %32082 = torch.aten.add.Tensor %32081, %32068, %int1_29598 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29599 = torch.constant.int 1
    %32083 = torch.aten.add.Tensor %32082, %32071, %int1_29599 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29600 = torch.constant.int 1
    %32084 = torch.aten.add.Tensor %32083, %31926, %int1_29600 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29601 = torch.constant.int 1
    %32085 = torch.aten.add.Tensor %32084, %32074, %int1_29601 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29602 = torch.constant.int 1
    %32086 = torch.aten.add.Tensor %32085, %32077, %int1_29602 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29603 = torch.constant.int 1
    %32087 = torch.aten.add.Tensor %32086, %32080, %int1_29603 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
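    // Reduction replica for @__device_5; its resident partial %31933 is the one
    // not transferred below.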
    %32088 = torch_c.to_builtin_tensor %31898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29604 = arith.constant 1 : index
    %dim_29605 = tensor.dim %32088, %c1_29604 : tensor<4x?x4096xf16>
    %32089 = flow.tensor.transfer %32088 : tensor<4x?x4096xf16>{%dim_29605} to #hal.device.promise<@__device_5>
    %32090 = torch_c.from_builtin_tensor %32089 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32091 = torch_c.to_builtin_tensor %31905 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29606 = arith.constant 1 : index
    %dim_29607 = tensor.dim %32091, %c1_29606 : tensor<4x?x4096xf16>
    %32092 = flow.tensor.transfer %32091 : tensor<4x?x4096xf16>{%dim_29607} to #hal.device.promise<@__device_5>
    %32093 = torch_c.from_builtin_tensor %32092 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32094 = torch_c.to_builtin_tensor %31912 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29608 = arith.constant 1 : index
    %dim_29609 = tensor.dim %32094, %c1_29608 : tensor<4x?x4096xf16>
    %32095 = flow.tensor.transfer %32094 : tensor<4x?x4096xf16>{%dim_29609} to #hal.device.promise<@__device_5>
    %32096 = torch_c.from_builtin_tensor %32095 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32097 = torch_c.to_builtin_tensor %31919 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29610 = arith.constant 1 : index
    %dim_29611 = tensor.dim %32097, %c1_29610 : tensor<4x?x4096xf16>
    %32098 = flow.tensor.transfer %32097 : tensor<4x?x4096xf16>{%dim_29611} to #hal.device.promise<@__device_5>
    %32099 = torch_c.from_builtin_tensor %32098 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32100 = torch_c.to_builtin_tensor %31926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29612 = arith.constant 1 : index
    %dim_29613 = tensor.dim %32100, %c1_29612 : tensor<4x?x4096xf16>
    %32101 = flow.tensor.transfer %32100 : tensor<4x?x4096xf16>{%dim_29613} to #hal.device.promise<@__device_5>
    %32102 = torch_c.from_builtin_tensor %32101 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32103 = torch_c.to_builtin_tensor %31940 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29614 = arith.constant 1 : index
    %dim_29615 = tensor.dim %32103, %c1_29614 : tensor<4x?x4096xf16>
    %32104 = flow.tensor.transfer %32103 : tensor<4x?x4096xf16>{%dim_29615} to #hal.device.promise<@__device_5>
    %32105 = torch_c.from_builtin_tensor %32104 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32106 = torch_c.to_builtin_tensor %31947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29616 = arith.constant 1 : index
    %dim_29617 = tensor.dim %32106, %c1_29616 : tensor<4x?x4096xf16>
    %32107 = flow.tensor.transfer %32106 : tensor<4x?x4096xf16>{%dim_29617} to #hal.device.promise<@__device_5>
    %32108 = torch_c.from_builtin_tensor %32107 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29618 = torch.constant.int 1
    %32109 = torch.aten.add.Tensor %32090, %32093, %int1_29618 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29619 = torch.constant.int 1
    %32110 = torch.aten.add.Tensor %32109, %32096, %int1_29619 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29620 = torch.constant.int 1
    %32111 = torch.aten.add.Tensor %32110, %32099, %int1_29620 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29621 = torch.constant.int 1
    %32112 = torch.aten.add.Tensor %32111, %32102, %int1_29621 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29622 = torch.constant.int 1
    %32113 = torch.aten.add.Tensor %32112, %31933, %int1_29622 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29623 = torch.constant.int 1
    %32114 = torch.aten.add.Tensor %32113, %32105, %int1_29623 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29624 = torch.constant.int 1
    %32115 = torch.aten.add.Tensor %32114, %32108, %int1_29624 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
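    // Same gather-and-sum replicated on @__device_6 (%31940 is the
    // untransferred local partial here).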
    %32116 = torch_c.to_builtin_tensor %31898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29625 = arith.constant 1 : index
    %dim_29626 = tensor.dim %32116, %c1_29625 : tensor<4x?x4096xf16>
    %32117 = flow.tensor.transfer %32116 : tensor<4x?x4096xf16>{%dim_29626} to #hal.device.promise<@__device_6>
    %32118 = torch_c.from_builtin_tensor %32117 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32119 = torch_c.to_builtin_tensor %31905 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29627 = arith.constant 1 : index
    %dim_29628 = tensor.dim %32119, %c1_29627 : tensor<4x?x4096xf16>
    %32120 = flow.tensor.transfer %32119 : tensor<4x?x4096xf16>{%dim_29628} to #hal.device.promise<@__device_6>
    %32121 = torch_c.from_builtin_tensor %32120 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32122 = torch_c.to_builtin_tensor %31912 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29629 = arith.constant 1 : index
    %dim_29630 = tensor.dim %32122, %c1_29629 : tensor<4x?x4096xf16>
    %32123 = flow.tensor.transfer %32122 : tensor<4x?x4096xf16>{%dim_29630} to #hal.device.promise<@__device_6>
    %32124 = torch_c.from_builtin_tensor %32123 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32124, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32125 = torch_c.to_builtin_tensor %31919 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29631 = arith.constant 1 : index
    %dim_29632 = tensor.dim %32125, %c1_29631 : tensor<4x?x4096xf16>
    %32126 = flow.tensor.transfer %32125 : tensor<4x?x4096xf16>{%dim_29632} to #hal.device.promise<@__device_6>
    %32127 = torch_c.from_builtin_tensor %32126 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32128 = torch_c.to_builtin_tensor %31926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29633 = arith.constant 1 : index
    %dim_29634 = tensor.dim %32128, %c1_29633 : tensor<4x?x4096xf16>
    %32129 = flow.tensor.transfer %32128 : tensor<4x?x4096xf16>{%dim_29634} to #hal.device.promise<@__device_6>
    %32130 = torch_c.from_builtin_tensor %32129 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32131 = torch_c.to_builtin_tensor %31933 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29635 = arith.constant 1 : index
    %dim_29636 = tensor.dim %32131, %c1_29635 : tensor<4x?x4096xf16>
    %32132 = flow.tensor.transfer %32131 : tensor<4x?x4096xf16>{%dim_29636} to #hal.device.promise<@__device_6>
    %32133 = torch_c.from_builtin_tensor %32132 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32134 = torch_c.to_builtin_tensor %31947 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29637 = arith.constant 1 : index
    %dim_29638 = tensor.dim %32134, %c1_29637 : tensor<4x?x4096xf16>
    %32135 = flow.tensor.transfer %32134 : tensor<4x?x4096xf16>{%dim_29638} to #hal.device.promise<@__device_6>
    %32136 = torch_c.from_builtin_tensor %32135 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29639 = torch.constant.int 1
    %32137 = torch.aten.add.Tensor %32118, %32121, %int1_29639 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29640 = torch.constant.int 1
    %32138 = torch.aten.add.Tensor %32137, %32124, %int1_29640 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29641 = torch.constant.int 1
    %32139 = torch.aten.add.Tensor %32138, %32127, %int1_29641 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29642 = torch.constant.int 1
    %32140 = torch.aten.add.Tensor %32139, %32130, %int1_29642 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29643 = torch.constant.int 1
    %32141 = torch.aten.add.Tensor %32140, %32133, %int1_29643 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29644 = torch.constant.int 1
    %32142 = torch.aten.add.Tensor %32141, %31940, %int1_29644 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29645 = torch.constant.int 1
    %32143 = torch.aten.add.Tensor %32142, %32136, %int1_29645 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
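    // Final replica of the reduction, on @__device_7 (%31947 stays local).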
    %32144 = torch_c.to_builtin_tensor %31898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29646 = arith.constant 1 : index
    %dim_29647 = tensor.dim %32144, %c1_29646 : tensor<4x?x4096xf16>
    %32145 = flow.tensor.transfer %32144 : tensor<4x?x4096xf16>{%dim_29647} to #hal.device.promise<@__device_7>
    %32146 = torch_c.from_builtin_tensor %32145 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32147 = torch_c.to_builtin_tensor %31905 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29648 = arith.constant 1 : index
    %dim_29649 = tensor.dim %32147, %c1_29648 : tensor<4x?x4096xf16>
    %32148 = flow.tensor.transfer %32147 : tensor<4x?x4096xf16>{%dim_29649} to #hal.device.promise<@__device_7>
    %32149 = torch_c.from_builtin_tensor %32148 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32150 = torch_c.to_builtin_tensor %31912 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29650 = arith.constant 1 : index
    %dim_29651 = tensor.dim %32150, %c1_29650 : tensor<4x?x4096xf16>
    %32151 = flow.tensor.transfer %32150 : tensor<4x?x4096xf16>{%dim_29651} to #hal.device.promise<@__device_7>
    %32152 = torch_c.from_builtin_tensor %32151 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32153 = torch_c.to_builtin_tensor %31919 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29652 = arith.constant 1 : index
    %dim_29653 = tensor.dim %32153, %c1_29652 : tensor<4x?x4096xf16>
    %32154 = flow.tensor.transfer %32153 : tensor<4x?x4096xf16>{%dim_29653} to #hal.device.promise<@__device_7>
    %32155 = torch_c.from_builtin_tensor %32154 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32156 = torch_c.to_builtin_tensor %31926 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29654 = arith.constant 1 : index
    %dim_29655 = tensor.dim %32156, %c1_29654 : tensor<4x?x4096xf16>
    %32157 = flow.tensor.transfer %32156 : tensor<4x?x4096xf16>{%dim_29655} to #hal.device.promise<@__device_7>
    %32158 = torch_c.from_builtin_tensor %32157 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32159 = torch_c.to_builtin_tensor %31933 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29656 = arith.constant 1 : index
    %dim_29657 = tensor.dim %32159, %c1_29656 : tensor<4x?x4096xf16>
    %32160 = flow.tensor.transfer %32159 : tensor<4x?x4096xf16>{%dim_29657} to #hal.device.promise<@__device_7>
    %32161 = torch_c.from_builtin_tensor %32160 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %32162 = torch_c.to_builtin_tensor %31940 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_29658 = arith.constant 1 : index
    %dim_29659 = tensor.dim %32162, %c1_29658 : tensor<4x?x4096xf16>
    %32163 = flow.tensor.transfer %32162 : tensor<4x?x4096xf16>{%dim_29659} to #hal.device.promise<@__device_7>
    %32164 = torch_c.from_builtin_tensor %32163 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29660 = torch.constant.int 1
    %32165 = torch.aten.add.Tensor %32146, %32149, %int1_29660 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29661 = torch.constant.int 1
    %32166 = torch.aten.add.Tensor %32165, %32152, %int1_29661 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29662 = torch.constant.int 1
    %32167 = torch.aten.add.Tensor %32166, %32155, %int1_29662 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29663 = torch.constant.int 1
    %32168 = torch.aten.add.Tensor %32167, %32158, %int1_29663 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29664 = torch.constant.int 1
    %32169 = torch.aten.add.Tensor %32168, %32161, %int1_29664 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29665 = torch.constant.int 1
    %32170 = torch.aten.add.Tensor %32169, %32164, %int1_29665 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29666 = torch.constant.int 1
    %32171 = torch.aten.add.Tensor %32170, %31947, %int1_29666 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
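    // Per-device residual connection: add each device's previous hidden state
    // (%31652..%31659) to its freshly reduced output (%31975..%32171, one sum
    // per device).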
    %int1_29667 = torch.constant.int 1
    %32172 = torch.aten.add.Tensor %31652, %31975, %int1_29667 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29668 = torch.constant.int 1
    %32173 = torch.aten.add.Tensor %31653, %32003, %int1_29668 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29669 = torch.constant.int 1
    %32174 = torch.aten.add.Tensor %31654, %32031, %int1_29669 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29670 = torch.constant.int 1
    %32175 = torch.aten.add.Tensor %31655, %32059, %int1_29670 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29671 = torch.constant.int 1
    %32176 = torch.aten.add.Tensor %31656, %32087, %int1_29671 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29672 = torch.constant.int 1
    %32177 = torch.aten.add.Tensor %31657, %32115, %int1_29672 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29673 = torch.constant.int 1
    %32178 = torch.aten.add.Tensor %31658, %32143, %int1_29673 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_29674 = torch.constant.int 1
    %32179 = torch.aten.add.Tensor %31659, %32171, %int1_29674 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
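    // What follows looks like the standard RMSNorm recipe, replicated once per
    // device and computed in f32 for accuracy: upcast (torch dtype 6 = f32),
    // square, mean over the hidden dim, add eps, rsqrt, normalize, apply the
    // norm weight, and downcast back to f16 (torch dtype 5).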
    %int6_29675 = torch.constant.int 6
    %32180 = torch.prims.convert_element_type %32172, %int6_29675 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29676 = torch.constant.int 6
    %32181 = torch.prims.convert_element_type %32173, %int6_29676 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29677 = torch.constant.int 6
    %32182 = torch.prims.convert_element_type %32174, %int6_29677 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29678 = torch.constant.int 6
    %32183 = torch.prims.convert_element_type %32175, %int6_29678 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29679 = torch.constant.int 6
    %32184 = torch.prims.convert_element_type %32176, %int6_29679 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29680 = torch.constant.int 6
    %32185 = torch.prims.convert_element_type %32177, %int6_29680 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29681 = torch.constant.int 6
    %32186 = torch.prims.convert_element_type %32178, %int6_29681 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_29682 = torch.constant.int 6
    %32187 = torch.prims.convert_element_type %32179, %int6_29682 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
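    // Elementwise square of each upcast activation.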
    %int2_29683 = torch.constant.int 2
    %32188 = torch.aten.pow.Tensor_Scalar %32180, %int2_29683 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29684 = torch.constant.int 2
    %32189 = torch.aten.pow.Tensor_Scalar %32181, %int2_29684 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29685 = torch.constant.int 2
    %32190 = torch.aten.pow.Tensor_Scalar %32182, %int2_29685 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29686 = torch.constant.int 2
    %32191 = torch.aten.pow.Tensor_Scalar %32183, %int2_29686 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29687 = torch.constant.int 2
    %32192 = torch.aten.pow.Tensor_Scalar %32184, %int2_29687 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29688 = torch.constant.int 2
    %32193 = torch.aten.pow.Tensor_Scalar %32185, %int2_29688 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29689 = torch.constant.int 2
    %32194 = torch.aten.pow.Tensor_Scalar %32186, %int2_29689 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_29690 = torch.constant.int 2
    %32195 = torch.aten.pow.Tensor_Scalar %32187, %int2_29690 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
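    // Mean of the squares over the last (4096) dimension with keepdim=true,
    // yielding one value per token position.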
    %int-1_29691 = torch.constant.int -1
    %32196 = torch.prim.ListConstruct %int-1_29691 : (!torch.int) -> !torch.list<int>
    %true_29692 = torch.constant.bool true
    %none_29693 = torch.constant.none
    %32197 = torch.aten.mean.dim %32188, %32196, %true_29692, %none_29693 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29694 = torch.constant.int -1
    %32198 = torch.prim.ListConstruct %int-1_29694 : (!torch.int) -> !torch.list<int>
    %true_29695 = torch.constant.bool true
    %none_29696 = torch.constant.none
    %32199 = torch.aten.mean.dim %32189, %32198, %true_29695, %none_29696 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29697 = torch.constant.int -1
    %32200 = torch.prim.ListConstruct %int-1_29697 : (!torch.int) -> !torch.list<int>
    %true_29698 = torch.constant.bool true
    %none_29699 = torch.constant.none
    %32201 = torch.aten.mean.dim %32190, %32200, %true_29698, %none_29699 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29700 = torch.constant.int -1
    %32202 = torch.prim.ListConstruct %int-1_29700 : (!torch.int) -> !torch.list<int>
    %true_29701 = torch.constant.bool true
    %none_29702 = torch.constant.none
    %32203 = torch.aten.mean.dim %32191, %32202, %true_29701, %none_29702 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29703 = torch.constant.int -1
    %32204 = torch.prim.ListConstruct %int-1_29703 : (!torch.int) -> !torch.list<int>
    %true_29704 = torch.constant.bool true
    %none_29705 = torch.constant.none
    %32205 = torch.aten.mean.dim %32192, %32204, %true_29704, %none_29705 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29706 = torch.constant.int -1
    %32206 = torch.prim.ListConstruct %int-1_29706 : (!torch.int) -> !torch.list<int>
    %true_29707 = torch.constant.bool true
    %none_29708 = torch.constant.none
    %32207 = torch.aten.mean.dim %32193, %32206, %true_29707, %none_29708 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29709 = torch.constant.int -1
    %32208 = torch.prim.ListConstruct %int-1_29709 : (!torch.int) -> !torch.list<int>
    %true_29710 = torch.constant.bool true
    %none_29711 = torch.constant.none
    %32209 = torch.aten.mean.dim %32194, %32208, %true_29710, %none_29711 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_29712 = torch.constant.int -1
    %32210 = torch.prim.ListConstruct %int-1_29712 : (!torch.int) -> !torch.list<int>
    %true_29713 = torch.constant.bool true
    %none_29714 = torch.constant.none
    %32211 = torch.aten.mean.dim %32195, %32210, %true_29713, %none_29714 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32211, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
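    // Add the normalization epsilon (9.9999997473787516e-6, i.e. 1e-5 rounded
    // to f32) before the reciprocal square root.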
    %float9.999990e-06_29715 = torch.constant.float 9.9999997473787516E-6
    %int1_29716 = torch.constant.int 1
    %32212 = torch.aten.add.Scalar %32197, %float9.999990e-06_29715, %int1_29716 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29717 = torch.constant.float 9.9999997473787516E-6
    %int1_29718 = torch.constant.int 1
    %32213 = torch.aten.add.Scalar %32199, %float9.999990e-06_29717, %int1_29718 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29719 = torch.constant.float 9.9999997473787516E-6
    %int1_29720 = torch.constant.int 1
    %32214 = torch.aten.add.Scalar %32201, %float9.999990e-06_29719, %int1_29720 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29721 = torch.constant.float 9.9999997473787516E-6
    %int1_29722 = torch.constant.int 1
    %32215 = torch.aten.add.Scalar %32203, %float9.999990e-06_29721, %int1_29722 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29723 = torch.constant.float 9.9999997473787516E-6
    %int1_29724 = torch.constant.int 1
    %32216 = torch.aten.add.Scalar %32205, %float9.999990e-06_29723, %int1_29724 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29725 = torch.constant.float 9.9999997473787516E-6
    %int1_29726 = torch.constant.int 1
    %32217 = torch.aten.add.Scalar %32207, %float9.999990e-06_29725, %int1_29726 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29727 = torch.constant.float 9.9999997473787516E-6
    %int1_29728 = torch.constant.int 1
    %32218 = torch.aten.add.Scalar %32209, %float9.999990e-06_29727, %int1_29728 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_29729 = torch.constant.float 9.9999997473787516E-6
    %int1_29730 = torch.constant.int 1
    %32219 = torch.aten.add.Scalar %32211, %float9.999990e-06_29729, %int1_29730 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
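    // rsqrt gives the per-position scale factor 1/sqrt(mean(x^2) + eps).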
    %32220 = torch.aten.rsqrt %32212 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %32221 = torch.aten.rsqrt %32213 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %32222 = torch.aten.rsqrt %32214 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %32223 = torch.aten.rsqrt %32215 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %32224 = torch.aten.rsqrt %32216 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %32225 = torch.aten.rsqrt %32217 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %32226 = torch.aten.rsqrt %32218 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %32227 = torch.aten.rsqrt %32219 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %32227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
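    // Normalize: multiply each activation tensor by its broadcast rsqrt factor.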
    %32228 = torch.aten.mul.Tensor %32180, %32220 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32229 = torch.aten.mul.Tensor %32181, %32221 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32230 = torch.aten.mul.Tensor %32182, %32222 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32231 = torch.aten.mul.Tensor %32183, %32223 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32232 = torch.aten.mul.Tensor %32184, %32224 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32233 = torch.aten.mul.Tensor %32185, %32225 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32234 = torch.aten.mul.Tensor %32186, %32226 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32235 = torch.aten.mul.Tensor %32187, %32227 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
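    // Apply the [4096] norm weight vectors (%1160..%1167, one replica per
    // device), broadcast over the batch and sequence dims.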
    %32236 = torch.aten.mul.Tensor %1160, %32228 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32237 = torch.aten.mul.Tensor %1161, %32229 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32238 = torch.aten.mul.Tensor %1162, %32230 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32239 = torch.aten.mul.Tensor %1163, %32231 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32240 = torch.aten.mul.Tensor %1164, %32232 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32241 = torch.aten.mul.Tensor %1165, %32233 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32242 = torch.aten.mul.Tensor %1166, %32234 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %32243 = torch.aten.mul.Tensor %1167, %32235 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %32243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
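    // Downcast the normalized activations back to f16 for the projection
    // matmuls below.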
    %int5_29731 = torch.constant.int 5
    %32244 = torch.prims.convert_element_type %32236, %int5_29731 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29732 = torch.constant.int 5
    %32245 = torch.prims.convert_element_type %32237, %int5_29732 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29733 = torch.constant.int 5
    %32246 = torch.prims.convert_element_type %32238, %int5_29733 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29734 = torch.constant.int 5
    %32247 = torch.prims.convert_element_type %32239, %int5_29734 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29735 = torch.constant.int 5
    %32248 = torch.prims.convert_element_type %32240, %int5_29735 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29736 = torch.constant.int 5
    %32249 = torch.prims.convert_element_type %32241, %int5_29736 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29737 = torch.constant.int 5
    %32250 = torch.prims.convert_element_type %32242, %int5_29737 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_29738 = torch.constant.int 5
    %32251 = torch.prims.convert_element_type %32243, %int5_29738 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %32251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
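    // Per-device projection matmuls: transpose each [512,4096] weight shard to
    // [4096,512], flatten the activations from [4,?,4096] to [4*?,4096], run
    // torch.aten.mm, and restore a [4,?,512] shape. The shard width (512 =
    // 4 heads x 128 head_dim per device) is consistent with a tensor-parallel
    // attention Q projection.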
    %int1_29739 = torch.constant.int 1
    %int0_29740 = torch.constant.int 0
    %32252 = torch.prim.ListConstruct %int1_29739, %int0_29740 : (!torch.int, !torch.int) -> !torch.list<int>
    %32253 = torch.aten.permute %1168, %32252 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_29741 = torch.constant.int 1
    %int0_29742 = torch.constant.int 0
    %32254 = torch.prim.ListConstruct %int1_29741, %int0_29742 : (!torch.int, !torch.int) -> !torch.list<int>
    %32255 = torch.aten.permute %1169, %32254 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_29743 = torch.constant.int 1
    %int0_29744 = torch.constant.int 0
    %32256 = torch.prim.ListConstruct %int1_29743, %int0_29744 : (!torch.int, !torch.int) -> !torch.list<int>
    %32257 = torch.aten.permute %1170, %32256 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_29745 = torch.constant.int 1
    %int0_29746 = torch.constant.int 0
    %32258 = torch.prim.ListConstruct %int1_29745, %int0_29746 : (!torch.int, !torch.int) -> !torch.list<int>
    %32259 = torch.aten.permute %1171, %32258 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_29747 = torch.constant.int 1
    %int0_29748 = torch.constant.int 0
    %32260 = torch.prim.ListConstruct %int1_29747, %int0_29748 : (!torch.int, !torch.int) -> !torch.list<int>
    %32261 = torch.aten.permute %1172, %32260 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_29749 = torch.constant.int 1
    %int0_29750 = torch.constant.int 0
    %32262 = torch.prim.ListConstruct %int1_29749, %int0_29750 : (!torch.int, !torch.int) -> !torch.list<int>
    %32263 = torch.aten.permute %1173, %32262 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_29751 = torch.constant.int 1
    %int0_29752 = torch.constant.int 0
    %32264 = torch.prim.ListConstruct %int1_29751, %int0_29752 : (!torch.int, !torch.int) -> !torch.list<int>
    %32265 = torch.aten.permute %1174, %32264 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_29753 = torch.constant.int 1
    %int0_29754 = torch.constant.int 0
    %32266 = torch.prim.ListConstruct %int1_29753, %int0_29754 : (!torch.int, !torch.int) -> !torch.list<int>
    %32267 = torch.aten.permute %1175, %32266 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int4_29755 = torch.constant.int 4
    %32268 = torch.aten.mul.int %int4_29755, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29756 = torch.constant.int 4096
    %32269 = torch.prim.ListConstruct %32268, %int4096_29756 : (!torch.int, !torch.int) -> !torch.list<int>
    %32270 = torch.aten.view %32244, %32269 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32270, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32271 = torch.aten.mm %32270, %32253 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %32271, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_29757 = torch.constant.int 4
    %int512_29758 = torch.constant.int 512
    %32272 = torch.prim.ListConstruct %int4_29757, %2482, %int512_29758 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32273 = torch.aten.view %32271, %32272 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %32273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29759 = torch.constant.int 4
    %32274 = torch.aten.mul.int %int4_29759, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29760 = torch.constant.int 4096
    %32275 = torch.prim.ListConstruct %32274, %int4096_29760 : (!torch.int, !torch.int) -> !torch.list<int>
    %32276 = torch.aten.view %32245, %32275 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32276, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32277 = torch.aten.mm %32276, %32255 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %32277, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_29761 = torch.constant.int 4
    %int512_29762 = torch.constant.int 512
    %32278 = torch.prim.ListConstruct %int4_29761, %2482, %int512_29762 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32279 = torch.aten.view %32277, %32278 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %32279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29763 = torch.constant.int 4
    %32280 = torch.aten.mul.int %int4_29763, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29764 = torch.constant.int 4096
    %32281 = torch.prim.ListConstruct %32280, %int4096_29764 : (!torch.int, !torch.int) -> !torch.list<int>
    %32282 = torch.aten.view %32246, %32281 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32282, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32283 = torch.aten.mm %32282, %32257 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %32283, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_29765 = torch.constant.int 4
    %int512_29766 = torch.constant.int 512
    %32284 = torch.prim.ListConstruct %int4_29765, %2482, %int512_29766 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32285 = torch.aten.view %32283, %32284 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %32285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29767 = torch.constant.int 4
    %32286 = torch.aten.mul.int %int4_29767, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29768 = torch.constant.int 4096
    %32287 = torch.prim.ListConstruct %32286, %int4096_29768 : (!torch.int, !torch.int) -> !torch.list<int>
    %32288 = torch.aten.view %32247, %32287 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32288, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32289 = torch.aten.mm %32288, %32259 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %32289, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_29769 = torch.constant.int 4
    %int512_29770 = torch.constant.int 512
    %32290 = torch.prim.ListConstruct %int4_29769, %2482, %int512_29770 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32291 = torch.aten.view %32289, %32290 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %32291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29771 = torch.constant.int 4
    %32292 = torch.aten.mul.int %int4_29771, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29772 = torch.constant.int 4096
    %32293 = torch.prim.ListConstruct %32292, %int4096_29772 : (!torch.int, !torch.int) -> !torch.list<int>
    %32294 = torch.aten.view %32248, %32293 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32294, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32295 = torch.aten.mm %32294, %32261 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %32295, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_29773 = torch.constant.int 4
    %int512_29774 = torch.constant.int 512
    %32296 = torch.prim.ListConstruct %int4_29773, %2482, %int512_29774 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32297 = torch.aten.view %32295, %32296 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %32297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29775 = torch.constant.int 4
    %32298 = torch.aten.mul.int %int4_29775, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29776 = torch.constant.int 4096
    %32299 = torch.prim.ListConstruct %32298, %int4096_29776 : (!torch.int, !torch.int) -> !torch.list<int>
    %32300 = torch.aten.view %32249, %32299 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32300, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32301 = torch.aten.mm %32300, %32263 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %32301, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_29777 = torch.constant.int 4
    %int512_29778 = torch.constant.int 512
    %32302 = torch.prim.ListConstruct %int4_29777, %2482, %int512_29778 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32303 = torch.aten.view %32301, %32302 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %32303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29779 = torch.constant.int 4
    %32304 = torch.aten.mul.int %int4_29779, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29780 = torch.constant.int 4096
    %32305 = torch.prim.ListConstruct %32304, %int4096_29780 : (!torch.int, !torch.int) -> !torch.list<int>
    %32306 = torch.aten.view %32250, %32305 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32306, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32307 = torch.aten.mm %32306, %32265 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %32307, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_29781 = torch.constant.int 4
    %int512_29782 = torch.constant.int 512
    %32308 = torch.prim.ListConstruct %int4_29781, %2482, %int512_29782 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32309 = torch.aten.view %32307, %32308 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %32309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_29783 = torch.constant.int 4
    %32310 = torch.aten.mul.int %int4_29783, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29784 = torch.constant.int 4096
    %32311 = torch.prim.ListConstruct %32310, %int4096_29784 : (!torch.int, !torch.int) -> !torch.list<int>
    %32312 = torch.aten.view %32251, %32311 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32312, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32313 = torch.aten.mm %32312, %32267 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %32313, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_29785 = torch.constant.int 4
    %int512_29786 = torch.constant.int 512
    %32314 = torch.prim.ListConstruct %int4_29785, %2482, %int512_29786 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32315 = torch.aten.view %32313, %32314 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %32315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
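    // Second projection set: [128,4096] weight shards (%1176..%1183), one per
    // device, producing [4,?,128] outputs. A width of 128 suggests a single KV
    // head per device, i.e. likely the grouped-query K (or V) projection.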
    %int1_29787 = torch.constant.int 1
    %int0_29788 = torch.constant.int 0
    %32316 = torch.prim.ListConstruct %int1_29787, %int0_29788 : (!torch.int, !torch.int) -> !torch.list<int>
    %32317 = torch.aten.permute %1176, %32316 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29789 = torch.constant.int 1
    %int0_29790 = torch.constant.int 0
    %32318 = torch.prim.ListConstruct %int1_29789, %int0_29790 : (!torch.int, !torch.int) -> !torch.list<int>
    %32319 = torch.aten.permute %1177, %32318 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29791 = torch.constant.int 1
    %int0_29792 = torch.constant.int 0
    %32320 = torch.prim.ListConstruct %int1_29791, %int0_29792 : (!torch.int, !torch.int) -> !torch.list<int>
    %32321 = torch.aten.permute %1178, %32320 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29793 = torch.constant.int 1
    %int0_29794 = torch.constant.int 0
    %32322 = torch.prim.ListConstruct %int1_29793, %int0_29794 : (!torch.int, !torch.int) -> !torch.list<int>
    %32323 = torch.aten.permute %1179, %32322 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29795 = torch.constant.int 1
    %int0_29796 = torch.constant.int 0
    %32324 = torch.prim.ListConstruct %int1_29795, %int0_29796 : (!torch.int, !torch.int) -> !torch.list<int>
    %32325 = torch.aten.permute %1180, %32324 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29797 = torch.constant.int 1
    %int0_29798 = torch.constant.int 0
    %32326 = torch.prim.ListConstruct %int1_29797, %int0_29798 : (!torch.int, !torch.int) -> !torch.list<int>
    %32327 = torch.aten.permute %1181, %32326 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29799 = torch.constant.int 1
    %int0_29800 = torch.constant.int 0
    %32328 = torch.prim.ListConstruct %int1_29799, %int0_29800 : (!torch.int, !torch.int) -> !torch.list<int>
    %32329 = torch.aten.permute %1182, %32328 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29801 = torch.constant.int 1
    %int0_29802 = torch.constant.int 0
    %32330 = torch.prim.ListConstruct %int1_29801, %int0_29802 : (!torch.int, !torch.int) -> !torch.list<int>
    %32331 = torch.aten.permute %1183, %32330 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_29803 = torch.constant.int 4
    %32332 = torch.aten.mul.int %int4_29803, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29804 = torch.constant.int 4096
    %32333 = torch.prim.ListConstruct %32332, %int4096_29804 : (!torch.int, !torch.int) -> !torch.list<int>
    %32334 = torch.aten.view %32244, %32333 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32334, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32335 = torch.aten.mm %32334, %32317 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32335, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29805 = torch.constant.int 4
    %int128_29806 = torch.constant.int 128
    %32336 = torch.prim.ListConstruct %int4_29805, %2482, %int128_29806 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32337 = torch.aten.view %32335, %32336 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29807 = torch.constant.int 4
    %32338 = torch.aten.mul.int %int4_29807, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29808 = torch.constant.int 4096
    %32339 = torch.prim.ListConstruct %32338, %int4096_29808 : (!torch.int, !torch.int) -> !torch.list<int>
    %32340 = torch.aten.view %32245, %32339 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32340, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32341 = torch.aten.mm %32340, %32319 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32341, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29809 = torch.constant.int 4
    %int128_29810 = torch.constant.int 128
    %32342 = torch.prim.ListConstruct %int4_29809, %2482, %int128_29810 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32343 = torch.aten.view %32341, %32342 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29811 = torch.constant.int 4
    %32344 = torch.aten.mul.int %int4_29811, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29812 = torch.constant.int 4096
    %32345 = torch.prim.ListConstruct %32344, %int4096_29812 : (!torch.int, !torch.int) -> !torch.list<int>
    %32346 = torch.aten.view %32246, %32345 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32346, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32347 = torch.aten.mm %32346, %32321 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32347, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29813 = torch.constant.int 4
    %int128_29814 = torch.constant.int 128
    %32348 = torch.prim.ListConstruct %int4_29813, %2482, %int128_29814 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32349 = torch.aten.view %32347, %32348 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29815 = torch.constant.int 4
    %32350 = torch.aten.mul.int %int4_29815, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29816 = torch.constant.int 4096
    %32351 = torch.prim.ListConstruct %32350, %int4096_29816 : (!torch.int, !torch.int) -> !torch.list<int>
    %32352 = torch.aten.view %32247, %32351 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32352, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32353 = torch.aten.mm %32352, %32323 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32353, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29817 = torch.constant.int 4
    %int128_29818 = torch.constant.int 128
    %32354 = torch.prim.ListConstruct %int4_29817, %2482, %int128_29818 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32355 = torch.aten.view %32353, %32354 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29819 = torch.constant.int 4
    %32356 = torch.aten.mul.int %int4_29819, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29820 = torch.constant.int 4096
    %32357 = torch.prim.ListConstruct %32356, %int4096_29820 : (!torch.int, !torch.int) -> !torch.list<int>
    %32358 = torch.aten.view %32248, %32357 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32358, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32359 = torch.aten.mm %32358, %32325 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32359, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29821 = torch.constant.int 4
    %int128_29822 = torch.constant.int 128
    %32360 = torch.prim.ListConstruct %int4_29821, %2482, %int128_29822 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32361 = torch.aten.view %32359, %32360 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29823 = torch.constant.int 4
    %32362 = torch.aten.mul.int %int4_29823, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29824 = torch.constant.int 4096
    %32363 = torch.prim.ListConstruct %32362, %int4096_29824 : (!torch.int, !torch.int) -> !torch.list<int>
    %32364 = torch.aten.view %32249, %32363 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32364, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32365 = torch.aten.mm %32364, %32327 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32365, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29825 = torch.constant.int 4
    %int128_29826 = torch.constant.int 128
    %32366 = torch.prim.ListConstruct %int4_29825, %2482, %int128_29826 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32367 = torch.aten.view %32365, %32366 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29827 = torch.constant.int 4
    %32368 = torch.aten.mul.int %int4_29827, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29828 = torch.constant.int 4096
    %32369 = torch.prim.ListConstruct %32368, %int4096_29828 : (!torch.int, !torch.int) -> !torch.list<int>
    %32370 = torch.aten.view %32250, %32369 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32370, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32371 = torch.aten.mm %32370, %32329 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32371, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29829 = torch.constant.int 4
    %int128_29830 = torch.constant.int 128
    %32372 = torch.prim.ListConstruct %int4_29829, %2482, %int128_29830 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32373 = torch.aten.view %32371, %32372 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29831 = torch.constant.int 4
    %32374 = torch.aten.mul.int %int4_29831, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29832 = torch.constant.int 4096
    %32375 = torch.prim.ListConstruct %32374, %int4096_29832 : (!torch.int, !torch.int) -> !torch.list<int>
    %32376 = torch.aten.view %32251, %32375 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32376, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32377 = torch.aten.mm %32376, %32331 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32377, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29833 = torch.constant.int 4
    %int128_29834 = torch.constant.int 128
    %32378 = torch.prim.ListConstruct %int4_29833, %2482, %int128_29834 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32379 = torch.aten.view %32377, %32378 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
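    // A second, structurally identical group of eight [128,4096] transposes
    // (%1184..%1191); plausibly the matching V-projection shards if the group
    // above is K (editor annotation inferred from structure, not from names).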
    %int1_29835 = torch.constant.int 1
    %int0_29836 = torch.constant.int 0
    %32380 = torch.prim.ListConstruct %int1_29835, %int0_29836 : (!torch.int, !torch.int) -> !torch.list<int>
    %32381 = torch.aten.permute %1184, %32380 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29837 = torch.constant.int 1
    %int0_29838 = torch.constant.int 0
    %32382 = torch.prim.ListConstruct %int1_29837, %int0_29838 : (!torch.int, !torch.int) -> !torch.list<int>
    %32383 = torch.aten.permute %1185, %32382 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29839 = torch.constant.int 1
    %int0_29840 = torch.constant.int 0
    %32384 = torch.prim.ListConstruct %int1_29839, %int0_29840 : (!torch.int, !torch.int) -> !torch.list<int>
    %32385 = torch.aten.permute %1186, %32384 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29841 = torch.constant.int 1
    %int0_29842 = torch.constant.int 0
    %32386 = torch.prim.ListConstruct %int1_29841, %int0_29842 : (!torch.int, !torch.int) -> !torch.list<int>
    %32387 = torch.aten.permute %1187, %32386 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29843 = torch.constant.int 1
    %int0_29844 = torch.constant.int 0
    %32388 = torch.prim.ListConstruct %int1_29843, %int0_29844 : (!torch.int, !torch.int) -> !torch.list<int>
    %32389 = torch.aten.permute %1188, %32388 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29845 = torch.constant.int 1
    %int0_29846 = torch.constant.int 0
    %32390 = torch.prim.ListConstruct %int1_29845, %int0_29846 : (!torch.int, !torch.int) -> !torch.list<int>
    %32391 = torch.aten.permute %1189, %32390 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29847 = torch.constant.int 1
    %int0_29848 = torch.constant.int 0
    %32392 = torch.prim.ListConstruct %int1_29847, %int0_29848 : (!torch.int, !torch.int) -> !torch.list<int>
    %32393 = torch.aten.permute %1190, %32392 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_29849 = torch.constant.int 1
    %int0_29850 = torch.constant.int 0
    %32394 = torch.prim.ListConstruct %int1_29849, %int0_29850 : (!torch.int, !torch.int) -> !torch.list<int>
    %32395 = torch.aten.permute %1191, %32394 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
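    // Same flatten / mm / restore pattern as the first group. The inputs
    // %32244..%32251 are the same per-device activations, so both weight
    // groups project the identical hidden states.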
    %int4_29851 = torch.constant.int 4
    %32396 = torch.aten.mul.int %int4_29851, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29852 = torch.constant.int 4096
    %32397 = torch.prim.ListConstruct %32396, %int4096_29852 : (!torch.int, !torch.int) -> !torch.list<int>
    %32398 = torch.aten.view %32244, %32397 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32398, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32399 = torch.aten.mm %32398, %32381 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32399, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29853 = torch.constant.int 4
    %int128_29854 = torch.constant.int 128
    %32400 = torch.prim.ListConstruct %int4_29853, %2482, %int128_29854 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32401 = torch.aten.view %32399, %32400 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29855 = torch.constant.int 4
    %32402 = torch.aten.mul.int %int4_29855, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29856 = torch.constant.int 4096
    %32403 = torch.prim.ListConstruct %32402, %int4096_29856 : (!torch.int, !torch.int) -> !torch.list<int>
    %32404 = torch.aten.view %32245, %32403 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32404, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32405 = torch.aten.mm %32404, %32383 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32405, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29857 = torch.constant.int 4
    %int128_29858 = torch.constant.int 128
    %32406 = torch.prim.ListConstruct %int4_29857, %2482, %int128_29858 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32407 = torch.aten.view %32405, %32406 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29859 = torch.constant.int 4
    %32408 = torch.aten.mul.int %int4_29859, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29860 = torch.constant.int 4096
    %32409 = torch.prim.ListConstruct %32408, %int4096_29860 : (!torch.int, !torch.int) -> !torch.list<int>
    %32410 = torch.aten.view %32246, %32409 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32410, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32411 = torch.aten.mm %32410, %32385 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32411, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29861 = torch.constant.int 4
    %int128_29862 = torch.constant.int 128
    %32412 = torch.prim.ListConstruct %int4_29861, %2482, %int128_29862 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32413 = torch.aten.view %32411, %32412 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29863 = torch.constant.int 4
    %32414 = torch.aten.mul.int %int4_29863, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29864 = torch.constant.int 4096
    %32415 = torch.prim.ListConstruct %32414, %int4096_29864 : (!torch.int, !torch.int) -> !torch.list<int>
    %32416 = torch.aten.view %32247, %32415 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32416, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32417 = torch.aten.mm %32416, %32387 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32417, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29865 = torch.constant.int 4
    %int128_29866 = torch.constant.int 128
    %32418 = torch.prim.ListConstruct %int4_29865, %2482, %int128_29866 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32419 = torch.aten.view %32417, %32418 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29867 = torch.constant.int 4
    %32420 = torch.aten.mul.int %int4_29867, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29868 = torch.constant.int 4096
    %32421 = torch.prim.ListConstruct %32420, %int4096_29868 : (!torch.int, !torch.int) -> !torch.list<int>
    %32422 = torch.aten.view %32248, %32421 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32422, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32423 = torch.aten.mm %32422, %32389 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32423, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29869 = torch.constant.int 4
    %int128_29870 = torch.constant.int 128
    %32424 = torch.prim.ListConstruct %int4_29869, %2482, %int128_29870 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32425 = torch.aten.view %32423, %32424 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29871 = torch.constant.int 4
    %32426 = torch.aten.mul.int %int4_29871, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29872 = torch.constant.int 4096
    %32427 = torch.prim.ListConstruct %32426, %int4096_29872 : (!torch.int, !torch.int) -> !torch.list<int>
    %32428 = torch.aten.view %32249, %32427 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32428, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32429 = torch.aten.mm %32428, %32391 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32429, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29873 = torch.constant.int 4
    %int128_29874 = torch.constant.int 128
    %32430 = torch.prim.ListConstruct %int4_29873, %2482, %int128_29874 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32431 = torch.aten.view %32429, %32430 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29875 = torch.constant.int 4
    %32432 = torch.aten.mul.int %int4_29875, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29876 = torch.constant.int 4096
    %32433 = torch.prim.ListConstruct %32432, %int4096_29876 : (!torch.int, !torch.int) -> !torch.list<int>
    %32434 = torch.aten.view %32250, %32433 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32434, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32435 = torch.aten.mm %32434, %32393 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32435, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29877 = torch.constant.int 4
    %int128_29878 = torch.constant.int 128
    %32436 = torch.prim.ListConstruct %int4_29877, %2482, %int128_29878 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32437 = torch.aten.view %32435, %32436 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_29879 = torch.constant.int 4
    %32438 = torch.aten.mul.int %int4_29879, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_29880 = torch.constant.int 4096
    %32439 = torch.prim.ListConstruct %32438, %int4096_29880 : (!torch.int, !torch.int) -> !torch.list<int>
    %32440 = torch.aten.view %32251, %32439 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %32440, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %32441 = torch.aten.mm %32440, %32395 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %32441, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_29881 = torch.constant.int 4
    %int128_29882 = torch.constant.int 128
    %32442 = torch.prim.ListConstruct %int4_29881, %2482, %int128_29882 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32443 = torch.aten.view %32441, %32442 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %32443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
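    // The eight [4,?,512] projections %32273..%32315 are split into heads:
    // [4,?,512] -> [4,?,4,128], i.e. four 128-dim heads per device and 32
    // heads across 8 devices, consistent with a Llama-3-8B-style query
    // projection (editor annotation; head roles inferred from shapes).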
    %int4_29883 = torch.constant.int 4
    %int4_29884 = torch.constant.int 4
    %int128_29885 = torch.constant.int 128
    %32444 = torch.prim.ListConstruct %int4_29883, %2482, %int4_29884, %int128_29885 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32445 = torch.aten.view %32273, %32444 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_29886 = torch.constant.int 4
    %int4_29887 = torch.constant.int 4
    %int128_29888 = torch.constant.int 128
    %32446 = torch.prim.ListConstruct %int4_29886, %2482, %int4_29887, %int128_29888 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32447 = torch.aten.view %32279, %32446 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_29889 = torch.constant.int 4
    %int4_29890 = torch.constant.int 4
    %int128_29891 = torch.constant.int 128
    %32448 = torch.prim.ListConstruct %int4_29889, %2482, %int4_29890, %int128_29891 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32449 = torch.aten.view %32285, %32448 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_29892 = torch.constant.int 4
    %int4_29893 = torch.constant.int 4
    %int128_29894 = torch.constant.int 128
    %32450 = torch.prim.ListConstruct %int4_29892, %2482, %int4_29893, %int128_29894 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32451 = torch.aten.view %32291, %32450 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_29895 = torch.constant.int 4
    %int4_29896 = torch.constant.int 4
    %int128_29897 = torch.constant.int 128
    %32452 = torch.prim.ListConstruct %int4_29895, %2482, %int4_29896, %int128_29897 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32453 = torch.aten.view %32297, %32452 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_29898 = torch.constant.int 4
    %int4_29899 = torch.constant.int 4
    %int128_29900 = torch.constant.int 128
    %32454 = torch.prim.ListConstruct %int4_29898, %2482, %int4_29899, %int128_29900 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32455 = torch.aten.view %32303, %32454 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_29901 = torch.constant.int 4
    %int4_29902 = torch.constant.int 4
    %int128_29903 = torch.constant.int 128
    %32456 = torch.prim.ListConstruct %int4_29901, %2482, %int4_29902, %int128_29903 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32457 = torch.aten.view %32309, %32456 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_29904 = torch.constant.int 4
    %int4_29905 = torch.constant.int 4
    %int128_29906 = torch.constant.int 128
    %32458 = torch.prim.ListConstruct %int4_29904, %2482, %int4_29905, %int128_29906 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32459 = torch.aten.view %32315, %32458 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
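    // The sixteen [4,?,128] matmul results are reshaped to [4,?,1,128]: a
    // single KV head per device, matching the grouped-query layout noted above.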
    %int4_29907 = torch.constant.int 4
    %int1_29908 = torch.constant.int 1
    %int128_29909 = torch.constant.int 128
    %32460 = torch.prim.ListConstruct %int4_29907, %2482, %int1_29908, %int128_29909 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32461 = torch.aten.view %32337, %32460 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29910 = torch.constant.int 4
    %int1_29911 = torch.constant.int 1
    %int128_29912 = torch.constant.int 128
    %32462 = torch.prim.ListConstruct %int4_29910, %2482, %int1_29911, %int128_29912 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32463 = torch.aten.view %32343, %32462 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29913 = torch.constant.int 4
    %int1_29914 = torch.constant.int 1
    %int128_29915 = torch.constant.int 128
    %32464 = torch.prim.ListConstruct %int4_29913, %2482, %int1_29914, %int128_29915 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32465 = torch.aten.view %32349, %32464 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29916 = torch.constant.int 4
    %int1_29917 = torch.constant.int 1
    %int128_29918 = torch.constant.int 128
    %32466 = torch.prim.ListConstruct %int4_29916, %2482, %int1_29917, %int128_29918 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32467 = torch.aten.view %32355, %32466 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29919 = torch.constant.int 4
    %int1_29920 = torch.constant.int 1
    %int128_29921 = torch.constant.int 128
    %32468 = torch.prim.ListConstruct %int4_29919, %2482, %int1_29920, %int128_29921 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32469 = torch.aten.view %32361, %32468 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29922 = torch.constant.int 4
    %int1_29923 = torch.constant.int 1
    %int128_29924 = torch.constant.int 128
    %32470 = torch.prim.ListConstruct %int4_29922, %2482, %int1_29923, %int128_29924 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32471 = torch.aten.view %32367, %32470 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29925 = torch.constant.int 4
    %int1_29926 = torch.constant.int 1
    %int128_29927 = torch.constant.int 128
    %32472 = torch.prim.ListConstruct %int4_29925, %2482, %int1_29926, %int128_29927 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32473 = torch.aten.view %32373, %32472 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29928 = torch.constant.int 4
    %int1_29929 = torch.constant.int 1
    %int128_29930 = torch.constant.int 128
    %32474 = torch.prim.ListConstruct %int4_29928, %2482, %int1_29929, %int128_29930 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32475 = torch.aten.view %32379, %32474 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29931 = torch.constant.int 4
    %int1_29932 = torch.constant.int 1
    %int128_29933 = torch.constant.int 128
    %32476 = torch.prim.ListConstruct %int4_29931, %2482, %int1_29932, %int128_29933 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32477 = torch.aten.view %32401, %32476 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29934 = torch.constant.int 4
    %int1_29935 = torch.constant.int 1
    %int128_29936 = torch.constant.int 128
    %32478 = torch.prim.ListConstruct %int4_29934, %2482, %int1_29935, %int128_29936 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32479 = torch.aten.view %32407, %32478 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29937 = torch.constant.int 4
    %int1_29938 = torch.constant.int 1
    %int128_29939 = torch.constant.int 128
    %32480 = torch.prim.ListConstruct %int4_29937, %2482, %int1_29938, %int128_29939 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32481 = torch.aten.view %32413, %32480 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29940 = torch.constant.int 4
    %int1_29941 = torch.constant.int 1
    %int128_29942 = torch.constant.int 128
    %32482 = torch.prim.ListConstruct %int4_29940, %2482, %int1_29941, %int128_29942 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32483 = torch.aten.view %32419, %32482 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29943 = torch.constant.int 4
    %int1_29944 = torch.constant.int 1
    %int128_29945 = torch.constant.int 128
    %32484 = torch.prim.ListConstruct %int4_29943, %2482, %int1_29944, %int128_29945 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32485 = torch.aten.view %32425, %32484 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29946 = torch.constant.int 4
    %int1_29947 = torch.constant.int 1
    %int128_29948 = torch.constant.int 128
    %32486 = torch.prim.ListConstruct %int4_29946, %2482, %int1_29947, %int128_29948 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32487 = torch.aten.view %32431, %32486 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29949 = torch.constant.int 4
    %int1_29950 = torch.constant.int 1
    %int128_29951 = torch.constant.int 128
    %32488 = torch.prim.ListConstruct %int4_29949, %2482, %int1_29950, %int128_29951 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32489 = torch.aten.view %32437, %32488 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_29952 = torch.constant.int 4
    %int1_29953 = torch.constant.int 1
    %int128_29954 = torch.constant.int 128
    %32490 = torch.prim.ListConstruct %int4_29952, %2482, %int1_29953, %int128_29954 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32491 = torch.aten.view %32443, %32490 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
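    // Rotary embedding table, built once on the CPU: positions 0..131071 and
    // inverse frequencies 1/500000^(2i/128) for i = 0..63 form a [131072,64]
    // angle matrix whose cos/sin parts are packed into complex<f32>.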
    %int131072_29955 = torch.constant.int 131072
    %none_29956 = torch.constant.none
    %none_29957 = torch.constant.none
    %cpu_29958 = torch.constant.device "cpu"
    %false_29959 = torch.constant.bool false
    %32492 = torch.aten.arange %int131072_29955, %none_29956, %none_29957, %cpu_29958, %false_29959 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_29960 = torch.constant.int 0
    %int128_29961 = torch.constant.int 128
    %int2_29962 = torch.constant.int 2
    %none_29963 = torch.constant.none
    %none_29964 = torch.constant.none
    %cpu_29965 = torch.constant.device "cpu"
    %false_29966 = torch.constant.bool false
    %32493 = torch.aten.arange.start_step %int0_29960, %int128_29961, %int2_29962, %none_29963, %none_29964, %cpu_29965, %false_29966 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_29967 = torch.constant.int 0
    %int0_29968 = torch.constant.int 0
    %int64_29969 = torch.constant.int 64
    %int1_29970 = torch.constant.int 1
    %32494 = torch.aten.slice.Tensor %32493, %int0_29967, %int0_29968, %int64_29969, %int1_29970 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_29971 = torch.constant.int 6
    %32495 = torch.prims.convert_element_type %32494, %int6_29971 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_29972 = torch.constant.int 128
    %32496 = torch.aten.div.Scalar %32495, %int128_29972 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_29973 = torch.constant.float 5.000000e+05
    %32497 = torch.aten.pow.Scalar %float5.000000e05_29973, %32496 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %32498 = torch.aten.reciprocal %32497 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_29974 = torch.constant.float 1.000000e+00
    %32499 = torch.aten.mul.Scalar %32498, %float1.000000e00_29974 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_29975 = torch.constant.int 131072
    %int1_29976 = torch.constant.int 1
    %32500 = torch.prim.ListConstruct %int131072_29975, %int1_29976 : (!torch.int, !torch.int) -> !torch.list<int>
    %32501 = torch.aten.view %32492, %32500 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %32502 = torch.aten.mul.Tensor %32501, %32499 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %32503 = torch.aten.cos %32502 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %32504 = torch.aten.sin %32502 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %32505 = torch.aten.complex %32503, %32504 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
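    // The complex table is then broadcast to all shards with one
    // flow.tensor.transfer per device promise (@__device_0 .. @__device_7).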
    %32506 = torch_c.to_builtin_tensor %32505 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32507 = flow.tensor.transfer %32506 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %32508 = torch_c.from_builtin_tensor %32507 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32509 = torch_c.to_builtin_tensor %32505 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32510 = flow.tensor.transfer %32509 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %32511 = torch_c.from_builtin_tensor %32510 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32512 = torch_c.to_builtin_tensor %32505 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32513 = flow.tensor.transfer %32512 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %32514 = torch_c.from_builtin_tensor %32513 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32515 = torch_c.to_builtin_tensor %32505 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32516 = flow.tensor.transfer %32515 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %32517 = torch_c.from_builtin_tensor %32516 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32518 = torch_c.to_builtin_tensor %32505 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32519 = flow.tensor.transfer %32518 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %32520 = torch_c.from_builtin_tensor %32519 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32521 = torch_c.to_builtin_tensor %32505 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32522 = flow.tensor.transfer %32521 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %32523 = torch_c.from_builtin_tensor %32522 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32524 = torch_c.to_builtin_tensor %32505 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32525 = flow.tensor.transfer %32524 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %32526 = torch_c.from_builtin_tensor %32525 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32527 = torch_c.to_builtin_tensor %32505 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32528 = flow.tensor.transfer %32527 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %32529 = torch_c.from_builtin_tensor %32528 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
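    // Per-device RoPE application; the same five steps repeat for each device:
    // (1) slice the table to the current sequence length, (2) unsqueeze/slice
    // it to [1,?,1,64], (3) bitcast the f16 activations [4,?,4,128] to
    // complex<f16> [4,?,4,64], (4) multiply by the complex table (promoting to
    // complex<f32>), (5) bitcast back to [4,?,4,128] f32 and truncate to f16.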
    %int1_29977 = torch.constant.int 1
    %32530 = torch.aten.size.int %32273, %int1_29977 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_29978 = torch.constant.int 0
    %32531 = torch.aten.add.int %int0_29978, %32530 : !torch.int, !torch.int -> !torch.int
    %int0_29979 = torch.constant.int 0
    %int0_29980 = torch.constant.int 0
    %int1_29981 = torch.constant.int 1
    %32532 = torch.aten.slice.Tensor %32508, %int0_29979, %int0_29980, %32531, %int1_29981 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32532, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_29982 = torch.constant.int 1
    %int0_29983 = torch.constant.int 0
    %int9223372036854775807_29984 = torch.constant.int 9223372036854775807
    %int1_29985 = torch.constant.int 1
    %32533 = torch.aten.slice.Tensor %32532, %int1_29982, %int0_29983, %int9223372036854775807_29984, %int1_29985 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32533, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_29986 = torch.constant.int 0
    %32534 = torch.aten.unsqueeze %32533, %int0_29986 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32534, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_29987 = torch.constant.int 2
    %32535 = torch.aten.unsqueeze %32534, %int2_29987 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32535, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_29988 = torch.constant.int 3
    %int0_29989 = torch.constant.int 0
    %int9223372036854775807_29990 = torch.constant.int 9223372036854775807
    %int1_29991 = torch.constant.int 1
    %32536 = torch.aten.slice.Tensor %32535, %int3_29988, %int0_29989, %int9223372036854775807_29990, %int1_29991 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32536, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32537 = torch_c.to_builtin_tensor %32445 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_29992 = arith.constant 1 : index
    %dim_29993 = tensor.dim %32537, %c1_29992 : tensor<4x?x4x128xf16>
    %32538 = flow.tensor.bitcast %32537 : tensor<4x?x4x128xf16>{%dim_29993} -> tensor<4x?x4x64xcomplex<f16>>{%dim_29993}
    %32539 = torch_c.from_builtin_tensor %32538 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %32539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %32540 = torch.aten.mul.Tensor %32539, %32536 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %32540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %32541 = torch_c.to_builtin_tensor %32540 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_29994 = arith.constant 1 : index
    %dim_29995 = tensor.dim %32541, %c1_29994 : tensor<4x?x4x64xcomplex<f32>>
    %32542 = flow.tensor.bitcast %32541 : tensor<4x?x4x64xcomplex<f32>>{%dim_29995} -> tensor<4x?x4x128xf32>{%dim_29995}
    %32543 = torch_c.from_builtin_tensor %32542 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %32543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_29996 = torch.constant.int 5
    %32544 = torch.prims.convert_element_type %32543, %int5_29996 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_29997 = torch.constant.int 1
    %32545 = torch.aten.size.int %32279, %int1_29997 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_29998 = torch.constant.int 0
    %32546 = torch.aten.add.int %int0_29998, %32545 : !torch.int, !torch.int -> !torch.int
    %int0_29999 = torch.constant.int 0
    %int0_30000 = torch.constant.int 0
    %int1_30001 = torch.constant.int 1
    %32547 = torch.aten.slice.Tensor %32511, %int0_29999, %int0_30000, %32546, %int1_30001 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32547, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30002 = torch.constant.int 1
    %int0_30003 = torch.constant.int 0
    %int9223372036854775807_30004 = torch.constant.int 9223372036854775807
    %int1_30005 = torch.constant.int 1
    %32548 = torch.aten.slice.Tensor %32547, %int1_30002, %int0_30003, %int9223372036854775807_30004, %int1_30005 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32548, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30006 = torch.constant.int 0
    %32549 = torch.aten.unsqueeze %32548, %int0_30006 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32549, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30007 = torch.constant.int 2
    %32550 = torch.aten.unsqueeze %32549, %int2_30007 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32550, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30008 = torch.constant.int 3
    %int0_30009 = torch.constant.int 0
    %int9223372036854775807_30010 = torch.constant.int 9223372036854775807
    %int1_30011 = torch.constant.int 1
    %32551 = torch.aten.slice.Tensor %32550, %int3_30008, %int0_30009, %int9223372036854775807_30010, %int1_30011 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32551, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32552 = torch_c.to_builtin_tensor %32447 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_30012 = arith.constant 1 : index
    %dim_30013 = tensor.dim %32552, %c1_30012 : tensor<4x?x4x128xf16>
    %32553 = flow.tensor.bitcast %32552 : tensor<4x?x4x128xf16>{%dim_30013} -> tensor<4x?x4x64xcomplex<f16>>{%dim_30013}
    %32554 = torch_c.from_builtin_tensor %32553 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %32554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %32555 = torch.aten.mul.Tensor %32554, %32551 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %32555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %32556 = torch_c.to_builtin_tensor %32555 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_30014 = arith.constant 1 : index
    %dim_30015 = tensor.dim %32556, %c1_30014 : tensor<4x?x4x64xcomplex<f32>>
    %32557 = flow.tensor.bitcast %32556 : tensor<4x?x4x64xcomplex<f32>>{%dim_30015} -> tensor<4x?x4x128xf32>{%dim_30015}
    %32558 = torch_c.from_builtin_tensor %32557 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %32558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_30016 = torch.constant.int 5
    %32559 = torch.prims.convert_element_type %32558, %int5_30016 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_30017 = torch.constant.int 1
    %32560 = torch.aten.size.int %32285, %int1_30017 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_30018 = torch.constant.int 0
    %32561 = torch.aten.add.int %int0_30018, %32560 : !torch.int, !torch.int -> !torch.int
    %int0_30019 = torch.constant.int 0
    %int0_30020 = torch.constant.int 0
    %int1_30021 = torch.constant.int 1
    %32562 = torch.aten.slice.Tensor %32514, %int0_30019, %int0_30020, %32561, %int1_30021 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32562, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30022 = torch.constant.int 1
    %int0_30023 = torch.constant.int 0
    %int9223372036854775807_30024 = torch.constant.int 9223372036854775807
    %int1_30025 = torch.constant.int 1
    %32563 = torch.aten.slice.Tensor %32562, %int1_30022, %int0_30023, %int9223372036854775807_30024, %int1_30025 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32563, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30026 = torch.constant.int 0
    %32564 = torch.aten.unsqueeze %32563, %int0_30026 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32564, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30027 = torch.constant.int 2
    %32565 = torch.aten.unsqueeze %32564, %int2_30027 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32565, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30028 = torch.constant.int 3
    %int0_30029 = torch.constant.int 0
    %int9223372036854775807_30030 = torch.constant.int 9223372036854775807
    %int1_30031 = torch.constant.int 1
    %32566 = torch.aten.slice.Tensor %32565, %int3_30028, %int0_30029, %int9223372036854775807_30030, %int1_30031 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32566, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32567 = torch_c.to_builtin_tensor %32449 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_30032 = arith.constant 1 : index
    %dim_30033 = tensor.dim %32567, %c1_30032 : tensor<4x?x4x128xf16>
    %32568 = flow.tensor.bitcast %32567 : tensor<4x?x4x128xf16>{%dim_30033} -> tensor<4x?x4x64xcomplex<f16>>{%dim_30033}
    %32569 = torch_c.from_builtin_tensor %32568 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %32569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %32570 = torch.aten.mul.Tensor %32569, %32566 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %32570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %32571 = torch_c.to_builtin_tensor %32570 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_30034 = arith.constant 1 : index
    %dim_30035 = tensor.dim %32571, %c1_30034 : tensor<4x?x4x64xcomplex<f32>>
    %32572 = flow.tensor.bitcast %32571 : tensor<4x?x4x64xcomplex<f32>>{%dim_30035} -> tensor<4x?x4x128xf32>{%dim_30035}
    %32573 = torch_c.from_builtin_tensor %32572 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %32573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_30036 = torch.constant.int 5
    %32574 = torch.prims.convert_element_type %32573, %int5_30036 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_30037 = torch.constant.int 1
    %32575 = torch.aten.size.int %32291, %int1_30037 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_30038 = torch.constant.int 0
    %32576 = torch.aten.add.int %int0_30038, %32575 : !torch.int, !torch.int -> !torch.int
    %int0_30039 = torch.constant.int 0
    %int0_30040 = torch.constant.int 0
    %int1_30041 = torch.constant.int 1
    %32577 = torch.aten.slice.Tensor %32517, %int0_30039, %int0_30040, %32576, %int1_30041 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32577, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30042 = torch.constant.int 1
    %int0_30043 = torch.constant.int 0
    %int9223372036854775807_30044 = torch.constant.int 9223372036854775807
    %int1_30045 = torch.constant.int 1
    %32578 = torch.aten.slice.Tensor %32577, %int1_30042, %int0_30043, %int9223372036854775807_30044, %int1_30045 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32578, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30046 = torch.constant.int 0
    %32579 = torch.aten.unsqueeze %32578, %int0_30046 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32579, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30047 = torch.constant.int 2
    %32580 = torch.aten.unsqueeze %32579, %int2_30047 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32580, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30048 = torch.constant.int 3
    %int0_30049 = torch.constant.int 0
    %int9223372036854775807_30050 = torch.constant.int 9223372036854775807
    %int1_30051 = torch.constant.int 1
    %32581 = torch.aten.slice.Tensor %32580, %int3_30048, %int0_30049, %int9223372036854775807_30050, %int1_30051 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32581, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32582 = torch_c.to_builtin_tensor %32451 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_30052 = arith.constant 1 : index
    %dim_30053 = tensor.dim %32582, %c1_30052 : tensor<4x?x4x128xf16>
    %32583 = flow.tensor.bitcast %32582 : tensor<4x?x4x128xf16>{%dim_30053} -> tensor<4x?x4x64xcomplex<f16>>{%dim_30053}
    %32584 = torch_c.from_builtin_tensor %32583 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %32584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %32585 = torch.aten.mul.Tensor %32584, %32581 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %32585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %32586 = torch_c.to_builtin_tensor %32585 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_30054 = arith.constant 1 : index
    %dim_30055 = tensor.dim %32586, %c1_30054 : tensor<4x?x4x64xcomplex<f32>>
    %32587 = flow.tensor.bitcast %32586 : tensor<4x?x4x64xcomplex<f32>>{%dim_30055} -> tensor<4x?x4x128xf32>{%dim_30055}
    %32588 = torch_c.from_builtin_tensor %32587 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %32588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_30056 = torch.constant.int 5
    %32589 = torch.prims.convert_element_type %32588, %int5_30056 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
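    // Editorial note: the sequence above applies what appears to be rotary position
    // embedding (RoPE). The [4,?,4,128] f16 activation is bitcast to [4,?,4,64]
    // complex<f16> (adjacent element pairs become real/imaginary parts), multiplied
    // elementwise by a sliced-and-broadcast [1,?,1,64] view of the precomputed
    // [131072,64] complex<f32> frequency table, bitcast back to [4,?,4,128] f32, and
    // truncated to f16. The same pattern repeats below for the remaining per-shard
    // activations (%32453, %32455, %32457, %32459).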
    %int1_30057 = torch.constant.int 1
    %32590 = torch.aten.size.int %32297, %int1_30057 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_30058 = torch.constant.int 0
    %32591 = torch.aten.add.int %int0_30058, %32590 : !torch.int, !torch.int -> !torch.int
    %int0_30059 = torch.constant.int 0
    %int0_30060 = torch.constant.int 0
    %int1_30061 = torch.constant.int 1
    %32592 = torch.aten.slice.Tensor %32520, %int0_30059, %int0_30060, %32591, %int1_30061 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32592, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30062 = torch.constant.int 1
    %int0_30063 = torch.constant.int 0
    %int9223372036854775807_30064 = torch.constant.int 9223372036854775807
    %int1_30065 = torch.constant.int 1
    %32593 = torch.aten.slice.Tensor %32592, %int1_30062, %int0_30063, %int9223372036854775807_30064, %int1_30065 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32593, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30066 = torch.constant.int 0
    %32594 = torch.aten.unsqueeze %32593, %int0_30066 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32594, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30067 = torch.constant.int 2
    %32595 = torch.aten.unsqueeze %32594, %int2_30067 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32595, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30068 = torch.constant.int 3
    %int0_30069 = torch.constant.int 0
    %int9223372036854775807_30070 = torch.constant.int 9223372036854775807
    %int1_30071 = torch.constant.int 1
    %32596 = torch.aten.slice.Tensor %32595, %int3_30068, %int0_30069, %int9223372036854775807_30070, %int1_30071 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32596, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32597 = torch_c.to_builtin_tensor %32453 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_30072 = arith.constant 1 : index
    %dim_30073 = tensor.dim %32597, %c1_30072 : tensor<4x?x4x128xf16>
    %32598 = flow.tensor.bitcast %32597 : tensor<4x?x4x128xf16>{%dim_30073} -> tensor<4x?x4x64xcomplex<f16>>{%dim_30073}
    %32599 = torch_c.from_builtin_tensor %32598 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %32599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %32600 = torch.aten.mul.Tensor %32599, %32596 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %32600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %32601 = torch_c.to_builtin_tensor %32600 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_30074 = arith.constant 1 : index
    %dim_30075 = tensor.dim %32601, %c1_30074 : tensor<4x?x4x64xcomplex<f32>>
    %32602 = flow.tensor.bitcast %32601 : tensor<4x?x4x64xcomplex<f32>>{%dim_30075} -> tensor<4x?x4x128xf32>{%dim_30075}
    %32603 = torch_c.from_builtin_tensor %32602 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %32603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_30076 = torch.constant.int 5
    %32604 = torch.prims.convert_element_type %32603, %int5_30076 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_30077 = torch.constant.int 1
    %32605 = torch.aten.size.int %32303, %int1_30077 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_30078 = torch.constant.int 0
    %32606 = torch.aten.add.int %int0_30078, %32605 : !torch.int, !torch.int -> !torch.int
    %int0_30079 = torch.constant.int 0
    %int0_30080 = torch.constant.int 0
    %int1_30081 = torch.constant.int 1
    %32607 = torch.aten.slice.Tensor %32523, %int0_30079, %int0_30080, %32606, %int1_30081 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32607, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30082 = torch.constant.int 1
    %int0_30083 = torch.constant.int 0
    %int9223372036854775807_30084 = torch.constant.int 9223372036854775807
    %int1_30085 = torch.constant.int 1
    %32608 = torch.aten.slice.Tensor %32607, %int1_30082, %int0_30083, %int9223372036854775807_30084, %int1_30085 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32608, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30086 = torch.constant.int 0
    %32609 = torch.aten.unsqueeze %32608, %int0_30086 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32609, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30087 = torch.constant.int 2
    %32610 = torch.aten.unsqueeze %32609, %int2_30087 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32610, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30088 = torch.constant.int 3
    %int0_30089 = torch.constant.int 0
    %int9223372036854775807_30090 = torch.constant.int 9223372036854775807
    %int1_30091 = torch.constant.int 1
    %32611 = torch.aten.slice.Tensor %32610, %int3_30088, %int0_30089, %int9223372036854775807_30090, %int1_30091 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32611, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32612 = torch_c.to_builtin_tensor %32455 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_30092 = arith.constant 1 : index
    %dim_30093 = tensor.dim %32612, %c1_30092 : tensor<4x?x4x128xf16>
    %32613 = flow.tensor.bitcast %32612 : tensor<4x?x4x128xf16>{%dim_30093} -> tensor<4x?x4x64xcomplex<f16>>{%dim_30093}
    %32614 = torch_c.from_builtin_tensor %32613 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %32614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %32615 = torch.aten.mul.Tensor %32614, %32611 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %32615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %32616 = torch_c.to_builtin_tensor %32615 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_30094 = arith.constant 1 : index
    %dim_30095 = tensor.dim %32616, %c1_30094 : tensor<4x?x4x64xcomplex<f32>>
    %32617 = flow.tensor.bitcast %32616 : tensor<4x?x4x64xcomplex<f32>>{%dim_30095} -> tensor<4x?x4x128xf32>{%dim_30095}
    %32618 = torch_c.from_builtin_tensor %32617 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %32618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_30096 = torch.constant.int 5
    %32619 = torch.prims.convert_element_type %32618, %int5_30096 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_30097 = torch.constant.int 1
    %32620 = torch.aten.size.int %32309, %int1_30097 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_30098 = torch.constant.int 0
    %32621 = torch.aten.add.int %int0_30098, %32620 : !torch.int, !torch.int -> !torch.int
    %int0_30099 = torch.constant.int 0
    %int0_30100 = torch.constant.int 0
    %int1_30101 = torch.constant.int 1
    %32622 = torch.aten.slice.Tensor %32526, %int0_30099, %int0_30100, %32621, %int1_30101 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32622, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30102 = torch.constant.int 1
    %int0_30103 = torch.constant.int 0
    %int9223372036854775807_30104 = torch.constant.int 9223372036854775807
    %int1_30105 = torch.constant.int 1
    %32623 = torch.aten.slice.Tensor %32622, %int1_30102, %int0_30103, %int9223372036854775807_30104, %int1_30105 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32623, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30106 = torch.constant.int 0
    %32624 = torch.aten.unsqueeze %32623, %int0_30106 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32624, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30107 = torch.constant.int 2
    %32625 = torch.aten.unsqueeze %32624, %int2_30107 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32625, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30108 = torch.constant.int 3
    %int0_30109 = torch.constant.int 0
    %int9223372036854775807_30110 = torch.constant.int 9223372036854775807
    %int1_30111 = torch.constant.int 1
    %32626 = torch.aten.slice.Tensor %32625, %int3_30108, %int0_30109, %int9223372036854775807_30110, %int1_30111 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32626, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32627 = torch_c.to_builtin_tensor %32457 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_30112 = arith.constant 1 : index
    %dim_30113 = tensor.dim %32627, %c1_30112 : tensor<4x?x4x128xf16>
    %32628 = flow.tensor.bitcast %32627 : tensor<4x?x4x128xf16>{%dim_30113} -> tensor<4x?x4x64xcomplex<f16>>{%dim_30113}
    %32629 = torch_c.from_builtin_tensor %32628 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %32629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %32630 = torch.aten.mul.Tensor %32629, %32626 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %32630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %32631 = torch_c.to_builtin_tensor %32630 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_30114 = arith.constant 1 : index
    %dim_30115 = tensor.dim %32631, %c1_30114 : tensor<4x?x4x64xcomplex<f32>>
    %32632 = flow.tensor.bitcast %32631 : tensor<4x?x4x64xcomplex<f32>>{%dim_30115} -> tensor<4x?x4x128xf32>{%dim_30115}
    %32633 = torch_c.from_builtin_tensor %32632 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %32633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_30116 = torch.constant.int 5
    %32634 = torch.prims.convert_element_type %32633, %int5_30116 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_30117 = torch.constant.int 1
    %32635 = torch.aten.size.int %32315, %int1_30117 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_30118 = torch.constant.int 0
    %32636 = torch.aten.add.int %int0_30118, %32635 : !torch.int, !torch.int -> !torch.int
    %int0_30119 = torch.constant.int 0
    %int0_30120 = torch.constant.int 0
    %int1_30121 = torch.constant.int 1
    %32637 = torch.aten.slice.Tensor %32529, %int0_30119, %int0_30120, %32636, %int1_30121 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32637, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30122 = torch.constant.int 1
    %int0_30123 = torch.constant.int 0
    %int9223372036854775807_30124 = torch.constant.int 9223372036854775807
    %int1_30125 = torch.constant.int 1
    %32638 = torch.aten.slice.Tensor %32637, %int1_30122, %int0_30123, %int9223372036854775807_30124, %int1_30125 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32638, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30126 = torch.constant.int 0
    %32639 = torch.aten.unsqueeze %32638, %int0_30126 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32639, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30127 = torch.constant.int 2
    %32640 = torch.aten.unsqueeze %32639, %int2_30127 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32640, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30128 = torch.constant.int 3
    %int0_30129 = torch.constant.int 0
    %int9223372036854775807_30130 = torch.constant.int 9223372036854775807
    %int1_30131 = torch.constant.int 1
    %32641 = torch.aten.slice.Tensor %32640, %int3_30128, %int0_30129, %int9223372036854775807_30130, %int1_30131 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32641, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32642 = torch_c.to_builtin_tensor %32459 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_30132 = arith.constant 1 : index
    %dim_30133 = tensor.dim %32642, %c1_30132 : tensor<4x?x4x128xf16>
    %32643 = flow.tensor.bitcast %32642 : tensor<4x?x4x128xf16>{%dim_30133} -> tensor<4x?x4x64xcomplex<f16>>{%dim_30133}
    %32644 = torch_c.from_builtin_tensor %32643 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %32644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %32645 = torch.aten.mul.Tensor %32644, %32641 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %32645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %32646 = torch_c.to_builtin_tensor %32645 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_30134 = arith.constant 1 : index
    %dim_30135 = tensor.dim %32646, %c1_30134 : tensor<4x?x4x64xcomplex<f32>>
    %32647 = flow.tensor.bitcast %32646 : tensor<4x?x4x64xcomplex<f32>>{%dim_30135} -> tensor<4x?x4x128xf32>{%dim_30135}
    %32648 = torch_c.from_builtin_tensor %32647 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %32648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_30136 = torch.constant.int 5
    %32649 = torch.prims.convert_element_type %32648, %int5_30136 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %32649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
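    // Editorial note: the ops below rebuild the complex frequency table from scratch.
    // Reading the constants directly out of the IR, they compute
    //   inv_freq[i] = 1 / 500000^(2i/128),            i = 0..63
    //   angle[p,i]  = p * inv_freq[i],                p = 0..131071
    //   table[p,i]  = cos(angle) + j*sin(angle) = e^(j*angle)
    // arange(131072) supplies the positions p; arange(0, 128, 2) / 128 supplies the
    // exponents; pow + reciprocal give inv_freq; the view to [131072,1] broadcasts
    // the outer product; cos/sin/complex assemble the [131072,64] complex<f32> table.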
    %int131072_30137 = torch.constant.int 131072
    %none_30138 = torch.constant.none
    %none_30139 = torch.constant.none
    %cpu_30140 = torch.constant.device "cpu"
    %false_30141 = torch.constant.bool false
    %32650 = torch.aten.arange %int131072_30137, %none_30138, %none_30139, %cpu_30140, %false_30141 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_30142 = torch.constant.int 0
    %int128_30143 = torch.constant.int 128
    %int2_30144 = torch.constant.int 2
    %none_30145 = torch.constant.none
    %none_30146 = torch.constant.none
    %cpu_30147 = torch.constant.device "cpu"
    %false_30148 = torch.constant.bool false
    %32651 = torch.aten.arange.start_step %int0_30142, %int128_30143, %int2_30144, %none_30145, %none_30146, %cpu_30147, %false_30148 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_30149 = torch.constant.int 0
    %int0_30150 = torch.constant.int 0
    %int64_30151 = torch.constant.int 64
    %int1_30152 = torch.constant.int 1
    %32652 = torch.aten.slice.Tensor %32651, %int0_30149, %int0_30150, %int64_30151, %int1_30152 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_30153 = torch.constant.int 6
    %32653 = torch.prims.convert_element_type %32652, %int6_30153 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_30154 = torch.constant.int 128
    %32654 = torch.aten.div.Scalar %32653, %int128_30154 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_30155 = torch.constant.float 5.000000e+05
    %32655 = torch.aten.pow.Scalar %float5.000000e05_30155, %32654 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %32656 = torch.aten.reciprocal %32655 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_30156 = torch.constant.float 1.000000e+00
    %32657 = torch.aten.mul.Scalar %32656, %float1.000000e00_30156 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_30157 = torch.constant.int 131072
    %int1_30158 = torch.constant.int 1
    %32658 = torch.prim.ListConstruct %int131072_30157, %int1_30158 : (!torch.int, !torch.int) -> !torch.list<int>
    %32659 = torch.aten.view %32650, %32658 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %32660 = torch.aten.mul.Tensor %32659, %32657 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %32661 = torch.aten.cos %32660 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %32662 = torch.aten.sin %32660 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %32663 = torch.aten.complex %32661, %32662 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
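    // Broadcast the table: the complex<f32> tensor is materialized once and then
    // copied to each of the eight devices (@__device_0 ... @__device_7) via
    // flow.tensor.transfer, yielding one resident copy per device (%32666 ... %32687).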
    %32664 = torch_c.to_builtin_tensor %32663 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32665 = flow.tensor.transfer %32664 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %32666 = torch_c.from_builtin_tensor %32665 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32667 = torch_c.to_builtin_tensor %32663 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32668 = flow.tensor.transfer %32667 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %32669 = torch_c.from_builtin_tensor %32668 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32670 = torch_c.to_builtin_tensor %32663 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32671 = flow.tensor.transfer %32670 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %32672 = torch_c.from_builtin_tensor %32671 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32673 = torch_c.to_builtin_tensor %32663 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32674 = flow.tensor.transfer %32673 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %32675 = torch_c.from_builtin_tensor %32674 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32676 = torch_c.to_builtin_tensor %32663 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32677 = flow.tensor.transfer %32676 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %32678 = torch_c.from_builtin_tensor %32677 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32679 = torch_c.to_builtin_tensor %32663 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32680 = flow.tensor.transfer %32679 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %32681 = torch_c.from_builtin_tensor %32680 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32682 = torch_c.to_builtin_tensor %32663 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32683 = flow.tensor.transfer %32682 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %32684 = torch_c.from_builtin_tensor %32683 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %32685 = torch_c.to_builtin_tensor %32663 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %32686 = flow.tensor.transfer %32685 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %32687 = torch_c.from_builtin_tensor %32686 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
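    // Apply the same complex multiply to the per-device [4,?,1,128] f16 tensors
    // (%32461 ... %32475), one single-head shard per device, each against that
    // device's local copy of the frequency table. Device 0 first (%32461 against
    // table copy %32666):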
    %int1_30159 = torch.constant.int 1
    %32688 = torch.aten.size.int %32337, %int1_30159 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_30160 = torch.constant.int 0
    %32689 = torch.aten.add.int %int0_30160, %32688 : !torch.int, !torch.int -> !torch.int
    %int0_30161 = torch.constant.int 0
    %int0_30162 = torch.constant.int 0
    %int1_30163 = torch.constant.int 1
    %32690 = torch.aten.slice.Tensor %32666, %int0_30161, %int0_30162, %32689, %int1_30163 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32690, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30164 = torch.constant.int 1
    %int0_30165 = torch.constant.int 0
    %int9223372036854775807_30166 = torch.constant.int 9223372036854775807
    %int1_30167 = torch.constant.int 1
    %32691 = torch.aten.slice.Tensor %32690, %int1_30164, %int0_30165, %int9223372036854775807_30166, %int1_30167 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32691, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30168 = torch.constant.int 0
    %32692 = torch.aten.unsqueeze %32691, %int0_30168 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32692, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30169 = torch.constant.int 2
    %32693 = torch.aten.unsqueeze %32692, %int2_30169 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32693, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30170 = torch.constant.int 3
    %int0_30171 = torch.constant.int 0
    %int9223372036854775807_30172 = torch.constant.int 9223372036854775807
    %int1_30173 = torch.constant.int 1
    %32694 = torch.aten.slice.Tensor %32693, %int3_30170, %int0_30171, %int9223372036854775807_30172, %int1_30173 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32694, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32695 = torch_c.to_builtin_tensor %32461 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_30174 = arith.constant 1 : index
    %dim_30175 = tensor.dim %32695, %c1_30174 : tensor<4x?x1x128xf16>
    %32696 = flow.tensor.bitcast %32695 : tensor<4x?x1x128xf16>{%dim_30175} -> tensor<4x?x1x64xcomplex<f16>>{%dim_30175}
    %32697 = torch_c.from_builtin_tensor %32696 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %32697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %32698 = torch.aten.mul.Tensor %32697, %32694 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %32699 = torch_c.to_builtin_tensor %32698 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_30176 = arith.constant 1 : index
    %dim_30177 = tensor.dim %32699, %c1_30176 : tensor<4x?x1x64xcomplex<f32>>
    %32700 = flow.tensor.bitcast %32699 : tensor<4x?x1x64xcomplex<f32>>{%dim_30177} -> tensor<4x?x1x128xf32>{%dim_30177}
    %32701 = torch_c.from_builtin_tensor %32700 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %32701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_30178 = torch.constant.int 5
    %32702 = torch.prims.convert_element_type %32701, %int5_30178 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
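    // Device 1: same RoPE application for %32463 against table copy %32669.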
    %int1_30179 = torch.constant.int 1
    %32703 = torch.aten.size.int %32343, %int1_30179 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_30180 = torch.constant.int 0
    %32704 = torch.aten.add.int %int0_30180, %32703 : !torch.int, !torch.int -> !torch.int
    %int0_30181 = torch.constant.int 0
    %int0_30182 = torch.constant.int 0
    %int1_30183 = torch.constant.int 1
    %32705 = torch.aten.slice.Tensor %32669, %int0_30181, %int0_30182, %32704, %int1_30183 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32705, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30184 = torch.constant.int 1
    %int0_30185 = torch.constant.int 0
    %int9223372036854775807_30186 = torch.constant.int 9223372036854775807
    %int1_30187 = torch.constant.int 1
    %32706 = torch.aten.slice.Tensor %32705, %int1_30184, %int0_30185, %int9223372036854775807_30186, %int1_30187 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32706, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30188 = torch.constant.int 0
    %32707 = torch.aten.unsqueeze %32706, %int0_30188 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32707, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30189 = torch.constant.int 2
    %32708 = torch.aten.unsqueeze %32707, %int2_30189 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32708, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30190 = torch.constant.int 3
    %int0_30191 = torch.constant.int 0
    %int9223372036854775807_30192 = torch.constant.int 9223372036854775807
    %int1_30193 = torch.constant.int 1
    %32709 = torch.aten.slice.Tensor %32708, %int3_30190, %int0_30191, %int9223372036854775807_30192, %int1_30193 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32709, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32710 = torch_c.to_builtin_tensor %32463 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_30194 = arith.constant 1 : index
    %dim_30195 = tensor.dim %32710, %c1_30194 : tensor<4x?x1x128xf16>
    %32711 = flow.tensor.bitcast %32710 : tensor<4x?x1x128xf16>{%dim_30195} -> tensor<4x?x1x64xcomplex<f16>>{%dim_30195}
    %32712 = torch_c.from_builtin_tensor %32711 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %32712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %32713 = torch.aten.mul.Tensor %32712, %32709 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %32714 = torch_c.to_builtin_tensor %32713 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_30196 = arith.constant 1 : index
    %dim_30197 = tensor.dim %32714, %c1_30196 : tensor<4x?x1x64xcomplex<f32>>
    %32715 = flow.tensor.bitcast %32714 : tensor<4x?x1x64xcomplex<f32>>{%dim_30197} -> tensor<4x?x1x128xf32>{%dim_30197}
    %32716 = torch_c.from_builtin_tensor %32715 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %32716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_30198 = torch.constant.int 5
    %32717 = torch.prims.convert_element_type %32716, %int5_30198 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
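    // Device 2: %32465 against table copy %32672.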
    %int1_30199 = torch.constant.int 1
    %32718 = torch.aten.size.int %32349, %int1_30199 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_30200 = torch.constant.int 0
    %32719 = torch.aten.add.int %int0_30200, %32718 : !torch.int, !torch.int -> !torch.int
    %int0_30201 = torch.constant.int 0
    %int0_30202 = torch.constant.int 0
    %int1_30203 = torch.constant.int 1
    %32720 = torch.aten.slice.Tensor %32672, %int0_30201, %int0_30202, %32719, %int1_30203 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32720, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30204 = torch.constant.int 1
    %int0_30205 = torch.constant.int 0
    %int9223372036854775807_30206 = torch.constant.int 9223372036854775807
    %int1_30207 = torch.constant.int 1
    %32721 = torch.aten.slice.Tensor %32720, %int1_30204, %int0_30205, %int9223372036854775807_30206, %int1_30207 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32721, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30208 = torch.constant.int 0
    %32722 = torch.aten.unsqueeze %32721, %int0_30208 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32722, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30209 = torch.constant.int 2
    %32723 = torch.aten.unsqueeze %32722, %int2_30209 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32723, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30210 = torch.constant.int 3
    %int0_30211 = torch.constant.int 0
    %int9223372036854775807_30212 = torch.constant.int 9223372036854775807
    %int1_30213 = torch.constant.int 1
    %32724 = torch.aten.slice.Tensor %32723, %int3_30210, %int0_30211, %int9223372036854775807_30212, %int1_30213 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32724, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32725 = torch_c.to_builtin_tensor %32465 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_30214 = arith.constant 1 : index
    %dim_30215 = tensor.dim %32725, %c1_30214 : tensor<4x?x1x128xf16>
    %32726 = flow.tensor.bitcast %32725 : tensor<4x?x1x128xf16>{%dim_30215} -> tensor<4x?x1x64xcomplex<f16>>{%dim_30215}
    %32727 = torch_c.from_builtin_tensor %32726 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %32727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %32728 = torch.aten.mul.Tensor %32727, %32724 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %32729 = torch_c.to_builtin_tensor %32728 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_30216 = arith.constant 1 : index
    %dim_30217 = tensor.dim %32729, %c1_30216 : tensor<4x?x1x64xcomplex<f32>>
    %32730 = flow.tensor.bitcast %32729 : tensor<4x?x1x64xcomplex<f32>>{%dim_30217} -> tensor<4x?x1x128xf32>{%dim_30217}
    %32731 = torch_c.from_builtin_tensor %32730 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %32731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_30218 = torch.constant.int 5
    %32732 = torch.prims.convert_element_type %32731, %int5_30218 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
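    // Device 3: %32467 against table copy %32675.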
    %int1_30219 = torch.constant.int 1
    %32733 = torch.aten.size.int %32355, %int1_30219 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_30220 = torch.constant.int 0
    %32734 = torch.aten.add.int %int0_30220, %32733 : !torch.int, !torch.int -> !torch.int
    %int0_30221 = torch.constant.int 0
    %int0_30222 = torch.constant.int 0
    %int1_30223 = torch.constant.int 1
    %32735 = torch.aten.slice.Tensor %32675, %int0_30221, %int0_30222, %32734, %int1_30223 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32735, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30224 = torch.constant.int 1
    %int0_30225 = torch.constant.int 0
    %int9223372036854775807_30226 = torch.constant.int 9223372036854775807
    %int1_30227 = torch.constant.int 1
    %32736 = torch.aten.slice.Tensor %32735, %int1_30224, %int0_30225, %int9223372036854775807_30226, %int1_30227 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32736, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30228 = torch.constant.int 0
    %32737 = torch.aten.unsqueeze %32736, %int0_30228 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32737, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30229 = torch.constant.int 2
    %32738 = torch.aten.unsqueeze %32737, %int2_30229 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32738, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30230 = torch.constant.int 3
    %int0_30231 = torch.constant.int 0
    %int9223372036854775807_30232 = torch.constant.int 9223372036854775807
    %int1_30233 = torch.constant.int 1
    %32739 = torch.aten.slice.Tensor %32738, %int3_30230, %int0_30231, %int9223372036854775807_30232, %int1_30233 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32739, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32740 = torch_c.to_builtin_tensor %32467 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_30234 = arith.constant 1 : index
    %dim_30235 = tensor.dim %32740, %c1_30234 : tensor<4x?x1x128xf16>
    %32741 = flow.tensor.bitcast %32740 : tensor<4x?x1x128xf16>{%dim_30235} -> tensor<4x?x1x64xcomplex<f16>>{%dim_30235}
    %32742 = torch_c.from_builtin_tensor %32741 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %32742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %32743 = torch.aten.mul.Tensor %32742, %32739 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %32744 = torch_c.to_builtin_tensor %32743 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_30236 = arith.constant 1 : index
    %dim_30237 = tensor.dim %32744, %c1_30236 : tensor<4x?x1x64xcomplex<f32>>
    %32745 = flow.tensor.bitcast %32744 : tensor<4x?x1x64xcomplex<f32>>{%dim_30237} -> tensor<4x?x1x128xf32>{%dim_30237}
    %32746 = torch_c.from_builtin_tensor %32745 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %32746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_30238 = torch.constant.int 5
    %32747 = torch.prims.convert_element_type %32746, %int5_30238 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
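    // Device 4: %32469 against table copy %32678.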
    %int1_30239 = torch.constant.int 1
    %32748 = torch.aten.size.int %32361, %int1_30239 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_30240 = torch.constant.int 0
    %32749 = torch.aten.add.int %int0_30240, %32748 : !torch.int, !torch.int -> !torch.int
    %int0_30241 = torch.constant.int 0
    %int0_30242 = torch.constant.int 0
    %int1_30243 = torch.constant.int 1
    %32750 = torch.aten.slice.Tensor %32678, %int0_30241, %int0_30242, %32749, %int1_30243 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32750, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30244 = torch.constant.int 1
    %int0_30245 = torch.constant.int 0
    %int9223372036854775807_30246 = torch.constant.int 9223372036854775807
    %int1_30247 = torch.constant.int 1
    %32751 = torch.aten.slice.Tensor %32750, %int1_30244, %int0_30245, %int9223372036854775807_30246, %int1_30247 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32751, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30248 = torch.constant.int 0
    %32752 = torch.aten.unsqueeze %32751, %int0_30248 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32752, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30249 = torch.constant.int 2
    %32753 = torch.aten.unsqueeze %32752, %int2_30249 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32753, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30250 = torch.constant.int 3
    %int0_30251 = torch.constant.int 0
    %int9223372036854775807_30252 = torch.constant.int 9223372036854775807
    %int1_30253 = torch.constant.int 1
    %32754 = torch.aten.slice.Tensor %32753, %int3_30250, %int0_30251, %int9223372036854775807_30252, %int1_30253 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32754, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32755 = torch_c.to_builtin_tensor %32469 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_30254 = arith.constant 1 : index
    %dim_30255 = tensor.dim %32755, %c1_30254 : tensor<4x?x1x128xf16>
    %32756 = flow.tensor.bitcast %32755 : tensor<4x?x1x128xf16>{%dim_30255} -> tensor<4x?x1x64xcomplex<f16>>{%dim_30255}
    %32757 = torch_c.from_builtin_tensor %32756 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %32757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %32758 = torch.aten.mul.Tensor %32757, %32754 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %32759 = torch_c.to_builtin_tensor %32758 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_30256 = arith.constant 1 : index
    %dim_30257 = tensor.dim %32759, %c1_30256 : tensor<4x?x1x64xcomplex<f32>>
    %32760 = flow.tensor.bitcast %32759 : tensor<4x?x1x64xcomplex<f32>>{%dim_30257} -> tensor<4x?x1x128xf32>{%dim_30257}
    %32761 = torch_c.from_builtin_tensor %32760 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %32761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_30258 = torch.constant.int 5
    %32762 = torch.prims.convert_element_type %32761, %int5_30258 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
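    // Device 5: %32471 against table copy %32681.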
    %int1_30259 = torch.constant.int 1
    %32763 = torch.aten.size.int %32367, %int1_30259 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_30260 = torch.constant.int 0
    %32764 = torch.aten.add.int %int0_30260, %32763 : !torch.int, !torch.int -> !torch.int
    %int0_30261 = torch.constant.int 0
    %int0_30262 = torch.constant.int 0
    %int1_30263 = torch.constant.int 1
    %32765 = torch.aten.slice.Tensor %32681, %int0_30261, %int0_30262, %32764, %int1_30263 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32765, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30264 = torch.constant.int 1
    %int0_30265 = torch.constant.int 0
    %int9223372036854775807_30266 = torch.constant.int 9223372036854775807
    %int1_30267 = torch.constant.int 1
    %32766 = torch.aten.slice.Tensor %32765, %int1_30264, %int0_30265, %int9223372036854775807_30266, %int1_30267 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32766, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30268 = torch.constant.int 0
    %32767 = torch.aten.unsqueeze %32766, %int0_30268 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32767, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30269 = torch.constant.int 2
    %32768 = torch.aten.unsqueeze %32767, %int2_30269 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32768, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30270 = torch.constant.int 3
    %int0_30271 = torch.constant.int 0
    %int9223372036854775807_30272 = torch.constant.int 9223372036854775807
    %int1_30273 = torch.constant.int 1
    %32769 = torch.aten.slice.Tensor %32768, %int3_30270, %int0_30271, %int9223372036854775807_30272, %int1_30273 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32769, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32770 = torch_c.to_builtin_tensor %32471 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_30274 = arith.constant 1 : index
    %dim_30275 = tensor.dim %32770, %c1_30274 : tensor<4x?x1x128xf16>
    %32771 = flow.tensor.bitcast %32770 : tensor<4x?x1x128xf16>{%dim_30275} -> tensor<4x?x1x64xcomplex<f16>>{%dim_30275}
    %32772 = torch_c.from_builtin_tensor %32771 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %32772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %32773 = torch.aten.mul.Tensor %32772, %32769 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %32774 = torch_c.to_builtin_tensor %32773 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_30276 = arith.constant 1 : index
    %dim_30277 = tensor.dim %32774, %c1_30276 : tensor<4x?x1x64xcomplex<f32>>
    %32775 = flow.tensor.bitcast %32774 : tensor<4x?x1x64xcomplex<f32>>{%dim_30277} -> tensor<4x?x1x128xf32>{%dim_30277}
    %32776 = torch_c.from_builtin_tensor %32775 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %32776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_30278 = torch.constant.int 5
    %32777 = torch.prims.convert_element_type %32776, %int5_30278 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
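    // Device 6: %32473 against table copy %32684.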
    %int1_30279 = torch.constant.int 1
    %32778 = torch.aten.size.int %32373, %int1_30279 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_30280 = torch.constant.int 0
    %32779 = torch.aten.add.int %int0_30280, %32778 : !torch.int, !torch.int -> !torch.int
    %int0_30281 = torch.constant.int 0
    %int0_30282 = torch.constant.int 0
    %int1_30283 = torch.constant.int 1
    %32780 = torch.aten.slice.Tensor %32684, %int0_30281, %int0_30282, %32779, %int1_30283 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32780, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30284 = torch.constant.int 1
    %int0_30285 = torch.constant.int 0
    %int9223372036854775807_30286 = torch.constant.int 9223372036854775807
    %int1_30287 = torch.constant.int 1
    %32781 = torch.aten.slice.Tensor %32780, %int1_30284, %int0_30285, %int9223372036854775807_30286, %int1_30287 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32781, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30288 = torch.constant.int 0
    %32782 = torch.aten.unsqueeze %32781, %int0_30288 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32782, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30289 = torch.constant.int 2
    %32783 = torch.aten.unsqueeze %32782, %int2_30289 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32783, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30290 = torch.constant.int 3
    %int0_30291 = torch.constant.int 0
    %int9223372036854775807_30292 = torch.constant.int 9223372036854775807
    %int1_30293 = torch.constant.int 1
    %32784 = torch.aten.slice.Tensor %32783, %int3_30290, %int0_30291, %int9223372036854775807_30292, %int1_30293 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32784, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32785 = torch_c.to_builtin_tensor %32473 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_30294 = arith.constant 1 : index
    %dim_30295 = tensor.dim %32785, %c1_30294 : tensor<4x?x1x128xf16>
    %32786 = flow.tensor.bitcast %32785 : tensor<4x?x1x128xf16>{%dim_30295} -> tensor<4x?x1x64xcomplex<f16>>{%dim_30295}
    %32787 = torch_c.from_builtin_tensor %32786 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %32787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %32788 = torch.aten.mul.Tensor %32787, %32784 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %32789 = torch_c.to_builtin_tensor %32788 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_30296 = arith.constant 1 : index
    %dim_30297 = tensor.dim %32789, %c1_30296 : tensor<4x?x1x64xcomplex<f32>>
    %32790 = flow.tensor.bitcast %32789 : tensor<4x?x1x64xcomplex<f32>>{%dim_30297} -> tensor<4x?x1x128xf32>{%dim_30297}
    %32791 = torch_c.from_builtin_tensor %32790 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %32791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_30298 = torch.constant.int 5
    %32792 = torch.prims.convert_element_type %32791, %int5_30298 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
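    // Device 7: %32475 against table copy %32687.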
    %int1_30299 = torch.constant.int 1
    %32793 = torch.aten.size.int %32379, %int1_30299 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_30300 = torch.constant.int 0
    %32794 = torch.aten.add.int %int0_30300, %32793 : !torch.int, !torch.int -> !torch.int
    %int0_30301 = torch.constant.int 0
    %int0_30302 = torch.constant.int 0
    %int1_30303 = torch.constant.int 1
    %32795 = torch.aten.slice.Tensor %32687, %int0_30301, %int0_30302, %32794, %int1_30303 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32795, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_30304 = torch.constant.int 1
    %int0_30305 = torch.constant.int 0
    %int9223372036854775807_30306 = torch.constant.int 9223372036854775807
    %int1_30307 = torch.constant.int 1
    %32796 = torch.aten.slice.Tensor %32795, %int1_30304, %int0_30305, %int9223372036854775807_30306, %int1_30307 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %32796, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_30308 = torch.constant.int 0
    %32797 = torch.aten.unsqueeze %32796, %int0_30308 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %32797, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_30309 = torch.constant.int 2
    %32798 = torch.aten.unsqueeze %32797, %int2_30309 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32798, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_30310 = torch.constant.int 3
    %int0_30311 = torch.constant.int 0
    %int9223372036854775807_30312 = torch.constant.int 9223372036854775807
    %int1_30313 = torch.constant.int 1
    %32799 = torch.aten.slice.Tensor %32798, %int3_30310, %int0_30311, %int9223372036854775807_30312, %int1_30313 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32799, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %32800 = torch_c.to_builtin_tensor %32475 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_30314 = arith.constant 1 : index
    %dim_30315 = tensor.dim %32800, %c1_30314 : tensor<4x?x1x128xf16>
    %32801 = flow.tensor.bitcast %32800 : tensor<4x?x1x128xf16>{%dim_30315} -> tensor<4x?x1x64xcomplex<f16>>{%dim_30315}
    %32802 = torch_c.from_builtin_tensor %32801 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %32802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %32803 = torch.aten.mul.Tensor %32802, %32799 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %32803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %32804 = torch_c.to_builtin_tensor %32803 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_30316 = arith.constant 1 : index
    %dim_30317 = tensor.dim %32804, %c1_30316 : tensor<4x?x1x64xcomplex<f32>>
    %32805 = flow.tensor.bitcast %32804 : tensor<4x?x1x64xcomplex<f32>>{%dim_30317} -> tensor<4x?x1x128xf32>{%dim_30317}
    %32806 = torch_c.from_builtin_tensor %32805 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %32806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_30318 = torch.constant.int 5
    %32807 = torch.prims.convert_element_type %32806, %int5_30318 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %32807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
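    // Likely page-table address arithmetic for the paged KV cache: the eight
    // per-shard page-id tensors (%2364 ... %2385, shape [4, pages]) are scaled
    // by 64, the number of [16, 1, 128] rows per cache page (32 layers x 2 K/V
    // slots, per the [?, 32, 2, 16, 1, 128] cache view further down).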
    %int64_30319 = torch.constant.int 64
    %32808 = torch.aten.mul.Scalar %2364, %int64_30319 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32808, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_30320 = torch.constant.int 64
    %32809 = torch.aten.mul.Scalar %2367, %int64_30320 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32809, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_30321 = torch.constant.int 64
    %32810 = torch.aten.mul.Scalar %2370, %int64_30321 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32810, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_30322 = torch.constant.int 64
    %32811 = torch.aten.mul.Scalar %2373, %int64_30322 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32811, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_30323 = torch.constant.int 64
    %32812 = torch.aten.mul.Scalar %2376, %int64_30323 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32812, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_30324 = torch.constant.int 64
    %32813 = torch.aten.mul.Scalar %2379, %int64_30324 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32813, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_30325 = torch.constant.int 64
    %32814 = torch.aten.mul.Scalar %2382, %int64_30325 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32814, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_30326 = torch.constant.int 64
    %32815 = torch.aten.mul.Scalar %2385, %int64_30326 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32815, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
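    // The +32 offset presumably selects this layer's key slot within each page
    // (layer_index * 2 with K before V would put layer 16's keys at row 32).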
    %int32_30327 = torch.constant.int 32
    %int1_30328 = torch.constant.int 1
    %32816 = torch.aten.add.Scalar %32808, %int32_30327, %int1_30328 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32816, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int32_30329 = torch.constant.int 32
    %int1_30330 = torch.constant.int 1
    %32817 = torch.aten.add.Scalar %32809, %int32_30329, %int1_30330 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32817, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int32_30331 = torch.constant.int 32
    %int1_30332 = torch.constant.int 1
    %32818 = torch.aten.add.Scalar %32810, %int32_30331, %int1_30332 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32818, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int32_30333 = torch.constant.int 32
    %int1_30334 = torch.constant.int 1
    %32819 = torch.aten.add.Scalar %32811, %int32_30333, %int1_30334 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32819, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int32_30335 = torch.constant.int 32
    %int1_30336 = torch.constant.int 1
    %32820 = torch.aten.add.Scalar %32812, %int32_30335, %int1_30336 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32820, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int32_30337 = torch.constant.int 32
    %int1_30338 = torch.constant.int 1
    %32821 = torch.aten.add.Scalar %32813, %int32_30337, %int1_30338 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32821, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int32_30339 = torch.constant.int 32
    %int1_30340 = torch.constant.int 1
    %32822 = torch.aten.add.Scalar %32814, %int32_30339, %int1_30340 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32822, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int32_30341 = torch.constant.int 32
    %int1_30342 = torch.constant.int 1
    %32823 = torch.aten.add.Scalar %32815, %int32_30341, %int1_30342 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32823, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
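    // Reshape each shard's rotated keys from [4, tokens, 1, 128] to
    // [4, pages, 16, 1, 128]; s0 counts pages of 16 tokens (tokens = s0 * 16).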
    %int4_30343 = torch.constant.int 4
    %int16_30344 = torch.constant.int 16
    %int1_30345 = torch.constant.int 1
    %int128_30346 = torch.constant.int 128
    %32824 = torch.prim.ListConstruct %int4_30343, %3095, %int16_30344, %int1_30345, %int128_30346 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32825 = torch.aten.view %32702, %32824 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32825, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30347 = torch.constant.int 4
    %int16_30348 = torch.constant.int 16
    %int1_30349 = torch.constant.int 1
    %int128_30350 = torch.constant.int 128
    %32826 = torch.prim.ListConstruct %int4_30347, %3095, %int16_30348, %int1_30349, %int128_30350 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32827 = torch.aten.view %32717, %32826 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32827, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30351 = torch.constant.int 4
    %int16_30352 = torch.constant.int 16
    %int1_30353 = torch.constant.int 1
    %int128_30354 = torch.constant.int 128
    %32828 = torch.prim.ListConstruct %int4_30351, %3095, %int16_30352, %int1_30353, %int128_30354 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32829 = torch.aten.view %32732, %32828 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32829, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30355 = torch.constant.int 4
    %int16_30356 = torch.constant.int 16
    %int1_30357 = torch.constant.int 1
    %int128_30358 = torch.constant.int 128
    %32830 = torch.prim.ListConstruct %int4_30355, %3095, %int16_30356, %int1_30357, %int128_30358 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32831 = torch.aten.view %32747, %32830 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32831, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30359 = torch.constant.int 4
    %int16_30360 = torch.constant.int 16
    %int1_30361 = torch.constant.int 1
    %int128_30362 = torch.constant.int 128
    %32832 = torch.prim.ListConstruct %int4_30359, %3095, %int16_30360, %int1_30361, %int128_30362 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32833 = torch.aten.view %32762, %32832 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32833, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30363 = torch.constant.int 4
    %int16_30364 = torch.constant.int 16
    %int1_30365 = torch.constant.int 1
    %int128_30366 = torch.constant.int 128
    %32834 = torch.prim.ListConstruct %int4_30363, %3095, %int16_30364, %int1_30365, %int128_30366 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32835 = torch.aten.view %32777, %32834 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32835, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30367 = torch.constant.int 4
    %int16_30368 = torch.constant.int 16
    %int1_30369 = torch.constant.int 1
    %int128_30370 = torch.constant.int 128
    %32836 = torch.prim.ListConstruct %int4_30367, %3095, %int16_30368, %int1_30369, %int128_30370 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32837 = torch.aten.view %32792, %32836 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32837, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30371 = torch.constant.int 4
    %int16_30372 = torch.constant.int 16
    %int1_30373 = torch.constant.int 1
    %int128_30374 = torch.constant.int 128
    %32838 = torch.prim.ListConstruct %int4_30371, %3095, %int16_30372, %int1_30373, %int128_30374 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32839 = torch.aten.view %32807, %32838 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32839, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
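    // Collapse batch and pages: [4, pages, 16, 1, 128] -> [4 * pages, 16, 1, 128],
    // matching the flattened row layout the cache scatter expects.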
    %int4_30375 = torch.constant.int 4
    %32840 = torch.aten.mul.int %int4_30375, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30376 = torch.constant.int 16
    %int1_30377 = torch.constant.int 1
    %int128_30378 = torch.constant.int 128
    %32841 = torch.prim.ListConstruct %32840, %int16_30376, %int1_30377, %int128_30378 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32842 = torch.aten.view %32825, %32841 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32842, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30379 = torch.constant.int 4
    %32843 = torch.aten.mul.int %int4_30379, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30380 = torch.constant.int 16
    %int1_30381 = torch.constant.int 1
    %int128_30382 = torch.constant.int 128
    %32844 = torch.prim.ListConstruct %32843, %int16_30380, %int1_30381, %int128_30382 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32845 = torch.aten.view %32827, %32844 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32845, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30383 = torch.constant.int 4
    %32846 = torch.aten.mul.int %int4_30383, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30384 = torch.constant.int 16
    %int1_30385 = torch.constant.int 1
    %int128_30386 = torch.constant.int 128
    %32847 = torch.prim.ListConstruct %32846, %int16_30384, %int1_30385, %int128_30386 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32848 = torch.aten.view %32829, %32847 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32848, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30387 = torch.constant.int 4
    %32849 = torch.aten.mul.int %int4_30387, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30388 = torch.constant.int 16
    %int1_30389 = torch.constant.int 1
    %int128_30390 = torch.constant.int 128
    %32850 = torch.prim.ListConstruct %32849, %int16_30388, %int1_30389, %int128_30390 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32851 = torch.aten.view %32831, %32850 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32851, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30391 = torch.constant.int 4
    %32852 = torch.aten.mul.int %int4_30391, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30392 = torch.constant.int 16
    %int1_30393 = torch.constant.int 1
    %int128_30394 = torch.constant.int 128
    %32853 = torch.prim.ListConstruct %32852, %int16_30392, %int1_30393, %int128_30394 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32854 = torch.aten.view %32833, %32853 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32854, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30395 = torch.constant.int 4
    %32855 = torch.aten.mul.int %int4_30395, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30396 = torch.constant.int 16
    %int1_30397 = torch.constant.int 1
    %int128_30398 = torch.constant.int 128
    %32856 = torch.prim.ListConstruct %32855, %int16_30396, %int1_30397, %int128_30398 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32857 = torch.aten.view %32835, %32856 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32857, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30399 = torch.constant.int 4
    %32858 = torch.aten.mul.int %int4_30399, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30400 = torch.constant.int 16
    %int1_30401 = torch.constant.int 1
    %int128_30402 = torch.constant.int 128
    %32859 = torch.prim.ListConstruct %32858, %int16_30400, %int1_30401, %int128_30402 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32860 = torch.aten.view %32837, %32859 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32860, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30403 = torch.constant.int 4
    %32861 = torch.aten.mul.int %int4_30403, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30404 = torch.constant.int 16
    %int1_30405 = torch.constant.int 1
    %int128_30406 = torch.constant.int 128
    %32862 = torch.prim.ListConstruct %32861, %int16_30404, %int1_30405, %int128_30406 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32863 = torch.aten.view %32839, %32862 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32863, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
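    // Flatten the key-row indices [4, pages] -> [4 * pages], one index per
    // flattened page above.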
    %int4_30407 = torch.constant.int 4
    %32864 = torch.aten.mul.int %int4_30407, %3095 : !torch.int, !torch.int -> !torch.int
    %32865 = torch.prim.ListConstruct %32864 : (!torch.int) -> !torch.list<int>
    %32866 = torch.aten.view %32816, %32865 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32866, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30408 = torch.constant.int 4
    %32867 = torch.aten.mul.int %int4_30408, %3095 : !torch.int, !torch.int -> !torch.int
    %32868 = torch.prim.ListConstruct %32867 : (!torch.int) -> !torch.list<int>
    %32869 = torch.aten.view %32817, %32868 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32869, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30409 = torch.constant.int 4
    %32870 = torch.aten.mul.int %int4_30409, %3095 : !torch.int, !torch.int -> !torch.int
    %32871 = torch.prim.ListConstruct %32870 : (!torch.int) -> !torch.list<int>
    %32872 = torch.aten.view %32818, %32871 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32872, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30410 = torch.constant.int 4
    %32873 = torch.aten.mul.int %int4_30410, %3095 : !torch.int, !torch.int -> !torch.int
    %32874 = torch.prim.ListConstruct %32873 : (!torch.int) -> !torch.list<int>
    %32875 = torch.aten.view %32819, %32874 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32875, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30411 = torch.constant.int 4
    %32876 = torch.aten.mul.int %int4_30411, %3095 : !torch.int, !torch.int -> !torch.int
    %32877 = torch.prim.ListConstruct %32876 : (!torch.int) -> !torch.list<int>
    %32878 = torch.aten.view %32820, %32877 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32878, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30412 = torch.constant.int 4
    %32879 = torch.aten.mul.int %int4_30412, %3095 : !torch.int, !torch.int -> !torch.int
    %32880 = torch.prim.ListConstruct %32879 : (!torch.int) -> !torch.list<int>
    %32881 = torch.aten.view %32821, %32880 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32881, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30413 = torch.constant.int 4
    %32882 = torch.aten.mul.int %int4_30413, %3095 : !torch.int, !torch.int -> !torch.int
    %32883 = torch.prim.ListConstruct %32882 : (!torch.int) -> !torch.list<int>
    %32884 = torch.aten.view %32822, %32883 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32884, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30414 = torch.constant.int 4
    %32885 = torch.aten.mul.int %int4_30414, %3095 : !torch.int, !torch.int -> !torch.int
    %32886 = torch.prim.ListConstruct %32885 : (!torch.int) -> !torch.list<int>
    %32887 = torch.aten.view %32823, %32886 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32887, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
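    // Same paging reshape for what appear to be the value tensors of the eight
    // shards (%32477 ... %32491), which skip the rotary rotation.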
    %int4_30415 = torch.constant.int 4
    %int16_30416 = torch.constant.int 16
    %int1_30417 = torch.constant.int 1
    %int128_30418 = torch.constant.int 128
    %32888 = torch.prim.ListConstruct %int4_30415, %3095, %int16_30416, %int1_30417, %int128_30418 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32889 = torch.aten.view %32477, %32888 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32889, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30419 = torch.constant.int 4
    %int16_30420 = torch.constant.int 16
    %int1_30421 = torch.constant.int 1
    %int128_30422 = torch.constant.int 128
    %32890 = torch.prim.ListConstruct %int4_30419, %3095, %int16_30420, %int1_30421, %int128_30422 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32891 = torch.aten.view %32479, %32890 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32891, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30423 = torch.constant.int 4
    %int16_30424 = torch.constant.int 16
    %int1_30425 = torch.constant.int 1
    %int128_30426 = torch.constant.int 128
    %32892 = torch.prim.ListConstruct %int4_30423, %3095, %int16_30424, %int1_30425, %int128_30426 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32893 = torch.aten.view %32481, %32892 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32893, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30427 = torch.constant.int 4
    %int16_30428 = torch.constant.int 16
    %int1_30429 = torch.constant.int 1
    %int128_30430 = torch.constant.int 128
    %32894 = torch.prim.ListConstruct %int4_30427, %3095, %int16_30428, %int1_30429, %int128_30430 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32895 = torch.aten.view %32483, %32894 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32895, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30431 = torch.constant.int 4
    %int16_30432 = torch.constant.int 16
    %int1_30433 = torch.constant.int 1
    %int128_30434 = torch.constant.int 128
    %32896 = torch.prim.ListConstruct %int4_30431, %3095, %int16_30432, %int1_30433, %int128_30434 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32897 = torch.aten.view %32485, %32896 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32897, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30435 = torch.constant.int 4
    %int16_30436 = torch.constant.int 16
    %int1_30437 = torch.constant.int 1
    %int128_30438 = torch.constant.int 128
    %32898 = torch.prim.ListConstruct %int4_30435, %3095, %int16_30436, %int1_30437, %int128_30438 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32899 = torch.aten.view %32487, %32898 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32899, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30439 = torch.constant.int 4
    %int16_30440 = torch.constant.int 16
    %int1_30441 = torch.constant.int 1
    %int128_30442 = torch.constant.int 128
    %32900 = torch.prim.ListConstruct %int4_30439, %3095, %int16_30440, %int1_30441, %int128_30442 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32901 = torch.aten.view %32489, %32900 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32901, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_30443 = torch.constant.int 4
    %int16_30444 = torch.constant.int 16
    %int1_30445 = torch.constant.int 1
    %int128_30446 = torch.constant.int 128
    %32902 = torch.prim.ListConstruct %int4_30443, %3095, %int16_30444, %int1_30445, %int128_30446 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32903 = torch.aten.view %32491, %32902 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %32903, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
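    // And the matching batch/page collapse for the values, again to
    // [4 * pages, 16, 1, 128].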
    %int4_30447 = torch.constant.int 4
    %32904 = torch.aten.mul.int %int4_30447, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30448 = torch.constant.int 16
    %int1_30449 = torch.constant.int 1
    %int128_30450 = torch.constant.int 128
    %32905 = torch.prim.ListConstruct %32904, %int16_30448, %int1_30449, %int128_30450 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32906 = torch.aten.view %32889, %32905 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32906, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30451 = torch.constant.int 4
    %32907 = torch.aten.mul.int %int4_30451, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30452 = torch.constant.int 16
    %int1_30453 = torch.constant.int 1
    %int128_30454 = torch.constant.int 128
    %32908 = torch.prim.ListConstruct %32907, %int16_30452, %int1_30453, %int128_30454 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32909 = torch.aten.view %32891, %32908 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32909, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30455 = torch.constant.int 4
    %32910 = torch.aten.mul.int %int4_30455, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30456 = torch.constant.int 16
    %int1_30457 = torch.constant.int 1
    %int128_30458 = torch.constant.int 128
    %32911 = torch.prim.ListConstruct %32910, %int16_30456, %int1_30457, %int128_30458 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32912 = torch.aten.view %32893, %32911 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32912, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30459 = torch.constant.int 4
    %32913 = torch.aten.mul.int %int4_30459, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30460 = torch.constant.int 16
    %int1_30461 = torch.constant.int 1
    %int128_30462 = torch.constant.int 128
    %32914 = torch.prim.ListConstruct %32913, %int16_30460, %int1_30461, %int128_30462 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32915 = torch.aten.view %32895, %32914 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32915, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30463 = torch.constant.int 4
    %32916 = torch.aten.mul.int %int4_30463, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30464 = torch.constant.int 16
    %int1_30465 = torch.constant.int 1
    %int128_30466 = torch.constant.int 128
    %32917 = torch.prim.ListConstruct %32916, %int16_30464, %int1_30465, %int128_30466 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32918 = torch.aten.view %32897, %32917 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32918, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30467 = torch.constant.int 4
    %32919 = torch.aten.mul.int %int4_30467, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30468 = torch.constant.int 16
    %int1_30469 = torch.constant.int 1
    %int128_30470 = torch.constant.int 128
    %32920 = torch.prim.ListConstruct %32919, %int16_30468, %int1_30469, %int128_30470 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32921 = torch.aten.view %32899, %32920 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32921, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30471 = torch.constant.int 4
    %32922 = torch.aten.mul.int %int4_30471, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30472 = torch.constant.int 16
    %int1_30473 = torch.constant.int 1
    %int128_30474 = torch.constant.int 128
    %32923 = torch.prim.ListConstruct %32922, %int16_30472, %int1_30473, %int128_30474 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32924 = torch.aten.view %32901, %32923 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32924, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_30475 = torch.constant.int 4
    %32925 = torch.aten.mul.int %int4_30475, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_30476 = torch.constant.int 16
    %int1_30477 = torch.constant.int 1
    %int128_30478 = torch.constant.int 128
    %32926 = torch.prim.ListConstruct %32925, %int16_30476, %int1_30477, %int128_30478 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32927 = torch.aten.view %32903, %32926 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32927, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
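    // Value rows sit one slot after the corresponding key rows in each page,
    // hence key_index + 1, followed by the same flatten to [4 * pages].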
    %int1_30479 = torch.constant.int 1
    %int1_30480 = torch.constant.int 1
    %32928 = torch.aten.add.Scalar %32816, %int1_30479, %int1_30480 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32928, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_30481 = torch.constant.int 1
    %int1_30482 = torch.constant.int 1
    %32929 = torch.aten.add.Scalar %32817, %int1_30481, %int1_30482 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32929, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_30483 = torch.constant.int 1
    %int1_30484 = torch.constant.int 1
    %32930 = torch.aten.add.Scalar %32818, %int1_30483, %int1_30484 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32930, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_30485 = torch.constant.int 1
    %int1_30486 = torch.constant.int 1
    %32931 = torch.aten.add.Scalar %32819, %int1_30485, %int1_30486 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32931, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_30487 = torch.constant.int 1
    %int1_30488 = torch.constant.int 1
    %32932 = torch.aten.add.Scalar %32820, %int1_30487, %int1_30488 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32932, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_30489 = torch.constant.int 1
    %int1_30490 = torch.constant.int 1
    %32933 = torch.aten.add.Scalar %32821, %int1_30489, %int1_30490 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32933, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_30491 = torch.constant.int 1
    %int1_30492 = torch.constant.int 1
    %32934 = torch.aten.add.Scalar %32822, %int1_30491, %int1_30492 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32934, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_30493 = torch.constant.int 1
    %int1_30494 = torch.constant.int 1
    %32935 = torch.aten.add.Scalar %32823, %int1_30493, %int1_30494 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %32935, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_30495 = torch.constant.int 4
    %32936 = torch.aten.mul.int %int4_30495, %3095 : !torch.int, !torch.int -> !torch.int
    %32937 = torch.prim.ListConstruct %32936 : (!torch.int) -> !torch.list<int>
    %32938 = torch.aten.view %32928, %32937 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32938, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30496 = torch.constant.int 4
    %32939 = torch.aten.mul.int %int4_30496, %3095 : !torch.int, !torch.int -> !torch.int
    %32940 = torch.prim.ListConstruct %32939 : (!torch.int) -> !torch.list<int>
    %32941 = torch.aten.view %32929, %32940 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32941, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30497 = torch.constant.int 4
    %32942 = torch.aten.mul.int %int4_30497, %3095 : !torch.int, !torch.int -> !torch.int
    %32943 = torch.prim.ListConstruct %32942 : (!torch.int) -> !torch.list<int>
    %32944 = torch.aten.view %32930, %32943 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32944, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30498 = torch.constant.int 4
    %32945 = torch.aten.mul.int %int4_30498, %3095 : !torch.int, !torch.int -> !torch.int
    %32946 = torch.prim.ListConstruct %32945 : (!torch.int) -> !torch.list<int>
    %32947 = torch.aten.view %32931, %32946 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32947, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30499 = torch.constant.int 4
    %32948 = torch.aten.mul.int %int4_30499, %3095 : !torch.int, !torch.int -> !torch.int
    %32949 = torch.prim.ListConstruct %32948 : (!torch.int) -> !torch.list<int>
    %32950 = torch.aten.view %32932, %32949 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32950, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30500 = torch.constant.int 4
    %32951 = torch.aten.mul.int %int4_30500, %3095 : !torch.int, !torch.int -> !torch.int
    %32952 = torch.prim.ListConstruct %32951 : (!torch.int) -> !torch.list<int>
    %32953 = torch.aten.view %32933, %32952 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32953, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30501 = torch.constant.int 4
    %32954 = torch.aten.mul.int %int4_30501, %3095 : !torch.int, !torch.int -> !torch.int
    %32955 = torch.prim.ListConstruct %32954 : (!torch.int) -> !torch.list<int>
    %32956 = torch.aten.view %32934, %32955 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32956, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_30502 = torch.constant.int 4
    %32957 = torch.aten.mul.int %int4_30502, %3095 : !torch.int, !torch.int -> !torch.int
    %32958 = torch.prim.ListConstruct %32957 : (!torch.int) -> !torch.list<int>
    %32959 = torch.aten.view %32935, %32958 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32959, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
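    // Concatenate key and value row indices per shard so one index_put below
    // writes both.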
    %32960 = torch.prim.ListConstruct %32866, %32938 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_30503 = torch.constant.int 0
    %32961 = torch.aten.cat %32960, %int0_30503 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32961, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %32962 = torch.prim.ListConstruct %32869, %32941 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_30504 = torch.constant.int 0
    %32963 = torch.aten.cat %32962, %int0_30504 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32963, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %32964 = torch.prim.ListConstruct %32872, %32944 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_30505 = torch.constant.int 0
    %32965 = torch.aten.cat %32964, %int0_30505 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32965, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %32966 = torch.prim.ListConstruct %32875, %32947 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_30506 = torch.constant.int 0
    %32967 = torch.aten.cat %32966, %int0_30506 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32967, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %32968 = torch.prim.ListConstruct %32878, %32950 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_30507 = torch.constant.int 0
    %32969 = torch.aten.cat %32968, %int0_30507 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32969, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %32970 = torch.prim.ListConstruct %32881, %32953 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_30508 = torch.constant.int 0
    %32971 = torch.aten.cat %32970, %int0_30508 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32971, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %32972 = torch.prim.ListConstruct %32884, %32956 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_30509 = torch.constant.int 0
    %32973 = torch.aten.cat %32972, %int0_30509 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32973, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %32974 = torch.prim.ListConstruct %32887, %32959 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_30510 = torch.constant.int 0
    %32975 = torch.aten.cat %32974, %int0_30510 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %32975, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
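    // Concatenate the flattened key and value pages in the same shard order as
    // the indices above.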
    %32976 = torch.prim.ListConstruct %32842, %32906 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_30511 = torch.constant.int 0
    %32977 = torch.aten.cat %32976, %int0_30511 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32977, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %32978 = torch.prim.ListConstruct %32845, %32909 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_30512 = torch.constant.int 0
    %32979 = torch.aten.cat %32978, %int0_30512 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32979, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %32980 = torch.prim.ListConstruct %32848, %32912 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_30513 = torch.constant.int 0
    %32981 = torch.aten.cat %32980, %int0_30513 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32981, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %32982 = torch.prim.ListConstruct %32851, %32915 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_30514 = torch.constant.int 0
    %32983 = torch.aten.cat %32982, %int0_30514 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32983, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %32984 = torch.prim.ListConstruct %32854, %32918 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_30515 = torch.constant.int 0
    %32985 = torch.aten.cat %32984, %int0_30515 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32985, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %32986 = torch.prim.ListConstruct %32857, %32921 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_30516 = torch.constant.int 0
    %32987 = torch.aten.cat %32986, %int0_30516 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32987, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %32988 = torch.prim.ListConstruct %32860, %32924 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_30517 = torch.constant.int 0
    %32989 = torch.aten.cat %32988, %int0_30517 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32989, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %32990 = torch.prim.ListConstruct %32863, %32927 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_30518 = torch.constant.int 0
    %32991 = torch.aten.cat %32990, %int0_30518 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32991, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
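    // Scatter into the paged KV cache, one device shard at a time: view the
    // flat [?, 131072] cache as [pages, 32, 2, 16, 1, 128] (32 * 2 * 16 * 1 *
    // 128 = 131072 f16 elements per page), flatten to [pages * 64, 16, 1, 128]
    // rows, index_put the concatenated K/V pages at the computed row indices,
    // then view back to the flat layout.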
    %int32_30519 = torch.constant.int 32
    %int2_30520 = torch.constant.int 2
    %int16_30521 = torch.constant.int 16
    %int1_30522 = torch.constant.int 1
    %int128_30523 = torch.constant.int 128
    %32992 = torch.prim.ListConstruct %3023, %int32_30519, %int2_30520, %int16_30521, %int1_30522, %int128_30523 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32993 = torch.aten.view %31142, %32992 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %32993, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_30524 = torch.constant.int 32
    %32994 = torch.aten.mul.int %3023, %int32_30524 : !torch.int, !torch.int -> !torch.int
    %int2_30525 = torch.constant.int 2
    %32995 = torch.aten.mul.int %32994, %int2_30525 : !torch.int, !torch.int -> !torch.int
    %int16_30526 = torch.constant.int 16
    %int1_30527 = torch.constant.int 1
    %int128_30528 = torch.constant.int 128
    %32996 = torch.prim.ListConstruct %32995, %int16_30526, %int1_30527, %int128_30528 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %32997 = torch.aten.view %32993, %32996 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32997, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %32998 = torch.prim.ListConstruct %32961 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_30529 = torch.constant.bool false
    %32999 = torch.aten.index_put %32997, %32998, %32977, %false_30529 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %32999, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_30530 = torch.constant.int 32
    %int2_30531 = torch.constant.int 2
    %int16_30532 = torch.constant.int 16
    %int1_30533 = torch.constant.int 1
    %int128_30534 = torch.constant.int 128
    %33000 = torch.prim.ListConstruct %3023, %int32_30530, %int2_30531, %int16_30532, %int1_30533, %int128_30534 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33001 = torch.aten.view %32999, %33000 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33001, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_30535 = torch.constant.int 131072
    %33002 = torch.prim.ListConstruct %3023, %int131072_30535 : (!torch.int, !torch.int) -> !torch.list<int>
    %33003 = torch.aten.view %33001, %33002 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %33003, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
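    // The identical view / index_put / view-back sequence now repeats for the
    // remaining device-local caches (%31154, %31166, %31178, %31190, %31202, ...).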
    %int32_30536 = torch.constant.int 32
    %int2_30537 = torch.constant.int 2
    %int16_30538 = torch.constant.int 16
    %int1_30539 = torch.constant.int 1
    %int128_30540 = torch.constant.int 128
    %33004 = torch.prim.ListConstruct %3026, %int32_30536, %int2_30537, %int16_30538, %int1_30539, %int128_30540 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33005 = torch.aten.view %31154, %33004 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33005, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_30541 = torch.constant.int 32
    %33006 = torch.aten.mul.int %3026, %int32_30541 : !torch.int, !torch.int -> !torch.int
    %int2_30542 = torch.constant.int 2
    %33007 = torch.aten.mul.int %33006, %int2_30542 : !torch.int, !torch.int -> !torch.int
    %int16_30543 = torch.constant.int 16
    %int1_30544 = torch.constant.int 1
    %int128_30545 = torch.constant.int 128
    %33008 = torch.prim.ListConstruct %33007, %int16_30543, %int1_30544, %int128_30545 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33009 = torch.aten.view %33005, %33008 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33009, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %33010 = torch.prim.ListConstruct %32963 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_30546 = torch.constant.bool false
    %33011 = torch.aten.index_put %33009, %33010, %32979, %false_30546 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33011, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_30547 = torch.constant.int 32
    %int2_30548 = torch.constant.int 2
    %int16_30549 = torch.constant.int 16
    %int1_30550 = torch.constant.int 1
    %int128_30551 = torch.constant.int 128
    %33012 = torch.prim.ListConstruct %3026, %int32_30547, %int2_30548, %int16_30549, %int1_30550, %int128_30551 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33013 = torch.aten.view %33011, %33012 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33013, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_30552 = torch.constant.int 131072
    %33014 = torch.prim.ListConstruct %3026, %int131072_30552 : (!torch.int, !torch.int) -> !torch.list<int>
    %33015 = torch.aten.view %33013, %33014 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %33015, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_30553 = torch.constant.int 32
    %int2_30554 = torch.constant.int 2
    %int16_30555 = torch.constant.int 16
    %int1_30556 = torch.constant.int 1
    %int128_30557 = torch.constant.int 128
    %33016 = torch.prim.ListConstruct %3029, %int32_30553, %int2_30554, %int16_30555, %int1_30556, %int128_30557 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33017 = torch.aten.view %31166, %33016 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33017, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_30558 = torch.constant.int 32
    %33018 = torch.aten.mul.int %3029, %int32_30558 : !torch.int, !torch.int -> !torch.int
    %int2_30559 = torch.constant.int 2
    %33019 = torch.aten.mul.int %33018, %int2_30559 : !torch.int, !torch.int -> !torch.int
    %int16_30560 = torch.constant.int 16
    %int1_30561 = torch.constant.int 1
    %int128_30562 = torch.constant.int 128
    %33020 = torch.prim.ListConstruct %33019, %int16_30560, %int1_30561, %int128_30562 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33021 = torch.aten.view %33017, %33020 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33021, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %33022 = torch.prim.ListConstruct %32965 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_30563 = torch.constant.bool false
    %33023 = torch.aten.index_put %33021, %33022, %32981, %false_30563 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33023, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_30564 = torch.constant.int 32
    %int2_30565 = torch.constant.int 2
    %int16_30566 = torch.constant.int 16
    %int1_30567 = torch.constant.int 1
    %int128_30568 = torch.constant.int 128
    %33024 = torch.prim.ListConstruct %3029, %int32_30564, %int2_30565, %int16_30566, %int1_30567, %int128_30568 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33025 = torch.aten.view %33023, %33024 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33025, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_30569 = torch.constant.int 131072
    %33026 = torch.prim.ListConstruct %3029, %int131072_30569 : (!torch.int, !torch.int) -> !torch.list<int>
    %33027 = torch.aten.view %33025, %33026 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %33027, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_30570 = torch.constant.int 32
    %int2_30571 = torch.constant.int 2
    %int16_30572 = torch.constant.int 16
    %int1_30573 = torch.constant.int 1
    %int128_30574 = torch.constant.int 128
    %33028 = torch.prim.ListConstruct %3032, %int32_30570, %int2_30571, %int16_30572, %int1_30573, %int128_30574 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33029 = torch.aten.view %31178, %33028 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33029, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_30575 = torch.constant.int 32
    %33030 = torch.aten.mul.int %3032, %int32_30575 : !torch.int, !torch.int -> !torch.int
    %int2_30576 = torch.constant.int 2
    %33031 = torch.aten.mul.int %33030, %int2_30576 : !torch.int, !torch.int -> !torch.int
    %int16_30577 = torch.constant.int 16
    %int1_30578 = torch.constant.int 1
    %int128_30579 = torch.constant.int 128
    %33032 = torch.prim.ListConstruct %33031, %int16_30577, %int1_30578, %int128_30579 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33033 = torch.aten.view %33029, %33032 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33033, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %33034 = torch.prim.ListConstruct %32967 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_30580 = torch.constant.bool false
    %33035 = torch.aten.index_put %33033, %33034, %32983, %false_30580 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33035, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_30581 = torch.constant.int 32
    %int2_30582 = torch.constant.int 2
    %int16_30583 = torch.constant.int 16
    %int1_30584 = torch.constant.int 1
    %int128_30585 = torch.constant.int 128
    %33036 = torch.prim.ListConstruct %3032, %int32_30581, %int2_30582, %int16_30583, %int1_30584, %int128_30585 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33037 = torch.aten.view %33035, %33036 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33037, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_30586 = torch.constant.int 131072
    %33038 = torch.prim.ListConstruct %3032, %int131072_30586 : (!torch.int, !torch.int) -> !torch.list<int>
    %33039 = torch.aten.view %33037, %33038 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %33039, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_30587 = torch.constant.int 32
    %int2_30588 = torch.constant.int 2
    %int16_30589 = torch.constant.int 16
    %int1_30590 = torch.constant.int 1
    %int128_30591 = torch.constant.int 128
    %33040 = torch.prim.ListConstruct %3035, %int32_30587, %int2_30588, %int16_30589, %int1_30590, %int128_30591 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33041 = torch.aten.view %31190, %33040 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33041, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_30592 = torch.constant.int 32
    %33042 = torch.aten.mul.int %3035, %int32_30592 : !torch.int, !torch.int -> !torch.int
    %int2_30593 = torch.constant.int 2
    %33043 = torch.aten.mul.int %33042, %int2_30593 : !torch.int, !torch.int -> !torch.int
    %int16_30594 = torch.constant.int 16
    %int1_30595 = torch.constant.int 1
    %int128_30596 = torch.constant.int 128
    %33044 = torch.prim.ListConstruct %33043, %int16_30594, %int1_30595, %int128_30596 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33045 = torch.aten.view %33041, %33044 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33045, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %33046 = torch.prim.ListConstruct %32969 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_30597 = torch.constant.bool false
    %33047 = torch.aten.index_put %33045, %33046, %32985, %false_30597 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33047, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_30598 = torch.constant.int 32
    %int2_30599 = torch.constant.int 2
    %int16_30600 = torch.constant.int 16
    %int1_30601 = torch.constant.int 1
    %int128_30602 = torch.constant.int 128
    %33048 = torch.prim.ListConstruct %3035, %int32_30598, %int2_30599, %int16_30600, %int1_30601, %int128_30602 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33049 = torch.aten.view %33047, %33048 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33049, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_30603 = torch.constant.int 131072
    %33050 = torch.prim.ListConstruct %3035, %int131072_30603 : (!torch.int, !torch.int) -> !torch.list<int>
    %33051 = torch.aten.view %33049, %33050 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %33051, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_30604 = torch.constant.int 32
    %int2_30605 = torch.constant.int 2
    %int16_30606 = torch.constant.int 16
    %int1_30607 = torch.constant.int 1
    %int128_30608 = torch.constant.int 128
    %33052 = torch.prim.ListConstruct %3038, %int32_30604, %int2_30605, %int16_30606, %int1_30607, %int128_30608 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33053 = torch.aten.view %31202, %33052 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33053, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_30609 = torch.constant.int 32
    %33054 = torch.aten.mul.int %3038, %int32_30609 : !torch.int, !torch.int -> !torch.int
    %int2_30610 = torch.constant.int 2
    %33055 = torch.aten.mul.int %33054, %int2_30610 : !torch.int, !torch.int -> !torch.int
    %int16_30611 = torch.constant.int 16
    %int1_30612 = torch.constant.int 1
    %int128_30613 = torch.constant.int 128
    %33056 = torch.prim.ListConstruct %33055, %int16_30611, %int1_30612, %int128_30613 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33057 = torch.aten.view %33053, %33056 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33057, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %33058 = torch.prim.ListConstruct %32971 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_30614 = torch.constant.bool false
    %33059 = torch.aten.index_put %33057, %33058, %32987, %false_30614 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33059, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_30615 = torch.constant.int 32
    %int2_30616 = torch.constant.int 2
    %int16_30617 = torch.constant.int 16
    %int1_30618 = torch.constant.int 1
    %int128_30619 = torch.constant.int 128
    %33060 = torch.prim.ListConstruct %3038, %int32_30615, %int2_30616, %int16_30617, %int1_30618, %int128_30619 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33061 = torch.aten.view %33059, %33060 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33061, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_30620 = torch.constant.int 131072
    %33062 = torch.prim.ListConstruct %3038, %int131072_30620 : (!torch.int, !torch.int) -> !torch.list<int>
    %33063 = torch.aten.view %33061, %33062 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %33063, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_30621 = torch.constant.int 32
    %int2_30622 = torch.constant.int 2
    %int16_30623 = torch.constant.int 16
    %int1_30624 = torch.constant.int 1
    %int128_30625 = torch.constant.int 128
    %33064 = torch.prim.ListConstruct %3041, %int32_30621, %int2_30622, %int16_30623, %int1_30624, %int128_30625 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33065 = torch.aten.view %31214, %33064 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33065, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_30626 = torch.constant.int 32
    %33066 = torch.aten.mul.int %3041, %int32_30626 : !torch.int, !torch.int -> !torch.int
    %int2_30627 = torch.constant.int 2
    %33067 = torch.aten.mul.int %33066, %int2_30627 : !torch.int, !torch.int -> !torch.int
    %int16_30628 = torch.constant.int 16
    %int1_30629 = torch.constant.int 1
    %int128_30630 = torch.constant.int 128
    %33068 = torch.prim.ListConstruct %33067, %int16_30628, %int1_30629, %int128_30630 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33069 = torch.aten.view %33065, %33068 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33069, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %33070 = torch.prim.ListConstruct %32973 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_30631 = torch.constant.bool false
    %33071 = torch.aten.index_put %33069, %33070, %32989, %false_30631 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33071, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_30632 = torch.constant.int 32
    %int2_30633 = torch.constant.int 2
    %int16_30634 = torch.constant.int 16
    %int1_30635 = torch.constant.int 1
    %int128_30636 = torch.constant.int 128
    %33072 = torch.prim.ListConstruct %3041, %int32_30632, %int2_30633, %int16_30634, %int1_30635, %int128_30636 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33073 = torch.aten.view %33071, %33072 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33073, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_30637 = torch.constant.int 131072
    %33074 = torch.prim.ListConstruct %3041, %int131072_30637 : (!torch.int, !torch.int) -> !torch.list<int>
    %33075 = torch.aten.view %33073, %33074 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %33075, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_30638 = torch.constant.int 32
    %int2_30639 = torch.constant.int 2
    %int16_30640 = torch.constant.int 16
    %int1_30641 = torch.constant.int 1
    %int128_30642 = torch.constant.int 128
    %33076 = torch.prim.ListConstruct %3044, %int32_30638, %int2_30639, %int16_30640, %int1_30641, %int128_30642 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33077 = torch.aten.view %31226, %33076 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33077, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_30643 = torch.constant.int 32
    %33078 = torch.aten.mul.int %3044, %int32_30643 : !torch.int, !torch.int -> !torch.int
    %int2_30644 = torch.constant.int 2
    %33079 = torch.aten.mul.int %33078, %int2_30644 : !torch.int, !torch.int -> !torch.int
    %int16_30645 = torch.constant.int 16
    %int1_30646 = torch.constant.int 1
    %int128_30647 = torch.constant.int 128
    %33080 = torch.prim.ListConstruct %33079, %int16_30645, %int1_30646, %int128_30647 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33081 = torch.aten.view %33077, %33080 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33081, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %33082 = torch.prim.ListConstruct %32975 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_30648 = torch.constant.bool false
    %33083 = torch.aten.index_put %33081, %33082, %32991, %false_30648 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %33083, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_30649 = torch.constant.int 32
    %int2_30650 = torch.constant.int 2
    %int16_30651 = torch.constant.int 16
    %int1_30652 = torch.constant.int 1
    %int128_30653 = torch.constant.int 128
    %33084 = torch.prim.ListConstruct %3044, %int32_30649, %int2_30650, %int16_30651, %int1_30652, %int128_30653 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33085 = torch.aten.view %33083, %33084 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %33085, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_30654 = torch.constant.int 131072
    %33086 = torch.prim.ListConstruct %3044, %int131072_30654 : (!torch.int, !torch.int) -> !torch.list<int>
    %33087 = torch.aten.view %33085, %33086 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %33087, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
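    // NOTE (added annotation): with the cache updated, the attention inputs are assembled.
    // The eight tensors below (one per device shard) get a broadcast axis inserted at
    // dim -2, [4,?,1,128] -> [4,?,1,1,128]; judging by operand order at the attention
    // calls further down, these become the key operand.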
    %int-2_30655 = torch.constant.int -2
    %33088 = torch.aten.unsqueeze %32702, %int-2_30655 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30656 = torch.constant.int -2
    %33089 = torch.aten.unsqueeze %32717, %int-2_30656 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30657 = torch.constant.int -2
    %33090 = torch.aten.unsqueeze %32732, %int-2_30657 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30658 = torch.constant.int -2
    %33091 = torch.aten.unsqueeze %32747, %int-2_30658 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30659 = torch.constant.int -2
    %33092 = torch.aten.unsqueeze %32762, %int-2_30659 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30660 = torch.constant.int -2
    %33093 = torch.aten.unsqueeze %32777, %int-2_30660 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30661 = torch.constant.int -2
    %33094 = torch.aten.unsqueeze %32792, %int-2_30661 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30662 = torch.constant.int -2
    %33095 = torch.aten.unsqueeze %32807, %int-2_30662 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
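    // NOTE (added annotation): expand broadcasts the single KV head per shard to 4 heads,
    // [4,?,1,1,128] -> [4,?,1,4,128], matching the 4 query heads per shard; this looks like
    // the usual grouped-query-attention head repetition, done as a view rather than a copy
    // (implicit=false only forbids shrinking dims).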
    %int4_30663 = torch.constant.int 4
    %int1_30664 = torch.constant.int 1
    %int4_30665 = torch.constant.int 4
    %int128_30666 = torch.constant.int 128
    %33096 = torch.prim.ListConstruct %int4_30663, %32688, %int1_30664, %int4_30665, %int128_30666 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30667 = torch.constant.bool false
    %33097 = torch.aten.expand %33088, %33096, %false_30667 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30668 = torch.constant.int 4
    %int1_30669 = torch.constant.int 1
    %int4_30670 = torch.constant.int 4
    %int128_30671 = torch.constant.int 128
    %33098 = torch.prim.ListConstruct %int4_30668, %32688, %int1_30669, %int4_30670, %int128_30671 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30672 = torch.constant.bool false
    %33099 = torch.aten.expand %33089, %33098, %false_30672 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30673 = torch.constant.int 4
    %int1_30674 = torch.constant.int 1
    %int4_30675 = torch.constant.int 4
    %int128_30676 = torch.constant.int 128
    %33100 = torch.prim.ListConstruct %int4_30673, %32688, %int1_30674, %int4_30675, %int128_30676 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30677 = torch.constant.bool false
    %33101 = torch.aten.expand %33090, %33100, %false_30677 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30678 = torch.constant.int 4
    %int1_30679 = torch.constant.int 1
    %int4_30680 = torch.constant.int 4
    %int128_30681 = torch.constant.int 128
    %33102 = torch.prim.ListConstruct %int4_30678, %32688, %int1_30679, %int4_30680, %int128_30681 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30682 = torch.constant.bool false
    %33103 = torch.aten.expand %33091, %33102, %false_30682 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30683 = torch.constant.int 4
    %int1_30684 = torch.constant.int 1
    %int4_30685 = torch.constant.int 4
    %int128_30686 = torch.constant.int 128
    %33104 = torch.prim.ListConstruct %int4_30683, %32688, %int1_30684, %int4_30685, %int128_30686 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30687 = torch.constant.bool false
    %33105 = torch.aten.expand %33092, %33104, %false_30687 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30688 = torch.constant.int 4
    %int1_30689 = torch.constant.int 1
    %int4_30690 = torch.constant.int 4
    %int128_30691 = torch.constant.int 128
    %33106 = torch.prim.ListConstruct %int4_30688, %32688, %int1_30689, %int4_30690, %int128_30691 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30692 = torch.constant.bool false
    %33107 = torch.aten.expand %33093, %33106, %false_30692 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30693 = torch.constant.int 4
    %int1_30694 = torch.constant.int 1
    %int4_30695 = torch.constant.int 4
    %int128_30696 = torch.constant.int 128
    %33108 = torch.prim.ListConstruct %int4_30693, %32688, %int1_30694, %int4_30695, %int128_30696 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30697 = torch.constant.bool false
    %33109 = torch.aten.expand %33094, %33108, %false_30697 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30698 = torch.constant.int 4
    %int1_30699 = torch.constant.int 1
    %int4_30700 = torch.constant.int 4
    %int128_30701 = torch.constant.int 128
    %33110 = torch.prim.ListConstruct %int4_30698, %32688, %int1_30699, %int4_30700, %int128_30701 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30702 = torch.constant.bool false
    %33111 = torch.aten.expand %33095, %33110, %false_30702 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
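    // NOTE (added annotation): the views below fold the broadcast axis away,
    // [4,?,1,4,128] -> [4,?,4,128], one per shard.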
    %int4_30703 = torch.constant.int 4
    %int4_30704 = torch.constant.int 4
    %int128_30705 = torch.constant.int 128
    %33112 = torch.prim.ListConstruct %int4_30703, %32688, %int4_30704, %int128_30705 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33113 = torch.aten.view %33097, %33112 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30706 = torch.constant.int 4
    %int4_30707 = torch.constant.int 4
    %int128_30708 = torch.constant.int 128
    %33114 = torch.prim.ListConstruct %int4_30706, %32688, %int4_30707, %int128_30708 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33115 = torch.aten.view %33099, %33114 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30709 = torch.constant.int 4
    %int4_30710 = torch.constant.int 4
    %int128_30711 = torch.constant.int 128
    %33116 = torch.prim.ListConstruct %int4_30709, %32688, %int4_30710, %int128_30711 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33117 = torch.aten.view %33101, %33116 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30712 = torch.constant.int 4
    %int4_30713 = torch.constant.int 4
    %int128_30714 = torch.constant.int 128
    %33118 = torch.prim.ListConstruct %int4_30712, %32688, %int4_30713, %int128_30714 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33119 = torch.aten.view %33103, %33118 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30715 = torch.constant.int 4
    %int4_30716 = torch.constant.int 4
    %int128_30717 = torch.constant.int 128
    %33120 = torch.prim.ListConstruct %int4_30715, %32688, %int4_30716, %int128_30717 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33121 = torch.aten.view %33105, %33120 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30718 = torch.constant.int 4
    %int4_30719 = torch.constant.int 4
    %int128_30720 = torch.constant.int 128
    %33122 = torch.prim.ListConstruct %int4_30718, %32688, %int4_30719, %int128_30720 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33123 = torch.aten.view %33107, %33122 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30721 = torch.constant.int 4
    %int4_30722 = torch.constant.int 4
    %int128_30723 = torch.constant.int 128
    %33124 = torch.prim.ListConstruct %int4_30721, %32688, %int4_30722, %int128_30723 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33125 = torch.aten.view %33109, %33124 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30724 = torch.constant.int 4
    %int4_30725 = torch.constant.int 4
    %int128_30726 = torch.constant.int 128
    %33126 = torch.prim.ListConstruct %int4_30724, %32688, %int4_30725, %int128_30726 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33127 = torch.aten.view %33111, %33126 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
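    // NOTE (added annotation): the same unsqueeze / expand / view sequence is now applied
    // to the second operand group (%32477 ... %32491), apparently the values; here the
    // dynamic sequence length is re-queried via aten.size.int before building the expand
    // shapes instead of reusing a cached size.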
    %int-2_30727 = torch.constant.int -2
    %33128 = torch.aten.unsqueeze %32477, %int-2_30727 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30728 = torch.constant.int -2
    %33129 = torch.aten.unsqueeze %32479, %int-2_30728 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30729 = torch.constant.int -2
    %33130 = torch.aten.unsqueeze %32481, %int-2_30729 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30730 = torch.constant.int -2
    %33131 = torch.aten.unsqueeze %32483, %int-2_30730 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30731 = torch.constant.int -2
    %33132 = torch.aten.unsqueeze %32485, %int-2_30731 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30732 = torch.constant.int -2
    %33133 = torch.aten.unsqueeze %32487, %int-2_30732 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30733 = torch.constant.int -2
    %33134 = torch.aten.unsqueeze %32489, %int-2_30733 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_30734 = torch.constant.int -2
    %33135 = torch.aten.unsqueeze %32491, %int-2_30734 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %33135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_30735 = torch.constant.int 1
    %33136 = torch.aten.size.int %32401, %int1_30735 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_30736 = torch.constant.int 4
    %int1_30737 = torch.constant.int 1
    %int4_30738 = torch.constant.int 4
    %int128_30739 = torch.constant.int 128
    %33137 = torch.prim.ListConstruct %int4_30736, %33136, %int1_30737, %int4_30738, %int128_30739 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30740 = torch.constant.bool false
    %33138 = torch.aten.expand %33128, %33137, %false_30740 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30741 = torch.constant.int 4
    %int1_30742 = torch.constant.int 1
    %int4_30743 = torch.constant.int 4
    %int128_30744 = torch.constant.int 128
    %33139 = torch.prim.ListConstruct %int4_30741, %33136, %int1_30742, %int4_30743, %int128_30744 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30745 = torch.constant.bool false
    %33140 = torch.aten.expand %33129, %33139, %false_30745 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30746 = torch.constant.int 4
    %int1_30747 = torch.constant.int 1
    %int4_30748 = torch.constant.int 4
    %int128_30749 = torch.constant.int 128
    %33141 = torch.prim.ListConstruct %int4_30746, %33136, %int1_30747, %int4_30748, %int128_30749 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30750 = torch.constant.bool false
    %33142 = torch.aten.expand %33130, %33141, %false_30750 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30751 = torch.constant.int 4
    %int1_30752 = torch.constant.int 1
    %int4_30753 = torch.constant.int 4
    %int128_30754 = torch.constant.int 128
    %33143 = torch.prim.ListConstruct %int4_30751, %33136, %int1_30752, %int4_30753, %int128_30754 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30755 = torch.constant.bool false
    %33144 = torch.aten.expand %33131, %33143, %false_30755 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30756 = torch.constant.int 4
    %int1_30757 = torch.constant.int 1
    %int4_30758 = torch.constant.int 4
    %int128_30759 = torch.constant.int 128
    %33145 = torch.prim.ListConstruct %int4_30756, %33136, %int1_30757, %int4_30758, %int128_30759 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30760 = torch.constant.bool false
    %33146 = torch.aten.expand %33132, %33145, %false_30760 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30761 = torch.constant.int 4
    %int1_30762 = torch.constant.int 1
    %int4_30763 = torch.constant.int 4
    %int128_30764 = torch.constant.int 128
    %33147 = torch.prim.ListConstruct %int4_30761, %33136, %int1_30762, %int4_30763, %int128_30764 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30765 = torch.constant.bool false
    %33148 = torch.aten.expand %33133, %33147, %false_30765 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30766 = torch.constant.int 4
    %int1_30767 = torch.constant.int 1
    %int4_30768 = torch.constant.int 4
    %int128_30769 = torch.constant.int 128
    %33149 = torch.prim.ListConstruct %int4_30766, %33136, %int1_30767, %int4_30768, %int128_30769 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30770 = torch.constant.bool false
    %33150 = torch.aten.expand %33134, %33149, %false_30770 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30771 = torch.constant.int 4
    %int1_30772 = torch.constant.int 1
    %int4_30773 = torch.constant.int 4
    %int128_30774 = torch.constant.int 128
    %33151 = torch.prim.ListConstruct %int4_30771, %33136, %int1_30772, %int4_30773, %int128_30774 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_30775 = torch.constant.bool false
    %33152 = torch.aten.expand %33135, %33151, %false_30775 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %33152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_30776 = torch.constant.int 4
    %int4_30777 = torch.constant.int 4
    %int128_30778 = torch.constant.int 128
    %33153 = torch.prim.ListConstruct %int4_30776, %33136, %int4_30777, %int128_30778 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33154 = torch.aten.view %33138, %33153 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30779 = torch.constant.int 4
    %int4_30780 = torch.constant.int 4
    %int128_30781 = torch.constant.int 128
    %33155 = torch.prim.ListConstruct %int4_30779, %33136, %int4_30780, %int128_30781 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33156 = torch.aten.view %33140, %33155 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30782 = torch.constant.int 4
    %int4_30783 = torch.constant.int 4
    %int128_30784 = torch.constant.int 128
    %33157 = torch.prim.ListConstruct %int4_30782, %33136, %int4_30783, %int128_30784 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33158 = torch.aten.view %33142, %33157 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30785 = torch.constant.int 4
    %int4_30786 = torch.constant.int 4
    %int128_30787 = torch.constant.int 128
    %33159 = torch.prim.ListConstruct %int4_30785, %33136, %int4_30786, %int128_30787 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33160 = torch.aten.view %33144, %33159 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30788 = torch.constant.int 4
    %int4_30789 = torch.constant.int 4
    %int128_30790 = torch.constant.int 128
    %33161 = torch.prim.ListConstruct %int4_30788, %33136, %int4_30789, %int128_30790 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33162 = torch.aten.view %33146, %33161 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30791 = torch.constant.int 4
    %int4_30792 = torch.constant.int 4
    %int128_30793 = torch.constant.int 128
    %33163 = torch.prim.ListConstruct %int4_30791, %33136, %int4_30792, %int128_30793 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33164 = torch.aten.view %33148, %33163 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30794 = torch.constant.int 4
    %int4_30795 = torch.constant.int 4
    %int128_30796 = torch.constant.int 128
    %33165 = torch.prim.ListConstruct %int4_30794, %33136, %int4_30795, %int128_30796 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33166 = torch.aten.view %33150, %33165 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_30797 = torch.constant.int 4
    %int4_30798 = torch.constant.int 4
    %int128_30799 = torch.constant.int 128
    %33167 = torch.prim.ListConstruct %int4_30797, %33136, %int4_30798, %int128_30799 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33168 = torch.aten.view %33152, %33167 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
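    // NOTE (added annotation): transpose dims 1 and 2 to put heads ahead of sequence,
    // [4,?,4,128] -> [4,4,?,128], for the queries (%32544 ...), the expanded keys
    // (%33113 ...), and the expanded values (%33154 ...), eight shards each.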
    %int1_30800 = torch.constant.int 1
    %int2_30801 = torch.constant.int 2
    %33169 = torch.aten.transpose.int %32544, %int1_30800, %int2_30801 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33169, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30802 = torch.constant.int 1
    %int2_30803 = torch.constant.int 2
    %33170 = torch.aten.transpose.int %32559, %int1_30802, %int2_30803 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33170, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30804 = torch.constant.int 1
    %int2_30805 = torch.constant.int 2
    %33171 = torch.aten.transpose.int %32574, %int1_30804, %int2_30805 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33171, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30806 = torch.constant.int 1
    %int2_30807 = torch.constant.int 2
    %33172 = torch.aten.transpose.int %32589, %int1_30806, %int2_30807 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33172, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30808 = torch.constant.int 1
    %int2_30809 = torch.constant.int 2
    %33173 = torch.aten.transpose.int %32604, %int1_30808, %int2_30809 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33173, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30810 = torch.constant.int 1
    %int2_30811 = torch.constant.int 2
    %33174 = torch.aten.transpose.int %32619, %int1_30810, %int2_30811 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33174, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30812 = torch.constant.int 1
    %int2_30813 = torch.constant.int 2
    %33175 = torch.aten.transpose.int %32634, %int1_30812, %int2_30813 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33175, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30814 = torch.constant.int 1
    %int2_30815 = torch.constant.int 2
    %33176 = torch.aten.transpose.int %32649, %int1_30814, %int2_30815 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33176, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30816 = torch.constant.int 1
    %int2_30817 = torch.constant.int 2
    %33177 = torch.aten.transpose.int %33113, %int1_30816, %int2_30817 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33177, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30818 = torch.constant.int 1
    %int2_30819 = torch.constant.int 2
    %33178 = torch.aten.transpose.int %33115, %int1_30818, %int2_30819 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33178, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30820 = torch.constant.int 1
    %int2_30821 = torch.constant.int 2
    %33179 = torch.aten.transpose.int %33117, %int1_30820, %int2_30821 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33179, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30822 = torch.constant.int 1
    %int2_30823 = torch.constant.int 2
    %33180 = torch.aten.transpose.int %33119, %int1_30822, %int2_30823 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33180, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30824 = torch.constant.int 1
    %int2_30825 = torch.constant.int 2
    %33181 = torch.aten.transpose.int %33121, %int1_30824, %int2_30825 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33181, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30826 = torch.constant.int 1
    %int2_30827 = torch.constant.int 2
    %33182 = torch.aten.transpose.int %33123, %int1_30826, %int2_30827 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33182, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30828 = torch.constant.int 1
    %int2_30829 = torch.constant.int 2
    %33183 = torch.aten.transpose.int %33125, %int1_30828, %int2_30829 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33183, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30830 = torch.constant.int 1
    %int2_30831 = torch.constant.int 2
    %33184 = torch.aten.transpose.int %33127, %int1_30830, %int2_30831 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33184, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30832 = torch.constant.int 1
    %int2_30833 = torch.constant.int 2
    %33185 = torch.aten.transpose.int %33154, %int1_30832, %int2_30833 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33185, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30834 = torch.constant.int 1
    %int2_30835 = torch.constant.int 2
    %33186 = torch.aten.transpose.int %33156, %int1_30834, %int2_30835 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33186, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30836 = torch.constant.int 1
    %int2_30837 = torch.constant.int 2
    %33187 = torch.aten.transpose.int %33158, %int1_30836, %int2_30837 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33187, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30838 = torch.constant.int 1
    %int2_30839 = torch.constant.int 2
    %33188 = torch.aten.transpose.int %33160, %int1_30838, %int2_30839 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33188, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30840 = torch.constant.int 1
    %int2_30841 = torch.constant.int 2
    %33189 = torch.aten.transpose.int %33162, %int1_30840, %int2_30841 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33189, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30842 = torch.constant.int 1
    %int2_30843 = torch.constant.int 2
    %33190 = torch.aten.transpose.int %33164, %int1_30842, %int2_30843 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33190, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30844 = torch.constant.int 1
    %int2_30845 = torch.constant.int 2
    %33191 = torch.aten.transpose.int %33166, %int1_30844, %int2_30845 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33191, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_30846 = torch.constant.int 1
    %int2_30847 = torch.constant.int 2
    %33192 = torch.aten.transpose.int %33168, %int1_30846, %int2_30847 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %33192, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
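    // NOTE (added annotation): one flash-attention call per device shard. Assuming the
    // standard _scaled_dot_product_flash_attention_for_cpu operand order (query, key,
    // value, dropout_p, is_causal, attn_mask, scale), the trailing operands here mean
    // dropout_p = 0.0, is_causal = true, and unset attn_mask / scale. Only result #0
    // (the attention output) is consumed below; the logsumexp result appears unused here.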
    %float0.000000e00_30848 = torch.constant.float 0.000000e+00
    %true_30849 = torch.constant.bool true
    %none_30850 = torch.constant.none
    %none_30851 = torch.constant.none
    %33193:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%33169, %33177, %33185, %float0.000000e00_30848, %true_30849, %none_30850, %none_30851) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %33193#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_30852 = torch.constant.float 0.000000e+00
    %true_30853 = torch.constant.bool true
    %none_30854 = torch.constant.none
    %none_30855 = torch.constant.none
    %33194:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%33170, %33178, %33186, %float0.000000e00_30852, %true_30853, %none_30854, %none_30855) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %33194#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_30856 = torch.constant.float 0.000000e+00
    %true_30857 = torch.constant.bool true
    %none_30858 = torch.constant.none
    %none_30859 = torch.constant.none
    %33195:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%33171, %33179, %33187, %float0.000000e00_30856, %true_30857, %none_30858, %none_30859) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %33195#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_30860 = torch.constant.float 0.000000e+00
    %true_30861 = torch.constant.bool true
    %none_30862 = torch.constant.none
    %none_30863 = torch.constant.none
    %33196:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%33172, %33180, %33188, %float0.000000e00_30860, %true_30861, %none_30862, %none_30863) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %33196#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_30864 = torch.constant.float 0.000000e+00
    %true_30865 = torch.constant.bool true
    %none_30866 = torch.constant.none
    %none_30867 = torch.constant.none
    %33197:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%33173, %33181, %33189, %float0.000000e00_30864, %true_30865, %none_30866, %none_30867) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %33197#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_30868 = torch.constant.float 0.000000e+00
    %true_30869 = torch.constant.bool true
    %none_30870 = torch.constant.none
    %none_30871 = torch.constant.none
    %33198:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%33174, %33182, %33190, %float0.000000e00_30868, %true_30869, %none_30870, %none_30871) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %33198#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_30872 = torch.constant.float 0.000000e+00
    %true_30873 = torch.constant.bool true
    %none_30874 = torch.constant.none
    %none_30875 = torch.constant.none
    %33199:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%33175, %33183, %33191, %float0.000000e00_30872, %true_30873, %none_30874, %none_30875) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %33199#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_30876 = torch.constant.float 0.000000e+00
    %true_30877 = torch.constant.bool true
    %none_30878 = torch.constant.none
    %none_30879 = torch.constant.none
    %33200:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%33176, %33184, %33192, %float0.000000e00_30876, %true_30877, %none_30878, %none_30879) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %33200#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
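    // NOTE (added annotation): transpose the attention outputs back to sequence-major
    // layout, [4,4,?,128] -> [4,?,4,128], one per shard.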
    %int1_30880 = torch.constant.int 1
    %int2_30881 = torch.constant.int 2
    %33201 = torch.aten.transpose.int %33193#0, %int1_30880, %int2_30881 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_30882 = torch.constant.int 1
    %int2_30883 = torch.constant.int 2
    %33202 = torch.aten.transpose.int %33194#0, %int1_30882, %int2_30883 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_30884 = torch.constant.int 1
    %int2_30885 = torch.constant.int 2
    %33203 = torch.aten.transpose.int %33195#0, %int1_30884, %int2_30885 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_30886 = torch.constant.int 1
    %int2_30887 = torch.constant.int 2
    %33204 = torch.aten.transpose.int %33196#0, %int1_30886, %int2_30887 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_30888 = torch.constant.int 1
    %int2_30889 = torch.constant.int 2
    %33205 = torch.aten.transpose.int %33197#0, %int1_30888, %int2_30889 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_30890 = torch.constant.int 1
    %int2_30891 = torch.constant.int 2
    %33206 = torch.aten.transpose.int %33198#0, %int1_30890, %int2_30891 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_30892 = torch.constant.int 1
    %int2_30893 = torch.constant.int 2
    %33207 = torch.aten.transpose.int %33199#0, %int1_30892, %int2_30893 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_30894 = torch.constant.int 1
    %int2_30895 = torch.constant.int 2
    %33208 = torch.aten.transpose.int %33200#0, %int1_30894, %int2_30895 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %33208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
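    // NOTE (added annotation): merge the 4 heads x 128 head dim into a single 512-wide
    // feature axis, [4,?,4,128] -> [4,?,512], per shard.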
    %int4_30896 = torch.constant.int 4
    %int512_30897 = torch.constant.int 512
    %33209 = torch.prim.ListConstruct %int4_30896, %32530, %int512_30897 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33210 = torch.aten.view %33201, %33209 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %33210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_30898 = torch.constant.int 4
    %int512_30899 = torch.constant.int 512
    %33211 = torch.prim.ListConstruct %int4_30898, %32545, %int512_30899 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33212 = torch.aten.view %33202, %33211 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %33212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_30900 = torch.constant.int 4
    %int512_30901 = torch.constant.int 512
    %33213 = torch.prim.ListConstruct %int4_30900, %32560, %int512_30901 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33214 = torch.aten.view %33203, %33213 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %33214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_30902 = torch.constant.int 4
    %int512_30903 = torch.constant.int 512
    %33215 = torch.prim.ListConstruct %int4_30902, %32575, %int512_30903 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33216 = torch.aten.view %33204, %33215 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %33216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_30904 = torch.constant.int 4
    %int512_30905 = torch.constant.int 512
    %33217 = torch.prim.ListConstruct %int4_30904, %32590, %int512_30905 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33218 = torch.aten.view %33205, %33217 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %33218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_30906 = torch.constant.int 4
    %int512_30907 = torch.constant.int 512
    %33219 = torch.prim.ListConstruct %int4_30906, %32605, %int512_30907 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33220 = torch.aten.view %33206, %33219 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %33220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_30908 = torch.constant.int 4
    %int512_30909 = torch.constant.int 512
    %33221 = torch.prim.ListConstruct %int4_30908, %32620, %int512_30909 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33222 = torch.aten.view %33207, %33221 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %33222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_30910 = torch.constant.int 4
    %int512_30911 = torch.constant.int 512
    %33223 = torch.prim.ListConstruct %int4_30910, %32635, %int512_30911 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33224 = torch.aten.view %33208, %33223 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %33224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
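    // NOTE (added annotation): permute each [4096,512] weight shard (%1192 ... %1199,
    // likely the sharded attention output projection) to [512,4096] so the flattened
    // activations can right-multiply it.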
    %int1_30912 = torch.constant.int 1
    %int0_30913 = torch.constant.int 0
    %33225 = torch.prim.ListConstruct %int1_30912, %int0_30913 : (!torch.int, !torch.int) -> !torch.list<int>
    %33226 = torch.aten.permute %1192, %33225 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_30914 = torch.constant.int 1
    %int0_30915 = torch.constant.int 0
    %33227 = torch.prim.ListConstruct %int1_30914, %int0_30915 : (!torch.int, !torch.int) -> !torch.list<int>
    %33228 = torch.aten.permute %1193, %33227 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_30916 = torch.constant.int 1
    %int0_30917 = torch.constant.int 0
    %33229 = torch.prim.ListConstruct %int1_30916, %int0_30917 : (!torch.int, !torch.int) -> !torch.list<int>
    %33230 = torch.aten.permute %1194, %33229 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_30918 = torch.constant.int 1
    %int0_30919 = torch.constant.int 0
    %33231 = torch.prim.ListConstruct %int1_30918, %int0_30919 : (!torch.int, !torch.int) -> !torch.list<int>
    %33232 = torch.aten.permute %1195, %33231 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_30920 = torch.constant.int 1
    %int0_30921 = torch.constant.int 0
    %33233 = torch.prim.ListConstruct %int1_30920, %int0_30921 : (!torch.int, !torch.int) -> !torch.list<int>
    %33234 = torch.aten.permute %1196, %33233 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_30922 = torch.constant.int 1
    %int0_30923 = torch.constant.int 0
    %33235 = torch.prim.ListConstruct %int1_30922, %int0_30923 : (!torch.int, !torch.int) -> !torch.list<int>
    %33236 = torch.aten.permute %1197, %33235 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_30924 = torch.constant.int 1
    %int0_30925 = torch.constant.int 0
    %33237 = torch.prim.ListConstruct %int1_30924, %int0_30925 : (!torch.int, !torch.int) -> !torch.list<int>
    %33238 = torch.aten.permute %1198, %33237 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_30926 = torch.constant.int 1
    %int0_30927 = torch.constant.int 0
    %33239 = torch.prim.ListConstruct %int1_30926, %int0_30927 : (!torch.int, !torch.int) -> !torch.list<int>
    %33240 = torch.aten.permute %1199, %33239 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
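    // For each device: flatten the [4,?,512] activation shard to [?,512], multiply by
    // the transposed [512,4096] weight shard, and restore the [4,?,4096] batch shape.
    // Each result is one partial sum of the full projection output.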
    %int4_30928 = torch.constant.int 4
    %33241 = torch.aten.mul.int %int4_30928, %32530 : !torch.int, !torch.int -> !torch.int
    %int512_30929 = torch.constant.int 512
    %33242 = torch.prim.ListConstruct %33241, %int512_30929 : (!torch.int, !torch.int) -> !torch.list<int>
    %33243 = torch.aten.view %33210, %33242 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %33243, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %33244 = torch.aten.mm %33243, %33226 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33244, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_30930 = torch.constant.int 4
    %int4096_30931 = torch.constant.int 4096
    %33245 = torch.prim.ListConstruct %int4_30930, %32530, %int4096_30931 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33246 = torch.aten.view %33244, %33245 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_30932 = torch.constant.int 4
    %33247 = torch.aten.mul.int %int4_30932, %32545 : !torch.int, !torch.int -> !torch.int
    %int512_30933 = torch.constant.int 512
    %33248 = torch.prim.ListConstruct %33247, %int512_30933 : (!torch.int, !torch.int) -> !torch.list<int>
    %33249 = torch.aten.view %33212, %33248 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %33249, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %33250 = torch.aten.mm %33249, %33228 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33250, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_30934 = torch.constant.int 4
    %int4096_30935 = torch.constant.int 4096
    %33251 = torch.prim.ListConstruct %int4_30934, %32545, %int4096_30935 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33252 = torch.aten.view %33250, %33251 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_30936 = torch.constant.int 4
    %33253 = torch.aten.mul.int %int4_30936, %32560 : !torch.int, !torch.int -> !torch.int
    %int512_30937 = torch.constant.int 512
    %33254 = torch.prim.ListConstruct %33253, %int512_30937 : (!torch.int, !torch.int) -> !torch.list<int>
    %33255 = torch.aten.view %33214, %33254 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %33255, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %33256 = torch.aten.mm %33255, %33230 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33256, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_30938 = torch.constant.int 4
    %int4096_30939 = torch.constant.int 4096
    %33257 = torch.prim.ListConstruct %int4_30938, %32560, %int4096_30939 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33258 = torch.aten.view %33256, %33257 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_30940 = torch.constant.int 4
    %33259 = torch.aten.mul.int %int4_30940, %32575 : !torch.int, !torch.int -> !torch.int
    %int512_30941 = torch.constant.int 512
    %33260 = torch.prim.ListConstruct %33259, %int512_30941 : (!torch.int, !torch.int) -> !torch.list<int>
    %33261 = torch.aten.view %33216, %33260 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %33261, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %33262 = torch.aten.mm %33261, %33232 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33262, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_30942 = torch.constant.int 4
    %int4096_30943 = torch.constant.int 4096
    %33263 = torch.prim.ListConstruct %int4_30942, %32575, %int4096_30943 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33264 = torch.aten.view %33262, %33263 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_30944 = torch.constant.int 4
    %33265 = torch.aten.mul.int %int4_30944, %32590 : !torch.int, !torch.int -> !torch.int
    %int512_30945 = torch.constant.int 512
    %33266 = torch.prim.ListConstruct %33265, %int512_30945 : (!torch.int, !torch.int) -> !torch.list<int>
    %33267 = torch.aten.view %33218, %33266 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %33267, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %33268 = torch.aten.mm %33267, %33234 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33268, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_30946 = torch.constant.int 4
    %int4096_30947 = torch.constant.int 4096
    %33269 = torch.prim.ListConstruct %int4_30946, %32590, %int4096_30947 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33270 = torch.aten.view %33268, %33269 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_30948 = torch.constant.int 4
    %33271 = torch.aten.mul.int %int4_30948, %32605 : !torch.int, !torch.int -> !torch.int
    %int512_30949 = torch.constant.int 512
    %33272 = torch.prim.ListConstruct %33271, %int512_30949 : (!torch.int, !torch.int) -> !torch.list<int>
    %33273 = torch.aten.view %33220, %33272 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %33273, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %33274 = torch.aten.mm %33273, %33236 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33274, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_30950 = torch.constant.int 4
    %int4096_30951 = torch.constant.int 4096
    %33275 = torch.prim.ListConstruct %int4_30950, %32605, %int4096_30951 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33276 = torch.aten.view %33274, %33275 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_30952 = torch.constant.int 4
    %33277 = torch.aten.mul.int %int4_30952, %32620 : !torch.int, !torch.int -> !torch.int
    %int512_30953 = torch.constant.int 512
    %33278 = torch.prim.ListConstruct %33277, %int512_30953 : (!torch.int, !torch.int) -> !torch.list<int>
    %33279 = torch.aten.view %33222, %33278 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %33279, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %33280 = torch.aten.mm %33279, %33238 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33280, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_30954 = torch.constant.int 4
    %int4096_30955 = torch.constant.int 4096
    %33281 = torch.prim.ListConstruct %int4_30954, %32620, %int4096_30955 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33282 = torch.aten.view %33280, %33281 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_30956 = torch.constant.int 4
    %33283 = torch.aten.mul.int %int4_30956, %32635 : !torch.int, !torch.int -> !torch.int
    %int512_30957 = torch.constant.int 512
    %33284 = torch.prim.ListConstruct %33283, %int512_30957 : (!torch.int, !torch.int) -> !torch.list<int>
    %33285 = torch.aten.view %33224, %33284 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %33285, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %33286 = torch.aten.mm %33285, %33240 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33286, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_30958 = torch.constant.int 4
    %int4096_30959 = torch.constant.int 4096
    %33287 = torch.prim.ListConstruct %int4_30958, %32635, %int4096_30959 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33288 = torch.aten.view %33286, %33287 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
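    // Unrolled all-reduce over the eight partials (%33246, %33252, ..., %33288): each
    // device receives the seven remote partials via flow.tensor.transfer and accumulates
    // them with its local one. First, the reduction on @__device_0 (local partial %33246).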
    %33289 = torch_c.to_builtin_tensor %33252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30960 = arith.constant 1 : index
    %dim_30961 = tensor.dim %33289, %c1_30960 : tensor<4x?x4096xf16>
    %33290 = flow.tensor.transfer %33289 : tensor<4x?x4096xf16>{%dim_30961} to #hal.device.promise<@__device_0>
    %33291 = torch_c.from_builtin_tensor %33290 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33292 = torch_c.to_builtin_tensor %33258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30962 = arith.constant 1 : index
    %dim_30963 = tensor.dim %33292, %c1_30962 : tensor<4x?x4096xf16>
    %33293 = flow.tensor.transfer %33292 : tensor<4x?x4096xf16>{%dim_30963} to #hal.device.promise<@__device_0>
    %33294 = torch_c.from_builtin_tensor %33293 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33295 = torch_c.to_builtin_tensor %33264 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30964 = arith.constant 1 : index
    %dim_30965 = tensor.dim %33295, %c1_30964 : tensor<4x?x4096xf16>
    %33296 = flow.tensor.transfer %33295 : tensor<4x?x4096xf16>{%dim_30965} to #hal.device.promise<@__device_0>
    %33297 = torch_c.from_builtin_tensor %33296 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33298 = torch_c.to_builtin_tensor %33270 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30966 = arith.constant 1 : index
    %dim_30967 = tensor.dim %33298, %c1_30966 : tensor<4x?x4096xf16>
    %33299 = flow.tensor.transfer %33298 : tensor<4x?x4096xf16>{%dim_30967} to #hal.device.promise<@__device_0>
    %33300 = torch_c.from_builtin_tensor %33299 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33301 = torch_c.to_builtin_tensor %33276 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30968 = arith.constant 1 : index
    %dim_30969 = tensor.dim %33301, %c1_30968 : tensor<4x?x4096xf16>
    %33302 = flow.tensor.transfer %33301 : tensor<4x?x4096xf16>{%dim_30969} to #hal.device.promise<@__device_0>
    %33303 = torch_c.from_builtin_tensor %33302 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33304 = torch_c.to_builtin_tensor %33282 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30970 = arith.constant 1 : index
    %dim_30971 = tensor.dim %33304, %c1_30970 : tensor<4x?x4096xf16>
    %33305 = flow.tensor.transfer %33304 : tensor<4x?x4096xf16>{%dim_30971} to #hal.device.promise<@__device_0>
    %33306 = torch_c.from_builtin_tensor %33305 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33307 = torch_c.to_builtin_tensor %33288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30972 = arith.constant 1 : index
    %dim_30973 = tensor.dim %33307, %c1_30972 : tensor<4x?x4096xf16>
    %33308 = flow.tensor.transfer %33307 : tensor<4x?x4096xf16>{%dim_30973} to #hal.device.promise<@__device_0>
    %33309 = torch_c.from_builtin_tensor %33308 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_30974 = torch.constant.int 1
    %33310 = torch.aten.add.Tensor %33246, %33291, %int1_30974 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_30975 = torch.constant.int 1
    %33311 = torch.aten.add.Tensor %33310, %33294, %int1_30975 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_30976 = torch.constant.int 1
    %33312 = torch.aten.add.Tensor %33311, %33297, %int1_30976 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_30977 = torch.constant.int 1
    %33313 = torch.aten.add.Tensor %33312, %33300, %int1_30977 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_30978 = torch.constant.int 1
    %33314 = torch.aten.add.Tensor %33313, %33303, %int1_30978 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_30979 = torch.constant.int 1
    %33315 = torch.aten.add.Tensor %33314, %33306, %int1_30979 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_30980 = torch.constant.int 1
    %33316 = torch.aten.add.Tensor %33315, %33309, %int1_30980 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
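    // Same gather-and-accumulate on @__device_1; its local partial %33252 is added directly.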
    %33317 = torch_c.to_builtin_tensor %33246 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30981 = arith.constant 1 : index
    %dim_30982 = tensor.dim %33317, %c1_30981 : tensor<4x?x4096xf16>
    %33318 = flow.tensor.transfer %33317 : tensor<4x?x4096xf16>{%dim_30982} to #hal.device.promise<@__device_1>
    %33319 = torch_c.from_builtin_tensor %33318 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33320 = torch_c.to_builtin_tensor %33258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30983 = arith.constant 1 : index
    %dim_30984 = tensor.dim %33320, %c1_30983 : tensor<4x?x4096xf16>
    %33321 = flow.tensor.transfer %33320 : tensor<4x?x4096xf16>{%dim_30984} to #hal.device.promise<@__device_1>
    %33322 = torch_c.from_builtin_tensor %33321 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33323 = torch_c.to_builtin_tensor %33264 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30985 = arith.constant 1 : index
    %dim_30986 = tensor.dim %33323, %c1_30985 : tensor<4x?x4096xf16>
    %33324 = flow.tensor.transfer %33323 : tensor<4x?x4096xf16>{%dim_30986} to #hal.device.promise<@__device_1>
    %33325 = torch_c.from_builtin_tensor %33324 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33326 = torch_c.to_builtin_tensor %33270 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30987 = arith.constant 1 : index
    %dim_30988 = tensor.dim %33326, %c1_30987 : tensor<4x?x4096xf16>
    %33327 = flow.tensor.transfer %33326 : tensor<4x?x4096xf16>{%dim_30988} to #hal.device.promise<@__device_1>
    %33328 = torch_c.from_builtin_tensor %33327 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33329 = torch_c.to_builtin_tensor %33276 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30989 = arith.constant 1 : index
    %dim_30990 = tensor.dim %33329, %c1_30989 : tensor<4x?x4096xf16>
    %33330 = flow.tensor.transfer %33329 : tensor<4x?x4096xf16>{%dim_30990} to #hal.device.promise<@__device_1>
    %33331 = torch_c.from_builtin_tensor %33330 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33332 = torch_c.to_builtin_tensor %33282 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30991 = arith.constant 1 : index
    %dim_30992 = tensor.dim %33332, %c1_30991 : tensor<4x?x4096xf16>
    %33333 = flow.tensor.transfer %33332 : tensor<4x?x4096xf16>{%dim_30992} to #hal.device.promise<@__device_1>
    %33334 = torch_c.from_builtin_tensor %33333 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33335 = torch_c.to_builtin_tensor %33288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_30993 = arith.constant 1 : index
    %dim_30994 = tensor.dim %33335, %c1_30993 : tensor<4x?x4096xf16>
    %33336 = flow.tensor.transfer %33335 : tensor<4x?x4096xf16>{%dim_30994} to #hal.device.promise<@__device_1>
    %33337 = torch_c.from_builtin_tensor %33336 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_30995 = torch.constant.int 1
    %33338 = torch.aten.add.Tensor %33319, %33252, %int1_30995 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_30996 = torch.constant.int 1
    %33339 = torch.aten.add.Tensor %33338, %33322, %int1_30996 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_30997 = torch.constant.int 1
    %33340 = torch.aten.add.Tensor %33339, %33325, %int1_30997 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_30998 = torch.constant.int 1
    %33341 = torch.aten.add.Tensor %33340, %33328, %int1_30998 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_30999 = torch.constant.int 1
    %33342 = torch.aten.add.Tensor %33341, %33331, %int1_30999 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31000 = torch.constant.int 1
    %33343 = torch.aten.add.Tensor %33342, %33334, %int1_31000 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31001 = torch.constant.int 1
    %33344 = torch.aten.add.Tensor %33343, %33337, %int1_31001 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
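    // Reduction replica for @__device_2 (local partial %33258).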
    %33345 = torch_c.to_builtin_tensor %33246 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31002 = arith.constant 1 : index
    %dim_31003 = tensor.dim %33345, %c1_31002 : tensor<4x?x4096xf16>
    %33346 = flow.tensor.transfer %33345 : tensor<4x?x4096xf16>{%dim_31003} to #hal.device.promise<@__device_2>
    %33347 = torch_c.from_builtin_tensor %33346 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33348 = torch_c.to_builtin_tensor %33252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31004 = arith.constant 1 : index
    %dim_31005 = tensor.dim %33348, %c1_31004 : tensor<4x?x4096xf16>
    %33349 = flow.tensor.transfer %33348 : tensor<4x?x4096xf16>{%dim_31005} to #hal.device.promise<@__device_2>
    %33350 = torch_c.from_builtin_tensor %33349 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33350, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33351 = torch_c.to_builtin_tensor %33264 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31006 = arith.constant 1 : index
    %dim_31007 = tensor.dim %33351, %c1_31006 : tensor<4x?x4096xf16>
    %33352 = flow.tensor.transfer %33351 : tensor<4x?x4096xf16>{%dim_31007} to #hal.device.promise<@__device_2>
    %33353 = torch_c.from_builtin_tensor %33352 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33354 = torch_c.to_builtin_tensor %33270 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31008 = arith.constant 1 : index
    %dim_31009 = tensor.dim %33354, %c1_31008 : tensor<4x?x4096xf16>
    %33355 = flow.tensor.transfer %33354 : tensor<4x?x4096xf16>{%dim_31009} to #hal.device.promise<@__device_2>
    %33356 = torch_c.from_builtin_tensor %33355 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33357 = torch_c.to_builtin_tensor %33276 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31010 = arith.constant 1 : index
    %dim_31011 = tensor.dim %33357, %c1_31010 : tensor<4x?x4096xf16>
    %33358 = flow.tensor.transfer %33357 : tensor<4x?x4096xf16>{%dim_31011} to #hal.device.promise<@__device_2>
    %33359 = torch_c.from_builtin_tensor %33358 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33360 = torch_c.to_builtin_tensor %33282 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31012 = arith.constant 1 : index
    %dim_31013 = tensor.dim %33360, %c1_31012 : tensor<4x?x4096xf16>
    %33361 = flow.tensor.transfer %33360 : tensor<4x?x4096xf16>{%dim_31013} to #hal.device.promise<@__device_2>
    %33362 = torch_c.from_builtin_tensor %33361 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33363 = torch_c.to_builtin_tensor %33288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31014 = arith.constant 1 : index
    %dim_31015 = tensor.dim %33363, %c1_31014 : tensor<4x?x4096xf16>
    %33364 = flow.tensor.transfer %33363 : tensor<4x?x4096xf16>{%dim_31015} to #hal.device.promise<@__device_2>
    %33365 = torch_c.from_builtin_tensor %33364 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31016 = torch.constant.int 1
    %33366 = torch.aten.add.Tensor %33347, %33350, %int1_31016 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31017 = torch.constant.int 1
    %33367 = torch.aten.add.Tensor %33366, %33258, %int1_31017 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31018 = torch.constant.int 1
    %33368 = torch.aten.add.Tensor %33367, %33353, %int1_31018 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31019 = torch.constant.int 1
    %33369 = torch.aten.add.Tensor %33368, %33356, %int1_31019 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31020 = torch.constant.int 1
    %33370 = torch.aten.add.Tensor %33369, %33359, %int1_31020 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31021 = torch.constant.int 1
    %33371 = torch.aten.add.Tensor %33370, %33362, %int1_31021 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31022 = torch.constant.int 1
    %33372 = torch.aten.add.Tensor %33371, %33365, %int1_31022 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
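    // Reduction replica for @__device_3 (local partial %33264).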
    %33373 = torch_c.to_builtin_tensor %33246 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31023 = arith.constant 1 : index
    %dim_31024 = tensor.dim %33373, %c1_31023 : tensor<4x?x4096xf16>
    %33374 = flow.tensor.transfer %33373 : tensor<4x?x4096xf16>{%dim_31024} to #hal.device.promise<@__device_3>
    %33375 = torch_c.from_builtin_tensor %33374 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33376 = torch_c.to_builtin_tensor %33252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31025 = arith.constant 1 : index
    %dim_31026 = tensor.dim %33376, %c1_31025 : tensor<4x?x4096xf16>
    %33377 = flow.tensor.transfer %33376 : tensor<4x?x4096xf16>{%dim_31026} to #hal.device.promise<@__device_3>
    %33378 = torch_c.from_builtin_tensor %33377 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33379 = torch_c.to_builtin_tensor %33258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31027 = arith.constant 1 : index
    %dim_31028 = tensor.dim %33379, %c1_31027 : tensor<4x?x4096xf16>
    %33380 = flow.tensor.transfer %33379 : tensor<4x?x4096xf16>{%dim_31028} to #hal.device.promise<@__device_3>
    %33381 = torch_c.from_builtin_tensor %33380 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33382 = torch_c.to_builtin_tensor %33270 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31029 = arith.constant 1 : index
    %dim_31030 = tensor.dim %33382, %c1_31029 : tensor<4x?x4096xf16>
    %33383 = flow.tensor.transfer %33382 : tensor<4x?x4096xf16>{%dim_31030} to #hal.device.promise<@__device_3>
    %33384 = torch_c.from_builtin_tensor %33383 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33385 = torch_c.to_builtin_tensor %33276 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31031 = arith.constant 1 : index
    %dim_31032 = tensor.dim %33385, %c1_31031 : tensor<4x?x4096xf16>
    %33386 = flow.tensor.transfer %33385 : tensor<4x?x4096xf16>{%dim_31032} to #hal.device.promise<@__device_3>
    %33387 = torch_c.from_builtin_tensor %33386 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33388 = torch_c.to_builtin_tensor %33282 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31033 = arith.constant 1 : index
    %dim_31034 = tensor.dim %33388, %c1_31033 : tensor<4x?x4096xf16>
    %33389 = flow.tensor.transfer %33388 : tensor<4x?x4096xf16>{%dim_31034} to #hal.device.promise<@__device_3>
    %33390 = torch_c.from_builtin_tensor %33389 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33391 = torch_c.to_builtin_tensor %33288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31035 = arith.constant 1 : index
    %dim_31036 = tensor.dim %33391, %c1_31035 : tensor<4x?x4096xf16>
    %33392 = flow.tensor.transfer %33391 : tensor<4x?x4096xf16>{%dim_31036} to #hal.device.promise<@__device_3>
    %33393 = torch_c.from_builtin_tensor %33392 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31037 = torch.constant.int 1
    %33394 = torch.aten.add.Tensor %33375, %33378, %int1_31037 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31038 = torch.constant.int 1
    %33395 = torch.aten.add.Tensor %33394, %33381, %int1_31038 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31039 = torch.constant.int 1
    %33396 = torch.aten.add.Tensor %33395, %33264, %int1_31039 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31040 = torch.constant.int 1
    %33397 = torch.aten.add.Tensor %33396, %33384, %int1_31040 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31041 = torch.constant.int 1
    %33398 = torch.aten.add.Tensor %33397, %33387, %int1_31041 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31042 = torch.constant.int 1
    %33399 = torch.aten.add.Tensor %33398, %33390, %int1_31042 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31043 = torch.constant.int 1
    %33400 = torch.aten.add.Tensor %33399, %33393, %int1_31043 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
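    // Reduction replica for @__device_4 (local partial %33270).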
    %33401 = torch_c.to_builtin_tensor %33246 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31044 = arith.constant 1 : index
    %dim_31045 = tensor.dim %33401, %c1_31044 : tensor<4x?x4096xf16>
    %33402 = flow.tensor.transfer %33401 : tensor<4x?x4096xf16>{%dim_31045} to #hal.device.promise<@__device_4>
    %33403 = torch_c.from_builtin_tensor %33402 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33404 = torch_c.to_builtin_tensor %33252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31046 = arith.constant 1 : index
    %dim_31047 = tensor.dim %33404, %c1_31046 : tensor<4x?x4096xf16>
    %33405 = flow.tensor.transfer %33404 : tensor<4x?x4096xf16>{%dim_31047} to #hal.device.promise<@__device_4>
    %33406 = torch_c.from_builtin_tensor %33405 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33407 = torch_c.to_builtin_tensor %33258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31048 = arith.constant 1 : index
    %dim_31049 = tensor.dim %33407, %c1_31048 : tensor<4x?x4096xf16>
    %33408 = flow.tensor.transfer %33407 : tensor<4x?x4096xf16>{%dim_31049} to #hal.device.promise<@__device_4>
    %33409 = torch_c.from_builtin_tensor %33408 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33410 = torch_c.to_builtin_tensor %33264 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31050 = arith.constant 1 : index
    %dim_31051 = tensor.dim %33410, %c1_31050 : tensor<4x?x4096xf16>
    %33411 = flow.tensor.transfer %33410 : tensor<4x?x4096xf16>{%dim_31051} to #hal.device.promise<@__device_4>
    %33412 = torch_c.from_builtin_tensor %33411 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33413 = torch_c.to_builtin_tensor %33276 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31052 = arith.constant 1 : index
    %dim_31053 = tensor.dim %33413, %c1_31052 : tensor<4x?x4096xf16>
    %33414 = flow.tensor.transfer %33413 : tensor<4x?x4096xf16>{%dim_31053} to #hal.device.promise<@__device_4>
    %33415 = torch_c.from_builtin_tensor %33414 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33416 = torch_c.to_builtin_tensor %33282 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31054 = arith.constant 1 : index
    %dim_31055 = tensor.dim %33416, %c1_31054 : tensor<4x?x4096xf16>
    %33417 = flow.tensor.transfer %33416 : tensor<4x?x4096xf16>{%dim_31055} to #hal.device.promise<@__device_4>
    %33418 = torch_c.from_builtin_tensor %33417 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33419 = torch_c.to_builtin_tensor %33288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31056 = arith.constant 1 : index
    %dim_31057 = tensor.dim %33419, %c1_31056 : tensor<4x?x4096xf16>
    %33420 = flow.tensor.transfer %33419 : tensor<4x?x4096xf16>{%dim_31057} to #hal.device.promise<@__device_4>
    %33421 = torch_c.from_builtin_tensor %33420 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31058 = torch.constant.int 1
    %33422 = torch.aten.add.Tensor %33403, %33406, %int1_31058 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31059 = torch.constant.int 1
    %33423 = torch.aten.add.Tensor %33422, %33409, %int1_31059 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31060 = torch.constant.int 1
    %33424 = torch.aten.add.Tensor %33423, %33412, %int1_31060 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31061 = torch.constant.int 1
    %33425 = torch.aten.add.Tensor %33424, %33270, %int1_31061 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31062 = torch.constant.int 1
    %33426 = torch.aten.add.Tensor %33425, %33415, %int1_31062 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31063 = torch.constant.int 1
    %33427 = torch.aten.add.Tensor %33426, %33418, %int1_31063 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31064 = torch.constant.int 1
    %33428 = torch.aten.add.Tensor %33427, %33421, %int1_31064 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
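    // Reduction replica for @__device_5 (local partial %33276).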
    %33429 = torch_c.to_builtin_tensor %33246 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31065 = arith.constant 1 : index
    %dim_31066 = tensor.dim %33429, %c1_31065 : tensor<4x?x4096xf16>
    %33430 = flow.tensor.transfer %33429 : tensor<4x?x4096xf16>{%dim_31066} to #hal.device.promise<@__device_5>
    %33431 = torch_c.from_builtin_tensor %33430 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33432 = torch_c.to_builtin_tensor %33252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31067 = arith.constant 1 : index
    %dim_31068 = tensor.dim %33432, %c1_31067 : tensor<4x?x4096xf16>
    %33433 = flow.tensor.transfer %33432 : tensor<4x?x4096xf16>{%dim_31068} to #hal.device.promise<@__device_5>
    %33434 = torch_c.from_builtin_tensor %33433 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33435 = torch_c.to_builtin_tensor %33258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31069 = arith.constant 1 : index
    %dim_31070 = tensor.dim %33435, %c1_31069 : tensor<4x?x4096xf16>
    %33436 = flow.tensor.transfer %33435 : tensor<4x?x4096xf16>{%dim_31070} to #hal.device.promise<@__device_5>
    %33437 = torch_c.from_builtin_tensor %33436 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33438 = torch_c.to_builtin_tensor %33264 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31071 = arith.constant 1 : index
    %dim_31072 = tensor.dim %33438, %c1_31071 : tensor<4x?x4096xf16>
    %33439 = flow.tensor.transfer %33438 : tensor<4x?x4096xf16>{%dim_31072} to #hal.device.promise<@__device_5>
    %33440 = torch_c.from_builtin_tensor %33439 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33441 = torch_c.to_builtin_tensor %33270 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31073 = arith.constant 1 : index
    %dim_31074 = tensor.dim %33441, %c1_31073 : tensor<4x?x4096xf16>
    %33442 = flow.tensor.transfer %33441 : tensor<4x?x4096xf16>{%dim_31074} to #hal.device.promise<@__device_5>
    %33443 = torch_c.from_builtin_tensor %33442 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33444 = torch_c.to_builtin_tensor %33282 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31075 = arith.constant 1 : index
    %dim_31076 = tensor.dim %33444, %c1_31075 : tensor<4x?x4096xf16>
    %33445 = flow.tensor.transfer %33444 : tensor<4x?x4096xf16>{%dim_31076} to #hal.device.promise<@__device_5>
    %33446 = torch_c.from_builtin_tensor %33445 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33447 = torch_c.to_builtin_tensor %33288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31077 = arith.constant 1 : index
    %dim_31078 = tensor.dim %33447, %c1_31077 : tensor<4x?x4096xf16>
    %33448 = flow.tensor.transfer %33447 : tensor<4x?x4096xf16>{%dim_31078} to #hal.device.promise<@__device_5>
    %33449 = torch_c.from_builtin_tensor %33448 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31079 = torch.constant.int 1
    %33450 = torch.aten.add.Tensor %33431, %33434, %int1_31079 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31080 = torch.constant.int 1
    %33451 = torch.aten.add.Tensor %33450, %33437, %int1_31080 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31081 = torch.constant.int 1
    %33452 = torch.aten.add.Tensor %33451, %33440, %int1_31081 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31082 = torch.constant.int 1
    %33453 = torch.aten.add.Tensor %33452, %33443, %int1_31082 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31083 = torch.constant.int 1
    %33454 = torch.aten.add.Tensor %33453, %33276, %int1_31083 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31084 = torch.constant.int 1
    %33455 = torch.aten.add.Tensor %33454, %33446, %int1_31084 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31085 = torch.constant.int 1
    %33456 = torch.aten.add.Tensor %33455, %33449, %int1_31085 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
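    // Reduction replica for @__device_6 (local partial %33282).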
    %33457 = torch_c.to_builtin_tensor %33246 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31086 = arith.constant 1 : index
    %dim_31087 = tensor.dim %33457, %c1_31086 : tensor<4x?x4096xf16>
    %33458 = flow.tensor.transfer %33457 : tensor<4x?x4096xf16>{%dim_31087} to #hal.device.promise<@__device_6>
    %33459 = torch_c.from_builtin_tensor %33458 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33460 = torch_c.to_builtin_tensor %33252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31088 = arith.constant 1 : index
    %dim_31089 = tensor.dim %33460, %c1_31088 : tensor<4x?x4096xf16>
    %33461 = flow.tensor.transfer %33460 : tensor<4x?x4096xf16>{%dim_31089} to #hal.device.promise<@__device_6>
    %33462 = torch_c.from_builtin_tensor %33461 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33463 = torch_c.to_builtin_tensor %33258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31090 = arith.constant 1 : index
    %dim_31091 = tensor.dim %33463, %c1_31090 : tensor<4x?x4096xf16>
    %33464 = flow.tensor.transfer %33463 : tensor<4x?x4096xf16>{%dim_31091} to #hal.device.promise<@__device_6>
    %33465 = torch_c.from_builtin_tensor %33464 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33466 = torch_c.to_builtin_tensor %33264 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31092 = arith.constant 1 : index
    %dim_31093 = tensor.dim %33466, %c1_31092 : tensor<4x?x4096xf16>
    %33467 = flow.tensor.transfer %33466 : tensor<4x?x4096xf16>{%dim_31093} to #hal.device.promise<@__device_6>
    %33468 = torch_c.from_builtin_tensor %33467 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33469 = torch_c.to_builtin_tensor %33270 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31094 = arith.constant 1 : index
    %dim_31095 = tensor.dim %33469, %c1_31094 : tensor<4x?x4096xf16>
    %33470 = flow.tensor.transfer %33469 : tensor<4x?x4096xf16>{%dim_31095} to #hal.device.promise<@__device_6>
    %33471 = torch_c.from_builtin_tensor %33470 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33472 = torch_c.to_builtin_tensor %33276 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31096 = arith.constant 1 : index
    %dim_31097 = tensor.dim %33472, %c1_31096 : tensor<4x?x4096xf16>
    %33473 = flow.tensor.transfer %33472 : tensor<4x?x4096xf16>{%dim_31097} to #hal.device.promise<@__device_6>
    %33474 = torch_c.from_builtin_tensor %33473 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33475 = torch_c.to_builtin_tensor %33288 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31098 = arith.constant 1 : index
    %dim_31099 = tensor.dim %33475, %c1_31098 : tensor<4x?x4096xf16>
    %33476 = flow.tensor.transfer %33475 : tensor<4x?x4096xf16>{%dim_31099} to #hal.device.promise<@__device_6>
    %33477 = torch_c.from_builtin_tensor %33476 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31100 = torch.constant.int 1
    %33478 = torch.aten.add.Tensor %33459, %33462, %int1_31100 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31101 = torch.constant.int 1
    %33479 = torch.aten.add.Tensor %33478, %33465, %int1_31101 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31102 = torch.constant.int 1
    %33480 = torch.aten.add.Tensor %33479, %33468, %int1_31102 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31103 = torch.constant.int 1
    %33481 = torch.aten.add.Tensor %33480, %33471, %int1_31103 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31104 = torch.constant.int 1
    %33482 = torch.aten.add.Tensor %33481, %33474, %int1_31104 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31105 = torch.constant.int 1
    %33483 = torch.aten.add.Tensor %33482, %33282, %int1_31105 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31106 = torch.constant.int 1
    %33484 = torch.aten.add.Tensor %33483, %33477, %int1_31106 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
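    // Reduction replica for @__device_7 (local partial %33288).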
    %33485 = torch_c.to_builtin_tensor %33246 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31107 = arith.constant 1 : index
    %dim_31108 = tensor.dim %33485, %c1_31107 : tensor<4x?x4096xf16>
    %33486 = flow.tensor.transfer %33485 : tensor<4x?x4096xf16>{%dim_31108} to #hal.device.promise<@__device_7>
    %33487 = torch_c.from_builtin_tensor %33486 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33488 = torch_c.to_builtin_tensor %33252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31109 = arith.constant 1 : index
    %dim_31110 = tensor.dim %33488, %c1_31109 : tensor<4x?x4096xf16>
    %33489 = flow.tensor.transfer %33488 : tensor<4x?x4096xf16>{%dim_31110} to #hal.device.promise<@__device_7>
    %33490 = torch_c.from_builtin_tensor %33489 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33491 = torch_c.to_builtin_tensor %33258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31111 = arith.constant 1 : index
    %dim_31112 = tensor.dim %33491, %c1_31111 : tensor<4x?x4096xf16>
    %33492 = flow.tensor.transfer %33491 : tensor<4x?x4096xf16>{%dim_31112} to #hal.device.promise<@__device_7>
    %33493 = torch_c.from_builtin_tensor %33492 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33494 = torch_c.to_builtin_tensor %33264 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31113 = arith.constant 1 : index
    %dim_31114 = tensor.dim %33494, %c1_31113 : tensor<4x?x4096xf16>
    %33495 = flow.tensor.transfer %33494 : tensor<4x?x4096xf16>{%dim_31114} to #hal.device.promise<@__device_7>
    %33496 = torch_c.from_builtin_tensor %33495 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33497 = torch_c.to_builtin_tensor %33270 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31115 = arith.constant 1 : index
    %dim_31116 = tensor.dim %33497, %c1_31115 : tensor<4x?x4096xf16>
    %33498 = flow.tensor.transfer %33497 : tensor<4x?x4096xf16>{%dim_31116} to #hal.device.promise<@__device_7>
    %33499 = torch_c.from_builtin_tensor %33498 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33500 = torch_c.to_builtin_tensor %33276 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31117 = arith.constant 1 : index
    %dim_31118 = tensor.dim %33500, %c1_31117 : tensor<4x?x4096xf16>
    %33501 = flow.tensor.transfer %33500 : tensor<4x?x4096xf16>{%dim_31118} to #hal.device.promise<@__device_7>
    %33502 = torch_c.from_builtin_tensor %33501 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33503 = torch_c.to_builtin_tensor %33282 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31119 = arith.constant 1 : index
    %dim_31120 = tensor.dim %33503, %c1_31119 : tensor<4x?x4096xf16>
    %33504 = flow.tensor.transfer %33503 : tensor<4x?x4096xf16>{%dim_31120} to #hal.device.promise<@__device_7>
    %33505 = torch_c.from_builtin_tensor %33504 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
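    // Sequential add chain: %33487 + %33490 + ... + %33505, plus %33288
    // (presumably already resident on @__device_7), accumulates the eight
    // gathered shards into %33512, completing the reduction for this device.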
    %int1_31121 = torch.constant.int 1
    %33506 = torch.aten.add.Tensor %33487, %33490, %int1_31121 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31122 = torch.constant.int 1
    %33507 = torch.aten.add.Tensor %33506, %33493, %int1_31122 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31123 = torch.constant.int 1
    %33508 = torch.aten.add.Tensor %33507, %33496, %int1_31123 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31124 = torch.constant.int 1
    %33509 = torch.aten.add.Tensor %33508, %33499, %int1_31124 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31125 = torch.constant.int 1
    %33510 = torch.aten.add.Tensor %33509, %33502, %int1_31125 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31126 = torch.constant.int 1
    %33511 = torch.aten.add.Tensor %33510, %33505, %int1_31126 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31127 = torch.constant.int 1
    %33512 = torch.aten.add.Tensor %33511, %33288, %int1_31127 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
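    // Residual connections, one per device: each add combines the block input
    // (%32172 ... %32179) with the per-device reduced outputs computed above
    // (%33316 ... %33512), matching the usual h = h + sublayer(h) form.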
    %int1_31128 = torch.constant.int 1
    %33513 = torch.aten.add.Tensor %32172, %33316, %int1_31128 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31129 = torch.constant.int 1
    %33514 = torch.aten.add.Tensor %32173, %33344, %int1_31129 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31130 = torch.constant.int 1
    %33515 = torch.aten.add.Tensor %32174, %33372, %int1_31130 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31131 = torch.constant.int 1
    %33516 = torch.aten.add.Tensor %32175, %33400, %int1_31131 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31132 = torch.constant.int 1
    %33517 = torch.aten.add.Tensor %32176, %33428, %int1_31132 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31133 = torch.constant.int 1
    %33518 = torch.aten.add.Tensor %32177, %33456, %int1_31133 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31134 = torch.constant.int 1
    %33519 = torch.aten.add.Tensor %32178, %33484, %int1_31134 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31135 = torch.constant.int 1
    %33520 = torch.aten.add.Tensor %32179, %33512, %int1_31135 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
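    // What follows is RMSNorm, replicated once per device (eight parallel
    // copies of the same computation on each device's replica of the
    // activations): cast f16 -> f32, square, mean over the hidden dim, add
    // eps, rsqrt, normalize, multiply by the norm weight, cast back to f16.
    // Dtype code 6 in torch.prims.convert_element_type is torch.float32.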
    %int6_31136 = torch.constant.int 6
    %33521 = torch.prims.convert_element_type %33513, %int6_31136 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31137 = torch.constant.int 6
    %33522 = torch.prims.convert_element_type %33514, %int6_31137 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31138 = torch.constant.int 6
    %33523 = torch.prims.convert_element_type %33515, %int6_31138 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31139 = torch.constant.int 6
    %33524 = torch.prims.convert_element_type %33516, %int6_31139 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31140 = torch.constant.int 6
    %33525 = torch.prims.convert_element_type %33517, %int6_31140 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31141 = torch.constant.int 6
    %33526 = torch.prims.convert_element_type %33518, %int6_31141 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31142 = torch.constant.int 6
    %33527 = torch.prims.convert_element_type %33519, %int6_31142 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31143 = torch.constant.int 6
    %33528 = torch.prims.convert_element_type %33520, %int6_31143 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31144 = torch.constant.int 2
    %33529 = torch.aten.pow.Tensor_Scalar %33521, %int2_31144 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31145 = torch.constant.int 2
    %33530 = torch.aten.pow.Tensor_Scalar %33522, %int2_31145 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31146 = torch.constant.int 2
    %33531 = torch.aten.pow.Tensor_Scalar %33523, %int2_31146 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31147 = torch.constant.int 2
    %33532 = torch.aten.pow.Tensor_Scalar %33524, %int2_31147 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31148 = torch.constant.int 2
    %33533 = torch.aten.pow.Tensor_Scalar %33525, %int2_31148 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31149 = torch.constant.int 2
    %33534 = torch.aten.pow.Tensor_Scalar %33526, %int2_31149 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31150 = torch.constant.int 2
    %33535 = torch.aten.pow.Tensor_Scalar %33527, %int2_31150 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31151 = torch.constant.int 2
    %33536 = torch.aten.pow.Tensor_Scalar %33528, %int2_31151 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
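    // Row-wise mean of the squared activations over the last (hidden) dim,
    // keepdim = true, giving [4,?,1] per device: mean(x^2) in the RMSNorm
    // formula.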
    %int-1_31152 = torch.constant.int -1
    %33537 = torch.prim.ListConstruct %int-1_31152 : (!torch.int) -> !torch.list<int>
    %true_31153 = torch.constant.bool true
    %none_31154 = torch.constant.none
    %33538 = torch.aten.mean.dim %33529, %33537, %true_31153, %none_31154 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31155 = torch.constant.int -1
    %33539 = torch.prim.ListConstruct %int-1_31155 : (!torch.int) -> !torch.list<int>
    %true_31156 = torch.constant.bool true
    %none_31157 = torch.constant.none
    %33540 = torch.aten.mean.dim %33530, %33539, %true_31156, %none_31157 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31158 = torch.constant.int -1
    %33541 = torch.prim.ListConstruct %int-1_31158 : (!torch.int) -> !torch.list<int>
    %true_31159 = torch.constant.bool true
    %none_31160 = torch.constant.none
    %33542 = torch.aten.mean.dim %33531, %33541, %true_31159, %none_31160 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31161 = torch.constant.int -1
    %33543 = torch.prim.ListConstruct %int-1_31161 : (!torch.int) -> !torch.list<int>
    %true_31162 = torch.constant.bool true
    %none_31163 = torch.constant.none
    %33544 = torch.aten.mean.dim %33532, %33543, %true_31162, %none_31163 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31164 = torch.constant.int -1
    %33545 = torch.prim.ListConstruct %int-1_31164 : (!torch.int) -> !torch.list<int>
    %true_31165 = torch.constant.bool true
    %none_31166 = torch.constant.none
    %33546 = torch.aten.mean.dim %33533, %33545, %true_31165, %none_31166 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31167 = torch.constant.int -1
    %33547 = torch.prim.ListConstruct %int-1_31167 : (!torch.int) -> !torch.list<int>
    %true_31168 = torch.constant.bool true
    %none_31169 = torch.constant.none
    %33548 = torch.aten.mean.dim %33534, %33547, %true_31168, %none_31169 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31170 = torch.constant.int -1
    %33549 = torch.prim.ListConstruct %int-1_31170 : (!torch.int) -> !torch.list<int>
    %true_31171 = torch.constant.bool true
    %none_31172 = torch.constant.none
    %33550 = torch.aten.mean.dim %33535, %33549, %true_31171, %none_31172 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31173 = torch.constant.int -1
    %33551 = torch.prim.ListConstruct %int-1_31173 : (!torch.int) -> !torch.list<int>
    %true_31174 = torch.constant.bool true
    %none_31175 = torch.constant.none
    %33552 = torch.aten.mean.dim %33536, %33551, %true_31174, %none_31175 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
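    // Add the epsilon term: 9.9999997473787516E-6 is the f32 rounding of 1e-5,
    // i.e. mean(x^2) + eps.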
    %float9.999990e-06_31176 = torch.constant.float 9.9999997473787516E-6
    %int1_31177 = torch.constant.int 1
    %33553 = torch.aten.add.Scalar %33538, %float9.999990e-06_31176, %int1_31177 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31178 = torch.constant.float 9.9999997473787516E-6
    %int1_31179 = torch.constant.int 1
    %33554 = torch.aten.add.Scalar %33540, %float9.999990e-06_31178, %int1_31179 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31180 = torch.constant.float 9.9999997473787516E-6
    %int1_31181 = torch.constant.int 1
    %33555 = torch.aten.add.Scalar %33542, %float9.999990e-06_31180, %int1_31181 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31182 = torch.constant.float 9.9999997473787516E-6
    %int1_31183 = torch.constant.int 1
    %33556 = torch.aten.add.Scalar %33544, %float9.999990e-06_31182, %int1_31183 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31184 = torch.constant.float 9.9999997473787516E-6
    %int1_31185 = torch.constant.int 1
    %33557 = torch.aten.add.Scalar %33546, %float9.999990e-06_31184, %int1_31185 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31186 = torch.constant.float 9.9999997473787516E-6
    %int1_31187 = torch.constant.int 1
    %33558 = torch.aten.add.Scalar %33548, %float9.999990e-06_31186, %int1_31187 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31188 = torch.constant.float 9.9999997473787516E-6
    %int1_31189 = torch.constant.int 1
    %33559 = torch.aten.add.Scalar %33550, %float9.999990e-06_31188, %int1_31189 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31190 = torch.constant.float 9.9999997473787516E-6
    %int1_31191 = torch.constant.int 1
    %33560 = torch.aten.add.Scalar %33552, %float9.999990e-06_31190, %int1_31191 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
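    // rsqrt(mean(x^2) + eps): the reciprocal RMS used to normalize each row.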
    %33561 = torch.aten.rsqrt %33553 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %33562 = torch.aten.rsqrt %33554 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %33563 = torch.aten.rsqrt %33555 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %33564 = torch.aten.rsqrt %33556 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %33565 = torch.aten.rsqrt %33557 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %33566 = torch.aten.rsqrt %33558 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %33567 = torch.aten.rsqrt %33559 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %33568 = torch.aten.rsqrt %33560 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %33568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
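    // Normalize: x * rsqrt(mean(x^2) + eps), still in f32, per device.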
    %33569 = torch.aten.mul.Tensor %33521, %33561 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33570 = torch.aten.mul.Tensor %33522, %33562 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33571 = torch.aten.mul.Tensor %33523, %33563 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33572 = torch.aten.mul.Tensor %33524, %33564 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33573 = torch.aten.mul.Tensor %33525, %33565 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33574 = torch.aten.mul.Tensor %33526, %33566 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33575 = torch.aten.mul.Tensor %33527, %33567 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33576 = torch.aten.mul.Tensor %33528, %33568 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
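    // Scale by the learned norm weight. %1200 ... %1207 are [4096] f32 tensors
    // broadcast over the normalized activations; by shape and position in the
    // pattern this is the per-device copy of the RMSNorm gamma (the IR does
    // not name it at this point).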
    %33577 = torch.aten.mul.Tensor %1200, %33569 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33578 = torch.aten.mul.Tensor %1201, %33570 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33579 = torch.aten.mul.Tensor %1202, %33571 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33580 = torch.aten.mul.Tensor %1203, %33572 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33581 = torch.aten.mul.Tensor %1204, %33573 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33582 = torch.aten.mul.Tensor %1205, %33574 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33583 = torch.aten.mul.Tensor %1206, %33575 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %33584 = torch.aten.mul.Tensor %1207, %33576 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %33584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
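    // Cast the normalized activations back to f16 (dtype code 5 is
    // torch.float16) before the FFN matmuls.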
    %int5_31192 = torch.constant.int 5
    %33585 = torch.prims.convert_element_type %33577, %int5_31192 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31193 = torch.constant.int 5
    %33586 = torch.prims.convert_element_type %33578, %int5_31193 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31194 = torch.constant.int 5
    %33587 = torch.prims.convert_element_type %33579, %int5_31194 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31195 = torch.constant.int 5
    %33588 = torch.prims.convert_element_type %33580, %int5_31195 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31196 = torch.constant.int 5
    %33589 = torch.prims.convert_element_type %33581, %int5_31196 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31197 = torch.constant.int 5
    %33590 = torch.prims.convert_element_type %33582, %int5_31197 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31198 = torch.constant.int 5
    %33591 = torch.prims.convert_element_type %33583, %int5_31198 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31199 = torch.constant.int 5
    %33592 = torch.prims.convert_element_type %33584, %int5_31199 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
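    // FFN gate projection (tensor-parallel): each device transposes its
    // [1792,4096] weight shard (%1208 ... %1215) to [4096,1792], flattens the
    // activations to [4*seq,4096], runs torch.aten.mm, and reshapes back to
    // [4,?,1792]. 1792 = 14336 / 8, consistent with an 8-way column split of
    // a 14336-wide FFN (inferred from the shapes; the split is not stated
    // here).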
    %int1_31200 = torch.constant.int 1
    %int0_31201 = torch.constant.int 0
    %33593 = torch.prim.ListConstruct %int1_31200, %int0_31201 : (!torch.int, !torch.int) -> !torch.list<int>
    %33594 = torch.aten.permute %1208, %33593 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31202 = torch.constant.int 1
    %int0_31203 = torch.constant.int 0
    %33595 = torch.prim.ListConstruct %int1_31202, %int0_31203 : (!torch.int, !torch.int) -> !torch.list<int>
    %33596 = torch.aten.permute %1209, %33595 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31204 = torch.constant.int 1
    %int0_31205 = torch.constant.int 0
    %33597 = torch.prim.ListConstruct %int1_31204, %int0_31205 : (!torch.int, !torch.int) -> !torch.list<int>
    %33598 = torch.aten.permute %1210, %33597 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31206 = torch.constant.int 1
    %int0_31207 = torch.constant.int 0
    %33599 = torch.prim.ListConstruct %int1_31206, %int0_31207 : (!torch.int, !torch.int) -> !torch.list<int>
    %33600 = torch.aten.permute %1211, %33599 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31208 = torch.constant.int 1
    %int0_31209 = torch.constant.int 0
    %33601 = torch.prim.ListConstruct %int1_31208, %int0_31209 : (!torch.int, !torch.int) -> !torch.list<int>
    %33602 = torch.aten.permute %1212, %33601 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31210 = torch.constant.int 1
    %int0_31211 = torch.constant.int 0
    %33603 = torch.prim.ListConstruct %int1_31210, %int0_31211 : (!torch.int, !torch.int) -> !torch.list<int>
    %33604 = torch.aten.permute %1213, %33603 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31212 = torch.constant.int 1
    %int0_31213 = torch.constant.int 0
    %33605 = torch.prim.ListConstruct %int1_31212, %int0_31213 : (!torch.int, !torch.int) -> !torch.list<int>
    %33606 = torch.aten.permute %1214, %33605 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31214 = torch.constant.int 1
    %int0_31215 = torch.constant.int 0
    %33607 = torch.prim.ListConstruct %int1_31214, %int0_31215 : (!torch.int, !torch.int) -> !torch.list<int>
    %33608 = torch.aten.permute %1215, %33607 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int4_31216 = torch.constant.int 4
    %33609 = torch.aten.mul.int %int4_31216, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31217 = torch.constant.int 4096
    %33610 = torch.prim.ListConstruct %33609, %int4096_31217 : (!torch.int, !torch.int) -> !torch.list<int>
    %33611 = torch.aten.view %33585, %33610 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33611, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33612 = torch.aten.mm %33611, %33594 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33612, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31218 = torch.constant.int 4
    %int1792_31219 = torch.constant.int 1792
    %33613 = torch.prim.ListConstruct %int4_31218, %2482, %int1792_31219 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33614 = torch.aten.view %33612, %33613 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31220 = torch.constant.int 4
    %33615 = torch.aten.mul.int %int4_31220, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31221 = torch.constant.int 4096
    %33616 = torch.prim.ListConstruct %33615, %int4096_31221 : (!torch.int, !torch.int) -> !torch.list<int>
    %33617 = torch.aten.view %33586, %33616 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33617, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33618 = torch.aten.mm %33617, %33596 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33618, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31222 = torch.constant.int 4
    %int1792_31223 = torch.constant.int 1792
    %33619 = torch.prim.ListConstruct %int4_31222, %2482, %int1792_31223 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33620 = torch.aten.view %33618, %33619 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31224 = torch.constant.int 4
    %33621 = torch.aten.mul.int %int4_31224, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31225 = torch.constant.int 4096
    %33622 = torch.prim.ListConstruct %33621, %int4096_31225 : (!torch.int, !torch.int) -> !torch.list<int>
    %33623 = torch.aten.view %33587, %33622 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33623, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33624 = torch.aten.mm %33623, %33598 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33624, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31226 = torch.constant.int 4
    %int1792_31227 = torch.constant.int 1792
    %33625 = torch.prim.ListConstruct %int4_31226, %2482, %int1792_31227 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33626 = torch.aten.view %33624, %33625 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31228 = torch.constant.int 4
    %33627 = torch.aten.mul.int %int4_31228, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31229 = torch.constant.int 4096
    %33628 = torch.prim.ListConstruct %33627, %int4096_31229 : (!torch.int, !torch.int) -> !torch.list<int>
    %33629 = torch.aten.view %33588, %33628 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33629, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33630 = torch.aten.mm %33629, %33600 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33630, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31230 = torch.constant.int 4
    %int1792_31231 = torch.constant.int 1792
    %33631 = torch.prim.ListConstruct %int4_31230, %2482, %int1792_31231 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33632 = torch.aten.view %33630, %33631 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31232 = torch.constant.int 4
    %33633 = torch.aten.mul.int %int4_31232, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31233 = torch.constant.int 4096
    %33634 = torch.prim.ListConstruct %33633, %int4096_31233 : (!torch.int, !torch.int) -> !torch.list<int>
    %33635 = torch.aten.view %33589, %33634 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33635, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33636 = torch.aten.mm %33635, %33602 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33636, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31234 = torch.constant.int 4
    %int1792_31235 = torch.constant.int 1792
    %33637 = torch.prim.ListConstruct %int4_31234, %2482, %int1792_31235 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33638 = torch.aten.view %33636, %33637 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31236 = torch.constant.int 4
    %33639 = torch.aten.mul.int %int4_31236, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31237 = torch.constant.int 4096
    %33640 = torch.prim.ListConstruct %33639, %int4096_31237 : (!torch.int, !torch.int) -> !torch.list<int>
    %33641 = torch.aten.view %33590, %33640 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33641, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33642 = torch.aten.mm %33641, %33604 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33642, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31238 = torch.constant.int 4
    %int1792_31239 = torch.constant.int 1792
    %33643 = torch.prim.ListConstruct %int4_31238, %2482, %int1792_31239 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33644 = torch.aten.view %33642, %33643 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31240 = torch.constant.int 4
    %33645 = torch.aten.mul.int %int4_31240, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31241 = torch.constant.int 4096
    %33646 = torch.prim.ListConstruct %33645, %int4096_31241 : (!torch.int, !torch.int) -> !torch.list<int>
    %33647 = torch.aten.view %33591, %33646 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33647, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33648 = torch.aten.mm %33647, %33606 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33648, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31242 = torch.constant.int 4
    %int1792_31243 = torch.constant.int 1792
    %33649 = torch.prim.ListConstruct %int4_31242, %2482, %int1792_31243 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33650 = torch.aten.view %33648, %33649 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31244 = torch.constant.int 4
    %33651 = torch.aten.mul.int %int4_31244, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31245 = torch.constant.int 4096
    %33652 = torch.prim.ListConstruct %33651, %int4096_31245 : (!torch.int, !torch.int) -> !torch.list<int>
    %33653 = torch.aten.view %33592, %33652 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33653, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33654 = torch.aten.mm %33653, %33608 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33654, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31246 = torch.constant.int 4
    %int1792_31247 = torch.constant.int 1792
    %33655 = torch.prim.ListConstruct %int4_31246, %2482, %int1792_31247 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33656 = torch.aten.view %33654, %33655 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
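    // SiLU (x * sigmoid(x)) applied to each device's gate-projection output.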
    %33657 = torch.aten.silu %33614 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33658 = torch.aten.silu %33620 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33659 = torch.aten.silu %33626 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33660 = torch.aten.silu %33632 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33661 = torch.aten.silu %33638 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33662 = torch.aten.silu %33644 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33663 = torch.aten.silu %33650 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33664 = torch.aten.silu %33656 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
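    // FFN up projection: the same transpose / flatten / mm / reshape pattern
    // as the gate projection, using the second set of [1792,4096] weight
    // shards (%1216 ... %1223) on the same f16 activations.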
    %int1_31248 = torch.constant.int 1
    %int0_31249 = torch.constant.int 0
    %33665 = torch.prim.ListConstruct %int1_31248, %int0_31249 : (!torch.int, !torch.int) -> !torch.list<int>
    %33666 = torch.aten.permute %1216, %33665 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31250 = torch.constant.int 1
    %int0_31251 = torch.constant.int 0
    %33667 = torch.prim.ListConstruct %int1_31250, %int0_31251 : (!torch.int, !torch.int) -> !torch.list<int>
    %33668 = torch.aten.permute %1217, %33667 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31252 = torch.constant.int 1
    %int0_31253 = torch.constant.int 0
    %33669 = torch.prim.ListConstruct %int1_31252, %int0_31253 : (!torch.int, !torch.int) -> !torch.list<int>
    %33670 = torch.aten.permute %1218, %33669 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31254 = torch.constant.int 1
    %int0_31255 = torch.constant.int 0
    %33671 = torch.prim.ListConstruct %int1_31254, %int0_31255 : (!torch.int, !torch.int) -> !torch.list<int>
    %33672 = torch.aten.permute %1219, %33671 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31256 = torch.constant.int 1
    %int0_31257 = torch.constant.int 0
    %33673 = torch.prim.ListConstruct %int1_31256, %int0_31257 : (!torch.int, !torch.int) -> !torch.list<int>
    %33674 = torch.aten.permute %1220, %33673 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31258 = torch.constant.int 1
    %int0_31259 = torch.constant.int 0
    %33675 = torch.prim.ListConstruct %int1_31258, %int0_31259 : (!torch.int, !torch.int) -> !torch.list<int>
    %33676 = torch.aten.permute %1221, %33675 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31260 = torch.constant.int 1
    %int0_31261 = torch.constant.int 0
    %33677 = torch.prim.ListConstruct %int1_31260, %int0_31261 : (!torch.int, !torch.int) -> !torch.list<int>
    %33678 = torch.aten.permute %1222, %33677 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_31262 = torch.constant.int 1
    %int0_31263 = torch.constant.int 0
    %33679 = torch.prim.ListConstruct %int1_31262, %int0_31263 : (!torch.int, !torch.int) -> !torch.list<int>
    %33680 = torch.aten.permute %1223, %33679 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int4_31264 = torch.constant.int 4
    %33681 = torch.aten.mul.int %int4_31264, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31265 = torch.constant.int 4096
    %33682 = torch.prim.ListConstruct %33681, %int4096_31265 : (!torch.int, !torch.int) -> !torch.list<int>
    %33683 = torch.aten.view %33585, %33682 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33683, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33684 = torch.aten.mm %33683, %33666 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33684, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31266 = torch.constant.int 4
    %int1792_31267 = torch.constant.int 1792
    %33685 = torch.prim.ListConstruct %int4_31266, %2482, %int1792_31267 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33686 = torch.aten.view %33684, %33685 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31268 = torch.constant.int 4
    %33687 = torch.aten.mul.int %int4_31268, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31269 = torch.constant.int 4096
    %33688 = torch.prim.ListConstruct %33687, %int4096_31269 : (!torch.int, !torch.int) -> !torch.list<int>
    %33689 = torch.aten.view %33586, %33688 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33689, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33690 = torch.aten.mm %33689, %33668 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33690, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31270 = torch.constant.int 4
    %int1792_31271 = torch.constant.int 1792
    %33691 = torch.prim.ListConstruct %int4_31270, %2482, %int1792_31271 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33692 = torch.aten.view %33690, %33691 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31272 = torch.constant.int 4
    %33693 = torch.aten.mul.int %int4_31272, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31273 = torch.constant.int 4096
    %33694 = torch.prim.ListConstruct %33693, %int4096_31273 : (!torch.int, !torch.int) -> !torch.list<int>
    %33695 = torch.aten.view %33587, %33694 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33695, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33696 = torch.aten.mm %33695, %33670 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33696, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31274 = torch.constant.int 4
    %int1792_31275 = torch.constant.int 1792
    %33697 = torch.prim.ListConstruct %int4_31274, %2482, %int1792_31275 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33698 = torch.aten.view %33696, %33697 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31276 = torch.constant.int 4
    %33699 = torch.aten.mul.int %int4_31276, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31277 = torch.constant.int 4096
    %33700 = torch.prim.ListConstruct %33699, %int4096_31277 : (!torch.int, !torch.int) -> !torch.list<int>
    %33701 = torch.aten.view %33588, %33700 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33701, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33702 = torch.aten.mm %33701, %33672 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33702, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31278 = torch.constant.int 4
    %int1792_31279 = torch.constant.int 1792
    %33703 = torch.prim.ListConstruct %int4_31278, %2482, %int1792_31279 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33704 = torch.aten.view %33702, %33703 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31280 = torch.constant.int 4
    %33705 = torch.aten.mul.int %int4_31280, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31281 = torch.constant.int 4096
    %33706 = torch.prim.ListConstruct %33705, %int4096_31281 : (!torch.int, !torch.int) -> !torch.list<int>
    %33707 = torch.aten.view %33589, %33706 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33707, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33708 = torch.aten.mm %33707, %33674 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33708, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31282 = torch.constant.int 4
    %int1792_31283 = torch.constant.int 1792
    %33709 = torch.prim.ListConstruct %int4_31282, %2482, %int1792_31283 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33710 = torch.aten.view %33708, %33709 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31284 = torch.constant.int 4
    %33711 = torch.aten.mul.int %int4_31284, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31285 = torch.constant.int 4096
    %33712 = torch.prim.ListConstruct %33711, %int4096_31285 : (!torch.int, !torch.int) -> !torch.list<int>
    %33713 = torch.aten.view %33590, %33712 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33713, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33714 = torch.aten.mm %33713, %33676 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33714, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31286 = torch.constant.int 4
    %int1792_31287 = torch.constant.int 1792
    %33715 = torch.prim.ListConstruct %int4_31286, %2482, %int1792_31287 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33716 = torch.aten.view %33714, %33715 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31288 = torch.constant.int 4
    %33717 = torch.aten.mul.int %int4_31288, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31289 = torch.constant.int 4096
    %33718 = torch.prim.ListConstruct %33717, %int4096_31289 : (!torch.int, !torch.int) -> !torch.list<int>
    %33719 = torch.aten.view %33591, %33718 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33719, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33720 = torch.aten.mm %33719, %33678 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33720, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31290 = torch.constant.int 4
    %int1792_31291 = torch.constant.int 1792
    %33721 = torch.prim.ListConstruct %int4_31290, %2482, %int1792_31291 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33722 = torch.aten.view %33720, %33721 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_31292 = torch.constant.int 4
    %33723 = torch.aten.mul.int %int4_31292, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31293 = torch.constant.int 4096
    %33724 = torch.prim.ListConstruct %33723, %int4096_31293 : (!torch.int, !torch.int) -> !torch.list<int>
    %33725 = torch.aten.view %33592, %33724 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33725, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %33726 = torch.aten.mm %33725, %33680 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33726, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_31294 = torch.constant.int 4
    %int1792_31295 = torch.constant.int 1792
    %33727 = torch.prim.ListConstruct %int4_31294, %2482, %int1792_31295 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33728 = torch.aten.view %33726, %33727 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
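    // Elementwise gating: silu(gate) * up per device, the SwiGLU combination
    // (inferred from the silu + mul pattern).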
    %33729 = torch.aten.mul.Tensor %33657, %33686 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33730 = torch.aten.mul.Tensor %33658, %33692 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33731 = torch.aten.mul.Tensor %33659, %33698 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33732 = torch.aten.mul.Tensor %33660, %33704 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33733 = torch.aten.mul.Tensor %33661, %33710 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33734 = torch.aten.mul.Tensor %33662, %33716 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33735 = torch.aten.mul.Tensor %33663, %33722 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %33736 = torch.aten.mul.Tensor %33664, %33728 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %33736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
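    // FFN down projection begins: the [4096,1792] shards (%1224 ... %1231)
    // are transposed to [1792,4096], and each device's gated activations are
    // flattened to [4*seq,1792] and multiplied back out to width 4096. The
    // resulting per-device partials mirror the ones reduced across shards
    // earlier in this block.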
    %int1_31296 = torch.constant.int 1
    %int0_31297 = torch.constant.int 0
    %33737 = torch.prim.ListConstruct %int1_31296, %int0_31297 : (!torch.int, !torch.int) -> !torch.list<int>
    %33738 = torch.aten.permute %1224, %33737 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_31298 = torch.constant.int 1
    %int0_31299 = torch.constant.int 0
    %33739 = torch.prim.ListConstruct %int1_31298, %int0_31299 : (!torch.int, !torch.int) -> !torch.list<int>
    %33740 = torch.aten.permute %1225, %33739 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_31300 = torch.constant.int 1
    %int0_31301 = torch.constant.int 0
    %33741 = torch.prim.ListConstruct %int1_31300, %int0_31301 : (!torch.int, !torch.int) -> !torch.list<int>
    %33742 = torch.aten.permute %1226, %33741 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_31302 = torch.constant.int 1
    %int0_31303 = torch.constant.int 0
    %33743 = torch.prim.ListConstruct %int1_31302, %int0_31303 : (!torch.int, !torch.int) -> !torch.list<int>
    %33744 = torch.aten.permute %1227, %33743 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_31304 = torch.constant.int 1
    %int0_31305 = torch.constant.int 0
    %33745 = torch.prim.ListConstruct %int1_31304, %int0_31305 : (!torch.int, !torch.int) -> !torch.list<int>
    %33746 = torch.aten.permute %1228, %33745 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_31306 = torch.constant.int 1
    %int0_31307 = torch.constant.int 0
    %33747 = torch.prim.ListConstruct %int1_31306, %int0_31307 : (!torch.int, !torch.int) -> !torch.list<int>
    %33748 = torch.aten.permute %1229, %33747 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_31308 = torch.constant.int 1
    %int0_31309 = torch.constant.int 0
    %33749 = torch.prim.ListConstruct %int1_31308, %int0_31309 : (!torch.int, !torch.int) -> !torch.list<int>
    %33750 = torch.aten.permute %1230, %33749 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_31310 = torch.constant.int 1
    %int0_31311 = torch.constant.int 0
    %33751 = torch.prim.ListConstruct %int1_31310, %int0_31311 : (!torch.int, !torch.int) -> !torch.list<int>
    %33752 = torch.aten.permute %1231, %33751 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
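    // Per-device projection, repeated eight times: flatten the gated [4,?,1792]
    // activation to [4*s,1792], matmul against the transposed shard, and reshape
    // back to [4,?,4096]. Each device now holds a partial contribution to the
    // 4096-wide output (the shard width 1792 suggests an FFN down-projection
    // split 8 ways over an intermediate dimension of 8 * 1792 = 14336).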
    %int1_31312 = torch.constant.int 1
    %33753 = torch.aten.size.int %33614, %int1_31312 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_31313 = torch.constant.int 4
    %33754 = torch.aten.mul.int %int4_31313, %33753 : !torch.int, !torch.int -> !torch.int
    %int1792_31314 = torch.constant.int 1792
    %33755 = torch.prim.ListConstruct %33754, %int1792_31314 : (!torch.int, !torch.int) -> !torch.list<int>
    %33756 = torch.aten.view %33729, %33755 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33756, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %33757 = torch.aten.mm %33756, %33738 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33757, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_31315 = torch.constant.int 4
    %int4096_31316 = torch.constant.int 4096
    %33758 = torch.prim.ListConstruct %int4_31315, %33753, %int4096_31316 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33759 = torch.aten.view %33757, %33758 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31317 = torch.constant.int 1
    %33760 = torch.aten.size.int %33620, %int1_31317 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_31318 = torch.constant.int 4
    %33761 = torch.aten.mul.int %int4_31318, %33760 : !torch.int, !torch.int -> !torch.int
    %int1792_31319 = torch.constant.int 1792
    %33762 = torch.prim.ListConstruct %33761, %int1792_31319 : (!torch.int, !torch.int) -> !torch.list<int>
    %33763 = torch.aten.view %33730, %33762 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33763, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %33764 = torch.aten.mm %33763, %33740 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33764, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_31320 = torch.constant.int 4
    %int4096_31321 = torch.constant.int 4096
    %33765 = torch.prim.ListConstruct %int4_31320, %33760, %int4096_31321 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33766 = torch.aten.view %33764, %33765 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31322 = torch.constant.int 1
    %33767 = torch.aten.size.int %33626, %int1_31322 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_31323 = torch.constant.int 4
    %33768 = torch.aten.mul.int %int4_31323, %33767 : !torch.int, !torch.int -> !torch.int
    %int1792_31324 = torch.constant.int 1792
    %33769 = torch.prim.ListConstruct %33768, %int1792_31324 : (!torch.int, !torch.int) -> !torch.list<int>
    %33770 = torch.aten.view %33731, %33769 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33770, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %33771 = torch.aten.mm %33770, %33742 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33771, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_31325 = torch.constant.int 4
    %int4096_31326 = torch.constant.int 4096
    %33772 = torch.prim.ListConstruct %int4_31325, %33767, %int4096_31326 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33773 = torch.aten.view %33771, %33772 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31327 = torch.constant.int 1
    %33774 = torch.aten.size.int %33632, %int1_31327 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_31328 = torch.constant.int 4
    %33775 = torch.aten.mul.int %int4_31328, %33774 : !torch.int, !torch.int -> !torch.int
    %int1792_31329 = torch.constant.int 1792
    %33776 = torch.prim.ListConstruct %33775, %int1792_31329 : (!torch.int, !torch.int) -> !torch.list<int>
    %33777 = torch.aten.view %33732, %33776 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33777, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %33778 = torch.aten.mm %33777, %33744 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33778, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_31330 = torch.constant.int 4
    %int4096_31331 = torch.constant.int 4096
    %33779 = torch.prim.ListConstruct %int4_31330, %33774, %int4096_31331 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33780 = torch.aten.view %33778, %33779 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31332 = torch.constant.int 1
    %33781 = torch.aten.size.int %33638, %int1_31332 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_31333 = torch.constant.int 4
    %33782 = torch.aten.mul.int %int4_31333, %33781 : !torch.int, !torch.int -> !torch.int
    %int1792_31334 = torch.constant.int 1792
    %33783 = torch.prim.ListConstruct %33782, %int1792_31334 : (!torch.int, !torch.int) -> !torch.list<int>
    %33784 = torch.aten.view %33733, %33783 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33784, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %33785 = torch.aten.mm %33784, %33746 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33785, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_31335 = torch.constant.int 4
    %int4096_31336 = torch.constant.int 4096
    %33786 = torch.prim.ListConstruct %int4_31335, %33781, %int4096_31336 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33787 = torch.aten.view %33785, %33786 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31337 = torch.constant.int 1
    %33788 = torch.aten.size.int %33644, %int1_31337 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_31338 = torch.constant.int 4
    %33789 = torch.aten.mul.int %int4_31338, %33788 : !torch.int, !torch.int -> !torch.int
    %int1792_31339 = torch.constant.int 1792
    %33790 = torch.prim.ListConstruct %33789, %int1792_31339 : (!torch.int, !torch.int) -> !torch.list<int>
    %33791 = torch.aten.view %33734, %33790 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33791, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %33792 = torch.aten.mm %33791, %33748 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33792, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_31340 = torch.constant.int 4
    %int4096_31341 = torch.constant.int 4096
    %33793 = torch.prim.ListConstruct %int4_31340, %33788, %int4096_31341 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33794 = torch.aten.view %33792, %33793 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31342 = torch.constant.int 1
    %33795 = torch.aten.size.int %33650, %int1_31342 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_31343 = torch.constant.int 4
    %33796 = torch.aten.mul.int %int4_31343, %33795 : !torch.int, !torch.int -> !torch.int
    %int1792_31344 = torch.constant.int 1792
    %33797 = torch.prim.ListConstruct %33796, %int1792_31344 : (!torch.int, !torch.int) -> !torch.list<int>
    %33798 = torch.aten.view %33735, %33797 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33798, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %33799 = torch.aten.mm %33798, %33750 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33799, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_31345 = torch.constant.int 4
    %int4096_31346 = torch.constant.int 4096
    %33800 = torch.prim.ListConstruct %int4_31345, %33795, %int4096_31346 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33801 = torch.aten.view %33799, %33800 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31347 = torch.constant.int 1
    %33802 = torch.aten.size.int %33656, %int1_31347 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_31348 = torch.constant.int 4
    %33803 = torch.aten.mul.int %int4_31348, %33802 : !torch.int, !torch.int -> !torch.int
    %int1792_31349 = torch.constant.int 1792
    %33804 = torch.prim.ListConstruct %33803, %int1792_31349 : (!torch.int, !torch.int) -> !torch.list<int>
    %33805 = torch.aten.view %33736, %33804 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %33805, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %33806 = torch.aten.mm %33805, %33752 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %33806, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_31350 = torch.constant.int 4
    %int4096_31351 = torch.constant.int 4096
    %33807 = torch.prim.ListConstruct %int4_31350, %33802, %int4096_31351 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %33808 = torch.aten.view %33806, %33807 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
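    // What follows appears to be an unrolled all-reduce of the eight partials
    // %33759..%33808, materialized once per device. Replica for @__device_0:
    // transfer the seven remote partials onto device 0 ...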
    %33809 = torch_c.to_builtin_tensor %33766 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31352 = arith.constant 1 : index
    %dim_31353 = tensor.dim %33809, %c1_31352 : tensor<4x?x4096xf16>
    %33810 = flow.tensor.transfer %33809 : tensor<4x?x4096xf16>{%dim_31353} to #hal.device.promise<@__device_0>
    %33811 = torch_c.from_builtin_tensor %33810 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33812 = torch_c.to_builtin_tensor %33773 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31354 = arith.constant 1 : index
    %dim_31355 = tensor.dim %33812, %c1_31354 : tensor<4x?x4096xf16>
    %33813 = flow.tensor.transfer %33812 : tensor<4x?x4096xf16>{%dim_31355} to #hal.device.promise<@__device_0>
    %33814 = torch_c.from_builtin_tensor %33813 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33815 = torch_c.to_builtin_tensor %33780 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31356 = arith.constant 1 : index
    %dim_31357 = tensor.dim %33815, %c1_31356 : tensor<4x?x4096xf16>
    %33816 = flow.tensor.transfer %33815 : tensor<4x?x4096xf16>{%dim_31357} to #hal.device.promise<@__device_0>
    %33817 = torch_c.from_builtin_tensor %33816 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33818 = torch_c.to_builtin_tensor %33787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31358 = arith.constant 1 : index
    %dim_31359 = tensor.dim %33818, %c1_31358 : tensor<4x?x4096xf16>
    %33819 = flow.tensor.transfer %33818 : tensor<4x?x4096xf16>{%dim_31359} to #hal.device.promise<@__device_0>
    %33820 = torch_c.from_builtin_tensor %33819 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33821 = torch_c.to_builtin_tensor %33794 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31360 = arith.constant 1 : index
    %dim_31361 = tensor.dim %33821, %c1_31360 : tensor<4x?x4096xf16>
    %33822 = flow.tensor.transfer %33821 : tensor<4x?x4096xf16>{%dim_31361} to #hal.device.promise<@__device_0>
    %33823 = torch_c.from_builtin_tensor %33822 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33824 = torch_c.to_builtin_tensor %33801 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31362 = arith.constant 1 : index
    %dim_31363 = tensor.dim %33824, %c1_31362 : tensor<4x?x4096xf16>
    %33825 = flow.tensor.transfer %33824 : tensor<4x?x4096xf16>{%dim_31363} to #hal.device.promise<@__device_0>
    %33826 = torch_c.from_builtin_tensor %33825 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33827 = torch_c.to_builtin_tensor %33808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31364 = arith.constant 1 : index
    %dim_31365 = tensor.dim %33827, %c1_31364 : tensor<4x?x4096xf16>
    %33828 = flow.tensor.transfer %33827 : tensor<4x?x4096xf16>{%dim_31365} to #hal.device.promise<@__device_0>
    %33829 = torch_c.from_builtin_tensor %33828 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
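    // ... then accumulate the local partial %33759 with the seven transferred
    // copies via a chain of adds.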
    %int1_31366 = torch.constant.int 1
    %33830 = torch.aten.add.Tensor %33759, %33811, %int1_31366 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31367 = torch.constant.int 1
    %33831 = torch.aten.add.Tensor %33830, %33814, %int1_31367 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31368 = torch.constant.int 1
    %33832 = torch.aten.add.Tensor %33831, %33817, %int1_31368 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31369 = torch.constant.int 1
    %33833 = torch.aten.add.Tensor %33832, %33820, %int1_31369 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31370 = torch.constant.int 1
    %33834 = torch.aten.add.Tensor %33833, %33823, %int1_31370 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31371 = torch.constant.int 1
    %33835 = torch.aten.add.Tensor %33834, %33826, %int1_31371 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31372 = torch.constant.int 1
    %33836 = torch.aten.add.Tensor %33835, %33829, %int1_31372 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
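    // Replica for @__device_1: same gather-and-sum. Note the local partial %33766
    // is added directly (no flow.tensor.transfer) since it already resides on
    // device 1.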
    %33837 = torch_c.to_builtin_tensor %33759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31373 = arith.constant 1 : index
    %dim_31374 = tensor.dim %33837, %c1_31373 : tensor<4x?x4096xf16>
    %33838 = flow.tensor.transfer %33837 : tensor<4x?x4096xf16>{%dim_31374} to #hal.device.promise<@__device_1>
    %33839 = torch_c.from_builtin_tensor %33838 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33840 = torch_c.to_builtin_tensor %33773 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31375 = arith.constant 1 : index
    %dim_31376 = tensor.dim %33840, %c1_31375 : tensor<4x?x4096xf16>
    %33841 = flow.tensor.transfer %33840 : tensor<4x?x4096xf16>{%dim_31376} to #hal.device.promise<@__device_1>
    %33842 = torch_c.from_builtin_tensor %33841 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33843 = torch_c.to_builtin_tensor %33780 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31377 = arith.constant 1 : index
    %dim_31378 = tensor.dim %33843, %c1_31377 : tensor<4x?x4096xf16>
    %33844 = flow.tensor.transfer %33843 : tensor<4x?x4096xf16>{%dim_31378} to #hal.device.promise<@__device_1>
    %33845 = torch_c.from_builtin_tensor %33844 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33846 = torch_c.to_builtin_tensor %33787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31379 = arith.constant 1 : index
    %dim_31380 = tensor.dim %33846, %c1_31379 : tensor<4x?x4096xf16>
    %33847 = flow.tensor.transfer %33846 : tensor<4x?x4096xf16>{%dim_31380} to #hal.device.promise<@__device_1>
    %33848 = torch_c.from_builtin_tensor %33847 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33849 = torch_c.to_builtin_tensor %33794 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31381 = arith.constant 1 : index
    %dim_31382 = tensor.dim %33849, %c1_31381 : tensor<4x?x4096xf16>
    %33850 = flow.tensor.transfer %33849 : tensor<4x?x4096xf16>{%dim_31382} to #hal.device.promise<@__device_1>
    %33851 = torch_c.from_builtin_tensor %33850 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33852 = torch_c.to_builtin_tensor %33801 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31383 = arith.constant 1 : index
    %dim_31384 = tensor.dim %33852, %c1_31383 : tensor<4x?x4096xf16>
    %33853 = flow.tensor.transfer %33852 : tensor<4x?x4096xf16>{%dim_31384} to #hal.device.promise<@__device_1>
    %33854 = torch_c.from_builtin_tensor %33853 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33855 = torch_c.to_builtin_tensor %33808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31385 = arith.constant 1 : index
    %dim_31386 = tensor.dim %33855, %c1_31385 : tensor<4x?x4096xf16>
    %33856 = flow.tensor.transfer %33855 : tensor<4x?x4096xf16>{%dim_31386} to #hal.device.promise<@__device_1>
    %33857 = torch_c.from_builtin_tensor %33856 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31387 = torch.constant.int 1
    %33858 = torch.aten.add.Tensor %33839, %33766, %int1_31387 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31388 = torch.constant.int 1
    %33859 = torch.aten.add.Tensor %33858, %33842, %int1_31388 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31389 = torch.constant.int 1
    %33860 = torch.aten.add.Tensor %33859, %33845, %int1_31389 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31390 = torch.constant.int 1
    %33861 = torch.aten.add.Tensor %33860, %33848, %int1_31390 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31391 = torch.constant.int 1
    %33862 = torch.aten.add.Tensor %33861, %33851, %int1_31391 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31392 = torch.constant.int 1
    %33863 = torch.aten.add.Tensor %33862, %33854, %int1_31392 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31393 = torch.constant.int 1
    %33864 = torch.aten.add.Tensor %33863, %33857, %int1_31393 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
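    // Replica for @__device_2; local partial %33773 is used in place.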
    %33865 = torch_c.to_builtin_tensor %33759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31394 = arith.constant 1 : index
    %dim_31395 = tensor.dim %33865, %c1_31394 : tensor<4x?x4096xf16>
    %33866 = flow.tensor.transfer %33865 : tensor<4x?x4096xf16>{%dim_31395} to #hal.device.promise<@__device_2>
    %33867 = torch_c.from_builtin_tensor %33866 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33868 = torch_c.to_builtin_tensor %33766 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31396 = arith.constant 1 : index
    %dim_31397 = tensor.dim %33868, %c1_31396 : tensor<4x?x4096xf16>
    %33869 = flow.tensor.transfer %33868 : tensor<4x?x4096xf16>{%dim_31397} to #hal.device.promise<@__device_2>
    %33870 = torch_c.from_builtin_tensor %33869 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33871 = torch_c.to_builtin_tensor %33780 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31398 = arith.constant 1 : index
    %dim_31399 = tensor.dim %33871, %c1_31398 : tensor<4x?x4096xf16>
    %33872 = flow.tensor.transfer %33871 : tensor<4x?x4096xf16>{%dim_31399} to #hal.device.promise<@__device_2>
    %33873 = torch_c.from_builtin_tensor %33872 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33874 = torch_c.to_builtin_tensor %33787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31400 = arith.constant 1 : index
    %dim_31401 = tensor.dim %33874, %c1_31400 : tensor<4x?x4096xf16>
    %33875 = flow.tensor.transfer %33874 : tensor<4x?x4096xf16>{%dim_31401} to #hal.device.promise<@__device_2>
    %33876 = torch_c.from_builtin_tensor %33875 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33877 = torch_c.to_builtin_tensor %33794 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31402 = arith.constant 1 : index
    %dim_31403 = tensor.dim %33877, %c1_31402 : tensor<4x?x4096xf16>
    %33878 = flow.tensor.transfer %33877 : tensor<4x?x4096xf16>{%dim_31403} to #hal.device.promise<@__device_2>
    %33879 = torch_c.from_builtin_tensor %33878 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33880 = torch_c.to_builtin_tensor %33801 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31404 = arith.constant 1 : index
    %dim_31405 = tensor.dim %33880, %c1_31404 : tensor<4x?x4096xf16>
    %33881 = flow.tensor.transfer %33880 : tensor<4x?x4096xf16>{%dim_31405} to #hal.device.promise<@__device_2>
    %33882 = torch_c.from_builtin_tensor %33881 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33883 = torch_c.to_builtin_tensor %33808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31406 = arith.constant 1 : index
    %dim_31407 = tensor.dim %33883, %c1_31406 : tensor<4x?x4096xf16>
    %33884 = flow.tensor.transfer %33883 : tensor<4x?x4096xf16>{%dim_31407} to #hal.device.promise<@__device_2>
    %33885 = torch_c.from_builtin_tensor %33884 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31408 = torch.constant.int 1
    %33886 = torch.aten.add.Tensor %33867, %33870, %int1_31408 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31409 = torch.constant.int 1
    %33887 = torch.aten.add.Tensor %33886, %33773, %int1_31409 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31410 = torch.constant.int 1
    %33888 = torch.aten.add.Tensor %33887, %33873, %int1_31410 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31411 = torch.constant.int 1
    %33889 = torch.aten.add.Tensor %33888, %33876, %int1_31411 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31412 = torch.constant.int 1
    %33890 = torch.aten.add.Tensor %33889, %33879, %int1_31412 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31413 = torch.constant.int 1
    %33891 = torch.aten.add.Tensor %33890, %33882, %int1_31413 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31414 = torch.constant.int 1
    %33892 = torch.aten.add.Tensor %33891, %33885, %int1_31414 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
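    // Replica for @__device_3; local partial %33780 is used in place.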
    %33893 = torch_c.to_builtin_tensor %33759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31415 = arith.constant 1 : index
    %dim_31416 = tensor.dim %33893, %c1_31415 : tensor<4x?x4096xf16>
    %33894 = flow.tensor.transfer %33893 : tensor<4x?x4096xf16>{%dim_31416} to #hal.device.promise<@__device_3>
    %33895 = torch_c.from_builtin_tensor %33894 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33896 = torch_c.to_builtin_tensor %33766 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31417 = arith.constant 1 : index
    %dim_31418 = tensor.dim %33896, %c1_31417 : tensor<4x?x4096xf16>
    %33897 = flow.tensor.transfer %33896 : tensor<4x?x4096xf16>{%dim_31418} to #hal.device.promise<@__device_3>
    %33898 = torch_c.from_builtin_tensor %33897 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33899 = torch_c.to_builtin_tensor %33773 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31419 = arith.constant 1 : index
    %dim_31420 = tensor.dim %33899, %c1_31419 : tensor<4x?x4096xf16>
    %33900 = flow.tensor.transfer %33899 : tensor<4x?x4096xf16>{%dim_31420} to #hal.device.promise<@__device_3>
    %33901 = torch_c.from_builtin_tensor %33900 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33902 = torch_c.to_builtin_tensor %33787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31421 = arith.constant 1 : index
    %dim_31422 = tensor.dim %33902, %c1_31421 : tensor<4x?x4096xf16>
    %33903 = flow.tensor.transfer %33902 : tensor<4x?x4096xf16>{%dim_31422} to #hal.device.promise<@__device_3>
    %33904 = torch_c.from_builtin_tensor %33903 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33905 = torch_c.to_builtin_tensor %33794 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31423 = arith.constant 1 : index
    %dim_31424 = tensor.dim %33905, %c1_31423 : tensor<4x?x4096xf16>
    %33906 = flow.tensor.transfer %33905 : tensor<4x?x4096xf16>{%dim_31424} to #hal.device.promise<@__device_3>
    %33907 = torch_c.from_builtin_tensor %33906 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33908 = torch_c.to_builtin_tensor %33801 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31425 = arith.constant 1 : index
    %dim_31426 = tensor.dim %33908, %c1_31425 : tensor<4x?x4096xf16>
    %33909 = flow.tensor.transfer %33908 : tensor<4x?x4096xf16>{%dim_31426} to #hal.device.promise<@__device_3>
    %33910 = torch_c.from_builtin_tensor %33909 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33911 = torch_c.to_builtin_tensor %33808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31427 = arith.constant 1 : index
    %dim_31428 = tensor.dim %33911, %c1_31427 : tensor<4x?x4096xf16>
    %33912 = flow.tensor.transfer %33911 : tensor<4x?x4096xf16>{%dim_31428} to #hal.device.promise<@__device_3>
    %33913 = torch_c.from_builtin_tensor %33912 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31429 = torch.constant.int 1
    %33914 = torch.aten.add.Tensor %33895, %33898, %int1_31429 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31430 = torch.constant.int 1
    %33915 = torch.aten.add.Tensor %33914, %33901, %int1_31430 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31431 = torch.constant.int 1
    %33916 = torch.aten.add.Tensor %33915, %33780, %int1_31431 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31432 = torch.constant.int 1
    %33917 = torch.aten.add.Tensor %33916, %33904, %int1_31432 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31433 = torch.constant.int 1
    %33918 = torch.aten.add.Tensor %33917, %33907, %int1_31433 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31434 = torch.constant.int 1
    %33919 = torch.aten.add.Tensor %33918, %33910, %int1_31434 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31435 = torch.constant.int 1
    %33920 = torch.aten.add.Tensor %33919, %33913, %int1_31435 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
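    // Replica for @__device_4; local partial %33787 is used in place.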
    %33921 = torch_c.to_builtin_tensor %33759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31436 = arith.constant 1 : index
    %dim_31437 = tensor.dim %33921, %c1_31436 : tensor<4x?x4096xf16>
    %33922 = flow.tensor.transfer %33921 : tensor<4x?x4096xf16>{%dim_31437} to #hal.device.promise<@__device_4>
    %33923 = torch_c.from_builtin_tensor %33922 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33924 = torch_c.to_builtin_tensor %33766 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31438 = arith.constant 1 : index
    %dim_31439 = tensor.dim %33924, %c1_31438 : tensor<4x?x4096xf16>
    %33925 = flow.tensor.transfer %33924 : tensor<4x?x4096xf16>{%dim_31439} to #hal.device.promise<@__device_4>
    %33926 = torch_c.from_builtin_tensor %33925 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33927 = torch_c.to_builtin_tensor %33773 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31440 = arith.constant 1 : index
    %dim_31441 = tensor.dim %33927, %c1_31440 : tensor<4x?x4096xf16>
    %33928 = flow.tensor.transfer %33927 : tensor<4x?x4096xf16>{%dim_31441} to #hal.device.promise<@__device_4>
    %33929 = torch_c.from_builtin_tensor %33928 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33930 = torch_c.to_builtin_tensor %33780 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31442 = arith.constant 1 : index
    %dim_31443 = tensor.dim %33930, %c1_31442 : tensor<4x?x4096xf16>
    %33931 = flow.tensor.transfer %33930 : tensor<4x?x4096xf16>{%dim_31443} to #hal.device.promise<@__device_4>
    %33932 = torch_c.from_builtin_tensor %33931 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33933 = torch_c.to_builtin_tensor %33794 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31444 = arith.constant 1 : index
    %dim_31445 = tensor.dim %33933, %c1_31444 : tensor<4x?x4096xf16>
    %33934 = flow.tensor.transfer %33933 : tensor<4x?x4096xf16>{%dim_31445} to #hal.device.promise<@__device_4>
    %33935 = torch_c.from_builtin_tensor %33934 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33936 = torch_c.to_builtin_tensor %33801 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31446 = arith.constant 1 : index
    %dim_31447 = tensor.dim %33936, %c1_31446 : tensor<4x?x4096xf16>
    %33937 = flow.tensor.transfer %33936 : tensor<4x?x4096xf16>{%dim_31447} to #hal.device.promise<@__device_4>
    %33938 = torch_c.from_builtin_tensor %33937 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33939 = torch_c.to_builtin_tensor %33808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31448 = arith.constant 1 : index
    %dim_31449 = tensor.dim %33939, %c1_31448 : tensor<4x?x4096xf16>
    %33940 = flow.tensor.transfer %33939 : tensor<4x?x4096xf16>{%dim_31449} to #hal.device.promise<@__device_4>
    %33941 = torch_c.from_builtin_tensor %33940 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31450 = torch.constant.int 1
    %33942 = torch.aten.add.Tensor %33923, %33926, %int1_31450 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31451 = torch.constant.int 1
    %33943 = torch.aten.add.Tensor %33942, %33929, %int1_31451 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31452 = torch.constant.int 1
    %33944 = torch.aten.add.Tensor %33943, %33932, %int1_31452 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31453 = torch.constant.int 1
    %33945 = torch.aten.add.Tensor %33944, %33787, %int1_31453 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31454 = torch.constant.int 1
    %33946 = torch.aten.add.Tensor %33945, %33935, %int1_31454 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31455 = torch.constant.int 1
    %33947 = torch.aten.add.Tensor %33946, %33938, %int1_31455 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31456 = torch.constant.int 1
    %33948 = torch.aten.add.Tensor %33947, %33941, %int1_31456 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
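    // Replica for @__device_5; local partial %33794 is used in place.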
    %33949 = torch_c.to_builtin_tensor %33759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31457 = arith.constant 1 : index
    %dim_31458 = tensor.dim %33949, %c1_31457 : tensor<4x?x4096xf16>
    %33950 = flow.tensor.transfer %33949 : tensor<4x?x4096xf16>{%dim_31458} to #hal.device.promise<@__device_5>
    %33951 = torch_c.from_builtin_tensor %33950 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33952 = torch_c.to_builtin_tensor %33766 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31459 = arith.constant 1 : index
    %dim_31460 = tensor.dim %33952, %c1_31459 : tensor<4x?x4096xf16>
    %33953 = flow.tensor.transfer %33952 : tensor<4x?x4096xf16>{%dim_31460} to #hal.device.promise<@__device_5>
    %33954 = torch_c.from_builtin_tensor %33953 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33955 = torch_c.to_builtin_tensor %33773 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31461 = arith.constant 1 : index
    %dim_31462 = tensor.dim %33955, %c1_31461 : tensor<4x?x4096xf16>
    %33956 = flow.tensor.transfer %33955 : tensor<4x?x4096xf16>{%dim_31462} to #hal.device.promise<@__device_5>
    %33957 = torch_c.from_builtin_tensor %33956 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33958 = torch_c.to_builtin_tensor %33780 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31463 = arith.constant 1 : index
    %dim_31464 = tensor.dim %33958, %c1_31463 : tensor<4x?x4096xf16>
    %33959 = flow.tensor.transfer %33958 : tensor<4x?x4096xf16>{%dim_31464} to #hal.device.promise<@__device_5>
    %33960 = torch_c.from_builtin_tensor %33959 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33961 = torch_c.to_builtin_tensor %33787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31465 = arith.constant 1 : index
    %dim_31466 = tensor.dim %33961, %c1_31465 : tensor<4x?x4096xf16>
    %33962 = flow.tensor.transfer %33961 : tensor<4x?x4096xf16>{%dim_31466} to #hal.device.promise<@__device_5>
    %33963 = torch_c.from_builtin_tensor %33962 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33964 = torch_c.to_builtin_tensor %33801 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31467 = arith.constant 1 : index
    %dim_31468 = tensor.dim %33964, %c1_31467 : tensor<4x?x4096xf16>
    %33965 = flow.tensor.transfer %33964 : tensor<4x?x4096xf16>{%dim_31468} to #hal.device.promise<@__device_5>
    %33966 = torch_c.from_builtin_tensor %33965 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33967 = torch_c.to_builtin_tensor %33808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31469 = arith.constant 1 : index
    %dim_31470 = tensor.dim %33967, %c1_31469 : tensor<4x?x4096xf16>
    %33968 = flow.tensor.transfer %33967 : tensor<4x?x4096xf16>{%dim_31470} to #hal.device.promise<@__device_5>
    %33969 = torch_c.from_builtin_tensor %33968 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31471 = torch.constant.int 1
    %33970 = torch.aten.add.Tensor %33951, %33954, %int1_31471 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31472 = torch.constant.int 1
    %33971 = torch.aten.add.Tensor %33970, %33957, %int1_31472 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31473 = torch.constant.int 1
    %33972 = torch.aten.add.Tensor %33971, %33960, %int1_31473 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31474 = torch.constant.int 1
    %33973 = torch.aten.add.Tensor %33972, %33963, %int1_31474 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31475 = torch.constant.int 1
    %33974 = torch.aten.add.Tensor %33973, %33794, %int1_31475 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31476 = torch.constant.int 1
    %33975 = torch.aten.add.Tensor %33974, %33966, %int1_31476 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31477 = torch.constant.int 1
    %33976 = torch.aten.add.Tensor %33975, %33969, %int1_31477 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
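    // Replica for @__device_6; local partial %33801 is used in place.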
    %33977 = torch_c.to_builtin_tensor %33759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31478 = arith.constant 1 : index
    %dim_31479 = tensor.dim %33977, %c1_31478 : tensor<4x?x4096xf16>
    %33978 = flow.tensor.transfer %33977 : tensor<4x?x4096xf16>{%dim_31479} to #hal.device.promise<@__device_6>
    %33979 = torch_c.from_builtin_tensor %33978 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33980 = torch_c.to_builtin_tensor %33766 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31480 = arith.constant 1 : index
    %dim_31481 = tensor.dim %33980, %c1_31480 : tensor<4x?x4096xf16>
    %33981 = flow.tensor.transfer %33980 : tensor<4x?x4096xf16>{%dim_31481} to #hal.device.promise<@__device_6>
    %33982 = torch_c.from_builtin_tensor %33981 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33983 = torch_c.to_builtin_tensor %33773 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31482 = arith.constant 1 : index
    %dim_31483 = tensor.dim %33983, %c1_31482 : tensor<4x?x4096xf16>
    %33984 = flow.tensor.transfer %33983 : tensor<4x?x4096xf16>{%dim_31483} to #hal.device.promise<@__device_6>
    %33985 = torch_c.from_builtin_tensor %33984 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33985, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33986 = torch_c.to_builtin_tensor %33780 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31484 = arith.constant 1 : index
    %dim_31485 = tensor.dim %33986, %c1_31484 : tensor<4x?x4096xf16>
    %33987 = flow.tensor.transfer %33986 : tensor<4x?x4096xf16>{%dim_31485} to #hal.device.promise<@__device_6>
    %33988 = torch_c.from_builtin_tensor %33987 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33989 = torch_c.to_builtin_tensor %33787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31486 = arith.constant 1 : index
    %dim_31487 = tensor.dim %33989, %c1_31486 : tensor<4x?x4096xf16>
    %33990 = flow.tensor.transfer %33989 : tensor<4x?x4096xf16>{%dim_31487} to #hal.device.promise<@__device_6>
    %33991 = torch_c.from_builtin_tensor %33990 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33992 = torch_c.to_builtin_tensor %33794 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31488 = arith.constant 1 : index
    %dim_31489 = tensor.dim %33992, %c1_31488 : tensor<4x?x4096xf16>
    %33993 = flow.tensor.transfer %33992 : tensor<4x?x4096xf16>{%dim_31489} to #hal.device.promise<@__device_6>
    %33994 = torch_c.from_builtin_tensor %33993 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %33995 = torch_c.to_builtin_tensor %33808 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31490 = arith.constant 1 : index
    %dim_31491 = tensor.dim %33995, %c1_31490 : tensor<4x?x4096xf16>
    %33996 = flow.tensor.transfer %33995 : tensor<4x?x4096xf16>{%dim_31491} to #hal.device.promise<@__device_6>
    %33997 = torch_c.from_builtin_tensor %33996 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31492 = torch.constant.int 1
    %33998 = torch.aten.add.Tensor %33979, %33982, %int1_31492 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31493 = torch.constant.int 1
    %33999 = torch.aten.add.Tensor %33998, %33985, %int1_31493 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %33999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31494 = torch.constant.int 1
    %34000 = torch.aten.add.Tensor %33999, %33988, %int1_31494 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31495 = torch.constant.int 1
    %34001 = torch.aten.add.Tensor %34000, %33991, %int1_31495 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31496 = torch.constant.int 1
    %34002 = torch.aten.add.Tensor %34001, %33994, %int1_31496 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31497 = torch.constant.int 1
    %34003 = torch.aten.add.Tensor %34002, %33801, %int1_31497 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31498 = torch.constant.int 1
    %34004 = torch.aten.add.Tensor %34003, %33997, %int1_31498 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
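    // Final replica, for @__device_7; its local partial %33808 is used in place.
    // The accumulation chain continues past the end of this excerpt.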
    %34005 = torch_c.to_builtin_tensor %33759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31499 = arith.constant 1 : index
    %dim_31500 = tensor.dim %34005, %c1_31499 : tensor<4x?x4096xf16>
    %34006 = flow.tensor.transfer %34005 : tensor<4x?x4096xf16>{%dim_31500} to #hal.device.promise<@__device_7>
    %34007 = torch_c.from_builtin_tensor %34006 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %34008 = torch_c.to_builtin_tensor %33766 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31501 = arith.constant 1 : index
    %dim_31502 = tensor.dim %34008, %c1_31501 : tensor<4x?x4096xf16>
    %34009 = flow.tensor.transfer %34008 : tensor<4x?x4096xf16>{%dim_31502} to #hal.device.promise<@__device_7>
    %34010 = torch_c.from_builtin_tensor %34009 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %34011 = torch_c.to_builtin_tensor %33773 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31503 = arith.constant 1 : index
    %dim_31504 = tensor.dim %34011, %c1_31503 : tensor<4x?x4096xf16>
    %34012 = flow.tensor.transfer %34011 : tensor<4x?x4096xf16>{%dim_31504} to #hal.device.promise<@__device_7>
    %34013 = torch_c.from_builtin_tensor %34012 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %34014 = torch_c.to_builtin_tensor %33780 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31505 = arith.constant 1 : index
    %dim_31506 = tensor.dim %34014, %c1_31505 : tensor<4x?x4096xf16>
    %34015 = flow.tensor.transfer %34014 : tensor<4x?x4096xf16>{%dim_31506} to #hal.device.promise<@__device_7>
    %34016 = torch_c.from_builtin_tensor %34015 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %34017 = torch_c.to_builtin_tensor %33787 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31507 = arith.constant 1 : index
    %dim_31508 = tensor.dim %34017, %c1_31507 : tensor<4x?x4096xf16>
    %34018 = flow.tensor.transfer %34017 : tensor<4x?x4096xf16>{%dim_31508} to #hal.device.promise<@__device_7>
    %34019 = torch_c.from_builtin_tensor %34018 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %34020 = torch_c.to_builtin_tensor %33794 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31509 = arith.constant 1 : index
    %dim_31510 = tensor.dim %34020, %c1_31509 : tensor<4x?x4096xf16>
    %34021 = flow.tensor.transfer %34020 : tensor<4x?x4096xf16>{%dim_31510} to #hal.device.promise<@__device_7>
    %34022 = torch_c.from_builtin_tensor %34021 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %34023 = torch_c.to_builtin_tensor %33801 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_31511 = arith.constant 1 : index
    %dim_31512 = tensor.dim %34023, %c1_31511 : tensor<4x?x4096xf16>
    %34024 = flow.tensor.transfer %34023 : tensor<4x?x4096xf16>{%dim_31512} to #hal.device.promise<@__device_7>
    %34025 = torch_c.from_builtin_tensor %34024 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
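    // With the seven remote shards now resident, a left-to-right chain of
    // aten.add.Tensor accumulates all eight; %33808 is consumed without a transfer,
    // so it is presumably the shard that was already local to @__device_7.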
    %int1_31513 = torch.constant.int 1
    %34026 = torch.aten.add.Tensor %34007, %34010, %int1_31513 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31514 = torch.constant.int 1
    %34027 = torch.aten.add.Tensor %34026, %34013, %int1_31514 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31515 = torch.constant.int 1
    %34028 = torch.aten.add.Tensor %34027, %34016, %int1_31515 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31516 = torch.constant.int 1
    %34029 = torch.aten.add.Tensor %34028, %34019, %int1_31516 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31517 = torch.constant.int 1
    %34030 = torch.aten.add.Tensor %34029, %34022, %int1_31517 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31518 = torch.constant.int 1
    %34031 = torch.aten.add.Tensor %34030, %34025, %int1_31518 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31519 = torch.constant.int 1
    %34032 = torch.aten.add.Tensor %34031, %33808, %int1_31519 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
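    // Residual connection, one add per device: %33513..%33520 appear to be the
    // eight per-device copies of the hidden state, each updated with its device's
    // all-reduced block output (%33836, %33864, ..., %34004, %34032).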
    %int1_31520 = torch.constant.int 1
    %34033 = torch.aten.add.Tensor %33513, %33836, %int1_31520 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31521 = torch.constant.int 1
    %34034 = torch.aten.add.Tensor %33514, %33864, %int1_31521 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31522 = torch.constant.int 1
    %34035 = torch.aten.add.Tensor %33515, %33892, %int1_31522 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31523 = torch.constant.int 1
    %34036 = torch.aten.add.Tensor %33516, %33920, %int1_31523 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31524 = torch.constant.int 1
    %34037 = torch.aten.add.Tensor %33517, %33948, %int1_31524 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31525 = torch.constant.int 1
    %34038 = torch.aten.add.Tensor %33518, %33976, %int1_31525 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31526 = torch.constant.int 1
    %34039 = torch.aten.add.Tensor %33519, %34004, %int1_31526 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_31527 = torch.constant.int 1
    %34040 = torch.aten.add.Tensor %33520, %34032, %int1_31527 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
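    // What follows is RMSNorm, applied independently to each device's copy.
    // A pseudocode reading of the ops below (annotation only, not part of the IR):
    //   x32 = x.to(float32)                     // convert_element_type, dtype 6
    //   ms  = mean(x32 ** 2, dim=-1, keepdim)   // pow.Tensor_Scalar + mean.dim
    //   y   = x32 * rsqrt(ms + 1e-5)            // add.Scalar + rsqrt + mul.Tensor
    //   out = (w * y).to(float16)               // mul by norm weight, dtype 5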
    %int6_31528 = torch.constant.int 6
    %34041 = torch.prims.convert_element_type %34033, %int6_31528 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31529 = torch.constant.int 6
    %34042 = torch.prims.convert_element_type %34034, %int6_31529 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31530 = torch.constant.int 6
    %34043 = torch.prims.convert_element_type %34035, %int6_31530 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31531 = torch.constant.int 6
    %34044 = torch.prims.convert_element_type %34036, %int6_31531 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31532 = torch.constant.int 6
    %34045 = torch.prims.convert_element_type %34037, %int6_31532 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31533 = torch.constant.int 6
    %34046 = torch.prims.convert_element_type %34038, %int6_31533 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31534 = torch.constant.int 6
    %34047 = torch.prims.convert_element_type %34039, %int6_31534 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_31535 = torch.constant.int 6
    %34048 = torch.prims.convert_element_type %34040, %int6_31535 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
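    // Elementwise square of each upcast copy (pow with scalar exponent 2).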
    %int2_31536 = torch.constant.int 2
    %34049 = torch.aten.pow.Tensor_Scalar %34041, %int2_31536 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31537 = torch.constant.int 2
    %34050 = torch.aten.pow.Tensor_Scalar %34042, %int2_31537 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31538 = torch.constant.int 2
    %34051 = torch.aten.pow.Tensor_Scalar %34043, %int2_31538 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31539 = torch.constant.int 2
    %34052 = torch.aten.pow.Tensor_Scalar %34044, %int2_31539 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31540 = torch.constant.int 2
    %34053 = torch.aten.pow.Tensor_Scalar %34045, %int2_31540 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31541 = torch.constant.int 2
    %34054 = torch.aten.pow.Tensor_Scalar %34046, %int2_31541 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31542 = torch.constant.int 2
    %34055 = torch.aten.pow.Tensor_Scalar %34047, %int2_31542 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_31543 = torch.constant.int 2
    %34056 = torch.aten.pow.Tensor_Scalar %34048, %int2_31543 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
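    // Mean over the hidden dimension: reduce dim -1 with keepdim = true, yielding
    // [4,?,1] so the statistic broadcasts back over the 4096 features.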
    %int-1_31544 = torch.constant.int -1
    %34057 = torch.prim.ListConstruct %int-1_31544 : (!torch.int) -> !torch.list<int>
    %true_31545 = torch.constant.bool true
    %none_31546 = torch.constant.none
    %34058 = torch.aten.mean.dim %34049, %34057, %true_31545, %none_31546 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31547 = torch.constant.int -1
    %34059 = torch.prim.ListConstruct %int-1_31547 : (!torch.int) -> !torch.list<int>
    %true_31548 = torch.constant.bool true
    %none_31549 = torch.constant.none
    %34060 = torch.aten.mean.dim %34050, %34059, %true_31548, %none_31549 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31550 = torch.constant.int -1
    %34061 = torch.prim.ListConstruct %int-1_31550 : (!torch.int) -> !torch.list<int>
    %true_31551 = torch.constant.bool true
    %none_31552 = torch.constant.none
    %34062 = torch.aten.mean.dim %34051, %34061, %true_31551, %none_31552 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31553 = torch.constant.int -1
    %34063 = torch.prim.ListConstruct %int-1_31553 : (!torch.int) -> !torch.list<int>
    %true_31554 = torch.constant.bool true
    %none_31555 = torch.constant.none
    %34064 = torch.aten.mean.dim %34052, %34063, %true_31554, %none_31555 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31556 = torch.constant.int -1
    %34065 = torch.prim.ListConstruct %int-1_31556 : (!torch.int) -> !torch.list<int>
    %true_31557 = torch.constant.bool true
    %none_31558 = torch.constant.none
    %34066 = torch.aten.mean.dim %34053, %34065, %true_31557, %none_31558 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31559 = torch.constant.int -1
    %34067 = torch.prim.ListConstruct %int-1_31559 : (!torch.int) -> !torch.list<int>
    %true_31560 = torch.constant.bool true
    %none_31561 = torch.constant.none
    %34068 = torch.aten.mean.dim %34054, %34067, %true_31560, %none_31561 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31562 = torch.constant.int -1
    %34069 = torch.prim.ListConstruct %int-1_31562 : (!torch.int) -> !torch.list<int>
    %true_31563 = torch.constant.bool true
    %none_31564 = torch.constant.none
    %34070 = torch.aten.mean.dim %34055, %34069, %true_31563, %none_31564 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_31565 = torch.constant.int -1
    %34071 = torch.prim.ListConstruct %int-1_31565 : (!torch.int) -> !torch.list<int>
    %true_31566 = torch.constant.bool true
    %none_31567 = torch.constant.none
    %34072 = torch.aten.mean.dim %34056, %34071, %true_31566, %none_31567 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
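    // Add the epsilon; 9.9999997473787516E-6 is 1e-5 rounded to float32.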
    %float9.999990e-06_31568 = torch.constant.float 9.9999997473787516E-6
    %int1_31569 = torch.constant.int 1
    %34073 = torch.aten.add.Scalar %34058, %float9.999990e-06_31568, %int1_31569 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31570 = torch.constant.float 9.9999997473787516E-6
    %int1_31571 = torch.constant.int 1
    %34074 = torch.aten.add.Scalar %34060, %float9.999990e-06_31570, %int1_31571 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31572 = torch.constant.float 9.9999997473787516E-6
    %int1_31573 = torch.constant.int 1
    %34075 = torch.aten.add.Scalar %34062, %float9.999990e-06_31572, %int1_31573 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31574 = torch.constant.float 9.9999997473787516E-6
    %int1_31575 = torch.constant.int 1
    %34076 = torch.aten.add.Scalar %34064, %float9.999990e-06_31574, %int1_31575 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31576 = torch.constant.float 9.9999997473787516E-6
    %int1_31577 = torch.constant.int 1
    %34077 = torch.aten.add.Scalar %34066, %float9.999990e-06_31576, %int1_31577 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31578 = torch.constant.float 9.9999997473787516E-6
    %int1_31579 = torch.constant.int 1
    %34078 = torch.aten.add.Scalar %34068, %float9.999990e-06_31578, %int1_31579 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31580 = torch.constant.float 9.9999997473787516E-6
    %int1_31581 = torch.constant.int 1
    %34079 = torch.aten.add.Scalar %34070, %float9.999990e-06_31580, %int1_31581 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_31582 = torch.constant.float 9.9999997473787516E-6
    %int1_31583 = torch.constant.int 1
    %34080 = torch.aten.add.Scalar %34072, %float9.999990e-06_31582, %int1_31583 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
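    // rsqrt yields 1 / sqrt(mean(x^2) + eps) for each row.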
    %34081 = torch.aten.rsqrt %34073 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %34082 = torch.aten.rsqrt %34074 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %34083 = torch.aten.rsqrt %34075 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %34084 = torch.aten.rsqrt %34076 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %34085 = torch.aten.rsqrt %34077 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %34086 = torch.aten.rsqrt %34078 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %34087 = torch.aten.rsqrt %34079 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %34088 = torch.aten.rsqrt %34080 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %34088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
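    // Normalize: multiply each f32 activation by its row's reciprocal RMS; the
    // [4,?,1] factor broadcasts over the last dimension.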
    %34089 = torch.aten.mul.Tensor %34041, %34081 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34090 = torch.aten.mul.Tensor %34042, %34082 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34091 = torch.aten.mul.Tensor %34043, %34083 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34092 = torch.aten.mul.Tensor %34044, %34084 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34093 = torch.aten.mul.Tensor %34045, %34085 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34094 = torch.aten.mul.Tensor %34046, %34086 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34095 = torch.aten.mul.Tensor %34047, %34087 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34096 = torch.aten.mul.Tensor %34048, %34088 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
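    // Scale by the learned norm weight; %1232..%1239 are presumably the eight
    // per-device copies of this layer's attn_norm.weight (4096-element f32 globals).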
    %34097 = torch.aten.mul.Tensor %1232, %34089 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34098 = torch.aten.mul.Tensor %1233, %34090 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34099 = torch.aten.mul.Tensor %1234, %34091 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34100 = torch.aten.mul.Tensor %1235, %34092 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34101 = torch.aten.mul.Tensor %1236, %34093 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34102 = torch.aten.mul.Tensor %1237, %34094 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34103 = torch.aten.mul.Tensor %1238, %34095 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %34104 = torch.aten.mul.Tensor %1239, %34096 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %34104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
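    // Downcast the normalized activations back to f16 (dtype code 5) ahead of the
    // projection matmuls.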
    %int5_31584 = torch.constant.int 5
    %34105 = torch.prims.convert_element_type %34097, %int5_31584 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31585 = torch.constant.int 5
    %34106 = torch.prims.convert_element_type %34098, %int5_31585 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31586 = torch.constant.int 5
    %34107 = torch.prims.convert_element_type %34099, %int5_31586 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31587 = torch.constant.int 5
    %34108 = torch.prims.convert_element_type %34100, %int5_31587 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31588 = torch.constant.int 5
    %34109 = torch.prims.convert_element_type %34101, %int5_31588 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31589 = torch.constant.int 5
    %34110 = torch.prims.convert_element_type %34102, %int5_31589 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31590 = torch.constant.int 5
    %34111 = torch.prims.convert_element_type %34103, %int5_31590 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_31591 = torch.constant.int 5
    %34112 = torch.prims.convert_element_type %34104, %int5_31591 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %34112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
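    // Projections for what is presumably the next attention block, tensor-parallel
    // over eight shards. First Q: permute([1, 0]) transposes each [512,4096] weight
    // shard (512 = 4096/8 rows per device) to [4096,512] for the right-hand side of
    // aten.mm.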
    %int1_31592 = torch.constant.int 1
    %int0_31593 = torch.constant.int 0
    %34113 = torch.prim.ListConstruct %int1_31592, %int0_31593 : (!torch.int, !torch.int) -> !torch.list<int>
    %34114 = torch.aten.permute %1240, %34113 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_31594 = torch.constant.int 1
    %int0_31595 = torch.constant.int 0
    %34115 = torch.prim.ListConstruct %int1_31594, %int0_31595 : (!torch.int, !torch.int) -> !torch.list<int>
    %34116 = torch.aten.permute %1241, %34115 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_31596 = torch.constant.int 1
    %int0_31597 = torch.constant.int 0
    %34117 = torch.prim.ListConstruct %int1_31596, %int0_31597 : (!torch.int, !torch.int) -> !torch.list<int>
    %34118 = torch.aten.permute %1242, %34117 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_31598 = torch.constant.int 1
    %int0_31599 = torch.constant.int 0
    %34119 = torch.prim.ListConstruct %int1_31598, %int0_31599 : (!torch.int, !torch.int) -> !torch.list<int>
    %34120 = torch.aten.permute %1243, %34119 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_31600 = torch.constant.int 1
    %int0_31601 = torch.constant.int 0
    %34121 = torch.prim.ListConstruct %int1_31600, %int0_31601 : (!torch.int, !torch.int) -> !torch.list<int>
    %34122 = torch.aten.permute %1244, %34121 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_31602 = torch.constant.int 1
    %int0_31603 = torch.constant.int 0
    %34123 = torch.prim.ListConstruct %int1_31602, %int0_31603 : (!torch.int, !torch.int) -> !torch.list<int>
    %34124 = torch.aten.permute %1245, %34123 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_31604 = torch.constant.int 1
    %int0_31605 = torch.constant.int 0
    %34125 = torch.prim.ListConstruct %int1_31604, %int0_31605 : (!torch.int, !torch.int) -> !torch.list<int>
    %34126 = torch.aten.permute %1246, %34125 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_31606 = torch.constant.int 1
    %int0_31607 = torch.constant.int 0
    %34127 = torch.prim.ListConstruct %int1_31606, %int0_31607 : (!torch.int, !torch.int) -> !torch.list<int>
    %34128 = torch.aten.permute %1247, %34127 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
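    // Per shard, the 3-D activation is flattened for the 2-D matmul and reshaped:
    //   [4,?,4096] --view--> [4*s,4096] --mm--> [4*s,512] --view--> [4,?,512]
    // %2482 appears to be the dynamic sequence extent; 4 * %2482 is recomputed
    // before each flatten because aten.view takes its shape as a list of ints.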
    %int4_31608 = torch.constant.int 4
    %34129 = torch.aten.mul.int %int4_31608, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31609 = torch.constant.int 4096
    %34130 = torch.prim.ListConstruct %34129, %int4096_31609 : (!torch.int, !torch.int) -> !torch.list<int>
    %34131 = torch.aten.view %34105, %34130 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34131, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34132 = torch.aten.mm %34131, %34114 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %34132, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_31610 = torch.constant.int 4
    %int512_31611 = torch.constant.int 512
    %34133 = torch.prim.ListConstruct %int4_31610, %2482, %int512_31611 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34134 = torch.aten.view %34132, %34133 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %34134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_31612 = torch.constant.int 4
    %34135 = torch.aten.mul.int %int4_31612, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31613 = torch.constant.int 4096
    %34136 = torch.prim.ListConstruct %34135, %int4096_31613 : (!torch.int, !torch.int) -> !torch.list<int>
    %34137 = torch.aten.view %34106, %34136 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34137, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34138 = torch.aten.mm %34137, %34116 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %34138, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_31614 = torch.constant.int 4
    %int512_31615 = torch.constant.int 512
    %34139 = torch.prim.ListConstruct %int4_31614, %2482, %int512_31615 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34140 = torch.aten.view %34138, %34139 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %34140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_31616 = torch.constant.int 4
    %34141 = torch.aten.mul.int %int4_31616, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31617 = torch.constant.int 4096
    %34142 = torch.prim.ListConstruct %34141, %int4096_31617 : (!torch.int, !torch.int) -> !torch.list<int>
    %34143 = torch.aten.view %34107, %34142 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34143, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34144 = torch.aten.mm %34143, %34118 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %34144, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_31618 = torch.constant.int 4
    %int512_31619 = torch.constant.int 512
    %34145 = torch.prim.ListConstruct %int4_31618, %2482, %int512_31619 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34146 = torch.aten.view %34144, %34145 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %34146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_31620 = torch.constant.int 4
    %34147 = torch.aten.mul.int %int4_31620, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31621 = torch.constant.int 4096
    %34148 = torch.prim.ListConstruct %34147, %int4096_31621 : (!torch.int, !torch.int) -> !torch.list<int>
    %34149 = torch.aten.view %34108, %34148 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34149, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34150 = torch.aten.mm %34149, %34120 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %34150, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_31622 = torch.constant.int 4
    %int512_31623 = torch.constant.int 512
    %34151 = torch.prim.ListConstruct %int4_31622, %2482, %int512_31623 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34152 = torch.aten.view %34150, %34151 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %34152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_31624 = torch.constant.int 4
    %34153 = torch.aten.mul.int %int4_31624, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31625 = torch.constant.int 4096
    %34154 = torch.prim.ListConstruct %34153, %int4096_31625 : (!torch.int, !torch.int) -> !torch.list<int>
    %34155 = torch.aten.view %34109, %34154 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34155, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34156 = torch.aten.mm %34155, %34122 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %34156, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_31626 = torch.constant.int 4
    %int512_31627 = torch.constant.int 512
    %34157 = torch.prim.ListConstruct %int4_31626, %2482, %int512_31627 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34158 = torch.aten.view %34156, %34157 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %34158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_31628 = torch.constant.int 4
    %34159 = torch.aten.mul.int %int4_31628, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31629 = torch.constant.int 4096
    %34160 = torch.prim.ListConstruct %34159, %int4096_31629 : (!torch.int, !torch.int) -> !torch.list<int>
    %34161 = torch.aten.view %34110, %34160 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34161, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34162 = torch.aten.mm %34161, %34124 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %34162, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_31630 = torch.constant.int 4
    %int512_31631 = torch.constant.int 512
    %34163 = torch.prim.ListConstruct %int4_31630, %2482, %int512_31631 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34164 = torch.aten.view %34162, %34163 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %34164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_31632 = torch.constant.int 4
    %34165 = torch.aten.mul.int %int4_31632, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31633 = torch.constant.int 4096
    %34166 = torch.prim.ListConstruct %34165, %int4096_31633 : (!torch.int, !torch.int) -> !torch.list<int>
    %34167 = torch.aten.view %34111, %34166 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34167, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34168 = torch.aten.mm %34167, %34126 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %34168, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_31634 = torch.constant.int 4
    %int512_31635 = torch.constant.int 512
    %34169 = torch.prim.ListConstruct %int4_31634, %2482, %int512_31635 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34170 = torch.aten.view %34168, %34169 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %34170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_31636 = torch.constant.int 4
    %34171 = torch.aten.mul.int %int4_31636, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31637 = torch.constant.int 4096
    %34172 = torch.prim.ListConstruct %34171, %int4096_31637 : (!torch.int, !torch.int) -> !torch.list<int>
    %34173 = torch.aten.view %34112, %34172 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34173, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34174 = torch.aten.mm %34173, %34128 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %34174, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_31638 = torch.constant.int 4
    %int512_31639 = torch.constant.int 512
    %34175 = torch.prim.ListConstruct %int4_31638, %2482, %int512_31639 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34176 = torch.aten.view %34174, %34175 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %34176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
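    // Same transpose/flatten/mm/reshape pattern over [128,4096] weight shards;
    // 128 columns per device across eight devices is consistent with a K
    // projection, though the weight globals are not named at this point in the IR.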
    %int1_31640 = torch.constant.int 1
    %int0_31641 = torch.constant.int 0
    %34177 = torch.prim.ListConstruct %int1_31640, %int0_31641 : (!torch.int, !torch.int) -> !torch.list<int>
    %34178 = torch.aten.permute %1248, %34177 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31642 = torch.constant.int 1
    %int0_31643 = torch.constant.int 0
    %34179 = torch.prim.ListConstruct %int1_31642, %int0_31643 : (!torch.int, !torch.int) -> !torch.list<int>
    %34180 = torch.aten.permute %1249, %34179 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31644 = torch.constant.int 1
    %int0_31645 = torch.constant.int 0
    %34181 = torch.prim.ListConstruct %int1_31644, %int0_31645 : (!torch.int, !torch.int) -> !torch.list<int>
    %34182 = torch.aten.permute %1250, %34181 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31646 = torch.constant.int 1
    %int0_31647 = torch.constant.int 0
    %34183 = torch.prim.ListConstruct %int1_31646, %int0_31647 : (!torch.int, !torch.int) -> !torch.list<int>
    %34184 = torch.aten.permute %1251, %34183 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31648 = torch.constant.int 1
    %int0_31649 = torch.constant.int 0
    %34185 = torch.prim.ListConstruct %int1_31648, %int0_31649 : (!torch.int, !torch.int) -> !torch.list<int>
    %34186 = torch.aten.permute %1252, %34185 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31650 = torch.constant.int 1
    %int0_31651 = torch.constant.int 0
    %34187 = torch.prim.ListConstruct %int1_31650, %int0_31651 : (!torch.int, !torch.int) -> !torch.list<int>
    %34188 = torch.aten.permute %1253, %34187 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31652 = torch.constant.int 1
    %int0_31653 = torch.constant.int 0
    %34189 = torch.prim.ListConstruct %int1_31652, %int0_31653 : (!torch.int, !torch.int) -> !torch.list<int>
    %34190 = torch.aten.permute %1254, %34189 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31654 = torch.constant.int 1
    %int0_31655 = torch.constant.int 0
    %34191 = torch.prim.ListConstruct %int1_31654, %int0_31655 : (!torch.int, !torch.int) -> !torch.list<int>
    %34192 = torch.aten.permute %1255, %34191 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_31656 = torch.constant.int 4
    %34193 = torch.aten.mul.int %int4_31656, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31657 = torch.constant.int 4096
    %34194 = torch.prim.ListConstruct %34193, %int4096_31657 : (!torch.int, !torch.int) -> !torch.list<int>
    %34195 = torch.aten.view %34105, %34194 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34195, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34196 = torch.aten.mm %34195, %34178 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34196, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31658 = torch.constant.int 4
    %int128_31659 = torch.constant.int 128
    %34197 = torch.prim.ListConstruct %int4_31658, %2482, %int128_31659 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34198 = torch.aten.view %34196, %34197 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31660 = torch.constant.int 4
    %34199 = torch.aten.mul.int %int4_31660, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31661 = torch.constant.int 4096
    %34200 = torch.prim.ListConstruct %34199, %int4096_31661 : (!torch.int, !torch.int) -> !torch.list<int>
    %34201 = torch.aten.view %34106, %34200 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34201, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34202 = torch.aten.mm %34201, %34180 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34202, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31662 = torch.constant.int 4
    %int128_31663 = torch.constant.int 128
    %34203 = torch.prim.ListConstruct %int4_31662, %2482, %int128_31663 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34204 = torch.aten.view %34202, %34203 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31664 = torch.constant.int 4
    %34205 = torch.aten.mul.int %int4_31664, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31665 = torch.constant.int 4096
    %34206 = torch.prim.ListConstruct %34205, %int4096_31665 : (!torch.int, !torch.int) -> !torch.list<int>
    %34207 = torch.aten.view %34107, %34206 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34207, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34208 = torch.aten.mm %34207, %34182 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34208, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31666 = torch.constant.int 4
    %int128_31667 = torch.constant.int 128
    %34209 = torch.prim.ListConstruct %int4_31666, %2482, %int128_31667 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34210 = torch.aten.view %34208, %34209 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31668 = torch.constant.int 4
    %34211 = torch.aten.mul.int %int4_31668, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31669 = torch.constant.int 4096
    %34212 = torch.prim.ListConstruct %34211, %int4096_31669 : (!torch.int, !torch.int) -> !torch.list<int>
    %34213 = torch.aten.view %34108, %34212 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34213, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34214 = torch.aten.mm %34213, %34184 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34214, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31670 = torch.constant.int 4
    %int128_31671 = torch.constant.int 128
    %34215 = torch.prim.ListConstruct %int4_31670, %2482, %int128_31671 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34216 = torch.aten.view %34214, %34215 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31672 = torch.constant.int 4
    %34217 = torch.aten.mul.int %int4_31672, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31673 = torch.constant.int 4096
    %34218 = torch.prim.ListConstruct %34217, %int4096_31673 : (!torch.int, !torch.int) -> !torch.list<int>
    %34219 = torch.aten.view %34109, %34218 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34219, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34220 = torch.aten.mm %34219, %34186 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34220, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31674 = torch.constant.int 4
    %int128_31675 = torch.constant.int 128
    %34221 = torch.prim.ListConstruct %int4_31674, %2482, %int128_31675 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34222 = torch.aten.view %34220, %34221 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31676 = torch.constant.int 4
    %34223 = torch.aten.mul.int %int4_31676, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31677 = torch.constant.int 4096
    %34224 = torch.prim.ListConstruct %34223, %int4096_31677 : (!torch.int, !torch.int) -> !torch.list<int>
    %34225 = torch.aten.view %34110, %34224 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34225, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34226 = torch.aten.mm %34225, %34188 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34226, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31678 = torch.constant.int 4
    %int128_31679 = torch.constant.int 128
    %34227 = torch.prim.ListConstruct %int4_31678, %2482, %int128_31679 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34228 = torch.aten.view %34226, %34227 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31680 = torch.constant.int 4
    %34229 = torch.aten.mul.int %int4_31680, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31681 = torch.constant.int 4096
    %34230 = torch.prim.ListConstruct %34229, %int4096_31681 : (!torch.int, !torch.int) -> !torch.list<int>
    %34231 = torch.aten.view %34111, %34230 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34231, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34232 = torch.aten.mm %34231, %34190 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34232, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31682 = torch.constant.int 4
    %int128_31683 = torch.constant.int 128
    %34233 = torch.prim.ListConstruct %int4_31682, %2482, %int128_31683 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34234 = torch.aten.view %34232, %34233 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31684 = torch.constant.int 4
    %34235 = torch.aten.mul.int %int4_31684, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31685 = torch.constant.int 4096
    %34236 = torch.prim.ListConstruct %34235, %int4096_31685 : (!torch.int, !torch.int) -> !torch.list<int>
    %34237 = torch.aten.view %34112, %34236 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34237, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34238 = torch.aten.mm %34237, %34192 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34238, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31686 = torch.constant.int 4
    %int128_31687 = torch.constant.int 128
    %34239 = torch.prim.ListConstruct %int4_31686, %2482, %int128_31687 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34240 = torch.aten.view %34238, %34239 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
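    // A second run of [128,4096] shard projections follows; by position after Q
    // and K, presumably the V projection.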
    %int1_31688 = torch.constant.int 1
    %int0_31689 = torch.constant.int 0
    %34241 = torch.prim.ListConstruct %int1_31688, %int0_31689 : (!torch.int, !torch.int) -> !torch.list<int>
    %34242 = torch.aten.permute %1256, %34241 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31690 = torch.constant.int 1
    %int0_31691 = torch.constant.int 0
    %34243 = torch.prim.ListConstruct %int1_31690, %int0_31691 : (!torch.int, !torch.int) -> !torch.list<int>
    %34244 = torch.aten.permute %1257, %34243 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31692 = torch.constant.int 1
    %int0_31693 = torch.constant.int 0
    %34245 = torch.prim.ListConstruct %int1_31692, %int0_31693 : (!torch.int, !torch.int) -> !torch.list<int>
    %34246 = torch.aten.permute %1258, %34245 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31694 = torch.constant.int 1
    %int0_31695 = torch.constant.int 0
    %34247 = torch.prim.ListConstruct %int1_31694, %int0_31695 : (!torch.int, !torch.int) -> !torch.list<int>
    %34248 = torch.aten.permute %1259, %34247 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31696 = torch.constant.int 1
    %int0_31697 = torch.constant.int 0
    %34249 = torch.prim.ListConstruct %int1_31696, %int0_31697 : (!torch.int, !torch.int) -> !torch.list<int>
    %34250 = torch.aten.permute %1260, %34249 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31698 = torch.constant.int 1
    %int0_31699 = torch.constant.int 0
    %34251 = torch.prim.ListConstruct %int1_31698, %int0_31699 : (!torch.int, !torch.int) -> !torch.list<int>
    %34252 = torch.aten.permute %1261, %34251 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31700 = torch.constant.int 1
    %int0_31701 = torch.constant.int 0
    %34253 = torch.prim.ListConstruct %int1_31700, %int0_31701 : (!torch.int, !torch.int) -> !torch.list<int>
    %34254 = torch.aten.permute %1262, %34253 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_31702 = torch.constant.int 1
    %int0_31703 = torch.constant.int 0
    %34255 = torch.prim.ListConstruct %int1_31702, %int0_31703 : (!torch.int, !torch.int) -> !torch.list<int>
    %34256 = torch.aten.permute %1263, %34255 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_31704 = torch.constant.int 4
    %34257 = torch.aten.mul.int %int4_31704, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31705 = torch.constant.int 4096
    %34258 = torch.prim.ListConstruct %34257, %int4096_31705 : (!torch.int, !torch.int) -> !torch.list<int>
    %34259 = torch.aten.view %34105, %34258 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34259, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34260 = torch.aten.mm %34259, %34242 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34260, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31706 = torch.constant.int 4
    %int128_31707 = torch.constant.int 128
    %34261 = torch.prim.ListConstruct %int4_31706, %2482, %int128_31707 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34262 = torch.aten.view %34260, %34261 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31708 = torch.constant.int 4
    %34263 = torch.aten.mul.int %int4_31708, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31709 = torch.constant.int 4096
    %34264 = torch.prim.ListConstruct %34263, %int4096_31709 : (!torch.int, !torch.int) -> !torch.list<int>
    %34265 = torch.aten.view %34106, %34264 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34265, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34266 = torch.aten.mm %34265, %34244 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34266, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31710 = torch.constant.int 4
    %int128_31711 = torch.constant.int 128
    %34267 = torch.prim.ListConstruct %int4_31710, %2482, %int128_31711 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34268 = torch.aten.view %34266, %34267 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31712 = torch.constant.int 4
    %34269 = torch.aten.mul.int %int4_31712, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31713 = torch.constant.int 4096
    %34270 = torch.prim.ListConstruct %34269, %int4096_31713 : (!torch.int, !torch.int) -> !torch.list<int>
    %34271 = torch.aten.view %34107, %34270 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34271, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34272 = torch.aten.mm %34271, %34246 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34272, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31714 = torch.constant.int 4
    %int128_31715 = torch.constant.int 128
    %34273 = torch.prim.ListConstruct %int4_31714, %2482, %int128_31715 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34274 = torch.aten.view %34272, %34273 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31716 = torch.constant.int 4
    %34275 = torch.aten.mul.int %int4_31716, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31717 = torch.constant.int 4096
    %34276 = torch.prim.ListConstruct %34275, %int4096_31717 : (!torch.int, !torch.int) -> !torch.list<int>
    %34277 = torch.aten.view %34108, %34276 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34277, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34278 = torch.aten.mm %34277, %34248 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34278, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31718 = torch.constant.int 4
    %int128_31719 = torch.constant.int 128
    %34279 = torch.prim.ListConstruct %int4_31718, %2482, %int128_31719 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34280 = torch.aten.view %34278, %34279 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31720 = torch.constant.int 4
    %34281 = torch.aten.mul.int %int4_31720, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31721 = torch.constant.int 4096
    %34282 = torch.prim.ListConstruct %34281, %int4096_31721 : (!torch.int, !torch.int) -> !torch.list<int>
    %34283 = torch.aten.view %34109, %34282 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34283, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34284 = torch.aten.mm %34283, %34250 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34284, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31722 = torch.constant.int 4
    %int128_31723 = torch.constant.int 128
    %34285 = torch.prim.ListConstruct %int4_31722, %2482, %int128_31723 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34286 = torch.aten.view %34284, %34285 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31724 = torch.constant.int 4
    %34287 = torch.aten.mul.int %int4_31724, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31725 = torch.constant.int 4096
    %34288 = torch.prim.ListConstruct %34287, %int4096_31725 : (!torch.int, !torch.int) -> !torch.list<int>
    %34289 = torch.aten.view %34110, %34288 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34289, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34290 = torch.aten.mm %34289, %34252 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34290, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31726 = torch.constant.int 4
    %int128_31727 = torch.constant.int 128
    %34291 = torch.prim.ListConstruct %int4_31726, %2482, %int128_31727 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34292 = torch.aten.view %34290, %34291 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31728 = torch.constant.int 4
    %34293 = torch.aten.mul.int %int4_31728, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31729 = torch.constant.int 4096
    %34294 = torch.prim.ListConstruct %34293, %int4096_31729 : (!torch.int, !torch.int) -> !torch.list<int>
    %34295 = torch.aten.view %34111, %34294 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34295, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34296 = torch.aten.mm %34295, %34254 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34296, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31730 = torch.constant.int 4
    %int128_31731 = torch.constant.int 128
    %34297 = torch.prim.ListConstruct %int4_31730, %2482, %int128_31731 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34298 = torch.aten.view %34296, %34297 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_31732 = torch.constant.int 4
    %34299 = torch.aten.mul.int %int4_31732, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_31733 = torch.constant.int 4096
    %34300 = torch.prim.ListConstruct %34299, %int4096_31733 : (!torch.int, !torch.int) -> !torch.list<int>
    %34301 = torch.aten.view %34112, %34300 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %34301, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %34302 = torch.aten.mm %34301, %34256 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %34302, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_31734 = torch.constant.int 4
    %int128_31735 = torch.constant.int 128
    %34303 = torch.prim.ListConstruct %int4_31734, %2482, %int128_31735 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34304 = torch.aten.view %34302, %34303 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %34304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
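    // Reshape the eight [4,?,512] shards %34134..%34176 (computed earlier) to
    // [4,?,4,128]: four 128-dim attention heads per device. These are
    // apparently the query projections, since RoPE is applied to them below.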
    %int4_31736 = torch.constant.int 4
    %int4_31737 = torch.constant.int 4
    %int128_31738 = torch.constant.int 128
    %34305 = torch.prim.ListConstruct %int4_31736, %2482, %int4_31737, %int128_31738 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34306 = torch.aten.view %34134, %34305 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_31739 = torch.constant.int 4
    %int4_31740 = torch.constant.int 4
    %int128_31741 = torch.constant.int 128
    %34307 = torch.prim.ListConstruct %int4_31739, %2482, %int4_31740, %int128_31741 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34308 = torch.aten.view %34140, %34307 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_31742 = torch.constant.int 4
    %int4_31743 = torch.constant.int 4
    %int128_31744 = torch.constant.int 128
    %34309 = torch.prim.ListConstruct %int4_31742, %2482, %int4_31743, %int128_31744 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34310 = torch.aten.view %34146, %34309 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_31745 = torch.constant.int 4
    %int4_31746 = torch.constant.int 4
    %int128_31747 = torch.constant.int 128
    %34311 = torch.prim.ListConstruct %int4_31745, %2482, %int4_31746, %int128_31747 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34312 = torch.aten.view %34152, %34311 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_31748 = torch.constant.int 4
    %int4_31749 = torch.constant.int 4
    %int128_31750 = torch.constant.int 128
    %34313 = torch.prim.ListConstruct %int4_31748, %2482, %int4_31749, %int128_31750 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34314 = torch.aten.view %34158, %34313 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_31751 = torch.constant.int 4
    %int4_31752 = torch.constant.int 4
    %int128_31753 = torch.constant.int 128
    %34315 = torch.prim.ListConstruct %int4_31751, %2482, %int4_31752, %int128_31753 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34316 = torch.aten.view %34164, %34315 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_31754 = torch.constant.int 4
    %int4_31755 = torch.constant.int 4
    %int128_31756 = torch.constant.int 128
    %34317 = torch.prim.ListConstruct %int4_31754, %2482, %int4_31755, %int128_31756 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34318 = torch.aten.view %34170, %34317 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_31757 = torch.constant.int 4
    %int4_31758 = torch.constant.int 4
    %int128_31759 = torch.constant.int 128
    %34319 = torch.prim.ListConstruct %int4_31757, %2482, %int4_31758, %int128_31759 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34320 = torch.aten.view %34176, %34319 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
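    // Reshape the eight [4,?,128] shards %34198..%34240 (computed earlier) to
    // [4,?,1,128]: a single KV head per device, consistent with
    // grouped-query attention.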
    %int4_31760 = torch.constant.int 4
    %int1_31761 = torch.constant.int 1
    %int128_31762 = torch.constant.int 128
    %34321 = torch.prim.ListConstruct %int4_31760, %2482, %int1_31761, %int128_31762 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34322 = torch.aten.view %34198, %34321 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31763 = torch.constant.int 4
    %int1_31764 = torch.constant.int 1
    %int128_31765 = torch.constant.int 128
    %34323 = torch.prim.ListConstruct %int4_31763, %2482, %int1_31764, %int128_31765 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34324 = torch.aten.view %34204, %34323 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31766 = torch.constant.int 4
    %int1_31767 = torch.constant.int 1
    %int128_31768 = torch.constant.int 128
    %34325 = torch.prim.ListConstruct %int4_31766, %2482, %int1_31767, %int128_31768 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34326 = torch.aten.view %34210, %34325 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31769 = torch.constant.int 4
    %int1_31770 = torch.constant.int 1
    %int128_31771 = torch.constant.int 128
    %34327 = torch.prim.ListConstruct %int4_31769, %2482, %int1_31770, %int128_31771 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34328 = torch.aten.view %34216, %34327 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31772 = torch.constant.int 4
    %int1_31773 = torch.constant.int 1
    %int128_31774 = torch.constant.int 128
    %34329 = torch.prim.ListConstruct %int4_31772, %2482, %int1_31773, %int128_31774 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34330 = torch.aten.view %34222, %34329 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31775 = torch.constant.int 4
    %int1_31776 = torch.constant.int 1
    %int128_31777 = torch.constant.int 128
    %34331 = torch.prim.ListConstruct %int4_31775, %2482, %int1_31776, %int128_31777 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34332 = torch.aten.view %34228, %34331 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31778 = torch.constant.int 4
    %int1_31779 = torch.constant.int 1
    %int128_31780 = torch.constant.int 128
    %34333 = torch.prim.ListConstruct %int4_31778, %2482, %int1_31779, %int128_31780 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34334 = torch.aten.view %34234, %34333 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31781 = torch.constant.int 4
    %int1_31782 = torch.constant.int 1
    %int128_31783 = torch.constant.int 128
    %34335 = torch.prim.ListConstruct %int4_31781, %2482, %int1_31782, %int128_31783 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34336 = torch.aten.view %34240, %34335 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
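    // Likewise reshape the eight projections computed above (%34262..%34304)
    // from [4,?,128] to [4,?,1,128].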
    %int4_31784 = torch.constant.int 4
    %int1_31785 = torch.constant.int 1
    %int128_31786 = torch.constant.int 128
    %34337 = torch.prim.ListConstruct %int4_31784, %2482, %int1_31785, %int128_31786 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34338 = torch.aten.view %34262, %34337 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31787 = torch.constant.int 4
    %int1_31788 = torch.constant.int 1
    %int128_31789 = torch.constant.int 128
    %34339 = torch.prim.ListConstruct %int4_31787, %2482, %int1_31788, %int128_31789 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34340 = torch.aten.view %34268, %34339 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31790 = torch.constant.int 4
    %int1_31791 = torch.constant.int 1
    %int128_31792 = torch.constant.int 128
    %34341 = torch.prim.ListConstruct %int4_31790, %2482, %int1_31791, %int128_31792 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34342 = torch.aten.view %34274, %34341 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31793 = torch.constant.int 4
    %int1_31794 = torch.constant.int 1
    %int128_31795 = torch.constant.int 128
    %34343 = torch.prim.ListConstruct %int4_31793, %2482, %int1_31794, %int128_31795 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34344 = torch.aten.view %34280, %34343 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31796 = torch.constant.int 4
    %int1_31797 = torch.constant.int 1
    %int128_31798 = torch.constant.int 128
    %34345 = torch.prim.ListConstruct %int4_31796, %2482, %int1_31797, %int128_31798 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34346 = torch.aten.view %34286, %34345 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31799 = torch.constant.int 4
    %int1_31800 = torch.constant.int 1
    %int128_31801 = torch.constant.int 128
    %34347 = torch.prim.ListConstruct %int4_31799, %2482, %int1_31800, %int128_31801 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34348 = torch.aten.view %34292, %34347 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31802 = torch.constant.int 4
    %int1_31803 = torch.constant.int 1
    %int128_31804 = torch.constant.int 128
    %34349 = torch.prim.ListConstruct %int4_31802, %2482, %int1_31803, %int128_31804 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34350 = torch.aten.view %34298, %34349 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34350, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_31805 = torch.constant.int 4
    %int1_31806 = torch.constant.int 1
    %int128_31807 = torch.constant.int 128
    %34351 = torch.prim.ListConstruct %int4_31805, %2482, %int1_31806, %int128_31807 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34352 = torch.aten.view %34304, %34351 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
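    // Build the rotary position embedding (RoPE) table on the host:
    // positions = arange(131072); inv_freq[i] = 1 / 500000^(2i/128) for
    // i in [0,64); angles = positions * inv_freq; table =
    // cos(angles) + i*sin(angles), a [131072,64] complex<f32> tensor.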
    %int131072_31808 = torch.constant.int 131072
    %none_31809 = torch.constant.none
    %none_31810 = torch.constant.none
    %cpu_31811 = torch.constant.device "cpu"
    %false_31812 = torch.constant.bool false
    %34353 = torch.aten.arange %int131072_31808, %none_31809, %none_31810, %cpu_31811, %false_31812 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_31813 = torch.constant.int 0
    %int128_31814 = torch.constant.int 128
    %int2_31815 = torch.constant.int 2
    %none_31816 = torch.constant.none
    %none_31817 = torch.constant.none
    %cpu_31818 = torch.constant.device "cpu"
    %false_31819 = torch.constant.bool false
    %34354 = torch.aten.arange.start_step %int0_31813, %int128_31814, %int2_31815, %none_31816, %none_31817, %cpu_31818, %false_31819 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_31820 = torch.constant.int 0
    %int0_31821 = torch.constant.int 0
    %int64_31822 = torch.constant.int 64
    %int1_31823 = torch.constant.int 1
    %34355 = torch.aten.slice.Tensor %34354, %int0_31820, %int0_31821, %int64_31822, %int1_31823 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_31824 = torch.constant.int 6
    %34356 = torch.prims.convert_element_type %34355, %int6_31824 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_31825 = torch.constant.int 128
    %34357 = torch.aten.div.Scalar %34356, %int128_31825 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_31826 = torch.constant.float 5.000000e+05
    %34358 = torch.aten.pow.Scalar %float5.000000e05_31826, %34357 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %34359 = torch.aten.reciprocal %34358 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_31827 = torch.constant.float 1.000000e+00
    %34360 = torch.aten.mul.Scalar %34359, %float1.000000e00_31827 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
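    // %34360 now holds inv_freq: 64 frequencies decaying from 1.0 toward
    // roughly 1/500000 (rope_theta = 5.0e+05; the mul by 1.0 is a no-op
    // scale).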
    %int131072_31828 = torch.constant.int 131072
    %int1_31829 = torch.constant.int 1
    %34361 = torch.prim.ListConstruct %int131072_31828, %int1_31829 : (!torch.int, !torch.int) -> !torch.list<int>
    %34362 = torch.aten.view %34353, %34361 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %34363 = torch.aten.mul.Tensor %34362, %34360 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %34364 = torch.aten.cos %34363 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %34365 = torch.aten.sin %34363 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %34366 = torch.aten.complex %34364, %34365 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
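    // %34366 is the full position/frequency rotation table. It is computed
    // once and then copied to all eight devices via flow.tensor.transfer,
    // yielding one replica (%34369..%34390) per @__device_N.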
    %34367 = torch_c.to_builtin_tensor %34366 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34368 = flow.tensor.transfer %34367 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %34369 = torch_c.from_builtin_tensor %34368 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34370 = torch_c.to_builtin_tensor %34366 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34371 = flow.tensor.transfer %34370 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %34372 = torch_c.from_builtin_tensor %34371 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34373 = torch_c.to_builtin_tensor %34366 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34374 = flow.tensor.transfer %34373 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %34375 = torch_c.from_builtin_tensor %34374 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34376 = torch_c.to_builtin_tensor %34366 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34377 = flow.tensor.transfer %34376 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %34378 = torch_c.from_builtin_tensor %34377 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34379 = torch_c.to_builtin_tensor %34366 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34380 = flow.tensor.transfer %34379 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %34381 = torch_c.from_builtin_tensor %34380 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34382 = torch_c.to_builtin_tensor %34366 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34383 = flow.tensor.transfer %34382 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %34384 = torch_c.from_builtin_tensor %34383 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34385 = torch_c.to_builtin_tensor %34366 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34386 = flow.tensor.transfer %34385 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %34387 = torch_c.from_builtin_tensor %34386 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34388 = torch_c.to_builtin_tensor %34366 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34389 = flow.tensor.transfer %34388 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %34390 = torch_c.from_builtin_tensor %34389 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
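    // Apply RoPE to each device's query shard. Per device: slice the local
    // table replica to the current sequence length, reshape it to [1,?,1,64]
    // for broadcasting, reinterpret the [4,?,4,128] f16 shard as [4,?,4,64]
    // complex<f16> (adjacent element pairs become real/imag), multiply by
    // the table, bitcast back to f32 pairs, and truncate to f16.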
    %int1_31830 = torch.constant.int 1
    %34391 = torch.aten.size.int %34134, %int1_31830 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_31831 = torch.constant.int 0
    %34392 = torch.aten.add.int %int0_31831, %34391 : !torch.int, !torch.int -> !torch.int
    %int0_31832 = torch.constant.int 0
    %int0_31833 = torch.constant.int 0
    %int1_31834 = torch.constant.int 1
    %34393 = torch.aten.slice.Tensor %34369, %int0_31832, %int0_31833, %34392, %int1_31834 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34393, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_31835 = torch.constant.int 1
    %int0_31836 = torch.constant.int 0
    %int9223372036854775807_31837 = torch.constant.int 9223372036854775807
    %int1_31838 = torch.constant.int 1
    %34394 = torch.aten.slice.Tensor %34393, %int1_31835, %int0_31836, %int9223372036854775807_31837, %int1_31838 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34394, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_31839 = torch.constant.int 0
    %34395 = torch.aten.unsqueeze %34394, %int0_31839 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34395, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_31840 = torch.constant.int 2
    %34396 = torch.aten.unsqueeze %34395, %int2_31840 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34396, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_31841 = torch.constant.int 3
    %int0_31842 = torch.constant.int 0
    %int9223372036854775807_31843 = torch.constant.int 9223372036854775807
    %int1_31844 = torch.constant.int 1
    %34397 = torch.aten.slice.Tensor %34396, %int3_31841, %int0_31842, %int9223372036854775807_31843, %int1_31844 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34397, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
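    // %34397: the device-0 table replica, sliced to the sequence length and
    // shaped [1,?,1,64] so it broadcasts across the batch and head
    // dimensions.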
    %34398 = torch_c.to_builtin_tensor %34306 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_31845 = arith.constant 1 : index
    %dim_31846 = tensor.dim %34398, %c1_31845 : tensor<4x?x4x128xf16>
    %34399 = flow.tensor.bitcast %34398 : tensor<4x?x4x128xf16>{%dim_31846} -> tensor<4x?x4x64xcomplex<f16>>{%dim_31846}
    %34400 = torch_c.from_builtin_tensor %34399 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %34400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %34401 = torch.aten.mul.Tensor %34400, %34397 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %34401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %34402 = torch_c.to_builtin_tensor %34401 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_31847 = arith.constant 1 : index
    %dim_31848 = tensor.dim %34402, %c1_31847 : tensor<4x?x4x64xcomplex<f32>>
    %34403 = flow.tensor.bitcast %34402 : tensor<4x?x4x64xcomplex<f32>>{%dim_31848} -> tensor<4x?x4x128xf32>{%dim_31848}
    %34404 = torch_c.from_builtin_tensor %34403 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %34404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_31849 = torch.constant.int 5
    %34405 = torch.prims.convert_element_type %34404, %int5_31849 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
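    // Device 0 done: %34405 is the rotated shard in f16. The same slice /
    // broadcast / bitcast / complex-multiply / bitcast / truncate sequence
    // repeats below for devices 1 through 7.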
    %int1_31850 = torch.constant.int 1
    %34406 = torch.aten.size.int %34140, %int1_31850 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_31851 = torch.constant.int 0
    %34407 = torch.aten.add.int %int0_31851, %34406 : !torch.int, !torch.int -> !torch.int
    %int0_31852 = torch.constant.int 0
    %int0_31853 = torch.constant.int 0
    %int1_31854 = torch.constant.int 1
    %34408 = torch.aten.slice.Tensor %34372, %int0_31852, %int0_31853, %34407, %int1_31854 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34408, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_31855 = torch.constant.int 1
    %int0_31856 = torch.constant.int 0
    %int9223372036854775807_31857 = torch.constant.int 9223372036854775807
    %int1_31858 = torch.constant.int 1
    %34409 = torch.aten.slice.Tensor %34408, %int1_31855, %int0_31856, %int9223372036854775807_31857, %int1_31858 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34409, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_31859 = torch.constant.int 0
    %34410 = torch.aten.unsqueeze %34409, %int0_31859 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34410, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_31860 = torch.constant.int 2
    %34411 = torch.aten.unsqueeze %34410, %int2_31860 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34411, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_31861 = torch.constant.int 3
    %int0_31862 = torch.constant.int 0
    %int9223372036854775807_31863 = torch.constant.int 9223372036854775807
    %int1_31864 = torch.constant.int 1
    %34412 = torch.aten.slice.Tensor %34411, %int3_31861, %int0_31862, %int9223372036854775807_31863, %int1_31864 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34412, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34413 = torch_c.to_builtin_tensor %34308 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_31865 = arith.constant 1 : index
    %dim_31866 = tensor.dim %34413, %c1_31865 : tensor<4x?x4x128xf16>
    %34414 = flow.tensor.bitcast %34413 : tensor<4x?x4x128xf16>{%dim_31866} -> tensor<4x?x4x64xcomplex<f16>>{%dim_31866}
    %34415 = torch_c.from_builtin_tensor %34414 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %34415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %34416 = torch.aten.mul.Tensor %34415, %34412 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %34416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %34417 = torch_c.to_builtin_tensor %34416 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_31867 = arith.constant 1 : index
    %dim_31868 = tensor.dim %34417, %c1_31867 : tensor<4x?x4x64xcomplex<f32>>
    %34418 = flow.tensor.bitcast %34417 : tensor<4x?x4x64xcomplex<f32>>{%dim_31868} -> tensor<4x?x4x128xf32>{%dim_31868}
    %34419 = torch_c.from_builtin_tensor %34418 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %34419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_31869 = torch.constant.int 5
    %34420 = torch.prims.convert_element_type %34419, %int5_31869 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_31870 = torch.constant.int 1
    %34421 = torch.aten.size.int %34146, %int1_31870 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_31871 = torch.constant.int 0
    %34422 = torch.aten.add.int %int0_31871, %34421 : !torch.int, !torch.int -> !torch.int
    %int0_31872 = torch.constant.int 0
    %int0_31873 = torch.constant.int 0
    %int1_31874 = torch.constant.int 1
    %34423 = torch.aten.slice.Tensor %34375, %int0_31872, %int0_31873, %34422, %int1_31874 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34423, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_31875 = torch.constant.int 1
    %int0_31876 = torch.constant.int 0
    %int9223372036854775807_31877 = torch.constant.int 9223372036854775807
    %int1_31878 = torch.constant.int 1
    %34424 = torch.aten.slice.Tensor %34423, %int1_31875, %int0_31876, %int9223372036854775807_31877, %int1_31878 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34424, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_31879 = torch.constant.int 0
    %34425 = torch.aten.unsqueeze %34424, %int0_31879 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34425, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_31880 = torch.constant.int 2
    %34426 = torch.aten.unsqueeze %34425, %int2_31880 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34426, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_31881 = torch.constant.int 3
    %int0_31882 = torch.constant.int 0
    %int9223372036854775807_31883 = torch.constant.int 9223372036854775807
    %int1_31884 = torch.constant.int 1
    %34427 = torch.aten.slice.Tensor %34426, %int3_31881, %int0_31882, %int9223372036854775807_31883, %int1_31884 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34427, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34428 = torch_c.to_builtin_tensor %34310 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_31885 = arith.constant 1 : index
    %dim_31886 = tensor.dim %34428, %c1_31885 : tensor<4x?x4x128xf16>
    %34429 = flow.tensor.bitcast %34428 : tensor<4x?x4x128xf16>{%dim_31886} -> tensor<4x?x4x64xcomplex<f16>>{%dim_31886}
    %34430 = torch_c.from_builtin_tensor %34429 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %34430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %34431 = torch.aten.mul.Tensor %34430, %34427 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %34431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %34432 = torch_c.to_builtin_tensor %34431 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_31887 = arith.constant 1 : index
    %dim_31888 = tensor.dim %34432, %c1_31887 : tensor<4x?x4x64xcomplex<f32>>
    %34433 = flow.tensor.bitcast %34432 : tensor<4x?x4x64xcomplex<f32>>{%dim_31888} -> tensor<4x?x4x128xf32>{%dim_31888}
    %34434 = torch_c.from_builtin_tensor %34433 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %34434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_31889 = torch.constant.int 5
    %34435 = torch.prims.convert_element_type %34434, %int5_31889 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_31890 = torch.constant.int 1
    %34436 = torch.aten.size.int %34152, %int1_31890 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_31891 = torch.constant.int 0
    %34437 = torch.aten.add.int %int0_31891, %34436 : !torch.int, !torch.int -> !torch.int
    %int0_31892 = torch.constant.int 0
    %int0_31893 = torch.constant.int 0
    %int1_31894 = torch.constant.int 1
    %34438 = torch.aten.slice.Tensor %34378, %int0_31892, %int0_31893, %34437, %int1_31894 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34438, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_31895 = torch.constant.int 1
    %int0_31896 = torch.constant.int 0
    %int9223372036854775807_31897 = torch.constant.int 9223372036854775807
    %int1_31898 = torch.constant.int 1
    %34439 = torch.aten.slice.Tensor %34438, %int1_31895, %int0_31896, %int9223372036854775807_31897, %int1_31898 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34439, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_31899 = torch.constant.int 0
    %34440 = torch.aten.unsqueeze %34439, %int0_31899 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34440, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_31900 = torch.constant.int 2
    %34441 = torch.aten.unsqueeze %34440, %int2_31900 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34441, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_31901 = torch.constant.int 3
    %int0_31902 = torch.constant.int 0
    %int9223372036854775807_31903 = torch.constant.int 9223372036854775807
    %int1_31904 = torch.constant.int 1
    %34442 = torch.aten.slice.Tensor %34441, %int3_31901, %int0_31902, %int9223372036854775807_31903, %int1_31904 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34442, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34443 = torch_c.to_builtin_tensor %34312 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_31905 = arith.constant 1 : index
    %dim_31906 = tensor.dim %34443, %c1_31905 : tensor<4x?x4x128xf16>
    %34444 = flow.tensor.bitcast %34443 : tensor<4x?x4x128xf16>{%dim_31906} -> tensor<4x?x4x64xcomplex<f16>>{%dim_31906}
    %34445 = torch_c.from_builtin_tensor %34444 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %34445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %34446 = torch.aten.mul.Tensor %34445, %34442 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %34446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %34447 = torch_c.to_builtin_tensor %34446 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_31907 = arith.constant 1 : index
    %dim_31908 = tensor.dim %34447, %c1_31907 : tensor<4x?x4x64xcomplex<f32>>
    %34448 = flow.tensor.bitcast %34447 : tensor<4x?x4x64xcomplex<f32>>{%dim_31908} -> tensor<4x?x4x128xf32>{%dim_31908}
    %34449 = torch_c.from_builtin_tensor %34448 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %34449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_31909 = torch.constant.int 5
    %34450 = torch.prims.convert_element_type %34449, %int5_31909 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_31910 = torch.constant.int 1
    %34451 = torch.aten.size.int %34158, %int1_31910 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_31911 = torch.constant.int 0
    %34452 = torch.aten.add.int %int0_31911, %34451 : !torch.int, !torch.int -> !torch.int
    %int0_31912 = torch.constant.int 0
    %int0_31913 = torch.constant.int 0
    %int1_31914 = torch.constant.int 1
    %34453 = torch.aten.slice.Tensor %34381, %int0_31912, %int0_31913, %34452, %int1_31914 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34453, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_31915 = torch.constant.int 1
    %int0_31916 = torch.constant.int 0
    %int9223372036854775807_31917 = torch.constant.int 9223372036854775807
    %int1_31918 = torch.constant.int 1
    %34454 = torch.aten.slice.Tensor %34453, %int1_31915, %int0_31916, %int9223372036854775807_31917, %int1_31918 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34454, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_31919 = torch.constant.int 0
    %34455 = torch.aten.unsqueeze %34454, %int0_31919 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34455, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_31920 = torch.constant.int 2
    %34456 = torch.aten.unsqueeze %34455, %int2_31920 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34456, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_31921 = torch.constant.int 3
    %int0_31922 = torch.constant.int 0
    %int9223372036854775807_31923 = torch.constant.int 9223372036854775807
    %int1_31924 = torch.constant.int 1
    %34457 = torch.aten.slice.Tensor %34456, %int3_31921, %int0_31922, %int9223372036854775807_31923, %int1_31924 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34457, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34458 = torch_c.to_builtin_tensor %34314 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_31925 = arith.constant 1 : index
    %dim_31926 = tensor.dim %34458, %c1_31925 : tensor<4x?x4x128xf16>
    %34459 = flow.tensor.bitcast %34458 : tensor<4x?x4x128xf16>{%dim_31926} -> tensor<4x?x4x64xcomplex<f16>>{%dim_31926}
    %34460 = torch_c.from_builtin_tensor %34459 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %34460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %34461 = torch.aten.mul.Tensor %34460, %34457 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %34461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %34462 = torch_c.to_builtin_tensor %34461 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_31927 = arith.constant 1 : index
    %dim_31928 = tensor.dim %34462, %c1_31927 : tensor<4x?x4x64xcomplex<f32>>
    %34463 = flow.tensor.bitcast %34462 : tensor<4x?x4x64xcomplex<f32>>{%dim_31928} -> tensor<4x?x4x128xf32>{%dim_31928}
    %34464 = torch_c.from_builtin_tensor %34463 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %34464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_31929 = torch.constant.int 5
    %34465 = torch.prims.convert_element_type %34464, %int5_31929 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_31930 = torch.constant.int 1
    %34466 = torch.aten.size.int %34164, %int1_31930 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_31931 = torch.constant.int 0
    %34467 = torch.aten.add.int %int0_31931, %34466 : !torch.int, !torch.int -> !torch.int
    %int0_31932 = torch.constant.int 0
    %int0_31933 = torch.constant.int 0
    %int1_31934 = torch.constant.int 1
    %34468 = torch.aten.slice.Tensor %34384, %int0_31932, %int0_31933, %34467, %int1_31934 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34468, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_31935 = torch.constant.int 1
    %int0_31936 = torch.constant.int 0
    %int9223372036854775807_31937 = torch.constant.int 9223372036854775807
    %int1_31938 = torch.constant.int 1
    %34469 = torch.aten.slice.Tensor %34468, %int1_31935, %int0_31936, %int9223372036854775807_31937, %int1_31938 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34469, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_31939 = torch.constant.int 0
    %34470 = torch.aten.unsqueeze %34469, %int0_31939 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34470, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_31940 = torch.constant.int 2
    %34471 = torch.aten.unsqueeze %34470, %int2_31940 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34471, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_31941 = torch.constant.int 3
    %int0_31942 = torch.constant.int 0
    %int9223372036854775807_31943 = torch.constant.int 9223372036854775807
    %int1_31944 = torch.constant.int 1
    %34472 = torch.aten.slice.Tensor %34471, %int3_31941, %int0_31942, %int9223372036854775807_31943, %int1_31944 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34472, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34473 = torch_c.to_builtin_tensor %34316 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_31945 = arith.constant 1 : index
    %dim_31946 = tensor.dim %34473, %c1_31945 : tensor<4x?x4x128xf16>
    %34474 = flow.tensor.bitcast %34473 : tensor<4x?x4x128xf16>{%dim_31946} -> tensor<4x?x4x64xcomplex<f16>>{%dim_31946}
    %34475 = torch_c.from_builtin_tensor %34474 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %34475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %34476 = torch.aten.mul.Tensor %34475, %34472 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %34476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %34477 = torch_c.to_builtin_tensor %34476 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_31947 = arith.constant 1 : index
    %dim_31948 = tensor.dim %34477, %c1_31947 : tensor<4x?x4x64xcomplex<f32>>
    %34478 = flow.tensor.bitcast %34477 : tensor<4x?x4x64xcomplex<f32>>{%dim_31948} -> tensor<4x?x4x128xf32>{%dim_31948}
    %34479 = torch_c.from_builtin_tensor %34478 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %34479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_31949 = torch.constant.int 5
    %34480 = torch.prims.convert_element_type %34479, %int5_31949 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_31950 = torch.constant.int 1
    %34481 = torch.aten.size.int %34170, %int1_31950 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_31951 = torch.constant.int 0
    %34482 = torch.aten.add.int %int0_31951, %34481 : !torch.int, !torch.int -> !torch.int
    %int0_31952 = torch.constant.int 0
    %int0_31953 = torch.constant.int 0
    %int1_31954 = torch.constant.int 1
    %34483 = torch.aten.slice.Tensor %34387, %int0_31952, %int0_31953, %34482, %int1_31954 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34483, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_31955 = torch.constant.int 1
    %int0_31956 = torch.constant.int 0
    %int9223372036854775807_31957 = torch.constant.int 9223372036854775807
    %int1_31958 = torch.constant.int 1
    %34484 = torch.aten.slice.Tensor %34483, %int1_31955, %int0_31956, %int9223372036854775807_31957, %int1_31958 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34484, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_31959 = torch.constant.int 0
    %34485 = torch.aten.unsqueeze %34484, %int0_31959 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34485, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_31960 = torch.constant.int 2
    %34486 = torch.aten.unsqueeze %34485, %int2_31960 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34486, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_31961 = torch.constant.int 3
    %int0_31962 = torch.constant.int 0
    %int9223372036854775807_31963 = torch.constant.int 9223372036854775807
    %int1_31964 = torch.constant.int 1
    %34487 = torch.aten.slice.Tensor %34486, %int3_31961, %int0_31962, %int9223372036854775807_31963, %int1_31964 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34487, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34488 = torch_c.to_builtin_tensor %34318 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_31965 = arith.constant 1 : index
    %dim_31966 = tensor.dim %34488, %c1_31965 : tensor<4x?x4x128xf16>
    %34489 = flow.tensor.bitcast %34488 : tensor<4x?x4x128xf16>{%dim_31966} -> tensor<4x?x4x64xcomplex<f16>>{%dim_31966}
    %34490 = torch_c.from_builtin_tensor %34489 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %34490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %34491 = torch.aten.mul.Tensor %34490, %34487 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %34491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %34492 = torch_c.to_builtin_tensor %34491 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_31967 = arith.constant 1 : index
    %dim_31968 = tensor.dim %34492, %c1_31967 : tensor<4x?x4x64xcomplex<f32>>
    %34493 = flow.tensor.bitcast %34492 : tensor<4x?x4x64xcomplex<f32>>{%dim_31968} -> tensor<4x?x4x128xf32>{%dim_31968}
    %34494 = torch_c.from_builtin_tensor %34493 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %34494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_31969 = torch.constant.int 5
    %34495 = torch.prims.convert_element_type %34494, %int5_31969 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
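    // Rotary position embedding (RoPE) applied to a [4,?,4,128] f16 activation
    // (%34318, likely one device's 4-head attention shard): adjacent feature pairs
    // are reinterpreted as complex<f16> via flow.tensor.bitcast, multiplied
    // elementwise by the broadcast table e^{i*theta} (%34487), bitcast back to 128
    // real f32 lanes, and truncated to f16 (convert_element_type code 5 is f16 in
    // torch's dtype enumeration).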
    %int1_31970 = torch.constant.int 1
    %34496 = torch.aten.size.int %34176, %int1_31970 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_31971 = torch.constant.int 0
    %34497 = torch.aten.add.int %int0_31971, %34496 : !torch.int, !torch.int -> !torch.int
    %int0_31972 = torch.constant.int 0
    %int0_31973 = torch.constant.int 0
    %int1_31974 = torch.constant.int 1
    %34498 = torch.aten.slice.Tensor %34390, %int0_31972, %int0_31973, %34497, %int1_31974 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34498, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_31975 = torch.constant.int 1
    %int0_31976 = torch.constant.int 0
    %int9223372036854775807_31977 = torch.constant.int 9223372036854775807
    %int1_31978 = torch.constant.int 1
    %34499 = torch.aten.slice.Tensor %34498, %int1_31975, %int0_31976, %int9223372036854775807_31977, %int1_31978 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34499, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_31979 = torch.constant.int 0
    %34500 = torch.aten.unsqueeze %34499, %int0_31979 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34500, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_31980 = torch.constant.int 2
    %34501 = torch.aten.unsqueeze %34500, %int2_31980 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34501, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_31981 = torch.constant.int 3
    %int0_31982 = torch.constant.int 0
    %int9223372036854775807_31983 = torch.constant.int 9223372036854775807
    %int1_31984 = torch.constant.int 1
    %34502 = torch.aten.slice.Tensor %34501, %int3_31981, %int0_31982, %int9223372036854775807_31983, %int1_31984 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34502, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34503 = torch_c.to_builtin_tensor %34320 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_31985 = arith.constant 1 : index
    %dim_31986 = tensor.dim %34503, %c1_31985 : tensor<4x?x4x128xf16>
    %34504 = flow.tensor.bitcast %34503 : tensor<4x?x4x128xf16>{%dim_31986} -> tensor<4x?x4x64xcomplex<f16>>{%dim_31986}
    %34505 = torch_c.from_builtin_tensor %34504 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %34505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %34506 = torch.aten.mul.Tensor %34505, %34502 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %34506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %34507 = torch_c.to_builtin_tensor %34506 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_31987 = arith.constant 1 : index
    %dim_31988 = tensor.dim %34507, %c1_31987 : tensor<4x?x4x64xcomplex<f32>>
    %34508 = flow.tensor.bitcast %34507 : tensor<4x?x4x64xcomplex<f32>>{%dim_31988} -> tensor<4x?x4x128xf32>{%dim_31988}
    %34509 = torch_c.from_builtin_tensor %34508 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %34509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_31989 = torch.constant.int 5
    %34510 = torch.prims.convert_element_type %34509, %int5_31989 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
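    // Same slice / bitcast / complex-multiply / bitcast RoPE sequence, repeated for
    // the next [4,?,4,128] activation (%34320) against its own table copy (%34390).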
    %int131072_31990 = torch.constant.int 131072
    %none_31991 = torch.constant.none
    %none_31992 = torch.constant.none
    %cpu_31993 = torch.constant.device "cpu"
    %false_31994 = torch.constant.bool false
    %34511 = torch.aten.arange %int131072_31990, %none_31991, %none_31992, %cpu_31993, %false_31994 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_31995 = torch.constant.int 0
    %int128_31996 = torch.constant.int 128
    %int2_31997 = torch.constant.int 2
    %none_31998 = torch.constant.none
    %none_31999 = torch.constant.none
    %cpu_32000 = torch.constant.device "cpu"
    %false_32001 = torch.constant.bool false
    %34512 = torch.aten.arange.start_step %int0_31995, %int128_31996, %int2_31997, %none_31998, %none_31999, %cpu_32000, %false_32001 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_32002 = torch.constant.int 0
    %int0_32003 = torch.constant.int 0
    %int64_32004 = torch.constant.int 64
    %int1_32005 = torch.constant.int 1
    %34513 = torch.aten.slice.Tensor %34512, %int0_32002, %int0_32003, %int64_32004, %int1_32005 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_32006 = torch.constant.int 6
    %34514 = torch.prims.convert_element_type %34513, %int6_32006 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_32007 = torch.constant.int 128
    %34515 = torch.aten.div.Scalar %34514, %int128_32007 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_32008 = torch.constant.float 5.000000e+05
    %34516 = torch.aten.pow.Scalar %float5.000000e05_32008, %34515 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %34517 = torch.aten.reciprocal %34516 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_32009 = torch.constant.float 1.000000e+00
    %34518 = torch.aten.mul.Scalar %34517, %float1.000000e00_32009 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_32010 = torch.constant.int 131072
    %int1_32011 = torch.constant.int 1
    %34519 = torch.prim.ListConstruct %int131072_32010, %int1_32011 : (!torch.int, !torch.int) -> !torch.list<int>
    %34520 = torch.aten.view %34511, %34519 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %34521 = torch.aten.mul.Tensor %34520, %34518 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %34522 = torch.aten.cos %34521 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %34523 = torch.aten.sin %34521 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %34524 = torch.aten.complex %34522, %34523 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
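    // %34511..%34524 rebuild the rotary table from scratch on the host:
    // inv_freq = 1 / 500000^(arange(0,128,2)[:64] / 128), angles = outer(pos, inv_freq),
    // table = cos(angles) + i*sin(angles) over 131072 positions. The constants here
    // (theta = 5e5, head dim 128, 131072 max positions) match a Llama-3-style RoPE
    // configuration. A rough PyTorch sketch of what this block computes (annotation
    // only, not part of the compiled program; the [:64] slice mirrors the IR's no-op
    // slice of the 64-element arange):
    //   pos = torch.arange(131072)                          # token positions
    //   exp = torch.arange(0, 128, 2)[:64].float() / 128    # 2i/d for d = 128
    //   inv_freq = 1.0 / (500000.0 ** exp)                  # [64]
    //   angles = pos.unsqueeze(1) * inv_freq                # [131072, 64]
    //   table = torch.complex(angles.cos(), angles.sin())   # e^{i*angles}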
    %34525 = torch_c.to_builtin_tensor %34524 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34526 = flow.tensor.transfer %34525 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %34527 = torch_c.from_builtin_tensor %34526 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34528 = torch_c.to_builtin_tensor %34524 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34529 = flow.tensor.transfer %34528 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %34530 = torch_c.from_builtin_tensor %34529 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34531 = torch_c.to_builtin_tensor %34524 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34532 = flow.tensor.transfer %34531 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %34533 = torch_c.from_builtin_tensor %34532 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34534 = torch_c.to_builtin_tensor %34524 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34535 = flow.tensor.transfer %34534 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %34536 = torch_c.from_builtin_tensor %34535 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34537 = torch_c.to_builtin_tensor %34524 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34538 = flow.tensor.transfer %34537 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %34539 = torch_c.from_builtin_tensor %34538 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34540 = torch_c.to_builtin_tensor %34524 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34541 = flow.tensor.transfer %34540 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %34542 = torch_c.from_builtin_tensor %34541 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34543 = torch_c.to_builtin_tensor %34524 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34544 = flow.tensor.transfer %34543 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %34545 = torch_c.from_builtin_tensor %34544 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %34546 = torch_c.to_builtin_tensor %34524 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %34547 = flow.tensor.transfer %34546 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %34548 = torch_c.from_builtin_tensor %34547 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
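    // The freshly built table %34524 is then replicated to all eight devices: each
    // to_builtin_tensor / flow.tensor.transfer / from_builtin_tensor triple copies
    // it to one of @__device_0 .. @__device_7 (%34527, %34530, ..., %34548).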
    %int1_32012 = torch.constant.int 1
    %34549 = torch.aten.size.int %34198, %int1_32012 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_32013 = torch.constant.int 0
    %34550 = torch.aten.add.int %int0_32013, %34549 : !torch.int, !torch.int -> !torch.int
    %int0_32014 = torch.constant.int 0
    %int0_32015 = torch.constant.int 0
    %int1_32016 = torch.constant.int 1
    %34551 = torch.aten.slice.Tensor %34527, %int0_32014, %int0_32015, %34550, %int1_32016 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34551, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_32017 = torch.constant.int 1
    %int0_32018 = torch.constant.int 0
    %int9223372036854775807_32019 = torch.constant.int 9223372036854775807
    %int1_32020 = torch.constant.int 1
    %34552 = torch.aten.slice.Tensor %34551, %int1_32017, %int0_32018, %int9223372036854775807_32019, %int1_32020 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34552, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_32021 = torch.constant.int 0
    %34553 = torch.aten.unsqueeze %34552, %int0_32021 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34553, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_32022 = torch.constant.int 2
    %34554 = torch.aten.unsqueeze %34553, %int2_32022 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34554, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_32023 = torch.constant.int 3
    %int0_32024 = torch.constant.int 0
    %int9223372036854775807_32025 = torch.constant.int 9223372036854775807
    %int1_32026 = torch.constant.int 1
    %34555 = torch.aten.slice.Tensor %34554, %int3_32023, %int0_32024, %int9223372036854775807_32025, %int1_32026 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34555, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34556 = torch_c.to_builtin_tensor %34322 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_32027 = arith.constant 1 : index
    %dim_32028 = tensor.dim %34556, %c1_32027 : tensor<4x?x1x128xf16>
    %34557 = flow.tensor.bitcast %34556 : tensor<4x?x1x128xf16>{%dim_32028} -> tensor<4x?x1x64xcomplex<f16>>{%dim_32028}
    %34558 = torch_c.from_builtin_tensor %34557 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %34558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %34559 = torch.aten.mul.Tensor %34558, %34555 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %34560 = torch_c.to_builtin_tensor %34559 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_32029 = arith.constant 1 : index
    %dim_32030 = tensor.dim %34560, %c1_32029 : tensor<4x?x1x64xcomplex<f32>>
    %34561 = flow.tensor.bitcast %34560 : tensor<4x?x1x64xcomplex<f32>>{%dim_32030} -> tensor<4x?x1x128xf32>{%dim_32030}
    %34562 = torch_c.from_builtin_tensor %34561 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %34562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_32031 = torch.constant.int 5
    %34563 = torch.prims.convert_element_type %34562, %int5_32031 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
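    // Per-device rotary application begins: the device-0 table copy (%34527) is
    // sliced to the sequence length taken from %34198 and broadcast to [1,?,1,64],
    // then multiplied into %34322, a [4,?,1,128] f16 tensor. The single head here
    // suggests this is the projection for the one KV head resident on @__device_0;
    // the identical sequence repeats below for devices 1 through 7.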
    %int1_32032 = torch.constant.int 1
    %34564 = torch.aten.size.int %34204, %int1_32032 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_32033 = torch.constant.int 0
    %34565 = torch.aten.add.int %int0_32033, %34564 : !torch.int, !torch.int -> !torch.int
    %int0_32034 = torch.constant.int 0
    %int0_32035 = torch.constant.int 0
    %int1_32036 = torch.constant.int 1
    %34566 = torch.aten.slice.Tensor %34530, %int0_32034, %int0_32035, %34565, %int1_32036 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34566, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_32037 = torch.constant.int 1
    %int0_32038 = torch.constant.int 0
    %int9223372036854775807_32039 = torch.constant.int 9223372036854775807
    %int1_32040 = torch.constant.int 1
    %34567 = torch.aten.slice.Tensor %34566, %int1_32037, %int0_32038, %int9223372036854775807_32039, %int1_32040 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34567, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_32041 = torch.constant.int 0
    %34568 = torch.aten.unsqueeze %34567, %int0_32041 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34568, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_32042 = torch.constant.int 2
    %34569 = torch.aten.unsqueeze %34568, %int2_32042 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34569, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_32043 = torch.constant.int 3
    %int0_32044 = torch.constant.int 0
    %int9223372036854775807_32045 = torch.constant.int 9223372036854775807
    %int1_32046 = torch.constant.int 1
    %34570 = torch.aten.slice.Tensor %34569, %int3_32043, %int0_32044, %int9223372036854775807_32045, %int1_32046 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34570, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34571 = torch_c.to_builtin_tensor %34324 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_32047 = arith.constant 1 : index
    %dim_32048 = tensor.dim %34571, %c1_32047 : tensor<4x?x1x128xf16>
    %34572 = flow.tensor.bitcast %34571 : tensor<4x?x1x128xf16>{%dim_32048} -> tensor<4x?x1x64xcomplex<f16>>{%dim_32048}
    %34573 = torch_c.from_builtin_tensor %34572 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %34573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %34574 = torch.aten.mul.Tensor %34573, %34570 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %34575 = torch_c.to_builtin_tensor %34574 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_32049 = arith.constant 1 : index
    %dim_32050 = tensor.dim %34575, %c1_32049 : tensor<4x?x1x64xcomplex<f32>>
    %34576 = flow.tensor.bitcast %34575 : tensor<4x?x1x64xcomplex<f32>>{%dim_32050} -> tensor<4x?x1x128xf32>{%dim_32050}
    %34577 = torch_c.from_builtin_tensor %34576 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %34577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_32051 = torch.constant.int 5
    %34578 = torch.prims.convert_element_type %34577, %int5_32051 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_32052 = torch.constant.int 1
    %34579 = torch.aten.size.int %34210, %int1_32052 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_32053 = torch.constant.int 0
    %34580 = torch.aten.add.int %int0_32053, %34579 : !torch.int, !torch.int -> !torch.int
    %int0_32054 = torch.constant.int 0
    %int0_32055 = torch.constant.int 0
    %int1_32056 = torch.constant.int 1
    %34581 = torch.aten.slice.Tensor %34533, %int0_32054, %int0_32055, %34580, %int1_32056 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34581, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_32057 = torch.constant.int 1
    %int0_32058 = torch.constant.int 0
    %int9223372036854775807_32059 = torch.constant.int 9223372036854775807
    %int1_32060 = torch.constant.int 1
    %34582 = torch.aten.slice.Tensor %34581, %int1_32057, %int0_32058, %int9223372036854775807_32059, %int1_32060 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34582, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_32061 = torch.constant.int 0
    %34583 = torch.aten.unsqueeze %34582, %int0_32061 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34583, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_32062 = torch.constant.int 2
    %34584 = torch.aten.unsqueeze %34583, %int2_32062 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34584, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_32063 = torch.constant.int 3
    %int0_32064 = torch.constant.int 0
    %int9223372036854775807_32065 = torch.constant.int 9223372036854775807
    %int1_32066 = torch.constant.int 1
    %34585 = torch.aten.slice.Tensor %34584, %int3_32063, %int0_32064, %int9223372036854775807_32065, %int1_32066 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34585, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34586 = torch_c.to_builtin_tensor %34326 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_32067 = arith.constant 1 : index
    %dim_32068 = tensor.dim %34586, %c1_32067 : tensor<4x?x1x128xf16>
    %34587 = flow.tensor.bitcast %34586 : tensor<4x?x1x128xf16>{%dim_32068} -> tensor<4x?x1x64xcomplex<f16>>{%dim_32068}
    %34588 = torch_c.from_builtin_tensor %34587 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %34588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %34589 = torch.aten.mul.Tensor %34588, %34585 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %34590 = torch_c.to_builtin_tensor %34589 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_32069 = arith.constant 1 : index
    %dim_32070 = tensor.dim %34590, %c1_32069 : tensor<4x?x1x64xcomplex<f32>>
    %34591 = flow.tensor.bitcast %34590 : tensor<4x?x1x64xcomplex<f32>>{%dim_32070} -> tensor<4x?x1x128xf32>{%dim_32070}
    %34592 = torch_c.from_builtin_tensor %34591 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %34592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_32071 = torch.constant.int 5
    %34593 = torch.prims.convert_element_type %34592, %int5_32071 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_32072 = torch.constant.int 1
    %34594 = torch.aten.size.int %34216, %int1_32072 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_32073 = torch.constant.int 0
    %34595 = torch.aten.add.int %int0_32073, %34594 : !torch.int, !torch.int -> !torch.int
    %int0_32074 = torch.constant.int 0
    %int0_32075 = torch.constant.int 0
    %int1_32076 = torch.constant.int 1
    %34596 = torch.aten.slice.Tensor %34536, %int0_32074, %int0_32075, %34595, %int1_32076 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34596, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_32077 = torch.constant.int 1
    %int0_32078 = torch.constant.int 0
    %int9223372036854775807_32079 = torch.constant.int 9223372036854775807
    %int1_32080 = torch.constant.int 1
    %34597 = torch.aten.slice.Tensor %34596, %int1_32077, %int0_32078, %int9223372036854775807_32079, %int1_32080 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34597, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_32081 = torch.constant.int 0
    %34598 = torch.aten.unsqueeze %34597, %int0_32081 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34598, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_32082 = torch.constant.int 2
    %34599 = torch.aten.unsqueeze %34598, %int2_32082 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34599, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_32083 = torch.constant.int 3
    %int0_32084 = torch.constant.int 0
    %int9223372036854775807_32085 = torch.constant.int 9223372036854775807
    %int1_32086 = torch.constant.int 1
    %34600 = torch.aten.slice.Tensor %34599, %int3_32083, %int0_32084, %int9223372036854775807_32085, %int1_32086 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34600, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34601 = torch_c.to_builtin_tensor %34328 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_32087 = arith.constant 1 : index
    %dim_32088 = tensor.dim %34601, %c1_32087 : tensor<4x?x1x128xf16>
    %34602 = flow.tensor.bitcast %34601 : tensor<4x?x1x128xf16>{%dim_32088} -> tensor<4x?x1x64xcomplex<f16>>{%dim_32088}
    %34603 = torch_c.from_builtin_tensor %34602 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %34603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %34604 = torch.aten.mul.Tensor %34603, %34600 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %34605 = torch_c.to_builtin_tensor %34604 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_32089 = arith.constant 1 : index
    %dim_32090 = tensor.dim %34605, %c1_32089 : tensor<4x?x1x64xcomplex<f32>>
    %34606 = flow.tensor.bitcast %34605 : tensor<4x?x1x64xcomplex<f32>>{%dim_32090} -> tensor<4x?x1x128xf32>{%dim_32090}
    %34607 = torch_c.from_builtin_tensor %34606 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %34607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_32091 = torch.constant.int 5
    %34608 = torch.prims.convert_element_type %34607, %int5_32091 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_32092 = torch.constant.int 1
    %34609 = torch.aten.size.int %34222, %int1_32092 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_32093 = torch.constant.int 0
    %34610 = torch.aten.add.int %int0_32093, %34609 : !torch.int, !torch.int -> !torch.int
    %int0_32094 = torch.constant.int 0
    %int0_32095 = torch.constant.int 0
    %int1_32096 = torch.constant.int 1
    %34611 = torch.aten.slice.Tensor %34539, %int0_32094, %int0_32095, %34610, %int1_32096 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34611, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_32097 = torch.constant.int 1
    %int0_32098 = torch.constant.int 0
    %int9223372036854775807_32099 = torch.constant.int 9223372036854775807
    %int1_32100 = torch.constant.int 1
    %34612 = torch.aten.slice.Tensor %34611, %int1_32097, %int0_32098, %int9223372036854775807_32099, %int1_32100 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34612, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_32101 = torch.constant.int 0
    %34613 = torch.aten.unsqueeze %34612, %int0_32101 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34613, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_32102 = torch.constant.int 2
    %34614 = torch.aten.unsqueeze %34613, %int2_32102 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34614, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_32103 = torch.constant.int 3
    %int0_32104 = torch.constant.int 0
    %int9223372036854775807_32105 = torch.constant.int 9223372036854775807
    %int1_32106 = torch.constant.int 1
    %34615 = torch.aten.slice.Tensor %34614, %int3_32103, %int0_32104, %int9223372036854775807_32105, %int1_32106 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34615, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34616 = torch_c.to_builtin_tensor %34330 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_32107 = arith.constant 1 : index
    %dim_32108 = tensor.dim %34616, %c1_32107 : tensor<4x?x1x128xf16>
    %34617 = flow.tensor.bitcast %34616 : tensor<4x?x1x128xf16>{%dim_32108} -> tensor<4x?x1x64xcomplex<f16>>{%dim_32108}
    %34618 = torch_c.from_builtin_tensor %34617 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %34618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %34619 = torch.aten.mul.Tensor %34618, %34615 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %34620 = torch_c.to_builtin_tensor %34619 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_32109 = arith.constant 1 : index
    %dim_32110 = tensor.dim %34620, %c1_32109 : tensor<4x?x1x64xcomplex<f32>>
    %34621 = flow.tensor.bitcast %34620 : tensor<4x?x1x64xcomplex<f32>>{%dim_32110} -> tensor<4x?x1x128xf32>{%dim_32110}
    %34622 = torch_c.from_builtin_tensor %34621 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %34622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_32111 = torch.constant.int 5
    %34623 = torch.prims.convert_element_type %34622, %int5_32111 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_32112 = torch.constant.int 1
    %34624 = torch.aten.size.int %34228, %int1_32112 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_32113 = torch.constant.int 0
    %34625 = torch.aten.add.int %int0_32113, %34624 : !torch.int, !torch.int -> !torch.int
    %int0_32114 = torch.constant.int 0
    %int0_32115 = torch.constant.int 0
    %int1_32116 = torch.constant.int 1
    %34626 = torch.aten.slice.Tensor %34542, %int0_32114, %int0_32115, %34625, %int1_32116 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34626, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_32117 = torch.constant.int 1
    %int0_32118 = torch.constant.int 0
    %int9223372036854775807_32119 = torch.constant.int 9223372036854775807
    %int1_32120 = torch.constant.int 1
    %34627 = torch.aten.slice.Tensor %34626, %int1_32117, %int0_32118, %int9223372036854775807_32119, %int1_32120 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34627, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_32121 = torch.constant.int 0
    %34628 = torch.aten.unsqueeze %34627, %int0_32121 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34628, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_32122 = torch.constant.int 2
    %34629 = torch.aten.unsqueeze %34628, %int2_32122 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34629, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_32123 = torch.constant.int 3
    %int0_32124 = torch.constant.int 0
    %int9223372036854775807_32125 = torch.constant.int 9223372036854775807
    %int1_32126 = torch.constant.int 1
    %34630 = torch.aten.slice.Tensor %34629, %int3_32123, %int0_32124, %int9223372036854775807_32125, %int1_32126 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34630, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34631 = torch_c.to_builtin_tensor %34332 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_32127 = arith.constant 1 : index
    %dim_32128 = tensor.dim %34631, %c1_32127 : tensor<4x?x1x128xf16>
    %34632 = flow.tensor.bitcast %34631 : tensor<4x?x1x128xf16>{%dim_32128} -> tensor<4x?x1x64xcomplex<f16>>{%dim_32128}
    %34633 = torch_c.from_builtin_tensor %34632 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %34633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %34634 = torch.aten.mul.Tensor %34633, %34630 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %34635 = torch_c.to_builtin_tensor %34634 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_32129 = arith.constant 1 : index
    %dim_32130 = tensor.dim %34635, %c1_32129 : tensor<4x?x1x64xcomplex<f32>>
    %34636 = flow.tensor.bitcast %34635 : tensor<4x?x1x64xcomplex<f32>>{%dim_32130} -> tensor<4x?x1x128xf32>{%dim_32130}
    %34637 = torch_c.from_builtin_tensor %34636 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %34637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_32131 = torch.constant.int 5
    %34638 = torch.prims.convert_element_type %34637, %int5_32131 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_32132 = torch.constant.int 1
    %34639 = torch.aten.size.int %34234, %int1_32132 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_32133 = torch.constant.int 0
    %34640 = torch.aten.add.int %int0_32133, %34639 : !torch.int, !torch.int -> !torch.int
    %int0_32134 = torch.constant.int 0
    %int0_32135 = torch.constant.int 0
    %int1_32136 = torch.constant.int 1
    %34641 = torch.aten.slice.Tensor %34545, %int0_32134, %int0_32135, %34640, %int1_32136 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34641, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_32137 = torch.constant.int 1
    %int0_32138 = torch.constant.int 0
    %int9223372036854775807_32139 = torch.constant.int 9223372036854775807
    %int1_32140 = torch.constant.int 1
    %34642 = torch.aten.slice.Tensor %34641, %int1_32137, %int0_32138, %int9223372036854775807_32139, %int1_32140 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34642, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_32141 = torch.constant.int 0
    %34643 = torch.aten.unsqueeze %34642, %int0_32141 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34643, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_32142 = torch.constant.int 2
    %34644 = torch.aten.unsqueeze %34643, %int2_32142 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34644, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_32143 = torch.constant.int 3
    %int0_32144 = torch.constant.int 0
    %int9223372036854775807_32145 = torch.constant.int 9223372036854775807
    %int1_32146 = torch.constant.int 1
    %34645 = torch.aten.slice.Tensor %34644, %int3_32143, %int0_32144, %int9223372036854775807_32145, %int1_32146 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34645, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34646 = torch_c.to_builtin_tensor %34334 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_32147 = arith.constant 1 : index
    %dim_32148 = tensor.dim %34646, %c1_32147 : tensor<4x?x1x128xf16>
    %34647 = flow.tensor.bitcast %34646 : tensor<4x?x1x128xf16>{%dim_32148} -> tensor<4x?x1x64xcomplex<f16>>{%dim_32148}
    %34648 = torch_c.from_builtin_tensor %34647 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %34648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %34649 = torch.aten.mul.Tensor %34648, %34645 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %34650 = torch_c.to_builtin_tensor %34649 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_32149 = arith.constant 1 : index
    %dim_32150 = tensor.dim %34650, %c1_32149 : tensor<4x?x1x64xcomplex<f32>>
    %34651 = flow.tensor.bitcast %34650 : tensor<4x?x1x64xcomplex<f32>>{%dim_32150} -> tensor<4x?x1x128xf32>{%dim_32150}
    %34652 = torch_c.from_builtin_tensor %34651 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %34652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_32151 = torch.constant.int 5
    %34653 = torch.prims.convert_element_type %34652, %int5_32151 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_32152 = torch.constant.int 1
    %34654 = torch.aten.size.int %34240, %int1_32152 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_32153 = torch.constant.int 0
    %34655 = torch.aten.add.int %int0_32153, %34654 : !torch.int, !torch.int -> !torch.int
    %int0_32154 = torch.constant.int 0
    %int0_32155 = torch.constant.int 0
    %int1_32156 = torch.constant.int 1
    %34656 = torch.aten.slice.Tensor %34548, %int0_32154, %int0_32155, %34655, %int1_32156 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34656, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_32157 = torch.constant.int 1
    %int0_32158 = torch.constant.int 0
    %int9223372036854775807_32159 = torch.constant.int 9223372036854775807
    %int1_32160 = torch.constant.int 1
    %34657 = torch.aten.slice.Tensor %34656, %int1_32157, %int0_32158, %int9223372036854775807_32159, %int1_32160 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %34657, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_32161 = torch.constant.int 0
    %34658 = torch.aten.unsqueeze %34657, %int0_32161 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %34658, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_32162 = torch.constant.int 2
    %34659 = torch.aten.unsqueeze %34658, %int2_32162 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34659, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_32163 = torch.constant.int 3
    %int0_32164 = torch.constant.int 0
    %int9223372036854775807_32165 = torch.constant.int 9223372036854775807
    %int1_32166 = torch.constant.int 1
    %34660 = torch.aten.slice.Tensor %34659, %int3_32163, %int0_32164, %int9223372036854775807_32165, %int1_32166 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34660, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %34661 = torch_c.to_builtin_tensor %34336 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_32167 = arith.constant 1 : index
    %dim_32168 = tensor.dim %34661, %c1_32167 : tensor<4x?x1x128xf16>
    %34662 = flow.tensor.bitcast %34661 : tensor<4x?x1x128xf16>{%dim_32168} -> tensor<4x?x1x64xcomplex<f16>>{%dim_32168}
    %34663 = torch_c.from_builtin_tensor %34662 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %34663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %34664 = torch.aten.mul.Tensor %34663, %34660 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %34664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %34665 = torch_c.to_builtin_tensor %34664 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_32169 = arith.constant 1 : index
    %dim_32170 = tensor.dim %34665, %c1_32169 : tensor<4x?x1x64xcomplex<f32>>
    %34666 = flow.tensor.bitcast %34665 : tensor<4x?x1x64xcomplex<f32>>{%dim_32170} -> tensor<4x?x1x128xf32>{%dim_32170}
    %34667 = torch_c.from_builtin_tensor %34666 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %34667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_32171 = torch.constant.int 5
    %34668 = torch.prims.convert_element_type %34667, %int5_32171 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %34668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
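    // End of the unrolled per-device loop: %34563, %34578, %34593, %34608, %34623,
    // %34638, %34653, and %34668 now hold the position-encoded [4,?,1,128] f16
    // tensors for devices 0 through 7 respectively.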
    %int64_32172 = torch.constant.int 64
    %34669 = torch.aten.mul.Scalar %2364, %int64_32172 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34669, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_32173 = torch.constant.int 64
    %34670 = torch.aten.mul.Scalar %2367, %int64_32173 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34670, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_32174 = torch.constant.int 64
    %34671 = torch.aten.mul.Scalar %2370, %int64_32174 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34671, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_32175 = torch.constant.int 64
    %34672 = torch.aten.mul.Scalar %2373, %int64_32175 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34672, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_32176 = torch.constant.int 64
    %34673 = torch.aten.mul.Scalar %2376, %int64_32176 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34673, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_32177 = torch.constant.int 64
    %34674 = torch.aten.mul.Scalar %2379, %int64_32177 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34674, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_32178 = torch.constant.int 64
    %34675 = torch.aten.mul.Scalar %2382, %int64_32178 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34675, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_32179 = torch.constant.int 64
    %34676 = torch.aten.mul.Scalar %2385, %int64_32179 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34676, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
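    // %34669..%34676 scale the eight per-device [4,?] si64 index tensors
    // (%2364, %2367, ..., %2385) by 64, presumably converting page ids into
    // element offsets with a stride of 64 cache slots per page.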
    %int34 = torch.constant.int 34
    %int1_32180 = torch.constant.int 1
    %34677 = torch.aten.add.Scalar %34669, %int34, %int1_32180 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34677, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int34_32181 = torch.constant.int 34
    %int1_32182 = torch.constant.int 1
    %34678 = torch.aten.add.Scalar %34670, %int34_32181, %int1_32182 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34678, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int34_32183 = torch.constant.int 34
    %int1_32184 = torch.constant.int 1
    %34679 = torch.aten.add.Scalar %34671, %int34_32183, %int1_32184 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34679, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int34_32185 = torch.constant.int 34
    %int1_32186 = torch.constant.int 1
    %34680 = torch.aten.add.Scalar %34672, %int34_32185, %int1_32186 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34680, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int34_32187 = torch.constant.int 34
    %int1_32188 = torch.constant.int 1
    %34681 = torch.aten.add.Scalar %34673, %int34_32187, %int1_32188 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34681, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int34_32189 = torch.constant.int 34
    %int1_32190 = torch.constant.int 1
    %34682 = torch.aten.add.Scalar %34674, %int34_32189, %int1_32190 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34682, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int34_32191 = torch.constant.int 34
    %int1_32192 = torch.constant.int 1
    %34683 = torch.aten.add.Scalar %34675, %int34_32191, %int1_32192 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34683, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int34_32193 = torch.constant.int 34
    %int1_32194 = torch.constant.int 1
    %34684 = torch.aten.add.Scalar %34676, %int34_32193, %int1_32194 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34684, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
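    // A constant offset of 34 is then added to every scaled index, which reads as
    // selecting slot 34 of the 64 slots in each page, i.e. this layer's partition
    // of the paged KV cache (the exact slot assignment is not visible here).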
    %int4_32195 = torch.constant.int 4
    %int16_32196 = torch.constant.int 16
    %int1_32197 = torch.constant.int 1
    %int128_32198 = torch.constant.int 128
    %34685 = torch.prim.ListConstruct %int4_32195, %3095, %int16_32196, %int1_32197, %int128_32198 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34686 = torch.aten.view %34563, %34685 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34686, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32199 = torch.constant.int 4
    %int16_32200 = torch.constant.int 16
    %int1_32201 = torch.constant.int 1
    %int128_32202 = torch.constant.int 128
    %34687 = torch.prim.ListConstruct %int4_32199, %3095, %int16_32200, %int1_32201, %int128_32202 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34688 = torch.aten.view %34578, %34687 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34688, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32203 = torch.constant.int 4
    %int16_32204 = torch.constant.int 16
    %int1_32205 = torch.constant.int 1
    %int128_32206 = torch.constant.int 128
    %34689 = torch.prim.ListConstruct %int4_32203, %3095, %int16_32204, %int1_32205, %int128_32206 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34690 = torch.aten.view %34593, %34689 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34690, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32207 = torch.constant.int 4
    %int16_32208 = torch.constant.int 16
    %int1_32209 = torch.constant.int 1
    %int128_32210 = torch.constant.int 128
    %34691 = torch.prim.ListConstruct %int4_32207, %3095, %int16_32208, %int1_32209, %int128_32210 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34692 = torch.aten.view %34608, %34691 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34692, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32211 = torch.constant.int 4
    %int16_32212 = torch.constant.int 16
    %int1_32213 = torch.constant.int 1
    %int128_32214 = torch.constant.int 128
    %34693 = torch.prim.ListConstruct %int4_32211, %3095, %int16_32212, %int1_32213, %int128_32214 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34694 = torch.aten.view %34623, %34693 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34694, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32215 = torch.constant.int 4
    %int16_32216 = torch.constant.int 16
    %int1_32217 = torch.constant.int 1
    %int128_32218 = torch.constant.int 128
    %34695 = torch.prim.ListConstruct %int4_32215, %3095, %int16_32216, %int1_32217, %int128_32218 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34696 = torch.aten.view %34638, %34695 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34696, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32219 = torch.constant.int 4
    %int16_32220 = torch.constant.int 16
    %int1_32221 = torch.constant.int 1
    %int128_32222 = torch.constant.int 128
    %34697 = torch.prim.ListConstruct %int4_32219, %3095, %int16_32220, %int1_32221, %int128_32222 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34698 = torch.aten.view %34653, %34697 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34698, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32223 = torch.constant.int 4
    %int16_32224 = torch.constant.int 16
    %int1_32225 = torch.constant.int 1
    %int128_32226 = torch.constant.int 128
    %34699 = torch.prim.ListConstruct %int4_32223, %3095, %int16_32224, %int1_32225, %int128_32226 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34700 = torch.aten.view %34668, %34699 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34700, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
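    // Each position-encoded [4,?,1,128] tensor is viewed as [4,?,16,1,128]: the
    // dynamic sequence dimension (bound to s0 * 16) is factored into s0 pages of
    // 16 tokens each, consistent with a paged KV-cache layout with 16-token pages.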
    %int4_32227 = torch.constant.int 4
    %34701 = torch.aten.mul.int %int4_32227, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32228 = torch.constant.int 16
    %int1_32229 = torch.constant.int 1
    %int128_32230 = torch.constant.int 128
    %34702 = torch.prim.ListConstruct %34701, %int16_32228, %int1_32229, %int128_32230 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34703 = torch.aten.view %34686, %34702 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34703, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32231 = torch.constant.int 4
    %34704 = torch.aten.mul.int %int4_32231, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32232 = torch.constant.int 16
    %int1_32233 = torch.constant.int 1
    %int128_32234 = torch.constant.int 128
    %34705 = torch.prim.ListConstruct %34704, %int16_32232, %int1_32233, %int128_32234 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34706 = torch.aten.view %34688, %34705 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34706, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32235 = torch.constant.int 4
    %34707 = torch.aten.mul.int %int4_32235, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32236 = torch.constant.int 16
    %int1_32237 = torch.constant.int 1
    %int128_32238 = torch.constant.int 128
    %34708 = torch.prim.ListConstruct %34707, %int16_32236, %int1_32237, %int128_32238 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34709 = torch.aten.view %34690, %34708 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34709, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32239 = torch.constant.int 4
    %34710 = torch.aten.mul.int %int4_32239, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32240 = torch.constant.int 16
    %int1_32241 = torch.constant.int 1
    %int128_32242 = torch.constant.int 128
    %34711 = torch.prim.ListConstruct %34710, %int16_32240, %int1_32241, %int128_32242 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34712 = torch.aten.view %34692, %34711 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34712, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32243 = torch.constant.int 4
    %34713 = torch.aten.mul.int %int4_32243, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32244 = torch.constant.int 16
    %int1_32245 = torch.constant.int 1
    %int128_32246 = torch.constant.int 128
    %34714 = torch.prim.ListConstruct %34713, %int16_32244, %int1_32245, %int128_32246 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34715 = torch.aten.view %34694, %34714 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34715, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32247 = torch.constant.int 4
    %34716 = torch.aten.mul.int %int4_32247, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32248 = torch.constant.int 16
    %int1_32249 = torch.constant.int 1
    %int128_32250 = torch.constant.int 128
    %34717 = torch.prim.ListConstruct %34716, %int16_32248, %int1_32249, %int128_32250 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34718 = torch.aten.view %34696, %34717 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34718, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32251 = torch.constant.int 4
    %34719 = torch.aten.mul.int %int4_32251, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32252 = torch.constant.int 16
    %int1_32253 = torch.constant.int 1
    %int128_32254 = torch.constant.int 128
    %34720 = torch.prim.ListConstruct %34719, %int16_32252, %int1_32253, %int128_32254 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34721 = torch.aten.view %34698, %34720 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34721, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32255 = torch.constant.int 4
    %34722 = torch.aten.mul.int %int4_32255, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32256 = torch.constant.int 16
    %int1_32257 = torch.constant.int 1
    %int128_32258 = torch.constant.int 128
    %34723 = torch.prim.ListConstruct %34722, %int16_32256, %int1_32257, %int128_32258 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34724 = torch.aten.view %34700, %34723 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34724, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
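    // The [4,?] si64 index tensors are flattened the same way ([4,?] -> [4*?])
    // so they line up one-to-one with the flattened [4*?,16,1,128] rows above.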
    %int4_32259 = torch.constant.int 4
    %34725 = torch.aten.mul.int %int4_32259, %3095 : !torch.int, !torch.int -> !torch.int
    %34726 = torch.prim.ListConstruct %34725 : (!torch.int) -> !torch.list<int>
    %34727 = torch.aten.view %34677, %34726 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34727, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32260 = torch.constant.int 4
    %34728 = torch.aten.mul.int %int4_32260, %3095 : !torch.int, !torch.int -> !torch.int
    %34729 = torch.prim.ListConstruct %34728 : (!torch.int) -> !torch.list<int>
    %34730 = torch.aten.view %34678, %34729 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34730, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32261 = torch.constant.int 4
    %34731 = torch.aten.mul.int %int4_32261, %3095 : !torch.int, !torch.int -> !torch.int
    %34732 = torch.prim.ListConstruct %34731 : (!torch.int) -> !torch.list<int>
    %34733 = torch.aten.view %34679, %34732 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34733, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32262 = torch.constant.int 4
    %34734 = torch.aten.mul.int %int4_32262, %3095 : !torch.int, !torch.int -> !torch.int
    %34735 = torch.prim.ListConstruct %34734 : (!torch.int) -> !torch.list<int>
    %34736 = torch.aten.view %34680, %34735 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34736, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32263 = torch.constant.int 4
    %34737 = torch.aten.mul.int %int4_32263, %3095 : !torch.int, !torch.int -> !torch.int
    %34738 = torch.prim.ListConstruct %34737 : (!torch.int) -> !torch.list<int>
    %34739 = torch.aten.view %34681, %34738 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34739, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32264 = torch.constant.int 4
    %34740 = torch.aten.mul.int %int4_32264, %3095 : !torch.int, !torch.int -> !torch.int
    %34741 = torch.prim.ListConstruct %34740 : (!torch.int) -> !torch.list<int>
    %34742 = torch.aten.view %34682, %34741 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34742, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32265 = torch.constant.int 4
    %34743 = torch.aten.mul.int %int4_32265, %3095 : !torch.int, !torch.int -> !torch.int
    %34744 = torch.prim.ListConstruct %34743 : (!torch.int) -> !torch.list<int>
    %34745 = torch.aten.view %34683, %34744 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34745, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32266 = torch.constant.int 4
    %34746 = torch.aten.mul.int %int4_32266, %3095 : !torch.int, !torch.int -> !torch.int
    %34747 = torch.prim.ListConstruct %34746 : (!torch.int) -> !torch.list<int>
    %34748 = torch.aten.view %34684, %34747 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34748, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
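    // Annotation: a second set of eight per-shard [4,?,1,128] tensors gets the same
    // [4,?,1,128] -> [4,?,16,1,128] blocking. Judging by the offset (+1) indices
    // computed further below, these are presumably the partner halves of the
    // interleaved K/V writes.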
    %int4_32267 = torch.constant.int 4
    %int16_32268 = torch.constant.int 16
    %int1_32269 = torch.constant.int 1
    %int128_32270 = torch.constant.int 128
    %34749 = torch.prim.ListConstruct %int4_32267, %3095, %int16_32268, %int1_32269, %int128_32270 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34750 = torch.aten.view %34338, %34749 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34750, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32271 = torch.constant.int 4
    %int16_32272 = torch.constant.int 16
    %int1_32273 = torch.constant.int 1
    %int128_32274 = torch.constant.int 128
    %34751 = torch.prim.ListConstruct %int4_32271, %3095, %int16_32272, %int1_32273, %int128_32274 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34752 = torch.aten.view %34340, %34751 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34752, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32275 = torch.constant.int 4
    %int16_32276 = torch.constant.int 16
    %int1_32277 = torch.constant.int 1
    %int128_32278 = torch.constant.int 128
    %34753 = torch.prim.ListConstruct %int4_32275, %3095, %int16_32276, %int1_32277, %int128_32278 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34754 = torch.aten.view %34342, %34753 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34754, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32279 = torch.constant.int 4
    %int16_32280 = torch.constant.int 16
    %int1_32281 = torch.constant.int 1
    %int128_32282 = torch.constant.int 128
    %34755 = torch.prim.ListConstruct %int4_32279, %3095, %int16_32280, %int1_32281, %int128_32282 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34756 = torch.aten.view %34344, %34755 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34756, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32283 = torch.constant.int 4
    %int16_32284 = torch.constant.int 16
    %int1_32285 = torch.constant.int 1
    %int128_32286 = torch.constant.int 128
    %34757 = torch.prim.ListConstruct %int4_32283, %3095, %int16_32284, %int1_32285, %int128_32286 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34758 = torch.aten.view %34346, %34757 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34758, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32287 = torch.constant.int 4
    %int16_32288 = torch.constant.int 16
    %int1_32289 = torch.constant.int 1
    %int128_32290 = torch.constant.int 128
    %34759 = torch.prim.ListConstruct %int4_32287, %3095, %int16_32288, %int1_32289, %int128_32290 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34760 = torch.aten.view %34348, %34759 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34760, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32291 = torch.constant.int 4
    %int16_32292 = torch.constant.int 16
    %int1_32293 = torch.constant.int 1
    %int128_32294 = torch.constant.int 128
    %34761 = torch.prim.ListConstruct %int4_32291, %3095, %int16_32292, %int1_32293, %int128_32294 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34762 = torch.aten.view %34350, %34761 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34762, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_32295 = torch.constant.int 4
    %int16_32296 = torch.constant.int 16
    %int1_32297 = torch.constant.int 1
    %int128_32298 = torch.constant.int 128
    %34763 = torch.prim.ListConstruct %int4_32295, %3095, %int16_32296, %int1_32297, %int128_32298 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34764 = torch.aten.view %34352, %34763 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %34764, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
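    // Flatten the second set to [4*?,16,1,128] rows as well, mirroring the first.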
    %int4_32299 = torch.constant.int 4
    %34765 = torch.aten.mul.int %int4_32299, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32300 = torch.constant.int 16
    %int1_32301 = torch.constant.int 1
    %int128_32302 = torch.constant.int 128
    %34766 = torch.prim.ListConstruct %34765, %int16_32300, %int1_32301, %int128_32302 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34767 = torch.aten.view %34750, %34766 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34767, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32303 = torch.constant.int 4
    %34768 = torch.aten.mul.int %int4_32303, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32304 = torch.constant.int 16
    %int1_32305 = torch.constant.int 1
    %int128_32306 = torch.constant.int 128
    %34769 = torch.prim.ListConstruct %34768, %int16_32304, %int1_32305, %int128_32306 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34770 = torch.aten.view %34752, %34769 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34770, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32307 = torch.constant.int 4
    %34771 = torch.aten.mul.int %int4_32307, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32308 = torch.constant.int 16
    %int1_32309 = torch.constant.int 1
    %int128_32310 = torch.constant.int 128
    %34772 = torch.prim.ListConstruct %34771, %int16_32308, %int1_32309, %int128_32310 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34773 = torch.aten.view %34754, %34772 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34773, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32311 = torch.constant.int 4
    %34774 = torch.aten.mul.int %int4_32311, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32312 = torch.constant.int 16
    %int1_32313 = torch.constant.int 1
    %int128_32314 = torch.constant.int 128
    %34775 = torch.prim.ListConstruct %34774, %int16_32312, %int1_32313, %int128_32314 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34776 = torch.aten.view %34756, %34775 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34776, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32315 = torch.constant.int 4
    %34777 = torch.aten.mul.int %int4_32315, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32316 = torch.constant.int 16
    %int1_32317 = torch.constant.int 1
    %int128_32318 = torch.constant.int 128
    %34778 = torch.prim.ListConstruct %34777, %int16_32316, %int1_32317, %int128_32318 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34779 = torch.aten.view %34758, %34778 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34779, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32319 = torch.constant.int 4
    %34780 = torch.aten.mul.int %int4_32319, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32320 = torch.constant.int 16
    %int1_32321 = torch.constant.int 1
    %int128_32322 = torch.constant.int 128
    %34781 = torch.prim.ListConstruct %34780, %int16_32320, %int1_32321, %int128_32322 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34782 = torch.aten.view %34760, %34781 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34782, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32323 = torch.constant.int 4
    %34783 = torch.aten.mul.int %int4_32323, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32324 = torch.constant.int 16
    %int1_32325 = torch.constant.int 1
    %int128_32326 = torch.constant.int 128
    %34784 = torch.prim.ListConstruct %34783, %int16_32324, %int1_32325, %int128_32326 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34785 = torch.aten.view %34762, %34784 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34785, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_32327 = torch.constant.int 4
    %34786 = torch.aten.mul.int %int4_32327, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_32328 = torch.constant.int 16
    %int1_32329 = torch.constant.int 1
    %int128_32330 = torch.constant.int 128
    %34787 = torch.prim.ListConstruct %34786, %int16_32328, %int1_32329, %int128_32330 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34788 = torch.aten.view %34764, %34787 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34788, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
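    // Annotation (interpretive): each page-slot index is offset by 1 and then
    // flattened like the base indices. With the page pool viewed as
    // [pages,32,2,16,1,128] and flattened to [pages*64,...] below, consecutive
    // rows alternate along the size-2 axis, so index+1 plausibly selects the
    // value slot paired with the key slot at index.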
    %int1_32331 = torch.constant.int 1
    %int1_32332 = torch.constant.int 1
    %34789 = torch.aten.add.Scalar %34677, %int1_32331, %int1_32332 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34789, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_32333 = torch.constant.int 1
    %int1_32334 = torch.constant.int 1
    %34790 = torch.aten.add.Scalar %34678, %int1_32333, %int1_32334 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34790, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_32335 = torch.constant.int 1
    %int1_32336 = torch.constant.int 1
    %34791 = torch.aten.add.Scalar %34679, %int1_32335, %int1_32336 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34791, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_32337 = torch.constant.int 1
    %int1_32338 = torch.constant.int 1
    %34792 = torch.aten.add.Scalar %34680, %int1_32337, %int1_32338 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34792, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_32339 = torch.constant.int 1
    %int1_32340 = torch.constant.int 1
    %34793 = torch.aten.add.Scalar %34681, %int1_32339, %int1_32340 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34793, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_32341 = torch.constant.int 1
    %int1_32342 = torch.constant.int 1
    %34794 = torch.aten.add.Scalar %34682, %int1_32341, %int1_32342 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34794, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_32343 = torch.constant.int 1
    %int1_32344 = torch.constant.int 1
    %34795 = torch.aten.add.Scalar %34683, %int1_32343, %int1_32344 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34795, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_32345 = torch.constant.int 1
    %int1_32346 = torch.constant.int 1
    %34796 = torch.aten.add.Scalar %34684, %int1_32345, %int1_32346 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %34796, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_32347 = torch.constant.int 4
    %34797 = torch.aten.mul.int %int4_32347, %3095 : !torch.int, !torch.int -> !torch.int
    %34798 = torch.prim.ListConstruct %34797 : (!torch.int) -> !torch.list<int>
    %34799 = torch.aten.view %34789, %34798 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34799, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32348 = torch.constant.int 4
    %34800 = torch.aten.mul.int %int4_32348, %3095 : !torch.int, !torch.int -> !torch.int
    %34801 = torch.prim.ListConstruct %34800 : (!torch.int) -> !torch.list<int>
    %34802 = torch.aten.view %34790, %34801 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34802, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32349 = torch.constant.int 4
    %34803 = torch.aten.mul.int %int4_32349, %3095 : !torch.int, !torch.int -> !torch.int
    %34804 = torch.prim.ListConstruct %34803 : (!torch.int) -> !torch.list<int>
    %34805 = torch.aten.view %34791, %34804 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34805, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32350 = torch.constant.int 4
    %34806 = torch.aten.mul.int %int4_32350, %3095 : !torch.int, !torch.int -> !torch.int
    %34807 = torch.prim.ListConstruct %34806 : (!torch.int) -> !torch.list<int>
    %34808 = torch.aten.view %34792, %34807 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34808, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32351 = torch.constant.int 4
    %34809 = torch.aten.mul.int %int4_32351, %3095 : !torch.int, !torch.int -> !torch.int
    %34810 = torch.prim.ListConstruct %34809 : (!torch.int) -> !torch.list<int>
    %34811 = torch.aten.view %34793, %34810 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34811, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32352 = torch.constant.int 4
    %34812 = torch.aten.mul.int %int4_32352, %3095 : !torch.int, !torch.int -> !torch.int
    %34813 = torch.prim.ListConstruct %34812 : (!torch.int) -> !torch.list<int>
    %34814 = torch.aten.view %34794, %34813 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34814, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32353 = torch.constant.int 4
    %34815 = torch.aten.mul.int %int4_32353, %3095 : !torch.int, !torch.int -> !torch.int
    %34816 = torch.prim.ListConstruct %34815 : (!torch.int) -> !torch.list<int>
    %34817 = torch.aten.view %34795, %34816 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34817, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_32354 = torch.constant.int 4
    %34818 = torch.aten.mul.int %int4_32354, %3095 : !torch.int, !torch.int -> !torch.int
    %34819 = torch.prim.ListConstruct %34818 : (!torch.int) -> !torch.list<int>
    %34820 = torch.aten.view %34796, %34819 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34820, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
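    // Concatenate base and +1 indices per shard into one s0*8-row tensor, so a
    // single index_put below covers both halves of each K/V pair.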
    %34821 = torch.prim.ListConstruct %34727, %34799 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_32355 = torch.constant.int 0
    %34822 = torch.aten.cat %34821, %int0_32355 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34822, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %34823 = torch.prim.ListConstruct %34730, %34802 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_32356 = torch.constant.int 0
    %34824 = torch.aten.cat %34823, %int0_32356 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34824, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %34825 = torch.prim.ListConstruct %34733, %34805 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_32357 = torch.constant.int 0
    %34826 = torch.aten.cat %34825, %int0_32357 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34826, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %34827 = torch.prim.ListConstruct %34736, %34808 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_32358 = torch.constant.int 0
    %34828 = torch.aten.cat %34827, %int0_32358 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34828, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %34829 = torch.prim.ListConstruct %34739, %34811 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_32359 = torch.constant.int 0
    %34830 = torch.aten.cat %34829, %int0_32359 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34830, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %34831 = torch.prim.ListConstruct %34742, %34814 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_32360 = torch.constant.int 0
    %34832 = torch.aten.cat %34831, %int0_32360 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34832, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %34833 = torch.prim.ListConstruct %34745, %34817 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_32361 = torch.constant.int 0
    %34834 = torch.aten.cat %34833, %int0_32361 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34834, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %34835 = torch.prim.ListConstruct %34748, %34820 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_32362 = torch.constant.int 0
    %34836 = torch.aten.cat %34835, %int0_32362 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %34836, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
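    // Concatenate the matching payload rows in the same order, so row i of the
    // combined values lands at the slot named by row i of the combined indices.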
    %34837 = torch.prim.ListConstruct %34703, %34767 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_32363 = torch.constant.int 0
    %34838 = torch.aten.cat %34837, %int0_32363 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34838, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34839 = torch.prim.ListConstruct %34706, %34770 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_32364 = torch.constant.int 0
    %34840 = torch.aten.cat %34839, %int0_32364 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34840, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34841 = torch.prim.ListConstruct %34709, %34773 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_32365 = torch.constant.int 0
    %34842 = torch.aten.cat %34841, %int0_32365 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34842, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34843 = torch.prim.ListConstruct %34712, %34776 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_32366 = torch.constant.int 0
    %34844 = torch.aten.cat %34843, %int0_32366 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34844, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34845 = torch.prim.ListConstruct %34715, %34779 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_32367 = torch.constant.int 0
    %34846 = torch.aten.cat %34845, %int0_32367 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34846, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34847 = torch.prim.ListConstruct %34718, %34782 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_32368 = torch.constant.int 0
    %34848 = torch.aten.cat %34847, %int0_32368 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34848, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34849 = torch.prim.ListConstruct %34721, %34785 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_32369 = torch.constant.int 0
    %34850 = torch.aten.cat %34849, %int0_32369 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34850, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34851 = torch.prim.ListConstruct %34724, %34788 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_32370 = torch.constant.int 0
    %34852 = torch.aten.cat %34851, %int0_32370 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34852, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
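    // Annotation: per-shard cache update, repeated once per device below. Each
    // flat [?,131072] page pool is viewed as [pages,32,2,16,1,128] (likely
    // 32 layers x {k,v} x 16 tokens x 1 head x 128 head-dim; 32*2*16*1*128 =
    // 131072 elements per page), flattened to [pages*64,16,1,128] rows,
    // scattered into with index_put, then restored to [?,131072].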
    %int32_32371 = torch.constant.int 32
    %int2_32372 = torch.constant.int 2
    %int16_32373 = torch.constant.int 16
    %int1_32374 = torch.constant.int 1
    %int128_32375 = torch.constant.int 128
    %34853 = torch.prim.ListConstruct %3023, %int32_32371, %int2_32372, %int16_32373, %int1_32374, %int128_32375 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34854 = torch.aten.view %33003, %34853 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34854, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_32376 = torch.constant.int 32
    %34855 = torch.aten.mul.int %3023, %int32_32376 : !torch.int, !torch.int -> !torch.int
    %int2_32377 = torch.constant.int 2
    %34856 = torch.aten.mul.int %34855, %int2_32377 : !torch.int, !torch.int -> !torch.int
    %int16_32378 = torch.constant.int 16
    %int1_32379 = torch.constant.int 1
    %int128_32380 = torch.constant.int 128
    %34857 = torch.prim.ListConstruct %34856, %int16_32378, %int1_32379, %int128_32380 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34858 = torch.aten.view %34854, %34857 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34858, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34859 = torch.prim.ListConstruct %34822 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_32381 = torch.constant.bool false
    %34860 = torch.aten.index_put %34858, %34859, %34838, %false_32381 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34860, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_32382 = torch.constant.int 32
    %int2_32383 = torch.constant.int 2
    %int16_32384 = torch.constant.int 16
    %int1_32385 = torch.constant.int 1
    %int128_32386 = torch.constant.int 128
    %34861 = torch.prim.ListConstruct %3023, %int32_32382, %int2_32383, %int16_32384, %int1_32385, %int128_32386 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34862 = torch.aten.view %34860, %34861 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34862, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_32387 = torch.constant.int 131072
    %34863 = torch.prim.ListConstruct %3023, %int131072_32387 : (!torch.int, !torch.int) -> !torch.list<int>
    %34864 = torch.aten.view %34862, %34863 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %34864, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_32388 = torch.constant.int 32
    %int2_32389 = torch.constant.int 2
    %int16_32390 = torch.constant.int 16
    %int1_32391 = torch.constant.int 1
    %int128_32392 = torch.constant.int 128
    %34865 = torch.prim.ListConstruct %3026, %int32_32388, %int2_32389, %int16_32390, %int1_32391, %int128_32392 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34866 = torch.aten.view %33015, %34865 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34866, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_32393 = torch.constant.int 32
    %34867 = torch.aten.mul.int %3026, %int32_32393 : !torch.int, !torch.int -> !torch.int
    %int2_32394 = torch.constant.int 2
    %34868 = torch.aten.mul.int %34867, %int2_32394 : !torch.int, !torch.int -> !torch.int
    %int16_32395 = torch.constant.int 16
    %int1_32396 = torch.constant.int 1
    %int128_32397 = torch.constant.int 128
    %34869 = torch.prim.ListConstruct %34868, %int16_32395, %int1_32396, %int128_32397 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34870 = torch.aten.view %34866, %34869 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34870, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34871 = torch.prim.ListConstruct %34824 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_32398 = torch.constant.bool false
    %34872 = torch.aten.index_put %34870, %34871, %34840, %false_32398 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34872, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_32399 = torch.constant.int 32
    %int2_32400 = torch.constant.int 2
    %int16_32401 = torch.constant.int 16
    %int1_32402 = torch.constant.int 1
    %int128_32403 = torch.constant.int 128
    %34873 = torch.prim.ListConstruct %3026, %int32_32399, %int2_32400, %int16_32401, %int1_32402, %int128_32403 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34874 = torch.aten.view %34872, %34873 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34874, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_32404 = torch.constant.int 131072
    %34875 = torch.prim.ListConstruct %3026, %int131072_32404 : (!torch.int, !torch.int) -> !torch.list<int>
    %34876 = torch.aten.view %34874, %34875 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %34876, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_32405 = torch.constant.int 32
    %int2_32406 = torch.constant.int 2
    %int16_32407 = torch.constant.int 16
    %int1_32408 = torch.constant.int 1
    %int128_32409 = torch.constant.int 128
    %34877 = torch.prim.ListConstruct %3029, %int32_32405, %int2_32406, %int16_32407, %int1_32408, %int128_32409 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34878 = torch.aten.view %33027, %34877 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34878, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_32410 = torch.constant.int 32
    %34879 = torch.aten.mul.int %3029, %int32_32410 : !torch.int, !torch.int -> !torch.int
    %int2_32411 = torch.constant.int 2
    %34880 = torch.aten.mul.int %34879, %int2_32411 : !torch.int, !torch.int -> !torch.int
    %int16_32412 = torch.constant.int 16
    %int1_32413 = torch.constant.int 1
    %int128_32414 = torch.constant.int 128
    %34881 = torch.prim.ListConstruct %34880, %int16_32412, %int1_32413, %int128_32414 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34882 = torch.aten.view %34878, %34881 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34882, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34883 = torch.prim.ListConstruct %34826 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_32415 = torch.constant.bool false
    %34884 = torch.aten.index_put %34882, %34883, %34842, %false_32415 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34884, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_32416 = torch.constant.int 32
    %int2_32417 = torch.constant.int 2
    %int16_32418 = torch.constant.int 16
    %int1_32419 = torch.constant.int 1
    %int128_32420 = torch.constant.int 128
    %34885 = torch.prim.ListConstruct %3029, %int32_32416, %int2_32417, %int16_32418, %int1_32419, %int128_32420 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34886 = torch.aten.view %34884, %34885 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34886, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_32421 = torch.constant.int 131072
    %34887 = torch.prim.ListConstruct %3029, %int131072_32421 : (!torch.int, !torch.int) -> !torch.list<int>
    %34888 = torch.aten.view %34886, %34887 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %34888, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_32422 = torch.constant.int 32
    %int2_32423 = torch.constant.int 2
    %int16_32424 = torch.constant.int 16
    %int1_32425 = torch.constant.int 1
    %int128_32426 = torch.constant.int 128
    %34889 = torch.prim.ListConstruct %3032, %int32_32422, %int2_32423, %int16_32424, %int1_32425, %int128_32426 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34890 = torch.aten.view %33039, %34889 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34890, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_32427 = torch.constant.int 32
    %34891 = torch.aten.mul.int %3032, %int32_32427 : !torch.int, !torch.int -> !torch.int
    %int2_32428 = torch.constant.int 2
    %34892 = torch.aten.mul.int %34891, %int2_32428 : !torch.int, !torch.int -> !torch.int
    %int16_32429 = torch.constant.int 16
    %int1_32430 = torch.constant.int 1
    %int128_32431 = torch.constant.int 128
    %34893 = torch.prim.ListConstruct %34892, %int16_32429, %int1_32430, %int128_32431 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34894 = torch.aten.view %34890, %34893 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34894, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34895 = torch.prim.ListConstruct %34828 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_32432 = torch.constant.bool false
    %34896 = torch.aten.index_put %34894, %34895, %34844, %false_32432 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34896, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_32433 = torch.constant.int 32
    %int2_32434 = torch.constant.int 2
    %int16_32435 = torch.constant.int 16
    %int1_32436 = torch.constant.int 1
    %int128_32437 = torch.constant.int 128
    %34897 = torch.prim.ListConstruct %3032, %int32_32433, %int2_32434, %int16_32435, %int1_32436, %int128_32437 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34898 = torch.aten.view %34896, %34897 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34898, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_32438 = torch.constant.int 131072
    %34899 = torch.prim.ListConstruct %3032, %int131072_32438 : (!torch.int, !torch.int) -> !torch.list<int>
    %34900 = torch.aten.view %34898, %34899 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %34900, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_32439 = torch.constant.int 32
    %int2_32440 = torch.constant.int 2
    %int16_32441 = torch.constant.int 16
    %int1_32442 = torch.constant.int 1
    %int128_32443 = torch.constant.int 128
    %34901 = torch.prim.ListConstruct %3035, %int32_32439, %int2_32440, %int16_32441, %int1_32442, %int128_32443 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34902 = torch.aten.view %33051, %34901 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34902, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_32444 = torch.constant.int 32
    %34903 = torch.aten.mul.int %3035, %int32_32444 : !torch.int, !torch.int -> !torch.int
    %int2_32445 = torch.constant.int 2
    %34904 = torch.aten.mul.int %34903, %int2_32445 : !torch.int, !torch.int -> !torch.int
    %int16_32446 = torch.constant.int 16
    %int1_32447 = torch.constant.int 1
    %int128_32448 = torch.constant.int 128
    %34905 = torch.prim.ListConstruct %34904, %int16_32446, %int1_32447, %int128_32448 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34906 = torch.aten.view %34902, %34905 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34906, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34907 = torch.prim.ListConstruct %34830 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_32449 = torch.constant.bool false
    %34908 = torch.aten.index_put %34906, %34907, %34846, %false_32449 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34908, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_32450 = torch.constant.int 32
    %int2_32451 = torch.constant.int 2
    %int16_32452 = torch.constant.int 16
    %int1_32453 = torch.constant.int 1
    %int128_32454 = torch.constant.int 128
    %34909 = torch.prim.ListConstruct %3035, %int32_32450, %int2_32451, %int16_32452, %int1_32453, %int128_32454 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34910 = torch.aten.view %34908, %34909 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34910, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_32455 = torch.constant.int 131072
    %34911 = torch.prim.ListConstruct %3035, %int131072_32455 : (!torch.int, !torch.int) -> !torch.list<int>
    %34912 = torch.aten.view %34910, %34911 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %34912, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_32456 = torch.constant.int 32
    %int2_32457 = torch.constant.int 2
    %int16_32458 = torch.constant.int 16
    %int1_32459 = torch.constant.int 1
    %int128_32460 = torch.constant.int 128
    %34913 = torch.prim.ListConstruct %3038, %int32_32456, %int2_32457, %int16_32458, %int1_32459, %int128_32460 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34914 = torch.aten.view %33063, %34913 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34914, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_32461 = torch.constant.int 32
    %34915 = torch.aten.mul.int %3038, %int32_32461 : !torch.int, !torch.int -> !torch.int
    %int2_32462 = torch.constant.int 2
    %34916 = torch.aten.mul.int %34915, %int2_32462 : !torch.int, !torch.int -> !torch.int
    %int16_32463 = torch.constant.int 16
    %int1_32464 = torch.constant.int 1
    %int128_32465 = torch.constant.int 128
    %34917 = torch.prim.ListConstruct %34916, %int16_32463, %int1_32464, %int128_32465 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34918 = torch.aten.view %34914, %34917 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34918, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34919 = torch.prim.ListConstruct %34832 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_32466 = torch.constant.bool false
    %34920 = torch.aten.index_put %34918, %34919, %34848, %false_32466 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34920, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_32467 = torch.constant.int 32
    %int2_32468 = torch.constant.int 2
    %int16_32469 = torch.constant.int 16
    %int1_32470 = torch.constant.int 1
    %int128_32471 = torch.constant.int 128
    %34921 = torch.prim.ListConstruct %3038, %int32_32467, %int2_32468, %int16_32469, %int1_32470, %int128_32471 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34922 = torch.aten.view %34920, %34921 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34922, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_32472 = torch.constant.int 131072
    %34923 = torch.prim.ListConstruct %3038, %int131072_32472 : (!torch.int, !torch.int) -> !torch.list<int>
    %34924 = torch.aten.view %34922, %34923 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %34924, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_32473 = torch.constant.int 32
    %int2_32474 = torch.constant.int 2
    %int16_32475 = torch.constant.int 16
    %int1_32476 = torch.constant.int 1
    %int128_32477 = torch.constant.int 128
    %34925 = torch.prim.ListConstruct %3041, %int32_32473, %int2_32474, %int16_32475, %int1_32476, %int128_32477 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34926 = torch.aten.view %33075, %34925 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34926, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_32478 = torch.constant.int 32
    %34927 = torch.aten.mul.int %3041, %int32_32478 : !torch.int, !torch.int -> !torch.int
    %int2_32479 = torch.constant.int 2
    %34928 = torch.aten.mul.int %34927, %int2_32479 : !torch.int, !torch.int -> !torch.int
    %int16_32480 = torch.constant.int 16
    %int1_32481 = torch.constant.int 1
    %int128_32482 = torch.constant.int 128
    %34929 = torch.prim.ListConstruct %34928, %int16_32480, %int1_32481, %int128_32482 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34930 = torch.aten.view %34926, %34929 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34930, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34931 = torch.prim.ListConstruct %34834 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_32483 = torch.constant.bool false
    %34932 = torch.aten.index_put %34930, %34931, %34850, %false_32483 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34932, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_32484 = torch.constant.int 32
    %int2_32485 = torch.constant.int 2
    %int16_32486 = torch.constant.int 16
    %int1_32487 = torch.constant.int 1
    %int128_32488 = torch.constant.int 128
    %34933 = torch.prim.ListConstruct %3041, %int32_32484, %int2_32485, %int16_32486, %int1_32487, %int128_32488 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34934 = torch.aten.view %34932, %34933 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34934, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_32489 = torch.constant.int 131072
    %34935 = torch.prim.ListConstruct %3041, %int131072_32489 : (!torch.int, !torch.int) -> !torch.list<int>
    %34936 = torch.aten.view %34934, %34935 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %34936, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_32490 = torch.constant.int 32
    %int2_32491 = torch.constant.int 2
    %int16_32492 = torch.constant.int 16
    %int1_32493 = torch.constant.int 1
    %int128_32494 = torch.constant.int 128
    %34937 = torch.prim.ListConstruct %3044, %int32_32490, %int2_32491, %int16_32492, %int1_32493, %int128_32494 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34938 = torch.aten.view %33087, %34937 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34938, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_32495 = torch.constant.int 32
    %34939 = torch.aten.mul.int %3044, %int32_32495 : !torch.int, !torch.int -> !torch.int
    %int2_32496 = torch.constant.int 2
    %34940 = torch.aten.mul.int %34939, %int2_32496 : !torch.int, !torch.int -> !torch.int
    %int16_32497 = torch.constant.int 16
    %int1_32498 = torch.constant.int 1
    %int128_32499 = torch.constant.int 128
    %34941 = torch.prim.ListConstruct %34940, %int16_32497, %int1_32498, %int128_32499 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34942 = torch.aten.view %34938, %34941 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34942, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %34943 = torch.prim.ListConstruct %34836 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_32500 = torch.constant.bool false
    %34944 = torch.aten.index_put %34942, %34943, %34852, %false_32500 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %34944, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_32501 = torch.constant.int 32
    %int2_32502 = torch.constant.int 2
    %int16_32503 = torch.constant.int 16
    %int1_32504 = torch.constant.int 1
    %int128_32505 = torch.constant.int 128
    %34945 = torch.prim.ListConstruct %3044, %int32_32501, %int2_32502, %int16_32503, %int1_32504, %int128_32505 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34946 = torch.aten.view %34944, %34945 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %34946, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_32506 = torch.constant.int 131072
    %34947 = torch.prim.ListConstruct %3044, %int131072_32506 : (!torch.int, !torch.int) -> !torch.list<int>
    %34948 = torch.aten.view %34946, %34947 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %34948, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
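    // Annotation: with the cache written, the current [4,seq,1,128] tensors
    // (one KV head per shard; note the dynamic dim is bound as s0*16 here,
    // i.e. tokens rather than 16-token blocks) are unsqueezed to
    // [4,seq,1,1,128] and expanded to [4,seq,1,4,128], broadcasting the single
    // KV head across 4 query heads, consistent with grouped-query attention
    // at this 8-way sharding.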
    %int-2_32507 = torch.constant.int -2
    %34949 = torch.aten.unsqueeze %34563, %int-2_32507 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32508 = torch.constant.int -2
    %34950 = torch.aten.unsqueeze %34578, %int-2_32508 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32509 = torch.constant.int -2
    %34951 = torch.aten.unsqueeze %34593, %int-2_32509 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32510 = torch.constant.int -2
    %34952 = torch.aten.unsqueeze %34608, %int-2_32510 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32511 = torch.constant.int -2
    %34953 = torch.aten.unsqueeze %34623, %int-2_32511 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32512 = torch.constant.int -2
    %34954 = torch.aten.unsqueeze %34638, %int-2_32512 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32513 = torch.constant.int -2
    %34955 = torch.aten.unsqueeze %34653, %int-2_32513 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32514 = torch.constant.int -2
    %34956 = torch.aten.unsqueeze %34668, %int-2_32514 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int4_32515 = torch.constant.int 4
    %int1_32516 = torch.constant.int 1
    %int4_32517 = torch.constant.int 4
    %int128_32518 = torch.constant.int 128
    %34957 = torch.prim.ListConstruct %int4_32515, %34549, %int1_32516, %int4_32517, %int128_32518 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32519 = torch.constant.bool false
    %34958 = torch.aten.expand %34949, %34957, %false_32519 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %34958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32520 = torch.constant.int 4
    %int1_32521 = torch.constant.int 1
    %int4_32522 = torch.constant.int 4
    %int128_32523 = torch.constant.int 128
    %34959 = torch.prim.ListConstruct %int4_32520, %34549, %int1_32521, %int4_32522, %int128_32523 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32524 = torch.constant.bool false
    %34960 = torch.aten.expand %34950, %34959, %false_32524 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %34960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32525 = torch.constant.int 4
    %int1_32526 = torch.constant.int 1
    %int4_32527 = torch.constant.int 4
    %int128_32528 = torch.constant.int 128
    %34961 = torch.prim.ListConstruct %int4_32525, %34549, %int1_32526, %int4_32527, %int128_32528 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32529 = torch.constant.bool false
    %34962 = torch.aten.expand %34951, %34961, %false_32529 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %34962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32530 = torch.constant.int 4
    %int1_32531 = torch.constant.int 1
    %int4_32532 = torch.constant.int 4
    %int128_32533 = torch.constant.int 128
    %34963 = torch.prim.ListConstruct %int4_32530, %34549, %int1_32531, %int4_32532, %int128_32533 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32534 = torch.constant.bool false
    %34964 = torch.aten.expand %34952, %34963, %false_32534 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %34964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32535 = torch.constant.int 4
    %int1_32536 = torch.constant.int 1
    %int4_32537 = torch.constant.int 4
    %int128_32538 = torch.constant.int 128
    %34965 = torch.prim.ListConstruct %int4_32535, %34549, %int1_32536, %int4_32537, %int128_32538 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32539 = torch.constant.bool false
    %34966 = torch.aten.expand %34953, %34965, %false_32539 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %34966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32540 = torch.constant.int 4
    %int1_32541 = torch.constant.int 1
    %int4_32542 = torch.constant.int 4
    %int128_32543 = torch.constant.int 128
    %34967 = torch.prim.ListConstruct %int4_32540, %34549, %int1_32541, %int4_32542, %int128_32543 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32544 = torch.constant.bool false
    %34968 = torch.aten.expand %34954, %34967, %false_32544 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %34968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32545 = torch.constant.int 4
    %int1_32546 = torch.constant.int 1
    %int4_32547 = torch.constant.int 4
    %int128_32548 = torch.constant.int 128
    %34969 = torch.prim.ListConstruct %int4_32545, %34549, %int1_32546, %int4_32547, %int128_32548 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32549 = torch.constant.bool false
    %34970 = torch.aten.expand %34955, %34969, %false_32549 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %34970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32550 = torch.constant.int 4
    %int1_32551 = torch.constant.int 1
    %int4_32552 = torch.constant.int 4
    %int128_32553 = torch.constant.int 128
    %34971 = torch.prim.ListConstruct %int4_32550, %34549, %int1_32551, %int4_32552, %int128_32553 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32554 = torch.constant.bool false
    %34972 = torch.aten.expand %34956, %34971, %false_32554 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %34972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
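    // Collapse the (kv_head = 1, replication = 4) pair back into a single head
    // axis: [4,?,1,4,128] -> [4,?,4,128], giving these tensors the same head
    // count as the queries.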
    %int4_32555 = torch.constant.int 4
    %int4_32556 = torch.constant.int 4
    %int128_32557 = torch.constant.int 128
    %34973 = torch.prim.ListConstruct %int4_32555, %34549, %int4_32556, %int128_32557 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34974 = torch.aten.view %34958, %34973 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32558 = torch.constant.int 4
    %int4_32559 = torch.constant.int 4
    %int128_32560 = torch.constant.int 128
    %34975 = torch.prim.ListConstruct %int4_32558, %34549, %int4_32559, %int128_32560 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34976 = torch.aten.view %34960, %34975 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32561 = torch.constant.int 4
    %int4_32562 = torch.constant.int 4
    %int128_32563 = torch.constant.int 128
    %34977 = torch.prim.ListConstruct %int4_32561, %34549, %int4_32562, %int128_32563 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34978 = torch.aten.view %34962, %34977 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32564 = torch.constant.int 4
    %int4_32565 = torch.constant.int 4
    %int128_32566 = torch.constant.int 128
    %34979 = torch.prim.ListConstruct %int4_32564, %34549, %int4_32565, %int128_32566 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34980 = torch.aten.view %34964, %34979 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32567 = torch.constant.int 4
    %int4_32568 = torch.constant.int 4
    %int128_32569 = torch.constant.int 128
    %34981 = torch.prim.ListConstruct %int4_32567, %34549, %int4_32568, %int128_32569 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34982 = torch.aten.view %34966, %34981 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32570 = torch.constant.int 4
    %int4_32571 = torch.constant.int 4
    %int128_32572 = torch.constant.int 128
    %34983 = torch.prim.ListConstruct %int4_32570, %34549, %int4_32571, %int128_32572 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34984 = torch.aten.view %34968, %34983 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32573 = torch.constant.int 4
    %int4_32574 = torch.constant.int 4
    %int128_32575 = torch.constant.int 128
    %34985 = torch.prim.ListConstruct %int4_32573, %34549, %int4_32574, %int128_32575 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34986 = torch.aten.view %34970, %34985 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32576 = torch.constant.int 4
    %int4_32577 = torch.constant.int 4
    %int128_32578 = torch.constant.int 128
    %34987 = torch.prim.ListConstruct %int4_32576, %34549, %int4_32577, %int128_32578 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %34988 = torch.aten.view %34972, %34987 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %34988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
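    // The same unsqueeze pattern repeats for a second group of eight per-device
    // tensors ([4,?,1,128] -> [4,?,1,1,128]); tracing the dataflow into the
    // attention ops below, these appear to be the value tensors.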
    %int-2_32579 = torch.constant.int -2
    %34989 = torch.aten.unsqueeze %34338, %int-2_32579 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32580 = torch.constant.int -2
    %34990 = torch.aten.unsqueeze %34340, %int-2_32580 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32581 = torch.constant.int -2
    %34991 = torch.aten.unsqueeze %34342, %int-2_32581 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32582 = torch.constant.int -2
    %34992 = torch.aten.unsqueeze %34344, %int-2_32582 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32583 = torch.constant.int -2
    %34993 = torch.aten.unsqueeze %34346, %int-2_32583 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32584 = torch.constant.int -2
    %34994 = torch.aten.unsqueeze %34348, %int-2_32584 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32585 = torch.constant.int -2
    %34995 = torch.aten.unsqueeze %34350, %int-2_32585 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_32586 = torch.constant.int -2
    %34996 = torch.aten.unsqueeze %34352, %int-2_32586 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %34996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
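    // Read the dynamic sequence extent (dim 1 of a [4,?,128] operand) and expand
    // each tensor in the second group to [4,?,1,4,128], mirroring the
    // replication done for the first group.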
    %int1_32587 = torch.constant.int 1
    %34997 = torch.aten.size.int %34262, %int1_32587 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_32588 = torch.constant.int 4
    %int1_32589 = torch.constant.int 1
    %int4_32590 = torch.constant.int 4
    %int128_32591 = torch.constant.int 128
    %34998 = torch.prim.ListConstruct %int4_32588, %34997, %int1_32589, %int4_32590, %int128_32591 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32592 = torch.constant.bool false
    %34999 = torch.aten.expand %34989, %34998, %false_32592 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %34999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32593 = torch.constant.int 4
    %int1_32594 = torch.constant.int 1
    %int4_32595 = torch.constant.int 4
    %int128_32596 = torch.constant.int 128
    %35000 = torch.prim.ListConstruct %int4_32593, %34997, %int1_32594, %int4_32595, %int128_32596 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32597 = torch.constant.bool false
    %35001 = torch.aten.expand %34990, %35000, %false_32597 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %35001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32598 = torch.constant.int 4
    %int1_32599 = torch.constant.int 1
    %int4_32600 = torch.constant.int 4
    %int128_32601 = torch.constant.int 128
    %35002 = torch.prim.ListConstruct %int4_32598, %34997, %int1_32599, %int4_32600, %int128_32601 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32602 = torch.constant.bool false
    %35003 = torch.aten.expand %34991, %35002, %false_32602 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %35003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32603 = torch.constant.int 4
    %int1_32604 = torch.constant.int 1
    %int4_32605 = torch.constant.int 4
    %int128_32606 = torch.constant.int 128
    %35004 = torch.prim.ListConstruct %int4_32603, %34997, %int1_32604, %int4_32605, %int128_32606 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32607 = torch.constant.bool false
    %35005 = torch.aten.expand %34992, %35004, %false_32607 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %35005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32608 = torch.constant.int 4
    %int1_32609 = torch.constant.int 1
    %int4_32610 = torch.constant.int 4
    %int128_32611 = torch.constant.int 128
    %35006 = torch.prim.ListConstruct %int4_32608, %34997, %int1_32609, %int4_32610, %int128_32611 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32612 = torch.constant.bool false
    %35007 = torch.aten.expand %34993, %35006, %false_32612 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %35007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32613 = torch.constant.int 4
    %int1_32614 = torch.constant.int 1
    %int4_32615 = torch.constant.int 4
    %int128_32616 = torch.constant.int 128
    %35008 = torch.prim.ListConstruct %int4_32613, %34997, %int1_32614, %int4_32615, %int128_32616 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32617 = torch.constant.bool false
    %35009 = torch.aten.expand %34994, %35008, %false_32617 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %35009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32618 = torch.constant.int 4
    %int1_32619 = torch.constant.int 1
    %int4_32620 = torch.constant.int 4
    %int128_32621 = torch.constant.int 128
    %35010 = torch.prim.ListConstruct %int4_32618, %34997, %int1_32619, %int4_32620, %int128_32621 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32622 = torch.constant.bool false
    %35011 = torch.aten.expand %34995, %35010, %false_32622 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %35011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_32623 = torch.constant.int 4
    %int1_32624 = torch.constant.int 1
    %int4_32625 = torch.constant.int 4
    %int128_32626 = torch.constant.int 128
    %35012 = torch.prim.ListConstruct %int4_32623, %34997, %int1_32624, %int4_32625, %int128_32626 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_32627 = torch.constant.bool false
    %35013 = torch.aten.expand %34996, %35012, %false_32627 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %35013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
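    // Collapse the replicated tensors to [4,?,4,128], exactly as on the key path
    // above.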
    %int4_32628 = torch.constant.int 4
    %int4_32629 = torch.constant.int 4
    %int128_32630 = torch.constant.int 128
    %35014 = torch.prim.ListConstruct %int4_32628, %34997, %int4_32629, %int128_32630 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35015 = torch.aten.view %34999, %35014 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32631 = torch.constant.int 4
    %int4_32632 = torch.constant.int 4
    %int128_32633 = torch.constant.int 128
    %35016 = torch.prim.ListConstruct %int4_32631, %34997, %int4_32632, %int128_32633 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35017 = torch.aten.view %35001, %35016 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32634 = torch.constant.int 4
    %int4_32635 = torch.constant.int 4
    %int128_32636 = torch.constant.int 128
    %35018 = torch.prim.ListConstruct %int4_32634, %34997, %int4_32635, %int128_32636 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35019 = torch.aten.view %35003, %35018 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32637 = torch.constant.int 4
    %int4_32638 = torch.constant.int 4
    %int128_32639 = torch.constant.int 128
    %35020 = torch.prim.ListConstruct %int4_32637, %34997, %int4_32638, %int128_32639 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35021 = torch.aten.view %35005, %35020 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32640 = torch.constant.int 4
    %int4_32641 = torch.constant.int 4
    %int128_32642 = torch.constant.int 128
    %35022 = torch.prim.ListConstruct %int4_32640, %34997, %int4_32641, %int128_32642 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35023 = torch.aten.view %35007, %35022 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32643 = torch.constant.int 4
    %int4_32644 = torch.constant.int 4
    %int128_32645 = torch.constant.int 128
    %35024 = torch.prim.ListConstruct %int4_32643, %34997, %int4_32644, %int128_32645 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35025 = torch.aten.view %35009, %35024 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32646 = torch.constant.int 4
    %int4_32647 = torch.constant.int 4
    %int128_32648 = torch.constant.int 128
    %35026 = torch.prim.ListConstruct %int4_32646, %34997, %int4_32647, %int128_32648 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35027 = torch.aten.view %35011, %35026 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_32649 = torch.constant.int 4
    %int4_32650 = torch.constant.int 4
    %int128_32651 = torch.constant.int 128
    %35028 = torch.prim.ListConstruct %int4_32649, %34997, %int4_32650, %int128_32651 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35029 = torch.aten.view %35013, %35028 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
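    // Transpose dims 1 and 2 to move heads ahead of sequence:
    // [batch, seq, heads, head_dim] -> [batch, heads, seq, head_dim]
    // ([4,?,4,128] -> [4,4,?,128]), the layout the attention op below expects.
    // The first eight transposes handle the queries (defined earlier), the next
    // eight the keys, and the last eight the values.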
    %int1_32652 = torch.constant.int 1
    %int2_32653 = torch.constant.int 2
    %35030 = torch.aten.transpose.int %34405, %int1_32652, %int2_32653 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35030, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32654 = torch.constant.int 1
    %int2_32655 = torch.constant.int 2
    %35031 = torch.aten.transpose.int %34420, %int1_32654, %int2_32655 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35031, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32656 = torch.constant.int 1
    %int2_32657 = torch.constant.int 2
    %35032 = torch.aten.transpose.int %34435, %int1_32656, %int2_32657 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35032, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32658 = torch.constant.int 1
    %int2_32659 = torch.constant.int 2
    %35033 = torch.aten.transpose.int %34450, %int1_32658, %int2_32659 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35033, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32660 = torch.constant.int 1
    %int2_32661 = torch.constant.int 2
    %35034 = torch.aten.transpose.int %34465, %int1_32660, %int2_32661 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35034, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32662 = torch.constant.int 1
    %int2_32663 = torch.constant.int 2
    %35035 = torch.aten.transpose.int %34480, %int1_32662, %int2_32663 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35035, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32664 = torch.constant.int 1
    %int2_32665 = torch.constant.int 2
    %35036 = torch.aten.transpose.int %34495, %int1_32664, %int2_32665 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35036, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32666 = torch.constant.int 1
    %int2_32667 = torch.constant.int 2
    %35037 = torch.aten.transpose.int %34510, %int1_32666, %int2_32667 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35037, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32668 = torch.constant.int 1
    %int2_32669 = torch.constant.int 2
    %35038 = torch.aten.transpose.int %34974, %int1_32668, %int2_32669 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35038, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32670 = torch.constant.int 1
    %int2_32671 = torch.constant.int 2
    %35039 = torch.aten.transpose.int %34976, %int1_32670, %int2_32671 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35039, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32672 = torch.constant.int 1
    %int2_32673 = torch.constant.int 2
    %35040 = torch.aten.transpose.int %34978, %int1_32672, %int2_32673 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35040, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32674 = torch.constant.int 1
    %int2_32675 = torch.constant.int 2
    %35041 = torch.aten.transpose.int %34980, %int1_32674, %int2_32675 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35041, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32676 = torch.constant.int 1
    %int2_32677 = torch.constant.int 2
    %35042 = torch.aten.transpose.int %34982, %int1_32676, %int2_32677 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35042, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32678 = torch.constant.int 1
    %int2_32679 = torch.constant.int 2
    %35043 = torch.aten.transpose.int %34984, %int1_32678, %int2_32679 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35043, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32680 = torch.constant.int 1
    %int2_32681 = torch.constant.int 2
    %35044 = torch.aten.transpose.int %34986, %int1_32680, %int2_32681 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35044, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32682 = torch.constant.int 1
    %int2_32683 = torch.constant.int 2
    %35045 = torch.aten.transpose.int %34988, %int1_32682, %int2_32683 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35045, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32684 = torch.constant.int 1
    %int2_32685 = torch.constant.int 2
    %35046 = torch.aten.transpose.int %35015, %int1_32684, %int2_32685 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35046, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32686 = torch.constant.int 1
    %int2_32687 = torch.constant.int 2
    %35047 = torch.aten.transpose.int %35017, %int1_32686, %int2_32687 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35047, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32688 = torch.constant.int 1
    %int2_32689 = torch.constant.int 2
    %35048 = torch.aten.transpose.int %35019, %int1_32688, %int2_32689 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35048, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32690 = torch.constant.int 1
    %int2_32691 = torch.constant.int 2
    %35049 = torch.aten.transpose.int %35021, %int1_32690, %int2_32691 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35049, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32692 = torch.constant.int 1
    %int2_32693 = torch.constant.int 2
    %35050 = torch.aten.transpose.int %35023, %int1_32692, %int2_32693 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35050, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32694 = torch.constant.int 1
    %int2_32695 = torch.constant.int 2
    %35051 = torch.aten.transpose.int %35025, %int1_32694, %int2_32695 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35051, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32696 = torch.constant.int 1
    %int2_32697 = torch.constant.int 2
    %35052 = torch.aten.transpose.int %35027, %int1_32696, %int2_32697 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35052, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_32698 = torch.constant.int 1
    %int2_32699 = torch.constant.int 2
    %35053 = torch.aten.transpose.int %35029, %int1_32698, %int2_32699 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %35053, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
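    // One fused scaled-dot-product flash-attention call per device shard:
    // dropout_p = 0.0, is_causal = true, no explicit attention mask, and the
    // default 1/sqrt(head_dim) scale (both trailing operands are none). Result
    // #0 is the attention output; result #1 (logsumexp statistics) is unused.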
    %float0.000000e00_32700 = torch.constant.float 0.000000e+00
    %true_32701 = torch.constant.bool true
    %none_32702 = torch.constant.none
    %none_32703 = torch.constant.none
    %35054:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%35030, %35038, %35046, %float0.000000e00_32700, %true_32701, %none_32702, %none_32703) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %35054#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_32704 = torch.constant.float 0.000000e+00
    %true_32705 = torch.constant.bool true
    %none_32706 = torch.constant.none
    %none_32707 = torch.constant.none
    %35055:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%35031, %35039, %35047, %float0.000000e00_32704, %true_32705, %none_32706, %none_32707) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %35055#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_32708 = torch.constant.float 0.000000e+00
    %true_32709 = torch.constant.bool true
    %none_32710 = torch.constant.none
    %none_32711 = torch.constant.none
    %35056:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%35032, %35040, %35048, %float0.000000e00_32708, %true_32709, %none_32710, %none_32711) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %35056#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_32712 = torch.constant.float 0.000000e+00
    %true_32713 = torch.constant.bool true
    %none_32714 = torch.constant.none
    %none_32715 = torch.constant.none
    %35057:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%35033, %35041, %35049, %float0.000000e00_32712, %true_32713, %none_32714, %none_32715) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %35057#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_32716 = torch.constant.float 0.000000e+00
    %true_32717 = torch.constant.bool true
    %none_32718 = torch.constant.none
    %none_32719 = torch.constant.none
    %35058:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%35034, %35042, %35050, %float0.000000e00_32716, %true_32717, %none_32718, %none_32719) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %35058#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_32720 = torch.constant.float 0.000000e+00
    %true_32721 = torch.constant.bool true
    %none_32722 = torch.constant.none
    %none_32723 = torch.constant.none
    %35059:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%35035, %35043, %35051, %float0.000000e00_32720, %true_32721, %none_32722, %none_32723) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %35059#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_32724 = torch.constant.float 0.000000e+00
    %true_32725 = torch.constant.bool true
    %none_32726 = torch.constant.none
    %none_32727 = torch.constant.none
    %35060:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%35036, %35044, %35052, %float0.000000e00_32724, %true_32725, %none_32726, %none_32727) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %35060#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_32728 = torch.constant.float 0.000000e+00
    %true_32729 = torch.constant.bool true
    %none_32730 = torch.constant.none
    %none_32731 = torch.constant.none
    %35061:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%35037, %35045, %35053, %float0.000000e00_32728, %true_32729, %none_32730, %none_32731) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %35061#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
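    // Transpose each attention output back to [batch, seq, heads, head_dim]
    // ([4,4,?,128] -> [4,?,4,128]) ahead of the head merge.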
    %int1_32732 = torch.constant.int 1
    %int2_32733 = torch.constant.int 2
    %35062 = torch.aten.transpose.int %35054#0, %int1_32732, %int2_32733 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_32734 = torch.constant.int 1
    %int2_32735 = torch.constant.int 2
    %35063 = torch.aten.transpose.int %35055#0, %int1_32734, %int2_32735 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_32736 = torch.constant.int 1
    %int2_32737 = torch.constant.int 2
    %35064 = torch.aten.transpose.int %35056#0, %int1_32736, %int2_32737 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_32738 = torch.constant.int 1
    %int2_32739 = torch.constant.int 2
    %35065 = torch.aten.transpose.int %35057#0, %int1_32738, %int2_32739 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_32740 = torch.constant.int 1
    %int2_32741 = torch.constant.int 2
    %35066 = torch.aten.transpose.int %35058#0, %int1_32740, %int2_32741 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_32742 = torch.constant.int 1
    %int2_32743 = torch.constant.int 2
    %35067 = torch.aten.transpose.int %35059#0, %int1_32742, %int2_32743 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_32744 = torch.constant.int 1
    %int2_32745 = torch.constant.int 2
    %35068 = torch.aten.transpose.int %35060#0, %int1_32744, %int2_32745 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_32746 = torch.constant.int 1
    %int2_32747 = torch.constant.int 2
    %35069 = torch.aten.transpose.int %35061#0, %int1_32746, %int2_32747 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %35069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
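    // Merge the per-shard heads: [4,?,4,128] -> [4,?,512]
    // (4 heads x 128 head_dim = 512 features per shard).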
    %int4_32748 = torch.constant.int 4
    %int512_32749 = torch.constant.int 512
    %35070 = torch.prim.ListConstruct %int4_32748, %34391, %int512_32749 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35071 = torch.aten.view %35062, %35070 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %35071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_32750 = torch.constant.int 4
    %int512_32751 = torch.constant.int 512
    %35072 = torch.prim.ListConstruct %int4_32750, %34406, %int512_32751 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35073 = torch.aten.view %35063, %35072 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %35073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_32752 = torch.constant.int 4
    %int512_32753 = torch.constant.int 512
    %35074 = torch.prim.ListConstruct %int4_32752, %34421, %int512_32753 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35075 = torch.aten.view %35064, %35074 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %35075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_32754 = torch.constant.int 4
    %int512_32755 = torch.constant.int 512
    %35076 = torch.prim.ListConstruct %int4_32754, %34436, %int512_32755 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35077 = torch.aten.view %35065, %35076 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %35077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_32756 = torch.constant.int 4
    %int512_32757 = torch.constant.int 512
    %35078 = torch.prim.ListConstruct %int4_32756, %34451, %int512_32757 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35079 = torch.aten.view %35066, %35078 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %35079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_32758 = torch.constant.int 4
    %int512_32759 = torch.constant.int 512
    %35080 = torch.prim.ListConstruct %int4_32758, %34466, %int512_32759 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35081 = torch.aten.view %35067, %35080 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %35081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_32760 = torch.constant.int 4
    %int512_32761 = torch.constant.int 512
    %35082 = torch.prim.ListConstruct %int4_32760, %34481, %int512_32761 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35083 = torch.aten.view %35068, %35082 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %35083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_32762 = torch.constant.int 4
    %int512_32763 = torch.constant.int 512
    %35084 = torch.prim.ListConstruct %int4_32762, %34496, %int512_32763 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35085 = torch.aten.view %35069, %35084 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %35085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
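    // Transpose each shard of the attention output-projection weight
    // ([4096,512] -> [512,4096]) so the projection can run as a plain matmul
    // x @ W^T below.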
    %int1_32764 = torch.constant.int 1
    %int0_32765 = torch.constant.int 0
    %35086 = torch.prim.ListConstruct %int1_32764, %int0_32765 : (!torch.int, !torch.int) -> !torch.list<int>
    %35087 = torch.aten.permute %1264, %35086 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_32766 = torch.constant.int 1
    %int0_32767 = torch.constant.int 0
    %35088 = torch.prim.ListConstruct %int1_32766, %int0_32767 : (!torch.int, !torch.int) -> !torch.list<int>
    %35089 = torch.aten.permute %1265, %35088 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_32768 = torch.constant.int 1
    %int0_32769 = torch.constant.int 0
    %35090 = torch.prim.ListConstruct %int1_32768, %int0_32769 : (!torch.int, !torch.int) -> !torch.list<int>
    %35091 = torch.aten.permute %1266, %35090 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_32770 = torch.constant.int 1
    %int0_32771 = torch.constant.int 0
    %35092 = torch.prim.ListConstruct %int1_32770, %int0_32771 : (!torch.int, !torch.int) -> !torch.list<int>
    %35093 = torch.aten.permute %1267, %35092 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_32772 = torch.constant.int 1
    %int0_32773 = torch.constant.int 0
    %35094 = torch.prim.ListConstruct %int1_32772, %int0_32773 : (!torch.int, !torch.int) -> !torch.list<int>
    %35095 = torch.aten.permute %1268, %35094 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_32774 = torch.constant.int 1
    %int0_32775 = torch.constant.int 0
    %35096 = torch.prim.ListConstruct %int1_32774, %int0_32775 : (!torch.int, !torch.int) -> !torch.list<int>
    %35097 = torch.aten.permute %1269, %35096 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_32776 = torch.constant.int 1
    %int0_32777 = torch.constant.int 0
    %35098 = torch.prim.ListConstruct %int1_32776, %int0_32777 : (!torch.int, !torch.int) -> !torch.list<int>
    %35099 = torch.aten.permute %1270, %35098 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_32778 = torch.constant.int 1
    %int0_32779 = torch.constant.int 0
    %35100 = torch.prim.ListConstruct %int1_32778, %int0_32779 : (!torch.int, !torch.int) -> !torch.list<int>
    %35101 = torch.aten.permute %1271, %35100 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
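    // Sharded output projection, one per device: flatten each [4,?,512] shard to
    // 2-D ([4*seq, 512]), multiply by the transposed weight shard to get
    // [4*seq, 4096], then restore the [4,?,4096] batched shape. Because the
    // 512-wide input features are split across shards, each shard now holds only
    // a partial sum of the full 4096-wide projection.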
    %int4_32780 = torch.constant.int 4
    %35102 = torch.aten.mul.int %int4_32780, %34391 : !torch.int, !torch.int -> !torch.int
    %int512_32781 = torch.constant.int 512
    %35103 = torch.prim.ListConstruct %35102, %int512_32781 : (!torch.int, !torch.int) -> !torch.list<int>
    %35104 = torch.aten.view %35071, %35103 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %35104, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %35105 = torch.aten.mm %35104, %35087 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35105, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_32782 = torch.constant.int 4
    %int4096_32783 = torch.constant.int 4096
    %35106 = torch.prim.ListConstruct %int4_32782, %34391, %int4096_32783 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35107 = torch.aten.view %35105, %35106 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_32784 = torch.constant.int 4
    %35108 = torch.aten.mul.int %int4_32784, %34406 : !torch.int, !torch.int -> !torch.int
    %int512_32785 = torch.constant.int 512
    %35109 = torch.prim.ListConstruct %35108, %int512_32785 : (!torch.int, !torch.int) -> !torch.list<int>
    %35110 = torch.aten.view %35073, %35109 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %35110, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %35111 = torch.aten.mm %35110, %35089 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35111, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_32786 = torch.constant.int 4
    %int4096_32787 = torch.constant.int 4096
    %35112 = torch.prim.ListConstruct %int4_32786, %34406, %int4096_32787 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35113 = torch.aten.view %35111, %35112 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_32788 = torch.constant.int 4
    %35114 = torch.aten.mul.int %int4_32788, %34421 : !torch.int, !torch.int -> !torch.int
    %int512_32789 = torch.constant.int 512
    %35115 = torch.prim.ListConstruct %35114, %int512_32789 : (!torch.int, !torch.int) -> !torch.list<int>
    %35116 = torch.aten.view %35075, %35115 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %35116, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %35117 = torch.aten.mm %35116, %35091 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35117, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_32790 = torch.constant.int 4
    %int4096_32791 = torch.constant.int 4096
    %35118 = torch.prim.ListConstruct %int4_32790, %34421, %int4096_32791 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35119 = torch.aten.view %35117, %35118 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_32792 = torch.constant.int 4
    %35120 = torch.aten.mul.int %int4_32792, %34436 : !torch.int, !torch.int -> !torch.int
    %int512_32793 = torch.constant.int 512
    %35121 = torch.prim.ListConstruct %35120, %int512_32793 : (!torch.int, !torch.int) -> !torch.list<int>
    %35122 = torch.aten.view %35077, %35121 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %35122, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %35123 = torch.aten.mm %35122, %35093 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35123, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_32794 = torch.constant.int 4
    %int4096_32795 = torch.constant.int 4096
    %35124 = torch.prim.ListConstruct %int4_32794, %34436, %int4096_32795 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35125 = torch.aten.view %35123, %35124 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_32796 = torch.constant.int 4
    %35126 = torch.aten.mul.int %int4_32796, %34451 : !torch.int, !torch.int -> !torch.int
    %int512_32797 = torch.constant.int 512
    %35127 = torch.prim.ListConstruct %35126, %int512_32797 : (!torch.int, !torch.int) -> !torch.list<int>
    %35128 = torch.aten.view %35079, %35127 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %35128, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %35129 = torch.aten.mm %35128, %35095 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35129, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_32798 = torch.constant.int 4
    %int4096_32799 = torch.constant.int 4096
    %35130 = torch.prim.ListConstruct %int4_32798, %34451, %int4096_32799 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35131 = torch.aten.view %35129, %35130 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_32800 = torch.constant.int 4
    %35132 = torch.aten.mul.int %int4_32800, %34466 : !torch.int, !torch.int -> !torch.int
    %int512_32801 = torch.constant.int 512
    %35133 = torch.prim.ListConstruct %35132, %int512_32801 : (!torch.int, !torch.int) -> !torch.list<int>
    %35134 = torch.aten.view %35081, %35133 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %35134, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %35135 = torch.aten.mm %35134, %35097 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35135, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_32802 = torch.constant.int 4
    %int4096_32803 = torch.constant.int 4096
    %35136 = torch.prim.ListConstruct %int4_32802, %34466, %int4096_32803 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35137 = torch.aten.view %35135, %35136 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_32804 = torch.constant.int 4
    %35138 = torch.aten.mul.int %int4_32804, %34481 : !torch.int, !torch.int -> !torch.int
    %int512_32805 = torch.constant.int 512
    %35139 = torch.prim.ListConstruct %35138, %int512_32805 : (!torch.int, !torch.int) -> !torch.list<int>
    %35140 = torch.aten.view %35083, %35139 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %35140, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %35141 = torch.aten.mm %35140, %35099 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35141, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_32806 = torch.constant.int 4
    %int4096_32807 = torch.constant.int 4096
    %35142 = torch.prim.ListConstruct %int4_32806, %34481, %int4096_32807 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35143 = torch.aten.view %35141, %35142 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_32808 = torch.constant.int 4
    %35144 = torch.aten.mul.int %int4_32808, %34496 : !torch.int, !torch.int -> !torch.int
    %int512_32809 = torch.constant.int 512
    %35145 = torch.prim.ListConstruct %35144, %int512_32809 : (!torch.int, !torch.int) -> !torch.list<int>
    %35146 = torch.aten.view %35085, %35145 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %35146, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %35147 = torch.aten.mm %35146, %35101 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35147, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_32810 = torch.constant.int 4
    %int4096_32811 = torch.constant.int 4096
    %35148 = torch.prim.ListConstruct %int4_32810, %34496, %int4096_32811 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35149 = torch.aten.view %35147, %35148 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
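    // Gather phase of the tensor-parallel reduction: move the partial projection
    // results from the other shards onto @__device_0 via flow.tensor.transfer
    // (round-tripping through builtin tensors to carry the dynamic dim).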
    %35150 = torch_c.to_builtin_tensor %35113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32812 = arith.constant 1 : index
    %dim_32813 = tensor.dim %35150, %c1_32812 : tensor<4x?x4096xf16>
    %35151 = flow.tensor.transfer %35150 : tensor<4x?x4096xf16>{%dim_32813} to #hal.device.promise<@__device_0>
    %35152 = torch_c.from_builtin_tensor %35151 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35153 = torch_c.to_builtin_tensor %35119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32814 = arith.constant 1 : index
    %dim_32815 = tensor.dim %35153, %c1_32814 : tensor<4x?x4096xf16>
    %35154 = flow.tensor.transfer %35153 : tensor<4x?x4096xf16>{%dim_32815} to #hal.device.promise<@__device_0>
    %35155 = torch_c.from_builtin_tensor %35154 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35156 = torch_c.to_builtin_tensor %35125 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32816 = arith.constant 1 : index
    %dim_32817 = tensor.dim %35156, %c1_32816 : tensor<4x?x4096xf16>
    %35157 = flow.tensor.transfer %35156 : tensor<4x?x4096xf16>{%dim_32817} to #hal.device.promise<@__device_0>
    %35158 = torch_c.from_builtin_tensor %35157 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35159 = torch_c.to_builtin_tensor %35131 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32818 = arith.constant 1 : index
    %dim_32819 = tensor.dim %35159, %c1_32818 : tensor<4x?x4096xf16>
    %35160 = flow.tensor.transfer %35159 : tensor<4x?x4096xf16>{%dim_32819} to #hal.device.promise<@__device_0>
    %35161 = torch_c.from_builtin_tensor %35160 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35162 = torch_c.to_builtin_tensor %35137 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32820 = arith.constant 1 : index
    %dim_32821 = tensor.dim %35162, %c1_32820 : tensor<4x?x4096xf16>
    %35163 = flow.tensor.transfer %35162 : tensor<4x?x4096xf16>{%dim_32821} to #hal.device.promise<@__device_0>
    %35164 = torch_c.from_builtin_tensor %35163 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35165 = torch_c.to_builtin_tensor %35143 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32822 = arith.constant 1 : index
    %dim_32823 = tensor.dim %35165, %c1_32822 : tensor<4x?x4096xf16>
    %35166 = flow.tensor.transfer %35165 : tensor<4x?x4096xf16>{%dim_32823} to #hal.device.promise<@__device_0>
    %35167 = torch_c.from_builtin_tensor %35166 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35168 = torch_c.to_builtin_tensor %35149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32824 = arith.constant 1 : index
    %dim_32825 = tensor.dim %35168, %c1_32824 : tensor<4x?x4096xf16>
    %35169 = flow.tensor.transfer %35168 : tensor<4x?x4096xf16>{%dim_32825} to #hal.device.promise<@__device_0>
    %35170 = torch_c.from_builtin_tensor %35169 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32826 = torch.constant.int 1
    %35171 = torch.aten.add.Tensor %35107, %35152, %int1_32826 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32827 = torch.constant.int 1
    %35172 = torch.aten.add.Tensor %35171, %35155, %int1_32827 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32828 = torch.constant.int 1
    %35173 = torch.aten.add.Tensor %35172, %35158, %int1_32828 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32829 = torch.constant.int 1
    %35174 = torch.aten.add.Tensor %35173, %35161, %int1_32829 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32830 = torch.constant.int 1
    %35175 = torch.aten.add.Tensor %35174, %35164, %int1_32830 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32831 = torch.constant.int 1
    %35176 = torch.aten.add.Tensor %35175, %35167, %int1_32831 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32832 = torch.constant.int 1
    %35177 = torch.aten.add.Tensor %35176, %35170, %int1_32832 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
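    // %35177 is the full sum on @__device_0. Repeat for @__device_1: transfer the seven remote partials
    // and accumulate them with the local partial %35113 into %35205.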
    %35178 = torch_c.to_builtin_tensor %35107 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32833 = arith.constant 1 : index
    %dim_32834 = tensor.dim %35178, %c1_32833 : tensor<4x?x4096xf16>
    %35179 = flow.tensor.transfer %35178 : tensor<4x?x4096xf16>{%dim_32834} to #hal.device.promise<@__device_1>
    %35180 = torch_c.from_builtin_tensor %35179 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35181 = torch_c.to_builtin_tensor %35119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32835 = arith.constant 1 : index
    %dim_32836 = tensor.dim %35181, %c1_32835 : tensor<4x?x4096xf16>
    %35182 = flow.tensor.transfer %35181 : tensor<4x?x4096xf16>{%dim_32836} to #hal.device.promise<@__device_1>
    %35183 = torch_c.from_builtin_tensor %35182 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35184 = torch_c.to_builtin_tensor %35125 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32837 = arith.constant 1 : index
    %dim_32838 = tensor.dim %35184, %c1_32837 : tensor<4x?x4096xf16>
    %35185 = flow.tensor.transfer %35184 : tensor<4x?x4096xf16>{%dim_32838} to #hal.device.promise<@__device_1>
    %35186 = torch_c.from_builtin_tensor %35185 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35187 = torch_c.to_builtin_tensor %35131 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32839 = arith.constant 1 : index
    %dim_32840 = tensor.dim %35187, %c1_32839 : tensor<4x?x4096xf16>
    %35188 = flow.tensor.transfer %35187 : tensor<4x?x4096xf16>{%dim_32840} to #hal.device.promise<@__device_1>
    %35189 = torch_c.from_builtin_tensor %35188 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35190 = torch_c.to_builtin_tensor %35137 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32841 = arith.constant 1 : index
    %dim_32842 = tensor.dim %35190, %c1_32841 : tensor<4x?x4096xf16>
    %35191 = flow.tensor.transfer %35190 : tensor<4x?x4096xf16>{%dim_32842} to #hal.device.promise<@__device_1>
    %35192 = torch_c.from_builtin_tensor %35191 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35193 = torch_c.to_builtin_tensor %35143 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32843 = arith.constant 1 : index
    %dim_32844 = tensor.dim %35193, %c1_32843 : tensor<4x?x4096xf16>
    %35194 = flow.tensor.transfer %35193 : tensor<4x?x4096xf16>{%dim_32844} to #hal.device.promise<@__device_1>
    %35195 = torch_c.from_builtin_tensor %35194 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35196 = torch_c.to_builtin_tensor %35149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32845 = arith.constant 1 : index
    %dim_32846 = tensor.dim %35196, %c1_32845 : tensor<4x?x4096xf16>
    %35197 = flow.tensor.transfer %35196 : tensor<4x?x4096xf16>{%dim_32846} to #hal.device.promise<@__device_1>
    %35198 = torch_c.from_builtin_tensor %35197 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32847 = torch.constant.int 1
    %35199 = torch.aten.add.Tensor %35180, %35113, %int1_32847 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32848 = torch.constant.int 1
    %35200 = torch.aten.add.Tensor %35199, %35183, %int1_32848 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32849 = torch.constant.int 1
    %35201 = torch.aten.add.Tensor %35200, %35186, %int1_32849 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32850 = torch.constant.int 1
    %35202 = torch.aten.add.Tensor %35201, %35189, %int1_32850 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32851 = torch.constant.int 1
    %35203 = torch.aten.add.Tensor %35202, %35192, %int1_32851 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32852 = torch.constant.int 1
    %35204 = torch.aten.add.Tensor %35203, %35195, %int1_32852 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32853 = torch.constant.int 1
    %35205 = torch.aten.add.Tensor %35204, %35198, %int1_32853 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
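    // Same for @__device_2: local partial %35119, reduced result %35233.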
    %35206 = torch_c.to_builtin_tensor %35107 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32854 = arith.constant 1 : index
    %dim_32855 = tensor.dim %35206, %c1_32854 : tensor<4x?x4096xf16>
    %35207 = flow.tensor.transfer %35206 : tensor<4x?x4096xf16>{%dim_32855} to #hal.device.promise<@__device_2>
    %35208 = torch_c.from_builtin_tensor %35207 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35209 = torch_c.to_builtin_tensor %35113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32856 = arith.constant 1 : index
    %dim_32857 = tensor.dim %35209, %c1_32856 : tensor<4x?x4096xf16>
    %35210 = flow.tensor.transfer %35209 : tensor<4x?x4096xf16>{%dim_32857} to #hal.device.promise<@__device_2>
    %35211 = torch_c.from_builtin_tensor %35210 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35211, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35212 = torch_c.to_builtin_tensor %35125 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32858 = arith.constant 1 : index
    %dim_32859 = tensor.dim %35212, %c1_32858 : tensor<4x?x4096xf16>
    %35213 = flow.tensor.transfer %35212 : tensor<4x?x4096xf16>{%dim_32859} to #hal.device.promise<@__device_2>
    %35214 = torch_c.from_builtin_tensor %35213 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35215 = torch_c.to_builtin_tensor %35131 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32860 = arith.constant 1 : index
    %dim_32861 = tensor.dim %35215, %c1_32860 : tensor<4x?x4096xf16>
    %35216 = flow.tensor.transfer %35215 : tensor<4x?x4096xf16>{%dim_32861} to #hal.device.promise<@__device_2>
    %35217 = torch_c.from_builtin_tensor %35216 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35218 = torch_c.to_builtin_tensor %35137 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32862 = arith.constant 1 : index
    %dim_32863 = tensor.dim %35218, %c1_32862 : tensor<4x?x4096xf16>
    %35219 = flow.tensor.transfer %35218 : tensor<4x?x4096xf16>{%dim_32863} to #hal.device.promise<@__device_2>
    %35220 = torch_c.from_builtin_tensor %35219 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35221 = torch_c.to_builtin_tensor %35143 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32864 = arith.constant 1 : index
    %dim_32865 = tensor.dim %35221, %c1_32864 : tensor<4x?x4096xf16>
    %35222 = flow.tensor.transfer %35221 : tensor<4x?x4096xf16>{%dim_32865} to #hal.device.promise<@__device_2>
    %35223 = torch_c.from_builtin_tensor %35222 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35224 = torch_c.to_builtin_tensor %35149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32866 = arith.constant 1 : index
    %dim_32867 = tensor.dim %35224, %c1_32866 : tensor<4x?x4096xf16>
    %35225 = flow.tensor.transfer %35224 : tensor<4x?x4096xf16>{%dim_32867} to #hal.device.promise<@__device_2>
    %35226 = torch_c.from_builtin_tensor %35225 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32868 = torch.constant.int 1
    %35227 = torch.aten.add.Tensor %35208, %35211, %int1_32868 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32869 = torch.constant.int 1
    %35228 = torch.aten.add.Tensor %35227, %35119, %int1_32869 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32870 = torch.constant.int 1
    %35229 = torch.aten.add.Tensor %35228, %35214, %int1_32870 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32871 = torch.constant.int 1
    %35230 = torch.aten.add.Tensor %35229, %35217, %int1_32871 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32872 = torch.constant.int 1
    %35231 = torch.aten.add.Tensor %35230, %35220, %int1_32872 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32873 = torch.constant.int 1
    %35232 = torch.aten.add.Tensor %35231, %35223, %int1_32873 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32874 = torch.constant.int 1
    %35233 = torch.aten.add.Tensor %35232, %35226, %int1_32874 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
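    // Same for @__device_3: local partial %35125, reduced result %35261.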
    %35234 = torch_c.to_builtin_tensor %35107 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32875 = arith.constant 1 : index
    %dim_32876 = tensor.dim %35234, %c1_32875 : tensor<4x?x4096xf16>
    %35235 = flow.tensor.transfer %35234 : tensor<4x?x4096xf16>{%dim_32876} to #hal.device.promise<@__device_3>
    %35236 = torch_c.from_builtin_tensor %35235 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35237 = torch_c.to_builtin_tensor %35113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32877 = arith.constant 1 : index
    %dim_32878 = tensor.dim %35237, %c1_32877 : tensor<4x?x4096xf16>
    %35238 = flow.tensor.transfer %35237 : tensor<4x?x4096xf16>{%dim_32878} to #hal.device.promise<@__device_3>
    %35239 = torch_c.from_builtin_tensor %35238 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35240 = torch_c.to_builtin_tensor %35119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32879 = arith.constant 1 : index
    %dim_32880 = tensor.dim %35240, %c1_32879 : tensor<4x?x4096xf16>
    %35241 = flow.tensor.transfer %35240 : tensor<4x?x4096xf16>{%dim_32880} to #hal.device.promise<@__device_3>
    %35242 = torch_c.from_builtin_tensor %35241 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35243 = torch_c.to_builtin_tensor %35131 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32881 = arith.constant 1 : index
    %dim_32882 = tensor.dim %35243, %c1_32881 : tensor<4x?x4096xf16>
    %35244 = flow.tensor.transfer %35243 : tensor<4x?x4096xf16>{%dim_32882} to #hal.device.promise<@__device_3>
    %35245 = torch_c.from_builtin_tensor %35244 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35246 = torch_c.to_builtin_tensor %35137 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32883 = arith.constant 1 : index
    %dim_32884 = tensor.dim %35246, %c1_32883 : tensor<4x?x4096xf16>
    %35247 = flow.tensor.transfer %35246 : tensor<4x?x4096xf16>{%dim_32884} to #hal.device.promise<@__device_3>
    %35248 = torch_c.from_builtin_tensor %35247 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35249 = torch_c.to_builtin_tensor %35143 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32885 = arith.constant 1 : index
    %dim_32886 = tensor.dim %35249, %c1_32885 : tensor<4x?x4096xf16>
    %35250 = flow.tensor.transfer %35249 : tensor<4x?x4096xf16>{%dim_32886} to #hal.device.promise<@__device_3>
    %35251 = torch_c.from_builtin_tensor %35250 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35252 = torch_c.to_builtin_tensor %35149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32887 = arith.constant 1 : index
    %dim_32888 = tensor.dim %35252, %c1_32887 : tensor<4x?x4096xf16>
    %35253 = flow.tensor.transfer %35252 : tensor<4x?x4096xf16>{%dim_32888} to #hal.device.promise<@__device_3>
    %35254 = torch_c.from_builtin_tensor %35253 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32889 = torch.constant.int 1
    %35255 = torch.aten.add.Tensor %35236, %35239, %int1_32889 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32890 = torch.constant.int 1
    %35256 = torch.aten.add.Tensor %35255, %35242, %int1_32890 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32891 = torch.constant.int 1
    %35257 = torch.aten.add.Tensor %35256, %35125, %int1_32891 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32892 = torch.constant.int 1
    %35258 = torch.aten.add.Tensor %35257, %35245, %int1_32892 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32893 = torch.constant.int 1
    %35259 = torch.aten.add.Tensor %35258, %35248, %int1_32893 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32894 = torch.constant.int 1
    %35260 = torch.aten.add.Tensor %35259, %35251, %int1_32894 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32895 = torch.constant.int 1
    %35261 = torch.aten.add.Tensor %35260, %35254, %int1_32895 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
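    // Same for @__device_4: local partial %35131, reduced result %35289.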
    %35262 = torch_c.to_builtin_tensor %35107 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32896 = arith.constant 1 : index
    %dim_32897 = tensor.dim %35262, %c1_32896 : tensor<4x?x4096xf16>
    %35263 = flow.tensor.transfer %35262 : tensor<4x?x4096xf16>{%dim_32897} to #hal.device.promise<@__device_4>
    %35264 = torch_c.from_builtin_tensor %35263 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35265 = torch_c.to_builtin_tensor %35113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32898 = arith.constant 1 : index
    %dim_32899 = tensor.dim %35265, %c1_32898 : tensor<4x?x4096xf16>
    %35266 = flow.tensor.transfer %35265 : tensor<4x?x4096xf16>{%dim_32899} to #hal.device.promise<@__device_4>
    %35267 = torch_c.from_builtin_tensor %35266 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35268 = torch_c.to_builtin_tensor %35119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32900 = arith.constant 1 : index
    %dim_32901 = tensor.dim %35268, %c1_32900 : tensor<4x?x4096xf16>
    %35269 = flow.tensor.transfer %35268 : tensor<4x?x4096xf16>{%dim_32901} to #hal.device.promise<@__device_4>
    %35270 = torch_c.from_builtin_tensor %35269 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35271 = torch_c.to_builtin_tensor %35125 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32902 = arith.constant 1 : index
    %dim_32903 = tensor.dim %35271, %c1_32902 : tensor<4x?x4096xf16>
    %35272 = flow.tensor.transfer %35271 : tensor<4x?x4096xf16>{%dim_32903} to #hal.device.promise<@__device_4>
    %35273 = torch_c.from_builtin_tensor %35272 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35274 = torch_c.to_builtin_tensor %35137 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32904 = arith.constant 1 : index
    %dim_32905 = tensor.dim %35274, %c1_32904 : tensor<4x?x4096xf16>
    %35275 = flow.tensor.transfer %35274 : tensor<4x?x4096xf16>{%dim_32905} to #hal.device.promise<@__device_4>
    %35276 = torch_c.from_builtin_tensor %35275 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35277 = torch_c.to_builtin_tensor %35143 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32906 = arith.constant 1 : index
    %dim_32907 = tensor.dim %35277, %c1_32906 : tensor<4x?x4096xf16>
    %35278 = flow.tensor.transfer %35277 : tensor<4x?x4096xf16>{%dim_32907} to #hal.device.promise<@__device_4>
    %35279 = torch_c.from_builtin_tensor %35278 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35280 = torch_c.to_builtin_tensor %35149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32908 = arith.constant 1 : index
    %dim_32909 = tensor.dim %35280, %c1_32908 : tensor<4x?x4096xf16>
    %35281 = flow.tensor.transfer %35280 : tensor<4x?x4096xf16>{%dim_32909} to #hal.device.promise<@__device_4>
    %35282 = torch_c.from_builtin_tensor %35281 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32910 = torch.constant.int 1
    %35283 = torch.aten.add.Tensor %35264, %35267, %int1_32910 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32911 = torch.constant.int 1
    %35284 = torch.aten.add.Tensor %35283, %35270, %int1_32911 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32912 = torch.constant.int 1
    %35285 = torch.aten.add.Tensor %35284, %35273, %int1_32912 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32913 = torch.constant.int 1
    %35286 = torch.aten.add.Tensor %35285, %35131, %int1_32913 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32914 = torch.constant.int 1
    %35287 = torch.aten.add.Tensor %35286, %35276, %int1_32914 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32915 = torch.constant.int 1
    %35288 = torch.aten.add.Tensor %35287, %35279, %int1_32915 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32916 = torch.constant.int 1
    %35289 = torch.aten.add.Tensor %35288, %35282, %int1_32916 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
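    // Same for @__device_5: local partial %35137, reduced result %35317.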
    %35290 = torch_c.to_builtin_tensor %35107 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32917 = arith.constant 1 : index
    %dim_32918 = tensor.dim %35290, %c1_32917 : tensor<4x?x4096xf16>
    %35291 = flow.tensor.transfer %35290 : tensor<4x?x4096xf16>{%dim_32918} to #hal.device.promise<@__device_5>
    %35292 = torch_c.from_builtin_tensor %35291 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35293 = torch_c.to_builtin_tensor %35113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32919 = arith.constant 1 : index
    %dim_32920 = tensor.dim %35293, %c1_32919 : tensor<4x?x4096xf16>
    %35294 = flow.tensor.transfer %35293 : tensor<4x?x4096xf16>{%dim_32920} to #hal.device.promise<@__device_5>
    %35295 = torch_c.from_builtin_tensor %35294 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35296 = torch_c.to_builtin_tensor %35119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32921 = arith.constant 1 : index
    %dim_32922 = tensor.dim %35296, %c1_32921 : tensor<4x?x4096xf16>
    %35297 = flow.tensor.transfer %35296 : tensor<4x?x4096xf16>{%dim_32922} to #hal.device.promise<@__device_5>
    %35298 = torch_c.from_builtin_tensor %35297 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35299 = torch_c.to_builtin_tensor %35125 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32923 = arith.constant 1 : index
    %dim_32924 = tensor.dim %35299, %c1_32923 : tensor<4x?x4096xf16>
    %35300 = flow.tensor.transfer %35299 : tensor<4x?x4096xf16>{%dim_32924} to #hal.device.promise<@__device_5>
    %35301 = torch_c.from_builtin_tensor %35300 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35302 = torch_c.to_builtin_tensor %35131 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32925 = arith.constant 1 : index
    %dim_32926 = tensor.dim %35302, %c1_32925 : tensor<4x?x4096xf16>
    %35303 = flow.tensor.transfer %35302 : tensor<4x?x4096xf16>{%dim_32926} to #hal.device.promise<@__device_5>
    %35304 = torch_c.from_builtin_tensor %35303 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35305 = torch_c.to_builtin_tensor %35143 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32927 = arith.constant 1 : index
    %dim_32928 = tensor.dim %35305, %c1_32927 : tensor<4x?x4096xf16>
    %35306 = flow.tensor.transfer %35305 : tensor<4x?x4096xf16>{%dim_32928} to #hal.device.promise<@__device_5>
    %35307 = torch_c.from_builtin_tensor %35306 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35308 = torch_c.to_builtin_tensor %35149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32929 = arith.constant 1 : index
    %dim_32930 = tensor.dim %35308, %c1_32929 : tensor<4x?x4096xf16>
    %35309 = flow.tensor.transfer %35308 : tensor<4x?x4096xf16>{%dim_32930} to #hal.device.promise<@__device_5>
    %35310 = torch_c.from_builtin_tensor %35309 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32931 = torch.constant.int 1
    %35311 = torch.aten.add.Tensor %35292, %35295, %int1_32931 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32932 = torch.constant.int 1
    %35312 = torch.aten.add.Tensor %35311, %35298, %int1_32932 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32933 = torch.constant.int 1
    %35313 = torch.aten.add.Tensor %35312, %35301, %int1_32933 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32934 = torch.constant.int 1
    %35314 = torch.aten.add.Tensor %35313, %35304, %int1_32934 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32935 = torch.constant.int 1
    %35315 = torch.aten.add.Tensor %35314, %35137, %int1_32935 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32936 = torch.constant.int 1
    %35316 = torch.aten.add.Tensor %35315, %35307, %int1_32936 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32937 = torch.constant.int 1
    %35317 = torch.aten.add.Tensor %35316, %35310, %int1_32937 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
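    // Same for @__device_6: local partial %35143, reduced result %35345.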
    %35318 = torch_c.to_builtin_tensor %35107 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32938 = arith.constant 1 : index
    %dim_32939 = tensor.dim %35318, %c1_32938 : tensor<4x?x4096xf16>
    %35319 = flow.tensor.transfer %35318 : tensor<4x?x4096xf16>{%dim_32939} to #hal.device.promise<@__device_6>
    %35320 = torch_c.from_builtin_tensor %35319 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35321 = torch_c.to_builtin_tensor %35113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32940 = arith.constant 1 : index
    %dim_32941 = tensor.dim %35321, %c1_32940 : tensor<4x?x4096xf16>
    %35322 = flow.tensor.transfer %35321 : tensor<4x?x4096xf16>{%dim_32941} to #hal.device.promise<@__device_6>
    %35323 = torch_c.from_builtin_tensor %35322 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35324 = torch_c.to_builtin_tensor %35119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32942 = arith.constant 1 : index
    %dim_32943 = tensor.dim %35324, %c1_32942 : tensor<4x?x4096xf16>
    %35325 = flow.tensor.transfer %35324 : tensor<4x?x4096xf16>{%dim_32943} to #hal.device.promise<@__device_6>
    %35326 = torch_c.from_builtin_tensor %35325 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35327 = torch_c.to_builtin_tensor %35125 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32944 = arith.constant 1 : index
    %dim_32945 = tensor.dim %35327, %c1_32944 : tensor<4x?x4096xf16>
    %35328 = flow.tensor.transfer %35327 : tensor<4x?x4096xf16>{%dim_32945} to #hal.device.promise<@__device_6>
    %35329 = torch_c.from_builtin_tensor %35328 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35330 = torch_c.to_builtin_tensor %35131 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32946 = arith.constant 1 : index
    %dim_32947 = tensor.dim %35330, %c1_32946 : tensor<4x?x4096xf16>
    %35331 = flow.tensor.transfer %35330 : tensor<4x?x4096xf16>{%dim_32947} to #hal.device.promise<@__device_6>
    %35332 = torch_c.from_builtin_tensor %35331 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35333 = torch_c.to_builtin_tensor %35137 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32948 = arith.constant 1 : index
    %dim_32949 = tensor.dim %35333, %c1_32948 : tensor<4x?x4096xf16>
    %35334 = flow.tensor.transfer %35333 : tensor<4x?x4096xf16>{%dim_32949} to #hal.device.promise<@__device_6>
    %35335 = torch_c.from_builtin_tensor %35334 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35336 = torch_c.to_builtin_tensor %35149 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32950 = arith.constant 1 : index
    %dim_32951 = tensor.dim %35336, %c1_32950 : tensor<4x?x4096xf16>
    %35337 = flow.tensor.transfer %35336 : tensor<4x?x4096xf16>{%dim_32951} to #hal.device.promise<@__device_6>
    %35338 = torch_c.from_builtin_tensor %35337 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32952 = torch.constant.int 1
    %35339 = torch.aten.add.Tensor %35320, %35323, %int1_32952 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32953 = torch.constant.int 1
    %35340 = torch.aten.add.Tensor %35339, %35326, %int1_32953 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32954 = torch.constant.int 1
    %35341 = torch.aten.add.Tensor %35340, %35329, %int1_32954 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32955 = torch.constant.int 1
    %35342 = torch.aten.add.Tensor %35341, %35332, %int1_32955 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32956 = torch.constant.int 1
    %35343 = torch.aten.add.Tensor %35342, %35335, %int1_32956 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32957 = torch.constant.int 1
    %35344 = torch.aten.add.Tensor %35343, %35143, %int1_32957 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32958 = torch.constant.int 1
    %35345 = torch.aten.add.Tensor %35344, %35338, %int1_32958 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
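    // Same for @__device_7: local partial %35149, reduced result %35373.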
    %35346 = torch_c.to_builtin_tensor %35107 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32959 = arith.constant 1 : index
    %dim_32960 = tensor.dim %35346, %c1_32959 : tensor<4x?x4096xf16>
    %35347 = flow.tensor.transfer %35346 : tensor<4x?x4096xf16>{%dim_32960} to #hal.device.promise<@__device_7>
    %35348 = torch_c.from_builtin_tensor %35347 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35349 = torch_c.to_builtin_tensor %35113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32961 = arith.constant 1 : index
    %dim_32962 = tensor.dim %35349, %c1_32961 : tensor<4x?x4096xf16>
    %35350 = flow.tensor.transfer %35349 : tensor<4x?x4096xf16>{%dim_32962} to #hal.device.promise<@__device_7>
    %35351 = torch_c.from_builtin_tensor %35350 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35352 = torch_c.to_builtin_tensor %35119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32963 = arith.constant 1 : index
    %dim_32964 = tensor.dim %35352, %c1_32963 : tensor<4x?x4096xf16>
    %35353 = flow.tensor.transfer %35352 : tensor<4x?x4096xf16>{%dim_32964} to #hal.device.promise<@__device_7>
    %35354 = torch_c.from_builtin_tensor %35353 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35355 = torch_c.to_builtin_tensor %35125 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32965 = arith.constant 1 : index
    %dim_32966 = tensor.dim %35355, %c1_32965 : tensor<4x?x4096xf16>
    %35356 = flow.tensor.transfer %35355 : tensor<4x?x4096xf16>{%dim_32966} to #hal.device.promise<@__device_7>
    %35357 = torch_c.from_builtin_tensor %35356 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35358 = torch_c.to_builtin_tensor %35131 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32967 = arith.constant 1 : index
    %dim_32968 = tensor.dim %35358, %c1_32967 : tensor<4x?x4096xf16>
    %35359 = flow.tensor.transfer %35358 : tensor<4x?x4096xf16>{%dim_32968} to #hal.device.promise<@__device_7>
    %35360 = torch_c.from_builtin_tensor %35359 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35361 = torch_c.to_builtin_tensor %35137 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32969 = arith.constant 1 : index
    %dim_32970 = tensor.dim %35361, %c1_32969 : tensor<4x?x4096xf16>
    %35362 = flow.tensor.transfer %35361 : tensor<4x?x4096xf16>{%dim_32970} to #hal.device.promise<@__device_7>
    %35363 = torch_c.from_builtin_tensor %35362 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35364 = torch_c.to_builtin_tensor %35143 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_32971 = arith.constant 1 : index
    %dim_32972 = tensor.dim %35364, %c1_32971 : tensor<4x?x4096xf16>
    %35365 = flow.tensor.transfer %35364 : tensor<4x?x4096xf16>{%dim_32972} to #hal.device.promise<@__device_7>
    %35366 = torch_c.from_builtin_tensor %35365 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32973 = torch.constant.int 1
    %35367 = torch.aten.add.Tensor %35348, %35351, %int1_32973 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32974 = torch.constant.int 1
    %35368 = torch.aten.add.Tensor %35367, %35354, %int1_32974 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32975 = torch.constant.int 1
    %35369 = torch.aten.add.Tensor %35368, %35357, %int1_32975 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32976 = torch.constant.int 1
    %35370 = torch.aten.add.Tensor %35369, %35360, %int1_32976 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32977 = torch.constant.int 1
    %35371 = torch.aten.add.Tensor %35370, %35363, %int1_32977 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32978 = torch.constant.int 1
    %35372 = torch.aten.add.Tensor %35371, %35366, %int1_32978 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32979 = torch.constant.int 1
    %35373 = torch.aten.add.Tensor %35372, %35149, %int1_32979 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
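    // The all-reduce is complete on all eight devices. Next, add each device's reduced output to its
    // copy of the incoming residual (%34033 through %34040), yielding the updated residual stream
    // %35374 through %35381.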
    %int1_32980 = torch.constant.int 1
    %35374 = torch.aten.add.Tensor %34033, %35177, %int1_32980 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32981 = torch.constant.int 1
    %35375 = torch.aten.add.Tensor %34034, %35205, %int1_32981 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32982 = torch.constant.int 1
    %35376 = torch.aten.add.Tensor %34035, %35233, %int1_32982 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32983 = torch.constant.int 1
    %35377 = torch.aten.add.Tensor %34036, %35261, %int1_32983 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32984 = torch.constant.int 1
    %35378 = torch.aten.add.Tensor %34037, %35289, %int1_32984 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32985 = torch.constant.int 1
    %35379 = torch.aten.add.Tensor %34038, %35317, %int1_32985 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32986 = torch.constant.int 1
    %35380 = torch.aten.add.Tensor %34039, %35345, %int1_32986 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_32987 = torch.constant.int 1
    %35381 = torch.aten.add.Tensor %34040, %35373, %int1_32987 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
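    // Normalization prologue: upcast each f16 residual to f32 (torch dtype code 6 is float32).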
    %int6_32988 = torch.constant.int 6
    %35382 = torch.prims.convert_element_type %35374, %int6_32988 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_32989 = torch.constant.int 6
    %35383 = torch.prims.convert_element_type %35375, %int6_32989 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_32990 = torch.constant.int 6
    %35384 = torch.prims.convert_element_type %35376, %int6_32990 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_32991 = torch.constant.int 6
    %35385 = torch.prims.convert_element_type %35377, %int6_32991 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_32992 = torch.constant.int 6
    %35386 = torch.prims.convert_element_type %35378, %int6_32992 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_32993 = torch.constant.int 6
    %35387 = torch.prims.convert_element_type %35379, %int6_32993 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_32994 = torch.constant.int 6
    %35388 = torch.prims.convert_element_type %35380, %int6_32994 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_32995 = torch.constant.int 6
    %35389 = torch.prims.convert_element_type %35381, %int6_32995 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
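    // Square each element (x**2), the first step of a mean-of-squares statistic.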
    %int2_32996 = torch.constant.int 2
    %35390 = torch.aten.pow.Tensor_Scalar %35382, %int2_32996 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_32997 = torch.constant.int 2
    %35391 = torch.aten.pow.Tensor_Scalar %35383, %int2_32997 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_32998 = torch.constant.int 2
    %35392 = torch.aten.pow.Tensor_Scalar %35384, %int2_32998 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_32999 = torch.constant.int 2
    %35393 = torch.aten.pow.Tensor_Scalar %35385, %int2_32999 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_33000 = torch.constant.int 2
    %35394 = torch.aten.pow.Tensor_Scalar %35386, %int2_33000 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_33001 = torch.constant.int 2
    %35395 = torch.aten.pow.Tensor_Scalar %35387, %int2_33001 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_33002 = torch.constant.int 2
    %35396 = torch.aten.pow.Tensor_Scalar %35388, %int2_33002 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_33003 = torch.constant.int 2
    %35397 = torch.aten.pow.Tensor_Scalar %35389, %int2_33003 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
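    // Mean over the last dimension (dim -1, keepdim = true), reducing [4,?,4096] to [4,?,1] per device.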
    %int-1_33004 = torch.constant.int -1
    %35398 = torch.prim.ListConstruct %int-1_33004 : (!torch.int) -> !torch.list<int>
    %true_33005 = torch.constant.bool true
    %none_33006 = torch.constant.none
    %35399 = torch.aten.mean.dim %35390, %35398, %true_33005, %none_33006 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33007 = torch.constant.int -1
    %35400 = torch.prim.ListConstruct %int-1_33007 : (!torch.int) -> !torch.list<int>
    %true_33008 = torch.constant.bool true
    %none_33009 = torch.constant.none
    %35401 = torch.aten.mean.dim %35391, %35400, %true_33008, %none_33009 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33010 = torch.constant.int -1
    %35402 = torch.prim.ListConstruct %int-1_33010 : (!torch.int) -> !torch.list<int>
    %true_33011 = torch.constant.bool true
    %none_33012 = torch.constant.none
    %35403 = torch.aten.mean.dim %35392, %35402, %true_33011, %none_33012 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33013 = torch.constant.int -1
    %35404 = torch.prim.ListConstruct %int-1_33013 : (!torch.int) -> !torch.list<int>
    %true_33014 = torch.constant.bool true
    %none_33015 = torch.constant.none
    %35405 = torch.aten.mean.dim %35393, %35404, %true_33014, %none_33015 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33016 = torch.constant.int -1
    %35406 = torch.prim.ListConstruct %int-1_33016 : (!torch.int) -> !torch.list<int>
    %true_33017 = torch.constant.bool true
    %none_33018 = torch.constant.none
    %35407 = torch.aten.mean.dim %35394, %35406, %true_33017, %none_33018 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33019 = torch.constant.int -1
    %35408 = torch.prim.ListConstruct %int-1_33019 : (!torch.int) -> !torch.list<int>
    %true_33020 = torch.constant.bool true
    %none_33021 = torch.constant.none
    %35409 = torch.aten.mean.dim %35395, %35408, %true_33020, %none_33021 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33022 = torch.constant.int -1
    %35410 = torch.prim.ListConstruct %int-1_33022 : (!torch.int) -> !torch.list<int>
    %true_33023 = torch.constant.bool true
    %none_33024 = torch.constant.none
    %35411 = torch.aten.mean.dim %35396, %35410, %true_33023, %none_33024 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33025 = torch.constant.int -1
    %35412 = torch.prim.ListConstruct %int-1_33025 : (!torch.int) -> !torch.list<int>
    %true_33026 = torch.constant.bool true
    %none_33027 = torch.constant.none
    %35413 = torch.aten.mean.dim %35397, %35412, %true_33026, %none_33027 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
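    // Add the epsilon (9.9999997473787516E-6, the f32 rounding of 1e-5) to each
    // per-token mean of squares.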
    %float9.999990e-06_33028 = torch.constant.float 9.9999997473787516E-6
    %int1_33029 = torch.constant.int 1
    %35414 = torch.aten.add.Scalar %35399, %float9.999990e-06_33028, %int1_33029 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33030 = torch.constant.float 9.9999997473787516E-6
    %int1_33031 = torch.constant.int 1
    %35415 = torch.aten.add.Scalar %35401, %float9.999990e-06_33030, %int1_33031 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33032 = torch.constant.float 9.9999997473787516E-6
    %int1_33033 = torch.constant.int 1
    %35416 = torch.aten.add.Scalar %35403, %float9.999990e-06_33032, %int1_33033 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33034 = torch.constant.float 9.9999997473787516E-6
    %int1_33035 = torch.constant.int 1
    %35417 = torch.aten.add.Scalar %35405, %float9.999990e-06_33034, %int1_33035 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33036 = torch.constant.float 9.9999997473787516E-6
    %int1_33037 = torch.constant.int 1
    %35418 = torch.aten.add.Scalar %35407, %float9.999990e-06_33036, %int1_33037 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33038 = torch.constant.float 9.9999997473787516E-6
    %int1_33039 = torch.constant.int 1
    %35419 = torch.aten.add.Scalar %35409, %float9.999990e-06_33038, %int1_33039 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33040 = torch.constant.float 9.9999997473787516E-6
    %int1_33041 = torch.constant.int 1
    %35420 = torch.aten.add.Scalar %35411, %float9.999990e-06_33040, %int1_33041 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33042 = torch.constant.float 9.9999997473787516E-6
    %int1_33043 = torch.constant.int 1
    %35421 = torch.aten.add.Scalar %35413, %float9.999990e-06_33042, %int1_33043 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
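    // Reciprocal square root: 1 / sqrt(mean(x^2) + eps), per shard.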
    %35422 = torch.aten.rsqrt %35414 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35423 = torch.aten.rsqrt %35415 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35424 = torch.aten.rsqrt %35416 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35425 = torch.aten.rsqrt %35417 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35426 = torch.aten.rsqrt %35418 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35427 = torch.aten.rsqrt %35419 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35428 = torch.aten.rsqrt %35420 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35429 = torch.aten.rsqrt %35421 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
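    // Normalize: multiply each per-device activation (%35382..%35389) by its
    // rsqrt factor, broadcasting [4,?,1] over [4,?,4096].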
    %35430 = torch.aten.mul.Tensor %35382, %35422 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35431 = torch.aten.mul.Tensor %35383, %35423 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35432 = torch.aten.mul.Tensor %35384, %35424 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35433 = torch.aten.mul.Tensor %35385, %35425 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35434 = torch.aten.mul.Tensor %35386, %35426 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35435 = torch.aten.mul.Tensor %35387, %35427 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35436 = torch.aten.mul.Tensor %35388, %35428 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35437 = torch.aten.mul.Tensor %35389, %35429 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
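    // Scale by the replicated [4096] norm weight (%1272..%1279, one copy per
    // device; presumably this block's ffn_norm weight, given the gate/up/down
    // matmuls that follow).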
    %35438 = torch.aten.mul.Tensor %1272, %35430 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35439 = torch.aten.mul.Tensor %1273, %35431 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35440 = torch.aten.mul.Tensor %1274, %35432 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35441 = torch.aten.mul.Tensor %1275, %35433 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35442 = torch.aten.mul.Tensor %1276, %35434 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35443 = torch.aten.mul.Tensor %1277, %35435 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35444 = torch.aten.mul.Tensor %1278, %35436 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35445 = torch.aten.mul.Tensor %1279, %35437 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
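    // Cast the normalized activations back from f32 to f16 (torch dtype code 5).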
    %int5_33044 = torch.constant.int 5
    %35446 = torch.prims.convert_element_type %35438, %int5_33044 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33045 = torch.constant.int 5
    %35447 = torch.prims.convert_element_type %35439, %int5_33045 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33046 = torch.constant.int 5
    %35448 = torch.prims.convert_element_type %35440, %int5_33046 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33047 = torch.constant.int 5
    %35449 = torch.prims.convert_element_type %35441, %int5_33047 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33048 = torch.constant.int 5
    %35450 = torch.prims.convert_element_type %35442, %int5_33048 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33049 = torch.constant.int 5
    %35451 = torch.prims.convert_element_type %35443, %int5_33049 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33050 = torch.constant.int 5
    %35452 = torch.prims.convert_element_type %35444, %int5_33050 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33051 = torch.constant.int 5
    %35453 = torch.prims.convert_element_type %35445, %int5_33051 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
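    // FFN gate projection: transpose each shard's [1792,4096] weight
    // (%1280..%1287) to [4096,1792]. 1792 is the per-shard slice of the FFN
    // hidden dim (presumably 8 x 1792 = 14336 in total across the 8 devices).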
    %int1_33052 = torch.constant.int 1
    %int0_33053 = torch.constant.int 0
    %35454 = torch.prim.ListConstruct %int1_33052, %int0_33053 : (!torch.int, !torch.int) -> !torch.list<int>
    %35455 = torch.aten.permute %1280, %35454 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33054 = torch.constant.int 1
    %int0_33055 = torch.constant.int 0
    %35456 = torch.prim.ListConstruct %int1_33054, %int0_33055 : (!torch.int, !torch.int) -> !torch.list<int>
    %35457 = torch.aten.permute %1281, %35456 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33056 = torch.constant.int 1
    %int0_33057 = torch.constant.int 0
    %35458 = torch.prim.ListConstruct %int1_33056, %int0_33057 : (!torch.int, !torch.int) -> !torch.list<int>
    %35459 = torch.aten.permute %1282, %35458 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33058 = torch.constant.int 1
    %int0_33059 = torch.constant.int 0
    %35460 = torch.prim.ListConstruct %int1_33058, %int0_33059 : (!torch.int, !torch.int) -> !torch.list<int>
    %35461 = torch.aten.permute %1283, %35460 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33060 = torch.constant.int 1
    %int0_33061 = torch.constant.int 0
    %35462 = torch.prim.ListConstruct %int1_33060, %int0_33061 : (!torch.int, !torch.int) -> !torch.list<int>
    %35463 = torch.aten.permute %1284, %35462 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33062 = torch.constant.int 1
    %int0_33063 = torch.constant.int 0
    %35464 = torch.prim.ListConstruct %int1_33062, %int0_33063 : (!torch.int, !torch.int) -> !torch.list<int>
    %35465 = torch.aten.permute %1285, %35464 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33064 = torch.constant.int 1
    %int0_33065 = torch.constant.int 0
    %35466 = torch.prim.ListConstruct %int1_33064, %int0_33065 : (!torch.int, !torch.int) -> !torch.list<int>
    %35467 = torch.aten.permute %1286, %35466 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33066 = torch.constant.int 1
    %int0_33067 = torch.constant.int 0
    %35468 = torch.prim.ListConstruct %int1_33066, %int0_33067 : (!torch.int, !torch.int) -> !torch.list<int>
    %35469 = torch.aten.permute %1287, %35468 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
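    // Gate matmuls, one per shard: flatten [4,?,4096] -> [4*?,4096], mm with the
    // transposed weight, then reshape the [?,1792] result back to [4,?,1792].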
    %int4_33068 = torch.constant.int 4
    %35470 = torch.aten.mul.int %int4_33068, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33069 = torch.constant.int 4096
    %35471 = torch.prim.ListConstruct %35470, %int4096_33069 : (!torch.int, !torch.int) -> !torch.list<int>
    %35472 = torch.aten.view %35446, %35471 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35472, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35473 = torch.aten.mm %35472, %35455 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35473, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33070 = torch.constant.int 4
    %int1792_33071 = torch.constant.int 1792
    %35474 = torch.prim.ListConstruct %int4_33070, %2482, %int1792_33071 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35475 = torch.aten.view %35473, %35474 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33072 = torch.constant.int 4
    %35476 = torch.aten.mul.int %int4_33072, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33073 = torch.constant.int 4096
    %35477 = torch.prim.ListConstruct %35476, %int4096_33073 : (!torch.int, !torch.int) -> !torch.list<int>
    %35478 = torch.aten.view %35447, %35477 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35478, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35479 = torch.aten.mm %35478, %35457 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35479, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33074 = torch.constant.int 4
    %int1792_33075 = torch.constant.int 1792
    %35480 = torch.prim.ListConstruct %int4_33074, %2482, %int1792_33075 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35481 = torch.aten.view %35479, %35480 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33076 = torch.constant.int 4
    %35482 = torch.aten.mul.int %int4_33076, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33077 = torch.constant.int 4096
    %35483 = torch.prim.ListConstruct %35482, %int4096_33077 : (!torch.int, !torch.int) -> !torch.list<int>
    %35484 = torch.aten.view %35448, %35483 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35484, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35485 = torch.aten.mm %35484, %35459 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35485, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33078 = torch.constant.int 4
    %int1792_33079 = torch.constant.int 1792
    %35486 = torch.prim.ListConstruct %int4_33078, %2482, %int1792_33079 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35487 = torch.aten.view %35485, %35486 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33080 = torch.constant.int 4
    %35488 = torch.aten.mul.int %int4_33080, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33081 = torch.constant.int 4096
    %35489 = torch.prim.ListConstruct %35488, %int4096_33081 : (!torch.int, !torch.int) -> !torch.list<int>
    %35490 = torch.aten.view %35449, %35489 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35490, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35491 = torch.aten.mm %35490, %35461 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35491, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33082 = torch.constant.int 4
    %int1792_33083 = torch.constant.int 1792
    %35492 = torch.prim.ListConstruct %int4_33082, %2482, %int1792_33083 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35493 = torch.aten.view %35491, %35492 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33084 = torch.constant.int 4
    %35494 = torch.aten.mul.int %int4_33084, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33085 = torch.constant.int 4096
    %35495 = torch.prim.ListConstruct %35494, %int4096_33085 : (!torch.int, !torch.int) -> !torch.list<int>
    %35496 = torch.aten.view %35450, %35495 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35496, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35497 = torch.aten.mm %35496, %35463 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35497, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33086 = torch.constant.int 4
    %int1792_33087 = torch.constant.int 1792
    %35498 = torch.prim.ListConstruct %int4_33086, %2482, %int1792_33087 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35499 = torch.aten.view %35497, %35498 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33088 = torch.constant.int 4
    %35500 = torch.aten.mul.int %int4_33088, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33089 = torch.constant.int 4096
    %35501 = torch.prim.ListConstruct %35500, %int4096_33089 : (!torch.int, !torch.int) -> !torch.list<int>
    %35502 = torch.aten.view %35451, %35501 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35502, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35503 = torch.aten.mm %35502, %35465 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35503, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33090 = torch.constant.int 4
    %int1792_33091 = torch.constant.int 1792
    %35504 = torch.prim.ListConstruct %int4_33090, %2482, %int1792_33091 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35505 = torch.aten.view %35503, %35504 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33092 = torch.constant.int 4
    %35506 = torch.aten.mul.int %int4_33092, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33093 = torch.constant.int 4096
    %35507 = torch.prim.ListConstruct %35506, %int4096_33093 : (!torch.int, !torch.int) -> !torch.list<int>
    %35508 = torch.aten.view %35452, %35507 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35508, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35509 = torch.aten.mm %35508, %35467 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35509, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33094 = torch.constant.int 4
    %int1792_33095 = torch.constant.int 1792
    %35510 = torch.prim.ListConstruct %int4_33094, %2482, %int1792_33095 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35511 = torch.aten.view %35509, %35510 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33096 = torch.constant.int 4
    %35512 = torch.aten.mul.int %int4_33096, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33097 = torch.constant.int 4096
    %35513 = torch.prim.ListConstruct %35512, %int4096_33097 : (!torch.int, !torch.int) -> !torch.list<int>
    %35514 = torch.aten.view %35453, %35513 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35514, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35515 = torch.aten.mm %35514, %35469 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35515, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33098 = torch.constant.int 4
    %int1792_33099 = torch.constant.int 1792
    %35516 = torch.prim.ListConstruct %int4_33098, %2482, %int1792_33099 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35517 = torch.aten.view %35515, %35516 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
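    // SiLU activation on the eight gate-projection outputs.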
    %35518 = torch.aten.silu %35475 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35519 = torch.aten.silu %35481 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35520 = torch.aten.silu %35487 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35521 = torch.aten.silu %35493 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35522 = torch.aten.silu %35499 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35523 = torch.aten.silu %35505 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35524 = torch.aten.silu %35511 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35525 = torch.aten.silu %35517 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
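    // FFN up projection: transpose the per-shard [1792,4096] up weights
    // (%1288..%1295) the same way.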
    %int1_33100 = torch.constant.int 1
    %int0_33101 = torch.constant.int 0
    %35526 = torch.prim.ListConstruct %int1_33100, %int0_33101 : (!torch.int, !torch.int) -> !torch.list<int>
    %35527 = torch.aten.permute %1288, %35526 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33102 = torch.constant.int 1
    %int0_33103 = torch.constant.int 0
    %35528 = torch.prim.ListConstruct %int1_33102, %int0_33103 : (!torch.int, !torch.int) -> !torch.list<int>
    %35529 = torch.aten.permute %1289, %35528 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33104 = torch.constant.int 1
    %int0_33105 = torch.constant.int 0
    %35530 = torch.prim.ListConstruct %int1_33104, %int0_33105 : (!torch.int, !torch.int) -> !torch.list<int>
    %35531 = torch.aten.permute %1290, %35530 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33106 = torch.constant.int 1
    %int0_33107 = torch.constant.int 0
    %35532 = torch.prim.ListConstruct %int1_33106, %int0_33107 : (!torch.int, !torch.int) -> !torch.list<int>
    %35533 = torch.aten.permute %1291, %35532 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33108 = torch.constant.int 1
    %int0_33109 = torch.constant.int 0
    %35534 = torch.prim.ListConstruct %int1_33108, %int0_33109 : (!torch.int, !torch.int) -> !torch.list<int>
    %35535 = torch.aten.permute %1292, %35534 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33110 = torch.constant.int 1
    %int0_33111 = torch.constant.int 0
    %35536 = torch.prim.ListConstruct %int1_33110, %int0_33111 : (!torch.int, !torch.int) -> !torch.list<int>
    %35537 = torch.aten.permute %1293, %35536 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33112 = torch.constant.int 1
    %int0_33113 = torch.constant.int 0
    %35538 = torch.prim.ListConstruct %int1_33112, %int0_33113 : (!torch.int, !torch.int) -> !torch.list<int>
    %35539 = torch.aten.permute %1294, %35538 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_33114 = torch.constant.int 1
    %int0_33115 = torch.constant.int 0
    %35540 = torch.prim.ListConstruct %int1_33114, %int0_33115 : (!torch.int, !torch.int) -> !torch.list<int>
    %35541 = torch.aten.permute %1295, %35540 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
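    // Up matmuls: the same flatten -> mm -> reshape pattern as the gate
    // projection, reusing the f16 normalized activations (%35446..%35453).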
    %int4_33116 = torch.constant.int 4
    %35542 = torch.aten.mul.int %int4_33116, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33117 = torch.constant.int 4096
    %35543 = torch.prim.ListConstruct %35542, %int4096_33117 : (!torch.int, !torch.int) -> !torch.list<int>
    %35544 = torch.aten.view %35446, %35543 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35544, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35545 = torch.aten.mm %35544, %35527 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35545, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33118 = torch.constant.int 4
    %int1792_33119 = torch.constant.int 1792
    %35546 = torch.prim.ListConstruct %int4_33118, %2482, %int1792_33119 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35547 = torch.aten.view %35545, %35546 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33120 = torch.constant.int 4
    %35548 = torch.aten.mul.int %int4_33120, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33121 = torch.constant.int 4096
    %35549 = torch.prim.ListConstruct %35548, %int4096_33121 : (!torch.int, !torch.int) -> !torch.list<int>
    %35550 = torch.aten.view %35447, %35549 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35550, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35551 = torch.aten.mm %35550, %35529 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35551, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33122 = torch.constant.int 4
    %int1792_33123 = torch.constant.int 1792
    %35552 = torch.prim.ListConstruct %int4_33122, %2482, %int1792_33123 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35553 = torch.aten.view %35551, %35552 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33124 = torch.constant.int 4
    %35554 = torch.aten.mul.int %int4_33124, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33125 = torch.constant.int 4096
    %35555 = torch.prim.ListConstruct %35554, %int4096_33125 : (!torch.int, !torch.int) -> !torch.list<int>
    %35556 = torch.aten.view %35448, %35555 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35556, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35557 = torch.aten.mm %35556, %35531 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35557, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33126 = torch.constant.int 4
    %int1792_33127 = torch.constant.int 1792
    %35558 = torch.prim.ListConstruct %int4_33126, %2482, %int1792_33127 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35559 = torch.aten.view %35557, %35558 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33128 = torch.constant.int 4
    %35560 = torch.aten.mul.int %int4_33128, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33129 = torch.constant.int 4096
    %35561 = torch.prim.ListConstruct %35560, %int4096_33129 : (!torch.int, !torch.int) -> !torch.list<int>
    %35562 = torch.aten.view %35449, %35561 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35562, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35563 = torch.aten.mm %35562, %35533 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35563, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33130 = torch.constant.int 4
    %int1792_33131 = torch.constant.int 1792
    %35564 = torch.prim.ListConstruct %int4_33130, %2482, %int1792_33131 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35565 = torch.aten.view %35563, %35564 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33132 = torch.constant.int 4
    %35566 = torch.aten.mul.int %int4_33132, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33133 = torch.constant.int 4096
    %35567 = torch.prim.ListConstruct %35566, %int4096_33133 : (!torch.int, !torch.int) -> !torch.list<int>
    %35568 = torch.aten.view %35450, %35567 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35568, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35569 = torch.aten.mm %35568, %35535 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35569, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33134 = torch.constant.int 4
    %int1792_33135 = torch.constant.int 1792
    %35570 = torch.prim.ListConstruct %int4_33134, %2482, %int1792_33135 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35571 = torch.aten.view %35569, %35570 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33136 = torch.constant.int 4
    %35572 = torch.aten.mul.int %int4_33136, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33137 = torch.constant.int 4096
    %35573 = torch.prim.ListConstruct %35572, %int4096_33137 : (!torch.int, !torch.int) -> !torch.list<int>
    %35574 = torch.aten.view %35451, %35573 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35574, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35575 = torch.aten.mm %35574, %35537 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35575, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33138 = torch.constant.int 4
    %int1792_33139 = torch.constant.int 1792
    %35576 = torch.prim.ListConstruct %int4_33138, %2482, %int1792_33139 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35577 = torch.aten.view %35575, %35576 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33140 = torch.constant.int 4
    %35578 = torch.aten.mul.int %int4_33140, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33141 = torch.constant.int 4096
    %35579 = torch.prim.ListConstruct %35578, %int4096_33141 : (!torch.int, !torch.int) -> !torch.list<int>
    %35580 = torch.aten.view %35452, %35579 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35580, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35581 = torch.aten.mm %35580, %35539 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35581, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33142 = torch.constant.int 4
    %int1792_33143 = torch.constant.int 1792
    %35582 = torch.prim.ListConstruct %int4_33142, %2482, %int1792_33143 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35583 = torch.aten.view %35581, %35582 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_33144 = torch.constant.int 4
    %35584 = torch.aten.mul.int %int4_33144, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33145 = torch.constant.int 4096
    %35585 = torch.prim.ListConstruct %35584, %int4096_33145 : (!torch.int, !torch.int) -> !torch.list<int>
    %35586 = torch.aten.view %35453, %35585 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35586, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35587 = torch.aten.mm %35586, %35541 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35587, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_33146 = torch.constant.int 4
    %int1792_33147 = torch.constant.int 1792
    %35588 = torch.prim.ListConstruct %int4_33146, %2482, %int1792_33147 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35589 = torch.aten.view %35587, %35588 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
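    // SwiGLU combine: silu(gate) * up, elementwise, per shard.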
    %35590 = torch.aten.mul.Tensor %35518, %35547 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35591 = torch.aten.mul.Tensor %35519, %35553 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35592 = torch.aten.mul.Tensor %35520, %35559 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35593 = torch.aten.mul.Tensor %35521, %35565 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35594 = torch.aten.mul.Tensor %35522, %35571 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35595 = torch.aten.mul.Tensor %35523, %35577 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35596 = torch.aten.mul.Tensor %35524, %35583 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %35597 = torch.aten.mul.Tensor %35525, %35589 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %35597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
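    // FFN down projection: transpose the per-shard [4096,1792] down weights
    // (%1296..%1303) to [1792,4096].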
    %int1_33148 = torch.constant.int 1
    %int0_33149 = torch.constant.int 0
    %35598 = torch.prim.ListConstruct %int1_33148, %int0_33149 : (!torch.int, !torch.int) -> !torch.list<int>
    %35599 = torch.aten.permute %1296, %35598 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_33150 = torch.constant.int 1
    %int0_33151 = torch.constant.int 0
    %35600 = torch.prim.ListConstruct %int1_33150, %int0_33151 : (!torch.int, !torch.int) -> !torch.list<int>
    %35601 = torch.aten.permute %1297, %35600 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_33152 = torch.constant.int 1
    %int0_33153 = torch.constant.int 0
    %35602 = torch.prim.ListConstruct %int1_33152, %int0_33153 : (!torch.int, !torch.int) -> !torch.list<int>
    %35603 = torch.aten.permute %1298, %35602 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_33154 = torch.constant.int 1
    %int0_33155 = torch.constant.int 0
    %35604 = torch.prim.ListConstruct %int1_33154, %int0_33155 : (!torch.int, !torch.int) -> !torch.list<int>
    %35605 = torch.aten.permute %1299, %35604 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_33156 = torch.constant.int 1
    %int0_33157 = torch.constant.int 0
    %35606 = torch.prim.ListConstruct %int1_33156, %int0_33157 : (!torch.int, !torch.int) -> !torch.list<int>
    %35607 = torch.aten.permute %1300, %35606 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_33158 = torch.constant.int 1
    %int0_33159 = torch.constant.int 0
    %35608 = torch.prim.ListConstruct %int1_33158, %int0_33159 : (!torch.int, !torch.int) -> !torch.list<int>
    %35609 = torch.aten.permute %1301, %35608 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_33160 = torch.constant.int 1
    %int0_33161 = torch.constant.int 0
    %35610 = torch.prim.ListConstruct %int1_33160, %int0_33161 : (!torch.int, !torch.int) -> !torch.list<int>
    %35611 = torch.aten.permute %1302, %35610 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_33162 = torch.constant.int 1
    %int0_33163 = torch.constant.int 0
    %35612 = torch.prim.ListConstruct %int1_33162, %int0_33163 : (!torch.int, !torch.int) -> !torch.list<int>
    %35613 = torch.aten.permute %1303, %35612 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
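    // Down matmuls per shard: [4*?,1792] x [1792,4096] -> [4,?,4096]. Since the
    // reduction dim (1792) covers only one shard's slice of the FFN hidden dim,
    // each result is a partial sum that still has to be reduced across devices.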
    %int1_33164 = torch.constant.int 1
    %35614 = torch.aten.size.int %35475, %int1_33164 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_33165 = torch.constant.int 4
    %35615 = torch.aten.mul.int %int4_33165, %35614 : !torch.int, !torch.int -> !torch.int
    %int1792_33166 = torch.constant.int 1792
    %35616 = torch.prim.ListConstruct %35615, %int1792_33166 : (!torch.int, !torch.int) -> !torch.list<int>
    %35617 = torch.aten.view %35590, %35616 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35617, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %35618 = torch.aten.mm %35617, %35599 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35618, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_33167 = torch.constant.int 4
    %int4096_33168 = torch.constant.int 4096
    %35619 = torch.prim.ListConstruct %int4_33167, %35614, %int4096_33168 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35620 = torch.aten.view %35618, %35619 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33169 = torch.constant.int 1
    %35621 = torch.aten.size.int %35481, %int1_33169 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_33170 = torch.constant.int 4
    %35622 = torch.aten.mul.int %int4_33170, %35621 : !torch.int, !torch.int -> !torch.int
    %int1792_33171 = torch.constant.int 1792
    %35623 = torch.prim.ListConstruct %35622, %int1792_33171 : (!torch.int, !torch.int) -> !torch.list<int>
    %35624 = torch.aten.view %35591, %35623 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35624, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %35625 = torch.aten.mm %35624, %35601 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35625, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_33172 = torch.constant.int 4
    %int4096_33173 = torch.constant.int 4096
    %35626 = torch.prim.ListConstruct %int4_33172, %35621, %int4096_33173 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35627 = torch.aten.view %35625, %35626 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33174 = torch.constant.int 1
    %35628 = torch.aten.size.int %35487, %int1_33174 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_33175 = torch.constant.int 4
    %35629 = torch.aten.mul.int %int4_33175, %35628 : !torch.int, !torch.int -> !torch.int
    %int1792_33176 = torch.constant.int 1792
    %35630 = torch.prim.ListConstruct %35629, %int1792_33176 : (!torch.int, !torch.int) -> !torch.list<int>
    %35631 = torch.aten.view %35592, %35630 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35631, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %35632 = torch.aten.mm %35631, %35603 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35632, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_33177 = torch.constant.int 4
    %int4096_33178 = torch.constant.int 4096
    %35633 = torch.prim.ListConstruct %int4_33177, %35628, %int4096_33178 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35634 = torch.aten.view %35632, %35633 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33179 = torch.constant.int 1
    %35635 = torch.aten.size.int %35493, %int1_33179 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_33180 = torch.constant.int 4
    %35636 = torch.aten.mul.int %int4_33180, %35635 : !torch.int, !torch.int -> !torch.int
    %int1792_33181 = torch.constant.int 1792
    %35637 = torch.prim.ListConstruct %35636, %int1792_33181 : (!torch.int, !torch.int) -> !torch.list<int>
    %35638 = torch.aten.view %35593, %35637 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35638, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %35639 = torch.aten.mm %35638, %35605 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35639, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_33182 = torch.constant.int 4
    %int4096_33183 = torch.constant.int 4096
    %35640 = torch.prim.ListConstruct %int4_33182, %35635, %int4096_33183 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35641 = torch.aten.view %35639, %35640 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33184 = torch.constant.int 1
    %35642 = torch.aten.size.int %35499, %int1_33184 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_33185 = torch.constant.int 4
    %35643 = torch.aten.mul.int %int4_33185, %35642 : !torch.int, !torch.int -> !torch.int
    %int1792_33186 = torch.constant.int 1792
    %35644 = torch.prim.ListConstruct %35643, %int1792_33186 : (!torch.int, !torch.int) -> !torch.list<int>
    %35645 = torch.aten.view %35594, %35644 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35645, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %35646 = torch.aten.mm %35645, %35607 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35646, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_33187 = torch.constant.int 4
    %int4096_33188 = torch.constant.int 4096
    %35647 = torch.prim.ListConstruct %int4_33187, %35642, %int4096_33188 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35648 = torch.aten.view %35646, %35647 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33189 = torch.constant.int 1
    %35649 = torch.aten.size.int %35505, %int1_33189 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_33190 = torch.constant.int 4
    %35650 = torch.aten.mul.int %int4_33190, %35649 : !torch.int, !torch.int -> !torch.int
    %int1792_33191 = torch.constant.int 1792
    %35651 = torch.prim.ListConstruct %35650, %int1792_33191 : (!torch.int, !torch.int) -> !torch.list<int>
    %35652 = torch.aten.view %35595, %35651 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35652, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %35653 = torch.aten.mm %35652, %35609 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35653, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_33192 = torch.constant.int 4
    %int4096_33193 = torch.constant.int 4096
    %35654 = torch.prim.ListConstruct %int4_33192, %35649, %int4096_33193 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35655 = torch.aten.view %35653, %35654 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33194 = torch.constant.int 1
    %35656 = torch.aten.size.int %35511, %int1_33194 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_33195 = torch.constant.int 4
    %35657 = torch.aten.mul.int %int4_33195, %35656 : !torch.int, !torch.int -> !torch.int
    %int1792_33196 = torch.constant.int 1792
    %35658 = torch.prim.ListConstruct %35657, %int1792_33196 : (!torch.int, !torch.int) -> !torch.list<int>
    %35659 = torch.aten.view %35596, %35658 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35659, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %35660 = torch.aten.mm %35659, %35611 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35660, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_33197 = torch.constant.int 4
    %int4096_33198 = torch.constant.int 4096
    %35661 = torch.prim.ListConstruct %int4_33197, %35656, %int4096_33198 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35662 = torch.aten.view %35660, %35661 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33199 = torch.constant.int 1
    %35663 = torch.aten.size.int %35517, %int1_33199 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_33200 = torch.constant.int 4
    %35664 = torch.aten.mul.int %int4_33200, %35663 : !torch.int, !torch.int -> !torch.int
    %int1792_33201 = torch.constant.int 1792
    %35665 = torch.prim.ListConstruct %35664, %int1792_33201 : (!torch.int, !torch.int) -> !torch.list<int>
    %35666 = torch.aten.view %35597, %35665 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %35666, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %35667 = torch.aten.mm %35666, %35613 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35667, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_33202 = torch.constant.int 4
    %int4096_33203 = torch.constant.int 4096
    %35668 = torch.prim.ListConstruct %int4_33202, %35663, %int4096_33203 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35669 = torch.aten.view %35667, %35668 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
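    // Gather phase: transfer the partial results of devices 1..7
    // (%35627..%35669) to @__device_0; device 0's own partial (%35620) stays put.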
    %35670 = torch_c.to_builtin_tensor %35627 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33204 = arith.constant 1 : index
    %dim_33205 = tensor.dim %35670, %c1_33204 : tensor<4x?x4096xf16>
    %35671 = flow.tensor.transfer %35670 : tensor<4x?x4096xf16>{%dim_33205} to #hal.device.promise<@__device_0>
    %35672 = torch_c.from_builtin_tensor %35671 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35673 = torch_c.to_builtin_tensor %35634 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33206 = arith.constant 1 : index
    %dim_33207 = tensor.dim %35673, %c1_33206 : tensor<4x?x4096xf16>
    %35674 = flow.tensor.transfer %35673 : tensor<4x?x4096xf16>{%dim_33207} to #hal.device.promise<@__device_0>
    %35675 = torch_c.from_builtin_tensor %35674 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35676 = torch_c.to_builtin_tensor %35641 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33208 = arith.constant 1 : index
    %dim_33209 = tensor.dim %35676, %c1_33208 : tensor<4x?x4096xf16>
    %35677 = flow.tensor.transfer %35676 : tensor<4x?x4096xf16>{%dim_33209} to #hal.device.promise<@__device_0>
    %35678 = torch_c.from_builtin_tensor %35677 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35679 = torch_c.to_builtin_tensor %35648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33210 = arith.constant 1 : index
    %dim_33211 = tensor.dim %35679, %c1_33210 : tensor<4x?x4096xf16>
    %35680 = flow.tensor.transfer %35679 : tensor<4x?x4096xf16>{%dim_33211} to #hal.device.promise<@__device_0>
    %35681 = torch_c.from_builtin_tensor %35680 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35682 = torch_c.to_builtin_tensor %35655 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33212 = arith.constant 1 : index
    %dim_33213 = tensor.dim %35682, %c1_33212 : tensor<4x?x4096xf16>
    %35683 = flow.tensor.transfer %35682 : tensor<4x?x4096xf16>{%dim_33213} to #hal.device.promise<@__device_0>
    %35684 = torch_c.from_builtin_tensor %35683 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35685 = torch_c.to_builtin_tensor %35662 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33214 = arith.constant 1 : index
    %dim_33215 = tensor.dim %35685, %c1_33214 : tensor<4x?x4096xf16>
    %35686 = flow.tensor.transfer %35685 : tensor<4x?x4096xf16>{%dim_33215} to #hal.device.promise<@__device_0>
    %35687 = torch_c.from_builtin_tensor %35686 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35688 = torch_c.to_builtin_tensor %35669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33216 = arith.constant 1 : index
    %dim_33217 = tensor.dim %35688, %c1_33216 : tensor<4x?x4096xf16>
    %35689 = flow.tensor.transfer %35688 : tensor<4x?x4096xf16>{%dim_33217} to #hal.device.promise<@__device_0>
    %35690 = torch_c.from_builtin_tensor %35689 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
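    // Reduce on @__device_0: the local partial plus the seven transferred copies,
    //   %35697 = %35620 + %35672 + %35675 + %35678 + %35681 + %35684 + %35687 + %35690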
    %int1_33218 = torch.constant.int 1
    %35691 = torch.aten.add.Tensor %35620, %35672, %int1_33218 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33219 = torch.constant.int 1
    %35692 = torch.aten.add.Tensor %35691, %35675, %int1_33219 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33220 = torch.constant.int 1
    %35693 = torch.aten.add.Tensor %35692, %35678, %int1_33220 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33221 = torch.constant.int 1
    %35694 = torch.aten.add.Tensor %35693, %35681, %int1_33221 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33222 = torch.constant.int 1
    %35695 = torch.aten.add.Tensor %35694, %35684, %int1_33222 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33223 = torch.constant.int 1
    %35696 = torch.aten.add.Tensor %35695, %35687, %int1_33223 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33224 = torch.constant.int 1
    %35697 = torch.aten.add.Tensor %35696, %35690, %int1_33224 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
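    // Same gather-and-reduce for @__device_1: the other seven partials are
    // transferred in, the device-local %35627 is added directly, giving %35725.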
    %35698 = torch_c.to_builtin_tensor %35620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33225 = arith.constant 1 : index
    %dim_33226 = tensor.dim %35698, %c1_33225 : tensor<4x?x4096xf16>
    %35699 = flow.tensor.transfer %35698 : tensor<4x?x4096xf16>{%dim_33226} to #hal.device.promise<@__device_1>
    %35700 = torch_c.from_builtin_tensor %35699 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35701 = torch_c.to_builtin_tensor %35634 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33227 = arith.constant 1 : index
    %dim_33228 = tensor.dim %35701, %c1_33227 : tensor<4x?x4096xf16>
    %35702 = flow.tensor.transfer %35701 : tensor<4x?x4096xf16>{%dim_33228} to #hal.device.promise<@__device_1>
    %35703 = torch_c.from_builtin_tensor %35702 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35704 = torch_c.to_builtin_tensor %35641 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33229 = arith.constant 1 : index
    %dim_33230 = tensor.dim %35704, %c1_33229 : tensor<4x?x4096xf16>
    %35705 = flow.tensor.transfer %35704 : tensor<4x?x4096xf16>{%dim_33230} to #hal.device.promise<@__device_1>
    %35706 = torch_c.from_builtin_tensor %35705 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35707 = torch_c.to_builtin_tensor %35648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33231 = arith.constant 1 : index
    %dim_33232 = tensor.dim %35707, %c1_33231 : tensor<4x?x4096xf16>
    %35708 = flow.tensor.transfer %35707 : tensor<4x?x4096xf16>{%dim_33232} to #hal.device.promise<@__device_1>
    %35709 = torch_c.from_builtin_tensor %35708 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35710 = torch_c.to_builtin_tensor %35655 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33233 = arith.constant 1 : index
    %dim_33234 = tensor.dim %35710, %c1_33233 : tensor<4x?x4096xf16>
    %35711 = flow.tensor.transfer %35710 : tensor<4x?x4096xf16>{%dim_33234} to #hal.device.promise<@__device_1>
    %35712 = torch_c.from_builtin_tensor %35711 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35713 = torch_c.to_builtin_tensor %35662 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33235 = arith.constant 1 : index
    %dim_33236 = tensor.dim %35713, %c1_33235 : tensor<4x?x4096xf16>
    %35714 = flow.tensor.transfer %35713 : tensor<4x?x4096xf16>{%dim_33236} to #hal.device.promise<@__device_1>
    %35715 = torch_c.from_builtin_tensor %35714 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35716 = torch_c.to_builtin_tensor %35669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33237 = arith.constant 1 : index
    %dim_33238 = tensor.dim %35716, %c1_33237 : tensor<4x?x4096xf16>
    %35717 = flow.tensor.transfer %35716 : tensor<4x?x4096xf16>{%dim_33238} to #hal.device.promise<@__device_1>
    %35718 = torch_c.from_builtin_tensor %35717 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33239 = torch.constant.int 1
    %35719 = torch.aten.add.Tensor %35700, %35627, %int1_33239 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33240 = torch.constant.int 1
    %35720 = torch.aten.add.Tensor %35719, %35703, %int1_33240 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33241 = torch.constant.int 1
    %35721 = torch.aten.add.Tensor %35720, %35706, %int1_33241 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33242 = torch.constant.int 1
    %35722 = torch.aten.add.Tensor %35721, %35709, %int1_33242 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33243 = torch.constant.int 1
    %35723 = torch.aten.add.Tensor %35722, %35712, %int1_33243 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33244 = torch.constant.int 1
    %35724 = torch.aten.add.Tensor %35723, %35715, %int1_33244 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33245 = torch.constant.int 1
    %35725 = torch.aten.add.Tensor %35724, %35718, %int1_33245 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
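    // @__device_2: same pattern; local partial %35634, reduced result %35753.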
    %35726 = torch_c.to_builtin_tensor %35620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33246 = arith.constant 1 : index
    %dim_33247 = tensor.dim %35726, %c1_33246 : tensor<4x?x4096xf16>
    %35727 = flow.tensor.transfer %35726 : tensor<4x?x4096xf16>{%dim_33247} to #hal.device.promise<@__device_2>
    %35728 = torch_c.from_builtin_tensor %35727 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35729 = torch_c.to_builtin_tensor %35627 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33248 = arith.constant 1 : index
    %dim_33249 = tensor.dim %35729, %c1_33248 : tensor<4x?x4096xf16>
    %35730 = flow.tensor.transfer %35729 : tensor<4x?x4096xf16>{%dim_33249} to #hal.device.promise<@__device_2>
    %35731 = torch_c.from_builtin_tensor %35730 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35732 = torch_c.to_builtin_tensor %35641 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33250 = arith.constant 1 : index
    %dim_33251 = tensor.dim %35732, %c1_33250 : tensor<4x?x4096xf16>
    %35733 = flow.tensor.transfer %35732 : tensor<4x?x4096xf16>{%dim_33251} to #hal.device.promise<@__device_2>
    %35734 = torch_c.from_builtin_tensor %35733 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35735 = torch_c.to_builtin_tensor %35648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33252 = arith.constant 1 : index
    %dim_33253 = tensor.dim %35735, %c1_33252 : tensor<4x?x4096xf16>
    %35736 = flow.tensor.transfer %35735 : tensor<4x?x4096xf16>{%dim_33253} to #hal.device.promise<@__device_2>
    %35737 = torch_c.from_builtin_tensor %35736 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35738 = torch_c.to_builtin_tensor %35655 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33254 = arith.constant 1 : index
    %dim_33255 = tensor.dim %35738, %c1_33254 : tensor<4x?x4096xf16>
    %35739 = flow.tensor.transfer %35738 : tensor<4x?x4096xf16>{%dim_33255} to #hal.device.promise<@__device_2>
    %35740 = torch_c.from_builtin_tensor %35739 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35741 = torch_c.to_builtin_tensor %35662 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33256 = arith.constant 1 : index
    %dim_33257 = tensor.dim %35741, %c1_33256 : tensor<4x?x4096xf16>
    %35742 = flow.tensor.transfer %35741 : tensor<4x?x4096xf16>{%dim_33257} to #hal.device.promise<@__device_2>
    %35743 = torch_c.from_builtin_tensor %35742 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35744 = torch_c.to_builtin_tensor %35669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33258 = arith.constant 1 : index
    %dim_33259 = tensor.dim %35744, %c1_33258 : tensor<4x?x4096xf16>
    %35745 = flow.tensor.transfer %35744 : tensor<4x?x4096xf16>{%dim_33259} to #hal.device.promise<@__device_2>
    %35746 = torch_c.from_builtin_tensor %35745 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33260 = torch.constant.int 1
    %35747 = torch.aten.add.Tensor %35728, %35731, %int1_33260 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33261 = torch.constant.int 1
    %35748 = torch.aten.add.Tensor %35747, %35634, %int1_33261 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33262 = torch.constant.int 1
    %35749 = torch.aten.add.Tensor %35748, %35734, %int1_33262 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33263 = torch.constant.int 1
    %35750 = torch.aten.add.Tensor %35749, %35737, %int1_33263 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33264 = torch.constant.int 1
    %35751 = torch.aten.add.Tensor %35750, %35740, %int1_33264 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33265 = torch.constant.int 1
    %35752 = torch.aten.add.Tensor %35751, %35743, %int1_33265 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33266 = torch.constant.int 1
    %35753 = torch.aten.add.Tensor %35752, %35746, %int1_33266 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
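    // @__device_3: same pattern; local partial %35641, reduced result %35781.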
    %35754 = torch_c.to_builtin_tensor %35620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33267 = arith.constant 1 : index
    %dim_33268 = tensor.dim %35754, %c1_33267 : tensor<4x?x4096xf16>
    %35755 = flow.tensor.transfer %35754 : tensor<4x?x4096xf16>{%dim_33268} to #hal.device.promise<@__device_3>
    %35756 = torch_c.from_builtin_tensor %35755 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35757 = torch_c.to_builtin_tensor %35627 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33269 = arith.constant 1 : index
    %dim_33270 = tensor.dim %35757, %c1_33269 : tensor<4x?x4096xf16>
    %35758 = flow.tensor.transfer %35757 : tensor<4x?x4096xf16>{%dim_33270} to #hal.device.promise<@__device_3>
    %35759 = torch_c.from_builtin_tensor %35758 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35760 = torch_c.to_builtin_tensor %35634 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33271 = arith.constant 1 : index
    %dim_33272 = tensor.dim %35760, %c1_33271 : tensor<4x?x4096xf16>
    %35761 = flow.tensor.transfer %35760 : tensor<4x?x4096xf16>{%dim_33272} to #hal.device.promise<@__device_3>
    %35762 = torch_c.from_builtin_tensor %35761 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35763 = torch_c.to_builtin_tensor %35648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33273 = arith.constant 1 : index
    %dim_33274 = tensor.dim %35763, %c1_33273 : tensor<4x?x4096xf16>
    %35764 = flow.tensor.transfer %35763 : tensor<4x?x4096xf16>{%dim_33274} to #hal.device.promise<@__device_3>
    %35765 = torch_c.from_builtin_tensor %35764 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35766 = torch_c.to_builtin_tensor %35655 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33275 = arith.constant 1 : index
    %dim_33276 = tensor.dim %35766, %c1_33275 : tensor<4x?x4096xf16>
    %35767 = flow.tensor.transfer %35766 : tensor<4x?x4096xf16>{%dim_33276} to #hal.device.promise<@__device_3>
    %35768 = torch_c.from_builtin_tensor %35767 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35769 = torch_c.to_builtin_tensor %35662 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33277 = arith.constant 1 : index
    %dim_33278 = tensor.dim %35769, %c1_33277 : tensor<4x?x4096xf16>
    %35770 = flow.tensor.transfer %35769 : tensor<4x?x4096xf16>{%dim_33278} to #hal.device.promise<@__device_3>
    %35771 = torch_c.from_builtin_tensor %35770 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35772 = torch_c.to_builtin_tensor %35669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33279 = arith.constant 1 : index
    %dim_33280 = tensor.dim %35772, %c1_33279 : tensor<4x?x4096xf16>
    %35773 = flow.tensor.transfer %35772 : tensor<4x?x4096xf16>{%dim_33280} to #hal.device.promise<@__device_3>
    %35774 = torch_c.from_builtin_tensor %35773 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33281 = torch.constant.int 1
    %35775 = torch.aten.add.Tensor %35756, %35759, %int1_33281 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33282 = torch.constant.int 1
    %35776 = torch.aten.add.Tensor %35775, %35762, %int1_33282 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33283 = torch.constant.int 1
    %35777 = torch.aten.add.Tensor %35776, %35641, %int1_33283 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33284 = torch.constant.int 1
    %35778 = torch.aten.add.Tensor %35777, %35765, %int1_33284 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33285 = torch.constant.int 1
    %35779 = torch.aten.add.Tensor %35778, %35768, %int1_33285 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33286 = torch.constant.int 1
    %35780 = torch.aten.add.Tensor %35779, %35771, %int1_33286 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33287 = torch.constant.int 1
    %35781 = torch.aten.add.Tensor %35780, %35774, %int1_33287 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
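    // @__device_4: same pattern; local partial %35648, reduced result %35809.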
    %35782 = torch_c.to_builtin_tensor %35620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33288 = arith.constant 1 : index
    %dim_33289 = tensor.dim %35782, %c1_33288 : tensor<4x?x4096xf16>
    %35783 = flow.tensor.transfer %35782 : tensor<4x?x4096xf16>{%dim_33289} to #hal.device.promise<@__device_4>
    %35784 = torch_c.from_builtin_tensor %35783 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35785 = torch_c.to_builtin_tensor %35627 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33290 = arith.constant 1 : index
    %dim_33291 = tensor.dim %35785, %c1_33290 : tensor<4x?x4096xf16>
    %35786 = flow.tensor.transfer %35785 : tensor<4x?x4096xf16>{%dim_33291} to #hal.device.promise<@__device_4>
    %35787 = torch_c.from_builtin_tensor %35786 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35788 = torch_c.to_builtin_tensor %35634 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33292 = arith.constant 1 : index
    %dim_33293 = tensor.dim %35788, %c1_33292 : tensor<4x?x4096xf16>
    %35789 = flow.tensor.transfer %35788 : tensor<4x?x4096xf16>{%dim_33293} to #hal.device.promise<@__device_4>
    %35790 = torch_c.from_builtin_tensor %35789 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35791 = torch_c.to_builtin_tensor %35641 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33294 = arith.constant 1 : index
    %dim_33295 = tensor.dim %35791, %c1_33294 : tensor<4x?x4096xf16>
    %35792 = flow.tensor.transfer %35791 : tensor<4x?x4096xf16>{%dim_33295} to #hal.device.promise<@__device_4>
    %35793 = torch_c.from_builtin_tensor %35792 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35794 = torch_c.to_builtin_tensor %35655 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33296 = arith.constant 1 : index
    %dim_33297 = tensor.dim %35794, %c1_33296 : tensor<4x?x4096xf16>
    %35795 = flow.tensor.transfer %35794 : tensor<4x?x4096xf16>{%dim_33297} to #hal.device.promise<@__device_4>
    %35796 = torch_c.from_builtin_tensor %35795 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35797 = torch_c.to_builtin_tensor %35662 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33298 = arith.constant 1 : index
    %dim_33299 = tensor.dim %35797, %c1_33298 : tensor<4x?x4096xf16>
    %35798 = flow.tensor.transfer %35797 : tensor<4x?x4096xf16>{%dim_33299} to #hal.device.promise<@__device_4>
    %35799 = torch_c.from_builtin_tensor %35798 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35800 = torch_c.to_builtin_tensor %35669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33300 = arith.constant 1 : index
    %dim_33301 = tensor.dim %35800, %c1_33300 : tensor<4x?x4096xf16>
    %35801 = flow.tensor.transfer %35800 : tensor<4x?x4096xf16>{%dim_33301} to #hal.device.promise<@__device_4>
    %35802 = torch_c.from_builtin_tensor %35801 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33302 = torch.constant.int 1
    %35803 = torch.aten.add.Tensor %35784, %35787, %int1_33302 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33303 = torch.constant.int 1
    %35804 = torch.aten.add.Tensor %35803, %35790, %int1_33303 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33304 = torch.constant.int 1
    %35805 = torch.aten.add.Tensor %35804, %35793, %int1_33304 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33305 = torch.constant.int 1
    %35806 = torch.aten.add.Tensor %35805, %35648, %int1_33305 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33306 = torch.constant.int 1
    %35807 = torch.aten.add.Tensor %35806, %35796, %int1_33306 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33307 = torch.constant.int 1
    %35808 = torch.aten.add.Tensor %35807, %35799, %int1_33307 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33308 = torch.constant.int 1
    %35809 = torch.aten.add.Tensor %35808, %35802, %int1_33308 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
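    // @__device_5: same pattern; local partial %35655, reduced result %35837.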
    %35810 = torch_c.to_builtin_tensor %35620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33309 = arith.constant 1 : index
    %dim_33310 = tensor.dim %35810, %c1_33309 : tensor<4x?x4096xf16>
    %35811 = flow.tensor.transfer %35810 : tensor<4x?x4096xf16>{%dim_33310} to #hal.device.promise<@__device_5>
    %35812 = torch_c.from_builtin_tensor %35811 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35813 = torch_c.to_builtin_tensor %35627 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33311 = arith.constant 1 : index
    %dim_33312 = tensor.dim %35813, %c1_33311 : tensor<4x?x4096xf16>
    %35814 = flow.tensor.transfer %35813 : tensor<4x?x4096xf16>{%dim_33312} to #hal.device.promise<@__device_5>
    %35815 = torch_c.from_builtin_tensor %35814 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35816 = torch_c.to_builtin_tensor %35634 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33313 = arith.constant 1 : index
    %dim_33314 = tensor.dim %35816, %c1_33313 : tensor<4x?x4096xf16>
    %35817 = flow.tensor.transfer %35816 : tensor<4x?x4096xf16>{%dim_33314} to #hal.device.promise<@__device_5>
    %35818 = torch_c.from_builtin_tensor %35817 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35819 = torch_c.to_builtin_tensor %35641 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33315 = arith.constant 1 : index
    %dim_33316 = tensor.dim %35819, %c1_33315 : tensor<4x?x4096xf16>
    %35820 = flow.tensor.transfer %35819 : tensor<4x?x4096xf16>{%dim_33316} to #hal.device.promise<@__device_5>
    %35821 = torch_c.from_builtin_tensor %35820 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35822 = torch_c.to_builtin_tensor %35648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33317 = arith.constant 1 : index
    %dim_33318 = tensor.dim %35822, %c1_33317 : tensor<4x?x4096xf16>
    %35823 = flow.tensor.transfer %35822 : tensor<4x?x4096xf16>{%dim_33318} to #hal.device.promise<@__device_5>
    %35824 = torch_c.from_builtin_tensor %35823 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35825 = torch_c.to_builtin_tensor %35662 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33319 = arith.constant 1 : index
    %dim_33320 = tensor.dim %35825, %c1_33319 : tensor<4x?x4096xf16>
    %35826 = flow.tensor.transfer %35825 : tensor<4x?x4096xf16>{%dim_33320} to #hal.device.promise<@__device_5>
    %35827 = torch_c.from_builtin_tensor %35826 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35828 = torch_c.to_builtin_tensor %35669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33321 = arith.constant 1 : index
    %dim_33322 = tensor.dim %35828, %c1_33321 : tensor<4x?x4096xf16>
    %35829 = flow.tensor.transfer %35828 : tensor<4x?x4096xf16>{%dim_33322} to #hal.device.promise<@__device_5>
    %35830 = torch_c.from_builtin_tensor %35829 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33323 = torch.constant.int 1
    %35831 = torch.aten.add.Tensor %35812, %35815, %int1_33323 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33324 = torch.constant.int 1
    %35832 = torch.aten.add.Tensor %35831, %35818, %int1_33324 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33325 = torch.constant.int 1
    %35833 = torch.aten.add.Tensor %35832, %35821, %int1_33325 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33326 = torch.constant.int 1
    %35834 = torch.aten.add.Tensor %35833, %35824, %int1_33326 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33327 = torch.constant.int 1
    %35835 = torch.aten.add.Tensor %35834, %35655, %int1_33327 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33328 = torch.constant.int 1
    %35836 = torch.aten.add.Tensor %35835, %35827, %int1_33328 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33329 = torch.constant.int 1
    %35837 = torch.aten.add.Tensor %35836, %35830, %int1_33329 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
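    // @__device_6: same pattern; local partial %35662, reduced result %35865.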
    %35838 = torch_c.to_builtin_tensor %35620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33330 = arith.constant 1 : index
    %dim_33331 = tensor.dim %35838, %c1_33330 : tensor<4x?x4096xf16>
    %35839 = flow.tensor.transfer %35838 : tensor<4x?x4096xf16>{%dim_33331} to #hal.device.promise<@__device_6>
    %35840 = torch_c.from_builtin_tensor %35839 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35841 = torch_c.to_builtin_tensor %35627 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33332 = arith.constant 1 : index
    %dim_33333 = tensor.dim %35841, %c1_33332 : tensor<4x?x4096xf16>
    %35842 = flow.tensor.transfer %35841 : tensor<4x?x4096xf16>{%dim_33333} to #hal.device.promise<@__device_6>
    %35843 = torch_c.from_builtin_tensor %35842 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35844 = torch_c.to_builtin_tensor %35634 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33334 = arith.constant 1 : index
    %dim_33335 = tensor.dim %35844, %c1_33334 : tensor<4x?x4096xf16>
    %35845 = flow.tensor.transfer %35844 : tensor<4x?x4096xf16>{%dim_33335} to #hal.device.promise<@__device_6>
    %35846 = torch_c.from_builtin_tensor %35845 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35846, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35847 = torch_c.to_builtin_tensor %35641 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33336 = arith.constant 1 : index
    %dim_33337 = tensor.dim %35847, %c1_33336 : tensor<4x?x4096xf16>
    %35848 = flow.tensor.transfer %35847 : tensor<4x?x4096xf16>{%dim_33337} to #hal.device.promise<@__device_6>
    %35849 = torch_c.from_builtin_tensor %35848 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35850 = torch_c.to_builtin_tensor %35648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33338 = arith.constant 1 : index
    %dim_33339 = tensor.dim %35850, %c1_33338 : tensor<4x?x4096xf16>
    %35851 = flow.tensor.transfer %35850 : tensor<4x?x4096xf16>{%dim_33339} to #hal.device.promise<@__device_6>
    %35852 = torch_c.from_builtin_tensor %35851 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35853 = torch_c.to_builtin_tensor %35655 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33340 = arith.constant 1 : index
    %dim_33341 = tensor.dim %35853, %c1_33340 : tensor<4x?x4096xf16>
    %35854 = flow.tensor.transfer %35853 : tensor<4x?x4096xf16>{%dim_33341} to #hal.device.promise<@__device_6>
    %35855 = torch_c.from_builtin_tensor %35854 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35856 = torch_c.to_builtin_tensor %35669 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33342 = arith.constant 1 : index
    %dim_33343 = tensor.dim %35856, %c1_33342 : tensor<4x?x4096xf16>
    %35857 = flow.tensor.transfer %35856 : tensor<4x?x4096xf16>{%dim_33343} to #hal.device.promise<@__device_6>
    %35858 = torch_c.from_builtin_tensor %35857 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33344 = torch.constant.int 1
    %35859 = torch.aten.add.Tensor %35840, %35843, %int1_33344 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33345 = torch.constant.int 1
    %35860 = torch.aten.add.Tensor %35859, %35846, %int1_33345 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33346 = torch.constant.int 1
    %35861 = torch.aten.add.Tensor %35860, %35849, %int1_33346 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33347 = torch.constant.int 1
    %35862 = torch.aten.add.Tensor %35861, %35852, %int1_33347 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33348 = torch.constant.int 1
    %35863 = torch.aten.add.Tensor %35862, %35855, %int1_33348 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33349 = torch.constant.int 1
    %35864 = torch.aten.add.Tensor %35863, %35662, %int1_33349 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33350 = torch.constant.int 1
    %35865 = torch.aten.add.Tensor %35864, %35858, %int1_33350 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
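    // @__device_7: same pattern; local partial %35669, reduced result %35893.
    // At this point every device holds the same reduced [4,?,4096] tensor: an
    // all-reduce (sum) over eight devices, fully unrolled into 56 point-to-point
    // transfers and 56 adds.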
    %35866 = torch_c.to_builtin_tensor %35620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33351 = arith.constant 1 : index
    %dim_33352 = tensor.dim %35866, %c1_33351 : tensor<4x?x4096xf16>
    %35867 = flow.tensor.transfer %35866 : tensor<4x?x4096xf16>{%dim_33352} to #hal.device.promise<@__device_7>
    %35868 = torch_c.from_builtin_tensor %35867 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35869 = torch_c.to_builtin_tensor %35627 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33353 = arith.constant 1 : index
    %dim_33354 = tensor.dim %35869, %c1_33353 : tensor<4x?x4096xf16>
    %35870 = flow.tensor.transfer %35869 : tensor<4x?x4096xf16>{%dim_33354} to #hal.device.promise<@__device_7>
    %35871 = torch_c.from_builtin_tensor %35870 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35872 = torch_c.to_builtin_tensor %35634 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33355 = arith.constant 1 : index
    %dim_33356 = tensor.dim %35872, %c1_33355 : tensor<4x?x4096xf16>
    %35873 = flow.tensor.transfer %35872 : tensor<4x?x4096xf16>{%dim_33356} to #hal.device.promise<@__device_7>
    %35874 = torch_c.from_builtin_tensor %35873 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35875 = torch_c.to_builtin_tensor %35641 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33357 = arith.constant 1 : index
    %dim_33358 = tensor.dim %35875, %c1_33357 : tensor<4x?x4096xf16>
    %35876 = flow.tensor.transfer %35875 : tensor<4x?x4096xf16>{%dim_33358} to #hal.device.promise<@__device_7>
    %35877 = torch_c.from_builtin_tensor %35876 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35878 = torch_c.to_builtin_tensor %35648 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33359 = arith.constant 1 : index
    %dim_33360 = tensor.dim %35878, %c1_33359 : tensor<4x?x4096xf16>
    %35879 = flow.tensor.transfer %35878 : tensor<4x?x4096xf16>{%dim_33360} to #hal.device.promise<@__device_7>
    %35880 = torch_c.from_builtin_tensor %35879 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35881 = torch_c.to_builtin_tensor %35655 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33361 = arith.constant 1 : index
    %dim_33362 = tensor.dim %35881, %c1_33361 : tensor<4x?x4096xf16>
    %35882 = flow.tensor.transfer %35881 : tensor<4x?x4096xf16>{%dim_33362} to #hal.device.promise<@__device_7>
    %35883 = torch_c.from_builtin_tensor %35882 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %35884 = torch_c.to_builtin_tensor %35662 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_33363 = arith.constant 1 : index
    %dim_33364 = tensor.dim %35884, %c1_33363 : tensor<4x?x4096xf16>
    %35885 = flow.tensor.transfer %35884 : tensor<4x?x4096xf16>{%dim_33364} to #hal.device.promise<@__device_7>
    %35886 = torch_c.from_builtin_tensor %35885 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33365 = torch.constant.int 1
    %35887 = torch.aten.add.Tensor %35868, %35871, %int1_33365 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33366 = torch.constant.int 1
    %35888 = torch.aten.add.Tensor %35887, %35874, %int1_33366 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33367 = torch.constant.int 1
    %35889 = torch.aten.add.Tensor %35888, %35877, %int1_33367 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33368 = torch.constant.int 1
    %35890 = torch.aten.add.Tensor %35889, %35880, %int1_33368 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33369 = torch.constant.int 1
    %35891 = torch.aten.add.Tensor %35890, %35883, %int1_33369 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33370 = torch.constant.int 1
    %35892 = torch.aten.add.Tensor %35891, %35886, %int1_33370 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33371 = torch.constant.int 1
    %35893 = torch.aten.add.Tensor %35892, %35669, %int1_33371 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
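    // Per-device residual add: each reduced sum is added to %35374..%35381,
    // which appear to be the residual streams carried in from earlier in the
    // layer, producing %35894..%35901.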
    %int1_33372 = torch.constant.int 1
    %35894 = torch.aten.add.Tensor %35374, %35697, %int1_33372 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33373 = torch.constant.int 1
    %35895 = torch.aten.add.Tensor %35375, %35725, %int1_33373 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33374 = torch.constant.int 1
    %35896 = torch.aten.add.Tensor %35376, %35753, %int1_33374 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33375 = torch.constant.int 1
    %35897 = torch.aten.add.Tensor %35377, %35781, %int1_33375 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33376 = torch.constant.int 1
    %35898 = torch.aten.add.Tensor %35378, %35809, %int1_33376 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33377 = torch.constant.int 1
    %35899 = torch.aten.add.Tensor %35379, %35837, %int1_33377 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33378 = torch.constant.int 1
    %35900 = torch.aten.add.Tensor %35380, %35865, %int1_33378 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_33379 = torch.constant.int 1
    %35901 = torch.aten.add.Tensor %35381, %35893, %int1_33379 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
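    // Eight replicated copies of an RMSNorm-style prologue follow: cast to f32
    // (torch dtype code 6), square elementwise, then mean over the last
    // dimension with keepdim, i.e. mean(x_f32^2, dim=-1, keepdim=True) per device.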
    %int6_33380 = torch.constant.int 6
    %35902 = torch.prims.convert_element_type %35894, %int6_33380 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_33381 = torch.constant.int 6
    %35903 = torch.prims.convert_element_type %35895, %int6_33381 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_33382 = torch.constant.int 6
    %35904 = torch.prims.convert_element_type %35896, %int6_33382 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_33383 = torch.constant.int 6
    %35905 = torch.prims.convert_element_type %35897, %int6_33383 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_33384 = torch.constant.int 6
    %35906 = torch.prims.convert_element_type %35898, %int6_33384 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_33385 = torch.constant.int 6
    %35907 = torch.prims.convert_element_type %35899, %int6_33385 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_33386 = torch.constant.int 6
    %35908 = torch.prims.convert_element_type %35900, %int6_33386 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_33387 = torch.constant.int 6
    %35909 = torch.prims.convert_element_type %35901, %int6_33387 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_33388 = torch.constant.int 2
    %35910 = torch.aten.pow.Tensor_Scalar %35902, %int2_33388 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_33389 = torch.constant.int 2
    %35911 = torch.aten.pow.Tensor_Scalar %35903, %int2_33389 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_33390 = torch.constant.int 2
    %35912 = torch.aten.pow.Tensor_Scalar %35904, %int2_33390 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_33391 = torch.constant.int 2
    %35913 = torch.aten.pow.Tensor_Scalar %35905, %int2_33391 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_33392 = torch.constant.int 2
    %35914 = torch.aten.pow.Tensor_Scalar %35906, %int2_33392 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_33393 = torch.constant.int 2
    %35915 = torch.aten.pow.Tensor_Scalar %35907, %int2_33393 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_33394 = torch.constant.int 2
    %35916 = torch.aten.pow.Tensor_Scalar %35908, %int2_33394 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_33395 = torch.constant.int 2
    %35917 = torch.aten.pow.Tensor_Scalar %35909, %int2_33395 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int-1_33396 = torch.constant.int -1
    %35918 = torch.prim.ListConstruct %int-1_33396 : (!torch.int) -> !torch.list<int>
    %true_33397 = torch.constant.bool true
    %none_33398 = torch.constant.none
    %35919 = torch.aten.mean.dim %35910, %35918, %true_33397, %none_33398 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33399 = torch.constant.int -1
    %35920 = torch.prim.ListConstruct %int-1_33399 : (!torch.int) -> !torch.list<int>
    %true_33400 = torch.constant.bool true
    %none_33401 = torch.constant.none
    %35921 = torch.aten.mean.dim %35911, %35920, %true_33400, %none_33401 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33402 = torch.constant.int -1
    %35922 = torch.prim.ListConstruct %int-1_33402 : (!torch.int) -> !torch.list<int>
    %true_33403 = torch.constant.bool true
    %none_33404 = torch.constant.none
    %35923 = torch.aten.mean.dim %35912, %35922, %true_33403, %none_33404 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33405 = torch.constant.int -1
    %35924 = torch.prim.ListConstruct %int-1_33405 : (!torch.int) -> !torch.list<int>
    %true_33406 = torch.constant.bool true
    %none_33407 = torch.constant.none
    %35925 = torch.aten.mean.dim %35913, %35924, %true_33406, %none_33407 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33408 = torch.constant.int -1
    %35926 = torch.prim.ListConstruct %int-1_33408 : (!torch.int) -> !torch.list<int>
    %true_33409 = torch.constant.bool true
    %none_33410 = torch.constant.none
    %35927 = torch.aten.mean.dim %35914, %35926, %true_33409, %none_33410 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33411 = torch.constant.int -1
    %35928 = torch.prim.ListConstruct %int-1_33411 : (!torch.int) -> !torch.list<int>
    %true_33412 = torch.constant.bool true
    %none_33413 = torch.constant.none
    %35929 = torch.aten.mean.dim %35915, %35928, %true_33412, %none_33413 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33414 = torch.constant.int -1
    %35930 = torch.prim.ListConstruct %int-1_33414 : (!torch.int) -> !torch.list<int>
    %true_33415 = torch.constant.bool true
    %none_33416 = torch.constant.none
    %35931 = torch.aten.mean.dim %35916, %35930, %true_33415, %none_33416 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_33417 = torch.constant.int -1
    %35932 = torch.prim.ListConstruct %int-1_33417 : (!torch.int) -> !torch.list<int>
    %true_33418 = torch.constant.bool true
    %none_33419 = torch.constant.none
    %35933 = torch.aten.mean.dim %35917, %35932, %true_33418, %none_33419 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
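    // Add the RMSNorm epsilon (~1e-5) to each per-shard mean.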
    %float9.999990e-06_33420 = torch.constant.float 9.9999997473787516E-6
    %int1_33421 = torch.constant.int 1
    %35934 = torch.aten.add.Scalar %35919, %float9.999990e-06_33420, %int1_33421 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33422 = torch.constant.float 9.9999997473787516E-6
    %int1_33423 = torch.constant.int 1
    %35935 = torch.aten.add.Scalar %35921, %float9.999990e-06_33422, %int1_33423 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33424 = torch.constant.float 9.9999997473787516E-6
    %int1_33425 = torch.constant.int 1
    %35936 = torch.aten.add.Scalar %35923, %float9.999990e-06_33424, %int1_33425 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33426 = torch.constant.float 9.9999997473787516E-6
    %int1_33427 = torch.constant.int 1
    %35937 = torch.aten.add.Scalar %35925, %float9.999990e-06_33426, %int1_33427 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33428 = torch.constant.float 9.9999997473787516E-6
    %int1_33429 = torch.constant.int 1
    %35938 = torch.aten.add.Scalar %35927, %float9.999990e-06_33428, %int1_33429 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33430 = torch.constant.float 9.9999997473787516E-6
    %int1_33431 = torch.constant.int 1
    %35939 = torch.aten.add.Scalar %35929, %float9.999990e-06_33430, %int1_33431 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33432 = torch.constant.float 9.9999997473787516E-6
    %int1_33433 = torch.constant.int 1
    %35940 = torch.aten.add.Scalar %35931, %float9.999990e-06_33432, %int1_33433 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_33434 = torch.constant.float 9.9999997473787516E-6
    %int1_33435 = torch.constant.int 1
    %35941 = torch.aten.add.Scalar %35933, %float9.999990e-06_33434, %int1_33435 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
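    // rsqrt yields 1/sqrt(mean(x^2) + eps) for every shard.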
    %35942 = torch.aten.rsqrt %35934 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35943 = torch.aten.rsqrt %35935 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35944 = torch.aten.rsqrt %35936 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35945 = torch.aten.rsqrt %35937 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35946 = torch.aten.rsqrt %35938 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35947 = torch.aten.rsqrt %35939 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35948 = torch.aten.rsqrt %35940 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %35949 = torch.aten.rsqrt %35941 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %35949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
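    // Normalize: multiply each f32 activation by its reciprocal RMS.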
    %35950 = torch.aten.mul.Tensor %35902, %35942 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35951 = torch.aten.mul.Tensor %35903, %35943 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35952 = torch.aten.mul.Tensor %35904, %35944 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35953 = torch.aten.mul.Tensor %35905, %35945 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35954 = torch.aten.mul.Tensor %35906, %35946 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35955 = torch.aten.mul.Tensor %35907, %35947 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35956 = torch.aten.mul.Tensor %35908, %35948 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35957 = torch.aten.mul.Tensor %35909, %35949 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
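    // Scale by %1304-%1311, [4096]-element f32 weights replicated once per device;
    // from the shapes these are likely this block's attn_norm weights.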
    %35958 = torch.aten.mul.Tensor %1304, %35950 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35959 = torch.aten.mul.Tensor %1305, %35951 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35960 = torch.aten.mul.Tensor %1306, %35952 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35961 = torch.aten.mul.Tensor %1307, %35953 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35962 = torch.aten.mul.Tensor %1308, %35954 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35963 = torch.aten.mul.Tensor %1309, %35955 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35964 = torch.aten.mul.Tensor %1310, %35956 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %35965 = torch.aten.mul.Tensor %1311, %35957 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %35965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
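    // Downcast the normalized activations back to f16 before the attention projections.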
    %int5_33436 = torch.constant.int 5
    %35966 = torch.prims.convert_element_type %35958, %int5_33436 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33437 = torch.constant.int 5
    %35967 = torch.prims.convert_element_type %35959, %int5_33437 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33438 = torch.constant.int 5
    %35968 = torch.prims.convert_element_type %35960, %int5_33438 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33439 = torch.constant.int 5
    %35969 = torch.prims.convert_element_type %35961, %int5_33439 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33440 = torch.constant.int 5
    %35970 = torch.prims.convert_element_type %35962, %int5_33440 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33441 = torch.constant.int 5
    %35971 = torch.prims.convert_element_type %35963, %int5_33441 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33442 = torch.constant.int 5
    %35972 = torch.prims.convert_element_type %35964, %int5_33442 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_33443 = torch.constant.int 5
    %35973 = torch.prims.convert_element_type %35965, %int5_33443 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %35973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
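    // Transpose each device's [512,4096] weight shard to [4096,512]; from the shapes
    // these are likely the 8-way-sharded attn_q weights.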
    %int1_33444 = torch.constant.int 1
    %int0_33445 = torch.constant.int 0
    %35974 = torch.prim.ListConstruct %int1_33444, %int0_33445 : (!torch.int, !torch.int) -> !torch.list<int>
    %35975 = torch.aten.permute %1312, %35974 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_33446 = torch.constant.int 1
    %int0_33447 = torch.constant.int 0
    %35976 = torch.prim.ListConstruct %int1_33446, %int0_33447 : (!torch.int, !torch.int) -> !torch.list<int>
    %35977 = torch.aten.permute %1313, %35976 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_33448 = torch.constant.int 1
    %int0_33449 = torch.constant.int 0
    %35978 = torch.prim.ListConstruct %int1_33448, %int0_33449 : (!torch.int, !torch.int) -> !torch.list<int>
    %35979 = torch.aten.permute %1314, %35978 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_33450 = torch.constant.int 1
    %int0_33451 = torch.constant.int 0
    %35980 = torch.prim.ListConstruct %int1_33450, %int0_33451 : (!torch.int, !torch.int) -> !torch.list<int>
    %35981 = torch.aten.permute %1315, %35980 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_33452 = torch.constant.int 1
    %int0_33453 = torch.constant.int 0
    %35982 = torch.prim.ListConstruct %int1_33452, %int0_33453 : (!torch.int, !torch.int) -> !torch.list<int>
    %35983 = torch.aten.permute %1316, %35982 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_33454 = torch.constant.int 1
    %int0_33455 = torch.constant.int 0
    %35984 = torch.prim.ListConstruct %int1_33454, %int0_33455 : (!torch.int, !torch.int) -> !torch.list<int>
    %35985 = torch.aten.permute %1317, %35984 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_33456 = torch.constant.int 1
    %int0_33457 = torch.constant.int 0
    %35986 = torch.prim.ListConstruct %int1_33456, %int0_33457 : (!torch.int, !torch.int) -> !torch.list<int>
    %35987 = torch.aten.permute %1318, %35986 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_33458 = torch.constant.int 1
    %int0_33459 = torch.constant.int 0
    %35988 = torch.prim.ListConstruct %int1_33458, %int0_33459 : (!torch.int, !torch.int) -> !torch.list<int>
    %35989 = torch.aten.permute %1319, %35988 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
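    // Per shard: flatten [4,seq,4096] to [4*seq,4096] (seq = s0*16 is dynamic), matmul
    // with the transposed shard, and restore the [4,seq,512] shape.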
    %int4_33460 = torch.constant.int 4
    %35990 = torch.aten.mul.int %int4_33460, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33461 = torch.constant.int 4096
    %35991 = torch.prim.ListConstruct %35990, %int4096_33461 : (!torch.int, !torch.int) -> !torch.list<int>
    %35992 = torch.aten.view %35966, %35991 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35992, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35993 = torch.aten.mm %35992, %35975 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %35993, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_33462 = torch.constant.int 4
    %int512_33463 = torch.constant.int 512
    %35994 = torch.prim.ListConstruct %int4_33462, %2482, %int512_33463 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %35995 = torch.aten.view %35993, %35994 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %35995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_33464 = torch.constant.int 4
    %35996 = torch.aten.mul.int %int4_33464, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33465 = torch.constant.int 4096
    %35997 = torch.prim.ListConstruct %35996, %int4096_33465 : (!torch.int, !torch.int) -> !torch.list<int>
    %35998 = torch.aten.view %35967, %35997 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %35998, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %35999 = torch.aten.mm %35998, %35977 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %35999, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_33466 = torch.constant.int 4
    %int512_33467 = torch.constant.int 512
    %36000 = torch.prim.ListConstruct %int4_33466, %2482, %int512_33467 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36001 = torch.aten.view %35999, %36000 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_33468 = torch.constant.int 4
    %36002 = torch.aten.mul.int %int4_33468, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33469 = torch.constant.int 4096
    %36003 = torch.prim.ListConstruct %36002, %int4096_33469 : (!torch.int, !torch.int) -> !torch.list<int>
    %36004 = torch.aten.view %35968, %36003 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36004, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36005 = torch.aten.mm %36004, %35979 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %36005, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_33470 = torch.constant.int 4
    %int512_33471 = torch.constant.int 512
    %36006 = torch.prim.ListConstruct %int4_33470, %2482, %int512_33471 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36007 = torch.aten.view %36005, %36006 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_33472 = torch.constant.int 4
    %36008 = torch.aten.mul.int %int4_33472, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33473 = torch.constant.int 4096
    %36009 = torch.prim.ListConstruct %36008, %int4096_33473 : (!torch.int, !torch.int) -> !torch.list<int>
    %36010 = torch.aten.view %35969, %36009 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36010, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36011 = torch.aten.mm %36010, %35981 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %36011, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_33474 = torch.constant.int 4
    %int512_33475 = torch.constant.int 512
    %36012 = torch.prim.ListConstruct %int4_33474, %2482, %int512_33475 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36013 = torch.aten.view %36011, %36012 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_33476 = torch.constant.int 4
    %36014 = torch.aten.mul.int %int4_33476, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33477 = torch.constant.int 4096
    %36015 = torch.prim.ListConstruct %36014, %int4096_33477 : (!torch.int, !torch.int) -> !torch.list<int>
    %36016 = torch.aten.view %35970, %36015 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36016, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36017 = torch.aten.mm %36016, %35983 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %36017, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_33478 = torch.constant.int 4
    %int512_33479 = torch.constant.int 512
    %36018 = torch.prim.ListConstruct %int4_33478, %2482, %int512_33479 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36019 = torch.aten.view %36017, %36018 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_33480 = torch.constant.int 4
    %36020 = torch.aten.mul.int %int4_33480, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33481 = torch.constant.int 4096
    %36021 = torch.prim.ListConstruct %36020, %int4096_33481 : (!torch.int, !torch.int) -> !torch.list<int>
    %36022 = torch.aten.view %35971, %36021 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36022, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36023 = torch.aten.mm %36022, %35985 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %36023, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_33482 = torch.constant.int 4
    %int512_33483 = torch.constant.int 512
    %36024 = torch.prim.ListConstruct %int4_33482, %2482, %int512_33483 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36025 = torch.aten.view %36023, %36024 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_33484 = torch.constant.int 4
    %36026 = torch.aten.mul.int %int4_33484, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33485 = torch.constant.int 4096
    %36027 = torch.prim.ListConstruct %36026, %int4096_33485 : (!torch.int, !torch.int) -> !torch.list<int>
    %36028 = torch.aten.view %35972, %36027 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36028, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36029 = torch.aten.mm %36028, %35987 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %36029, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_33486 = torch.constant.int 4
    %int512_33487 = torch.constant.int 512
    %36030 = torch.prim.ListConstruct %int4_33486, %2482, %int512_33487 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36031 = torch.aten.view %36029, %36030 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_33488 = torch.constant.int 4
    %36032 = torch.aten.mul.int %int4_33488, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33489 = torch.constant.int 4096
    %36033 = torch.prim.ListConstruct %36032, %int4096_33489 : (!torch.int, !torch.int) -> !torch.list<int>
    %36034 = torch.aten.view %35973, %36033 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36034, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36035 = torch.aten.mm %36034, %35989 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %36035, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_33490 = torch.constant.int 4
    %int512_33491 = torch.constant.int 512
    %36036 = torch.prim.ListConstruct %int4_33490, %2482, %int512_33491 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36037 = torch.aten.view %36035, %36036 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
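    // Likely the K projection: transpose each device's [128,4096] shard to [4096,128]
    // (one 128-wide KV head per shard).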
    %int1_33492 = torch.constant.int 1
    %int0_33493 = torch.constant.int 0
    %36038 = torch.prim.ListConstruct %int1_33492, %int0_33493 : (!torch.int, !torch.int) -> !torch.list<int>
    %36039 = torch.aten.permute %1320, %36038 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33494 = torch.constant.int 1
    %int0_33495 = torch.constant.int 0
    %36040 = torch.prim.ListConstruct %int1_33494, %int0_33495 : (!torch.int, !torch.int) -> !torch.list<int>
    %36041 = torch.aten.permute %1321, %36040 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33496 = torch.constant.int 1
    %int0_33497 = torch.constant.int 0
    %36042 = torch.prim.ListConstruct %int1_33496, %int0_33497 : (!torch.int, !torch.int) -> !torch.list<int>
    %36043 = torch.aten.permute %1322, %36042 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33498 = torch.constant.int 1
    %int0_33499 = torch.constant.int 0
    %36044 = torch.prim.ListConstruct %int1_33498, %int0_33499 : (!torch.int, !torch.int) -> !torch.list<int>
    %36045 = torch.aten.permute %1323, %36044 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33500 = torch.constant.int 1
    %int0_33501 = torch.constant.int 0
    %36046 = torch.prim.ListConstruct %int1_33500, %int0_33501 : (!torch.int, !torch.int) -> !torch.list<int>
    %36047 = torch.aten.permute %1324, %36046 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33502 = torch.constant.int 1
    %int0_33503 = torch.constant.int 0
    %36048 = torch.prim.ListConstruct %int1_33502, %int0_33503 : (!torch.int, !torch.int) -> !torch.list<int>
    %36049 = torch.aten.permute %1325, %36048 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33504 = torch.constant.int 1
    %int0_33505 = torch.constant.int 0
    %36050 = torch.prim.ListConstruct %int1_33504, %int0_33505 : (!torch.int, !torch.int) -> !torch.list<int>
    %36051 = torch.aten.permute %1326, %36050 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33506 = torch.constant.int 1
    %int0_33507 = torch.constant.int 0
    %36052 = torch.prim.ListConstruct %int1_33506, %int0_33507 : (!torch.int, !torch.int) -> !torch.list<int>
    %36053 = torch.aten.permute %1327, %36052 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
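    // The same flatten/matmul/reshape pattern as above, yielding a [4,seq,128] result per device.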
    %int4_33508 = torch.constant.int 4
    %36054 = torch.aten.mul.int %int4_33508, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33509 = torch.constant.int 4096
    %36055 = torch.prim.ListConstruct %36054, %int4096_33509 : (!torch.int, !torch.int) -> !torch.list<int>
    %36056 = torch.aten.view %35966, %36055 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36056, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36057 = torch.aten.mm %36056, %36039 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36057, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33510 = torch.constant.int 4
    %int128_33511 = torch.constant.int 128
    %36058 = torch.prim.ListConstruct %int4_33510, %2482, %int128_33511 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36059 = torch.aten.view %36057, %36058 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33512 = torch.constant.int 4
    %36060 = torch.aten.mul.int %int4_33512, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33513 = torch.constant.int 4096
    %36061 = torch.prim.ListConstruct %36060, %int4096_33513 : (!torch.int, !torch.int) -> !torch.list<int>
    %36062 = torch.aten.view %35967, %36061 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36062, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36063 = torch.aten.mm %36062, %36041 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36063, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33514 = torch.constant.int 4
    %int128_33515 = torch.constant.int 128
    %36064 = torch.prim.ListConstruct %int4_33514, %2482, %int128_33515 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36065 = torch.aten.view %36063, %36064 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33516 = torch.constant.int 4
    %36066 = torch.aten.mul.int %int4_33516, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33517 = torch.constant.int 4096
    %36067 = torch.prim.ListConstruct %36066, %int4096_33517 : (!torch.int, !torch.int) -> !torch.list<int>
    %36068 = torch.aten.view %35968, %36067 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36068, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36069 = torch.aten.mm %36068, %36043 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36069, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33518 = torch.constant.int 4
    %int128_33519 = torch.constant.int 128
    %36070 = torch.prim.ListConstruct %int4_33518, %2482, %int128_33519 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36071 = torch.aten.view %36069, %36070 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33520 = torch.constant.int 4
    %36072 = torch.aten.mul.int %int4_33520, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33521 = torch.constant.int 4096
    %36073 = torch.prim.ListConstruct %36072, %int4096_33521 : (!torch.int, !torch.int) -> !torch.list<int>
    %36074 = torch.aten.view %35969, %36073 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36074, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36075 = torch.aten.mm %36074, %36045 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36075, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33522 = torch.constant.int 4
    %int128_33523 = torch.constant.int 128
    %36076 = torch.prim.ListConstruct %int4_33522, %2482, %int128_33523 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36077 = torch.aten.view %36075, %36076 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33524 = torch.constant.int 4
    %36078 = torch.aten.mul.int %int4_33524, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33525 = torch.constant.int 4096
    %36079 = torch.prim.ListConstruct %36078, %int4096_33525 : (!torch.int, !torch.int) -> !torch.list<int>
    %36080 = torch.aten.view %35970, %36079 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36080, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36081 = torch.aten.mm %36080, %36047 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36081, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33526 = torch.constant.int 4
    %int128_33527 = torch.constant.int 128
    %36082 = torch.prim.ListConstruct %int4_33526, %2482, %int128_33527 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36083 = torch.aten.view %36081, %36082 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33528 = torch.constant.int 4
    %36084 = torch.aten.mul.int %int4_33528, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33529 = torch.constant.int 4096
    %36085 = torch.prim.ListConstruct %36084, %int4096_33529 : (!torch.int, !torch.int) -> !torch.list<int>
    %36086 = torch.aten.view %35971, %36085 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36086, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36087 = torch.aten.mm %36086, %36049 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36087, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33530 = torch.constant.int 4
    %int128_33531 = torch.constant.int 128
    %36088 = torch.prim.ListConstruct %int4_33530, %2482, %int128_33531 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36089 = torch.aten.view %36087, %36088 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33532 = torch.constant.int 4
    %36090 = torch.aten.mul.int %int4_33532, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33533 = torch.constant.int 4096
    %36091 = torch.prim.ListConstruct %36090, %int4096_33533 : (!torch.int, !torch.int) -> !torch.list<int>
    %36092 = torch.aten.view %35972, %36091 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36092, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36093 = torch.aten.mm %36092, %36051 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36093, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33534 = torch.constant.int 4
    %int128_33535 = torch.constant.int 128
    %36094 = torch.prim.ListConstruct %int4_33534, %2482, %int128_33535 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36095 = torch.aten.view %36093, %36094 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33536 = torch.constant.int 4
    %36096 = torch.aten.mul.int %int4_33536, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33537 = torch.constant.int 4096
    %36097 = torch.prim.ListConstruct %36096, %int4096_33537 : (!torch.int, !torch.int) -> !torch.list<int>
    %36098 = torch.aten.view %35973, %36097 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36098, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36099 = torch.aten.mm %36098, %36053 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36099, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33538 = torch.constant.int 4
    %int128_33539 = torch.constant.int 128
    %36100 = torch.prim.ListConstruct %int4_33538, %2482, %int128_33539 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36101 = torch.aten.view %36099, %36100 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
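    // Likely the V projection: transpose the eight remaining [128,4096] weight shards.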
    %int1_33540 = torch.constant.int 1
    %int0_33541 = torch.constant.int 0
    %36102 = torch.prim.ListConstruct %int1_33540, %int0_33541 : (!torch.int, !torch.int) -> !torch.list<int>
    %36103 = torch.aten.permute %1328, %36102 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33542 = torch.constant.int 1
    %int0_33543 = torch.constant.int 0
    %36104 = torch.prim.ListConstruct %int1_33542, %int0_33543 : (!torch.int, !torch.int) -> !torch.list<int>
    %36105 = torch.aten.permute %1329, %36104 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33544 = torch.constant.int 1
    %int0_33545 = torch.constant.int 0
    %36106 = torch.prim.ListConstruct %int1_33544, %int0_33545 : (!torch.int, !torch.int) -> !torch.list<int>
    %36107 = torch.aten.permute %1330, %36106 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33546 = torch.constant.int 1
    %int0_33547 = torch.constant.int 0
    %36108 = torch.prim.ListConstruct %int1_33546, %int0_33547 : (!torch.int, !torch.int) -> !torch.list<int>
    %36109 = torch.aten.permute %1331, %36108 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33548 = torch.constant.int 1
    %int0_33549 = torch.constant.int 0
    %36110 = torch.prim.ListConstruct %int1_33548, %int0_33549 : (!torch.int, !torch.int) -> !torch.list<int>
    %36111 = torch.aten.permute %1332, %36110 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33550 = torch.constant.int 1
    %int0_33551 = torch.constant.int 0
    %36112 = torch.prim.ListConstruct %int1_33550, %int0_33551 : (!torch.int, !torch.int) -> !torch.list<int>
    %36113 = torch.aten.permute %1333, %36112 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33552 = torch.constant.int 1
    %int0_33553 = torch.constant.int 0
    %36114 = torch.prim.ListConstruct %int1_33552, %int0_33553 : (!torch.int, !torch.int) -> !torch.list<int>
    %36115 = torch.aten.permute %1334, %36114 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_33554 = torch.constant.int 1
    %int0_33555 = torch.constant.int 0
    %36116 = torch.prim.ListConstruct %int1_33554, %int0_33555 : (!torch.int, !torch.int) -> !torch.list<int>
    %36117 = torch.aten.permute %1335, %36116 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
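    // The V-path matmuls mirror the K path: [4*seq,4096] x [4096,128], reshaped to [4,seq,128].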
    %int4_33556 = torch.constant.int 4
    %36118 = torch.aten.mul.int %int4_33556, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33557 = torch.constant.int 4096
    %36119 = torch.prim.ListConstruct %36118, %int4096_33557 : (!torch.int, !torch.int) -> !torch.list<int>
    %36120 = torch.aten.view %35966, %36119 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36120, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36121 = torch.aten.mm %36120, %36103 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36121, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33558 = torch.constant.int 4
    %int128_33559 = torch.constant.int 128
    %36122 = torch.prim.ListConstruct %int4_33558, %2482, %int128_33559 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36123 = torch.aten.view %36121, %36122 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33560 = torch.constant.int 4
    %36124 = torch.aten.mul.int %int4_33560, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33561 = torch.constant.int 4096
    %36125 = torch.prim.ListConstruct %36124, %int4096_33561 : (!torch.int, !torch.int) -> !torch.list<int>
    %36126 = torch.aten.view %35967, %36125 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36126, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36127 = torch.aten.mm %36126, %36105 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36127, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33562 = torch.constant.int 4
    %int128_33563 = torch.constant.int 128
    %36128 = torch.prim.ListConstruct %int4_33562, %2482, %int128_33563 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36129 = torch.aten.view %36127, %36128 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33564 = torch.constant.int 4
    %36130 = torch.aten.mul.int %int4_33564, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33565 = torch.constant.int 4096
    %36131 = torch.prim.ListConstruct %36130, %int4096_33565 : (!torch.int, !torch.int) -> !torch.list<int>
    %36132 = torch.aten.view %35968, %36131 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36132, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36133 = torch.aten.mm %36132, %36107 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36133, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33566 = torch.constant.int 4
    %int128_33567 = torch.constant.int 128
    %36134 = torch.prim.ListConstruct %int4_33566, %2482, %int128_33567 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36135 = torch.aten.view %36133, %36134 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33568 = torch.constant.int 4
    %36136 = torch.aten.mul.int %int4_33568, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33569 = torch.constant.int 4096
    %36137 = torch.prim.ListConstruct %36136, %int4096_33569 : (!torch.int, !torch.int) -> !torch.list<int>
    %36138 = torch.aten.view %35969, %36137 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36138, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36139 = torch.aten.mm %36138, %36109 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36139, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33570 = torch.constant.int 4
    %int128_33571 = torch.constant.int 128
    %36140 = torch.prim.ListConstruct %int4_33570, %2482, %int128_33571 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36141 = torch.aten.view %36139, %36140 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33572 = torch.constant.int 4
    %36142 = torch.aten.mul.int %int4_33572, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33573 = torch.constant.int 4096
    %36143 = torch.prim.ListConstruct %36142, %int4096_33573 : (!torch.int, !torch.int) -> !torch.list<int>
    %36144 = torch.aten.view %35970, %36143 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36144, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36145 = torch.aten.mm %36144, %36111 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36145, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33574 = torch.constant.int 4
    %int128_33575 = torch.constant.int 128
    %36146 = torch.prim.ListConstruct %int4_33574, %2482, %int128_33575 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36147 = torch.aten.view %36145, %36146 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33576 = torch.constant.int 4
    %36148 = torch.aten.mul.int %int4_33576, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33577 = torch.constant.int 4096
    %36149 = torch.prim.ListConstruct %36148, %int4096_33577 : (!torch.int, !torch.int) -> !torch.list<int>
    %36150 = torch.aten.view %35971, %36149 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36150, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36151 = torch.aten.mm %36150, %36113 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36151, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33578 = torch.constant.int 4
    %int128_33579 = torch.constant.int 128
    %36152 = torch.prim.ListConstruct %int4_33578, %2482, %int128_33579 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36153 = torch.aten.view %36151, %36152 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33580 = torch.constant.int 4
    %36154 = torch.aten.mul.int %int4_33580, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33581 = torch.constant.int 4096
    %36155 = torch.prim.ListConstruct %36154, %int4096_33581 : (!torch.int, !torch.int) -> !torch.list<int>
    %36156 = torch.aten.view %35972, %36155 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36156, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36157 = torch.aten.mm %36156, %36115 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36157, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33582 = torch.constant.int 4
    %int128_33583 = torch.constant.int 128
    %36158 = torch.prim.ListConstruct %int4_33582, %2482, %int128_33583 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36159 = torch.aten.view %36157, %36158 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_33584 = torch.constant.int 4
    %36160 = torch.aten.mul.int %int4_33584, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_33585 = torch.constant.int 4096
    %36161 = torch.prim.ListConstruct %36160, %int4096_33585 : (!torch.int, !torch.int) -> !torch.list<int>
    %36162 = torch.aten.view %35973, %36161 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36162, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %36163 = torch.aten.mm %36162, %36117 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %36163, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_33586 = torch.constant.int 4
    %int128_33587 = torch.constant.int 128
    %36164 = torch.prim.ListConstruct %int4_33586, %2482, %int128_33587 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36165 = torch.aten.view %36163, %36164 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %36165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
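    // Expand the 512-wide results to a multi-head layout [4,seq,4,128]:
    // 4 query heads per shard, 32 heads across the 8 devices.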
    %int4_33588 = torch.constant.int 4
    %int4_33589 = torch.constant.int 4
    %int128_33590 = torch.constant.int 128
    %36166 = torch.prim.ListConstruct %int4_33588, %2482, %int4_33589, %int128_33590 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36167 = torch.aten.view %35995, %36166 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_33591 = torch.constant.int 4
    %int4_33592 = torch.constant.int 4
    %int128_33593 = torch.constant.int 128
    %36168 = torch.prim.ListConstruct %int4_33591, %2482, %int4_33592, %int128_33593 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36169 = torch.aten.view %36001, %36168 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_33594 = torch.constant.int 4
    %int4_33595 = torch.constant.int 4
    %int128_33596 = torch.constant.int 128
    %36170 = torch.prim.ListConstruct %int4_33594, %2482, %int4_33595, %int128_33596 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36171 = torch.aten.view %36007, %36170 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_33597 = torch.constant.int 4
    %int4_33598 = torch.constant.int 4
    %int128_33599 = torch.constant.int 128
    %36172 = torch.prim.ListConstruct %int4_33597, %2482, %int4_33598, %int128_33599 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36173 = torch.aten.view %36013, %36172 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_33600 = torch.constant.int 4
    %int4_33601 = torch.constant.int 4
    %int128_33602 = torch.constant.int 128
    %36174 = torch.prim.ListConstruct %int4_33600, %2482, %int4_33601, %int128_33602 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36175 = torch.aten.view %36019, %36174 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_33603 = torch.constant.int 4
    %int4_33604 = torch.constant.int 4
    %int128_33605 = torch.constant.int 128
    %36176 = torch.prim.ListConstruct %int4_33603, %2482, %int4_33604, %int128_33605 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36177 = torch.aten.view %36025, %36176 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_33606 = torch.constant.int 4
    %int4_33607 = torch.constant.int 4
    %int128_33608 = torch.constant.int 128
    %36178 = torch.prim.ListConstruct %int4_33606, %2482, %int4_33607, %int128_33608 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36179 = torch.aten.view %36031, %36178 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_33609 = torch.constant.int 4
    %int4_33610 = torch.constant.int 4
    %int128_33611 = torch.constant.int 128
    %36180 = torch.prim.ListConstruct %int4_33609, %2482, %int4_33610, %int128_33611 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36181 = torch.aten.view %36037, %36180 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
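    // The 128-wide K results (and, further below, V) get a single-KV-head layout
    // [4,seq,1,128]; 8 KV heads total would be consistent with grouped-query attention.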
    %int4_33612 = torch.constant.int 4
    %int1_33613 = torch.constant.int 1
    %int128_33614 = torch.constant.int 128
    %36182 = torch.prim.ListConstruct %int4_33612, %2482, %int1_33613, %int128_33614 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36183 = torch.aten.view %36059, %36182 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33615 = torch.constant.int 4
    %int1_33616 = torch.constant.int 1
    %int128_33617 = torch.constant.int 128
    %36184 = torch.prim.ListConstruct %int4_33615, %2482, %int1_33616, %int128_33617 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36185 = torch.aten.view %36065, %36184 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33618 = torch.constant.int 4
    %int1_33619 = torch.constant.int 1
    %int128_33620 = torch.constant.int 128
    %36186 = torch.prim.ListConstruct %int4_33618, %2482, %int1_33619, %int128_33620 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36187 = torch.aten.view %36071, %36186 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33621 = torch.constant.int 4
    %int1_33622 = torch.constant.int 1
    %int128_33623 = torch.constant.int 128
    %36188 = torch.prim.ListConstruct %int4_33621, %2482, %int1_33622, %int128_33623 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36189 = torch.aten.view %36077, %36188 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33624 = torch.constant.int 4
    %int1_33625 = torch.constant.int 1
    %int128_33626 = torch.constant.int 128
    %36190 = torch.prim.ListConstruct %int4_33624, %2482, %int1_33625, %int128_33626 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36191 = torch.aten.view %36083, %36190 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33627 = torch.constant.int 4
    %int1_33628 = torch.constant.int 1
    %int128_33629 = torch.constant.int 128
    %36192 = torch.prim.ListConstruct %int4_33627, %2482, %int1_33628, %int128_33629 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36193 = torch.aten.view %36089, %36192 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33630 = torch.constant.int 4
    %int1_33631 = torch.constant.int 1
    %int128_33632 = torch.constant.int 128
    %36194 = torch.prim.ListConstruct %int4_33630, %2482, %int1_33631, %int128_33632 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36195 = torch.aten.view %36095, %36194 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33633 = torch.constant.int 4
    %int1_33634 = torch.constant.int 1
    %int128_33635 = torch.constant.int 128
    %36196 = torch.prim.ListConstruct %int4_33633, %2482, %int1_33634, %int128_33635 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36197 = torch.aten.view %36101, %36196 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33636 = torch.constant.int 4
    %int1_33637 = torch.constant.int 1
    %int128_33638 = torch.constant.int 128
    %36198 = torch.prim.ListConstruct %int4_33636, %2482, %int1_33637, %int128_33638 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36199 = torch.aten.view %36123, %36198 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33639 = torch.constant.int 4
    %int1_33640 = torch.constant.int 1
    %int128_33641 = torch.constant.int 128
    %36200 = torch.prim.ListConstruct %int4_33639, %2482, %int1_33640, %int128_33641 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36201 = torch.aten.view %36129, %36200 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33642 = torch.constant.int 4
    %int1_33643 = torch.constant.int 1
    %int128_33644 = torch.constant.int 128
    %36202 = torch.prim.ListConstruct %int4_33642, %2482, %int1_33643, %int128_33644 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36203 = torch.aten.view %36135, %36202 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33645 = torch.constant.int 4
    %int1_33646 = torch.constant.int 1
    %int128_33647 = torch.constant.int 128
    %36204 = torch.prim.ListConstruct %int4_33645, %2482, %int1_33646, %int128_33647 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36205 = torch.aten.view %36141, %36204 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33648 = torch.constant.int 4
    %int1_33649 = torch.constant.int 1
    %int128_33650 = torch.constant.int 128
    %36206 = torch.prim.ListConstruct %int4_33648, %2482, %int1_33649, %int128_33650 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36207 = torch.aten.view %36147, %36206 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33651 = torch.constant.int 4
    %int1_33652 = torch.constant.int 1
    %int128_33653 = torch.constant.int 128
    %36208 = torch.prim.ListConstruct %int4_33651, %2482, %int1_33652, %int128_33653 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36209 = torch.aten.view %36153, %36208 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33654 = torch.constant.int 4
    %int1_33655 = torch.constant.int 1
    %int128_33656 = torch.constant.int 128
    %36210 = torch.prim.ListConstruct %int4_33654, %2482, %int1_33655, %int128_33656 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36211 = torch.aten.view %36159, %36210 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36211, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_33657 = torch.constant.int 4
    %int1_33658 = torch.constant.int 1
    %int128_33659 = torch.constant.int 128
    %36212 = torch.prim.ListConstruct %int4_33657, %2482, %int1_33658, %int128_33659 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36213 = torch.aten.view %36165, %36212 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
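    // Rotary position embedding (RoPE) table, computed on the host ("cpu")
    // device. The ops below evaluate, for positions t = arange(131072) and even
    // channel indices 2i = arange(0, 128, 2):
    //   inv_freq[i] = 500000^(-(2i)/128),        i in [0, 64)
    //   freqs      = t[:, None] * inv_freq       -> [131072, 64]
    //   table      = cos(freqs) + i*sin(freqs)   -> [131072, 64] complex<f32>
    // The base theta of 5.0e5, the head dim of 128, and the unit scale are read
    // directly from the constants below; the RoPE naming is an interpretation
    // of this standard pattern, not something the IR states.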
    %int131072_33660 = torch.constant.int 131072
    %none_33661 = torch.constant.none
    %none_33662 = torch.constant.none
    %cpu_33663 = torch.constant.device "cpu"
    %false_33664 = torch.constant.bool false
    %36214 = torch.aten.arange %int131072_33660, %none_33661, %none_33662, %cpu_33663, %false_33664 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_33665 = torch.constant.int 0
    %int128_33666 = torch.constant.int 128
    %int2_33667 = torch.constant.int 2
    %none_33668 = torch.constant.none
    %none_33669 = torch.constant.none
    %cpu_33670 = torch.constant.device "cpu"
    %false_33671 = torch.constant.bool false
    %36215 = torch.aten.arange.start_step %int0_33665, %int128_33666, %int2_33667, %none_33668, %none_33669, %cpu_33670, %false_33671 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_33672 = torch.constant.int 0
    %int0_33673 = torch.constant.int 0
    %int64_33674 = torch.constant.int 64
    %int1_33675 = torch.constant.int 1
    %36216 = torch.aten.slice.Tensor %36215, %int0_33672, %int0_33673, %int64_33674, %int1_33675 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_33676 = torch.constant.int 6
    %36217 = torch.prims.convert_element_type %36216, %int6_33676 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_33677 = torch.constant.int 128
    %36218 = torch.aten.div.Scalar %36217, %int128_33677 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_33678 = torch.constant.float 5.000000e+05
    %36219 = torch.aten.pow.Scalar %float5.000000e05_33678, %36218 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %36220 = torch.aten.reciprocal %36219 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_33679 = torch.constant.float 1.000000e+00
    %36221 = torch.aten.mul.Scalar %36220, %float1.000000e00_33679 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_33680 = torch.constant.int 131072
    %int1_33681 = torch.constant.int 1
    %36222 = torch.prim.ListConstruct %int131072_33680, %int1_33681 : (!torch.int, !torch.int) -> !torch.list<int>
    %36223 = torch.aten.view %36214, %36222 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %36224 = torch.aten.mul.Tensor %36223, %36221 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %36225 = torch.aten.cos %36224 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %36226 = torch.aten.sin %36224 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %36227 = torch.aten.complex %36225, %36226 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
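    // Fan-out of the host-computed table: each to_builtin_tensor /
    // flow.tensor.transfer / from_builtin_tensor triple below copies the
    // [131072,64] complex<f32> table %36227 to one of @__device_0 ..
    // @__device_7, so every shard has a local replica alongside its weights.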
    %36228 = torch_c.to_builtin_tensor %36227 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36229 = flow.tensor.transfer %36228 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %36230 = torch_c.from_builtin_tensor %36229 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36231 = torch_c.to_builtin_tensor %36227 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36232 = flow.tensor.transfer %36231 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %36233 = torch_c.from_builtin_tensor %36232 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36234 = torch_c.to_builtin_tensor %36227 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36235 = flow.tensor.transfer %36234 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %36236 = torch_c.from_builtin_tensor %36235 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36237 = torch_c.to_builtin_tensor %36227 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36238 = flow.tensor.transfer %36237 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %36239 = torch_c.from_builtin_tensor %36238 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36240 = torch_c.to_builtin_tensor %36227 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36241 = flow.tensor.transfer %36240 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %36242 = torch_c.from_builtin_tensor %36241 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36243 = torch_c.to_builtin_tensor %36227 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36244 = flow.tensor.transfer %36243 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %36245 = torch_c.from_builtin_tensor %36244 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36246 = torch_c.to_builtin_tensor %36227 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36247 = flow.tensor.transfer %36246 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %36248 = torch_c.from_builtin_tensor %36247 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36249 = torch_c.to_builtin_tensor %36227 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36250 = flow.tensor.transfer %36249 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %36251 = torch_c.from_builtin_tensor %36250 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
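    // Per-shard rotary application. For each device the block below:
    //   1. slices that device's table replica to the current sequence length
    //      (s0 * 16 rows),
    //   2. unsqueezes it to [1, seq, 1, 64] for broadcasting,
    //   3. bitcasts the f16 activation [4, seq, H, 128] to complex<f16>
    //      [4, seq, H, 64] (adjacent value pairs become re/im),
    //   4. complex-multiplies by the table, promoting to complex<f32>,
    //   5. bitcasts back to [4, seq, H, 128] f32 and truncates to f16.
    // Shard 0 (%36167 on @__device_0) comes first; structurally identical
    // blocks follow for the remaining seven four-head shards.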
    %int1_33682 = torch.constant.int 1
    %36252 = torch.aten.size.int %35995, %int1_33682 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_33683 = torch.constant.int 0
    %36253 = torch.aten.add.int %int0_33683, %36252 : !torch.int, !torch.int -> !torch.int
    %int0_33684 = torch.constant.int 0
    %int0_33685 = torch.constant.int 0
    %int1_33686 = torch.constant.int 1
    %36254 = torch.aten.slice.Tensor %36230, %int0_33684, %int0_33685, %36253, %int1_33686 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36254, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33687 = torch.constant.int 1
    %int0_33688 = torch.constant.int 0
    %int9223372036854775807_33689 = torch.constant.int 9223372036854775807
    %int1_33690 = torch.constant.int 1
    %36255 = torch.aten.slice.Tensor %36254, %int1_33687, %int0_33688, %int9223372036854775807_33689, %int1_33690 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36255, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33691 = torch.constant.int 0
    %36256 = torch.aten.unsqueeze %36255, %int0_33691 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36256, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33692 = torch.constant.int 2
    %36257 = torch.aten.unsqueeze %36256, %int2_33692 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36257, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33693 = torch.constant.int 3
    %int0_33694 = torch.constant.int 0
    %int9223372036854775807_33695 = torch.constant.int 9223372036854775807
    %int1_33696 = torch.constant.int 1
    %36258 = torch.aten.slice.Tensor %36257, %int3_33693, %int0_33694, %int9223372036854775807_33695, %int1_33696 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36258, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36259 = torch_c.to_builtin_tensor %36167 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_33697 = arith.constant 1 : index
    %dim_33698 = tensor.dim %36259, %c1_33697 : tensor<4x?x4x128xf16>
    %36260 = flow.tensor.bitcast %36259 : tensor<4x?x4x128xf16>{%dim_33698} -> tensor<4x?x4x64xcomplex<f16>>{%dim_33698}
    %36261 = torch_c.from_builtin_tensor %36260 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %36261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %36262 = torch.aten.mul.Tensor %36261, %36258 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %36262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %36263 = torch_c.to_builtin_tensor %36262 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_33699 = arith.constant 1 : index
    %dim_33700 = tensor.dim %36263, %c1_33699 : tensor<4x?x4x64xcomplex<f32>>
    %36264 = flow.tensor.bitcast %36263 : tensor<4x?x4x64xcomplex<f32>>{%dim_33700} -> tensor<4x?x4x128xf32>{%dim_33700}
    %36265 = torch_c.from_builtin_tensor %36264 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %36265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_33701 = torch.constant.int 5
    %36266 = torch.prims.convert_element_type %36265, %int5_33701 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_33702 = torch.constant.int 1
    %36267 = torch.aten.size.int %36001, %int1_33702 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_33703 = torch.constant.int 0
    %36268 = torch.aten.add.int %int0_33703, %36267 : !torch.int, !torch.int -> !torch.int
    %int0_33704 = torch.constant.int 0
    %int0_33705 = torch.constant.int 0
    %int1_33706 = torch.constant.int 1
    %36269 = torch.aten.slice.Tensor %36233, %int0_33704, %int0_33705, %36268, %int1_33706 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36269, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33707 = torch.constant.int 1
    %int0_33708 = torch.constant.int 0
    %int9223372036854775807_33709 = torch.constant.int 9223372036854775807
    %int1_33710 = torch.constant.int 1
    %36270 = torch.aten.slice.Tensor %36269, %int1_33707, %int0_33708, %int9223372036854775807_33709, %int1_33710 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36270, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33711 = torch.constant.int 0
    %36271 = torch.aten.unsqueeze %36270, %int0_33711 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36271, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33712 = torch.constant.int 2
    %36272 = torch.aten.unsqueeze %36271, %int2_33712 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36272, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33713 = torch.constant.int 3
    %int0_33714 = torch.constant.int 0
    %int9223372036854775807_33715 = torch.constant.int 9223372036854775807
    %int1_33716 = torch.constant.int 1
    %36273 = torch.aten.slice.Tensor %36272, %int3_33713, %int0_33714, %int9223372036854775807_33715, %int1_33716 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36273, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36274 = torch_c.to_builtin_tensor %36169 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_33717 = arith.constant 1 : index
    %dim_33718 = tensor.dim %36274, %c1_33717 : tensor<4x?x4x128xf16>
    %36275 = flow.tensor.bitcast %36274 : tensor<4x?x4x128xf16>{%dim_33718} -> tensor<4x?x4x64xcomplex<f16>>{%dim_33718}
    %36276 = torch_c.from_builtin_tensor %36275 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %36276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %36277 = torch.aten.mul.Tensor %36276, %36273 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %36277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %36278 = torch_c.to_builtin_tensor %36277 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_33719 = arith.constant 1 : index
    %dim_33720 = tensor.dim %36278, %c1_33719 : tensor<4x?x4x64xcomplex<f32>>
    %36279 = flow.tensor.bitcast %36278 : tensor<4x?x4x64xcomplex<f32>>{%dim_33720} -> tensor<4x?x4x128xf32>{%dim_33720}
    %36280 = torch_c.from_builtin_tensor %36279 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %36280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_33721 = torch.constant.int 5
    %36281 = torch.prims.convert_element_type %36280, %int5_33721 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_33722 = torch.constant.int 1
    %36282 = torch.aten.size.int %36007, %int1_33722 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_33723 = torch.constant.int 0
    %36283 = torch.aten.add.int %int0_33723, %36282 : !torch.int, !torch.int -> !torch.int
    %int0_33724 = torch.constant.int 0
    %int0_33725 = torch.constant.int 0
    %int1_33726 = torch.constant.int 1
    %36284 = torch.aten.slice.Tensor %36236, %int0_33724, %int0_33725, %36283, %int1_33726 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36284, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33727 = torch.constant.int 1
    %int0_33728 = torch.constant.int 0
    %int9223372036854775807_33729 = torch.constant.int 9223372036854775807
    %int1_33730 = torch.constant.int 1
    %36285 = torch.aten.slice.Tensor %36284, %int1_33727, %int0_33728, %int9223372036854775807_33729, %int1_33730 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36285, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33731 = torch.constant.int 0
    %36286 = torch.aten.unsqueeze %36285, %int0_33731 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36286, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33732 = torch.constant.int 2
    %36287 = torch.aten.unsqueeze %36286, %int2_33732 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36287, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33733 = torch.constant.int 3
    %int0_33734 = torch.constant.int 0
    %int9223372036854775807_33735 = torch.constant.int 9223372036854775807
    %int1_33736 = torch.constant.int 1
    %36288 = torch.aten.slice.Tensor %36287, %int3_33733, %int0_33734, %int9223372036854775807_33735, %int1_33736 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36288, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36289 = torch_c.to_builtin_tensor %36171 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_33737 = arith.constant 1 : index
    %dim_33738 = tensor.dim %36289, %c1_33737 : tensor<4x?x4x128xf16>
    %36290 = flow.tensor.bitcast %36289 : tensor<4x?x4x128xf16>{%dim_33738} -> tensor<4x?x4x64xcomplex<f16>>{%dim_33738}
    %36291 = torch_c.from_builtin_tensor %36290 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %36291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %36292 = torch.aten.mul.Tensor %36291, %36288 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %36292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %36293 = torch_c.to_builtin_tensor %36292 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_33739 = arith.constant 1 : index
    %dim_33740 = tensor.dim %36293, %c1_33739 : tensor<4x?x4x64xcomplex<f32>>
    %36294 = flow.tensor.bitcast %36293 : tensor<4x?x4x64xcomplex<f32>>{%dim_33740} -> tensor<4x?x4x128xf32>{%dim_33740}
    %36295 = torch_c.from_builtin_tensor %36294 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %36295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_33741 = torch.constant.int 5
    %36296 = torch.prims.convert_element_type %36295, %int5_33741 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_33742 = torch.constant.int 1
    %36297 = torch.aten.size.int %36013, %int1_33742 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_33743 = torch.constant.int 0
    %36298 = torch.aten.add.int %int0_33743, %36297 : !torch.int, !torch.int -> !torch.int
    %int0_33744 = torch.constant.int 0
    %int0_33745 = torch.constant.int 0
    %int1_33746 = torch.constant.int 1
    %36299 = torch.aten.slice.Tensor %36239, %int0_33744, %int0_33745, %36298, %int1_33746 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36299, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33747 = torch.constant.int 1
    %int0_33748 = torch.constant.int 0
    %int9223372036854775807_33749 = torch.constant.int 9223372036854775807
    %int1_33750 = torch.constant.int 1
    %36300 = torch.aten.slice.Tensor %36299, %int1_33747, %int0_33748, %int9223372036854775807_33749, %int1_33750 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36300, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33751 = torch.constant.int 0
    %36301 = torch.aten.unsqueeze %36300, %int0_33751 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36301, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33752 = torch.constant.int 2
    %36302 = torch.aten.unsqueeze %36301, %int2_33752 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36302, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33753 = torch.constant.int 3
    %int0_33754 = torch.constant.int 0
    %int9223372036854775807_33755 = torch.constant.int 9223372036854775807
    %int1_33756 = torch.constant.int 1
    %36303 = torch.aten.slice.Tensor %36302, %int3_33753, %int0_33754, %int9223372036854775807_33755, %int1_33756 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36303, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36304 = torch_c.to_builtin_tensor %36173 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_33757 = arith.constant 1 : index
    %dim_33758 = tensor.dim %36304, %c1_33757 : tensor<4x?x4x128xf16>
    %36305 = flow.tensor.bitcast %36304 : tensor<4x?x4x128xf16>{%dim_33758} -> tensor<4x?x4x64xcomplex<f16>>{%dim_33758}
    %36306 = torch_c.from_builtin_tensor %36305 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %36306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %36307 = torch.aten.mul.Tensor %36306, %36303 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %36307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %36308 = torch_c.to_builtin_tensor %36307 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_33759 = arith.constant 1 : index
    %dim_33760 = tensor.dim %36308, %c1_33759 : tensor<4x?x4x64xcomplex<f32>>
    %36309 = flow.tensor.bitcast %36308 : tensor<4x?x4x64xcomplex<f32>>{%dim_33760} -> tensor<4x?x4x128xf32>{%dim_33760}
    %36310 = torch_c.from_builtin_tensor %36309 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %36310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_33761 = torch.constant.int 5
    %36311 = torch.prims.convert_element_type %36310, %int5_33761 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_33762 = torch.constant.int 1
    %36312 = torch.aten.size.int %36019, %int1_33762 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_33763 = torch.constant.int 0
    %36313 = torch.aten.add.int %int0_33763, %36312 : !torch.int, !torch.int -> !torch.int
    %int0_33764 = torch.constant.int 0
    %int0_33765 = torch.constant.int 0
    %int1_33766 = torch.constant.int 1
    %36314 = torch.aten.slice.Tensor %36242, %int0_33764, %int0_33765, %36313, %int1_33766 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36314, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33767 = torch.constant.int 1
    %int0_33768 = torch.constant.int 0
    %int9223372036854775807_33769 = torch.constant.int 9223372036854775807
    %int1_33770 = torch.constant.int 1
    %36315 = torch.aten.slice.Tensor %36314, %int1_33767, %int0_33768, %int9223372036854775807_33769, %int1_33770 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36315, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33771 = torch.constant.int 0
    %36316 = torch.aten.unsqueeze %36315, %int0_33771 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36316, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33772 = torch.constant.int 2
    %36317 = torch.aten.unsqueeze %36316, %int2_33772 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36317, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33773 = torch.constant.int 3
    %int0_33774 = torch.constant.int 0
    %int9223372036854775807_33775 = torch.constant.int 9223372036854775807
    %int1_33776 = torch.constant.int 1
    %36318 = torch.aten.slice.Tensor %36317, %int3_33773, %int0_33774, %int9223372036854775807_33775, %int1_33776 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36318, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36319 = torch_c.to_builtin_tensor %36175 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_33777 = arith.constant 1 : index
    %dim_33778 = tensor.dim %36319, %c1_33777 : tensor<4x?x4x128xf16>
    %36320 = flow.tensor.bitcast %36319 : tensor<4x?x4x128xf16>{%dim_33778} -> tensor<4x?x4x64xcomplex<f16>>{%dim_33778}
    %36321 = torch_c.from_builtin_tensor %36320 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %36321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %36322 = torch.aten.mul.Tensor %36321, %36318 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %36322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %36323 = torch_c.to_builtin_tensor %36322 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_33779 = arith.constant 1 : index
    %dim_33780 = tensor.dim %36323, %c1_33779 : tensor<4x?x4x64xcomplex<f32>>
    %36324 = flow.tensor.bitcast %36323 : tensor<4x?x4x64xcomplex<f32>>{%dim_33780} -> tensor<4x?x4x128xf32>{%dim_33780}
    %36325 = torch_c.from_builtin_tensor %36324 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %36325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_33781 = torch.constant.int 5
    %36326 = torch.prims.convert_element_type %36325, %int5_33781 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_33782 = torch.constant.int 1
    %36327 = torch.aten.size.int %36025, %int1_33782 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_33783 = torch.constant.int 0
    %36328 = torch.aten.add.int %int0_33783, %36327 : !torch.int, !torch.int -> !torch.int
    %int0_33784 = torch.constant.int 0
    %int0_33785 = torch.constant.int 0
    %int1_33786 = torch.constant.int 1
    %36329 = torch.aten.slice.Tensor %36245, %int0_33784, %int0_33785, %36328, %int1_33786 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36329, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33787 = torch.constant.int 1
    %int0_33788 = torch.constant.int 0
    %int9223372036854775807_33789 = torch.constant.int 9223372036854775807
    %int1_33790 = torch.constant.int 1
    %36330 = torch.aten.slice.Tensor %36329, %int1_33787, %int0_33788, %int9223372036854775807_33789, %int1_33790 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36330, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33791 = torch.constant.int 0
    %36331 = torch.aten.unsqueeze %36330, %int0_33791 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36331, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33792 = torch.constant.int 2
    %36332 = torch.aten.unsqueeze %36331, %int2_33792 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36332, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33793 = torch.constant.int 3
    %int0_33794 = torch.constant.int 0
    %int9223372036854775807_33795 = torch.constant.int 9223372036854775807
    %int1_33796 = torch.constant.int 1
    %36333 = torch.aten.slice.Tensor %36332, %int3_33793, %int0_33794, %int9223372036854775807_33795, %int1_33796 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36333, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36334 = torch_c.to_builtin_tensor %36177 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_33797 = arith.constant 1 : index
    %dim_33798 = tensor.dim %36334, %c1_33797 : tensor<4x?x4x128xf16>
    %36335 = flow.tensor.bitcast %36334 : tensor<4x?x4x128xf16>{%dim_33798} -> tensor<4x?x4x64xcomplex<f16>>{%dim_33798}
    %36336 = torch_c.from_builtin_tensor %36335 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %36336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %36337 = torch.aten.mul.Tensor %36336, %36333 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %36337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %36338 = torch_c.to_builtin_tensor %36337 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_33799 = arith.constant 1 : index
    %dim_33800 = tensor.dim %36338, %c1_33799 : tensor<4x?x4x64xcomplex<f32>>
    %36339 = flow.tensor.bitcast %36338 : tensor<4x?x4x64xcomplex<f32>>{%dim_33800} -> tensor<4x?x4x128xf32>{%dim_33800}
    %36340 = torch_c.from_builtin_tensor %36339 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %36340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_33801 = torch.constant.int 5
    %36341 = torch.prims.convert_element_type %36340, %int5_33801 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_33802 = torch.constant.int 1
    %36342 = torch.aten.size.int %36031, %int1_33802 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_33803 = torch.constant.int 0
    %36343 = torch.aten.add.int %int0_33803, %36342 : !torch.int, !torch.int -> !torch.int
    %int0_33804 = torch.constant.int 0
    %int0_33805 = torch.constant.int 0
    %int1_33806 = torch.constant.int 1
    %36344 = torch.aten.slice.Tensor %36248, %int0_33804, %int0_33805, %36343, %int1_33806 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36344, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33807 = torch.constant.int 1
    %int0_33808 = torch.constant.int 0
    %int9223372036854775807_33809 = torch.constant.int 9223372036854775807
    %int1_33810 = torch.constant.int 1
    %36345 = torch.aten.slice.Tensor %36344, %int1_33807, %int0_33808, %int9223372036854775807_33809, %int1_33810 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36345, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33811 = torch.constant.int 0
    %36346 = torch.aten.unsqueeze %36345, %int0_33811 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36346, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33812 = torch.constant.int 2
    %36347 = torch.aten.unsqueeze %36346, %int2_33812 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36347, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33813 = torch.constant.int 3
    %int0_33814 = torch.constant.int 0
    %int9223372036854775807_33815 = torch.constant.int 9223372036854775807
    %int1_33816 = torch.constant.int 1
    %36348 = torch.aten.slice.Tensor %36347, %int3_33813, %int0_33814, %int9223372036854775807_33815, %int1_33816 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36348, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36349 = torch_c.to_builtin_tensor %36179 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_33817 = arith.constant 1 : index
    %dim_33818 = tensor.dim %36349, %c1_33817 : tensor<4x?x4x128xf16>
    %36350 = flow.tensor.bitcast %36349 : tensor<4x?x4x128xf16>{%dim_33818} -> tensor<4x?x4x64xcomplex<f16>>{%dim_33818}
    %36351 = torch_c.from_builtin_tensor %36350 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %36351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %36352 = torch.aten.mul.Tensor %36351, %36348 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %36352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %36353 = torch_c.to_builtin_tensor %36352 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_33819 = arith.constant 1 : index
    %dim_33820 = tensor.dim %36353, %c1_33819 : tensor<4x?x4x64xcomplex<f32>>
    %36354 = flow.tensor.bitcast %36353 : tensor<4x?x4x64xcomplex<f32>>{%dim_33820} -> tensor<4x?x4x128xf32>{%dim_33820}
    %36355 = torch_c.from_builtin_tensor %36354 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %36355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_33821 = torch.constant.int 5
    %36356 = torch.prims.convert_element_type %36355, %int5_33821 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_33822 = torch.constant.int 1
    %36357 = torch.aten.size.int %36037, %int1_33822 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_33823 = torch.constant.int 0
    %36358 = torch.aten.add.int %int0_33823, %36357 : !torch.int, !torch.int -> !torch.int
    %int0_33824 = torch.constant.int 0
    %int0_33825 = torch.constant.int 0
    %int1_33826 = torch.constant.int 1
    %36359 = torch.aten.slice.Tensor %36251, %int0_33824, %int0_33825, %36358, %int1_33826 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36359, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33827 = torch.constant.int 1
    %int0_33828 = torch.constant.int 0
    %int9223372036854775807_33829 = torch.constant.int 9223372036854775807
    %int1_33830 = torch.constant.int 1
    %36360 = torch.aten.slice.Tensor %36359, %int1_33827, %int0_33828, %int9223372036854775807_33829, %int1_33830 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36360, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33831 = torch.constant.int 0
    %36361 = torch.aten.unsqueeze %36360, %int0_33831 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36361, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33832 = torch.constant.int 2
    %36362 = torch.aten.unsqueeze %36361, %int2_33832 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36362, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33833 = torch.constant.int 3
    %int0_33834 = torch.constant.int 0
    %int9223372036854775807_33835 = torch.constant.int 9223372036854775807
    %int1_33836 = torch.constant.int 1
    %36363 = torch.aten.slice.Tensor %36362, %int3_33833, %int0_33834, %int9223372036854775807_33835, %int1_33836 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36363, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36364 = torch_c.to_builtin_tensor %36181 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_33837 = arith.constant 1 : index
    %dim_33838 = tensor.dim %36364, %c1_33837 : tensor<4x?x4x128xf16>
    %36365 = flow.tensor.bitcast %36364 : tensor<4x?x4x128xf16>{%dim_33838} -> tensor<4x?x4x64xcomplex<f16>>{%dim_33838}
    %36366 = torch_c.from_builtin_tensor %36365 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %36366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %36367 = torch.aten.mul.Tensor %36366, %36363 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %36367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %36368 = torch_c.to_builtin_tensor %36367 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_33839 = arith.constant 1 : index
    %dim_33840 = tensor.dim %36368, %c1_33839 : tensor<4x?x4x64xcomplex<f32>>
    %36369 = flow.tensor.bitcast %36368 : tensor<4x?x4x64xcomplex<f32>>{%dim_33840} -> tensor<4x?x4x128xf32>{%dim_33840}
    %36370 = torch_c.from_builtin_tensor %36369 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %36370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_33841 = torch.constant.int 5
    %36371 = torch.prims.convert_element_type %36370, %int5_33841 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
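    // A second, value-identical RoPE table is rebuilt from scratch here (same
    // 131072 positions, same theta = 5.0e5) and fanned out to all eight devices
    // again, this time for the [4,?,1,128] single-head tensors. Recomputing
    // instead of reusing %36227 may simply be an artifact of each rotary module
    // being traced independently; the IR does not say.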
    %int131072_33842 = torch.constant.int 131072
    %none_33843 = torch.constant.none
    %none_33844 = torch.constant.none
    %cpu_33845 = torch.constant.device "cpu"
    %false_33846 = torch.constant.bool false
    %36372 = torch.aten.arange %int131072_33842, %none_33843, %none_33844, %cpu_33845, %false_33846 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_33847 = torch.constant.int 0
    %int128_33848 = torch.constant.int 128
    %int2_33849 = torch.constant.int 2
    %none_33850 = torch.constant.none
    %none_33851 = torch.constant.none
    %cpu_33852 = torch.constant.device "cpu"
    %false_33853 = torch.constant.bool false
    %36373 = torch.aten.arange.start_step %int0_33847, %int128_33848, %int2_33849, %none_33850, %none_33851, %cpu_33852, %false_33853 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_33854 = torch.constant.int 0
    %int0_33855 = torch.constant.int 0
    %int64_33856 = torch.constant.int 64
    %int1_33857 = torch.constant.int 1
    %36374 = torch.aten.slice.Tensor %36373, %int0_33854, %int0_33855, %int64_33856, %int1_33857 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_33858 = torch.constant.int 6
    %36375 = torch.prims.convert_element_type %36374, %int6_33858 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_33859 = torch.constant.int 128
    %36376 = torch.aten.div.Scalar %36375, %int128_33859 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_33860 = torch.constant.float 5.000000e+05
    %36377 = torch.aten.pow.Scalar %float5.000000e05_33860, %36376 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %36378 = torch.aten.reciprocal %36377 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_33861 = torch.constant.float 1.000000e+00
    %36379 = torch.aten.mul.Scalar %36378, %float1.000000e00_33861 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_33862 = torch.constant.int 131072
    %int1_33863 = torch.constant.int 1
    %36380 = torch.prim.ListConstruct %int131072_33862, %int1_33863 : (!torch.int, !torch.int) -> !torch.list<int>
    %36381 = torch.aten.view %36372, %36380 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %36382 = torch.aten.mul.Tensor %36381, %36379 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %36383 = torch.aten.cos %36382 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %36384 = torch.aten.sin %36382 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %36385 = torch.aten.complex %36383, %36384 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
    %36386 = torch_c.to_builtin_tensor %36385 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36387 = flow.tensor.transfer %36386 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %36388 = torch_c.from_builtin_tensor %36387 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36389 = torch_c.to_builtin_tensor %36385 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36390 = flow.tensor.transfer %36389 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %36391 = torch_c.from_builtin_tensor %36390 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36392 = torch_c.to_builtin_tensor %36385 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36393 = flow.tensor.transfer %36392 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %36394 = torch_c.from_builtin_tensor %36393 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36395 = torch_c.to_builtin_tensor %36385 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36396 = flow.tensor.transfer %36395 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %36397 = torch_c.from_builtin_tensor %36396 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36398 = torch_c.to_builtin_tensor %36385 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36399 = flow.tensor.transfer %36398 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %36400 = torch_c.from_builtin_tensor %36399 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36401 = torch_c.to_builtin_tensor %36385 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36402 = flow.tensor.transfer %36401 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %36403 = torch_c.from_builtin_tensor %36402 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36404 = torch_c.to_builtin_tensor %36385 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36405 = flow.tensor.transfer %36404 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %36406 = torch_c.from_builtin_tensor %36405 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %36407 = torch_c.to_builtin_tensor %36385 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %36408 = flow.tensor.transfer %36407 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %36409 = torch_c.from_builtin_tensor %36408 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
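    // Rotary application to the single-head shards: the same five-step
    // slice / unsqueeze / bitcast / complex-multiply / truncate pattern as
    // above, now on [4, seq, 1, 128] tensors, starting with %36183 on
    // @__device_0 and repeating per shard.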
    %int1_33864 = torch.constant.int 1
    %36410 = torch.aten.size.int %36059, %int1_33864 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_33865 = torch.constant.int 0
    %36411 = torch.aten.add.int %int0_33865, %36410 : !torch.int, !torch.int -> !torch.int
    %int0_33866 = torch.constant.int 0
    %int0_33867 = torch.constant.int 0
    %int1_33868 = torch.constant.int 1
    %36412 = torch.aten.slice.Tensor %36388, %int0_33866, %int0_33867, %36411, %int1_33868 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36412, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33869 = torch.constant.int 1
    %int0_33870 = torch.constant.int 0
    %int9223372036854775807_33871 = torch.constant.int 9223372036854775807
    %int1_33872 = torch.constant.int 1
    %36413 = torch.aten.slice.Tensor %36412, %int1_33869, %int0_33870, %int9223372036854775807_33871, %int1_33872 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36413, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33873 = torch.constant.int 0
    %36414 = torch.aten.unsqueeze %36413, %int0_33873 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36414, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33874 = torch.constant.int 2
    %36415 = torch.aten.unsqueeze %36414, %int2_33874 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36415, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33875 = torch.constant.int 3
    %int0_33876 = torch.constant.int 0
    %int9223372036854775807_33877 = torch.constant.int 9223372036854775807
    %int1_33878 = torch.constant.int 1
    %36416 = torch.aten.slice.Tensor %36415, %int3_33875, %int0_33876, %int9223372036854775807_33877, %int1_33878 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36416, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36417 = torch_c.to_builtin_tensor %36183 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_33879 = arith.constant 1 : index
    %dim_33880 = tensor.dim %36417, %c1_33879 : tensor<4x?x1x128xf16>
    %36418 = flow.tensor.bitcast %36417 : tensor<4x?x1x128xf16>{%dim_33880} -> tensor<4x?x1x64xcomplex<f16>>{%dim_33880}
    %36419 = torch_c.from_builtin_tensor %36418 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %36419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %36420 = torch.aten.mul.Tensor %36419, %36416 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %36421 = torch_c.to_builtin_tensor %36420 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_33881 = arith.constant 1 : index
    %dim_33882 = tensor.dim %36421, %c1_33881 : tensor<4x?x1x64xcomplex<f32>>
    %36422 = flow.tensor.bitcast %36421 : tensor<4x?x1x64xcomplex<f32>>{%dim_33882} -> tensor<4x?x1x128xf32>{%dim_33882}
    %36423 = torch_c.from_builtin_tensor %36422 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %36423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_33883 = torch.constant.int 5
    %36424 = torch.prims.convert_element_type %36423, %int5_33883 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_33884 = torch.constant.int 1
    %36425 = torch.aten.size.int %36065, %int1_33884 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_33885 = torch.constant.int 0
    %36426 = torch.aten.add.int %int0_33885, %36425 : !torch.int, !torch.int -> !torch.int
    %int0_33886 = torch.constant.int 0
    %int0_33887 = torch.constant.int 0
    %int1_33888 = torch.constant.int 1
    %36427 = torch.aten.slice.Tensor %36391, %int0_33886, %int0_33887, %36426, %int1_33888 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36427, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33889 = torch.constant.int 1
    %int0_33890 = torch.constant.int 0
    %int9223372036854775807_33891 = torch.constant.int 9223372036854775807
    %int1_33892 = torch.constant.int 1
    %36428 = torch.aten.slice.Tensor %36427, %int1_33889, %int0_33890, %int9223372036854775807_33891, %int1_33892 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36428, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33893 = torch.constant.int 0
    %36429 = torch.aten.unsqueeze %36428, %int0_33893 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36429, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33894 = torch.constant.int 2
    %36430 = torch.aten.unsqueeze %36429, %int2_33894 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36430, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33895 = torch.constant.int 3
    %int0_33896 = torch.constant.int 0
    %int9223372036854775807_33897 = torch.constant.int 9223372036854775807
    %int1_33898 = torch.constant.int 1
    %36431 = torch.aten.slice.Tensor %36430, %int3_33895, %int0_33896, %int9223372036854775807_33897, %int1_33898 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36431, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36432 = torch_c.to_builtin_tensor %36185 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_33899 = arith.constant 1 : index
    %dim_33900 = tensor.dim %36432, %c1_33899 : tensor<4x?x1x128xf16>
    %36433 = flow.tensor.bitcast %36432 : tensor<4x?x1x128xf16>{%dim_33900} -> tensor<4x?x1x64xcomplex<f16>>{%dim_33900}
    %36434 = torch_c.from_builtin_tensor %36433 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %36434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %36435 = torch.aten.mul.Tensor %36434, %36431 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %36436 = torch_c.to_builtin_tensor %36435 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_33901 = arith.constant 1 : index
    %dim_33902 = tensor.dim %36436, %c1_33901 : tensor<4x?x1x64xcomplex<f32>>
    %36437 = flow.tensor.bitcast %36436 : tensor<4x?x1x64xcomplex<f32>>{%dim_33902} -> tensor<4x?x1x128xf32>{%dim_33902}
    %36438 = torch_c.from_builtin_tensor %36437 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %36438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_33903 = torch.constant.int 5
    %36439 = torch.prims.convert_element_type %36438, %int5_33903 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_33904 = torch.constant.int 1
    %36440 = torch.aten.size.int %36071, %int1_33904 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_33905 = torch.constant.int 0
    %36441 = torch.aten.add.int %int0_33905, %36440 : !torch.int, !torch.int -> !torch.int
    %int0_33906 = torch.constant.int 0
    %int0_33907 = torch.constant.int 0
    %int1_33908 = torch.constant.int 1
    %36442 = torch.aten.slice.Tensor %36394, %int0_33906, %int0_33907, %36441, %int1_33908 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36442, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33909 = torch.constant.int 1
    %int0_33910 = torch.constant.int 0
    %int9223372036854775807_33911 = torch.constant.int 9223372036854775807
    %int1_33912 = torch.constant.int 1
    %36443 = torch.aten.slice.Tensor %36442, %int1_33909, %int0_33910, %int9223372036854775807_33911, %int1_33912 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36443, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33913 = torch.constant.int 0
    %36444 = torch.aten.unsqueeze %36443, %int0_33913 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36444, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33914 = torch.constant.int 2
    %36445 = torch.aten.unsqueeze %36444, %int2_33914 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36445, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33915 = torch.constant.int 3
    %int0_33916 = torch.constant.int 0
    %int9223372036854775807_33917 = torch.constant.int 9223372036854775807
    %int1_33918 = torch.constant.int 1
    %36446 = torch.aten.slice.Tensor %36445, %int3_33915, %int0_33916, %int9223372036854775807_33917, %int1_33918 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36446, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36447 = torch_c.to_builtin_tensor %36187 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_33919 = arith.constant 1 : index
    %dim_33920 = tensor.dim %36447, %c1_33919 : tensor<4x?x1x128xf16>
    %36448 = flow.tensor.bitcast %36447 : tensor<4x?x1x128xf16>{%dim_33920} -> tensor<4x?x1x64xcomplex<f16>>{%dim_33920}
    %36449 = torch_c.from_builtin_tensor %36448 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %36449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %36450 = torch.aten.mul.Tensor %36449, %36446 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %36451 = torch_c.to_builtin_tensor %36450 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_33921 = arith.constant 1 : index
    %dim_33922 = tensor.dim %36451, %c1_33921 : tensor<4x?x1x64xcomplex<f32>>
    %36452 = flow.tensor.bitcast %36451 : tensor<4x?x1x64xcomplex<f32>>{%dim_33922} -> tensor<4x?x1x128xf32>{%dim_33922}
    %36453 = torch_c.from_builtin_tensor %36452 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %36453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_33923 = torch.constant.int 5
    %36454 = torch.prims.convert_element_type %36453, %int5_33923 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_33924 = torch.constant.int 1
    %36455 = torch.aten.size.int %36077, %int1_33924 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_33925 = torch.constant.int 0
    %36456 = torch.aten.add.int %int0_33925, %36455 : !torch.int, !torch.int -> !torch.int
    %int0_33926 = torch.constant.int 0
    %int0_33927 = torch.constant.int 0
    %int1_33928 = torch.constant.int 1
    %36457 = torch.aten.slice.Tensor %36397, %int0_33926, %int0_33927, %36456, %int1_33928 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36457, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33929 = torch.constant.int 1
    %int0_33930 = torch.constant.int 0
    %int9223372036854775807_33931 = torch.constant.int 9223372036854775807
    %int1_33932 = torch.constant.int 1
    %36458 = torch.aten.slice.Tensor %36457, %int1_33929, %int0_33930, %int9223372036854775807_33931, %int1_33932 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36458, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33933 = torch.constant.int 0
    %36459 = torch.aten.unsqueeze %36458, %int0_33933 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36459, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33934 = torch.constant.int 2
    %36460 = torch.aten.unsqueeze %36459, %int2_33934 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36460, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33935 = torch.constant.int 3
    %int0_33936 = torch.constant.int 0
    %int9223372036854775807_33937 = torch.constant.int 9223372036854775807
    %int1_33938 = torch.constant.int 1
    %36461 = torch.aten.slice.Tensor %36460, %int3_33935, %int0_33936, %int9223372036854775807_33937, %int1_33938 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36461, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36462 = torch_c.to_builtin_tensor %36189 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_33939 = arith.constant 1 : index
    %dim_33940 = tensor.dim %36462, %c1_33939 : tensor<4x?x1x128xf16>
    %36463 = flow.tensor.bitcast %36462 : tensor<4x?x1x128xf16>{%dim_33940} -> tensor<4x?x1x64xcomplex<f16>>{%dim_33940}
    %36464 = torch_c.from_builtin_tensor %36463 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %36464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %36465 = torch.aten.mul.Tensor %36464, %36461 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %36466 = torch_c.to_builtin_tensor %36465 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_33941 = arith.constant 1 : index
    %dim_33942 = tensor.dim %36466, %c1_33941 : tensor<4x?x1x64xcomplex<f32>>
    %36467 = flow.tensor.bitcast %36466 : tensor<4x?x1x64xcomplex<f32>>{%dim_33942} -> tensor<4x?x1x128xf32>{%dim_33942}
    %36468 = torch_c.from_builtin_tensor %36467 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %36468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_33943 = torch.constant.int 5
    %36469 = torch.prims.convert_element_type %36468, %int5_33943 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_33944 = torch.constant.int 1
    %36470 = torch.aten.size.int %36083, %int1_33944 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_33945 = torch.constant.int 0
    %36471 = torch.aten.add.int %int0_33945, %36470 : !torch.int, !torch.int -> !torch.int
    %int0_33946 = torch.constant.int 0
    %int0_33947 = torch.constant.int 0
    %int1_33948 = torch.constant.int 1
    %36472 = torch.aten.slice.Tensor %36400, %int0_33946, %int0_33947, %36471, %int1_33948 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36472, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33949 = torch.constant.int 1
    %int0_33950 = torch.constant.int 0
    %int9223372036854775807_33951 = torch.constant.int 9223372036854775807
    %int1_33952 = torch.constant.int 1
    %36473 = torch.aten.slice.Tensor %36472, %int1_33949, %int0_33950, %int9223372036854775807_33951, %int1_33952 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36473, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33953 = torch.constant.int 0
    %36474 = torch.aten.unsqueeze %36473, %int0_33953 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36474, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33954 = torch.constant.int 2
    %36475 = torch.aten.unsqueeze %36474, %int2_33954 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36475, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33955 = torch.constant.int 3
    %int0_33956 = torch.constant.int 0
    %int9223372036854775807_33957 = torch.constant.int 9223372036854775807
    %int1_33958 = torch.constant.int 1
    %36476 = torch.aten.slice.Tensor %36475, %int3_33955, %int0_33956, %int9223372036854775807_33957, %int1_33958 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36476, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36477 = torch_c.to_builtin_tensor %36191 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_33959 = arith.constant 1 : index
    %dim_33960 = tensor.dim %36477, %c1_33959 : tensor<4x?x1x128xf16>
    %36478 = flow.tensor.bitcast %36477 : tensor<4x?x1x128xf16>{%dim_33960} -> tensor<4x?x1x64xcomplex<f16>>{%dim_33960}
    %36479 = torch_c.from_builtin_tensor %36478 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %36479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %36480 = torch.aten.mul.Tensor %36479, %36476 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %36481 = torch_c.to_builtin_tensor %36480 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_33961 = arith.constant 1 : index
    %dim_33962 = tensor.dim %36481, %c1_33961 : tensor<4x?x1x64xcomplex<f32>>
    %36482 = flow.tensor.bitcast %36481 : tensor<4x?x1x64xcomplex<f32>>{%dim_33962} -> tensor<4x?x1x128xf32>{%dim_33962}
    %36483 = torch_c.from_builtin_tensor %36482 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %36483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_33963 = torch.constant.int 5
    %36484 = torch.prims.convert_element_type %36483, %int5_33963 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_33964 = torch.constant.int 1
    %36485 = torch.aten.size.int %36089, %int1_33964 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_33965 = torch.constant.int 0
    %36486 = torch.aten.add.int %int0_33965, %36485 : !torch.int, !torch.int -> !torch.int
    %int0_33966 = torch.constant.int 0
    %int0_33967 = torch.constant.int 0
    %int1_33968 = torch.constant.int 1
    %36487 = torch.aten.slice.Tensor %36403, %int0_33966, %int0_33967, %36486, %int1_33968 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36487, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33969 = torch.constant.int 1
    %int0_33970 = torch.constant.int 0
    %int9223372036854775807_33971 = torch.constant.int 9223372036854775807
    %int1_33972 = torch.constant.int 1
    %36488 = torch.aten.slice.Tensor %36487, %int1_33969, %int0_33970, %int9223372036854775807_33971, %int1_33972 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36488, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33973 = torch.constant.int 0
    %36489 = torch.aten.unsqueeze %36488, %int0_33973 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36489, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33974 = torch.constant.int 2
    %36490 = torch.aten.unsqueeze %36489, %int2_33974 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36490, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33975 = torch.constant.int 3
    %int0_33976 = torch.constant.int 0
    %int9223372036854775807_33977 = torch.constant.int 9223372036854775807
    %int1_33978 = torch.constant.int 1
    %36491 = torch.aten.slice.Tensor %36490, %int3_33975, %int0_33976, %int9223372036854775807_33977, %int1_33978 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36491, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36492 = torch_c.to_builtin_tensor %36193 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_33979 = arith.constant 1 : index
    %dim_33980 = tensor.dim %36492, %c1_33979 : tensor<4x?x1x128xf16>
    %36493 = flow.tensor.bitcast %36492 : tensor<4x?x1x128xf16>{%dim_33980} -> tensor<4x?x1x64xcomplex<f16>>{%dim_33980}
    %36494 = torch_c.from_builtin_tensor %36493 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %36494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %36495 = torch.aten.mul.Tensor %36494, %36491 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %36496 = torch_c.to_builtin_tensor %36495 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_33981 = arith.constant 1 : index
    %dim_33982 = tensor.dim %36496, %c1_33981 : tensor<4x?x1x64xcomplex<f32>>
    %36497 = flow.tensor.bitcast %36496 : tensor<4x?x1x64xcomplex<f32>>{%dim_33982} -> tensor<4x?x1x128xf32>{%dim_33982}
    %36498 = torch_c.from_builtin_tensor %36497 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %36498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_33983 = torch.constant.int 5
    %36499 = torch.prims.convert_element_type %36498, %int5_33983 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_33984 = torch.constant.int 1
    %36500 = torch.aten.size.int %36095, %int1_33984 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_33985 = torch.constant.int 0
    %36501 = torch.aten.add.int %int0_33985, %36500 : !torch.int, !torch.int -> !torch.int
    %int0_33986 = torch.constant.int 0
    %int0_33987 = torch.constant.int 0
    %int1_33988 = torch.constant.int 1
    %36502 = torch.aten.slice.Tensor %36406, %int0_33986, %int0_33987, %36501, %int1_33988 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36502, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_33989 = torch.constant.int 1
    %int0_33990 = torch.constant.int 0
    %int9223372036854775807_33991 = torch.constant.int 9223372036854775807
    %int1_33992 = torch.constant.int 1
    %36503 = torch.aten.slice.Tensor %36502, %int1_33989, %int0_33990, %int9223372036854775807_33991, %int1_33992 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36503, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_33993 = torch.constant.int 0
    %36504 = torch.aten.unsqueeze %36503, %int0_33993 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36504, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_33994 = torch.constant.int 2
    %36505 = torch.aten.unsqueeze %36504, %int2_33994 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36505, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_33995 = torch.constant.int 3
    %int0_33996 = torch.constant.int 0
    %int9223372036854775807_33997 = torch.constant.int 9223372036854775807
    %int1_33998 = torch.constant.int 1
    %36506 = torch.aten.slice.Tensor %36505, %int3_33995, %int0_33996, %int9223372036854775807_33997, %int1_33998 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36506, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36507 = torch_c.to_builtin_tensor %36195 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_33999 = arith.constant 1 : index
    %dim_34000 = tensor.dim %36507, %c1_33999 : tensor<4x?x1x128xf16>
    %36508 = flow.tensor.bitcast %36507 : tensor<4x?x1x128xf16>{%dim_34000} -> tensor<4x?x1x64xcomplex<f16>>{%dim_34000}
    %36509 = torch_c.from_builtin_tensor %36508 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %36509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %36510 = torch.aten.mul.Tensor %36509, %36506 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %36511 = torch_c.to_builtin_tensor %36510 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_34001 = arith.constant 1 : index
    %dim_34002 = tensor.dim %36511, %c1_34001 : tensor<4x?x1x64xcomplex<f32>>
    %36512 = flow.tensor.bitcast %36511 : tensor<4x?x1x64xcomplex<f32>>{%dim_34002} -> tensor<4x?x1x128xf32>{%dim_34002}
    %36513 = torch_c.from_builtin_tensor %36512 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %36513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_34003 = torch.constant.int 5
    %36514 = torch.prims.convert_element_type %36513, %int5_34003 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_34004 = torch.constant.int 1
    %36515 = torch.aten.size.int %36101, %int1_34004 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_34005 = torch.constant.int 0
    %36516 = torch.aten.add.int %int0_34005, %36515 : !torch.int, !torch.int -> !torch.int
    %int0_34006 = torch.constant.int 0
    %int0_34007 = torch.constant.int 0
    %int1_34008 = torch.constant.int 1
    %36517 = torch.aten.slice.Tensor %36409, %int0_34006, %int0_34007, %36516, %int1_34008 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36517, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_34009 = torch.constant.int 1
    %int0_34010 = torch.constant.int 0
    %int9223372036854775807_34011 = torch.constant.int 9223372036854775807
    %int1_34012 = torch.constant.int 1
    %36518 = torch.aten.slice.Tensor %36517, %int1_34009, %int0_34010, %int9223372036854775807_34011, %int1_34012 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %36518, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_34013 = torch.constant.int 0
    %36519 = torch.aten.unsqueeze %36518, %int0_34013 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %36519, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_34014 = torch.constant.int 2
    %36520 = torch.aten.unsqueeze %36519, %int2_34014 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36520, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_34015 = torch.constant.int 3
    %int0_34016 = torch.constant.int 0
    %int9223372036854775807_34017 = torch.constant.int 9223372036854775807
    %int1_34018 = torch.constant.int 1
    %36521 = torch.aten.slice.Tensor %36520, %int3_34015, %int0_34016, %int9223372036854775807_34017, %int1_34018 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36521, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %36522 = torch_c.to_builtin_tensor %36197 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_34019 = arith.constant 1 : index
    %dim_34020 = tensor.dim %36522, %c1_34019 : tensor<4x?x1x128xf16>
    %36523 = flow.tensor.bitcast %36522 : tensor<4x?x1x128xf16>{%dim_34020} -> tensor<4x?x1x64xcomplex<f16>>{%dim_34020}
    %36524 = torch_c.from_builtin_tensor %36523 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %36524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %36525 = torch.aten.mul.Tensor %36524, %36521 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %36525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %36526 = torch_c.to_builtin_tensor %36525 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_34021 = arith.constant 1 : index
    %dim_34022 = tensor.dim %36526, %c1_34021 : tensor<4x?x1x64xcomplex<f32>>
    %36527 = flow.tensor.bitcast %36526 : tensor<4x?x1x64xcomplex<f32>>{%dim_34022} -> tensor<4x?x1x128xf32>{%dim_34022}
    %36528 = torch_c.from_builtin_tensor %36527 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %36528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_34023 = torch.constant.int 5
    %36529 = torch.prims.convert_element_type %36528, %int5_34023 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %36529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
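    // Cache-slot index arithmetic: scale each shard's [4, seq] page-id tensor
    // by 64, the per-page slot stride (consistent with 32 transformer blocks
    // times two slots each for keys and values).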
    %int64_34024 = torch.constant.int 64
    %36530 = torch.aten.mul.Scalar %2364, %int64_34024 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36530, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_34025 = torch.constant.int 64
    %36531 = torch.aten.mul.Scalar %2367, %int64_34025 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36531, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_34026 = torch.constant.int 64
    %36532 = torch.aten.mul.Scalar %2370, %int64_34026 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36532, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_34027 = torch.constant.int 64
    %36533 = torch.aten.mul.Scalar %2373, %int64_34027 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36533, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_34028 = torch.constant.int 64
    %36534 = torch.aten.mul.Scalar %2376, %int64_34028 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36534, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_34029 = torch.constant.int 64
    %36535 = torch.aten.mul.Scalar %2379, %int64_34029 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36535, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_34030 = torch.constant.int 64
    %36536 = torch.aten.mul.Scalar %2382, %int64_34030 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36536, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_34031 = torch.constant.int 64
    %36537 = torch.aten.mul.Scalar %2385, %int64_34031 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36537, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
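    // Add the constant slot offset 36 within each page, presumably this
    // transformer block's key slot (block index 18 * 2).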
    %int36 = torch.constant.int 36
    %int1_34032 = torch.constant.int 1
    %36538 = torch.aten.add.Scalar %36530, %int36, %int1_34032 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36538, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int36_34033 = torch.constant.int 36
    %int1_34034 = torch.constant.int 1
    %36539 = torch.aten.add.Scalar %36531, %int36_34033, %int1_34034 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36539, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int36_34035 = torch.constant.int 36
    %int1_34036 = torch.constant.int 1
    %36540 = torch.aten.add.Scalar %36532, %int36_34035, %int1_34036 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36540, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int36_34037 = torch.constant.int 36
    %int1_34038 = torch.constant.int 1
    %36541 = torch.aten.add.Scalar %36533, %int36_34037, %int1_34038 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36541, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int36_34039 = torch.constant.int 36
    %int1_34040 = torch.constant.int 1
    %36542 = torch.aten.add.Scalar %36534, %int36_34039, %int1_34040 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36542, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int36_34041 = torch.constant.int 36
    %int1_34042 = torch.constant.int 1
    %36543 = torch.aten.add.Scalar %36535, %int36_34041, %int1_34042 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36543, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int36_34043 = torch.constant.int 36
    %int1_34044 = torch.constant.int 1
    %36544 = torch.aten.add.Scalar %36536, %int36_34043, %int1_34044 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36544, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int36_34045 = torch.constant.int 36
    %int1_34046 = torch.constant.int 1
    %36545 = torch.aten.add.Scalar %36537, %int36_34045, %int1_34046 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36545, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
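    // Reshape the rotated keys from [4, seq, 1, 128] (seq = pages * 16) into
    // the paged layout [4, pages, 16, 1, 128], i.e. 16 tokens per cache page.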
    %int4_34047 = torch.constant.int 4
    %int16_34048 = torch.constant.int 16
    %int1_34049 = torch.constant.int 1
    %int128_34050 = torch.constant.int 128
    %36546 = torch.prim.ListConstruct %int4_34047, %3095, %int16_34048, %int1_34049, %int128_34050 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36547 = torch.aten.view %36424, %36546 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36547, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34051 = torch.constant.int 4
    %int16_34052 = torch.constant.int 16
    %int1_34053 = torch.constant.int 1
    %int128_34054 = torch.constant.int 128
    %36548 = torch.prim.ListConstruct %int4_34051, %3095, %int16_34052, %int1_34053, %int128_34054 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36549 = torch.aten.view %36439, %36548 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36549, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34055 = torch.constant.int 4
    %int16_34056 = torch.constant.int 16
    %int1_34057 = torch.constant.int 1
    %int128_34058 = torch.constant.int 128
    %36550 = torch.prim.ListConstruct %int4_34055, %3095, %int16_34056, %int1_34057, %int128_34058 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36551 = torch.aten.view %36454, %36550 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36551, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34059 = torch.constant.int 4
    %int16_34060 = torch.constant.int 16
    %int1_34061 = torch.constant.int 1
    %int128_34062 = torch.constant.int 128
    %36552 = torch.prim.ListConstruct %int4_34059, %3095, %int16_34060, %int1_34061, %int128_34062 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36553 = torch.aten.view %36469, %36552 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36553, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34063 = torch.constant.int 4
    %int16_34064 = torch.constant.int 16
    %int1_34065 = torch.constant.int 1
    %int128_34066 = torch.constant.int 128
    %36554 = torch.prim.ListConstruct %int4_34063, %3095, %int16_34064, %int1_34065, %int128_34066 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36555 = torch.aten.view %36484, %36554 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36555, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34067 = torch.constant.int 4
    %int16_34068 = torch.constant.int 16
    %int1_34069 = torch.constant.int 1
    %int128_34070 = torch.constant.int 128
    %36556 = torch.prim.ListConstruct %int4_34067, %3095, %int16_34068, %int1_34069, %int128_34070 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36557 = torch.aten.view %36499, %36556 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36557, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34071 = torch.constant.int 4
    %int16_34072 = torch.constant.int 16
    %int1_34073 = torch.constant.int 1
    %int128_34074 = torch.constant.int 128
    %36558 = torch.prim.ListConstruct %int4_34071, %3095, %int16_34072, %int1_34073, %int128_34074 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36559 = torch.aten.view %36514, %36558 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36559, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34075 = torch.constant.int 4
    %int16_34076 = torch.constant.int 16
    %int1_34077 = torch.constant.int 1
    %int128_34078 = torch.constant.int 128
    %36560 = torch.prim.ListConstruct %int4_34075, %3095, %int16_34076, %int1_34077, %int128_34078 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36561 = torch.aten.view %36529, %36560 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36561, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
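    // Collapse the batch dimension: [4, pages, 16, 1, 128] -> [4 * pages, 16, 1, 128].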
    %int4_34079 = torch.constant.int 4
    %36562 = torch.aten.mul.int %int4_34079, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34080 = torch.constant.int 16
    %int1_34081 = torch.constant.int 1
    %int128_34082 = torch.constant.int 128
    %36563 = torch.prim.ListConstruct %36562, %int16_34080, %int1_34081, %int128_34082 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36564 = torch.aten.view %36547, %36563 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36564, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34083 = torch.constant.int 4
    %36565 = torch.aten.mul.int %int4_34083, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34084 = torch.constant.int 16
    %int1_34085 = torch.constant.int 1
    %int128_34086 = torch.constant.int 128
    %36566 = torch.prim.ListConstruct %36565, %int16_34084, %int1_34085, %int128_34086 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36567 = torch.aten.view %36549, %36566 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36567, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34087 = torch.constant.int 4
    %36568 = torch.aten.mul.int %int4_34087, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34088 = torch.constant.int 16
    %int1_34089 = torch.constant.int 1
    %int128_34090 = torch.constant.int 128
    %36569 = torch.prim.ListConstruct %36568, %int16_34088, %int1_34089, %int128_34090 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36570 = torch.aten.view %36551, %36569 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36570, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34091 = torch.constant.int 4
    %36571 = torch.aten.mul.int %int4_34091, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34092 = torch.constant.int 16
    %int1_34093 = torch.constant.int 1
    %int128_34094 = torch.constant.int 128
    %36572 = torch.prim.ListConstruct %36571, %int16_34092, %int1_34093, %int128_34094 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36573 = torch.aten.view %36553, %36572 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36573, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34095 = torch.constant.int 4
    %36574 = torch.aten.mul.int %int4_34095, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34096 = torch.constant.int 16
    %int1_34097 = torch.constant.int 1
    %int128_34098 = torch.constant.int 128
    %36575 = torch.prim.ListConstruct %36574, %int16_34096, %int1_34097, %int128_34098 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36576 = torch.aten.view %36555, %36575 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36576, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34099 = torch.constant.int 4
    %36577 = torch.aten.mul.int %int4_34099, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34100 = torch.constant.int 16
    %int1_34101 = torch.constant.int 1
    %int128_34102 = torch.constant.int 128
    %36578 = torch.prim.ListConstruct %36577, %int16_34100, %int1_34101, %int128_34102 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36579 = torch.aten.view %36557, %36578 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36579, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34103 = torch.constant.int 4
    %36580 = torch.aten.mul.int %int4_34103, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34104 = torch.constant.int 16
    %int1_34105 = torch.constant.int 1
    %int128_34106 = torch.constant.int 128
    %36581 = torch.prim.ListConstruct %36580, %int16_34104, %int1_34105, %int128_34106 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36582 = torch.aten.view %36559, %36581 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36582, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34107 = torch.constant.int 4
    %36583 = torch.aten.mul.int %int4_34107, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34108 = torch.constant.int 16
    %int1_34109 = torch.constant.int 1
    %int128_34110 = torch.constant.int 128
    %36584 = torch.prim.ListConstruct %36583, %int16_34108, %int1_34109, %int128_34110 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36585 = torch.aten.view %36561, %36584 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36585, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
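    // Flatten the slot-index tensors [4, pages] -> [4 * pages] to line up with
    // the flattened key pages above.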
    %int4_34111 = torch.constant.int 4
    %36586 = torch.aten.mul.int %int4_34111, %3095 : !torch.int, !torch.int -> !torch.int
    %36587 = torch.prim.ListConstruct %36586 : (!torch.int) -> !torch.list<int>
    %36588 = torch.aten.view %36538, %36587 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36588, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34112 = torch.constant.int 4
    %36589 = torch.aten.mul.int %int4_34112, %3095 : !torch.int, !torch.int -> !torch.int
    %36590 = torch.prim.ListConstruct %36589 : (!torch.int) -> !torch.list<int>
    %36591 = torch.aten.view %36539, %36590 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36591, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34113 = torch.constant.int 4
    %36592 = torch.aten.mul.int %int4_34113, %3095 : !torch.int, !torch.int -> !torch.int
    %36593 = torch.prim.ListConstruct %36592 : (!torch.int) -> !torch.list<int>
    %36594 = torch.aten.view %36540, %36593 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36594, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34114 = torch.constant.int 4
    %36595 = torch.aten.mul.int %int4_34114, %3095 : !torch.int, !torch.int -> !torch.int
    %36596 = torch.prim.ListConstruct %36595 : (!torch.int) -> !torch.list<int>
    %36597 = torch.aten.view %36541, %36596 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36597, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34115 = torch.constant.int 4
    %36598 = torch.aten.mul.int %int4_34115, %3095 : !torch.int, !torch.int -> !torch.int
    %36599 = torch.prim.ListConstruct %36598 : (!torch.int) -> !torch.list<int>
    %36600 = torch.aten.view %36542, %36599 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36600, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34116 = torch.constant.int 4
    %36601 = torch.aten.mul.int %int4_34116, %3095 : !torch.int, !torch.int -> !torch.int
    %36602 = torch.prim.ListConstruct %36601 : (!torch.int) -> !torch.list<int>
    %36603 = torch.aten.view %36543, %36602 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36603, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34117 = torch.constant.int 4
    %36604 = torch.aten.mul.int %int4_34117, %3095 : !torch.int, !torch.int -> !torch.int
    %36605 = torch.prim.ListConstruct %36604 : (!torch.int) -> !torch.list<int>
    %36606 = torch.aten.view %36544, %36605 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36606, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34118 = torch.constant.int 4
    %36607 = torch.aten.mul.int %int4_34118, %3095 : !torch.int, !torch.int -> !torch.int
    %36608 = torch.prim.ListConstruct %36607 : (!torch.int) -> !torch.list<int>
    %36609 = torch.aten.view %36545, %36608 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36609, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
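    // The same paged reshape for %36199..%36213, which bypass the rotary
    // embedding and are presumably the value shards: [4, seq, 1, 128] ->
    // [4, pages, 16, 1, 128].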
    %int4_34119 = torch.constant.int 4
    %int16_34120 = torch.constant.int 16
    %int1_34121 = torch.constant.int 1
    %int128_34122 = torch.constant.int 128
    %36610 = torch.prim.ListConstruct %int4_34119, %3095, %int16_34120, %int1_34121, %int128_34122 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36611 = torch.aten.view %36199, %36610 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36611, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34123 = torch.constant.int 4
    %int16_34124 = torch.constant.int 16
    %int1_34125 = torch.constant.int 1
    %int128_34126 = torch.constant.int 128
    %36612 = torch.prim.ListConstruct %int4_34123, %3095, %int16_34124, %int1_34125, %int128_34126 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36613 = torch.aten.view %36201, %36612 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36613, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34127 = torch.constant.int 4
    %int16_34128 = torch.constant.int 16
    %int1_34129 = torch.constant.int 1
    %int128_34130 = torch.constant.int 128
    %36614 = torch.prim.ListConstruct %int4_34127, %3095, %int16_34128, %int1_34129, %int128_34130 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36615 = torch.aten.view %36203, %36614 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36615, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34131 = torch.constant.int 4
    %int16_34132 = torch.constant.int 16
    %int1_34133 = torch.constant.int 1
    %int128_34134 = torch.constant.int 128
    %36616 = torch.prim.ListConstruct %int4_34131, %3095, %int16_34132, %int1_34133, %int128_34134 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36617 = torch.aten.view %36205, %36616 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36617, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34135 = torch.constant.int 4
    %int16_34136 = torch.constant.int 16
    %int1_34137 = torch.constant.int 1
    %int128_34138 = torch.constant.int 128
    %36618 = torch.prim.ListConstruct %int4_34135, %3095, %int16_34136, %int1_34137, %int128_34138 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36619 = torch.aten.view %36207, %36618 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36619, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34139 = torch.constant.int 4
    %int16_34140 = torch.constant.int 16
    %int1_34141 = torch.constant.int 1
    %int128_34142 = torch.constant.int 128
    %36620 = torch.prim.ListConstruct %int4_34139, %3095, %int16_34140, %int1_34141, %int128_34142 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36621 = torch.aten.view %36209, %36620 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36621, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34143 = torch.constant.int 4
    %int16_34144 = torch.constant.int 16
    %int1_34145 = torch.constant.int 1
    %int128_34146 = torch.constant.int 128
    %36622 = torch.prim.ListConstruct %int4_34143, %3095, %int16_34144, %int1_34145, %int128_34146 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36623 = torch.aten.view %36211, %36622 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36623, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_34147 = torch.constant.int 4
    %int16_34148 = torch.constant.int 16
    %int1_34149 = torch.constant.int 1
    %int128_34150 = torch.constant.int 128
    %36624 = torch.prim.ListConstruct %int4_34147, %3095, %int16_34148, %int1_34149, %int128_34150 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36625 = torch.aten.view %36213, %36624 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %36625, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
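    // And the matching batch collapse to [4 * pages, 16, 1, 128].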
    %int4_34151 = torch.constant.int 4
    %36626 = torch.aten.mul.int %int4_34151, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34152 = torch.constant.int 16
    %int1_34153 = torch.constant.int 1
    %int128_34154 = torch.constant.int 128
    %36627 = torch.prim.ListConstruct %36626, %int16_34152, %int1_34153, %int128_34154 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36628 = torch.aten.view %36611, %36627 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36628, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34155 = torch.constant.int 4
    %36629 = torch.aten.mul.int %int4_34155, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34156 = torch.constant.int 16
    %int1_34157 = torch.constant.int 1
    %int128_34158 = torch.constant.int 128
    %36630 = torch.prim.ListConstruct %36629, %int16_34156, %int1_34157, %int128_34158 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36631 = torch.aten.view %36613, %36630 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36631, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34159 = torch.constant.int 4
    %36632 = torch.aten.mul.int %int4_34159, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34160 = torch.constant.int 16
    %int1_34161 = torch.constant.int 1
    %int128_34162 = torch.constant.int 128
    %36633 = torch.prim.ListConstruct %36632, %int16_34160, %int1_34161, %int128_34162 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36634 = torch.aten.view %36615, %36633 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36634, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34163 = torch.constant.int 4
    %36635 = torch.aten.mul.int %int4_34163, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34164 = torch.constant.int 16
    %int1_34165 = torch.constant.int 1
    %int128_34166 = torch.constant.int 128
    %36636 = torch.prim.ListConstruct %36635, %int16_34164, %int1_34165, %int128_34166 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36637 = torch.aten.view %36617, %36636 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36637, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34167 = torch.constant.int 4
    %36638 = torch.aten.mul.int %int4_34167, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34168 = torch.constant.int 16
    %int1_34169 = torch.constant.int 1
    %int128_34170 = torch.constant.int 128
    %36639 = torch.prim.ListConstruct %36638, %int16_34168, %int1_34169, %int128_34170 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36640 = torch.aten.view %36619, %36639 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36640, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34171 = torch.constant.int 4
    %36641 = torch.aten.mul.int %int4_34171, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34172 = torch.constant.int 16
    %int1_34173 = torch.constant.int 1
    %int128_34174 = torch.constant.int 128
    %36642 = torch.prim.ListConstruct %36641, %int16_34172, %int1_34173, %int128_34174 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36643 = torch.aten.view %36621, %36642 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36643, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34175 = torch.constant.int 4
    %36644 = torch.aten.mul.int %int4_34175, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34176 = torch.constant.int 16
    %int1_34177 = torch.constant.int 1
    %int128_34178 = torch.constant.int 128
    %36645 = torch.prim.ListConstruct %36644, %int16_34176, %int1_34177, %int128_34178 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36646 = torch.aten.view %36623, %36645 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36646, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_34179 = torch.constant.int 4
    %36647 = torch.aten.mul.int %int4_34179, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_34180 = torch.constant.int 16
    %int1_34181 = torch.constant.int 1
    %int128_34182 = torch.constant.int 128
    %36648 = torch.prim.ListConstruct %36647, %int16_34180, %int1_34181, %int128_34182 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36649 = torch.aten.view %36625, %36648 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36649, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
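    // Each page-slot index tensor is offset by +1. In the flattened
    // (page, block, K/V) index space constructed below, consecutive slots hold the
    // two halves of a block's cache entry, so the +1 plausibly redirects the slot
    // indices computed earlier to the adjacent partition. (Interpretation inferred
    // from the layout arithmetic; the IR itself only records the scalar add.)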
    %int1_34183 = torch.constant.int 1
    %int1_34184 = torch.constant.int 1
    %36650 = torch.aten.add.Scalar %36538, %int1_34183, %int1_34184 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36650, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_34185 = torch.constant.int 1
    %int1_34186 = torch.constant.int 1
    %36651 = torch.aten.add.Scalar %36539, %int1_34185, %int1_34186 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36651, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_34187 = torch.constant.int 1
    %int1_34188 = torch.constant.int 1
    %36652 = torch.aten.add.Scalar %36540, %int1_34187, %int1_34188 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36652, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_34189 = torch.constant.int 1
    %int1_34190 = torch.constant.int 1
    %36653 = torch.aten.add.Scalar %36541, %int1_34189, %int1_34190 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36653, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_34191 = torch.constant.int 1
    %int1_34192 = torch.constant.int 1
    %36654 = torch.aten.add.Scalar %36542, %int1_34191, %int1_34192 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36654, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_34193 = torch.constant.int 1
    %int1_34194 = torch.constant.int 1
    %36655 = torch.aten.add.Scalar %36543, %int1_34193, %int1_34194 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36655, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_34195 = torch.constant.int 1
    %int1_34196 = torch.constant.int 1
    %36656 = torch.aten.add.Scalar %36544, %int1_34195, %int1_34196 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36656, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_34197 = torch.constant.int 1
    %int1_34198 = torch.constant.int 1
    %36657 = torch.aten.add.Scalar %36545, %int1_34197, %int1_34198 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %36657, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
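    // Flatten the offset [4, s0] index tensors to 1-D [s0 * 4] vectors so they can
    // be concatenated with the earlier index vectors below.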
    %int4_34199 = torch.constant.int 4
    %36658 = torch.aten.mul.int %int4_34199, %3095 : !torch.int, !torch.int -> !torch.int
    %36659 = torch.prim.ListConstruct %36658 : (!torch.int) -> !torch.list<int>
    %36660 = torch.aten.view %36650, %36659 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36660, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34200 = torch.constant.int 4
    %36661 = torch.aten.mul.int %int4_34200, %3095 : !torch.int, !torch.int -> !torch.int
    %36662 = torch.prim.ListConstruct %36661 : (!torch.int) -> !torch.list<int>
    %36663 = torch.aten.view %36651, %36662 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36663, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34201 = torch.constant.int 4
    %36664 = torch.aten.mul.int %int4_34201, %3095 : !torch.int, !torch.int -> !torch.int
    %36665 = torch.prim.ListConstruct %36664 : (!torch.int) -> !torch.list<int>
    %36666 = torch.aten.view %36652, %36665 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36666, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34202 = torch.constant.int 4
    %36667 = torch.aten.mul.int %int4_34202, %3095 : !torch.int, !torch.int -> !torch.int
    %36668 = torch.prim.ListConstruct %36667 : (!torch.int) -> !torch.list<int>
    %36669 = torch.aten.view %36653, %36668 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36669, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34203 = torch.constant.int 4
    %36670 = torch.aten.mul.int %int4_34203, %3095 : !torch.int, !torch.int -> !torch.int
    %36671 = torch.prim.ListConstruct %36670 : (!torch.int) -> !torch.list<int>
    %36672 = torch.aten.view %36654, %36671 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36672, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34204 = torch.constant.int 4
    %36673 = torch.aten.mul.int %int4_34204, %3095 : !torch.int, !torch.int -> !torch.int
    %36674 = torch.prim.ListConstruct %36673 : (!torch.int) -> !torch.list<int>
    %36675 = torch.aten.view %36655, %36674 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36675, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34205 = torch.constant.int 4
    %36676 = torch.aten.mul.int %int4_34205, %3095 : !torch.int, !torch.int -> !torch.int
    %36677 = torch.prim.ListConstruct %36676 : (!torch.int) -> !torch.list<int>
    %36678 = torch.aten.view %36656, %36677 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36678, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_34206 = torch.constant.int 4
    %36679 = torch.aten.mul.int %int4_34206, %3095 : !torch.int, !torch.int -> !torch.int
    %36680 = torch.prim.ListConstruct %36679 : (!torch.int) -> !torch.list<int>
    %36681 = torch.aten.view %36657, %36680 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36681, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
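    // Concatenate the earlier index vectors with the offset vectors above into one
    // combined [s0 * 8] index vector per shard.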
    %36682 = torch.prim.ListConstruct %36588, %36660 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_34207 = torch.constant.int 0
    %36683 = torch.aten.cat %36682, %int0_34207 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36683, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %36684 = torch.prim.ListConstruct %36591, %36663 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_34208 = torch.constant.int 0
    %36685 = torch.aten.cat %36684, %int0_34208 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36685, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %36686 = torch.prim.ListConstruct %36594, %36666 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_34209 = torch.constant.int 0
    %36687 = torch.aten.cat %36686, %int0_34209 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36687, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %36688 = torch.prim.ListConstruct %36597, %36669 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_34210 = torch.constant.int 0
    %36689 = torch.aten.cat %36688, %int0_34210 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36689, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %36690 = torch.prim.ListConstruct %36600, %36672 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_34211 = torch.constant.int 0
    %36691 = torch.aten.cat %36690, %int0_34211 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36691, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %36692 = torch.prim.ListConstruct %36603, %36675 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_34212 = torch.constant.int 0
    %36693 = torch.aten.cat %36692, %int0_34212 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36693, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %36694 = torch.prim.ListConstruct %36606, %36678 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_34213 = torch.constant.int 0
    %36695 = torch.aten.cat %36694, %int0_34213 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36695, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %36696 = torch.prim.ListConstruct %36609, %36681 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_34214 = torch.constant.int 0
    %36697 = torch.aten.cat %36696, %int0_34214 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %36697, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
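    // Concatenate the corresponding page contents in the same order as the
    // indices, so each combined [s0 * 8, 16, 1, 128] tensor lines up element-wise
    // with its [s0 * 8] index vector.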
    %36698 = torch.prim.ListConstruct %36564, %36628 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_34215 = torch.constant.int 0
    %36699 = torch.aten.cat %36698, %int0_34215 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36699, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36700 = torch.prim.ListConstruct %36567, %36631 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_34216 = torch.constant.int 0
    %36701 = torch.aten.cat %36700, %int0_34216 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36701, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36702 = torch.prim.ListConstruct %36570, %36634 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_34217 = torch.constant.int 0
    %36703 = torch.aten.cat %36702, %int0_34217 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36703, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36704 = torch.prim.ListConstruct %36573, %36637 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_34218 = torch.constant.int 0
    %36705 = torch.aten.cat %36704, %int0_34218 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36705, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36706 = torch.prim.ListConstruct %36576, %36640 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_34219 = torch.constant.int 0
    %36707 = torch.aten.cat %36706, %int0_34219 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36707, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36708 = torch.prim.ListConstruct %36579, %36643 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_34220 = torch.constant.int 0
    %36709 = torch.aten.cat %36708, %int0_34220 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36709, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36710 = torch.prim.ListConstruct %36582, %36646 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_34221 = torch.constant.int 0
    %36711 = torch.aten.cat %36710, %int0_34221 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36711, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36712 = torch.prim.ListConstruct %36585, %36649 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_34222 = torch.constant.int 0
    %36713 = torch.aten.cat %36712, %int0_34222 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36713, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
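    // Scatter the combined entries into each device's paged KV cache. The pattern
    // below repeats once per shard: the flat [?, 131072] cache is viewed as
    // [pages, 32, 2, 16, 1, 128] (32 * 2 * 16 * 1 * 128 = 131072; plausibly
    // 32 transformer blocks x {K, V} x 16 tokens per page x 1 KV head per shard
    // x 128 head dim), flattened to [pages * 64, 16, 1, 128], updated in place by
    // a non-accumulating index_put, then viewed back to the flat [?, 131072]
    // layout.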
    %int32_34223 = torch.constant.int 32
    %int2_34224 = torch.constant.int 2
    %int16_34225 = torch.constant.int 16
    %int1_34226 = torch.constant.int 1
    %int128_34227 = torch.constant.int 128
    %36714 = torch.prim.ListConstruct %3023, %int32_34223, %int2_34224, %int16_34225, %int1_34226, %int128_34227 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36715 = torch.aten.view %34864, %36714 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36715, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_34228 = torch.constant.int 32
    %36716 = torch.aten.mul.int %3023, %int32_34228 : !torch.int, !torch.int -> !torch.int
    %int2_34229 = torch.constant.int 2
    %36717 = torch.aten.mul.int %36716, %int2_34229 : !torch.int, !torch.int -> !torch.int
    %int16_34230 = torch.constant.int 16
    %int1_34231 = torch.constant.int 1
    %int128_34232 = torch.constant.int 128
    %36718 = torch.prim.ListConstruct %36717, %int16_34230, %int1_34231, %int128_34232 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36719 = torch.aten.view %36715, %36718 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36719, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36720 = torch.prim.ListConstruct %36683 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_34233 = torch.constant.bool false
    %36721 = torch.aten.index_put %36719, %36720, %36699, %false_34233 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36721, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_34234 = torch.constant.int 32
    %int2_34235 = torch.constant.int 2
    %int16_34236 = torch.constant.int 16
    %int1_34237 = torch.constant.int 1
    %int128_34238 = torch.constant.int 128
    %36722 = torch.prim.ListConstruct %3023, %int32_34234, %int2_34235, %int16_34236, %int1_34237, %int128_34238 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36723 = torch.aten.view %36721, %36722 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36723, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_34239 = torch.constant.int 131072
    %36724 = torch.prim.ListConstruct %3023, %int131072_34239 : (!torch.int, !torch.int) -> !torch.list<int>
    %36725 = torch.aten.view %36723, %36724 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %36725, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_34240 = torch.constant.int 32
    %int2_34241 = torch.constant.int 2
    %int16_34242 = torch.constant.int 16
    %int1_34243 = torch.constant.int 1
    %int128_34244 = torch.constant.int 128
    %36726 = torch.prim.ListConstruct %3026, %int32_34240, %int2_34241, %int16_34242, %int1_34243, %int128_34244 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36727 = torch.aten.view %34876, %36726 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36727, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_34245 = torch.constant.int 32
    %36728 = torch.aten.mul.int %3026, %int32_34245 : !torch.int, !torch.int -> !torch.int
    %int2_34246 = torch.constant.int 2
    %36729 = torch.aten.mul.int %36728, %int2_34246 : !torch.int, !torch.int -> !torch.int
    %int16_34247 = torch.constant.int 16
    %int1_34248 = torch.constant.int 1
    %int128_34249 = torch.constant.int 128
    %36730 = torch.prim.ListConstruct %36729, %int16_34247, %int1_34248, %int128_34249 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36731 = torch.aten.view %36727, %36730 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36731, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36732 = torch.prim.ListConstruct %36685 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_34250 = torch.constant.bool false
    %36733 = torch.aten.index_put %36731, %36732, %36701, %false_34250 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36733, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_34251 = torch.constant.int 32
    %int2_34252 = torch.constant.int 2
    %int16_34253 = torch.constant.int 16
    %int1_34254 = torch.constant.int 1
    %int128_34255 = torch.constant.int 128
    %36734 = torch.prim.ListConstruct %3026, %int32_34251, %int2_34252, %int16_34253, %int1_34254, %int128_34255 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36735 = torch.aten.view %36733, %36734 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36735, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_34256 = torch.constant.int 131072
    %36736 = torch.prim.ListConstruct %3026, %int131072_34256 : (!torch.int, !torch.int) -> !torch.list<int>
    %36737 = torch.aten.view %36735, %36736 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %36737, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_34257 = torch.constant.int 32
    %int2_34258 = torch.constant.int 2
    %int16_34259 = torch.constant.int 16
    %int1_34260 = torch.constant.int 1
    %int128_34261 = torch.constant.int 128
    %36738 = torch.prim.ListConstruct %3029, %int32_34257, %int2_34258, %int16_34259, %int1_34260, %int128_34261 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36739 = torch.aten.view %34888, %36738 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36739, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_34262 = torch.constant.int 32
    %36740 = torch.aten.mul.int %3029, %int32_34262 : !torch.int, !torch.int -> !torch.int
    %int2_34263 = torch.constant.int 2
    %36741 = torch.aten.mul.int %36740, %int2_34263 : !torch.int, !torch.int -> !torch.int
    %int16_34264 = torch.constant.int 16
    %int1_34265 = torch.constant.int 1
    %int128_34266 = torch.constant.int 128
    %36742 = torch.prim.ListConstruct %36741, %int16_34264, %int1_34265, %int128_34266 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36743 = torch.aten.view %36739, %36742 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36743, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36744 = torch.prim.ListConstruct %36687 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_34267 = torch.constant.bool false
    %36745 = torch.aten.index_put %36743, %36744, %36703, %false_34267 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36745, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_34268 = torch.constant.int 32
    %int2_34269 = torch.constant.int 2
    %int16_34270 = torch.constant.int 16
    %int1_34271 = torch.constant.int 1
    %int128_34272 = torch.constant.int 128
    %36746 = torch.prim.ListConstruct %3029, %int32_34268, %int2_34269, %int16_34270, %int1_34271, %int128_34272 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36747 = torch.aten.view %36745, %36746 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36747, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_34273 = torch.constant.int 131072
    %36748 = torch.prim.ListConstruct %3029, %int131072_34273 : (!torch.int, !torch.int) -> !torch.list<int>
    %36749 = torch.aten.view %36747, %36748 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %36749, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_34274 = torch.constant.int 32
    %int2_34275 = torch.constant.int 2
    %int16_34276 = torch.constant.int 16
    %int1_34277 = torch.constant.int 1
    %int128_34278 = torch.constant.int 128
    %36750 = torch.prim.ListConstruct %3032, %int32_34274, %int2_34275, %int16_34276, %int1_34277, %int128_34278 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36751 = torch.aten.view %34900, %36750 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36751, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_34279 = torch.constant.int 32
    %36752 = torch.aten.mul.int %3032, %int32_34279 : !torch.int, !torch.int -> !torch.int
    %int2_34280 = torch.constant.int 2
    %36753 = torch.aten.mul.int %36752, %int2_34280 : !torch.int, !torch.int -> !torch.int
    %int16_34281 = torch.constant.int 16
    %int1_34282 = torch.constant.int 1
    %int128_34283 = torch.constant.int 128
    %36754 = torch.prim.ListConstruct %36753, %int16_34281, %int1_34282, %int128_34283 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36755 = torch.aten.view %36751, %36754 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36755, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36756 = torch.prim.ListConstruct %36689 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_34284 = torch.constant.bool false
    %36757 = torch.aten.index_put %36755, %36756, %36705, %false_34284 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36757, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_34285 = torch.constant.int 32
    %int2_34286 = torch.constant.int 2
    %int16_34287 = torch.constant.int 16
    %int1_34288 = torch.constant.int 1
    %int128_34289 = torch.constant.int 128
    %36758 = torch.prim.ListConstruct %3032, %int32_34285, %int2_34286, %int16_34287, %int1_34288, %int128_34289 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36759 = torch.aten.view %36757, %36758 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36759, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_34290 = torch.constant.int 131072
    %36760 = torch.prim.ListConstruct %3032, %int131072_34290 : (!torch.int, !torch.int) -> !torch.list<int>
    %36761 = torch.aten.view %36759, %36760 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %36761, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_34291 = torch.constant.int 32
    %int2_34292 = torch.constant.int 2
    %int16_34293 = torch.constant.int 16
    %int1_34294 = torch.constant.int 1
    %int128_34295 = torch.constant.int 128
    %36762 = torch.prim.ListConstruct %3035, %int32_34291, %int2_34292, %int16_34293, %int1_34294, %int128_34295 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36763 = torch.aten.view %34912, %36762 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36763, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_34296 = torch.constant.int 32
    %36764 = torch.aten.mul.int %3035, %int32_34296 : !torch.int, !torch.int -> !torch.int
    %int2_34297 = torch.constant.int 2
    %36765 = torch.aten.mul.int %36764, %int2_34297 : !torch.int, !torch.int -> !torch.int
    %int16_34298 = torch.constant.int 16
    %int1_34299 = torch.constant.int 1
    %int128_34300 = torch.constant.int 128
    %36766 = torch.prim.ListConstruct %36765, %int16_34298, %int1_34299, %int128_34300 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36767 = torch.aten.view %36763, %36766 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36767, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36768 = torch.prim.ListConstruct %36691 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_34301 = torch.constant.bool false
    %36769 = torch.aten.index_put %36767, %36768, %36707, %false_34301 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36769, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_34302 = torch.constant.int 32
    %int2_34303 = torch.constant.int 2
    %int16_34304 = torch.constant.int 16
    %int1_34305 = torch.constant.int 1
    %int128_34306 = torch.constant.int 128
    %36770 = torch.prim.ListConstruct %3035, %int32_34302, %int2_34303, %int16_34304, %int1_34305, %int128_34306 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36771 = torch.aten.view %36769, %36770 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36771, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_34307 = torch.constant.int 131072
    %36772 = torch.prim.ListConstruct %3035, %int131072_34307 : (!torch.int, !torch.int) -> !torch.list<int>
    %36773 = torch.aten.view %36771, %36772 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %36773, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_34308 = torch.constant.int 32
    %int2_34309 = torch.constant.int 2
    %int16_34310 = torch.constant.int 16
    %int1_34311 = torch.constant.int 1
    %int128_34312 = torch.constant.int 128
    %36774 = torch.prim.ListConstruct %3038, %int32_34308, %int2_34309, %int16_34310, %int1_34311, %int128_34312 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36775 = torch.aten.view %34924, %36774 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36775, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_34313 = torch.constant.int 32
    %36776 = torch.aten.mul.int %3038, %int32_34313 : !torch.int, !torch.int -> !torch.int
    %int2_34314 = torch.constant.int 2
    %36777 = torch.aten.mul.int %36776, %int2_34314 : !torch.int, !torch.int -> !torch.int
    %int16_34315 = torch.constant.int 16
    %int1_34316 = torch.constant.int 1
    %int128_34317 = torch.constant.int 128
    %36778 = torch.prim.ListConstruct %36777, %int16_34315, %int1_34316, %int128_34317 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36779 = torch.aten.view %36775, %36778 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36779, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36780 = torch.prim.ListConstruct %36693 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_34318 = torch.constant.bool false
    %36781 = torch.aten.index_put %36779, %36780, %36709, %false_34318 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36781, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_34319 = torch.constant.int 32
    %int2_34320 = torch.constant.int 2
    %int16_34321 = torch.constant.int 16
    %int1_34322 = torch.constant.int 1
    %int128_34323 = torch.constant.int 128
    %36782 = torch.prim.ListConstruct %3038, %int32_34319, %int2_34320, %int16_34321, %int1_34322, %int128_34323 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36783 = torch.aten.view %36781, %36782 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36783, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_34324 = torch.constant.int 131072
    %36784 = torch.prim.ListConstruct %3038, %int131072_34324 : (!torch.int, !torch.int) -> !torch.list<int>
    %36785 = torch.aten.view %36783, %36784 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %36785, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_34325 = torch.constant.int 32
    %int2_34326 = torch.constant.int 2
    %int16_34327 = torch.constant.int 16
    %int1_34328 = torch.constant.int 1
    %int128_34329 = torch.constant.int 128
    %36786 = torch.prim.ListConstruct %3041, %int32_34325, %int2_34326, %int16_34327, %int1_34328, %int128_34329 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36787 = torch.aten.view %34936, %36786 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36787, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_34330 = torch.constant.int 32
    %36788 = torch.aten.mul.int %3041, %int32_34330 : !torch.int, !torch.int -> !torch.int
    %int2_34331 = torch.constant.int 2
    %36789 = torch.aten.mul.int %36788, %int2_34331 : !torch.int, !torch.int -> !torch.int
    %int16_34332 = torch.constant.int 16
    %int1_34333 = torch.constant.int 1
    %int128_34334 = torch.constant.int 128
    %36790 = torch.prim.ListConstruct %36789, %int16_34332, %int1_34333, %int128_34334 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36791 = torch.aten.view %36787, %36790 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36791, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36792 = torch.prim.ListConstruct %36695 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_34335 = torch.constant.bool false
    %36793 = torch.aten.index_put %36791, %36792, %36711, %false_34335 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36793, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_34336 = torch.constant.int 32
    %int2_34337 = torch.constant.int 2
    %int16_34338 = torch.constant.int 16
    %int1_34339 = torch.constant.int 1
    %int128_34340 = torch.constant.int 128
    %36794 = torch.prim.ListConstruct %3041, %int32_34336, %int2_34337, %int16_34338, %int1_34339, %int128_34340 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36795 = torch.aten.view %36793, %36794 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36795, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_34341 = torch.constant.int 131072
    %36796 = torch.prim.ListConstruct %3041, %int131072_34341 : (!torch.int, !torch.int) -> !torch.list<int>
    %36797 = torch.aten.view %36795, %36796 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %36797, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_34342 = torch.constant.int 32
    %int2_34343 = torch.constant.int 2
    %int16_34344 = torch.constant.int 16
    %int1_34345 = torch.constant.int 1
    %int128_34346 = torch.constant.int 128
    %36798 = torch.prim.ListConstruct %3044, %int32_34342, %int2_34343, %int16_34344, %int1_34345, %int128_34346 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36799 = torch.aten.view %34948, %36798 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36799, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_34347 = torch.constant.int 32
    %36800 = torch.aten.mul.int %3044, %int32_34347 : !torch.int, !torch.int -> !torch.int
    %int2_34348 = torch.constant.int 2
    %36801 = torch.aten.mul.int %36800, %int2_34348 : !torch.int, !torch.int -> !torch.int
    %int16_34349 = torch.constant.int 16
    %int1_34350 = torch.constant.int 1
    %int128_34351 = torch.constant.int 128
    %36802 = torch.prim.ListConstruct %36801, %int16_34349, %int1_34350, %int128_34351 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36803 = torch.aten.view %36799, %36802 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36803, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %36804 = torch.prim.ListConstruct %36697 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_34352 = torch.constant.bool false
    %36805 = torch.aten.index_put %36803, %36804, %36713, %false_34352 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %36805, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_34353 = torch.constant.int 32
    %int2_34354 = torch.constant.int 2
    %int16_34355 = torch.constant.int 16
    %int1_34356 = torch.constant.int 1
    %int128_34357 = torch.constant.int 128
    %36806 = torch.prim.ListConstruct %3044, %int32_34353, %int2_34354, %int16_34355, %int1_34356, %int128_34357 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36807 = torch.aten.view %36805, %36806 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %36807, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_34358 = torch.constant.int 131072
    %36808 = torch.prim.ListConstruct %3044, %int131072_34358 : (!torch.int, !torch.int) -> !torch.list<int>
    %36809 = torch.aten.view %36807, %36808 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %36809, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
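    // Grouped-query head replication begins here: each single-KV-head key tensor
    // gets a broadcast dim inserted ahead of its head dim.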
    %int-2_34359 = torch.constant.int -2
    %36810 = torch.aten.unsqueeze %36424, %int-2_34359 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34360 = torch.constant.int -2
    %36811 = torch.aten.unsqueeze %36439, %int-2_34360 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34361 = torch.constant.int -2
    %36812 = torch.aten.unsqueeze %36454, %int-2_34361 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34362 = torch.constant.int -2
    %36813 = torch.aten.unsqueeze %36469, %int-2_34362 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34363 = torch.constant.int -2
    %36814 = torch.aten.unsqueeze %36484, %int-2_34363 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34364 = torch.constant.int -2
    %36815 = torch.aten.unsqueeze %36499, %int-2_34364 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34365 = torch.constant.int -2
    %36816 = torch.aten.unsqueeze %36514, %int-2_34365 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34366 = torch.constant.int -2
    %36817 = torch.aten.unsqueeze %36529, %int-2_34366 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
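    // Expand each unsqueezed tensor across the new dim to 4 heads (a broadcast
    // view, not a copy); 4 presumably being the number of query heads served by
    // each KV head on a shard.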
    %int4_34367 = torch.constant.int 4
    %int1_34368 = torch.constant.int 1
    %int4_34369 = torch.constant.int 4
    %int128_34370 = torch.constant.int 128
    %36818 = torch.prim.ListConstruct %int4_34367, %36410, %int1_34368, %int4_34369, %int128_34370 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34371 = torch.constant.bool false
    %36819 = torch.aten.expand %36810, %36818, %false_34371 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34372 = torch.constant.int 4
    %int1_34373 = torch.constant.int 1
    %int4_34374 = torch.constant.int 4
    %int128_34375 = torch.constant.int 128
    %36820 = torch.prim.ListConstruct %int4_34372, %36410, %int1_34373, %int4_34374, %int128_34375 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34376 = torch.constant.bool false
    %36821 = torch.aten.expand %36811, %36820, %false_34376 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34377 = torch.constant.int 4
    %int1_34378 = torch.constant.int 1
    %int4_34379 = torch.constant.int 4
    %int128_34380 = torch.constant.int 128
    %36822 = torch.prim.ListConstruct %int4_34377, %36410, %int1_34378, %int4_34379, %int128_34380 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34381 = torch.constant.bool false
    %36823 = torch.aten.expand %36812, %36822, %false_34381 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34382 = torch.constant.int 4
    %int1_34383 = torch.constant.int 1
    %int4_34384 = torch.constant.int 4
    %int128_34385 = torch.constant.int 128
    %36824 = torch.prim.ListConstruct %int4_34382, %36410, %int1_34383, %int4_34384, %int128_34385 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34386 = torch.constant.bool false
    %36825 = torch.aten.expand %36813, %36824, %false_34386 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34387 = torch.constant.int 4
    %int1_34388 = torch.constant.int 1
    %int4_34389 = torch.constant.int 4
    %int128_34390 = torch.constant.int 128
    %36826 = torch.prim.ListConstruct %int4_34387, %36410, %int1_34388, %int4_34389, %int128_34390 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34391 = torch.constant.bool false
    %36827 = torch.aten.expand %36814, %36826, %false_34391 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34392 = torch.constant.int 4
    %int1_34393 = torch.constant.int 1
    %int4_34394 = torch.constant.int 4
    %int128_34395 = torch.constant.int 128
    %36828 = torch.prim.ListConstruct %int4_34392, %36410, %int1_34393, %int4_34394, %int128_34395 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34396 = torch.constant.bool false
    %36829 = torch.aten.expand %36815, %36828, %false_34396 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34397 = torch.constant.int 4
    %int1_34398 = torch.constant.int 1
    %int4_34399 = torch.constant.int 4
    %int128_34400 = torch.constant.int 128
    %36830 = torch.prim.ListConstruct %int4_34397, %36410, %int1_34398, %int4_34399, %int128_34400 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34401 = torch.constant.bool false
    %36831 = torch.aten.expand %36816, %36830, %false_34401 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34402 = torch.constant.int 4
    %int1_34403 = torch.constant.int 1
    %int4_34404 = torch.constant.int 4
    %int128_34405 = torch.constant.int 128
    %36832 = torch.prim.ListConstruct %int4_34402, %36410, %int1_34403, %int4_34404, %int128_34405 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34406 = torch.constant.bool false
    %36833 = torch.aten.expand %36817, %36832, %false_34406 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
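    // Collapse the broadcast dim so the replicated heads form plain
    // [4, s0 * 16, 4, 128] tensors.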
    %int4_34407 = torch.constant.int 4
    %int4_34408 = torch.constant.int 4
    %int128_34409 = torch.constant.int 128
    %36834 = torch.prim.ListConstruct %int4_34407, %36410, %int4_34408, %int128_34409 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36835 = torch.aten.view %36819, %36834 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34410 = torch.constant.int 4
    %int4_34411 = torch.constant.int 4
    %int128_34412 = torch.constant.int 128
    %36836 = torch.prim.ListConstruct %int4_34410, %36410, %int4_34411, %int128_34412 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36837 = torch.aten.view %36821, %36836 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34413 = torch.constant.int 4
    %int4_34414 = torch.constant.int 4
    %int128_34415 = torch.constant.int 128
    %36838 = torch.prim.ListConstruct %int4_34413, %36410, %int4_34414, %int128_34415 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36839 = torch.aten.view %36823, %36838 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34416 = torch.constant.int 4
    %int4_34417 = torch.constant.int 4
    %int128_34418 = torch.constant.int 128
    %36840 = torch.prim.ListConstruct %int4_34416, %36410, %int4_34417, %int128_34418 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36841 = torch.aten.view %36825, %36840 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34419 = torch.constant.int 4
    %int4_34420 = torch.constant.int 4
    %int128_34421 = torch.constant.int 128
    %36842 = torch.prim.ListConstruct %int4_34419, %36410, %int4_34420, %int128_34421 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36843 = torch.aten.view %36827, %36842 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34422 = torch.constant.int 4
    %int4_34423 = torch.constant.int 4
    %int128_34424 = torch.constant.int 128
    %36844 = torch.prim.ListConstruct %int4_34422, %36410, %int4_34423, %int128_34424 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36845 = torch.aten.view %36829, %36844 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34425 = torch.constant.int 4
    %int4_34426 = torch.constant.int 4
    %int128_34427 = torch.constant.int 128
    %36846 = torch.prim.ListConstruct %int4_34425, %36410, %int4_34426, %int128_34427 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36847 = torch.aten.view %36831, %36846 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34428 = torch.constant.int 4
    %int4_34429 = torch.constant.int 4
    %int128_34430 = torch.constant.int 128
    %36848 = torch.prim.ListConstruct %int4_34428, %36410, %int4_34429, %int128_34430 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36849 = torch.aten.view %36833, %36848 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
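    // The same unsqueeze / expand / collapse sequence is now applied to the second
    // group of tensors (presumably the values, which bypass the rotary embedding
    // applied to the keys above); the dynamic sequence length is re-read from
    // %36123 before the expands.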
    %int-2_34431 = torch.constant.int -2
    %36850 = torch.aten.unsqueeze %36199, %int-2_34431 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34432 = torch.constant.int -2
    %36851 = torch.aten.unsqueeze %36201, %int-2_34432 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34433 = torch.constant.int -2
    %36852 = torch.aten.unsqueeze %36203, %int-2_34433 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34434 = torch.constant.int -2
    %36853 = torch.aten.unsqueeze %36205, %int-2_34434 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34435 = torch.constant.int -2
    %36854 = torch.aten.unsqueeze %36207, %int-2_34435 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34436 = torch.constant.int -2
    %36855 = torch.aten.unsqueeze %36209, %int-2_34436 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34437 = torch.constant.int -2
    %36856 = torch.aten.unsqueeze %36211, %int-2_34437 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_34438 = torch.constant.int -2
    %36857 = torch.aten.unsqueeze %36213, %int-2_34438 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %36857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_34439 = torch.constant.int 1
    %36858 = torch.aten.size.int %36123, %int1_34439 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_34440 = torch.constant.int 4
    %int1_34441 = torch.constant.int 1
    %int4_34442 = torch.constant.int 4
    %int128_34443 = torch.constant.int 128
    %36859 = torch.prim.ListConstruct %int4_34440, %36858, %int1_34441, %int4_34442, %int128_34443 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34444 = torch.constant.bool false
    %36860 = torch.aten.expand %36850, %36859, %false_34444 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34445 = torch.constant.int 4
    %int1_34446 = torch.constant.int 1
    %int4_34447 = torch.constant.int 4
    %int128_34448 = torch.constant.int 128
    %36861 = torch.prim.ListConstruct %int4_34445, %36858, %int1_34446, %int4_34447, %int128_34448 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34449 = torch.constant.bool false
    %36862 = torch.aten.expand %36851, %36861, %false_34449 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34450 = torch.constant.int 4
    %int1_34451 = torch.constant.int 1
    %int4_34452 = torch.constant.int 4
    %int128_34453 = torch.constant.int 128
    %36863 = torch.prim.ListConstruct %int4_34450, %36858, %int1_34451, %int4_34452, %int128_34453 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34454 = torch.constant.bool false
    %36864 = torch.aten.expand %36852, %36863, %false_34454 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34455 = torch.constant.int 4
    %int1_34456 = torch.constant.int 1
    %int4_34457 = torch.constant.int 4
    %int128_34458 = torch.constant.int 128
    %36865 = torch.prim.ListConstruct %int4_34455, %36858, %int1_34456, %int4_34457, %int128_34458 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34459 = torch.constant.bool false
    %36866 = torch.aten.expand %36853, %36865, %false_34459 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34460 = torch.constant.int 4
    %int1_34461 = torch.constant.int 1
    %int4_34462 = torch.constant.int 4
    %int128_34463 = torch.constant.int 128
    %36867 = torch.prim.ListConstruct %int4_34460, %36858, %int1_34461, %int4_34462, %int128_34463 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34464 = torch.constant.bool false
    %36868 = torch.aten.expand %36854, %36867, %false_34464 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34465 = torch.constant.int 4
    %int1_34466 = torch.constant.int 1
    %int4_34467 = torch.constant.int 4
    %int128_34468 = torch.constant.int 128
    %36869 = torch.prim.ListConstruct %int4_34465, %36858, %int1_34466, %int4_34467, %int128_34468 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34469 = torch.constant.bool false
    %36870 = torch.aten.expand %36855, %36869, %false_34469 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34470 = torch.constant.int 4
    %int1_34471 = torch.constant.int 1
    %int4_34472 = torch.constant.int 4
    %int128_34473 = torch.constant.int 128
    %36871 = torch.prim.ListConstruct %int4_34470, %36858, %int1_34471, %int4_34472, %int128_34473 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34474 = torch.constant.bool false
    %36872 = torch.aten.expand %36856, %36871, %false_34474 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_34475 = torch.constant.int 4
    %int1_34476 = torch.constant.int 1
    %int4_34477 = torch.constant.int 4
    %int128_34478 = torch.constant.int 128
    %36873 = torch.prim.ListConstruct %int4_34475, %36858, %int1_34476, %int4_34477, %int128_34478 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_34479 = torch.constant.bool false
    %36874 = torch.aten.expand %36857, %36873, %false_34479 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %36874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
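    // The views below collapse the broadcast head axes, completing repeat_kv:
    // [4, s, 1, 4, 128] -> [4, s, 4, 128] for each of the eight shards.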
    %int4_34480 = torch.constant.int 4
    %int4_34481 = torch.constant.int 4
    %int128_34482 = torch.constant.int 128
    %36875 = torch.prim.ListConstruct %int4_34480, %36858, %int4_34481, %int128_34482 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36876 = torch.aten.view %36860, %36875 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34483 = torch.constant.int 4
    %int4_34484 = torch.constant.int 4
    %int128_34485 = torch.constant.int 128
    %36877 = torch.prim.ListConstruct %int4_34483, %36858, %int4_34484, %int128_34485 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36878 = torch.aten.view %36862, %36877 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34486 = torch.constant.int 4
    %int4_34487 = torch.constant.int 4
    %int128_34488 = torch.constant.int 128
    %36879 = torch.prim.ListConstruct %int4_34486, %36858, %int4_34487, %int128_34488 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36880 = torch.aten.view %36864, %36879 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34489 = torch.constant.int 4
    %int4_34490 = torch.constant.int 4
    %int128_34491 = torch.constant.int 128
    %36881 = torch.prim.ListConstruct %int4_34489, %36858, %int4_34490, %int128_34491 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36882 = torch.aten.view %36866, %36881 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34492 = torch.constant.int 4
    %int4_34493 = torch.constant.int 4
    %int128_34494 = torch.constant.int 128
    %36883 = torch.prim.ListConstruct %int4_34492, %36858, %int4_34493, %int128_34494 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36884 = torch.aten.view %36868, %36883 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34495 = torch.constant.int 4
    %int4_34496 = torch.constant.int 4
    %int128_34497 = torch.constant.int 128
    %36885 = torch.prim.ListConstruct %int4_34495, %36858, %int4_34496, %int128_34497 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36886 = torch.aten.view %36870, %36885 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34498 = torch.constant.int 4
    %int4_34499 = torch.constant.int 4
    %int128_34500 = torch.constant.int 128
    %36887 = torch.prim.ListConstruct %int4_34498, %36858, %int4_34499, %int128_34500 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36888 = torch.aten.view %36872, %36887 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_34501 = torch.constant.int 4
    %int4_34502 = torch.constant.int 4
    %int128_34503 = torch.constant.int 128
    %36889 = torch.prim.ListConstruct %int4_34501, %36858, %int4_34502, %int128_34503 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36890 = torch.aten.view %36874, %36889 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
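    // The 24 transposes below swap dims 1 and 2, moving Q (%36266 ... %36371),
    // K (%36835 ... %36849), and V (%36876 ... %36890) for all eight shards
    // from (batch, seq, heads, head_dim) to the (batch, heads, seq, head_dim)
    // layout that the flash-attention op consumes.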
    %int1_34504 = torch.constant.int 1
    %int2_34505 = torch.constant.int 2
    %36891 = torch.aten.transpose.int %36266, %int1_34504, %int2_34505 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36891, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34506 = torch.constant.int 1
    %int2_34507 = torch.constant.int 2
    %36892 = torch.aten.transpose.int %36281, %int1_34506, %int2_34507 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36892, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34508 = torch.constant.int 1
    %int2_34509 = torch.constant.int 2
    %36893 = torch.aten.transpose.int %36296, %int1_34508, %int2_34509 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36893, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34510 = torch.constant.int 1
    %int2_34511 = torch.constant.int 2
    %36894 = torch.aten.transpose.int %36311, %int1_34510, %int2_34511 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36894, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34512 = torch.constant.int 1
    %int2_34513 = torch.constant.int 2
    %36895 = torch.aten.transpose.int %36326, %int1_34512, %int2_34513 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36895, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34514 = torch.constant.int 1
    %int2_34515 = torch.constant.int 2
    %36896 = torch.aten.transpose.int %36341, %int1_34514, %int2_34515 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36896, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34516 = torch.constant.int 1
    %int2_34517 = torch.constant.int 2
    %36897 = torch.aten.transpose.int %36356, %int1_34516, %int2_34517 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36897, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34518 = torch.constant.int 1
    %int2_34519 = torch.constant.int 2
    %36898 = torch.aten.transpose.int %36371, %int1_34518, %int2_34519 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36898, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34520 = torch.constant.int 1
    %int2_34521 = torch.constant.int 2
    %36899 = torch.aten.transpose.int %36835, %int1_34520, %int2_34521 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36899, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34522 = torch.constant.int 1
    %int2_34523 = torch.constant.int 2
    %36900 = torch.aten.transpose.int %36837, %int1_34522, %int2_34523 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36900, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34524 = torch.constant.int 1
    %int2_34525 = torch.constant.int 2
    %36901 = torch.aten.transpose.int %36839, %int1_34524, %int2_34525 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36901, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34526 = torch.constant.int 1
    %int2_34527 = torch.constant.int 2
    %36902 = torch.aten.transpose.int %36841, %int1_34526, %int2_34527 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36902, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34528 = torch.constant.int 1
    %int2_34529 = torch.constant.int 2
    %36903 = torch.aten.transpose.int %36843, %int1_34528, %int2_34529 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36903, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34530 = torch.constant.int 1
    %int2_34531 = torch.constant.int 2
    %36904 = torch.aten.transpose.int %36845, %int1_34530, %int2_34531 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36904, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34532 = torch.constant.int 1
    %int2_34533 = torch.constant.int 2
    %36905 = torch.aten.transpose.int %36847, %int1_34532, %int2_34533 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36905, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34534 = torch.constant.int 1
    %int2_34535 = torch.constant.int 2
    %36906 = torch.aten.transpose.int %36849, %int1_34534, %int2_34535 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36906, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34536 = torch.constant.int 1
    %int2_34537 = torch.constant.int 2
    %36907 = torch.aten.transpose.int %36876, %int1_34536, %int2_34537 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36907, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34538 = torch.constant.int 1
    %int2_34539 = torch.constant.int 2
    %36908 = torch.aten.transpose.int %36878, %int1_34538, %int2_34539 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36908, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34540 = torch.constant.int 1
    %int2_34541 = torch.constant.int 2
    %36909 = torch.aten.transpose.int %36880, %int1_34540, %int2_34541 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36909, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34542 = torch.constant.int 1
    %int2_34543 = torch.constant.int 2
    %36910 = torch.aten.transpose.int %36882, %int1_34542, %int2_34543 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36910, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34544 = torch.constant.int 1
    %int2_34545 = torch.constant.int 2
    %36911 = torch.aten.transpose.int %36884, %int1_34544, %int2_34545 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36911, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34546 = torch.constant.int 1
    %int2_34547 = torch.constant.int 2
    %36912 = torch.aten.transpose.int %36886, %int1_34546, %int2_34547 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36912, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34548 = torch.constant.int 1
    %int2_34549 = torch.constant.int 2
    %36913 = torch.aten.transpose.int %36888, %int1_34548, %int2_34549 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36913, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_34550 = torch.constant.int 1
    %int2_34551 = torch.constant.int 2
    %36914 = torch.aten.transpose.int %36890, %int1_34550, %int2_34551 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %36914, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
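    // Eight per-shard flash-attention calls follow. Assuming PyTorch's operand
    // order for _scaled_dot_product_flash_attention_for_cpu (query, key, value,
    // dropout_p, is_causal, attn_mask, scale): dropout is 0.0, the true flag
    // sits in the is_causal position, and no explicit mask or scale is passed
    // (the two none operands). Each call returns the attention output plus an
    // f32 logsumexp tensor [4,4,?]; only result #0 is consumed below.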
    %float0.000000e00_34552 = torch.constant.float 0.000000e+00
    %true_34553 = torch.constant.bool true
    %none_34554 = torch.constant.none
    %none_34555 = torch.constant.none
    %36915:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%36891, %36899, %36907, %float0.000000e00_34552, %true_34553, %none_34554, %none_34555) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %36915#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_34556 = torch.constant.float 0.000000e+00
    %true_34557 = torch.constant.bool true
    %none_34558 = torch.constant.none
    %none_34559 = torch.constant.none
    %36916:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%36892, %36900, %36908, %float0.000000e00_34556, %true_34557, %none_34558, %none_34559) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %36916#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_34560 = torch.constant.float 0.000000e+00
    %true_34561 = torch.constant.bool true
    %none_34562 = torch.constant.none
    %none_34563 = torch.constant.none
    %36917:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%36893, %36901, %36909, %float0.000000e00_34560, %true_34561, %none_34562, %none_34563) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %36917#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_34564 = torch.constant.float 0.000000e+00
    %true_34565 = torch.constant.bool true
    %none_34566 = torch.constant.none
    %none_34567 = torch.constant.none
    %36918:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%36894, %36902, %36910, %float0.000000e00_34564, %true_34565, %none_34566, %none_34567) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %36918#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_34568 = torch.constant.float 0.000000e+00
    %true_34569 = torch.constant.bool true
    %none_34570 = torch.constant.none
    %none_34571 = torch.constant.none
    %36919:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%36895, %36903, %36911, %float0.000000e00_34568, %true_34569, %none_34570, %none_34571) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %36919#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_34572 = torch.constant.float 0.000000e+00
    %true_34573 = torch.constant.bool true
    %none_34574 = torch.constant.none
    %none_34575 = torch.constant.none
    %36920:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%36896, %36904, %36912, %float0.000000e00_34572, %true_34573, %none_34574, %none_34575) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %36920#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_34576 = torch.constant.float 0.000000e+00
    %true_34577 = torch.constant.bool true
    %none_34578 = torch.constant.none
    %none_34579 = torch.constant.none
    %36921:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%36897, %36905, %36913, %float0.000000e00_34576, %true_34577, %none_34578, %none_34579) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %36921#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_34580 = torch.constant.float 0.000000e+00
    %true_34581 = torch.constant.bool true
    %none_34582 = torch.constant.none
    %none_34583 = torch.constant.none
    %36922:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%36898, %36906, %36914, %float0.000000e00_34580, %true_34581, %none_34582, %none_34583) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %36922#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
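    // Transpose each attention output back from (batch, heads, seq, head_dim)
    // to (batch, seq, heads, head_dim) ahead of the head merge.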
    %int1_34584 = torch.constant.int 1
    %int2_34585 = torch.constant.int 2
    %36923 = torch.aten.transpose.int %36915#0, %int1_34584, %int2_34585 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_34586 = torch.constant.int 1
    %int2_34587 = torch.constant.int 2
    %36924 = torch.aten.transpose.int %36916#0, %int1_34586, %int2_34587 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_34588 = torch.constant.int 1
    %int2_34589 = torch.constant.int 2
    %36925 = torch.aten.transpose.int %36917#0, %int1_34588, %int2_34589 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_34590 = torch.constant.int 1
    %int2_34591 = torch.constant.int 2
    %36926 = torch.aten.transpose.int %36918#0, %int1_34590, %int2_34591 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_34592 = torch.constant.int 1
    %int2_34593 = torch.constant.int 2
    %36927 = torch.aten.transpose.int %36919#0, %int1_34592, %int2_34593 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_34594 = torch.constant.int 1
    %int2_34595 = torch.constant.int 2
    %36928 = torch.aten.transpose.int %36920#0, %int1_34594, %int2_34595 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_34596 = torch.constant.int 1
    %int2_34597 = torch.constant.int 2
    %36929 = torch.aten.transpose.int %36921#0, %int1_34596, %int2_34597 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_34598 = torch.constant.int 1
    %int2_34599 = torch.constant.int 2
    %36930 = torch.aten.transpose.int %36922#0, %int1_34598, %int2_34599 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %36930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
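    // Merge the per-shard heads: [4, s, 4, 128] -> [4, s, 512] (4 heads of
    // width 128), using each shard's own cached sequence length (%36252,
    // %36267, ..., %36357).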
    %int4_34600 = torch.constant.int 4
    %int512_34601 = torch.constant.int 512
    %36931 = torch.prim.ListConstruct %int4_34600, %36252, %int512_34601 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36932 = torch.aten.view %36923, %36931 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_34602 = torch.constant.int 4
    %int512_34603 = torch.constant.int 512
    %36933 = torch.prim.ListConstruct %int4_34602, %36267, %int512_34603 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36934 = torch.aten.view %36924, %36933 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_34604 = torch.constant.int 4
    %int512_34605 = torch.constant.int 512
    %36935 = torch.prim.ListConstruct %int4_34604, %36282, %int512_34605 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36936 = torch.aten.view %36925, %36935 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_34606 = torch.constant.int 4
    %int512_34607 = torch.constant.int 512
    %36937 = torch.prim.ListConstruct %int4_34606, %36297, %int512_34607 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36938 = torch.aten.view %36926, %36937 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_34608 = torch.constant.int 4
    %int512_34609 = torch.constant.int 512
    %36939 = torch.prim.ListConstruct %int4_34608, %36312, %int512_34609 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36940 = torch.aten.view %36927, %36939 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_34610 = torch.constant.int 4
    %int512_34611 = torch.constant.int 512
    %36941 = torch.prim.ListConstruct %int4_34610, %36327, %int512_34611 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36942 = torch.aten.view %36928, %36941 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_34612 = torch.constant.int 4
    %int512_34613 = torch.constant.int 512
    %36943 = torch.prim.ListConstruct %int4_34612, %36342, %int512_34613 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36944 = torch.aten.view %36929, %36943 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_34614 = torch.constant.int 4
    %int512_34615 = torch.constant.int 512
    %36945 = torch.prim.ListConstruct %int4_34614, %36357, %int512_34615 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36946 = torch.aten.view %36930, %36945 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %36946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
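    // Transpose the eight [4096,512] projection weights (%1336 ... %1343) to
    // [512,4096] for the matmuls below; these are presumably the per-device
    // attn_output weight shards (an inference from the 512-wide per-shard
    // head width, not stated in this excerpt).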
    %int1_34616 = torch.constant.int 1
    %int0_34617 = torch.constant.int 0
    %36947 = torch.prim.ListConstruct %int1_34616, %int0_34617 : (!torch.int, !torch.int) -> !torch.list<int>
    %36948 = torch.aten.permute %1336, %36947 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_34618 = torch.constant.int 1
    %int0_34619 = torch.constant.int 0
    %36949 = torch.prim.ListConstruct %int1_34618, %int0_34619 : (!torch.int, !torch.int) -> !torch.list<int>
    %36950 = torch.aten.permute %1337, %36949 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_34620 = torch.constant.int 1
    %int0_34621 = torch.constant.int 0
    %36951 = torch.prim.ListConstruct %int1_34620, %int0_34621 : (!torch.int, !torch.int) -> !torch.list<int>
    %36952 = torch.aten.permute %1338, %36951 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_34622 = torch.constant.int 1
    %int0_34623 = torch.constant.int 0
    %36953 = torch.prim.ListConstruct %int1_34622, %int0_34623 : (!torch.int, !torch.int) -> !torch.list<int>
    %36954 = torch.aten.permute %1339, %36953 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_34624 = torch.constant.int 1
    %int0_34625 = torch.constant.int 0
    %36955 = torch.prim.ListConstruct %int1_34624, %int0_34625 : (!torch.int, !torch.int) -> !torch.list<int>
    %36956 = torch.aten.permute %1340, %36955 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_34626 = torch.constant.int 1
    %int0_34627 = torch.constant.int 0
    %36957 = torch.prim.ListConstruct %int1_34626, %int0_34627 : (!torch.int, !torch.int) -> !torch.list<int>
    %36958 = torch.aten.permute %1341, %36957 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_34628 = torch.constant.int 1
    %int0_34629 = torch.constant.int 0
    %36959 = torch.prim.ListConstruct %int1_34628, %int0_34629 : (!torch.int, !torch.int) -> !torch.list<int>
    %36960 = torch.aten.permute %1342, %36959 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_34630 = torch.constant.int 1
    %int0_34631 = torch.constant.int 0
    %36961 = torch.prim.ListConstruct %int1_34630, %int0_34631 : (!torch.int, !torch.int) -> !torch.list<int>
    %36962 = torch.aten.permute %1343, %36961 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
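    // Per-shard output projection: flatten [4, s, 512] to [4*s, 512], multiply
    // by the [512, 4096] weight, then reshape back to [4, s, 4096]. Each shard
    // yields a partial activation that still has to be summed across devices.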
    %int4_34632 = torch.constant.int 4
    %36963 = torch.aten.mul.int %int4_34632, %36252 : !torch.int, !torch.int -> !torch.int
    %int512_34633 = torch.constant.int 512
    %36964 = torch.prim.ListConstruct %36963, %int512_34633 : (!torch.int, !torch.int) -> !torch.list<int>
    %36965 = torch.aten.view %36932, %36964 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %36965, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %36966 = torch.aten.mm %36965, %36948 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36966, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_34634 = torch.constant.int 4
    %int4096_34635 = torch.constant.int 4096
    %36967 = torch.prim.ListConstruct %int4_34634, %36252, %int4096_34635 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36968 = torch.aten.view %36966, %36967 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %36968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_34636 = torch.constant.int 4
    %36969 = torch.aten.mul.int %int4_34636, %36267 : !torch.int, !torch.int -> !torch.int
    %int512_34637 = torch.constant.int 512
    %36970 = torch.prim.ListConstruct %36969, %int512_34637 : (!torch.int, !torch.int) -> !torch.list<int>
    %36971 = torch.aten.view %36934, %36970 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %36971, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %36972 = torch.aten.mm %36971, %36950 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36972, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_34638 = torch.constant.int 4
    %int4096_34639 = torch.constant.int 4096
    %36973 = torch.prim.ListConstruct %int4_34638, %36267, %int4096_34639 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36974 = torch.aten.view %36972, %36973 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %36974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_34640 = torch.constant.int 4
    %36975 = torch.aten.mul.int %int4_34640, %36282 : !torch.int, !torch.int -> !torch.int
    %int512_34641 = torch.constant.int 512
    %36976 = torch.prim.ListConstruct %36975, %int512_34641 : (!torch.int, !torch.int) -> !torch.list<int>
    %36977 = torch.aten.view %36936, %36976 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %36977, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %36978 = torch.aten.mm %36977, %36952 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36978, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_34642 = torch.constant.int 4
    %int4096_34643 = torch.constant.int 4096
    %36979 = torch.prim.ListConstruct %int4_34642, %36282, %int4096_34643 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36980 = torch.aten.view %36978, %36979 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %36980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_34644 = torch.constant.int 4
    %36981 = torch.aten.mul.int %int4_34644, %36297 : !torch.int, !torch.int -> !torch.int
    %int512_34645 = torch.constant.int 512
    %36982 = torch.prim.ListConstruct %36981, %int512_34645 : (!torch.int, !torch.int) -> !torch.list<int>
    %36983 = torch.aten.view %36938, %36982 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %36983, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %36984 = torch.aten.mm %36983, %36954 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36984, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_34646 = torch.constant.int 4
    %int4096_34647 = torch.constant.int 4096
    %36985 = torch.prim.ListConstruct %int4_34646, %36297, %int4096_34647 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36986 = torch.aten.view %36984, %36985 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %36986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_34648 = torch.constant.int 4
    %36987 = torch.aten.mul.int %int4_34648, %36312 : !torch.int, !torch.int -> !torch.int
    %int512_34649 = torch.constant.int 512
    %36988 = torch.prim.ListConstruct %36987, %int512_34649 : (!torch.int, !torch.int) -> !torch.list<int>
    %36989 = torch.aten.view %36940, %36988 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %36989, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %36990 = torch.aten.mm %36989, %36956 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36990, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_34650 = torch.constant.int 4
    %int4096_34651 = torch.constant.int 4096
    %36991 = torch.prim.ListConstruct %int4_34650, %36312, %int4096_34651 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36992 = torch.aten.view %36990, %36991 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %36992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_34652 = torch.constant.int 4
    %36993 = torch.aten.mul.int %int4_34652, %36327 : !torch.int, !torch.int -> !torch.int
    %int512_34653 = torch.constant.int 512
    %36994 = torch.prim.ListConstruct %36993, %int512_34653 : (!torch.int, !torch.int) -> !torch.list<int>
    %36995 = torch.aten.view %36942, %36994 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %36995, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %36996 = torch.aten.mm %36995, %36958 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %36996, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_34654 = torch.constant.int 4
    %int4096_34655 = torch.constant.int 4096
    %36997 = torch.prim.ListConstruct %int4_34654, %36327, %int4096_34655 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %36998 = torch.aten.view %36996, %36997 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %36998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_34656 = torch.constant.int 4
    %36999 = torch.aten.mul.int %int4_34656, %36342 : !torch.int, !torch.int -> !torch.int
    %int512_34657 = torch.constant.int 512
    %37000 = torch.prim.ListConstruct %36999, %int512_34657 : (!torch.int, !torch.int) -> !torch.list<int>
    %37001 = torch.aten.view %36944, %37000 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %37001, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %37002 = torch.aten.mm %37001, %36960 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37002, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_34658 = torch.constant.int 4
    %int4096_34659 = torch.constant.int 4096
    %37003 = torch.prim.ListConstruct %int4_34658, %36342, %int4096_34659 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37004 = torch.aten.view %37002, %37003 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_34660 = torch.constant.int 4
    %37005 = torch.aten.mul.int %int4_34660, %36357 : !torch.int, !torch.int -> !torch.int
    %int512_34661 = torch.constant.int 512
    %37006 = torch.prim.ListConstruct %37005, %int512_34661 : (!torch.int, !torch.int) -> !torch.list<int>
    %37007 = torch.aten.view %36946, %37006 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %37007, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %37008 = torch.aten.mm %37007, %36962 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37008, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_34662 = torch.constant.int 4
    %int4096_34663 = torch.constant.int 4096
    %37009 = torch.prim.ListConstruct %int4_34662, %36357, %int4096_34663 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37010 = torch.aten.view %37008, %37009 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
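    // What follows reads like an unrolled all-reduce (sum) over the eight
    // partials. First leg: the seven non-local partials (%36974 ... %37010)
    // are transferred to @__device_0 and folded into device 0's own %36968 by
    // the chain of adds ending in %37038.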
    %37011 = torch_c.to_builtin_tensor %36974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34664 = arith.constant 1 : index
    %dim_34665 = tensor.dim %37011, %c1_34664 : tensor<4x?x4096xf16>
    %37012 = flow.tensor.transfer %37011 : tensor<4x?x4096xf16>{%dim_34665} to #hal.device.promise<@__device_0>
    %37013 = torch_c.from_builtin_tensor %37012 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37014 = torch_c.to_builtin_tensor %36980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34666 = arith.constant 1 : index
    %dim_34667 = tensor.dim %37014, %c1_34666 : tensor<4x?x4096xf16>
    %37015 = flow.tensor.transfer %37014 : tensor<4x?x4096xf16>{%dim_34667} to #hal.device.promise<@__device_0>
    %37016 = torch_c.from_builtin_tensor %37015 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37017 = torch_c.to_builtin_tensor %36986 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34668 = arith.constant 1 : index
    %dim_34669 = tensor.dim %37017, %c1_34668 : tensor<4x?x4096xf16>
    %37018 = flow.tensor.transfer %37017 : tensor<4x?x4096xf16>{%dim_34669} to #hal.device.promise<@__device_0>
    %37019 = torch_c.from_builtin_tensor %37018 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37020 = torch_c.to_builtin_tensor %36992 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34670 = arith.constant 1 : index
    %dim_34671 = tensor.dim %37020, %c1_34670 : tensor<4x?x4096xf16>
    %37021 = flow.tensor.transfer %37020 : tensor<4x?x4096xf16>{%dim_34671} to #hal.device.promise<@__device_0>
    %37022 = torch_c.from_builtin_tensor %37021 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37023 = torch_c.to_builtin_tensor %36998 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34672 = arith.constant 1 : index
    %dim_34673 = tensor.dim %37023, %c1_34672 : tensor<4x?x4096xf16>
    %37024 = flow.tensor.transfer %37023 : tensor<4x?x4096xf16>{%dim_34673} to #hal.device.promise<@__device_0>
    %37025 = torch_c.from_builtin_tensor %37024 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37026 = torch_c.to_builtin_tensor %37004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34674 = arith.constant 1 : index
    %dim_34675 = tensor.dim %37026, %c1_34674 : tensor<4x?x4096xf16>
    %37027 = flow.tensor.transfer %37026 : tensor<4x?x4096xf16>{%dim_34675} to #hal.device.promise<@__device_0>
    %37028 = torch_c.from_builtin_tensor %37027 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37029 = torch_c.to_builtin_tensor %37010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34676 = arith.constant 1 : index
    %dim_34677 = tensor.dim %37029, %c1_34676 : tensor<4x?x4096xf16>
    %37030 = flow.tensor.transfer %37029 : tensor<4x?x4096xf16>{%dim_34677} to #hal.device.promise<@__device_0>
    %37031 = torch_c.from_builtin_tensor %37030 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34678 = torch.constant.int 1
    %37032 = torch.aten.add.Tensor %36968, %37013, %int1_34678 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34679 = torch.constant.int 1
    %37033 = torch.aten.add.Tensor %37032, %37016, %int1_34679 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34680 = torch.constant.int 1
    %37034 = torch.aten.add.Tensor %37033, %37019, %int1_34680 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34681 = torch.constant.int 1
    %37035 = torch.aten.add.Tensor %37034, %37022, %int1_34681 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34682 = torch.constant.int 1
    %37036 = torch.aten.add.Tensor %37035, %37025, %int1_34682 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34683 = torch.constant.int 1
    %37037 = torch.aten.add.Tensor %37036, %37028, %int1_34683 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34684 = torch.constant.int 1
    %37038 = torch.aten.add.Tensor %37037, %37031, %int1_34684 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
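    // Second leg of the reduction, rooted at @__device_1. Note that device 1's
    // own partial (%36974) is not transferred; it enters the add chain
    // directly.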
    %37039 = torch_c.to_builtin_tensor %36968 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34685 = arith.constant 1 : index
    %dim_34686 = tensor.dim %37039, %c1_34685 : tensor<4x?x4096xf16>
    %37040 = flow.tensor.transfer %37039 : tensor<4x?x4096xf16>{%dim_34686} to #hal.device.promise<@__device_1>
    %37041 = torch_c.from_builtin_tensor %37040 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37042 = torch_c.to_builtin_tensor %36980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34687 = arith.constant 1 : index
    %dim_34688 = tensor.dim %37042, %c1_34687 : tensor<4x?x4096xf16>
    %37043 = flow.tensor.transfer %37042 : tensor<4x?x4096xf16>{%dim_34688} to #hal.device.promise<@__device_1>
    %37044 = torch_c.from_builtin_tensor %37043 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37045 = torch_c.to_builtin_tensor %36986 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34689 = arith.constant 1 : index
    %dim_34690 = tensor.dim %37045, %c1_34689 : tensor<4x?x4096xf16>
    %37046 = flow.tensor.transfer %37045 : tensor<4x?x4096xf16>{%dim_34690} to #hal.device.promise<@__device_1>
    %37047 = torch_c.from_builtin_tensor %37046 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37048 = torch_c.to_builtin_tensor %36992 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34691 = arith.constant 1 : index
    %dim_34692 = tensor.dim %37048, %c1_34691 : tensor<4x?x4096xf16>
    %37049 = flow.tensor.transfer %37048 : tensor<4x?x4096xf16>{%dim_34692} to #hal.device.promise<@__device_1>
    %37050 = torch_c.from_builtin_tensor %37049 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37051 = torch_c.to_builtin_tensor %36998 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34693 = arith.constant 1 : index
    %dim_34694 = tensor.dim %37051, %c1_34693 : tensor<4x?x4096xf16>
    %37052 = flow.tensor.transfer %37051 : tensor<4x?x4096xf16>{%dim_34694} to #hal.device.promise<@__device_1>
    %37053 = torch_c.from_builtin_tensor %37052 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37054 = torch_c.to_builtin_tensor %37004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34695 = arith.constant 1 : index
    %dim_34696 = tensor.dim %37054, %c1_34695 : tensor<4x?x4096xf16>
    %37055 = flow.tensor.transfer %37054 : tensor<4x?x4096xf16>{%dim_34696} to #hal.device.promise<@__device_1>
    %37056 = torch_c.from_builtin_tensor %37055 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37057 = torch_c.to_builtin_tensor %37010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34697 = arith.constant 1 : index
    %dim_34698 = tensor.dim %37057, %c1_34697 : tensor<4x?x4096xf16>
    %37058 = flow.tensor.transfer %37057 : tensor<4x?x4096xf16>{%dim_34698} to #hal.device.promise<@__device_1>
    %37059 = torch_c.from_builtin_tensor %37058 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34699 = torch.constant.int 1
    %37060 = torch.aten.add.Tensor %37041, %36974, %int1_34699 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34700 = torch.constant.int 1
    %37061 = torch.aten.add.Tensor %37060, %37044, %int1_34700 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34701 = torch.constant.int 1
    %37062 = torch.aten.add.Tensor %37061, %37047, %int1_34701 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34702 = torch.constant.int 1
    %37063 = torch.aten.add.Tensor %37062, %37050, %int1_34702 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34703 = torch.constant.int 1
    %37064 = torch.aten.add.Tensor %37063, %37053, %int1_34703 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34704 = torch.constant.int 1
    %37065 = torch.aten.add.Tensor %37064, %37056, %int1_34704 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34705 = torch.constant.int 1
    %37066 = torch.aten.add.Tensor %37065, %37059, %int1_34705 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
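    // Third leg, rooted at @__device_2; here %36980 is the local partial added
    // in place while the other seven arrive via flow.tensor.transfer.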
    %37067 = torch_c.to_builtin_tensor %36968 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34706 = arith.constant 1 : index
    %dim_34707 = tensor.dim %37067, %c1_34706 : tensor<4x?x4096xf16>
    %37068 = flow.tensor.transfer %37067 : tensor<4x?x4096xf16>{%dim_34707} to #hal.device.promise<@__device_2>
    %37069 = torch_c.from_builtin_tensor %37068 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37070 = torch_c.to_builtin_tensor %36974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34708 = arith.constant 1 : index
    %dim_34709 = tensor.dim %37070, %c1_34708 : tensor<4x?x4096xf16>
    %37071 = flow.tensor.transfer %37070 : tensor<4x?x4096xf16>{%dim_34709} to #hal.device.promise<@__device_2>
    %37072 = torch_c.from_builtin_tensor %37071 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37073 = torch_c.to_builtin_tensor %36986 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34710 = arith.constant 1 : index
    %dim_34711 = tensor.dim %37073, %c1_34710 : tensor<4x?x4096xf16>
    %37074 = flow.tensor.transfer %37073 : tensor<4x?x4096xf16>{%dim_34711} to #hal.device.promise<@__device_2>
    %37075 = torch_c.from_builtin_tensor %37074 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37076 = torch_c.to_builtin_tensor %36992 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34712 = arith.constant 1 : index
    %dim_34713 = tensor.dim %37076, %c1_34712 : tensor<4x?x4096xf16>
    %37077 = flow.tensor.transfer %37076 : tensor<4x?x4096xf16>{%dim_34713} to #hal.device.promise<@__device_2>
    %37078 = torch_c.from_builtin_tensor %37077 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37079 = torch_c.to_builtin_tensor %36998 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34714 = arith.constant 1 : index
    %dim_34715 = tensor.dim %37079, %c1_34714 : tensor<4x?x4096xf16>
    %37080 = flow.tensor.transfer %37079 : tensor<4x?x4096xf16>{%dim_34715} to #hal.device.promise<@__device_2>
    %37081 = torch_c.from_builtin_tensor %37080 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37082 = torch_c.to_builtin_tensor %37004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34716 = arith.constant 1 : index
    %dim_34717 = tensor.dim %37082, %c1_34716 : tensor<4x?x4096xf16>
    %37083 = flow.tensor.transfer %37082 : tensor<4x?x4096xf16>{%dim_34717} to #hal.device.promise<@__device_2>
    %37084 = torch_c.from_builtin_tensor %37083 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37085 = torch_c.to_builtin_tensor %37010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34718 = arith.constant 1 : index
    %dim_34719 = tensor.dim %37085, %c1_34718 : tensor<4x?x4096xf16>
    %37086 = flow.tensor.transfer %37085 : tensor<4x?x4096xf16>{%dim_34719} to #hal.device.promise<@__device_2>
    %37087 = torch_c.from_builtin_tensor %37086 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34720 = torch.constant.int 1
    %37088 = torch.aten.add.Tensor %37069, %37072, %int1_34720 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34721 = torch.constant.int 1
    %37089 = torch.aten.add.Tensor %37088, %36980, %int1_34721 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34722 = torch.constant.int 1
    %37090 = torch.aten.add.Tensor %37089, %37075, %int1_34722 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34723 = torch.constant.int 1
    %37091 = torch.aten.add.Tensor %37090, %37078, %int1_34723 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34724 = torch.constant.int 1
    %37092 = torch.aten.add.Tensor %37091, %37081, %int1_34724 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34725 = torch.constant.int 1
    %37093 = torch.aten.add.Tensor %37092, %37084, %int1_34725 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34726 = torch.constant.int 1
    %37094 = torch.aten.add.Tensor %37093, %37087, %int1_34726 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
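    // Gather-and-sum onto @__device_3 (resident partial: %36986):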
    %37095 = torch_c.to_builtin_tensor %36968 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34727 = arith.constant 1 : index
    %dim_34728 = tensor.dim %37095, %c1_34727 : tensor<4x?x4096xf16>
    %37096 = flow.tensor.transfer %37095 : tensor<4x?x4096xf16>{%dim_34728} to #hal.device.promise<@__device_3>
    %37097 = torch_c.from_builtin_tensor %37096 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37098 = torch_c.to_builtin_tensor %36974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34729 = arith.constant 1 : index
    %dim_34730 = tensor.dim %37098, %c1_34729 : tensor<4x?x4096xf16>
    %37099 = flow.tensor.transfer %37098 : tensor<4x?x4096xf16>{%dim_34730} to #hal.device.promise<@__device_3>
    %37100 = torch_c.from_builtin_tensor %37099 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37101 = torch_c.to_builtin_tensor %36980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34731 = arith.constant 1 : index
    %dim_34732 = tensor.dim %37101, %c1_34731 : tensor<4x?x4096xf16>
    %37102 = flow.tensor.transfer %37101 : tensor<4x?x4096xf16>{%dim_34732} to #hal.device.promise<@__device_3>
    %37103 = torch_c.from_builtin_tensor %37102 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37104 = torch_c.to_builtin_tensor %36992 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34733 = arith.constant 1 : index
    %dim_34734 = tensor.dim %37104, %c1_34733 : tensor<4x?x4096xf16>
    %37105 = flow.tensor.transfer %37104 : tensor<4x?x4096xf16>{%dim_34734} to #hal.device.promise<@__device_3>
    %37106 = torch_c.from_builtin_tensor %37105 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37107 = torch_c.to_builtin_tensor %36998 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34735 = arith.constant 1 : index
    %dim_34736 = tensor.dim %37107, %c1_34735 : tensor<4x?x4096xf16>
    %37108 = flow.tensor.transfer %37107 : tensor<4x?x4096xf16>{%dim_34736} to #hal.device.promise<@__device_3>
    %37109 = torch_c.from_builtin_tensor %37108 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37110 = torch_c.to_builtin_tensor %37004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34737 = arith.constant 1 : index
    %dim_34738 = tensor.dim %37110, %c1_34737 : tensor<4x?x4096xf16>
    %37111 = flow.tensor.transfer %37110 : tensor<4x?x4096xf16>{%dim_34738} to #hal.device.promise<@__device_3>
    %37112 = torch_c.from_builtin_tensor %37111 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37113 = torch_c.to_builtin_tensor %37010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34739 = arith.constant 1 : index
    %dim_34740 = tensor.dim %37113, %c1_34739 : tensor<4x?x4096xf16>
    %37114 = flow.tensor.transfer %37113 : tensor<4x?x4096xf16>{%dim_34740} to #hal.device.promise<@__device_3>
    %37115 = torch_c.from_builtin_tensor %37114 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34741 = torch.constant.int 1
    %37116 = torch.aten.add.Tensor %37097, %37100, %int1_34741 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34742 = torch.constant.int 1
    %37117 = torch.aten.add.Tensor %37116, %37103, %int1_34742 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34743 = torch.constant.int 1
    %37118 = torch.aten.add.Tensor %37117, %36986, %int1_34743 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34744 = torch.constant.int 1
    %37119 = torch.aten.add.Tensor %37118, %37106, %int1_34744 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34745 = torch.constant.int 1
    %37120 = torch.aten.add.Tensor %37119, %37109, %int1_34745 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34746 = torch.constant.int 1
    %37121 = torch.aten.add.Tensor %37120, %37112, %int1_34746 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34747 = torch.constant.int 1
    %37122 = torch.aten.add.Tensor %37121, %37115, %int1_34747 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
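    // Gather-and-sum onto @__device_4 (resident partial: %36992):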
    %37123 = torch_c.to_builtin_tensor %36968 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34748 = arith.constant 1 : index
    %dim_34749 = tensor.dim %37123, %c1_34748 : tensor<4x?x4096xf16>
    %37124 = flow.tensor.transfer %37123 : tensor<4x?x4096xf16>{%dim_34749} to #hal.device.promise<@__device_4>
    %37125 = torch_c.from_builtin_tensor %37124 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37126 = torch_c.to_builtin_tensor %36974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34750 = arith.constant 1 : index
    %dim_34751 = tensor.dim %37126, %c1_34750 : tensor<4x?x4096xf16>
    %37127 = flow.tensor.transfer %37126 : tensor<4x?x4096xf16>{%dim_34751} to #hal.device.promise<@__device_4>
    %37128 = torch_c.from_builtin_tensor %37127 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37129 = torch_c.to_builtin_tensor %36980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34752 = arith.constant 1 : index
    %dim_34753 = tensor.dim %37129, %c1_34752 : tensor<4x?x4096xf16>
    %37130 = flow.tensor.transfer %37129 : tensor<4x?x4096xf16>{%dim_34753} to #hal.device.promise<@__device_4>
    %37131 = torch_c.from_builtin_tensor %37130 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37132 = torch_c.to_builtin_tensor %36986 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34754 = arith.constant 1 : index
    %dim_34755 = tensor.dim %37132, %c1_34754 : tensor<4x?x4096xf16>
    %37133 = flow.tensor.transfer %37132 : tensor<4x?x4096xf16>{%dim_34755} to #hal.device.promise<@__device_4>
    %37134 = torch_c.from_builtin_tensor %37133 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37135 = torch_c.to_builtin_tensor %36998 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34756 = arith.constant 1 : index
    %dim_34757 = tensor.dim %37135, %c1_34756 : tensor<4x?x4096xf16>
    %37136 = flow.tensor.transfer %37135 : tensor<4x?x4096xf16>{%dim_34757} to #hal.device.promise<@__device_4>
    %37137 = torch_c.from_builtin_tensor %37136 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37138 = torch_c.to_builtin_tensor %37004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34758 = arith.constant 1 : index
    %dim_34759 = tensor.dim %37138, %c1_34758 : tensor<4x?x4096xf16>
    %37139 = flow.tensor.transfer %37138 : tensor<4x?x4096xf16>{%dim_34759} to #hal.device.promise<@__device_4>
    %37140 = torch_c.from_builtin_tensor %37139 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37141 = torch_c.to_builtin_tensor %37010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34760 = arith.constant 1 : index
    %dim_34761 = tensor.dim %37141, %c1_34760 : tensor<4x?x4096xf16>
    %37142 = flow.tensor.transfer %37141 : tensor<4x?x4096xf16>{%dim_34761} to #hal.device.promise<@__device_4>
    %37143 = torch_c.from_builtin_tensor %37142 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34762 = torch.constant.int 1
    %37144 = torch.aten.add.Tensor %37125, %37128, %int1_34762 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34763 = torch.constant.int 1
    %37145 = torch.aten.add.Tensor %37144, %37131, %int1_34763 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34764 = torch.constant.int 1
    %37146 = torch.aten.add.Tensor %37145, %37134, %int1_34764 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34765 = torch.constant.int 1
    %37147 = torch.aten.add.Tensor %37146, %36992, %int1_34765 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34766 = torch.constant.int 1
    %37148 = torch.aten.add.Tensor %37147, %37137, %int1_34766 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34767 = torch.constant.int 1
    %37149 = torch.aten.add.Tensor %37148, %37140, %int1_34767 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34768 = torch.constant.int 1
    %37150 = torch.aten.add.Tensor %37149, %37143, %int1_34768 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
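    // Gather-and-sum onto @__device_5 (resident partial: %36998):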
    %37151 = torch_c.to_builtin_tensor %36968 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34769 = arith.constant 1 : index
    %dim_34770 = tensor.dim %37151, %c1_34769 : tensor<4x?x4096xf16>
    %37152 = flow.tensor.transfer %37151 : tensor<4x?x4096xf16>{%dim_34770} to #hal.device.promise<@__device_5>
    %37153 = torch_c.from_builtin_tensor %37152 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37154 = torch_c.to_builtin_tensor %36974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34771 = arith.constant 1 : index
    %dim_34772 = tensor.dim %37154, %c1_34771 : tensor<4x?x4096xf16>
    %37155 = flow.tensor.transfer %37154 : tensor<4x?x4096xf16>{%dim_34772} to #hal.device.promise<@__device_5>
    %37156 = torch_c.from_builtin_tensor %37155 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37157 = torch_c.to_builtin_tensor %36980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34773 = arith.constant 1 : index
    %dim_34774 = tensor.dim %37157, %c1_34773 : tensor<4x?x4096xf16>
    %37158 = flow.tensor.transfer %37157 : tensor<4x?x4096xf16>{%dim_34774} to #hal.device.promise<@__device_5>
    %37159 = torch_c.from_builtin_tensor %37158 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37160 = torch_c.to_builtin_tensor %36986 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34775 = arith.constant 1 : index
    %dim_34776 = tensor.dim %37160, %c1_34775 : tensor<4x?x4096xf16>
    %37161 = flow.tensor.transfer %37160 : tensor<4x?x4096xf16>{%dim_34776} to #hal.device.promise<@__device_5>
    %37162 = torch_c.from_builtin_tensor %37161 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37163 = torch_c.to_builtin_tensor %36992 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34777 = arith.constant 1 : index
    %dim_34778 = tensor.dim %37163, %c1_34777 : tensor<4x?x4096xf16>
    %37164 = flow.tensor.transfer %37163 : tensor<4x?x4096xf16>{%dim_34778} to #hal.device.promise<@__device_5>
    %37165 = torch_c.from_builtin_tensor %37164 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37166 = torch_c.to_builtin_tensor %37004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34779 = arith.constant 1 : index
    %dim_34780 = tensor.dim %37166, %c1_34779 : tensor<4x?x4096xf16>
    %37167 = flow.tensor.transfer %37166 : tensor<4x?x4096xf16>{%dim_34780} to #hal.device.promise<@__device_5>
    %37168 = torch_c.from_builtin_tensor %37167 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37169 = torch_c.to_builtin_tensor %37010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34781 = arith.constant 1 : index
    %dim_34782 = tensor.dim %37169, %c1_34781 : tensor<4x?x4096xf16>
    %37170 = flow.tensor.transfer %37169 : tensor<4x?x4096xf16>{%dim_34782} to #hal.device.promise<@__device_5>
    %37171 = torch_c.from_builtin_tensor %37170 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34783 = torch.constant.int 1
    %37172 = torch.aten.add.Tensor %37153, %37156, %int1_34783 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34784 = torch.constant.int 1
    %37173 = torch.aten.add.Tensor %37172, %37159, %int1_34784 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34785 = torch.constant.int 1
    %37174 = torch.aten.add.Tensor %37173, %37162, %int1_34785 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34786 = torch.constant.int 1
    %37175 = torch.aten.add.Tensor %37174, %37165, %int1_34786 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34787 = torch.constant.int 1
    %37176 = torch.aten.add.Tensor %37175, %36998, %int1_34787 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34788 = torch.constant.int 1
    %37177 = torch.aten.add.Tensor %37176, %37168, %int1_34788 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34789 = torch.constant.int 1
    %37178 = torch.aten.add.Tensor %37177, %37171, %int1_34789 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
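    // Gather-and-sum onto @__device_6 (resident partial: %37004):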
    %37179 = torch_c.to_builtin_tensor %36968 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34790 = arith.constant 1 : index
    %dim_34791 = tensor.dim %37179, %c1_34790 : tensor<4x?x4096xf16>
    %37180 = flow.tensor.transfer %37179 : tensor<4x?x4096xf16>{%dim_34791} to #hal.device.promise<@__device_6>
    %37181 = torch_c.from_builtin_tensor %37180 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37182 = torch_c.to_builtin_tensor %36974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34792 = arith.constant 1 : index
    %dim_34793 = tensor.dim %37182, %c1_34792 : tensor<4x?x4096xf16>
    %37183 = flow.tensor.transfer %37182 : tensor<4x?x4096xf16>{%dim_34793} to #hal.device.promise<@__device_6>
    %37184 = torch_c.from_builtin_tensor %37183 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37185 = torch_c.to_builtin_tensor %36980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34794 = arith.constant 1 : index
    %dim_34795 = tensor.dim %37185, %c1_34794 : tensor<4x?x4096xf16>
    %37186 = flow.tensor.transfer %37185 : tensor<4x?x4096xf16>{%dim_34795} to #hal.device.promise<@__device_6>
    %37187 = torch_c.from_builtin_tensor %37186 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37188 = torch_c.to_builtin_tensor %36986 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34796 = arith.constant 1 : index
    %dim_34797 = tensor.dim %37188, %c1_34796 : tensor<4x?x4096xf16>
    %37189 = flow.tensor.transfer %37188 : tensor<4x?x4096xf16>{%dim_34797} to #hal.device.promise<@__device_6>
    %37190 = torch_c.from_builtin_tensor %37189 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37191 = torch_c.to_builtin_tensor %36992 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34798 = arith.constant 1 : index
    %dim_34799 = tensor.dim %37191, %c1_34798 : tensor<4x?x4096xf16>
    %37192 = flow.tensor.transfer %37191 : tensor<4x?x4096xf16>{%dim_34799} to #hal.device.promise<@__device_6>
    %37193 = torch_c.from_builtin_tensor %37192 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37194 = torch_c.to_builtin_tensor %36998 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34800 = arith.constant 1 : index
    %dim_34801 = tensor.dim %37194, %c1_34800 : tensor<4x?x4096xf16>
    %37195 = flow.tensor.transfer %37194 : tensor<4x?x4096xf16>{%dim_34801} to #hal.device.promise<@__device_6>
    %37196 = torch_c.from_builtin_tensor %37195 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37197 = torch_c.to_builtin_tensor %37010 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34802 = arith.constant 1 : index
    %dim_34803 = tensor.dim %37197, %c1_34802 : tensor<4x?x4096xf16>
    %37198 = flow.tensor.transfer %37197 : tensor<4x?x4096xf16>{%dim_34803} to #hal.device.promise<@__device_6>
    %37199 = torch_c.from_builtin_tensor %37198 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34804 = torch.constant.int 1
    %37200 = torch.aten.add.Tensor %37181, %37184, %int1_34804 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34805 = torch.constant.int 1
    %37201 = torch.aten.add.Tensor %37200, %37187, %int1_34805 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34806 = torch.constant.int 1
    %37202 = torch.aten.add.Tensor %37201, %37190, %int1_34806 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34807 = torch.constant.int 1
    %37203 = torch.aten.add.Tensor %37202, %37193, %int1_34807 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34808 = torch.constant.int 1
    %37204 = torch.aten.add.Tensor %37203, %37196, %int1_34808 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34809 = torch.constant.int 1
    %37205 = torch.aten.add.Tensor %37204, %37004, %int1_34809 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34810 = torch.constant.int 1
    %37206 = torch.aten.add.Tensor %37205, %37199, %int1_34810 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
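    // Gather-and-sum onto @__device_7 (resident partial: %37010):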
    %37207 = torch_c.to_builtin_tensor %36968 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34811 = arith.constant 1 : index
    %dim_34812 = tensor.dim %37207, %c1_34811 : tensor<4x?x4096xf16>
    %37208 = flow.tensor.transfer %37207 : tensor<4x?x4096xf16>{%dim_34812} to #hal.device.promise<@__device_7>
    %37209 = torch_c.from_builtin_tensor %37208 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37210 = torch_c.to_builtin_tensor %36974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34813 = arith.constant 1 : index
    %dim_34814 = tensor.dim %37210, %c1_34813 : tensor<4x?x4096xf16>
    %37211 = flow.tensor.transfer %37210 : tensor<4x?x4096xf16>{%dim_34814} to #hal.device.promise<@__device_7>
    %37212 = torch_c.from_builtin_tensor %37211 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37213 = torch_c.to_builtin_tensor %36980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34815 = arith.constant 1 : index
    %dim_34816 = tensor.dim %37213, %c1_34815 : tensor<4x?x4096xf16>
    %37214 = flow.tensor.transfer %37213 : tensor<4x?x4096xf16>{%dim_34816} to #hal.device.promise<@__device_7>
    %37215 = torch_c.from_builtin_tensor %37214 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37216 = torch_c.to_builtin_tensor %36986 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34817 = arith.constant 1 : index
    %dim_34818 = tensor.dim %37216, %c1_34817 : tensor<4x?x4096xf16>
    %37217 = flow.tensor.transfer %37216 : tensor<4x?x4096xf16>{%dim_34818} to #hal.device.promise<@__device_7>
    %37218 = torch_c.from_builtin_tensor %37217 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37219 = torch_c.to_builtin_tensor %36992 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34819 = arith.constant 1 : index
    %dim_34820 = tensor.dim %37219, %c1_34819 : tensor<4x?x4096xf16>
    %37220 = flow.tensor.transfer %37219 : tensor<4x?x4096xf16>{%dim_34820} to #hal.device.promise<@__device_7>
    %37221 = torch_c.from_builtin_tensor %37220 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37222 = torch_c.to_builtin_tensor %36998 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34821 = arith.constant 1 : index
    %dim_34822 = tensor.dim %37222, %c1_34821 : tensor<4x?x4096xf16>
    %37223 = flow.tensor.transfer %37222 : tensor<4x?x4096xf16>{%dim_34822} to #hal.device.promise<@__device_7>
    %37224 = torch_c.from_builtin_tensor %37223 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37225 = torch_c.to_builtin_tensor %37004 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_34823 = arith.constant 1 : index
    %dim_34824 = tensor.dim %37225, %c1_34823 : tensor<4x?x4096xf16>
    %37226 = flow.tensor.transfer %37225 : tensor<4x?x4096xf16>{%dim_34824} to #hal.device.promise<@__device_7>
    %37227 = torch_c.from_builtin_tensor %37226 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34825 = torch.constant.int 1
    %37228 = torch.aten.add.Tensor %37209, %37212, %int1_34825 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34826 = torch.constant.int 1
    %37229 = torch.aten.add.Tensor %37228, %37215, %int1_34826 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34827 = torch.constant.int 1
    %37230 = torch.aten.add.Tensor %37229, %37218, %int1_34827 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34828 = torch.constant.int 1
    %37231 = torch.aten.add.Tensor %37230, %37221, %int1_34828 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34829 = torch.constant.int 1
    %37232 = torch.aten.add.Tensor %37231, %37224, %int1_34829 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34830 = torch.constant.int 1
    %37233 = torch.aten.add.Tensor %37232, %37227, %int1_34830 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34831 = torch.constant.int 1
    %37234 = torch.aten.add.Tensor %37233, %37010, %int1_34831 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
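    // The next eight adds look like the residual connection: each device's
    // prior hidden state (%35894..%35901) plus its fully reduced block output
    // (%37038, %37066, %37094, %37122, %37150, %37178, %37206, %37234).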
    %int1_34832 = torch.constant.int 1
    %37235 = torch.aten.add.Tensor %35894, %37038, %int1_34832 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34833 = torch.constant.int 1
    %37236 = torch.aten.add.Tensor %35895, %37066, %int1_34833 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34834 = torch.constant.int 1
    %37237 = torch.aten.add.Tensor %35896, %37094, %int1_34834 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34835 = torch.constant.int 1
    %37238 = torch.aten.add.Tensor %35897, %37122, %int1_34835 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34836 = torch.constant.int 1
    %37239 = torch.aten.add.Tensor %35898, %37150, %int1_34836 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34837 = torch.constant.int 1
    %37240 = torch.aten.add.Tensor %35899, %37178, %int1_34837 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34838 = torch.constant.int 1
    %37241 = torch.aten.add.Tensor %35900, %37206, %int1_34838 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_34839 = torch.constant.int 1
    %37242 = torch.aten.add.Tensor %35901, %37234, %int1_34839 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
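    // The sequence below matches an RMSNorm over the hidden dimension, run
    // once per device replica: upcast f16 -> f32 (torch dtype 6) for stable
    // statistics, then mean(x^2), + eps, rsqrt, scale, per-channel gain, and
    // a downcast back to f16.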
    %int6_34840 = torch.constant.int 6
    %37243 = torch.prims.convert_element_type %37235, %int6_34840 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_34841 = torch.constant.int 6
    %37244 = torch.prims.convert_element_type %37236, %int6_34841 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_34842 = torch.constant.int 6
    %37245 = torch.prims.convert_element_type %37237, %int6_34842 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_34843 = torch.constant.int 6
    %37246 = torch.prims.convert_element_type %37238, %int6_34843 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_34844 = torch.constant.int 6
    %37247 = torch.prims.convert_element_type %37239, %int6_34844 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_34845 = torch.constant.int 6
    %37248 = torch.prims.convert_element_type %37240, %int6_34845 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_34846 = torch.constant.int 6
    %37249 = torch.prims.convert_element_type %37241, %int6_34846 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_34847 = torch.constant.int 6
    %37250 = torch.prims.convert_element_type %37242, %int6_34847 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
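    // Square, then mean over the last (hidden) dimension with keepdim=true,
    // i.e. mean(x^2) per token: [4,?,4096] -> [4,?,1].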
    %int2_34848 = torch.constant.int 2
    %37251 = torch.aten.pow.Tensor_Scalar %37243, %int2_34848 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_34849 = torch.constant.int 2
    %37252 = torch.aten.pow.Tensor_Scalar %37244, %int2_34849 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_34850 = torch.constant.int 2
    %37253 = torch.aten.pow.Tensor_Scalar %37245, %int2_34850 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_34851 = torch.constant.int 2
    %37254 = torch.aten.pow.Tensor_Scalar %37246, %int2_34851 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_34852 = torch.constant.int 2
    %37255 = torch.aten.pow.Tensor_Scalar %37247, %int2_34852 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_34853 = torch.constant.int 2
    %37256 = torch.aten.pow.Tensor_Scalar %37248, %int2_34853 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_34854 = torch.constant.int 2
    %37257 = torch.aten.pow.Tensor_Scalar %37249, %int2_34854 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_34855 = torch.constant.int 2
    %37258 = torch.aten.pow.Tensor_Scalar %37250, %int2_34855 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int-1_34856 = torch.constant.int -1
    %37259 = torch.prim.ListConstruct %int-1_34856 : (!torch.int) -> !torch.list<int>
    %true_34857 = torch.constant.bool true
    %none_34858 = torch.constant.none
    %37260 = torch.aten.mean.dim %37251, %37259, %true_34857, %none_34858 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_34859 = torch.constant.int -1
    %37261 = torch.prim.ListConstruct %int-1_34859 : (!torch.int) -> !torch.list<int>
    %true_34860 = torch.constant.bool true
    %none_34861 = torch.constant.none
    %37262 = torch.aten.mean.dim %37252, %37261, %true_34860, %none_34861 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_34862 = torch.constant.int -1
    %37263 = torch.prim.ListConstruct %int-1_34862 : (!torch.int) -> !torch.list<int>
    %true_34863 = torch.constant.bool true
    %none_34864 = torch.constant.none
    %37264 = torch.aten.mean.dim %37253, %37263, %true_34863, %none_34864 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_34865 = torch.constant.int -1
    %37265 = torch.prim.ListConstruct %int-1_34865 : (!torch.int) -> !torch.list<int>
    %true_34866 = torch.constant.bool true
    %none_34867 = torch.constant.none
    %37266 = torch.aten.mean.dim %37254, %37265, %true_34866, %none_34867 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_34868 = torch.constant.int -1
    %37267 = torch.prim.ListConstruct %int-1_34868 : (!torch.int) -> !torch.list<int>
    %true_34869 = torch.constant.bool true
    %none_34870 = torch.constant.none
    %37268 = torch.aten.mean.dim %37255, %37267, %true_34869, %none_34870 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_34871 = torch.constant.int -1
    %37269 = torch.prim.ListConstruct %int-1_34871 : (!torch.int) -> !torch.list<int>
    %true_34872 = torch.constant.bool true
    %none_34873 = torch.constant.none
    %37270 = torch.aten.mean.dim %37256, %37269, %true_34872, %none_34873 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_34874 = torch.constant.int -1
    %37271 = torch.prim.ListConstruct %int-1_34874 : (!torch.int) -> !torch.list<int>
    %true_34875 = torch.constant.bool true
    %none_34876 = torch.constant.none
    %37272 = torch.aten.mean.dim %37257, %37271, %true_34875, %none_34876 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_34877 = torch.constant.int -1
    %37273 = torch.prim.ListConstruct %int-1_34877 : (!torch.int) -> !torch.list<int>
    %true_34878 = torch.constant.bool true
    %none_34879 = torch.constant.none
    %37274 = torch.aten.mean.dim %37258, %37273, %true_34878, %none_34879 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
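    // Add the variance epsilon; 9.9999997473787516E-6 is 1.0e-5 rounded to f32.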
    %float9.999990e-06_34880 = torch.constant.float 9.9999997473787516E-6
    %int1_34881 = torch.constant.int 1
    %37275 = torch.aten.add.Scalar %37260, %float9.999990e-06_34880, %int1_34881 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_34882 = torch.constant.float 9.9999997473787516E-6
    %int1_34883 = torch.constant.int 1
    %37276 = torch.aten.add.Scalar %37262, %float9.999990e-06_34882, %int1_34883 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_34884 = torch.constant.float 9.9999997473787516E-6
    %int1_34885 = torch.constant.int 1
    %37277 = torch.aten.add.Scalar %37264, %float9.999990e-06_34884, %int1_34885 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_34886 = torch.constant.float 9.9999997473787516E-6
    %int1_34887 = torch.constant.int 1
    %37278 = torch.aten.add.Scalar %37266, %float9.999990e-06_34886, %int1_34887 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_34888 = torch.constant.float 9.9999997473787516E-6
    %int1_34889 = torch.constant.int 1
    %37279 = torch.aten.add.Scalar %37268, %float9.999990e-06_34888, %int1_34889 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_34890 = torch.constant.float 9.9999997473787516E-6
    %int1_34891 = torch.constant.int 1
    %37280 = torch.aten.add.Scalar %37270, %float9.999990e-06_34890, %int1_34891 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_34892 = torch.constant.float 9.9999997473787516E-6
    %int1_34893 = torch.constant.int 1
    %37281 = torch.aten.add.Scalar %37272, %float9.999990e-06_34892, %int1_34893 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_34894 = torch.constant.float 9.9999997473787516E-6
    %int1_34895 = torch.constant.int 1
    %37282 = torch.aten.add.Scalar %37274, %float9.999990e-06_34894, %int1_34895 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
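    // Normalize: x * rsqrt(mean(x^2) + eps), broadcasting [4,?,1] over
    // [4,?,4096].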
    %37283 = torch.aten.rsqrt %37275 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37284 = torch.aten.rsqrt %37276 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37285 = torch.aten.rsqrt %37277 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37286 = torch.aten.rsqrt %37278 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37287 = torch.aten.rsqrt %37279 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37288 = torch.aten.rsqrt %37280 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37289 = torch.aten.rsqrt %37281 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37290 = torch.aten.rsqrt %37282 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37291 = torch.aten.mul.Tensor %37243, %37283 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37292 = torch.aten.mul.Tensor %37244, %37284 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37293 = torch.aten.mul.Tensor %37245, %37285 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37294 = torch.aten.mul.Tensor %37246, %37286 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37295 = torch.aten.mul.Tensor %37247, %37287 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37296 = torch.aten.mul.Tensor %37248, %37288 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37297 = torch.aten.mul.Tensor %37249, %37289 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37298 = torch.aten.mul.Tensor %37250, %37290 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
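    // Apply the learned per-channel gain: %1344..%1351 hold each device's copy
    // of a [4096]-element norm weight (presumably this block's ffn_norm, given
    // the FFN projections that follow).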
    %37299 = torch.aten.mul.Tensor %1344, %37291 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37300 = torch.aten.mul.Tensor %1345, %37292 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37301 = torch.aten.mul.Tensor %1346, %37293 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37302 = torch.aten.mul.Tensor %1347, %37294 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37303 = torch.aten.mul.Tensor %1348, %37295 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37304 = torch.aten.mul.Tensor %1349, %37296 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37305 = torch.aten.mul.Tensor %1350, %37297 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37306 = torch.aten.mul.Tensor %1351, %37298 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
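    // Downcast the normalized activations back to f16 (torch dtype 5).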
    %int5_34896 = torch.constant.int 5
    %37307 = torch.prims.convert_element_type %37299, %int5_34896 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_34897 = torch.constant.int 5
    %37308 = torch.prims.convert_element_type %37300, %int5_34897 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_34898 = torch.constant.int 5
    %37309 = torch.prims.convert_element_type %37301, %int5_34898 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_34899 = torch.constant.int 5
    %37310 = torch.prims.convert_element_type %37302, %int5_34899 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_34900 = torch.constant.int 5
    %37311 = torch.prims.convert_element_type %37303, %int5_34900 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_34901 = torch.constant.int 5
    %37312 = torch.prims.convert_element_type %37304, %int5_34901 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_34902 = torch.constant.int 5
    %37313 = torch.prims.convert_element_type %37305, %int5_34902 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_34903 = torch.constant.int 5
    %37314 = torch.prims.convert_element_type %37306, %int5_34903 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
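    // Transpose the first set of per-device FFN weight shards from [1792, 4096] to
    // [4096, 1792] so they can serve as the RHS of torch.aten.mm. 8 x 1792 = 14336,
    // so this is presumably an 8-way column split of the FFN intermediate dimension.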
    %int1_34904 = torch.constant.int 1
    %int0_34905 = torch.constant.int 0
    %37315 = torch.prim.ListConstruct %int1_34904, %int0_34905 : (!torch.int, !torch.int) -> !torch.list<int>
    %37316 = torch.aten.permute %1352, %37315 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34906 = torch.constant.int 1
    %int0_34907 = torch.constant.int 0
    %37317 = torch.prim.ListConstruct %int1_34906, %int0_34907 : (!torch.int, !torch.int) -> !torch.list<int>
    %37318 = torch.aten.permute %1353, %37317 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34908 = torch.constant.int 1
    %int0_34909 = torch.constant.int 0
    %37319 = torch.prim.ListConstruct %int1_34908, %int0_34909 : (!torch.int, !torch.int) -> !torch.list<int>
    %37320 = torch.aten.permute %1354, %37319 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34910 = torch.constant.int 1
    %int0_34911 = torch.constant.int 0
    %37321 = torch.prim.ListConstruct %int1_34910, %int0_34911 : (!torch.int, !torch.int) -> !torch.list<int>
    %37322 = torch.aten.permute %1355, %37321 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34912 = torch.constant.int 1
    %int0_34913 = torch.constant.int 0
    %37323 = torch.prim.ListConstruct %int1_34912, %int0_34913 : (!torch.int, !torch.int) -> !torch.list<int>
    %37324 = torch.aten.permute %1356, %37323 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34914 = torch.constant.int 1
    %int0_34915 = torch.constant.int 0
    %37325 = torch.prim.ListConstruct %int1_34914, %int0_34915 : (!torch.int, !torch.int) -> !torch.list<int>
    %37326 = torch.aten.permute %1357, %37325 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34916 = torch.constant.int 1
    %int0_34917 = torch.constant.int 0
    %37327 = torch.prim.ListConstruct %int1_34916, %int0_34917 : (!torch.int, !torch.int) -> !torch.list<int>
    %37328 = torch.aten.permute %1358, %37327 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34918 = torch.constant.int 1
    %int0_34919 = torch.constant.int 0
    %37329 = torch.prim.ListConstruct %int1_34918, %int0_34919 : (!torch.int, !torch.int) -> !torch.list<int>
    %37330 = torch.aten.permute %1359, %37329 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
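    // First projection, one mm per shard: flatten [4, ?, 4096] to [4*?, 4096] (4 * s0*16
    // = s0*64 in the symbolic-shape maps), multiply by the transposed weight shard, then
    // restore the [4, ?, 1792] batch shape.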
    %int4_34920 = torch.constant.int 4
    %37331 = torch.aten.mul.int %int4_34920, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34921 = torch.constant.int 4096
    %37332 = torch.prim.ListConstruct %37331, %int4096_34921 : (!torch.int, !torch.int) -> !torch.list<int>
    %37333 = torch.aten.view %37307, %37332 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37333, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37334 = torch.aten.mm %37333, %37316 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37334, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34922 = torch.constant.int 4
    %int1792_34923 = torch.constant.int 1792
    %37335 = torch.prim.ListConstruct %int4_34922, %2482, %int1792_34923 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37336 = torch.aten.view %37334, %37335 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34924 = torch.constant.int 4
    %37337 = torch.aten.mul.int %int4_34924, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34925 = torch.constant.int 4096
    %37338 = torch.prim.ListConstruct %37337, %int4096_34925 : (!torch.int, !torch.int) -> !torch.list<int>
    %37339 = torch.aten.view %37308, %37338 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37339, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37340 = torch.aten.mm %37339, %37318 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37340, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34926 = torch.constant.int 4
    %int1792_34927 = torch.constant.int 1792
    %37341 = torch.prim.ListConstruct %int4_34926, %2482, %int1792_34927 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37342 = torch.aten.view %37340, %37341 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34928 = torch.constant.int 4
    %37343 = torch.aten.mul.int %int4_34928, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34929 = torch.constant.int 4096
    %37344 = torch.prim.ListConstruct %37343, %int4096_34929 : (!torch.int, !torch.int) -> !torch.list<int>
    %37345 = torch.aten.view %37309, %37344 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37345, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37346 = torch.aten.mm %37345, %37320 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37346, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34930 = torch.constant.int 4
    %int1792_34931 = torch.constant.int 1792
    %37347 = torch.prim.ListConstruct %int4_34930, %2482, %int1792_34931 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37348 = torch.aten.view %37346, %37347 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34932 = torch.constant.int 4
    %37349 = torch.aten.mul.int %int4_34932, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34933 = torch.constant.int 4096
    %37350 = torch.prim.ListConstruct %37349, %int4096_34933 : (!torch.int, !torch.int) -> !torch.list<int>
    %37351 = torch.aten.view %37310, %37350 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37351, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37352 = torch.aten.mm %37351, %37322 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37352, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34934 = torch.constant.int 4
    %int1792_34935 = torch.constant.int 1792
    %37353 = torch.prim.ListConstruct %int4_34934, %2482, %int1792_34935 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37354 = torch.aten.view %37352, %37353 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34936 = torch.constant.int 4
    %37355 = torch.aten.mul.int %int4_34936, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34937 = torch.constant.int 4096
    %37356 = torch.prim.ListConstruct %37355, %int4096_34937 : (!torch.int, !torch.int) -> !torch.list<int>
    %37357 = torch.aten.view %37311, %37356 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37357, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37358 = torch.aten.mm %37357, %37324 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37358, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34938 = torch.constant.int 4
    %int1792_34939 = torch.constant.int 1792
    %37359 = torch.prim.ListConstruct %int4_34938, %2482, %int1792_34939 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37360 = torch.aten.view %37358, %37359 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34940 = torch.constant.int 4
    %37361 = torch.aten.mul.int %int4_34940, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34941 = torch.constant.int 4096
    %37362 = torch.prim.ListConstruct %37361, %int4096_34941 : (!torch.int, !torch.int) -> !torch.list<int>
    %37363 = torch.aten.view %37312, %37362 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37363, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37364 = torch.aten.mm %37363, %37326 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37364, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34942 = torch.constant.int 4
    %int1792_34943 = torch.constant.int 1792
    %37365 = torch.prim.ListConstruct %int4_34942, %2482, %int1792_34943 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37366 = torch.aten.view %37364, %37365 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34944 = torch.constant.int 4
    %37367 = torch.aten.mul.int %int4_34944, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34945 = torch.constant.int 4096
    %37368 = torch.prim.ListConstruct %37367, %int4096_34945 : (!torch.int, !torch.int) -> !torch.list<int>
    %37369 = torch.aten.view %37313, %37368 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37369, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37370 = torch.aten.mm %37369, %37328 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37370, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34946 = torch.constant.int 4
    %int1792_34947 = torch.constant.int 1792
    %37371 = torch.prim.ListConstruct %int4_34946, %2482, %int1792_34947 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37372 = torch.aten.view %37370, %37371 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34948 = torch.constant.int 4
    %37373 = torch.aten.mul.int %int4_34948, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34949 = torch.constant.int 4096
    %37374 = torch.prim.ListConstruct %37373, %int4096_34949 : (!torch.int, !torch.int) -> !torch.list<int>
    %37375 = torch.aten.view %37314, %37374 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37375, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37376 = torch.aten.mm %37375, %37330 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37376, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34950 = torch.constant.int 4
    %int1792_34951 = torch.constant.int 1792
    %37377 = torch.prim.ListConstruct %int4_34950, %2482, %int1792_34951 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37378 = torch.aten.view %37376, %37377 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
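    // Apply SiLU to each shard's projection output; the silu(...) * ... combine further
    // down marks these as the gate half of a SwiGLU-style FFN.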
    %37379 = torch.aten.silu %37336 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37380 = torch.aten.silu %37342 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37381 = torch.aten.silu %37348 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37382 = torch.aten.silu %37354 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37383 = torch.aten.silu %37360 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37384 = torch.aten.silu %37366 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37385 = torch.aten.silu %37372 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37386 = torch.aten.silu %37378 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
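    // Transpose the second set of per-device [1792, 4096] weight shards (the up
    // projection), mirroring the gate-weight transposes above.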
    %int1_34952 = torch.constant.int 1
    %int0_34953 = torch.constant.int 0
    %37387 = torch.prim.ListConstruct %int1_34952, %int0_34953 : (!torch.int, !torch.int) -> !torch.list<int>
    %37388 = torch.aten.permute %1360, %37387 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34954 = torch.constant.int 1
    %int0_34955 = torch.constant.int 0
    %37389 = torch.prim.ListConstruct %int1_34954, %int0_34955 : (!torch.int, !torch.int) -> !torch.list<int>
    %37390 = torch.aten.permute %1361, %37389 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34956 = torch.constant.int 1
    %int0_34957 = torch.constant.int 0
    %37391 = torch.prim.ListConstruct %int1_34956, %int0_34957 : (!torch.int, !torch.int) -> !torch.list<int>
    %37392 = torch.aten.permute %1362, %37391 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34958 = torch.constant.int 1
    %int0_34959 = torch.constant.int 0
    %37393 = torch.prim.ListConstruct %int1_34958, %int0_34959 : (!torch.int, !torch.int) -> !torch.list<int>
    %37394 = torch.aten.permute %1363, %37393 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34960 = torch.constant.int 1
    %int0_34961 = torch.constant.int 0
    %37395 = torch.prim.ListConstruct %int1_34960, %int0_34961 : (!torch.int, !torch.int) -> !torch.list<int>
    %37396 = torch.aten.permute %1364, %37395 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34962 = torch.constant.int 1
    %int0_34963 = torch.constant.int 0
    %37397 = torch.prim.ListConstruct %int1_34962, %int0_34963 : (!torch.int, !torch.int) -> !torch.list<int>
    %37398 = torch.aten.permute %1365, %37397 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34964 = torch.constant.int 1
    %int0_34965 = torch.constant.int 0
    %37399 = torch.prim.ListConstruct %int1_34964, %int0_34965 : (!torch.int, !torch.int) -> !torch.list<int>
    %37400 = torch.aten.permute %1366, %37399 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_34966 = torch.constant.int 1
    %int0_34967 = torch.constant.int 0
    %37401 = torch.prim.ListConstruct %int1_34966, %int0_34967 : (!torch.int, !torch.int) -> !torch.list<int>
    %37402 = torch.aten.permute %1367, %37401 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
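    // Up projection: the same flatten / mm / unflatten pattern as the gate projection,
    // reading from the same f16 inputs %37307..%37314.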
    %int4_34968 = torch.constant.int 4
    %37403 = torch.aten.mul.int %int4_34968, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34969 = torch.constant.int 4096
    %37404 = torch.prim.ListConstruct %37403, %int4096_34969 : (!torch.int, !torch.int) -> !torch.list<int>
    %37405 = torch.aten.view %37307, %37404 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37405, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37406 = torch.aten.mm %37405, %37388 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37406, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34970 = torch.constant.int 4
    %int1792_34971 = torch.constant.int 1792
    %37407 = torch.prim.ListConstruct %int4_34970, %2482, %int1792_34971 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37408 = torch.aten.view %37406, %37407 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34972 = torch.constant.int 4
    %37409 = torch.aten.mul.int %int4_34972, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34973 = torch.constant.int 4096
    %37410 = torch.prim.ListConstruct %37409, %int4096_34973 : (!torch.int, !torch.int) -> !torch.list<int>
    %37411 = torch.aten.view %37308, %37410 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37411, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37412 = torch.aten.mm %37411, %37390 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37412, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34974 = torch.constant.int 4
    %int1792_34975 = torch.constant.int 1792
    %37413 = torch.prim.ListConstruct %int4_34974, %2482, %int1792_34975 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37414 = torch.aten.view %37412, %37413 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34976 = torch.constant.int 4
    %37415 = torch.aten.mul.int %int4_34976, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34977 = torch.constant.int 4096
    %37416 = torch.prim.ListConstruct %37415, %int4096_34977 : (!torch.int, !torch.int) -> !torch.list<int>
    %37417 = torch.aten.view %37309, %37416 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37417, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37418 = torch.aten.mm %37417, %37392 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37418, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34978 = torch.constant.int 4
    %int1792_34979 = torch.constant.int 1792
    %37419 = torch.prim.ListConstruct %int4_34978, %2482, %int1792_34979 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37420 = torch.aten.view %37418, %37419 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34980 = torch.constant.int 4
    %37421 = torch.aten.mul.int %int4_34980, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34981 = torch.constant.int 4096
    %37422 = torch.prim.ListConstruct %37421, %int4096_34981 : (!torch.int, !torch.int) -> !torch.list<int>
    %37423 = torch.aten.view %37310, %37422 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37423, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37424 = torch.aten.mm %37423, %37394 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37424, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34982 = torch.constant.int 4
    %int1792_34983 = torch.constant.int 1792
    %37425 = torch.prim.ListConstruct %int4_34982, %2482, %int1792_34983 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37426 = torch.aten.view %37424, %37425 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34984 = torch.constant.int 4
    %37427 = torch.aten.mul.int %int4_34984, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34985 = torch.constant.int 4096
    %37428 = torch.prim.ListConstruct %37427, %int4096_34985 : (!torch.int, !torch.int) -> !torch.list<int>
    %37429 = torch.aten.view %37311, %37428 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37429, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37430 = torch.aten.mm %37429, %37396 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37430, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34986 = torch.constant.int 4
    %int1792_34987 = torch.constant.int 1792
    %37431 = torch.prim.ListConstruct %int4_34986, %2482, %int1792_34987 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37432 = torch.aten.view %37430, %37431 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34988 = torch.constant.int 4
    %37433 = torch.aten.mul.int %int4_34988, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34989 = torch.constant.int 4096
    %37434 = torch.prim.ListConstruct %37433, %int4096_34989 : (!torch.int, !torch.int) -> !torch.list<int>
    %37435 = torch.aten.view %37312, %37434 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37435, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37436 = torch.aten.mm %37435, %37398 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37436, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34990 = torch.constant.int 4
    %int1792_34991 = torch.constant.int 1792
    %37437 = torch.prim.ListConstruct %int4_34990, %2482, %int1792_34991 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37438 = torch.aten.view %37436, %37437 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34992 = torch.constant.int 4
    %37439 = torch.aten.mul.int %int4_34992, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34993 = torch.constant.int 4096
    %37440 = torch.prim.ListConstruct %37439, %int4096_34993 : (!torch.int, !torch.int) -> !torch.list<int>
    %37441 = torch.aten.view %37313, %37440 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37441, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37442 = torch.aten.mm %37441, %37400 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37442, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34994 = torch.constant.int 4
    %int1792_34995 = torch.constant.int 1792
    %37443 = torch.prim.ListConstruct %int4_34994, %2482, %int1792_34995 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37444 = torch.aten.view %37442, %37443 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_34996 = torch.constant.int 4
    %37445 = torch.aten.mul.int %int4_34996, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_34997 = torch.constant.int 4096
    %37446 = torch.prim.ListConstruct %37445, %int4096_34997 : (!torch.int, !torch.int) -> !torch.list<int>
    %37447 = torch.aten.view %37314, %37446 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37447, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37448 = torch.aten.mm %37447, %37402 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37448, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_34998 = torch.constant.int 4
    %int1792_34999 = torch.constant.int 1792
    %37449 = torch.prim.ListConstruct %int4_34998, %2482, %int1792_34999 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37450 = torch.aten.view %37448, %37449 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
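    // SwiGLU combine: elementwise silu(gate) * up on each shard, still [4, ?, 1792] f16.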
    %37451 = torch.aten.mul.Tensor %37379, %37408 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37452 = torch.aten.mul.Tensor %37380, %37414 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37453 = torch.aten.mul.Tensor %37381, %37420 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37454 = torch.aten.mul.Tensor %37382, %37426 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37455 = torch.aten.mul.Tensor %37383, %37432 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37456 = torch.aten.mul.Tensor %37384, %37438 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37457 = torch.aten.mul.Tensor %37385, %37444 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %37458 = torch.aten.mul.Tensor %37386, %37450 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %37458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
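    // Transpose the per-device down-projection weight shards from [4096, 1792] to
    // [1792, 4096].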
    %int1_35000 = torch.constant.int 1
    %int0_35001 = torch.constant.int 0
    %37459 = torch.prim.ListConstruct %int1_35000, %int0_35001 : (!torch.int, !torch.int) -> !torch.list<int>
    %37460 = torch.aten.permute %1368, %37459 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_35002 = torch.constant.int 1
    %int0_35003 = torch.constant.int 0
    %37461 = torch.prim.ListConstruct %int1_35002, %int0_35003 : (!torch.int, !torch.int) -> !torch.list<int>
    %37462 = torch.aten.permute %1369, %37461 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_35004 = torch.constant.int 1
    %int0_35005 = torch.constant.int 0
    %37463 = torch.prim.ListConstruct %int1_35004, %int0_35005 : (!torch.int, !torch.int) -> !torch.list<int>
    %37464 = torch.aten.permute %1370, %37463 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_35006 = torch.constant.int 1
    %int0_35007 = torch.constant.int 0
    %37465 = torch.prim.ListConstruct %int1_35006, %int0_35007 : (!torch.int, !torch.int) -> !torch.list<int>
    %37466 = torch.aten.permute %1371, %37465 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_35008 = torch.constant.int 1
    %int0_35009 = torch.constant.int 0
    %37467 = torch.prim.ListConstruct %int1_35008, %int0_35009 : (!torch.int, !torch.int) -> !torch.list<int>
    %37468 = torch.aten.permute %1372, %37467 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_35010 = torch.constant.int 1
    %int0_35011 = torch.constant.int 0
    %37469 = torch.prim.ListConstruct %int1_35010, %int0_35011 : (!torch.int, !torch.int) -> !torch.list<int>
    %37470 = torch.aten.permute %1373, %37469 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_35012 = torch.constant.int 1
    %int0_35013 = torch.constant.int 0
    %37471 = torch.prim.ListConstruct %int1_35012, %int0_35013 : (!torch.int, !torch.int) -> !torch.list<int>
    %37472 = torch.aten.permute %1374, %37471 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_35014 = torch.constant.int 1
    %int0_35015 = torch.constant.int 0
    %37473 = torch.prim.ListConstruct %int1_35014, %int0_35015 : (!torch.int, !torch.int) -> !torch.list<int>
    %37474 = torch.aten.permute %1375, %37473 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
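    // Down projection per shard: the sequence extent is re-queried via aten.size.int here
    // (rather than reusing %2482), then flatten [4, ?, 1792], mm against the transposed
    // down shard, and reshape to [4, ?, 4096]. Because the intermediate dimension is
    // column-split, each per-device result is a partial sum of the full FFN output.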
    %int1_35016 = torch.constant.int 1
    %37475 = torch.aten.size.int %37336, %int1_35016 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_35017 = torch.constant.int 4
    %37476 = torch.aten.mul.int %int4_35017, %37475 : !torch.int, !torch.int -> !torch.int
    %int1792_35018 = torch.constant.int 1792
    %37477 = torch.prim.ListConstruct %37476, %int1792_35018 : (!torch.int, !torch.int) -> !torch.list<int>
    %37478 = torch.aten.view %37451, %37477 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37478, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %37479 = torch.aten.mm %37478, %37460 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37479, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_35019 = torch.constant.int 4
    %int4096_35020 = torch.constant.int 4096
    %37480 = torch.prim.ListConstruct %int4_35019, %37475, %int4096_35020 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37481 = torch.aten.view %37479, %37480 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35021 = torch.constant.int 1
    %37482 = torch.aten.size.int %37342, %int1_35021 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_35022 = torch.constant.int 4
    %37483 = torch.aten.mul.int %int4_35022, %37482 : !torch.int, !torch.int -> !torch.int
    %int1792_35023 = torch.constant.int 1792
    %37484 = torch.prim.ListConstruct %37483, %int1792_35023 : (!torch.int, !torch.int) -> !torch.list<int>
    %37485 = torch.aten.view %37452, %37484 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37485, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %37486 = torch.aten.mm %37485, %37462 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37486, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_35024 = torch.constant.int 4
    %int4096_35025 = torch.constant.int 4096
    %37487 = torch.prim.ListConstruct %int4_35024, %37482, %int4096_35025 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37488 = torch.aten.view %37486, %37487 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35026 = torch.constant.int 1
    %37489 = torch.aten.size.int %37348, %int1_35026 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_35027 = torch.constant.int 4
    %37490 = torch.aten.mul.int %int4_35027, %37489 : !torch.int, !torch.int -> !torch.int
    %int1792_35028 = torch.constant.int 1792
    %37491 = torch.prim.ListConstruct %37490, %int1792_35028 : (!torch.int, !torch.int) -> !torch.list<int>
    %37492 = torch.aten.view %37453, %37491 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37492, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %37493 = torch.aten.mm %37492, %37464 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37493, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_35029 = torch.constant.int 4
    %int4096_35030 = torch.constant.int 4096
    %37494 = torch.prim.ListConstruct %int4_35029, %37489, %int4096_35030 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37495 = torch.aten.view %37493, %37494 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35031 = torch.constant.int 1
    %37496 = torch.aten.size.int %37354, %int1_35031 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_35032 = torch.constant.int 4
    %37497 = torch.aten.mul.int %int4_35032, %37496 : !torch.int, !torch.int -> !torch.int
    %int1792_35033 = torch.constant.int 1792
    %37498 = torch.prim.ListConstruct %37497, %int1792_35033 : (!torch.int, !torch.int) -> !torch.list<int>
    %37499 = torch.aten.view %37454, %37498 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37499, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %37500 = torch.aten.mm %37499, %37466 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37500, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_35034 = torch.constant.int 4
    %int4096_35035 = torch.constant.int 4096
    %37501 = torch.prim.ListConstruct %int4_35034, %37496, %int4096_35035 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37502 = torch.aten.view %37500, %37501 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35036 = torch.constant.int 1
    %37503 = torch.aten.size.int %37360, %int1_35036 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_35037 = torch.constant.int 4
    %37504 = torch.aten.mul.int %int4_35037, %37503 : !torch.int, !torch.int -> !torch.int
    %int1792_35038 = torch.constant.int 1792
    %37505 = torch.prim.ListConstruct %37504, %int1792_35038 : (!torch.int, !torch.int) -> !torch.list<int>
    %37506 = torch.aten.view %37455, %37505 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37506, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %37507 = torch.aten.mm %37506, %37468 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37507, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_35039 = torch.constant.int 4
    %int4096_35040 = torch.constant.int 4096
    %37508 = torch.prim.ListConstruct %int4_35039, %37503, %int4096_35040 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37509 = torch.aten.view %37507, %37508 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35041 = torch.constant.int 1
    %37510 = torch.aten.size.int %37366, %int1_35041 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_35042 = torch.constant.int 4
    %37511 = torch.aten.mul.int %int4_35042, %37510 : !torch.int, !torch.int -> !torch.int
    %int1792_35043 = torch.constant.int 1792
    %37512 = torch.prim.ListConstruct %37511, %int1792_35043 : (!torch.int, !torch.int) -> !torch.list<int>
    %37513 = torch.aten.view %37456, %37512 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37513, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %37514 = torch.aten.mm %37513, %37470 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37514, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_35044 = torch.constant.int 4
    %int4096_35045 = torch.constant.int 4096
    %37515 = torch.prim.ListConstruct %int4_35044, %37510, %int4096_35045 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37516 = torch.aten.view %37514, %37515 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35046 = torch.constant.int 1
    %37517 = torch.aten.size.int %37372, %int1_35046 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_35047 = torch.constant.int 4
    %37518 = torch.aten.mul.int %int4_35047, %37517 : !torch.int, !torch.int -> !torch.int
    %int1792_35048 = torch.constant.int 1792
    %37519 = torch.prim.ListConstruct %37518, %int1792_35048 : (!torch.int, !torch.int) -> !torch.list<int>
    %37520 = torch.aten.view %37457, %37519 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37520, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %37521 = torch.aten.mm %37520, %37472 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37521, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_35049 = torch.constant.int 4
    %int4096_35050 = torch.constant.int 4096
    %37522 = torch.prim.ListConstruct %int4_35049, %37517, %int4096_35050 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37523 = torch.aten.view %37521, %37522 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35051 = torch.constant.int 1
    %37524 = torch.aten.size.int %37378, %int1_35051 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_35052 = torch.constant.int 4
    %37525 = torch.aten.mul.int %int4_35052, %37524 : !torch.int, !torch.int -> !torch.int
    %int1792_35053 = torch.constant.int 1792
    %37526 = torch.prim.ListConstruct %37525, %int1792_35053 : (!torch.int, !torch.int) -> !torch.list<int>
    %37527 = torch.aten.view %37458, %37526 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %37527, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %37528 = torch.aten.mm %37527, %37474 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37528, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_35054 = torch.constant.int 4
    %int4096_35055 = torch.constant.int 4096
    %37529 = torch.prim.ListConstruct %int4_35054, %37524, %int4096_35055 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37530 = torch.aten.view %37528, %37529 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
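    // All-reduce of the partial sums, unrolled per device. First gather: transfer the
    // partials from devices 1-7 onto @__device_0 (%37481 is already resident there).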
    %37531 = torch_c.to_builtin_tensor %37488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35056 = arith.constant 1 : index
    %dim_35057 = tensor.dim %37531, %c1_35056 : tensor<4x?x4096xf16>
    %37532 = flow.tensor.transfer %37531 : tensor<4x?x4096xf16>{%dim_35057} to #hal.device.promise<@__device_0>
    %37533 = torch_c.from_builtin_tensor %37532 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37534 = torch_c.to_builtin_tensor %37495 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35058 = arith.constant 1 : index
    %dim_35059 = tensor.dim %37534, %c1_35058 : tensor<4x?x4096xf16>
    %37535 = flow.tensor.transfer %37534 : tensor<4x?x4096xf16>{%dim_35059} to #hal.device.promise<@__device_0>
    %37536 = torch_c.from_builtin_tensor %37535 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37537 = torch_c.to_builtin_tensor %37502 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35060 = arith.constant 1 : index
    %dim_35061 = tensor.dim %37537, %c1_35060 : tensor<4x?x4096xf16>
    %37538 = flow.tensor.transfer %37537 : tensor<4x?x4096xf16>{%dim_35061} to #hal.device.promise<@__device_0>
    %37539 = torch_c.from_builtin_tensor %37538 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37540 = torch_c.to_builtin_tensor %37509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35062 = arith.constant 1 : index
    %dim_35063 = tensor.dim %37540, %c1_35062 : tensor<4x?x4096xf16>
    %37541 = flow.tensor.transfer %37540 : tensor<4x?x4096xf16>{%dim_35063} to #hal.device.promise<@__device_0>
    %37542 = torch_c.from_builtin_tensor %37541 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37543 = torch_c.to_builtin_tensor %37516 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35064 = arith.constant 1 : index
    %dim_35065 = tensor.dim %37543, %c1_35064 : tensor<4x?x4096xf16>
    %37544 = flow.tensor.transfer %37543 : tensor<4x?x4096xf16>{%dim_35065} to #hal.device.promise<@__device_0>
    %37545 = torch_c.from_builtin_tensor %37544 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37546 = torch_c.to_builtin_tensor %37523 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35066 = arith.constant 1 : index
    %dim_35067 = tensor.dim %37546, %c1_35066 : tensor<4x?x4096xf16>
    %37547 = flow.tensor.transfer %37546 : tensor<4x?x4096xf16>{%dim_35067} to #hal.device.promise<@__device_0>
    %37548 = torch_c.from_builtin_tensor %37547 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37549 = torch_c.to_builtin_tensor %37530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35068 = arith.constant 1 : index
    %dim_35069 = tensor.dim %37549, %c1_35068 : tensor<4x?x4096xf16>
    %37550 = flow.tensor.transfer %37549 : tensor<4x?x4096xf16>{%dim_35069} to #hal.device.promise<@__device_0>
    %37551 = torch_c.from_builtin_tensor %37550 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
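    // Chain of adds summing the eight partials on @__device_0.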
    %int1_35070 = torch.constant.int 1
    %37552 = torch.aten.add.Tensor %37481, %37533, %int1_35070 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35071 = torch.constant.int 1
    %37553 = torch.aten.add.Tensor %37552, %37536, %int1_35071 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35072 = torch.constant.int 1
    %37554 = torch.aten.add.Tensor %37553, %37539, %int1_35072 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35073 = torch.constant.int 1
    %37555 = torch.aten.add.Tensor %37554, %37542, %int1_35073 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35074 = torch.constant.int 1
    %37556 = torch.aten.add.Tensor %37555, %37545, %int1_35074 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35075 = torch.constant.int 1
    %37557 = torch.aten.add.Tensor %37556, %37548, %int1_35075 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35076 = torch.constant.int 1
    %37558 = torch.aten.add.Tensor %37557, %37551, %int1_35076 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
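    // Same gather for @__device_1; %37488 is the partial already resident there, so only
    // the other seven are transferred.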
    %37559 = torch_c.to_builtin_tensor %37481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35077 = arith.constant 1 : index
    %dim_35078 = tensor.dim %37559, %c1_35077 : tensor<4x?x4096xf16>
    %37560 = flow.tensor.transfer %37559 : tensor<4x?x4096xf16>{%dim_35078} to #hal.device.promise<@__device_1>
    %37561 = torch_c.from_builtin_tensor %37560 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37562 = torch_c.to_builtin_tensor %37495 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35079 = arith.constant 1 : index
    %dim_35080 = tensor.dim %37562, %c1_35079 : tensor<4x?x4096xf16>
    %37563 = flow.tensor.transfer %37562 : tensor<4x?x4096xf16>{%dim_35080} to #hal.device.promise<@__device_1>
    %37564 = torch_c.from_builtin_tensor %37563 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37565 = torch_c.to_builtin_tensor %37502 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35081 = arith.constant 1 : index
    %dim_35082 = tensor.dim %37565, %c1_35081 : tensor<4x?x4096xf16>
    %37566 = flow.tensor.transfer %37565 : tensor<4x?x4096xf16>{%dim_35082} to #hal.device.promise<@__device_1>
    %37567 = torch_c.from_builtin_tensor %37566 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37568 = torch_c.to_builtin_tensor %37509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35083 = arith.constant 1 : index
    %dim_35084 = tensor.dim %37568, %c1_35083 : tensor<4x?x4096xf16>
    %37569 = flow.tensor.transfer %37568 : tensor<4x?x4096xf16>{%dim_35084} to #hal.device.promise<@__device_1>
    %37570 = torch_c.from_builtin_tensor %37569 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37571 = torch_c.to_builtin_tensor %37516 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35085 = arith.constant 1 : index
    %dim_35086 = tensor.dim %37571, %c1_35085 : tensor<4x?x4096xf16>
    %37572 = flow.tensor.transfer %37571 : tensor<4x?x4096xf16>{%dim_35086} to #hal.device.promise<@__device_1>
    %37573 = torch_c.from_builtin_tensor %37572 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37574 = torch_c.to_builtin_tensor %37523 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35087 = arith.constant 1 : index
    %dim_35088 = tensor.dim %37574, %c1_35087 : tensor<4x?x4096xf16>
    %37575 = flow.tensor.transfer %37574 : tensor<4x?x4096xf16>{%dim_35088} to #hal.device.promise<@__device_1>
    %37576 = torch_c.from_builtin_tensor %37575 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37577 = torch_c.to_builtin_tensor %37530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35089 = arith.constant 1 : index
    %dim_35090 = tensor.dim %37577, %c1_35089 : tensor<4x?x4096xf16>
    %37578 = flow.tensor.transfer %37577 : tensor<4x?x4096xf16>{%dim_35090} to #hal.device.promise<@__device_1>
    %37579 = torch_c.from_builtin_tensor %37578 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
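    // Sum the eight partials on @__device_1 (%37488 is used directly, untransferred).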
    %int1_35091 = torch.constant.int 1
    %37580 = torch.aten.add.Tensor %37561, %37488, %int1_35091 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35092 = torch.constant.int 1
    %37581 = torch.aten.add.Tensor %37580, %37564, %int1_35092 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35093 = torch.constant.int 1
    %37582 = torch.aten.add.Tensor %37581, %37567, %int1_35093 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35094 = torch.constant.int 1
    %37583 = torch.aten.add.Tensor %37582, %37570, %int1_35094 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35095 = torch.constant.int 1
    %37584 = torch.aten.add.Tensor %37583, %37573, %int1_35095 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35096 = torch.constant.int 1
    %37585 = torch.aten.add.Tensor %37584, %37576, %int1_35096 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35097 = torch.constant.int 1
    %37586 = torch.aten.add.Tensor %37585, %37579, %int1_35097 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
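    // Gather for @__device_2; %37495 is already resident there.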
    %37587 = torch_c.to_builtin_tensor %37481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35098 = arith.constant 1 : index
    %dim_35099 = tensor.dim %37587, %c1_35098 : tensor<4x?x4096xf16>
    %37588 = flow.tensor.transfer %37587 : tensor<4x?x4096xf16>{%dim_35099} to #hal.device.promise<@__device_2>
    %37589 = torch_c.from_builtin_tensor %37588 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37590 = torch_c.to_builtin_tensor %37488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35100 = arith.constant 1 : index
    %dim_35101 = tensor.dim %37590, %c1_35100 : tensor<4x?x4096xf16>
    %37591 = flow.tensor.transfer %37590 : tensor<4x?x4096xf16>{%dim_35101} to #hal.device.promise<@__device_2>
    %37592 = torch_c.from_builtin_tensor %37591 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37593 = torch_c.to_builtin_tensor %37502 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35102 = arith.constant 1 : index
    %dim_35103 = tensor.dim %37593, %c1_35102 : tensor<4x?x4096xf16>
    %37594 = flow.tensor.transfer %37593 : tensor<4x?x4096xf16>{%dim_35103} to #hal.device.promise<@__device_2>
    %37595 = torch_c.from_builtin_tensor %37594 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37596 = torch_c.to_builtin_tensor %37509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35104 = arith.constant 1 : index
    %dim_35105 = tensor.dim %37596, %c1_35104 : tensor<4x?x4096xf16>
    %37597 = flow.tensor.transfer %37596 : tensor<4x?x4096xf16>{%dim_35105} to #hal.device.promise<@__device_2>
    %37598 = torch_c.from_builtin_tensor %37597 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37599 = torch_c.to_builtin_tensor %37516 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35106 = arith.constant 1 : index
    %dim_35107 = tensor.dim %37599, %c1_35106 : tensor<4x?x4096xf16>
    %37600 = flow.tensor.transfer %37599 : tensor<4x?x4096xf16>{%dim_35107} to #hal.device.promise<@__device_2>
    %37601 = torch_c.from_builtin_tensor %37600 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37602 = torch_c.to_builtin_tensor %37523 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35108 = arith.constant 1 : index
    %dim_35109 = tensor.dim %37602, %c1_35108 : tensor<4x?x4096xf16>
    %37603 = flow.tensor.transfer %37602 : tensor<4x?x4096xf16>{%dim_35109} to #hal.device.promise<@__device_2>
    %37604 = torch_c.from_builtin_tensor %37603 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37605 = torch_c.to_builtin_tensor %37530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35110 = arith.constant 1 : index
    %dim_35111 = tensor.dim %37605, %c1_35110 : tensor<4x?x4096xf16>
    %37606 = flow.tensor.transfer %37605 : tensor<4x?x4096xf16>{%dim_35111} to #hal.device.promise<@__device_2>
    %37607 = torch_c.from_builtin_tensor %37606 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35112 = torch.constant.int 1
    %37608 = torch.aten.add.Tensor %37589, %37592, %int1_35112 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35113 = torch.constant.int 1
    %37609 = torch.aten.add.Tensor %37608, %37495, %int1_35113 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35114 = torch.constant.int 1
    %37610 = torch.aten.add.Tensor %37609, %37595, %int1_35114 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35115 = torch.constant.int 1
    %37611 = torch.aten.add.Tensor %37610, %37598, %int1_35115 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35116 = torch.constant.int 1
    %37612 = torch.aten.add.Tensor %37611, %37601, %int1_35116 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35117 = torch.constant.int 1
    %37613 = torch.aten.add.Tensor %37612, %37604, %int1_35117 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35118 = torch.constant.int 1
    %37614 = torch.aten.add.Tensor %37613, %37607, %int1_35118 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
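    // [editor annotation] Same gather-and-sum pattern for the @__device_3 copy;
    // only %37502 stays local. Result: %37642.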
    %37615 = torch_c.to_builtin_tensor %37481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35119 = arith.constant 1 : index
    %dim_35120 = tensor.dim %37615, %c1_35119 : tensor<4x?x4096xf16>
    %37616 = flow.tensor.transfer %37615 : tensor<4x?x4096xf16>{%dim_35120} to #hal.device.promise<@__device_3>
    %37617 = torch_c.from_builtin_tensor %37616 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37618 = torch_c.to_builtin_tensor %37488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35121 = arith.constant 1 : index
    %dim_35122 = tensor.dim %37618, %c1_35121 : tensor<4x?x4096xf16>
    %37619 = flow.tensor.transfer %37618 : tensor<4x?x4096xf16>{%dim_35122} to #hal.device.promise<@__device_3>
    %37620 = torch_c.from_builtin_tensor %37619 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37621 = torch_c.to_builtin_tensor %37495 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35123 = arith.constant 1 : index
    %dim_35124 = tensor.dim %37621, %c1_35123 : tensor<4x?x4096xf16>
    %37622 = flow.tensor.transfer %37621 : tensor<4x?x4096xf16>{%dim_35124} to #hal.device.promise<@__device_3>
    %37623 = torch_c.from_builtin_tensor %37622 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37624 = torch_c.to_builtin_tensor %37509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35125 = arith.constant 1 : index
    %dim_35126 = tensor.dim %37624, %c1_35125 : tensor<4x?x4096xf16>
    %37625 = flow.tensor.transfer %37624 : tensor<4x?x4096xf16>{%dim_35126} to #hal.device.promise<@__device_3>
    %37626 = torch_c.from_builtin_tensor %37625 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37627 = torch_c.to_builtin_tensor %37516 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35127 = arith.constant 1 : index
    %dim_35128 = tensor.dim %37627, %c1_35127 : tensor<4x?x4096xf16>
    %37628 = flow.tensor.transfer %37627 : tensor<4x?x4096xf16>{%dim_35128} to #hal.device.promise<@__device_3>
    %37629 = torch_c.from_builtin_tensor %37628 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37630 = torch_c.to_builtin_tensor %37523 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35129 = arith.constant 1 : index
    %dim_35130 = tensor.dim %37630, %c1_35129 : tensor<4x?x4096xf16>
    %37631 = flow.tensor.transfer %37630 : tensor<4x?x4096xf16>{%dim_35130} to #hal.device.promise<@__device_3>
    %37632 = torch_c.from_builtin_tensor %37631 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37633 = torch_c.to_builtin_tensor %37530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35131 = arith.constant 1 : index
    %dim_35132 = tensor.dim %37633, %c1_35131 : tensor<4x?x4096xf16>
    %37634 = flow.tensor.transfer %37633 : tensor<4x?x4096xf16>{%dim_35132} to #hal.device.promise<@__device_3>
    %37635 = torch_c.from_builtin_tensor %37634 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35133 = torch.constant.int 1
    %37636 = torch.aten.add.Tensor %37617, %37620, %int1_35133 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35134 = torch.constant.int 1
    %37637 = torch.aten.add.Tensor %37636, %37623, %int1_35134 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35135 = torch.constant.int 1
    %37638 = torch.aten.add.Tensor %37637, %37502, %int1_35135 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35136 = torch.constant.int 1
    %37639 = torch.aten.add.Tensor %37638, %37626, %int1_35136 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35137 = torch.constant.int 1
    %37640 = torch.aten.add.Tensor %37639, %37629, %int1_35137 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35138 = torch.constant.int 1
    %37641 = torch.aten.add.Tensor %37640, %37632, %int1_35138 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35139 = torch.constant.int 1
    %37642 = torch.aten.add.Tensor %37641, %37635, %int1_35139 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
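    // [editor annotation] @__device_4 copy; local partial %37509, result %37670.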
    %37643 = torch_c.to_builtin_tensor %37481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35140 = arith.constant 1 : index
    %dim_35141 = tensor.dim %37643, %c1_35140 : tensor<4x?x4096xf16>
    %37644 = flow.tensor.transfer %37643 : tensor<4x?x4096xf16>{%dim_35141} to #hal.device.promise<@__device_4>
    %37645 = torch_c.from_builtin_tensor %37644 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37646 = torch_c.to_builtin_tensor %37488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35142 = arith.constant 1 : index
    %dim_35143 = tensor.dim %37646, %c1_35142 : tensor<4x?x4096xf16>
    %37647 = flow.tensor.transfer %37646 : tensor<4x?x4096xf16>{%dim_35143} to #hal.device.promise<@__device_4>
    %37648 = torch_c.from_builtin_tensor %37647 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37649 = torch_c.to_builtin_tensor %37495 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35144 = arith.constant 1 : index
    %dim_35145 = tensor.dim %37649, %c1_35144 : tensor<4x?x4096xf16>
    %37650 = flow.tensor.transfer %37649 : tensor<4x?x4096xf16>{%dim_35145} to #hal.device.promise<@__device_4>
    %37651 = torch_c.from_builtin_tensor %37650 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37652 = torch_c.to_builtin_tensor %37502 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35146 = arith.constant 1 : index
    %dim_35147 = tensor.dim %37652, %c1_35146 : tensor<4x?x4096xf16>
    %37653 = flow.tensor.transfer %37652 : tensor<4x?x4096xf16>{%dim_35147} to #hal.device.promise<@__device_4>
    %37654 = torch_c.from_builtin_tensor %37653 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37655 = torch_c.to_builtin_tensor %37516 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35148 = arith.constant 1 : index
    %dim_35149 = tensor.dim %37655, %c1_35148 : tensor<4x?x4096xf16>
    %37656 = flow.tensor.transfer %37655 : tensor<4x?x4096xf16>{%dim_35149} to #hal.device.promise<@__device_4>
    %37657 = torch_c.from_builtin_tensor %37656 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37658 = torch_c.to_builtin_tensor %37523 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35150 = arith.constant 1 : index
    %dim_35151 = tensor.dim %37658, %c1_35150 : tensor<4x?x4096xf16>
    %37659 = flow.tensor.transfer %37658 : tensor<4x?x4096xf16>{%dim_35151} to #hal.device.promise<@__device_4>
    %37660 = torch_c.from_builtin_tensor %37659 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37661 = torch_c.to_builtin_tensor %37530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35152 = arith.constant 1 : index
    %dim_35153 = tensor.dim %37661, %c1_35152 : tensor<4x?x4096xf16>
    %37662 = flow.tensor.transfer %37661 : tensor<4x?x4096xf16>{%dim_35153} to #hal.device.promise<@__device_4>
    %37663 = torch_c.from_builtin_tensor %37662 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35154 = torch.constant.int 1
    %37664 = torch.aten.add.Tensor %37645, %37648, %int1_35154 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35155 = torch.constant.int 1
    %37665 = torch.aten.add.Tensor %37664, %37651, %int1_35155 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35156 = torch.constant.int 1
    %37666 = torch.aten.add.Tensor %37665, %37654, %int1_35156 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35157 = torch.constant.int 1
    %37667 = torch.aten.add.Tensor %37666, %37509, %int1_35157 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35158 = torch.constant.int 1
    %37668 = torch.aten.add.Tensor %37667, %37657, %int1_35158 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35159 = torch.constant.int 1
    %37669 = torch.aten.add.Tensor %37668, %37660, %int1_35159 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35160 = torch.constant.int 1
    %37670 = torch.aten.add.Tensor %37669, %37663, %int1_35160 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
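    // [editor annotation] @__device_5 copy; local partial %37516, result %37698.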
    %37671 = torch_c.to_builtin_tensor %37481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35161 = arith.constant 1 : index
    %dim_35162 = tensor.dim %37671, %c1_35161 : tensor<4x?x4096xf16>
    %37672 = flow.tensor.transfer %37671 : tensor<4x?x4096xf16>{%dim_35162} to #hal.device.promise<@__device_5>
    %37673 = torch_c.from_builtin_tensor %37672 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37674 = torch_c.to_builtin_tensor %37488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35163 = arith.constant 1 : index
    %dim_35164 = tensor.dim %37674, %c1_35163 : tensor<4x?x4096xf16>
    %37675 = flow.tensor.transfer %37674 : tensor<4x?x4096xf16>{%dim_35164} to #hal.device.promise<@__device_5>
    %37676 = torch_c.from_builtin_tensor %37675 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37677 = torch_c.to_builtin_tensor %37495 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35165 = arith.constant 1 : index
    %dim_35166 = tensor.dim %37677, %c1_35165 : tensor<4x?x4096xf16>
    %37678 = flow.tensor.transfer %37677 : tensor<4x?x4096xf16>{%dim_35166} to #hal.device.promise<@__device_5>
    %37679 = torch_c.from_builtin_tensor %37678 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37680 = torch_c.to_builtin_tensor %37502 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35167 = arith.constant 1 : index
    %dim_35168 = tensor.dim %37680, %c1_35167 : tensor<4x?x4096xf16>
    %37681 = flow.tensor.transfer %37680 : tensor<4x?x4096xf16>{%dim_35168} to #hal.device.promise<@__device_5>
    %37682 = torch_c.from_builtin_tensor %37681 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37683 = torch_c.to_builtin_tensor %37509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35169 = arith.constant 1 : index
    %dim_35170 = tensor.dim %37683, %c1_35169 : tensor<4x?x4096xf16>
    %37684 = flow.tensor.transfer %37683 : tensor<4x?x4096xf16>{%dim_35170} to #hal.device.promise<@__device_5>
    %37685 = torch_c.from_builtin_tensor %37684 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37686 = torch_c.to_builtin_tensor %37523 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35171 = arith.constant 1 : index
    %dim_35172 = tensor.dim %37686, %c1_35171 : tensor<4x?x4096xf16>
    %37687 = flow.tensor.transfer %37686 : tensor<4x?x4096xf16>{%dim_35172} to #hal.device.promise<@__device_5>
    %37688 = torch_c.from_builtin_tensor %37687 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37689 = torch_c.to_builtin_tensor %37530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35173 = arith.constant 1 : index
    %dim_35174 = tensor.dim %37689, %c1_35173 : tensor<4x?x4096xf16>
    %37690 = flow.tensor.transfer %37689 : tensor<4x?x4096xf16>{%dim_35174} to #hal.device.promise<@__device_5>
    %37691 = torch_c.from_builtin_tensor %37690 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35175 = torch.constant.int 1
    %37692 = torch.aten.add.Tensor %37673, %37676, %int1_35175 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35176 = torch.constant.int 1
    %37693 = torch.aten.add.Tensor %37692, %37679, %int1_35176 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35177 = torch.constant.int 1
    %37694 = torch.aten.add.Tensor %37693, %37682, %int1_35177 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35178 = torch.constant.int 1
    %37695 = torch.aten.add.Tensor %37694, %37685, %int1_35178 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35179 = torch.constant.int 1
    %37696 = torch.aten.add.Tensor %37695, %37516, %int1_35179 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35180 = torch.constant.int 1
    %37697 = torch.aten.add.Tensor %37696, %37688, %int1_35180 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35181 = torch.constant.int 1
    %37698 = torch.aten.add.Tensor %37697, %37691, %int1_35181 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
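    // [editor annotation] @__device_6 copy; local partial %37523, result %37726.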
    %37699 = torch_c.to_builtin_tensor %37481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35182 = arith.constant 1 : index
    %dim_35183 = tensor.dim %37699, %c1_35182 : tensor<4x?x4096xf16>
    %37700 = flow.tensor.transfer %37699 : tensor<4x?x4096xf16>{%dim_35183} to #hal.device.promise<@__device_6>
    %37701 = torch_c.from_builtin_tensor %37700 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37702 = torch_c.to_builtin_tensor %37488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35184 = arith.constant 1 : index
    %dim_35185 = tensor.dim %37702, %c1_35184 : tensor<4x?x4096xf16>
    %37703 = flow.tensor.transfer %37702 : tensor<4x?x4096xf16>{%dim_35185} to #hal.device.promise<@__device_6>
    %37704 = torch_c.from_builtin_tensor %37703 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37705 = torch_c.to_builtin_tensor %37495 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35186 = arith.constant 1 : index
    %dim_35187 = tensor.dim %37705, %c1_35186 : tensor<4x?x4096xf16>
    %37706 = flow.tensor.transfer %37705 : tensor<4x?x4096xf16>{%dim_35187} to #hal.device.promise<@__device_6>
    %37707 = torch_c.from_builtin_tensor %37706 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37708 = torch_c.to_builtin_tensor %37502 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35188 = arith.constant 1 : index
    %dim_35189 = tensor.dim %37708, %c1_35188 : tensor<4x?x4096xf16>
    %37709 = flow.tensor.transfer %37708 : tensor<4x?x4096xf16>{%dim_35189} to #hal.device.promise<@__device_6>
    %37710 = torch_c.from_builtin_tensor %37709 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37711 = torch_c.to_builtin_tensor %37509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35190 = arith.constant 1 : index
    %dim_35191 = tensor.dim %37711, %c1_35190 : tensor<4x?x4096xf16>
    %37712 = flow.tensor.transfer %37711 : tensor<4x?x4096xf16>{%dim_35191} to #hal.device.promise<@__device_6>
    %37713 = torch_c.from_builtin_tensor %37712 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37714 = torch_c.to_builtin_tensor %37516 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35192 = arith.constant 1 : index
    %dim_35193 = tensor.dim %37714, %c1_35192 : tensor<4x?x4096xf16>
    %37715 = flow.tensor.transfer %37714 : tensor<4x?x4096xf16>{%dim_35193} to #hal.device.promise<@__device_6>
    %37716 = torch_c.from_builtin_tensor %37715 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37717 = torch_c.to_builtin_tensor %37530 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35194 = arith.constant 1 : index
    %dim_35195 = tensor.dim %37717, %c1_35194 : tensor<4x?x4096xf16>
    %37718 = flow.tensor.transfer %37717 : tensor<4x?x4096xf16>{%dim_35195} to #hal.device.promise<@__device_6>
    %37719 = torch_c.from_builtin_tensor %37718 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35196 = torch.constant.int 1
    %37720 = torch.aten.add.Tensor %37701, %37704, %int1_35196 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35197 = torch.constant.int 1
    %37721 = torch.aten.add.Tensor %37720, %37707, %int1_35197 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35198 = torch.constant.int 1
    %37722 = torch.aten.add.Tensor %37721, %37710, %int1_35198 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35199 = torch.constant.int 1
    %37723 = torch.aten.add.Tensor %37722, %37713, %int1_35199 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35200 = torch.constant.int 1
    %37724 = torch.aten.add.Tensor %37723, %37716, %int1_35200 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35201 = torch.constant.int 1
    %37725 = torch.aten.add.Tensor %37724, %37523, %int1_35201 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35202 = torch.constant.int 1
    %37726 = torch.aten.add.Tensor %37725, %37719, %int1_35202 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
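    // [editor annotation] @__device_7 copy; local partial %37530, result %37754.
    // With this, all eight devices hold an identical copy of the full sum.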
    %37727 = torch_c.to_builtin_tensor %37481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35203 = arith.constant 1 : index
    %dim_35204 = tensor.dim %37727, %c1_35203 : tensor<4x?x4096xf16>
    %37728 = flow.tensor.transfer %37727 : tensor<4x?x4096xf16>{%dim_35204} to #hal.device.promise<@__device_7>
    %37729 = torch_c.from_builtin_tensor %37728 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37730 = torch_c.to_builtin_tensor %37488 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35205 = arith.constant 1 : index
    %dim_35206 = tensor.dim %37730, %c1_35205 : tensor<4x?x4096xf16>
    %37731 = flow.tensor.transfer %37730 : tensor<4x?x4096xf16>{%dim_35206} to #hal.device.promise<@__device_7>
    %37732 = torch_c.from_builtin_tensor %37731 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37733 = torch_c.to_builtin_tensor %37495 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35207 = arith.constant 1 : index
    %dim_35208 = tensor.dim %37733, %c1_35207 : tensor<4x?x4096xf16>
    %37734 = flow.tensor.transfer %37733 : tensor<4x?x4096xf16>{%dim_35208} to #hal.device.promise<@__device_7>
    %37735 = torch_c.from_builtin_tensor %37734 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37736 = torch_c.to_builtin_tensor %37502 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35209 = arith.constant 1 : index
    %dim_35210 = tensor.dim %37736, %c1_35209 : tensor<4x?x4096xf16>
    %37737 = flow.tensor.transfer %37736 : tensor<4x?x4096xf16>{%dim_35210} to #hal.device.promise<@__device_7>
    %37738 = torch_c.from_builtin_tensor %37737 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37739 = torch_c.to_builtin_tensor %37509 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35211 = arith.constant 1 : index
    %dim_35212 = tensor.dim %37739, %c1_35211 : tensor<4x?x4096xf16>
    %37740 = flow.tensor.transfer %37739 : tensor<4x?x4096xf16>{%dim_35212} to #hal.device.promise<@__device_7>
    %37741 = torch_c.from_builtin_tensor %37740 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37742 = torch_c.to_builtin_tensor %37516 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35213 = arith.constant 1 : index
    %dim_35214 = tensor.dim %37742, %c1_35213 : tensor<4x?x4096xf16>
    %37743 = flow.tensor.transfer %37742 : tensor<4x?x4096xf16>{%dim_35214} to #hal.device.promise<@__device_7>
    %37744 = torch_c.from_builtin_tensor %37743 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %37745 = torch_c.to_builtin_tensor %37523 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_35215 = arith.constant 1 : index
    %dim_35216 = tensor.dim %37745, %c1_35215 : tensor<4x?x4096xf16>
    %37746 = flow.tensor.transfer %37745 : tensor<4x?x4096xf16>{%dim_35216} to #hal.device.promise<@__device_7>
    %37747 = torch_c.from_builtin_tensor %37746 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35217 = torch.constant.int 1
    %37748 = torch.aten.add.Tensor %37729, %37732, %int1_35217 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35218 = torch.constant.int 1
    %37749 = torch.aten.add.Tensor %37748, %37735, %int1_35218 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35219 = torch.constant.int 1
    %37750 = torch.aten.add.Tensor %37749, %37738, %int1_35219 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35220 = torch.constant.int 1
    %37751 = torch.aten.add.Tensor %37750, %37741, %int1_35220 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35221 = torch.constant.int 1
    %37752 = torch.aten.add.Tensor %37751, %37744, %int1_35221 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35222 = torch.constant.int 1
    %37753 = torch.aten.add.Tensor %37752, %37747, %int1_35222 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35223 = torch.constant.int 1
    %37754 = torch.aten.add.Tensor %37753, %37530, %int1_35223 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
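    // --- [editor annotation, inferred] Residual connection: each device's copy
    // of the reduced sum (%37558, %37586, %37614, %37642, %37670, %37698,
    // %37726, %37754) is added to what appears to be that device's copy of the
    // residual stream (%37235 ... %37242), giving %37755 ... %37762.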
    %int1_35224 = torch.constant.int 1
    %37755 = torch.aten.add.Tensor %37235, %37558, %int1_35224 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35225 = torch.constant.int 1
    %37756 = torch.aten.add.Tensor %37236, %37586, %int1_35225 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35226 = torch.constant.int 1
    %37757 = torch.aten.add.Tensor %37237, %37614, %int1_35226 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35227 = torch.constant.int 1
    %37758 = torch.aten.add.Tensor %37238, %37642, %int1_35227 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35228 = torch.constant.int 1
    %37759 = torch.aten.add.Tensor %37239, %37670, %int1_35228 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35229 = torch.constant.int 1
    %37760 = torch.aten.add.Tensor %37240, %37698, %int1_35229 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35230 = torch.constant.int 1
    %37761 = torch.aten.add.Tensor %37241, %37726, %int1_35230 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_35231 = torch.constant.int 1
    %37762 = torch.aten.add.Tensor %37242, %37754, %int1_35231 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
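    // --- [editor annotation, inferred] The ops through the f16 downcast below
    // replicate an RMSNorm once per device:
    //   y = w * x / sqrt(mean(x^2, dim=-1) + eps),  eps ~= 1e-5
    // Step 1: upcast the eight f16 activations to f32 (torch dtype code 6) so
    // the reduction runs in full precision.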
    %int6_35232 = torch.constant.int 6
    %37763 = torch.prims.convert_element_type %37755, %int6_35232 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_35233 = torch.constant.int 6
    %37764 = torch.prims.convert_element_type %37756, %int6_35233 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_35234 = torch.constant.int 6
    %37765 = torch.prims.convert_element_type %37757, %int6_35234 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_35235 = torch.constant.int 6
    %37766 = torch.prims.convert_element_type %37758, %int6_35235 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_35236 = torch.constant.int 6
    %37767 = torch.prims.convert_element_type %37759, %int6_35236 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_35237 = torch.constant.int 6
    %37768 = torch.prims.convert_element_type %37760, %int6_35237 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_35238 = torch.constant.int 6
    %37769 = torch.prims.convert_element_type %37761, %int6_35238 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_35239 = torch.constant.int 6
    %37770 = torch.prims.convert_element_type %37762, %int6_35239 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
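    // [editor annotation] Step 2: elementwise square, x^2.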
    %int2_35240 = torch.constant.int 2
    %37771 = torch.aten.pow.Tensor_Scalar %37763, %int2_35240 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_35241 = torch.constant.int 2
    %37772 = torch.aten.pow.Tensor_Scalar %37764, %int2_35241 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_35242 = torch.constant.int 2
    %37773 = torch.aten.pow.Tensor_Scalar %37765, %int2_35242 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_35243 = torch.constant.int 2
    %37774 = torch.aten.pow.Tensor_Scalar %37766, %int2_35243 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_35244 = torch.constant.int 2
    %37775 = torch.aten.pow.Tensor_Scalar %37767, %int2_35244 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_35245 = torch.constant.int 2
    %37776 = torch.aten.pow.Tensor_Scalar %37768, %int2_35245 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_35246 = torch.constant.int 2
    %37777 = torch.aten.pow.Tensor_Scalar %37769, %int2_35246 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_35247 = torch.constant.int 2
    %37778 = torch.aten.pow.Tensor_Scalar %37770, %int2_35247 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
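    // [editor annotation] Step 3: mean of x^2 over the last (4096) dim,
    // keepdim = true, yielding [4,?,1] tensors.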
    %int-1_35248 = torch.constant.int -1
    %37779 = torch.prim.ListConstruct %int-1_35248 : (!torch.int) -> !torch.list<int>
    %true_35249 = torch.constant.bool true
    %none_35250 = torch.constant.none
    %37780 = torch.aten.mean.dim %37771, %37779, %true_35249, %none_35250 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_35251 = torch.constant.int -1
    %37781 = torch.prim.ListConstruct %int-1_35251 : (!torch.int) -> !torch.list<int>
    %true_35252 = torch.constant.bool true
    %none_35253 = torch.constant.none
    %37782 = torch.aten.mean.dim %37772, %37781, %true_35252, %none_35253 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_35254 = torch.constant.int -1
    %37783 = torch.prim.ListConstruct %int-1_35254 : (!torch.int) -> !torch.list<int>
    %true_35255 = torch.constant.bool true
    %none_35256 = torch.constant.none
    %37784 = torch.aten.mean.dim %37773, %37783, %true_35255, %none_35256 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_35257 = torch.constant.int -1
    %37785 = torch.prim.ListConstruct %int-1_35257 : (!torch.int) -> !torch.list<int>
    %true_35258 = torch.constant.bool true
    %none_35259 = torch.constant.none
    %37786 = torch.aten.mean.dim %37774, %37785, %true_35258, %none_35259 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_35260 = torch.constant.int -1
    %37787 = torch.prim.ListConstruct %int-1_35260 : (!torch.int) -> !torch.list<int>
    %true_35261 = torch.constant.bool true
    %none_35262 = torch.constant.none
    %37788 = torch.aten.mean.dim %37775, %37787, %true_35261, %none_35262 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_35263 = torch.constant.int -1
    %37789 = torch.prim.ListConstruct %int-1_35263 : (!torch.int) -> !torch.list<int>
    %true_35264 = torch.constant.bool true
    %none_35265 = torch.constant.none
    %37790 = torch.aten.mean.dim %37776, %37789, %true_35264, %none_35265 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_35266 = torch.constant.int -1
    %37791 = torch.prim.ListConstruct %int-1_35266 : (!torch.int) -> !torch.list<int>
    %true_35267 = torch.constant.bool true
    %none_35268 = torch.constant.none
    %37792 = torch.aten.mean.dim %37777, %37791, %true_35267, %none_35268 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_35269 = torch.constant.int -1
    %37793 = torch.prim.ListConstruct %int-1_35269 : (!torch.int) -> !torch.list<int>
    %true_35270 = torch.constant.bool true
    %none_35271 = torch.constant.none
    %37794 = torch.aten.mean.dim %37778, %37793, %true_35270, %none_35271 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
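    // [editor annotation] Step 4: bias the mean with
    // eps = 9.9999997473787516e-6 (~1e-5) ahead of the square root.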
    %float9.999990e-06_35272 = torch.constant.float 9.9999997473787516E-6
    %int1_35273 = torch.constant.int 1
    %37795 = torch.aten.add.Scalar %37780, %float9.999990e-06_35272, %int1_35273 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_35274 = torch.constant.float 9.9999997473787516E-6
    %int1_35275 = torch.constant.int 1
    %37796 = torch.aten.add.Scalar %37782, %float9.999990e-06_35274, %int1_35275 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_35276 = torch.constant.float 9.9999997473787516E-6
    %int1_35277 = torch.constant.int 1
    %37797 = torch.aten.add.Scalar %37784, %float9.999990e-06_35276, %int1_35277 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_35278 = torch.constant.float 9.9999997473787516E-6
    %int1_35279 = torch.constant.int 1
    %37798 = torch.aten.add.Scalar %37786, %float9.999990e-06_35278, %int1_35279 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_35280 = torch.constant.float 9.9999997473787516E-6
    %int1_35281 = torch.constant.int 1
    %37799 = torch.aten.add.Scalar %37788, %float9.999990e-06_35280, %int1_35281 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_35282 = torch.constant.float 9.9999997473787516E-6
    %int1_35283 = torch.constant.int 1
    %37800 = torch.aten.add.Scalar %37790, %float9.999990e-06_35282, %int1_35283 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_35284 = torch.constant.float 9.9999997473787516E-6
    %int1_35285 = torch.constant.int 1
    %37801 = torch.aten.add.Scalar %37792, %float9.999990e-06_35284, %int1_35285 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_35286 = torch.constant.float 9.9999997473787516E-6
    %int1_35287 = torch.constant.int 1
    %37802 = torch.aten.add.Scalar %37794, %float9.999990e-06_35286, %int1_35287 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
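    // [editor annotation] Step 5: rsqrt of the biased mean.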
    %37803 = torch.aten.rsqrt %37795 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37804 = torch.aten.rsqrt %37796 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37805 = torch.aten.rsqrt %37797 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37806 = torch.aten.rsqrt %37798 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37807 = torch.aten.rsqrt %37799 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37808 = torch.aten.rsqrt %37800 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37809 = torch.aten.rsqrt %37801 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %37810 = torch.aten.rsqrt %37802 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %37810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
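    // [editor annotation] Step 6: normalize, x * rsqrt(mean(x^2) + eps).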
    %37811 = torch.aten.mul.Tensor %37763, %37803 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37812 = torch.aten.mul.Tensor %37764, %37804 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37813 = torch.aten.mul.Tensor %37765, %37805 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37814 = torch.aten.mul.Tensor %37766, %37806 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37815 = torch.aten.mul.Tensor %37767, %37807 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37816 = torch.aten.mul.Tensor %37768, %37808 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37817 = torch.aten.mul.Tensor %37769, %37809 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37818 = torch.aten.mul.Tensor %37770, %37810 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
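    // [editor annotation, inferred] Step 7: scale by the per-device replicas of
    // the norm weight (%1376 ... %1383, tensor<4096xf32>), presumably this
    // layer's replicated attention-norm parameters.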
    %37819 = torch.aten.mul.Tensor %1376, %37811 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37820 = torch.aten.mul.Tensor %1377, %37812 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37821 = torch.aten.mul.Tensor %1378, %37813 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37822 = torch.aten.mul.Tensor %1379, %37814 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37823 = torch.aten.mul.Tensor %1380, %37815 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37824 = torch.aten.mul.Tensor %1381, %37816 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37825 = torch.aten.mul.Tensor %1382, %37817 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %37826 = torch.aten.mul.Tensor %1383, %37818 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %37826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
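    // [editor annotation] Step 8: downcast the normalized result back to f16
    // (torch dtype code 5) for the following matmuls.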
    %int5_35288 = torch.constant.int 5
    %37827 = torch.prims.convert_element_type %37819, %int5_35288 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_35289 = torch.constant.int 5
    %37828 = torch.prims.convert_element_type %37820, %int5_35289 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_35290 = torch.constant.int 5
    %37829 = torch.prims.convert_element_type %37821, %int5_35290 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_35291 = torch.constant.int 5
    %37830 = torch.prims.convert_element_type %37822, %int5_35291 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_35292 = torch.constant.int 5
    %37831 = torch.prims.convert_element_type %37823, %int5_35292 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_35293 = torch.constant.int 5
    %37832 = torch.prims.convert_element_type %37824, %int5_35293 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_35294 = torch.constant.int 5
    %37833 = torch.prims.convert_element_type %37825, %int5_35294 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_35295 = torch.constant.int 5
    %37834 = torch.prims.convert_element_type %37826, %int5_35295 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %37834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
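    // Transpose the eight [512,4096] weight shards (%1384..%1391) to [4096,512] so
    // they can be right-multiplied against the flattened activations.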
    %int1_35296 = torch.constant.int 1
    %int0_35297 = torch.constant.int 0
    %37835 = torch.prim.ListConstruct %int1_35296, %int0_35297 : (!torch.int, !torch.int) -> !torch.list<int>
    %37836 = torch.aten.permute %1384, %37835 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_35298 = torch.constant.int 1
    %int0_35299 = torch.constant.int 0
    %37837 = torch.prim.ListConstruct %int1_35298, %int0_35299 : (!torch.int, !torch.int) -> !torch.list<int>
    %37838 = torch.aten.permute %1385, %37837 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_35300 = torch.constant.int 1
    %int0_35301 = torch.constant.int 0
    %37839 = torch.prim.ListConstruct %int1_35300, %int0_35301 : (!torch.int, !torch.int) -> !torch.list<int>
    %37840 = torch.aten.permute %1386, %37839 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_35302 = torch.constant.int 1
    %int0_35303 = torch.constant.int 0
    %37841 = torch.prim.ListConstruct %int1_35302, %int0_35303 : (!torch.int, !torch.int) -> !torch.list<int>
    %37842 = torch.aten.permute %1387, %37841 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_35304 = torch.constant.int 1
    %int0_35305 = torch.constant.int 0
    %37843 = torch.prim.ListConstruct %int1_35304, %int0_35305 : (!torch.int, !torch.int) -> !torch.list<int>
    %37844 = torch.aten.permute %1388, %37843 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_35306 = torch.constant.int 1
    %int0_35307 = torch.constant.int 0
    %37845 = torch.prim.ListConstruct %int1_35306, %int0_35307 : (!torch.int, !torch.int) -> !torch.list<int>
    %37846 = torch.aten.permute %1389, %37845 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_35308 = torch.constant.int 1
    %int0_35309 = torch.constant.int 0
    %37847 = torch.prim.ListConstruct %int1_35308, %int0_35309 : (!torch.int, !torch.int) -> !torch.list<int>
    %37848 = torch.aten.permute %1390, %37847 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_35310 = torch.constant.int 1
    %int0_35311 = torch.constant.int 0
    %37849 = torch.prim.ListConstruct %int1_35310, %int0_35311 : (!torch.int, !torch.int) -> !torch.list<int>
    %37850 = torch.aten.permute %1391, %37849 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
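    // Q projection, one shard per device: flatten [4,?,4096] to [4*?,4096], matmul
    // with the [4096,512] weight shard, then view the result back to [4,?,512].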
    %int4_35312 = torch.constant.int 4
    %37851 = torch.aten.mul.int %int4_35312, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35313 = torch.constant.int 4096
    %37852 = torch.prim.ListConstruct %37851, %int4096_35313 : (!torch.int, !torch.int) -> !torch.list<int>
    %37853 = torch.aten.view %37827, %37852 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37853, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37854 = torch.aten.mm %37853, %37836 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %37854, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_35314 = torch.constant.int 4
    %int512_35315 = torch.constant.int 512
    %37855 = torch.prim.ListConstruct %int4_35314, %2482, %int512_35315 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37856 = torch.aten.view %37854, %37855 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %37856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_35316 = torch.constant.int 4
    %37857 = torch.aten.mul.int %int4_35316, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35317 = torch.constant.int 4096
    %37858 = torch.prim.ListConstruct %37857, %int4096_35317 : (!torch.int, !torch.int) -> !torch.list<int>
    %37859 = torch.aten.view %37828, %37858 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37859, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37860 = torch.aten.mm %37859, %37838 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %37860, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_35318 = torch.constant.int 4
    %int512_35319 = torch.constant.int 512
    %37861 = torch.prim.ListConstruct %int4_35318, %2482, %int512_35319 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37862 = torch.aten.view %37860, %37861 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %37862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_35320 = torch.constant.int 4
    %37863 = torch.aten.mul.int %int4_35320, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35321 = torch.constant.int 4096
    %37864 = torch.prim.ListConstruct %37863, %int4096_35321 : (!torch.int, !torch.int) -> !torch.list<int>
    %37865 = torch.aten.view %37829, %37864 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37865, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37866 = torch.aten.mm %37865, %37840 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %37866, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_35322 = torch.constant.int 4
    %int512_35323 = torch.constant.int 512
    %37867 = torch.prim.ListConstruct %int4_35322, %2482, %int512_35323 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37868 = torch.aten.view %37866, %37867 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %37868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_35324 = torch.constant.int 4
    %37869 = torch.aten.mul.int %int4_35324, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35325 = torch.constant.int 4096
    %37870 = torch.prim.ListConstruct %37869, %int4096_35325 : (!torch.int, !torch.int) -> !torch.list<int>
    %37871 = torch.aten.view %37830, %37870 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37871, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37872 = torch.aten.mm %37871, %37842 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %37872, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_35326 = torch.constant.int 4
    %int512_35327 = torch.constant.int 512
    %37873 = torch.prim.ListConstruct %int4_35326, %2482, %int512_35327 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37874 = torch.aten.view %37872, %37873 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %37874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_35328 = torch.constant.int 4
    %37875 = torch.aten.mul.int %int4_35328, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35329 = torch.constant.int 4096
    %37876 = torch.prim.ListConstruct %37875, %int4096_35329 : (!torch.int, !torch.int) -> !torch.list<int>
    %37877 = torch.aten.view %37831, %37876 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37877, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37878 = torch.aten.mm %37877, %37844 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %37878, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_35330 = torch.constant.int 4
    %int512_35331 = torch.constant.int 512
    %37879 = torch.prim.ListConstruct %int4_35330, %2482, %int512_35331 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37880 = torch.aten.view %37878, %37879 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %37880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_35332 = torch.constant.int 4
    %37881 = torch.aten.mul.int %int4_35332, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35333 = torch.constant.int 4096
    %37882 = torch.prim.ListConstruct %37881, %int4096_35333 : (!torch.int, !torch.int) -> !torch.list<int>
    %37883 = torch.aten.view %37832, %37882 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37883, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37884 = torch.aten.mm %37883, %37846 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %37884, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_35334 = torch.constant.int 4
    %int512_35335 = torch.constant.int 512
    %37885 = torch.prim.ListConstruct %int4_35334, %2482, %int512_35335 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37886 = torch.aten.view %37884, %37885 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %37886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_35336 = torch.constant.int 4
    %37887 = torch.aten.mul.int %int4_35336, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35337 = torch.constant.int 4096
    %37888 = torch.prim.ListConstruct %37887, %int4096_35337 : (!torch.int, !torch.int) -> !torch.list<int>
    %37889 = torch.aten.view %37833, %37888 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37889, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37890 = torch.aten.mm %37889, %37848 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %37890, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_35338 = torch.constant.int 4
    %int512_35339 = torch.constant.int 512
    %37891 = torch.prim.ListConstruct %int4_35338, %2482, %int512_35339 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37892 = torch.aten.view %37890, %37891 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %37892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_35340 = torch.constant.int 4
    %37893 = torch.aten.mul.int %int4_35340, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35341 = torch.constant.int 4096
    %37894 = torch.prim.ListConstruct %37893, %int4096_35341 : (!torch.int, !torch.int) -> !torch.list<int>
    %37895 = torch.aten.view %37834, %37894 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37895, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37896 = torch.aten.mm %37895, %37850 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %37896, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_35342 = torch.constant.int 4
    %int512_35343 = torch.constant.int 512
    %37897 = torch.prim.ListConstruct %int4_35342, %2482, %int512_35343 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37898 = torch.aten.view %37896, %37897 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %37898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
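    // Transpose the next eight weight shards (%1392..%1399, likely the K projection)
    // from [128,4096] to [4096,128].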
    %int1_35344 = torch.constant.int 1
    %int0_35345 = torch.constant.int 0
    %37899 = torch.prim.ListConstruct %int1_35344, %int0_35345 : (!torch.int, !torch.int) -> !torch.list<int>
    %37900 = torch.aten.permute %1392, %37899 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35346 = torch.constant.int 1
    %int0_35347 = torch.constant.int 0
    %37901 = torch.prim.ListConstruct %int1_35346, %int0_35347 : (!torch.int, !torch.int) -> !torch.list<int>
    %37902 = torch.aten.permute %1393, %37901 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35348 = torch.constant.int 1
    %int0_35349 = torch.constant.int 0
    %37903 = torch.prim.ListConstruct %int1_35348, %int0_35349 : (!torch.int, !torch.int) -> !torch.list<int>
    %37904 = torch.aten.permute %1394, %37903 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35350 = torch.constant.int 1
    %int0_35351 = torch.constant.int 0
    %37905 = torch.prim.ListConstruct %int1_35350, %int0_35351 : (!torch.int, !torch.int) -> !torch.list<int>
    %37906 = torch.aten.permute %1395, %37905 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35352 = torch.constant.int 1
    %int0_35353 = torch.constant.int 0
    %37907 = torch.prim.ListConstruct %int1_35352, %int0_35353 : (!torch.int, !torch.int) -> !torch.list<int>
    %37908 = torch.aten.permute %1396, %37907 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35354 = torch.constant.int 1
    %int0_35355 = torch.constant.int 0
    %37909 = torch.prim.ListConstruct %int1_35354, %int0_35355 : (!torch.int, !torch.int) -> !torch.list<int>
    %37910 = torch.aten.permute %1397, %37909 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35356 = torch.constant.int 1
    %int0_35357 = torch.constant.int 0
    %37911 = torch.prim.ListConstruct %int1_35356, %int0_35357 : (!torch.int, !torch.int) -> !torch.list<int>
    %37912 = torch.aten.permute %1398, %37911 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35358 = torch.constant.int 1
    %int0_35359 = torch.constant.int 0
    %37913 = torch.prim.ListConstruct %int1_35358, %int0_35359 : (!torch.int, !torch.int) -> !torch.list<int>
    %37914 = torch.aten.permute %1399, %37913 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
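    // K projection per shard: the same flatten/mm/view pattern, yielding [4,?,128]
    // per device. A single 128-wide KV head per shard against 4 query heads per
    // shard is consistent with grouped-query attention.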
    %int4_35360 = torch.constant.int 4
    %37915 = torch.aten.mul.int %int4_35360, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35361 = torch.constant.int 4096
    %37916 = torch.prim.ListConstruct %37915, %int4096_35361 : (!torch.int, !torch.int) -> !torch.list<int>
    %37917 = torch.aten.view %37827, %37916 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37917, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37918 = torch.aten.mm %37917, %37900 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %37918, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35362 = torch.constant.int 4
    %int128_35363 = torch.constant.int 128
    %37919 = torch.prim.ListConstruct %int4_35362, %2482, %int128_35363 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37920 = torch.aten.view %37918, %37919 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %37920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35364 = torch.constant.int 4
    %37921 = torch.aten.mul.int %int4_35364, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35365 = torch.constant.int 4096
    %37922 = torch.prim.ListConstruct %37921, %int4096_35365 : (!torch.int, !torch.int) -> !torch.list<int>
    %37923 = torch.aten.view %37828, %37922 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37923, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37924 = torch.aten.mm %37923, %37902 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %37924, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35366 = torch.constant.int 4
    %int128_35367 = torch.constant.int 128
    %37925 = torch.prim.ListConstruct %int4_35366, %2482, %int128_35367 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37926 = torch.aten.view %37924, %37925 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %37926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35368 = torch.constant.int 4
    %37927 = torch.aten.mul.int %int4_35368, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35369 = torch.constant.int 4096
    %37928 = torch.prim.ListConstruct %37927, %int4096_35369 : (!torch.int, !torch.int) -> !torch.list<int>
    %37929 = torch.aten.view %37829, %37928 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37929, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37930 = torch.aten.mm %37929, %37904 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %37930, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35370 = torch.constant.int 4
    %int128_35371 = torch.constant.int 128
    %37931 = torch.prim.ListConstruct %int4_35370, %2482, %int128_35371 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37932 = torch.aten.view %37930, %37931 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %37932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35372 = torch.constant.int 4
    %37933 = torch.aten.mul.int %int4_35372, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35373 = torch.constant.int 4096
    %37934 = torch.prim.ListConstruct %37933, %int4096_35373 : (!torch.int, !torch.int) -> !torch.list<int>
    %37935 = torch.aten.view %37830, %37934 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37935, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37936 = torch.aten.mm %37935, %37906 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %37936, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35374 = torch.constant.int 4
    %int128_35375 = torch.constant.int 128
    %37937 = torch.prim.ListConstruct %int4_35374, %2482, %int128_35375 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37938 = torch.aten.view %37936, %37937 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %37938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35376 = torch.constant.int 4
    %37939 = torch.aten.mul.int %int4_35376, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35377 = torch.constant.int 4096
    %37940 = torch.prim.ListConstruct %37939, %int4096_35377 : (!torch.int, !torch.int) -> !torch.list<int>
    %37941 = torch.aten.view %37831, %37940 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37941, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37942 = torch.aten.mm %37941, %37908 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %37942, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35378 = torch.constant.int 4
    %int128_35379 = torch.constant.int 128
    %37943 = torch.prim.ListConstruct %int4_35378, %2482, %int128_35379 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37944 = torch.aten.view %37942, %37943 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %37944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35380 = torch.constant.int 4
    %37945 = torch.aten.mul.int %int4_35380, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35381 = torch.constant.int 4096
    %37946 = torch.prim.ListConstruct %37945, %int4096_35381 : (!torch.int, !torch.int) -> !torch.list<int>
    %37947 = torch.aten.view %37832, %37946 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37947, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37948 = torch.aten.mm %37947, %37910 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %37948, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35382 = torch.constant.int 4
    %int128_35383 = torch.constant.int 128
    %37949 = torch.prim.ListConstruct %int4_35382, %2482, %int128_35383 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37950 = torch.aten.view %37948, %37949 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %37950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35384 = torch.constant.int 4
    %37951 = torch.aten.mul.int %int4_35384, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35385 = torch.constant.int 4096
    %37952 = torch.prim.ListConstruct %37951, %int4096_35385 : (!torch.int, !torch.int) -> !torch.list<int>
    %37953 = torch.aten.view %37833, %37952 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37953, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37954 = torch.aten.mm %37953, %37912 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %37954, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35386 = torch.constant.int 4
    %int128_35387 = torch.constant.int 128
    %37955 = torch.prim.ListConstruct %int4_35386, %2482, %int128_35387 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37956 = torch.aten.view %37954, %37955 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %37956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35388 = torch.constant.int 4
    %37957 = torch.aten.mul.int %int4_35388, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35389 = torch.constant.int 4096
    %37958 = torch.prim.ListConstruct %37957, %int4096_35389 : (!torch.int, !torch.int) -> !torch.list<int>
    %37959 = torch.aten.view %37834, %37958 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37959, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37960 = torch.aten.mm %37959, %37914 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %37960, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35390 = torch.constant.int 4
    %int128_35391 = torch.constant.int 128
    %37961 = torch.prim.ListConstruct %int4_35390, %2482, %int128_35391 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37962 = torch.aten.view %37960, %37961 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %37962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
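    // Transpose the eight remaining [128,4096] shards (%1400..%1407, presumably the
    // V projection) to [4096,128].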
    %int1_35392 = torch.constant.int 1
    %int0_35393 = torch.constant.int 0
    %37963 = torch.prim.ListConstruct %int1_35392, %int0_35393 : (!torch.int, !torch.int) -> !torch.list<int>
    %37964 = torch.aten.permute %1400, %37963 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35394 = torch.constant.int 1
    %int0_35395 = torch.constant.int 0
    %37965 = torch.prim.ListConstruct %int1_35394, %int0_35395 : (!torch.int, !torch.int) -> !torch.list<int>
    %37966 = torch.aten.permute %1401, %37965 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35396 = torch.constant.int 1
    %int0_35397 = torch.constant.int 0
    %37967 = torch.prim.ListConstruct %int1_35396, %int0_35397 : (!torch.int, !torch.int) -> !torch.list<int>
    %37968 = torch.aten.permute %1402, %37967 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35398 = torch.constant.int 1
    %int0_35399 = torch.constant.int 0
    %37969 = torch.prim.ListConstruct %int1_35398, %int0_35399 : (!torch.int, !torch.int) -> !torch.list<int>
    %37970 = torch.aten.permute %1403, %37969 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35400 = torch.constant.int 1
    %int0_35401 = torch.constant.int 0
    %37971 = torch.prim.ListConstruct %int1_35400, %int0_35401 : (!torch.int, !torch.int) -> !torch.list<int>
    %37972 = torch.aten.permute %1404, %37971 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35402 = torch.constant.int 1
    %int0_35403 = torch.constant.int 0
    %37973 = torch.prim.ListConstruct %int1_35402, %int0_35403 : (!torch.int, !torch.int) -> !torch.list<int>
    %37974 = torch.aten.permute %1405, %37973 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35404 = torch.constant.int 1
    %int0_35405 = torch.constant.int 0
    %37975 = torch.prim.ListConstruct %int1_35404, %int0_35405 : (!torch.int, !torch.int) -> !torch.list<int>
    %37976 = torch.aten.permute %1406, %37975 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_35406 = torch.constant.int 1
    %int0_35407 = torch.constant.int 0
    %37977 = torch.prim.ListConstruct %int1_35406, %int0_35407 : (!torch.int, !torch.int) -> !torch.list<int>
    %37978 = torch.aten.permute %1407, %37977 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
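    // V projection per shard, again producing [4,?,128] per device.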
    %int4_35408 = torch.constant.int 4
    %37979 = torch.aten.mul.int %int4_35408, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35409 = torch.constant.int 4096
    %37980 = torch.prim.ListConstruct %37979, %int4096_35409 : (!torch.int, !torch.int) -> !torch.list<int>
    %37981 = torch.aten.view %37827, %37980 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37981, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37982 = torch.aten.mm %37981, %37964 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %37982, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35410 = torch.constant.int 4
    %int128_35411 = torch.constant.int 128
    %37983 = torch.prim.ListConstruct %int4_35410, %2482, %int128_35411 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37984 = torch.aten.view %37982, %37983 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %37984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35412 = torch.constant.int 4
    %37985 = torch.aten.mul.int %int4_35412, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35413 = torch.constant.int 4096
    %37986 = torch.prim.ListConstruct %37985, %int4096_35413 : (!torch.int, !torch.int) -> !torch.list<int>
    %37987 = torch.aten.view %37828, %37986 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37987, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37988 = torch.aten.mm %37987, %37966 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %37988, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35414 = torch.constant.int 4
    %int128_35415 = torch.constant.int 128
    %37989 = torch.prim.ListConstruct %int4_35414, %2482, %int128_35415 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37990 = torch.aten.view %37988, %37989 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %37990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35416 = torch.constant.int 4
    %37991 = torch.aten.mul.int %int4_35416, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35417 = torch.constant.int 4096
    %37992 = torch.prim.ListConstruct %37991, %int4096_35417 : (!torch.int, !torch.int) -> !torch.list<int>
    %37993 = torch.aten.view %37829, %37992 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37993, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %37994 = torch.aten.mm %37993, %37968 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %37994, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35418 = torch.constant.int 4
    %int128_35419 = torch.constant.int 128
    %37995 = torch.prim.ListConstruct %int4_35418, %2482, %int128_35419 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %37996 = torch.aten.view %37994, %37995 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %37996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35420 = torch.constant.int 4
    %37997 = torch.aten.mul.int %int4_35420, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35421 = torch.constant.int 4096
    %37998 = torch.prim.ListConstruct %37997, %int4096_35421 : (!torch.int, !torch.int) -> !torch.list<int>
    %37999 = torch.aten.view %37830, %37998 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %37999, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %38000 = torch.aten.mm %37999, %37970 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %38000, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35422 = torch.constant.int 4
    %int128_35423 = torch.constant.int 128
    %38001 = torch.prim.ListConstruct %int4_35422, %2482, %int128_35423 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38002 = torch.aten.view %38000, %38001 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %38002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35424 = torch.constant.int 4
    %38003 = torch.aten.mul.int %int4_35424, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35425 = torch.constant.int 4096
    %38004 = torch.prim.ListConstruct %38003, %int4096_35425 : (!torch.int, !torch.int) -> !torch.list<int>
    %38005 = torch.aten.view %37831, %38004 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %38005, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %38006 = torch.aten.mm %38005, %37972 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %38006, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35426 = torch.constant.int 4
    %int128_35427 = torch.constant.int 128
    %38007 = torch.prim.ListConstruct %int4_35426, %2482, %int128_35427 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38008 = torch.aten.view %38006, %38007 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %38008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35428 = torch.constant.int 4
    %38009 = torch.aten.mul.int %int4_35428, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35429 = torch.constant.int 4096
    %38010 = torch.prim.ListConstruct %38009, %int4096_35429 : (!torch.int, !torch.int) -> !torch.list<int>
    %38011 = torch.aten.view %37832, %38010 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %38011, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %38012 = torch.aten.mm %38011, %37974 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %38012, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35430 = torch.constant.int 4
    %int128_35431 = torch.constant.int 128
    %38013 = torch.prim.ListConstruct %int4_35430, %2482, %int128_35431 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38014 = torch.aten.view %38012, %38013 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %38014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35432 = torch.constant.int 4
    %38015 = torch.aten.mul.int %int4_35432, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35433 = torch.constant.int 4096
    %38016 = torch.prim.ListConstruct %38015, %int4096_35433 : (!torch.int, !torch.int) -> !torch.list<int>
    %38017 = torch.aten.view %37833, %38016 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %38017, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %38018 = torch.aten.mm %38017, %37976 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %38018, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35434 = torch.constant.int 4
    %int128_35435 = torch.constant.int 128
    %38019 = torch.prim.ListConstruct %int4_35434, %2482, %int128_35435 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38020 = torch.aten.view %38018, %38019 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %38020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_35436 = torch.constant.int 4
    %38021 = torch.aten.mul.int %int4_35436, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_35437 = torch.constant.int 4096
    %38022 = torch.prim.ListConstruct %38021, %int4096_35437 : (!torch.int, !torch.int) -> !torch.list<int>
    %38023 = torch.aten.view %37834, %38022 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %38023, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %38024 = torch.aten.mm %38023, %37978 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %38024, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_35438 = torch.constant.int 4
    %int128_35439 = torch.constant.int 128
    %38025 = torch.prim.ListConstruct %int4_35438, %2482, %int128_35439 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38026 = torch.aten.view %38024, %38025 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %38026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
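    // Expose the head dimension of Q: [4,?,512] -> [4,?,4,128], i.e. four query
    // heads of size 128 per shard.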
    %int4_35440 = torch.constant.int 4
    %int4_35441 = torch.constant.int 4
    %int128_35442 = torch.constant.int 128
    %38027 = torch.prim.ListConstruct %int4_35440, %2482, %int4_35441, %int128_35442 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38028 = torch.aten.view %37856, %38027 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_35443 = torch.constant.int 4
    %int4_35444 = torch.constant.int 4
    %int128_35445 = torch.constant.int 128
    %38029 = torch.prim.ListConstruct %int4_35443, %2482, %int4_35444, %int128_35445 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38030 = torch.aten.view %37862, %38029 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_35446 = torch.constant.int 4
    %int4_35447 = torch.constant.int 4
    %int128_35448 = torch.constant.int 128
    %38031 = torch.prim.ListConstruct %int4_35446, %2482, %int4_35447, %int128_35448 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38032 = torch.aten.view %37868, %38031 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_35449 = torch.constant.int 4
    %int4_35450 = torch.constant.int 4
    %int128_35451 = torch.constant.int 128
    %38033 = torch.prim.ListConstruct %int4_35449, %2482, %int4_35450, %int128_35451 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38034 = torch.aten.view %37874, %38033 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_35452 = torch.constant.int 4
    %int4_35453 = torch.constant.int 4
    %int128_35454 = torch.constant.int 128
    %38035 = torch.prim.ListConstruct %int4_35452, %2482, %int4_35453, %int128_35454 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38036 = torch.aten.view %37880, %38035 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_35455 = torch.constant.int 4
    %int4_35456 = torch.constant.int 4
    %int128_35457 = torch.constant.int 128
    %38037 = torch.prim.ListConstruct %int4_35455, %2482, %int4_35456, %int128_35457 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38038 = torch.aten.view %37886, %38037 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_35458 = torch.constant.int 4
    %int4_35459 = torch.constant.int 4
    %int128_35460 = torch.constant.int 128
    %38039 = torch.prim.ListConstruct %int4_35458, %2482, %int4_35459, %int128_35460 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38040 = torch.aten.view %37892, %38039 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_35461 = torch.constant.int 4
    %int4_35462 = torch.constant.int 4
    %int128_35463 = torch.constant.int 128
    %38041 = torch.prim.ListConstruct %int4_35461, %2482, %int4_35462, %int128_35463 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38042 = torch.aten.view %37898, %38041 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
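    // Reshape the K shards from [4,?,128] to [4,?,1,128]: one KV head per shard.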
    %int4_35464 = torch.constant.int 4
    %int1_35465 = torch.constant.int 1
    %int128_35466 = torch.constant.int 128
    %38043 = torch.prim.ListConstruct %int4_35464, %2482, %int1_35465, %int128_35466 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38044 = torch.aten.view %37920, %38043 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35467 = torch.constant.int 4
    %int1_35468 = torch.constant.int 1
    %int128_35469 = torch.constant.int 128
    %38045 = torch.prim.ListConstruct %int4_35467, %2482, %int1_35468, %int128_35469 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38046 = torch.aten.view %37926, %38045 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35470 = torch.constant.int 4
    %int1_35471 = torch.constant.int 1
    %int128_35472 = torch.constant.int 128
    %38047 = torch.prim.ListConstruct %int4_35470, %2482, %int1_35471, %int128_35472 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38048 = torch.aten.view %37932, %38047 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35473 = torch.constant.int 4
    %int1_35474 = torch.constant.int 1
    %int128_35475 = torch.constant.int 128
    %38049 = torch.prim.ListConstruct %int4_35473, %2482, %int1_35474, %int128_35475 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38050 = torch.aten.view %37938, %38049 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35476 = torch.constant.int 4
    %int1_35477 = torch.constant.int 1
    %int128_35478 = torch.constant.int 128
    %38051 = torch.prim.ListConstruct %int4_35476, %2482, %int1_35477, %int128_35478 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38052 = torch.aten.view %37944, %38051 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35479 = torch.constant.int 4
    %int1_35480 = torch.constant.int 1
    %int128_35481 = torch.constant.int 128
    %38053 = torch.prim.ListConstruct %int4_35479, %2482, %int1_35480, %int128_35481 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38054 = torch.aten.view %37950, %38053 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35482 = torch.constant.int 4
    %int1_35483 = torch.constant.int 1
    %int128_35484 = torch.constant.int 128
    %38055 = torch.prim.ListConstruct %int4_35482, %2482, %int1_35483, %int128_35484 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38056 = torch.aten.view %37956, %38055 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35485 = torch.constant.int 4
    %int1_35486 = torch.constant.int 1
    %int128_35487 = torch.constant.int 128
    %38057 = torch.prim.ListConstruct %int4_35485, %2482, %int1_35486, %int128_35487 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38058 = torch.aten.view %37962, %38057 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
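    // Same reshape for the V shards: [4,?,128] -> [4,?,1,128].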
    %int4_35488 = torch.constant.int 4
    %int1_35489 = torch.constant.int 1
    %int128_35490 = torch.constant.int 128
    %38059 = torch.prim.ListConstruct %int4_35488, %2482, %int1_35489, %int128_35490 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38060 = torch.aten.view %37984, %38059 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35491 = torch.constant.int 4
    %int1_35492 = torch.constant.int 1
    %int128_35493 = torch.constant.int 128
    %38061 = torch.prim.ListConstruct %int4_35491, %2482, %int1_35492, %int128_35493 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38062 = torch.aten.view %37990, %38061 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35494 = torch.constant.int 4
    %int1_35495 = torch.constant.int 1
    %int128_35496 = torch.constant.int 128
    %38063 = torch.prim.ListConstruct %int4_35494, %2482, %int1_35495, %int128_35496 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38064 = torch.aten.view %37996, %38063 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35497 = torch.constant.int 4
    %int1_35498 = torch.constant.int 1
    %int128_35499 = torch.constant.int 128
    %38065 = torch.prim.ListConstruct %int4_35497, %2482, %int1_35498, %int128_35499 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38066 = torch.aten.view %38002, %38065 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35500 = torch.constant.int 4
    %int1_35501 = torch.constant.int 1
    %int128_35502 = torch.constant.int 128
    %38067 = torch.prim.ListConstruct %int4_35500, %2482, %int1_35501, %int128_35502 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38068 = torch.aten.view %38008, %38067 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35503 = torch.constant.int 4
    %int1_35504 = torch.constant.int 1
    %int128_35505 = torch.constant.int 128
    %38069 = torch.prim.ListConstruct %int4_35503, %2482, %int1_35504, %int128_35505 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38070 = torch.aten.view %38014, %38069 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35506 = torch.constant.int 4
    %int1_35507 = torch.constant.int 1
    %int128_35508 = torch.constant.int 128
    %38071 = torch.prim.ListConstruct %int4_35506, %2482, %int1_35507, %int128_35508 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38072 = torch.aten.view %38020, %38071 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_35509 = torch.constant.int 4
    %int1_35510 = torch.constant.int 1
    %int128_35511 = torch.constant.int 128
    %38073 = torch.prim.ListConstruct %int4_35509, %2482, %int1_35510, %int128_35511 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38074 = torch.aten.view %38026, %38073 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
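    // Build the rotary-embedding (RoPE) table on the host. In pseudo-torch, reading
    // head_dim = 128, max positions = 131072, and rope theta = 500000 off the
    // constants below:
    //   inv_freq = 1.0 / (500000.0 ** (torch.arange(0, 128, 2)[:64] / 128))
    //   angles   = torch.arange(131072).view(-1, 1) * inv_freq      # [131072, 64]
    //   table    = torch.complex(angles.cos(), angles.sin())        # e^(i*angles)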
    %int131072_35512 = torch.constant.int 131072
    %none_35513 = torch.constant.none
    %none_35514 = torch.constant.none
    %cpu_35515 = torch.constant.device "cpu"
    %false_35516 = torch.constant.bool false
    %38075 = torch.aten.arange %int131072_35512, %none_35513, %none_35514, %cpu_35515, %false_35516 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_35517 = torch.constant.int 0
    %int128_35518 = torch.constant.int 128
    %int2_35519 = torch.constant.int 2
    %none_35520 = torch.constant.none
    %none_35521 = torch.constant.none
    %cpu_35522 = torch.constant.device "cpu"
    %false_35523 = torch.constant.bool false
    %38076 = torch.aten.arange.start_step %int0_35517, %int128_35518, %int2_35519, %none_35520, %none_35521, %cpu_35522, %false_35523 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_35524 = torch.constant.int 0
    %int0_35525 = torch.constant.int 0
    %int64_35526 = torch.constant.int 64
    %int1_35527 = torch.constant.int 1
    %38077 = torch.aten.slice.Tensor %38076, %int0_35524, %int0_35525, %int64_35526, %int1_35527 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_35528 = torch.constant.int 6
    %38078 = torch.prims.convert_element_type %38077, %int6_35528 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_35529 = torch.constant.int 128
    %38079 = torch.aten.div.Scalar %38078, %int128_35529 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_35530 = torch.constant.float 5.000000e+05
    %38080 = torch.aten.pow.Scalar %float5.000000e05_35530, %38079 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %38081 = torch.aten.reciprocal %38080 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_35531 = torch.constant.float 1.000000e+00
    %38082 = torch.aten.mul.Scalar %38081, %float1.000000e00_35531 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_35532 = torch.constant.int 131072
    %int1_35533 = torch.constant.int 1
    %38083 = torch.prim.ListConstruct %int131072_35532, %int1_35533 : (!torch.int, !torch.int) -> !torch.list<int>
    %38084 = torch.aten.view %38075, %38083 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %38085 = torch.aten.mul.Tensor %38084, %38082 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %38086 = torch.aten.cos %38085 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %38087 = torch.aten.sin %38085 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %38088 = torch.aten.complex %38086, %38087 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
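    // Replicate the [131072,64] complex rotation table to all eight devices via
    // flow.tensor.transfer; it is not sharded, every device receives a full copy.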
    %38089 = torch_c.to_builtin_tensor %38088 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38090 = flow.tensor.transfer %38089 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %38091 = torch_c.from_builtin_tensor %38090 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38092 = torch_c.to_builtin_tensor %38088 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38093 = flow.tensor.transfer %38092 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %38094 = torch_c.from_builtin_tensor %38093 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38095 = torch_c.to_builtin_tensor %38088 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38096 = flow.tensor.transfer %38095 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %38097 = torch_c.from_builtin_tensor %38096 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38098 = torch_c.to_builtin_tensor %38088 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38099 = flow.tensor.transfer %38098 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %38100 = torch_c.from_builtin_tensor %38099 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38101 = torch_c.to_builtin_tensor %38088 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38102 = flow.tensor.transfer %38101 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %38103 = torch_c.from_builtin_tensor %38102 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38104 = torch_c.to_builtin_tensor %38088 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38105 = flow.tensor.transfer %38104 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %38106 = torch_c.from_builtin_tensor %38105 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38107 = torch_c.to_builtin_tensor %38088 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38108 = flow.tensor.transfer %38107 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %38109 = torch_c.from_builtin_tensor %38108 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38110 = torch_c.to_builtin_tensor %38088 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38111 = flow.tensor.transfer %38110 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %38112 = torch_c.from_builtin_tensor %38111 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
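    // Device 0: slice its copy of the table to the runtime sequence length, reshape
    // to [1, seq, 1, 64], bitcast the [4, ?, 4, 128] f16 shard to complex<f16>
    // (pairing adjacent lanes), apply the rotation by complex multiply (widening to
    // f32), then bitcast back to [4, ?, 4, 128] and narrow to f16.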
    %int1_35534 = torch.constant.int 1
    %38113 = torch.aten.size.int %37856, %int1_35534 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_35535 = torch.constant.int 0
    %38114 = torch.aten.add.int %int0_35535, %38113 : !torch.int, !torch.int -> !torch.int
    %int0_35536 = torch.constant.int 0
    %int0_35537 = torch.constant.int 0
    %int1_35538 = torch.constant.int 1
    %38115 = torch.aten.slice.Tensor %38091, %int0_35536, %int0_35537, %38114, %int1_35538 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38115, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35539 = torch.constant.int 1
    %int0_35540 = torch.constant.int 0
    %int9223372036854775807_35541 = torch.constant.int 9223372036854775807
    %int1_35542 = torch.constant.int 1
    %38116 = torch.aten.slice.Tensor %38115, %int1_35539, %int0_35540, %int9223372036854775807_35541, %int1_35542 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38116, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35543 = torch.constant.int 0
    %38117 = torch.aten.unsqueeze %38116, %int0_35543 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38117, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35544 = torch.constant.int 2
    %38118 = torch.aten.unsqueeze %38117, %int2_35544 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38118, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35545 = torch.constant.int 3
    %int0_35546 = torch.constant.int 0
    %int9223372036854775807_35547 = torch.constant.int 9223372036854775807
    %int1_35548 = torch.constant.int 1
    %38119 = torch.aten.slice.Tensor %38118, %int3_35545, %int0_35546, %int9223372036854775807_35547, %int1_35548 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38119, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38120 = torch_c.to_builtin_tensor %38028 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_35549 = arith.constant 1 : index
    %dim_35550 = tensor.dim %38120, %c1_35549 : tensor<4x?x4x128xf16>
    %38121 = flow.tensor.bitcast %38120 : tensor<4x?x4x128xf16>{%dim_35550} -> tensor<4x?x4x64xcomplex<f16>>{%dim_35550}
    %38122 = torch_c.from_builtin_tensor %38121 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %38122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %38123 = torch.aten.mul.Tensor %38122, %38119 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %38123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %38124 = torch_c.to_builtin_tensor %38123 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_35551 = arith.constant 1 : index
    %dim_35552 = tensor.dim %38124, %c1_35551 : tensor<4x?x4x64xcomplex<f32>>
    %38125 = flow.tensor.bitcast %38124 : tensor<4x?x4x64xcomplex<f32>>{%dim_35552} -> tensor<4x?x4x128xf32>{%dim_35552}
    %38126 = torch_c.from_builtin_tensor %38125 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %38126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_35553 = torch.constant.int 5
    %38127 = torch.prims.convert_element_type %38126, %int5_35553 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
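    // Device 1: identical RoPE application on this device's [4, ?, 4, 128] shard.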
    %int1_35554 = torch.constant.int 1
    %38128 = torch.aten.size.int %37862, %int1_35554 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_35555 = torch.constant.int 0
    %38129 = torch.aten.add.int %int0_35555, %38128 : !torch.int, !torch.int -> !torch.int
    %int0_35556 = torch.constant.int 0
    %int0_35557 = torch.constant.int 0
    %int1_35558 = torch.constant.int 1
    %38130 = torch.aten.slice.Tensor %38094, %int0_35556, %int0_35557, %38129, %int1_35558 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38130, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35559 = torch.constant.int 1
    %int0_35560 = torch.constant.int 0
    %int9223372036854775807_35561 = torch.constant.int 9223372036854775807
    %int1_35562 = torch.constant.int 1
    %38131 = torch.aten.slice.Tensor %38130, %int1_35559, %int0_35560, %int9223372036854775807_35561, %int1_35562 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38131, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35563 = torch.constant.int 0
    %38132 = torch.aten.unsqueeze %38131, %int0_35563 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38132, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35564 = torch.constant.int 2
    %38133 = torch.aten.unsqueeze %38132, %int2_35564 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38133, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35565 = torch.constant.int 3
    %int0_35566 = torch.constant.int 0
    %int9223372036854775807_35567 = torch.constant.int 9223372036854775807
    %int1_35568 = torch.constant.int 1
    %38134 = torch.aten.slice.Tensor %38133, %int3_35565, %int0_35566, %int9223372036854775807_35567, %int1_35568 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38134, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38135 = torch_c.to_builtin_tensor %38030 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_35569 = arith.constant 1 : index
    %dim_35570 = tensor.dim %38135, %c1_35569 : tensor<4x?x4x128xf16>
    %38136 = flow.tensor.bitcast %38135 : tensor<4x?x4x128xf16>{%dim_35570} -> tensor<4x?x4x64xcomplex<f16>>{%dim_35570}
    %38137 = torch_c.from_builtin_tensor %38136 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %38137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %38138 = torch.aten.mul.Tensor %38137, %38134 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %38138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %38139 = torch_c.to_builtin_tensor %38138 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_35571 = arith.constant 1 : index
    %dim_35572 = tensor.dim %38139, %c1_35571 : tensor<4x?x4x64xcomplex<f32>>
    %38140 = flow.tensor.bitcast %38139 : tensor<4x?x4x64xcomplex<f32>>{%dim_35572} -> tensor<4x?x4x128xf32>{%dim_35572}
    %38141 = torch_c.from_builtin_tensor %38140 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %38141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_35573 = torch.constant.int 5
    %38142 = torch.prims.convert_element_type %38141, %int5_35573 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
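    // Device 2: identical RoPE application on this device's [4, ?, 4, 128] shard.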
    %int1_35574 = torch.constant.int 1
    %38143 = torch.aten.size.int %37868, %int1_35574 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_35575 = torch.constant.int 0
    %38144 = torch.aten.add.int %int0_35575, %38143 : !torch.int, !torch.int -> !torch.int
    %int0_35576 = torch.constant.int 0
    %int0_35577 = torch.constant.int 0
    %int1_35578 = torch.constant.int 1
    %38145 = torch.aten.slice.Tensor %38097, %int0_35576, %int0_35577, %38144, %int1_35578 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38145, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35579 = torch.constant.int 1
    %int0_35580 = torch.constant.int 0
    %int9223372036854775807_35581 = torch.constant.int 9223372036854775807
    %int1_35582 = torch.constant.int 1
    %38146 = torch.aten.slice.Tensor %38145, %int1_35579, %int0_35580, %int9223372036854775807_35581, %int1_35582 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38146, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35583 = torch.constant.int 0
    %38147 = torch.aten.unsqueeze %38146, %int0_35583 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38147, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35584 = torch.constant.int 2
    %38148 = torch.aten.unsqueeze %38147, %int2_35584 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38148, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35585 = torch.constant.int 3
    %int0_35586 = torch.constant.int 0
    %int9223372036854775807_35587 = torch.constant.int 9223372036854775807
    %int1_35588 = torch.constant.int 1
    %38149 = torch.aten.slice.Tensor %38148, %int3_35585, %int0_35586, %int9223372036854775807_35587, %int1_35588 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38149, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38150 = torch_c.to_builtin_tensor %38032 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_35589 = arith.constant 1 : index
    %dim_35590 = tensor.dim %38150, %c1_35589 : tensor<4x?x4x128xf16>
    %38151 = flow.tensor.bitcast %38150 : tensor<4x?x4x128xf16>{%dim_35590} -> tensor<4x?x4x64xcomplex<f16>>{%dim_35590}
    %38152 = torch_c.from_builtin_tensor %38151 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %38152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %38153 = torch.aten.mul.Tensor %38152, %38149 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %38153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %38154 = torch_c.to_builtin_tensor %38153 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_35591 = arith.constant 1 : index
    %dim_35592 = tensor.dim %38154, %c1_35591 : tensor<4x?x4x64xcomplex<f32>>
    %38155 = flow.tensor.bitcast %38154 : tensor<4x?x4x64xcomplex<f32>>{%dim_35592} -> tensor<4x?x4x128xf32>{%dim_35592}
    %38156 = torch_c.from_builtin_tensor %38155 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %38156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_35593 = torch.constant.int 5
    %38157 = torch.prims.convert_element_type %38156, %int5_35593 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
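    // Device 3: identical RoPE application on this device's [4, ?, 4, 128] shard.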
    %int1_35594 = torch.constant.int 1
    %38158 = torch.aten.size.int %37874, %int1_35594 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_35595 = torch.constant.int 0
    %38159 = torch.aten.add.int %int0_35595, %38158 : !torch.int, !torch.int -> !torch.int
    %int0_35596 = torch.constant.int 0
    %int0_35597 = torch.constant.int 0
    %int1_35598 = torch.constant.int 1
    %38160 = torch.aten.slice.Tensor %38100, %int0_35596, %int0_35597, %38159, %int1_35598 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38160, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35599 = torch.constant.int 1
    %int0_35600 = torch.constant.int 0
    %int9223372036854775807_35601 = torch.constant.int 9223372036854775807
    %int1_35602 = torch.constant.int 1
    %38161 = torch.aten.slice.Tensor %38160, %int1_35599, %int0_35600, %int9223372036854775807_35601, %int1_35602 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38161, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35603 = torch.constant.int 0
    %38162 = torch.aten.unsqueeze %38161, %int0_35603 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38162, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35604 = torch.constant.int 2
    %38163 = torch.aten.unsqueeze %38162, %int2_35604 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38163, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35605 = torch.constant.int 3
    %int0_35606 = torch.constant.int 0
    %int9223372036854775807_35607 = torch.constant.int 9223372036854775807
    %int1_35608 = torch.constant.int 1
    %38164 = torch.aten.slice.Tensor %38163, %int3_35605, %int0_35606, %int9223372036854775807_35607, %int1_35608 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38164, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38165 = torch_c.to_builtin_tensor %38034 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_35609 = arith.constant 1 : index
    %dim_35610 = tensor.dim %38165, %c1_35609 : tensor<4x?x4x128xf16>
    %38166 = flow.tensor.bitcast %38165 : tensor<4x?x4x128xf16>{%dim_35610} -> tensor<4x?x4x64xcomplex<f16>>{%dim_35610}
    %38167 = torch_c.from_builtin_tensor %38166 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %38167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %38168 = torch.aten.mul.Tensor %38167, %38164 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %38168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %38169 = torch_c.to_builtin_tensor %38168 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_35611 = arith.constant 1 : index
    %dim_35612 = tensor.dim %38169, %c1_35611 : tensor<4x?x4x64xcomplex<f32>>
    %38170 = flow.tensor.bitcast %38169 : tensor<4x?x4x64xcomplex<f32>>{%dim_35612} -> tensor<4x?x4x128xf32>{%dim_35612}
    %38171 = torch_c.from_builtin_tensor %38170 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %38171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_35613 = torch.constant.int 5
    %38172 = torch.prims.convert_element_type %38171, %int5_35613 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
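    // Device 4: identical RoPE application on this device's [4, ?, 4, 128] shard.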
    %int1_35614 = torch.constant.int 1
    %38173 = torch.aten.size.int %37880, %int1_35614 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_35615 = torch.constant.int 0
    %38174 = torch.aten.add.int %int0_35615, %38173 : !torch.int, !torch.int -> !torch.int
    %int0_35616 = torch.constant.int 0
    %int0_35617 = torch.constant.int 0
    %int1_35618 = torch.constant.int 1
    %38175 = torch.aten.slice.Tensor %38103, %int0_35616, %int0_35617, %38174, %int1_35618 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38175, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35619 = torch.constant.int 1
    %int0_35620 = torch.constant.int 0
    %int9223372036854775807_35621 = torch.constant.int 9223372036854775807
    %int1_35622 = torch.constant.int 1
    %38176 = torch.aten.slice.Tensor %38175, %int1_35619, %int0_35620, %int9223372036854775807_35621, %int1_35622 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38176, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35623 = torch.constant.int 0
    %38177 = torch.aten.unsqueeze %38176, %int0_35623 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38177, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35624 = torch.constant.int 2
    %38178 = torch.aten.unsqueeze %38177, %int2_35624 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38178, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35625 = torch.constant.int 3
    %int0_35626 = torch.constant.int 0
    %int9223372036854775807_35627 = torch.constant.int 9223372036854775807
    %int1_35628 = torch.constant.int 1
    %38179 = torch.aten.slice.Tensor %38178, %int3_35625, %int0_35626, %int9223372036854775807_35627, %int1_35628 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38179, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38180 = torch_c.to_builtin_tensor %38036 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_35629 = arith.constant 1 : index
    %dim_35630 = tensor.dim %38180, %c1_35629 : tensor<4x?x4x128xf16>
    %38181 = flow.tensor.bitcast %38180 : tensor<4x?x4x128xf16>{%dim_35630} -> tensor<4x?x4x64xcomplex<f16>>{%dim_35630}
    %38182 = torch_c.from_builtin_tensor %38181 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %38182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %38183 = torch.aten.mul.Tensor %38182, %38179 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %38183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %38184 = torch_c.to_builtin_tensor %38183 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_35631 = arith.constant 1 : index
    %dim_35632 = tensor.dim %38184, %c1_35631 : tensor<4x?x4x64xcomplex<f32>>
    %38185 = flow.tensor.bitcast %38184 : tensor<4x?x4x64xcomplex<f32>>{%dim_35632} -> tensor<4x?x4x128xf32>{%dim_35632}
    %38186 = torch_c.from_builtin_tensor %38185 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %38186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_35633 = torch.constant.int 5
    %38187 = torch.prims.convert_element_type %38186, %int5_35633 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
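    // Device 5: identical RoPE application on this device's [4, ?, 4, 128] shard.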
    %int1_35634 = torch.constant.int 1
    %38188 = torch.aten.size.int %37886, %int1_35634 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_35635 = torch.constant.int 0
    %38189 = torch.aten.add.int %int0_35635, %38188 : !torch.int, !torch.int -> !torch.int
    %int0_35636 = torch.constant.int 0
    %int0_35637 = torch.constant.int 0
    %int1_35638 = torch.constant.int 1
    %38190 = torch.aten.slice.Tensor %38106, %int0_35636, %int0_35637, %38189, %int1_35638 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38190, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35639 = torch.constant.int 1
    %int0_35640 = torch.constant.int 0
    %int9223372036854775807_35641 = torch.constant.int 9223372036854775807
    %int1_35642 = torch.constant.int 1
    %38191 = torch.aten.slice.Tensor %38190, %int1_35639, %int0_35640, %int9223372036854775807_35641, %int1_35642 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38191, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35643 = torch.constant.int 0
    %38192 = torch.aten.unsqueeze %38191, %int0_35643 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38192, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35644 = torch.constant.int 2
    %38193 = torch.aten.unsqueeze %38192, %int2_35644 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38193, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35645 = torch.constant.int 3
    %int0_35646 = torch.constant.int 0
    %int9223372036854775807_35647 = torch.constant.int 9223372036854775807
    %int1_35648 = torch.constant.int 1
    %38194 = torch.aten.slice.Tensor %38193, %int3_35645, %int0_35646, %int9223372036854775807_35647, %int1_35648 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38194, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38195 = torch_c.to_builtin_tensor %38038 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_35649 = arith.constant 1 : index
    %dim_35650 = tensor.dim %38195, %c1_35649 : tensor<4x?x4x128xf16>
    %38196 = flow.tensor.bitcast %38195 : tensor<4x?x4x128xf16>{%dim_35650} -> tensor<4x?x4x64xcomplex<f16>>{%dim_35650}
    %38197 = torch_c.from_builtin_tensor %38196 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %38197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %38198 = torch.aten.mul.Tensor %38197, %38194 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %38198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %38199 = torch_c.to_builtin_tensor %38198 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_35651 = arith.constant 1 : index
    %dim_35652 = tensor.dim %38199, %c1_35651 : tensor<4x?x4x64xcomplex<f32>>
    %38200 = flow.tensor.bitcast %38199 : tensor<4x?x4x64xcomplex<f32>>{%dim_35652} -> tensor<4x?x4x128xf32>{%dim_35652}
    %38201 = torch_c.from_builtin_tensor %38200 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %38201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_35653 = torch.constant.int 5
    %38202 = torch.prims.convert_element_type %38201, %int5_35653 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
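    // Device 6: identical RoPE application on this device's [4, ?, 4, 128] shard.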
    %int1_35654 = torch.constant.int 1
    %38203 = torch.aten.size.int %37892, %int1_35654 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_35655 = torch.constant.int 0
    %38204 = torch.aten.add.int %int0_35655, %38203 : !torch.int, !torch.int -> !torch.int
    %int0_35656 = torch.constant.int 0
    %int0_35657 = torch.constant.int 0
    %int1_35658 = torch.constant.int 1
    %38205 = torch.aten.slice.Tensor %38109, %int0_35656, %int0_35657, %38204, %int1_35658 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38205, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35659 = torch.constant.int 1
    %int0_35660 = torch.constant.int 0
    %int9223372036854775807_35661 = torch.constant.int 9223372036854775807
    %int1_35662 = torch.constant.int 1
    %38206 = torch.aten.slice.Tensor %38205, %int1_35659, %int0_35660, %int9223372036854775807_35661, %int1_35662 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38206, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35663 = torch.constant.int 0
    %38207 = torch.aten.unsqueeze %38206, %int0_35663 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38207, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35664 = torch.constant.int 2
    %38208 = torch.aten.unsqueeze %38207, %int2_35664 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38208, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35665 = torch.constant.int 3
    %int0_35666 = torch.constant.int 0
    %int9223372036854775807_35667 = torch.constant.int 9223372036854775807
    %int1_35668 = torch.constant.int 1
    %38209 = torch.aten.slice.Tensor %38208, %int3_35665, %int0_35666, %int9223372036854775807_35667, %int1_35668 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38209, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38210 = torch_c.to_builtin_tensor %38040 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_35669 = arith.constant 1 : index
    %dim_35670 = tensor.dim %38210, %c1_35669 : tensor<4x?x4x128xf16>
    %38211 = flow.tensor.bitcast %38210 : tensor<4x?x4x128xf16>{%dim_35670} -> tensor<4x?x4x64xcomplex<f16>>{%dim_35670}
    %38212 = torch_c.from_builtin_tensor %38211 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %38212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %38213 = torch.aten.mul.Tensor %38212, %38209 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %38213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %38214 = torch_c.to_builtin_tensor %38213 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_35671 = arith.constant 1 : index
    %dim_35672 = tensor.dim %38214, %c1_35671 : tensor<4x?x4x64xcomplex<f32>>
    %38215 = flow.tensor.bitcast %38214 : tensor<4x?x4x64xcomplex<f32>>{%dim_35672} -> tensor<4x?x4x128xf32>{%dim_35672}
    %38216 = torch_c.from_builtin_tensor %38215 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %38216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_35673 = torch.constant.int 5
    %38217 = torch.prims.convert_element_type %38216, %int5_35673 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
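    // Device 7: identical RoPE application on this device's [4, ?, 4, 128] shard.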
    %int1_35674 = torch.constant.int 1
    %38218 = torch.aten.size.int %37898, %int1_35674 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_35675 = torch.constant.int 0
    %38219 = torch.aten.add.int %int0_35675, %38218 : !torch.int, !torch.int -> !torch.int
    %int0_35676 = torch.constant.int 0
    %int0_35677 = torch.constant.int 0
    %int1_35678 = torch.constant.int 1
    %38220 = torch.aten.slice.Tensor %38112, %int0_35676, %int0_35677, %38219, %int1_35678 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38220, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35679 = torch.constant.int 1
    %int0_35680 = torch.constant.int 0
    %int9223372036854775807_35681 = torch.constant.int 9223372036854775807
    %int1_35682 = torch.constant.int 1
    %38221 = torch.aten.slice.Tensor %38220, %int1_35679, %int0_35680, %int9223372036854775807_35681, %int1_35682 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38221, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35683 = torch.constant.int 0
    %38222 = torch.aten.unsqueeze %38221, %int0_35683 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38222, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35684 = torch.constant.int 2
    %38223 = torch.aten.unsqueeze %38222, %int2_35684 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38223, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35685 = torch.constant.int 3
    %int0_35686 = torch.constant.int 0
    %int9223372036854775807_35687 = torch.constant.int 9223372036854775807
    %int1_35688 = torch.constant.int 1
    %38224 = torch.aten.slice.Tensor %38223, %int3_35685, %int0_35686, %int9223372036854775807_35687, %int1_35688 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38224, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38225 = torch_c.to_builtin_tensor %38042 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_35689 = arith.constant 1 : index
    %dim_35690 = tensor.dim %38225, %c1_35689 : tensor<4x?x4x128xf16>
    %38226 = flow.tensor.bitcast %38225 : tensor<4x?x4x128xf16>{%dim_35690} -> tensor<4x?x4x64xcomplex<f16>>{%dim_35690}
    %38227 = torch_c.from_builtin_tensor %38226 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %38227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %38228 = torch.aten.mul.Tensor %38227, %38224 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %38228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %38229 = torch_c.to_builtin_tensor %38228 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_35691 = arith.constant 1 : index
    %dim_35692 = tensor.dim %38229, %c1_35691 : tensor<4x?x4x64xcomplex<f32>>
    %38230 = flow.tensor.bitcast %38229 : tensor<4x?x4x64xcomplex<f32>>{%dim_35692} -> tensor<4x?x4x128xf32>{%dim_35692}
    %38231 = torch_c.from_builtin_tensor %38230 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %38231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_35693 = torch.constant.int 5
    %38232 = torch.prims.convert_element_type %38231, %int5_35693 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
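    // Rebuild the same position/frequency table from scratch with identical
    // constants (arange over 131072 positions, base 500000, step-2 arange over the
    // 128-wide head dim). This second copy feeds the [4, ?, 1, 128] single-head
    // shards below.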
    %int131072_35694 = torch.constant.int 131072
    %none_35695 = torch.constant.none
    %none_35696 = torch.constant.none
    %cpu_35697 = torch.constant.device "cpu"
    %false_35698 = torch.constant.bool false
    %38233 = torch.aten.arange %int131072_35694, %none_35695, %none_35696, %cpu_35697, %false_35698 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_35699 = torch.constant.int 0
    %int128_35700 = torch.constant.int 128
    %int2_35701 = torch.constant.int 2
    %none_35702 = torch.constant.none
    %none_35703 = torch.constant.none
    %cpu_35704 = torch.constant.device "cpu"
    %false_35705 = torch.constant.bool false
    %38234 = torch.aten.arange.start_step %int0_35699, %int128_35700, %int2_35701, %none_35702, %none_35703, %cpu_35704, %false_35705 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_35706 = torch.constant.int 0
    %int0_35707 = torch.constant.int 0
    %int64_35708 = torch.constant.int 64
    %int1_35709 = torch.constant.int 1
    %38235 = torch.aten.slice.Tensor %38234, %int0_35706, %int0_35707, %int64_35708, %int1_35709 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_35710 = torch.constant.int 6
    %38236 = torch.prims.convert_element_type %38235, %int6_35710 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_35711 = torch.constant.int 128
    %38237 = torch.aten.div.Scalar %38236, %int128_35711 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_35712 = torch.constant.float 5.000000e+05
    %38238 = torch.aten.pow.Scalar %float5.000000e05_35712, %38237 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %38239 = torch.aten.reciprocal %38238 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_35713 = torch.constant.float 1.000000e+00
    %38240 = torch.aten.mul.Scalar %38239, %float1.000000e00_35713 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_35714 = torch.constant.int 131072
    %int1_35715 = torch.constant.int 1
    %38241 = torch.prim.ListConstruct %int131072_35714, %int1_35715 : (!torch.int, !torch.int) -> !torch.list<int>
    %38242 = torch.aten.view %38233, %38241 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %38243 = torch.aten.mul.Tensor %38242, %38240 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %38244 = torch.aten.cos %38243 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %38245 = torch.aten.sin %38243 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %38246 = torch.aten.complex %38244, %38245 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
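    // Replicate the second rotation table to devices 0-7, mirroring the transfers above.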
    %38247 = torch_c.to_builtin_tensor %38246 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38248 = flow.tensor.transfer %38247 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %38249 = torch_c.from_builtin_tensor %38248 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38250 = torch_c.to_builtin_tensor %38246 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38251 = flow.tensor.transfer %38250 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %38252 = torch_c.from_builtin_tensor %38251 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38253 = torch_c.to_builtin_tensor %38246 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38254 = flow.tensor.transfer %38253 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %38255 = torch_c.from_builtin_tensor %38254 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38256 = torch_c.to_builtin_tensor %38246 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38257 = flow.tensor.transfer %38256 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %38258 = torch_c.from_builtin_tensor %38257 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38259 = torch_c.to_builtin_tensor %38246 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38260 = flow.tensor.transfer %38259 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %38261 = torch_c.from_builtin_tensor %38260 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38262 = torch_c.to_builtin_tensor %38246 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38263 = flow.tensor.transfer %38262 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %38264 = torch_c.from_builtin_tensor %38263 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38265 = torch_c.to_builtin_tensor %38246 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38266 = flow.tensor.transfer %38265 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %38267 = torch_c.from_builtin_tensor %38266 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %38268 = torch_c.to_builtin_tensor %38246 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %38269 = flow.tensor.transfer %38268 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %38270 = torch_c.from_builtin_tensor %38269 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
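    // Device 0: apply RoPE to its [4, ?, 1, 128] shard; with one head per device
    // this is likely the key projection under grouped-query attention.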
    %int1_35716 = torch.constant.int 1
    %38271 = torch.aten.size.int %37920, %int1_35716 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_35717 = torch.constant.int 0
    %38272 = torch.aten.add.int %int0_35717, %38271 : !torch.int, !torch.int -> !torch.int
    %int0_35718 = torch.constant.int 0
    %int0_35719 = torch.constant.int 0
    %int1_35720 = torch.constant.int 1
    %38273 = torch.aten.slice.Tensor %38249, %int0_35718, %int0_35719, %38272, %int1_35720 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38273, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35721 = torch.constant.int 1
    %int0_35722 = torch.constant.int 0
    %int9223372036854775807_35723 = torch.constant.int 9223372036854775807
    %int1_35724 = torch.constant.int 1
    %38274 = torch.aten.slice.Tensor %38273, %int1_35721, %int0_35722, %int9223372036854775807_35723, %int1_35724 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38274, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35725 = torch.constant.int 0
    %38275 = torch.aten.unsqueeze %38274, %int0_35725 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38275, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35726 = torch.constant.int 2
    %38276 = torch.aten.unsqueeze %38275, %int2_35726 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38276, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35727 = torch.constant.int 3
    %int0_35728 = torch.constant.int 0
    %int9223372036854775807_35729 = torch.constant.int 9223372036854775807
    %int1_35730 = torch.constant.int 1
    %38277 = torch.aten.slice.Tensor %38276, %int3_35727, %int0_35728, %int9223372036854775807_35729, %int1_35730 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38277, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38278 = torch_c.to_builtin_tensor %38044 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_35731 = arith.constant 1 : index
    %dim_35732 = tensor.dim %38278, %c1_35731 : tensor<4x?x1x128xf16>
    %38279 = flow.tensor.bitcast %38278 : tensor<4x?x1x128xf16>{%dim_35732} -> tensor<4x?x1x64xcomplex<f16>>{%dim_35732}
    %38280 = torch_c.from_builtin_tensor %38279 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %38280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %38281 = torch.aten.mul.Tensor %38280, %38277 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %38282 = torch_c.to_builtin_tensor %38281 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_35733 = arith.constant 1 : index
    %dim_35734 = tensor.dim %38282, %c1_35733 : tensor<4x?x1x64xcomplex<f32>>
    %38283 = flow.tensor.bitcast %38282 : tensor<4x?x1x64xcomplex<f32>>{%dim_35734} -> tensor<4x?x1x128xf32>{%dim_35734}
    %38284 = torch_c.from_builtin_tensor %38283 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %38284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_35735 = torch.constant.int 5
    %38285 = torch.prims.convert_element_type %38284, %int5_35735 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
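    // Device 1: same single-head RoPE application on its [4, ?, 1, 128] shard.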
    %int1_35736 = torch.constant.int 1
    %38286 = torch.aten.size.int %37926, %int1_35736 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_35737 = torch.constant.int 0
    %38287 = torch.aten.add.int %int0_35737, %38286 : !torch.int, !torch.int -> !torch.int
    %int0_35738 = torch.constant.int 0
    %int0_35739 = torch.constant.int 0
    %int1_35740 = torch.constant.int 1
    %38288 = torch.aten.slice.Tensor %38252, %int0_35738, %int0_35739, %38287, %int1_35740 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38288, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35741 = torch.constant.int 1
    %int0_35742 = torch.constant.int 0
    %int9223372036854775807_35743 = torch.constant.int 9223372036854775807
    %int1_35744 = torch.constant.int 1
    %38289 = torch.aten.slice.Tensor %38288, %int1_35741, %int0_35742, %int9223372036854775807_35743, %int1_35744 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38289, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35745 = torch.constant.int 0
    %38290 = torch.aten.unsqueeze %38289, %int0_35745 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38290, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35746 = torch.constant.int 2
    %38291 = torch.aten.unsqueeze %38290, %int2_35746 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38291, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35747 = torch.constant.int 3
    %int0_35748 = torch.constant.int 0
    %int9223372036854775807_35749 = torch.constant.int 9223372036854775807
    %int1_35750 = torch.constant.int 1
    %38292 = torch.aten.slice.Tensor %38291, %int3_35747, %int0_35748, %int9223372036854775807_35749, %int1_35750 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38292, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38293 = torch_c.to_builtin_tensor %38046 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_35751 = arith.constant 1 : index
    %dim_35752 = tensor.dim %38293, %c1_35751 : tensor<4x?x1x128xf16>
    %38294 = flow.tensor.bitcast %38293 : tensor<4x?x1x128xf16>{%dim_35752} -> tensor<4x?x1x64xcomplex<f16>>{%dim_35752}
    %38295 = torch_c.from_builtin_tensor %38294 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %38295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %38296 = torch.aten.mul.Tensor %38295, %38292 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %38297 = torch_c.to_builtin_tensor %38296 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_35753 = arith.constant 1 : index
    %dim_35754 = tensor.dim %38297, %c1_35753 : tensor<4x?x1x64xcomplex<f32>>
    %38298 = flow.tensor.bitcast %38297 : tensor<4x?x1x64xcomplex<f32>>{%dim_35754} -> tensor<4x?x1x128xf32>{%dim_35754}
    %38299 = torch_c.from_builtin_tensor %38298 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %38299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_35755 = torch.constant.int 5
    %38300 = torch.prims.convert_element_type %38299, %int5_35755 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
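    // Device 2: same single-head RoPE application on its [4, ?, 1, 128] shard.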
    %int1_35756 = torch.constant.int 1
    %38301 = torch.aten.size.int %37932, %int1_35756 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_35757 = torch.constant.int 0
    %38302 = torch.aten.add.int %int0_35757, %38301 : !torch.int, !torch.int -> !torch.int
    %int0_35758 = torch.constant.int 0
    %int0_35759 = torch.constant.int 0
    %int1_35760 = torch.constant.int 1
    %38303 = torch.aten.slice.Tensor %38255, %int0_35758, %int0_35759, %38302, %int1_35760 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38303, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35761 = torch.constant.int 1
    %int0_35762 = torch.constant.int 0
    %int9223372036854775807_35763 = torch.constant.int 9223372036854775807
    %int1_35764 = torch.constant.int 1
    %38304 = torch.aten.slice.Tensor %38303, %int1_35761, %int0_35762, %int9223372036854775807_35763, %int1_35764 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38304, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35765 = torch.constant.int 0
    %38305 = torch.aten.unsqueeze %38304, %int0_35765 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38305, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35766 = torch.constant.int 2
    %38306 = torch.aten.unsqueeze %38305, %int2_35766 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38306, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35767 = torch.constant.int 3
    %int0_35768 = torch.constant.int 0
    %int9223372036854775807_35769 = torch.constant.int 9223372036854775807
    %int1_35770 = torch.constant.int 1
    %38307 = torch.aten.slice.Tensor %38306, %int3_35767, %int0_35768, %int9223372036854775807_35769, %int1_35770 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38307, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38308 = torch_c.to_builtin_tensor %38048 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_35771 = arith.constant 1 : index
    %dim_35772 = tensor.dim %38308, %c1_35771 : tensor<4x?x1x128xf16>
    %38309 = flow.tensor.bitcast %38308 : tensor<4x?x1x128xf16>{%dim_35772} -> tensor<4x?x1x64xcomplex<f16>>{%dim_35772}
    %38310 = torch_c.from_builtin_tensor %38309 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %38310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %38311 = torch.aten.mul.Tensor %38310, %38307 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %38312 = torch_c.to_builtin_tensor %38311 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_35773 = arith.constant 1 : index
    %dim_35774 = tensor.dim %38312, %c1_35773 : tensor<4x?x1x64xcomplex<f32>>
    %38313 = flow.tensor.bitcast %38312 : tensor<4x?x1x64xcomplex<f32>>{%dim_35774} -> tensor<4x?x1x128xf32>{%dim_35774}
    %38314 = torch_c.from_builtin_tensor %38313 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %38314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_35775 = torch.constant.int 5
    %38315 = torch.prims.convert_element_type %38314, %int5_35775 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
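    // Device 3: same single-head RoPE application on its [4, ?, 1, 128] shard.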
    %int1_35776 = torch.constant.int 1
    %38316 = torch.aten.size.int %37938, %int1_35776 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_35777 = torch.constant.int 0
    %38317 = torch.aten.add.int %int0_35777, %38316 : !torch.int, !torch.int -> !torch.int
    %int0_35778 = torch.constant.int 0
    %int0_35779 = torch.constant.int 0
    %int1_35780 = torch.constant.int 1
    %38318 = torch.aten.slice.Tensor %38258, %int0_35778, %int0_35779, %38317, %int1_35780 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38318, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35781 = torch.constant.int 1
    %int0_35782 = torch.constant.int 0
    %int9223372036854775807_35783 = torch.constant.int 9223372036854775807
    %int1_35784 = torch.constant.int 1
    %38319 = torch.aten.slice.Tensor %38318, %int1_35781, %int0_35782, %int9223372036854775807_35783, %int1_35784 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38319, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35785 = torch.constant.int 0
    %38320 = torch.aten.unsqueeze %38319, %int0_35785 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38320, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35786 = torch.constant.int 2
    %38321 = torch.aten.unsqueeze %38320, %int2_35786 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38321, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35787 = torch.constant.int 3
    %int0_35788 = torch.constant.int 0
    %int9223372036854775807_35789 = torch.constant.int 9223372036854775807
    %int1_35790 = torch.constant.int 1
    %38322 = torch.aten.slice.Tensor %38321, %int3_35787, %int0_35788, %int9223372036854775807_35789, %int1_35790 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38322, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38323 = torch_c.to_builtin_tensor %38050 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_35791 = arith.constant 1 : index
    %dim_35792 = tensor.dim %38323, %c1_35791 : tensor<4x?x1x128xf16>
    %38324 = flow.tensor.bitcast %38323 : tensor<4x?x1x128xf16>{%dim_35792} -> tensor<4x?x1x64xcomplex<f16>>{%dim_35792}
    %38325 = torch_c.from_builtin_tensor %38324 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %38325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %38326 = torch.aten.mul.Tensor %38325, %38322 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %38327 = torch_c.to_builtin_tensor %38326 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_35793 = arith.constant 1 : index
    %dim_35794 = tensor.dim %38327, %c1_35793 : tensor<4x?x1x64xcomplex<f32>>
    %38328 = flow.tensor.bitcast %38327 : tensor<4x?x1x64xcomplex<f32>>{%dim_35794} -> tensor<4x?x1x128xf32>{%dim_35794}
    %38329 = torch_c.from_builtin_tensor %38328 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %38329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_35795 = torch.constant.int 5
    %38330 = torch.prims.convert_element_type %38329, %int5_35795 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_35796 = torch.constant.int 1
    %38331 = torch.aten.size.int %37944, %int1_35796 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_35797 = torch.constant.int 0
    %38332 = torch.aten.add.int %int0_35797, %38331 : !torch.int, !torch.int -> !torch.int
    %int0_35798 = torch.constant.int 0
    %int0_35799 = torch.constant.int 0
    %int1_35800 = torch.constant.int 1
    %38333 = torch.aten.slice.Tensor %38261, %int0_35798, %int0_35799, %38332, %int1_35800 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38333, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35801 = torch.constant.int 1
    %int0_35802 = torch.constant.int 0
    %int9223372036854775807_35803 = torch.constant.int 9223372036854775807
    %int1_35804 = torch.constant.int 1
    %38334 = torch.aten.slice.Tensor %38333, %int1_35801, %int0_35802, %int9223372036854775807_35803, %int1_35804 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38334, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35805 = torch.constant.int 0
    %38335 = torch.aten.unsqueeze %38334, %int0_35805 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38335, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35806 = torch.constant.int 2
    %38336 = torch.aten.unsqueeze %38335, %int2_35806 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38336, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35807 = torch.constant.int 3
    %int0_35808 = torch.constant.int 0
    %int9223372036854775807_35809 = torch.constant.int 9223372036854775807
    %int1_35810 = torch.constant.int 1
    %38337 = torch.aten.slice.Tensor %38336, %int3_35807, %int0_35808, %int9223372036854775807_35809, %int1_35810 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38337, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38338 = torch_c.to_builtin_tensor %38052 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_35811 = arith.constant 1 : index
    %dim_35812 = tensor.dim %38338, %c1_35811 : tensor<4x?x1x128xf16>
    %38339 = flow.tensor.bitcast %38338 : tensor<4x?x1x128xf16>{%dim_35812} -> tensor<4x?x1x64xcomplex<f16>>{%dim_35812}
    %38340 = torch_c.from_builtin_tensor %38339 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %38340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %38341 = torch.aten.mul.Tensor %38340, %38337 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %38342 = torch_c.to_builtin_tensor %38341 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_35813 = arith.constant 1 : index
    %dim_35814 = tensor.dim %38342, %c1_35813 : tensor<4x?x1x64xcomplex<f32>>
    %38343 = flow.tensor.bitcast %38342 : tensor<4x?x1x64xcomplex<f32>>{%dim_35814} -> tensor<4x?x1x128xf32>{%dim_35814}
    %38344 = torch_c.from_builtin_tensor %38343 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %38344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_35815 = torch.constant.int 5
    %38345 = torch.prims.convert_element_type %38344, %int5_35815 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_35816 = torch.constant.int 1
    %38346 = torch.aten.size.int %37950, %int1_35816 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_35817 = torch.constant.int 0
    %38347 = torch.aten.add.int %int0_35817, %38346 : !torch.int, !torch.int -> !torch.int
    %int0_35818 = torch.constant.int 0
    %int0_35819 = torch.constant.int 0
    %int1_35820 = torch.constant.int 1
    %38348 = torch.aten.slice.Tensor %38264, %int0_35818, %int0_35819, %38347, %int1_35820 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38348, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35821 = torch.constant.int 1
    %int0_35822 = torch.constant.int 0
    %int9223372036854775807_35823 = torch.constant.int 9223372036854775807
    %int1_35824 = torch.constant.int 1
    %38349 = torch.aten.slice.Tensor %38348, %int1_35821, %int0_35822, %int9223372036854775807_35823, %int1_35824 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38349, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35825 = torch.constant.int 0
    %38350 = torch.aten.unsqueeze %38349, %int0_35825 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38350, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35826 = torch.constant.int 2
    %38351 = torch.aten.unsqueeze %38350, %int2_35826 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38351, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35827 = torch.constant.int 3
    %int0_35828 = torch.constant.int 0
    %int9223372036854775807_35829 = torch.constant.int 9223372036854775807
    %int1_35830 = torch.constant.int 1
    %38352 = torch.aten.slice.Tensor %38351, %int3_35827, %int0_35828, %int9223372036854775807_35829, %int1_35830 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38352, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38353 = torch_c.to_builtin_tensor %38054 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_35831 = arith.constant 1 : index
    %dim_35832 = tensor.dim %38353, %c1_35831 : tensor<4x?x1x128xf16>
    %38354 = flow.tensor.bitcast %38353 : tensor<4x?x1x128xf16>{%dim_35832} -> tensor<4x?x1x64xcomplex<f16>>{%dim_35832}
    %38355 = torch_c.from_builtin_tensor %38354 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %38355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %38356 = torch.aten.mul.Tensor %38355, %38352 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %38357 = torch_c.to_builtin_tensor %38356 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_35833 = arith.constant 1 : index
    %dim_35834 = tensor.dim %38357, %c1_35833 : tensor<4x?x1x64xcomplex<f32>>
    %38358 = flow.tensor.bitcast %38357 : tensor<4x?x1x64xcomplex<f32>>{%dim_35834} -> tensor<4x?x1x128xf32>{%dim_35834}
    %38359 = torch_c.from_builtin_tensor %38358 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %38359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_35835 = torch.constant.int 5
    %38360 = torch.prims.convert_element_type %38359, %int5_35835 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_35836 = torch.constant.int 1
    %38361 = torch.aten.size.int %37956, %int1_35836 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_35837 = torch.constant.int 0
    %38362 = torch.aten.add.int %int0_35837, %38361 : !torch.int, !torch.int -> !torch.int
    %int0_35838 = torch.constant.int 0
    %int0_35839 = torch.constant.int 0
    %int1_35840 = torch.constant.int 1
    %38363 = torch.aten.slice.Tensor %38267, %int0_35838, %int0_35839, %38362, %int1_35840 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38363, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35841 = torch.constant.int 1
    %int0_35842 = torch.constant.int 0
    %int9223372036854775807_35843 = torch.constant.int 9223372036854775807
    %int1_35844 = torch.constant.int 1
    %38364 = torch.aten.slice.Tensor %38363, %int1_35841, %int0_35842, %int9223372036854775807_35843, %int1_35844 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38364, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35845 = torch.constant.int 0
    %38365 = torch.aten.unsqueeze %38364, %int0_35845 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38365, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35846 = torch.constant.int 2
    %38366 = torch.aten.unsqueeze %38365, %int2_35846 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38366, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35847 = torch.constant.int 3
    %int0_35848 = torch.constant.int 0
    %int9223372036854775807_35849 = torch.constant.int 9223372036854775807
    %int1_35850 = torch.constant.int 1
    %38367 = torch.aten.slice.Tensor %38366, %int3_35847, %int0_35848, %int9223372036854775807_35849, %int1_35850 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38367, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38368 = torch_c.to_builtin_tensor %38056 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_35851 = arith.constant 1 : index
    %dim_35852 = tensor.dim %38368, %c1_35851 : tensor<4x?x1x128xf16>
    %38369 = flow.tensor.bitcast %38368 : tensor<4x?x1x128xf16>{%dim_35852} -> tensor<4x?x1x64xcomplex<f16>>{%dim_35852}
    %38370 = torch_c.from_builtin_tensor %38369 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %38370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %38371 = torch.aten.mul.Tensor %38370, %38367 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %38372 = torch_c.to_builtin_tensor %38371 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_35853 = arith.constant 1 : index
    %dim_35854 = tensor.dim %38372, %c1_35853 : tensor<4x?x1x64xcomplex<f32>>
    %38373 = flow.tensor.bitcast %38372 : tensor<4x?x1x64xcomplex<f32>>{%dim_35854} -> tensor<4x?x1x128xf32>{%dim_35854}
    %38374 = torch_c.from_builtin_tensor %38373 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %38374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_35855 = torch.constant.int 5
    %38375 = torch.prims.convert_element_type %38374, %int5_35855 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_35856 = torch.constant.int 1
    %38376 = torch.aten.size.int %37962, %int1_35856 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_35857 = torch.constant.int 0
    %38377 = torch.aten.add.int %int0_35857, %38376 : !torch.int, !torch.int -> !torch.int
    %int0_35858 = torch.constant.int 0
    %int0_35859 = torch.constant.int 0
    %int1_35860 = torch.constant.int 1
    %38378 = torch.aten.slice.Tensor %38270, %int0_35858, %int0_35859, %38377, %int1_35860 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38378, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_35861 = torch.constant.int 1
    %int0_35862 = torch.constant.int 0
    %int9223372036854775807_35863 = torch.constant.int 9223372036854775807
    %int1_35864 = torch.constant.int 1
    %38379 = torch.aten.slice.Tensor %38378, %int1_35861, %int0_35862, %int9223372036854775807_35863, %int1_35864 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %38379, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_35865 = torch.constant.int 0
    %38380 = torch.aten.unsqueeze %38379, %int0_35865 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %38380, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_35866 = torch.constant.int 2
    %38381 = torch.aten.unsqueeze %38380, %int2_35866 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38381, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_35867 = torch.constant.int 3
    %int0_35868 = torch.constant.int 0
    %int9223372036854775807_35869 = torch.constant.int 9223372036854775807
    %int1_35870 = torch.constant.int 1
    %38382 = torch.aten.slice.Tensor %38381, %int3_35867, %int0_35868, %int9223372036854775807_35869, %int1_35870 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38382, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %38383 = torch_c.to_builtin_tensor %38058 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_35871 = arith.constant 1 : index
    %dim_35872 = tensor.dim %38383, %c1_35871 : tensor<4x?x1x128xf16>
    %38384 = flow.tensor.bitcast %38383 : tensor<4x?x1x128xf16>{%dim_35872} -> tensor<4x?x1x64xcomplex<f16>>{%dim_35872}
    %38385 = torch_c.from_builtin_tensor %38384 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %38385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %38386 = torch.aten.mul.Tensor %38385, %38382 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %38386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %38387 = torch_c.to_builtin_tensor %38386 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_35873 = arith.constant 1 : index
    %dim_35874 = tensor.dim %38387, %c1_35873 : tensor<4x?x1x64xcomplex<f32>>
    %38388 = flow.tensor.bitcast %38387 : tensor<4x?x1x64xcomplex<f32>>{%dim_35874} -> tensor<4x?x1x128xf32>{%dim_35874}
    %38389 = torch_c.from_builtin_tensor %38388 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %38389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_35875 = torch.constant.int 5
    %38390 = torch.prims.convert_element_type %38389, %int5_35875 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %38390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
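    // Slot-index arithmetic over the eight per-device page tables (%2364..%2385):
    // scale each [4,?] si64 page id by 64, then add the constant 38. The factor 64
    // is consistent with the 32 x 2 (block, K/V) slot grid of the [?,32,2,16,1,128]
    // cache view constructed at the end of this section, and 38 = 19 * 2 would then
    // address block 19's key slot; both readings are inferred, not stated in the IR.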
    %int64_35876 = torch.constant.int 64
    %38391 = torch.aten.mul.Scalar %2364, %int64_35876 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38391, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_35877 = torch.constant.int 64
    %38392 = torch.aten.mul.Scalar %2367, %int64_35877 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38392, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_35878 = torch.constant.int 64
    %38393 = torch.aten.mul.Scalar %2370, %int64_35878 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38393, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_35879 = torch.constant.int 64
    %38394 = torch.aten.mul.Scalar %2373, %int64_35879 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38394, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_35880 = torch.constant.int 64
    %38395 = torch.aten.mul.Scalar %2376, %int64_35880 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38395, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_35881 = torch.constant.int 64
    %38396 = torch.aten.mul.Scalar %2379, %int64_35881 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38396, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_35882 = torch.constant.int 64
    %38397 = torch.aten.mul.Scalar %2382, %int64_35882 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38397, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_35883 = torch.constant.int 64
    %38398 = torch.aten.mul.Scalar %2385, %int64_35883 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38398, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int38 = torch.constant.int 38
    %int1_35884 = torch.constant.int 1
    %38399 = torch.aten.add.Scalar %38391, %int38, %int1_35884 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38399, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int38_35885 = torch.constant.int 38
    %int1_35886 = torch.constant.int 1
    %38400 = torch.aten.add.Scalar %38392, %int38_35885, %int1_35886 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38400, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int38_35887 = torch.constant.int 38
    %int1_35888 = torch.constant.int 1
    %38401 = torch.aten.add.Scalar %38393, %int38_35887, %int1_35888 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38401, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int38_35889 = torch.constant.int 38
    %int1_35890 = torch.constant.int 1
    %38402 = torch.aten.add.Scalar %38394, %int38_35889, %int1_35890 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38402, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int38_35891 = torch.constant.int 38
    %int1_35892 = torch.constant.int 1
    %38403 = torch.aten.add.Scalar %38395, %int38_35891, %int1_35892 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38403, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int38_35893 = torch.constant.int 38
    %int1_35894 = torch.constant.int 1
    %38404 = torch.aten.add.Scalar %38396, %int38_35893, %int1_35894 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38404, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int38_35895 = torch.constant.int 38
    %int1_35896 = torch.constant.int 1
    %38405 = torch.aten.add.Scalar %38397, %int38_35895, %int1_35896 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38405, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int38_35897 = torch.constant.int 38
    %int1_35898 = torch.constant.int 1
    %38406 = torch.aten.add.Scalar %38398, %int38_35897, %int1_35898 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38406, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
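    // Reshape each rotated [4,?,1,128] key-like tensor so that its bound shape
    // (4, s0 * 16, 1, 128) becomes (4, s0, 16, 1, 128): the sequence dimension is
    // split into s0 pages of 16 positions each, per the symbolic shape maps.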
    %int4_35899 = torch.constant.int 4
    %int16_35900 = torch.constant.int 16
    %int1_35901 = torch.constant.int 1
    %int128_35902 = torch.constant.int 128
    %38407 = torch.prim.ListConstruct %int4_35899, %3095, %int16_35900, %int1_35901, %int128_35902 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38408 = torch.aten.view %38285, %38407 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38408, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35903 = torch.constant.int 4
    %int16_35904 = torch.constant.int 16
    %int1_35905 = torch.constant.int 1
    %int128_35906 = torch.constant.int 128
    %38409 = torch.prim.ListConstruct %int4_35903, %3095, %int16_35904, %int1_35905, %int128_35906 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38410 = torch.aten.view %38300, %38409 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38410, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35907 = torch.constant.int 4
    %int16_35908 = torch.constant.int 16
    %int1_35909 = torch.constant.int 1
    %int128_35910 = torch.constant.int 128
    %38411 = torch.prim.ListConstruct %int4_35907, %3095, %int16_35908, %int1_35909, %int128_35910 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38412 = torch.aten.view %38315, %38411 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38412, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35911 = torch.constant.int 4
    %int16_35912 = torch.constant.int 16
    %int1_35913 = torch.constant.int 1
    %int128_35914 = torch.constant.int 128
    %38413 = torch.prim.ListConstruct %int4_35911, %3095, %int16_35912, %int1_35913, %int128_35914 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38414 = torch.aten.view %38330, %38413 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38414, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35915 = torch.constant.int 4
    %int16_35916 = torch.constant.int 16
    %int1_35917 = torch.constant.int 1
    %int128_35918 = torch.constant.int 128
    %38415 = torch.prim.ListConstruct %int4_35915, %3095, %int16_35916, %int1_35917, %int128_35918 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38416 = torch.aten.view %38345, %38415 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38416, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35919 = torch.constant.int 4
    %int16_35920 = torch.constant.int 16
    %int1_35921 = torch.constant.int 1
    %int128_35922 = torch.constant.int 128
    %38417 = torch.prim.ListConstruct %int4_35919, %3095, %int16_35920, %int1_35921, %int128_35922 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38418 = torch.aten.view %38360, %38417 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38418, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35923 = torch.constant.int 4
    %int16_35924 = torch.constant.int 16
    %int1_35925 = torch.constant.int 1
    %int128_35926 = torch.constant.int 128
    %38419 = torch.prim.ListConstruct %int4_35923, %3095, %int16_35924, %int1_35925, %int128_35926 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38420 = torch.aten.view %38375, %38419 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38420, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35927 = torch.constant.int 4
    %int16_35928 = torch.constant.int 16
    %int1_35929 = torch.constant.int 1
    %int128_35930 = torch.constant.int 128
    %38421 = torch.prim.ListConstruct %int4_35927, %3095, %int16_35928, %int1_35929, %int128_35930 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38422 = torch.aten.view %38390, %38421 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38422, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
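    // Collapse the leading batch-of-4 and page dimensions: each [4,?,16,1,128]
    // view becomes [?,16,1,128] with ? = s0 * 4, one row per (batch, page) pair.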
    %int4_35931 = torch.constant.int 4
    %38423 = torch.aten.mul.int %int4_35931, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_35932 = torch.constant.int 16
    %int1_35933 = torch.constant.int 1
    %int128_35934 = torch.constant.int 128
    %38424 = torch.prim.ListConstruct %38423, %int16_35932, %int1_35933, %int128_35934 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38425 = torch.aten.view %38408, %38424 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38425, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_35935 = torch.constant.int 4
    %38426 = torch.aten.mul.int %int4_35935, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_35936 = torch.constant.int 16
    %int1_35937 = torch.constant.int 1
    %int128_35938 = torch.constant.int 128
    %38427 = torch.prim.ListConstruct %38426, %int16_35936, %int1_35937, %int128_35938 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38428 = torch.aten.view %38410, %38427 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38428, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_35939 = torch.constant.int 4
    %38429 = torch.aten.mul.int %int4_35939, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_35940 = torch.constant.int 16
    %int1_35941 = torch.constant.int 1
    %int128_35942 = torch.constant.int 128
    %38430 = torch.prim.ListConstruct %38429, %int16_35940, %int1_35941, %int128_35942 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38431 = torch.aten.view %38412, %38430 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38431, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_35943 = torch.constant.int 4
    %38432 = torch.aten.mul.int %int4_35943, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_35944 = torch.constant.int 16
    %int1_35945 = torch.constant.int 1
    %int128_35946 = torch.constant.int 128
    %38433 = torch.prim.ListConstruct %38432, %int16_35944, %int1_35945, %int128_35946 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38434 = torch.aten.view %38414, %38433 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38434, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_35947 = torch.constant.int 4
    %38435 = torch.aten.mul.int %int4_35947, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_35948 = torch.constant.int 16
    %int1_35949 = torch.constant.int 1
    %int128_35950 = torch.constant.int 128
    %38436 = torch.prim.ListConstruct %38435, %int16_35948, %int1_35949, %int128_35950 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38437 = torch.aten.view %38416, %38436 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38437, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_35951 = torch.constant.int 4
    %38438 = torch.aten.mul.int %int4_35951, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_35952 = torch.constant.int 16
    %int1_35953 = torch.constant.int 1
    %int128_35954 = torch.constant.int 128
    %38439 = torch.prim.ListConstruct %38438, %int16_35952, %int1_35953, %int128_35954 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38440 = torch.aten.view %38418, %38439 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38440, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_35955 = torch.constant.int 4
    %38441 = torch.aten.mul.int %int4_35955, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_35956 = torch.constant.int 16
    %int1_35957 = torch.constant.int 1
    %int128_35958 = torch.constant.int 128
    %38442 = torch.prim.ListConstruct %38441, %int16_35956, %int1_35957, %int128_35958 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38443 = torch.aten.view %38420, %38442 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38443, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_35959 = torch.constant.int 4
    %38444 = torch.aten.mul.int %int4_35959, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_35960 = torch.constant.int 16
    %int1_35961 = torch.constant.int 1
    %int128_35962 = torch.constant.int 128
    %38445 = torch.prim.ListConstruct %38444, %int16_35960, %int1_35961, %int128_35962 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38446 = torch.aten.view %38422, %38445 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38446, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
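    // Flatten the eight [4,?] si64 slot-index tensors to 1-D [?] (extent s0 * 4)
    // so they line up with the flattened page payloads above.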
    %int4_35963 = torch.constant.int 4
    %38447 = torch.aten.mul.int %int4_35963, %3095 : !torch.int, !torch.int -> !torch.int
    %38448 = torch.prim.ListConstruct %38447 : (!torch.int) -> !torch.list<int>
    %38449 = torch.aten.view %38399, %38448 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38449, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_35964 = torch.constant.int 4
    %38450 = torch.aten.mul.int %int4_35964, %3095 : !torch.int, !torch.int -> !torch.int
    %38451 = torch.prim.ListConstruct %38450 : (!torch.int) -> !torch.list<int>
    %38452 = torch.aten.view %38400, %38451 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38452, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_35965 = torch.constant.int 4
    %38453 = torch.aten.mul.int %int4_35965, %3095 : !torch.int, !torch.int -> !torch.int
    %38454 = torch.prim.ListConstruct %38453 : (!torch.int) -> !torch.list<int>
    %38455 = torch.aten.view %38401, %38454 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38455, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_35966 = torch.constant.int 4
    %38456 = torch.aten.mul.int %int4_35966, %3095 : !torch.int, !torch.int -> !torch.int
    %38457 = torch.prim.ListConstruct %38456 : (!torch.int) -> !torch.list<int>
    %38458 = torch.aten.view %38402, %38457 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38458, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_35967 = torch.constant.int 4
    %38459 = torch.aten.mul.int %int4_35967, %3095 : !torch.int, !torch.int -> !torch.int
    %38460 = torch.prim.ListConstruct %38459 : (!torch.int) -> !torch.list<int>
    %38461 = torch.aten.view %38403, %38460 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38461, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_35968 = torch.constant.int 4
    %38462 = torch.aten.mul.int %int4_35968, %3095 : !torch.int, !torch.int -> !torch.int
    %38463 = torch.prim.ListConstruct %38462 : (!torch.int) -> !torch.list<int>
    %38464 = torch.aten.view %38404, %38463 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38464, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_35969 = torch.constant.int 4
    %38465 = torch.aten.mul.int %int4_35969, %3095 : !torch.int, !torch.int -> !torch.int
    %38466 = torch.prim.ListConstruct %38465 : (!torch.int) -> !torch.list<int>
    %38467 = torch.aten.view %38405, %38466 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38467, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_35970 = torch.constant.int 4
    %38468 = torch.aten.mul.int %int4_35970, %3095 : !torch.int, !torch.int -> !torch.int
    %38469 = torch.prim.ListConstruct %38468 : (!torch.int) -> !torch.list<int>
    %38470 = torch.aten.view %38406, %38469 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38470, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
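    // A second group of [4,?,1,128] f16 tensors (%38060..%38074) is put through
    // the same view-and-flatten sequence. These bypass the complex multiply above,
    // which would be consistent with value heads (values take no rotary rotation),
    // though the IR does not label them.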
    %int4_35971 = torch.constant.int 4
    %int16_35972 = torch.constant.int 16
    %int1_35973 = torch.constant.int 1
    %int128_35974 = torch.constant.int 128
    %38471 = torch.prim.ListConstruct %int4_35971, %3095, %int16_35972, %int1_35973, %int128_35974 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38472 = torch.aten.view %38060, %38471 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38472, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35975 = torch.constant.int 4
    %int16_35976 = torch.constant.int 16
    %int1_35977 = torch.constant.int 1
    %int128_35978 = torch.constant.int 128
    %38473 = torch.prim.ListConstruct %int4_35975, %3095, %int16_35976, %int1_35977, %int128_35978 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38474 = torch.aten.view %38062, %38473 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38474, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35979 = torch.constant.int 4
    %int16_35980 = torch.constant.int 16
    %int1_35981 = torch.constant.int 1
    %int128_35982 = torch.constant.int 128
    %38475 = torch.prim.ListConstruct %int4_35979, %3095, %int16_35980, %int1_35981, %int128_35982 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38476 = torch.aten.view %38064, %38475 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38476, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35983 = torch.constant.int 4
    %int16_35984 = torch.constant.int 16
    %int1_35985 = torch.constant.int 1
    %int128_35986 = torch.constant.int 128
    %38477 = torch.prim.ListConstruct %int4_35983, %3095, %int16_35984, %int1_35985, %int128_35986 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38478 = torch.aten.view %38066, %38477 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38478, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35987 = torch.constant.int 4
    %int16_35988 = torch.constant.int 16
    %int1_35989 = torch.constant.int 1
    %int128_35990 = torch.constant.int 128
    %38479 = torch.prim.ListConstruct %int4_35987, %3095, %int16_35988, %int1_35989, %int128_35990 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38480 = torch.aten.view %38068, %38479 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38480, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35991 = torch.constant.int 4
    %int16_35992 = torch.constant.int 16
    %int1_35993 = torch.constant.int 1
    %int128_35994 = torch.constant.int 128
    %38481 = torch.prim.ListConstruct %int4_35991, %3095, %int16_35992, %int1_35993, %int128_35994 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38482 = torch.aten.view %38070, %38481 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38482, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35995 = torch.constant.int 4
    %int16_35996 = torch.constant.int 16
    %int1_35997 = torch.constant.int 1
    %int128_35998 = torch.constant.int 128
    %38483 = torch.prim.ListConstruct %int4_35995, %3095, %int16_35996, %int1_35997, %int128_35998 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38484 = torch.aten.view %38072, %38483 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38484, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_35999 = torch.constant.int 4
    %int16_36000 = torch.constant.int 16
    %int1_36001 = torch.constant.int 1
    %int128_36002 = torch.constant.int 128
    %38485 = torch.prim.ListConstruct %int4_35999, %3095, %int16_36000, %int1_36001, %int128_36002 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38486 = torch.aten.view %38074, %38485 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %38486, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_36003 = torch.constant.int 4
    %38487 = torch.aten.mul.int %int4_36003, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_36004 = torch.constant.int 16
    %int1_36005 = torch.constant.int 1
    %int128_36006 = torch.constant.int 128
    %38488 = torch.prim.ListConstruct %38487, %int16_36004, %int1_36005, %int128_36006 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38489 = torch.aten.view %38472, %38488 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38489, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_36007 = torch.constant.int 4
    %38490 = torch.aten.mul.int %int4_36007, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_36008 = torch.constant.int 16
    %int1_36009 = torch.constant.int 1
    %int128_36010 = torch.constant.int 128
    %38491 = torch.prim.ListConstruct %38490, %int16_36008, %int1_36009, %int128_36010 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38492 = torch.aten.view %38474, %38491 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38492, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_36011 = torch.constant.int 4
    %38493 = torch.aten.mul.int %int4_36011, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_36012 = torch.constant.int 16
    %int1_36013 = torch.constant.int 1
    %int128_36014 = torch.constant.int 128
    %38494 = torch.prim.ListConstruct %38493, %int16_36012, %int1_36013, %int128_36014 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38495 = torch.aten.view %38476, %38494 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38495, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_36015 = torch.constant.int 4
    %38496 = torch.aten.mul.int %int4_36015, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_36016 = torch.constant.int 16
    %int1_36017 = torch.constant.int 1
    %int128_36018 = torch.constant.int 128
    %38497 = torch.prim.ListConstruct %38496, %int16_36016, %int1_36017, %int128_36018 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38498 = torch.aten.view %38478, %38497 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38498, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_36019 = torch.constant.int 4
    %38499 = torch.aten.mul.int %int4_36019, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_36020 = torch.constant.int 16
    %int1_36021 = torch.constant.int 1
    %int128_36022 = torch.constant.int 128
    %38500 = torch.prim.ListConstruct %38499, %int16_36020, %int1_36021, %int128_36022 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38501 = torch.aten.view %38480, %38500 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38501, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_36023 = torch.constant.int 4
    %38502 = torch.aten.mul.int %int4_36023, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_36024 = torch.constant.int 16
    %int1_36025 = torch.constant.int 1
    %int128_36026 = torch.constant.int 128
    %38503 = torch.prim.ListConstruct %38502, %int16_36024, %int1_36025, %int128_36026 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38504 = torch.aten.view %38482, %38503 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38504, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_36027 = torch.constant.int 4
    %38505 = torch.aten.mul.int %int4_36027, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_36028 = torch.constant.int 16
    %int1_36029 = torch.constant.int 1
    %int128_36030 = torch.constant.int 128
    %38506 = torch.prim.ListConstruct %38505, %int16_36028, %int1_36029, %int128_36030 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38507 = torch.aten.view %38484, %38506 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38507, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_36031 = torch.constant.int 4
    %38508 = torch.aten.mul.int %int4_36031, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_36032 = torch.constant.int 16
    %int1_36033 = torch.constant.int 1
    %int128_36034 = torch.constant.int 128
    %38509 = torch.prim.ListConstruct %38508, %int16_36032, %int1_36033, %int128_36034 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38510 = torch.aten.view %38486, %38509 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38510, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
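    // Adding 1 to each key-slot index (38 -> 39 within the 64-slot page grid)
    // plausibly addresses the adjacent value slot of the same block; the offset
    // tensors are then flattened to 1-D exactly like the key indices.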
    %int1_36035 = torch.constant.int 1
    %int1_36036 = torch.constant.int 1
    %38511 = torch.aten.add.Scalar %38399, %int1_36035, %int1_36036 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38511, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_36037 = torch.constant.int 1
    %int1_36038 = torch.constant.int 1
    %38512 = torch.aten.add.Scalar %38400, %int1_36037, %int1_36038 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38512, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_36039 = torch.constant.int 1
    %int1_36040 = torch.constant.int 1
    %38513 = torch.aten.add.Scalar %38401, %int1_36039, %int1_36040 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38513, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_36041 = torch.constant.int 1
    %int1_36042 = torch.constant.int 1
    %38514 = torch.aten.add.Scalar %38402, %int1_36041, %int1_36042 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38514, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_36043 = torch.constant.int 1
    %int1_36044 = torch.constant.int 1
    %38515 = torch.aten.add.Scalar %38403, %int1_36043, %int1_36044 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38515, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_36045 = torch.constant.int 1
    %int1_36046 = torch.constant.int 1
    %38516 = torch.aten.add.Scalar %38404, %int1_36045, %int1_36046 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38516, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_36047 = torch.constant.int 1
    %int1_36048 = torch.constant.int 1
    %38517 = torch.aten.add.Scalar %38405, %int1_36047, %int1_36048 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38517, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_36049 = torch.constant.int 1
    %int1_36050 = torch.constant.int 1
    %38518 = torch.aten.add.Scalar %38406, %int1_36049, %int1_36050 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %38518, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_36051 = torch.constant.int 4
    %38519 = torch.aten.mul.int %int4_36051, %3095 : !torch.int, !torch.int -> !torch.int
    %38520 = torch.prim.ListConstruct %38519 : (!torch.int) -> !torch.list<int>
    %38521 = torch.aten.view %38511, %38520 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38521, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_36052 = torch.constant.int 4
    %38522 = torch.aten.mul.int %int4_36052, %3095 : !torch.int, !torch.int -> !torch.int
    %38523 = torch.prim.ListConstruct %38522 : (!torch.int) -> !torch.list<int>
    %38524 = torch.aten.view %38512, %38523 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38524, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_36053 = torch.constant.int 4
    %38525 = torch.aten.mul.int %int4_36053, %3095 : !torch.int, !torch.int -> !torch.int
    %38526 = torch.prim.ListConstruct %38525 : (!torch.int) -> !torch.list<int>
    %38527 = torch.aten.view %38513, %38526 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38527, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_36054 = torch.constant.int 4
    %38528 = torch.aten.mul.int %int4_36054, %3095 : !torch.int, !torch.int -> !torch.int
    %38529 = torch.prim.ListConstruct %38528 : (!torch.int) -> !torch.list<int>
    %38530 = torch.aten.view %38514, %38529 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38530, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_36055 = torch.constant.int 4
    %38531 = torch.aten.mul.int %int4_36055, %3095 : !torch.int, !torch.int -> !torch.int
    %38532 = torch.prim.ListConstruct %38531 : (!torch.int) -> !torch.list<int>
    %38533 = torch.aten.view %38515, %38532 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38533, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_36056 = torch.constant.int 4
    %38534 = torch.aten.mul.int %int4_36056, %3095 : !torch.int, !torch.int -> !torch.int
    %38535 = torch.prim.ListConstruct %38534 : (!torch.int) -> !torch.list<int>
    %38536 = torch.aten.view %38516, %38535 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38536, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_36057 = torch.constant.int 4
    %38537 = torch.aten.mul.int %int4_36057, %3095 : !torch.int, !torch.int -> !torch.int
    %38538 = torch.prim.ListConstruct %38537 : (!torch.int) -> !torch.list<int>
    %38539 = torch.aten.view %38517, %38538 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38539, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_36058 = torch.constant.int 4
    %38540 = torch.aten.mul.int %int4_36058, %3095 : !torch.int, !torch.int -> !torch.int
    %38541 = torch.prim.ListConstruct %38540 : (!torch.int) -> !torch.list<int>
    %38542 = torch.aten.view %38518, %38541 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38542, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
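    // Pair up key-slot and value-slot indices (and, below, the corresponding
    // [?,16,1,128] payloads) with dim-0 concatenations, doubling the leading
    // extent from s0 * 4 to s0 * 8 per device.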
    %38543 = torch.prim.ListConstruct %38449, %38521 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_36059 = torch.constant.int 0
    %38544 = torch.aten.cat %38543, %int0_36059 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38544, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %38545 = torch.prim.ListConstruct %38452, %38524 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_36060 = torch.constant.int 0
    %38546 = torch.aten.cat %38545, %int0_36060 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38546, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %38547 = torch.prim.ListConstruct %38455, %38527 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_36061 = torch.constant.int 0
    %38548 = torch.aten.cat %38547, %int0_36061 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38548, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %38549 = torch.prim.ListConstruct %38458, %38530 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_36062 = torch.constant.int 0
    %38550 = torch.aten.cat %38549, %int0_36062 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38550, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %38551 = torch.prim.ListConstruct %38461, %38533 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_36063 = torch.constant.int 0
    %38552 = torch.aten.cat %38551, %int0_36063 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38552, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %38553 = torch.prim.ListConstruct %38464, %38536 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_36064 = torch.constant.int 0
    %38554 = torch.aten.cat %38553, %int0_36064 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38554, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %38555 = torch.prim.ListConstruct %38467, %38539 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_36065 = torch.constant.int 0
    %38556 = torch.aten.cat %38555, %int0_36065 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38556, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %38557 = torch.prim.ListConstruct %38470, %38542 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_36066 = torch.constant.int 0
    %38558 = torch.aten.cat %38557, %int0_36066 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %38558, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %38559 = torch.prim.ListConstruct %38425, %38489 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_36067 = torch.constant.int 0
    %38560 = torch.aten.cat %38559, %int0_36067 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38560, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38561 = torch.prim.ListConstruct %38428, %38492 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_36068 = torch.constant.int 0
    %38562 = torch.aten.cat %38561, %int0_36068 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38562, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38563 = torch.prim.ListConstruct %38431, %38495 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_36069 = torch.constant.int 0
    %38564 = torch.aten.cat %38563, %int0_36069 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38564, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38565 = torch.prim.ListConstruct %38434, %38498 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_36070 = torch.constant.int 0
    %38566 = torch.aten.cat %38565, %int0_36070 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38566, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38567 = torch.prim.ListConstruct %38437, %38501 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_36071 = torch.constant.int 0
    %38568 = torch.aten.cat %38567, %int0_36071 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38568, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38569 = torch.prim.ListConstruct %38440, %38504 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_36072 = torch.constant.int 0
    %38570 = torch.aten.cat %38569, %int0_36072 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38570, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38571 = torch.prim.ListConstruct %38443, %38507 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_36073 = torch.constant.int 0
    %38572 = torch.aten.cat %38571, %int0_36073 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38572, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38573 = torch.prim.ListConstruct %38446, %38510 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_36074 = torch.constant.int 0
    %38574 = torch.aten.cat %38573, %int0_36074 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38574, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
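    // Cache scatter for device shard 0: the flat [?,131072] cache is viewed as
    // [?,32,2,16,1,128], collapsed to [?,16,1,128] rows, updated in place via
    // index_put with the indices/values prepared above, then viewed back to
    // [?,131072]. A plausible reading of the constants, consistent with the
    // s0 * 16 sequence bindings elsewhere: 32 layers x 2 (K/V) x 16 tokens per
    // page x 1 head per shard x 128 head dim (32*2*16*1*128 = 131072).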
    %int32_36075 = torch.constant.int 32
    %int2_36076 = torch.constant.int 2
    %int16_36077 = torch.constant.int 16
    %int1_36078 = torch.constant.int 1
    %int128_36079 = torch.constant.int 128
    %38575 = torch.prim.ListConstruct %3023, %int32_36075, %int2_36076, %int16_36077, %int1_36078, %int128_36079 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38576 = torch.aten.view %36725, %38575 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38576, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_36080 = torch.constant.int 32
    %38577 = torch.aten.mul.int %3023, %int32_36080 : !torch.int, !torch.int -> !torch.int
    %int2_36081 = torch.constant.int 2
    %38578 = torch.aten.mul.int %38577, %int2_36081 : !torch.int, !torch.int -> !torch.int
    %int16_36082 = torch.constant.int 16
    %int1_36083 = torch.constant.int 1
    %int128_36084 = torch.constant.int 128
    %38579 = torch.prim.ListConstruct %38578, %int16_36082, %int1_36083, %int128_36084 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38580 = torch.aten.view %38576, %38579 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38580, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38581 = torch.prim.ListConstruct %38544 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_36085 = torch.constant.bool false
    %38582 = torch.aten.index_put %38580, %38581, %38560, %false_36085 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38582, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_36086 = torch.constant.int 32
    %int2_36087 = torch.constant.int 2
    %int16_36088 = torch.constant.int 16
    %int1_36089 = torch.constant.int 1
    %int128_36090 = torch.constant.int 128
    %38583 = torch.prim.ListConstruct %3023, %int32_36086, %int2_36087, %int16_36088, %int1_36089, %int128_36090 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38584 = torch.aten.view %38582, %38583 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38584, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_36091 = torch.constant.int 131072
    %38585 = torch.prim.ListConstruct %3023, %int131072_36091 : (!torch.int, !torch.int) -> !torch.list<int>
    %38586 = torch.aten.view %38584, %38585 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %38586, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
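    // The same view / index_put / view round-trip repeats verbatim for the other
    // seven device shards (%3026 .. %3044), differing only in the shard-local
    // cache buffer, index vector, and value tensor.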
    %int32_36092 = torch.constant.int 32
    %int2_36093 = torch.constant.int 2
    %int16_36094 = torch.constant.int 16
    %int1_36095 = torch.constant.int 1
    %int128_36096 = torch.constant.int 128
    %38587 = torch.prim.ListConstruct %3026, %int32_36092, %int2_36093, %int16_36094, %int1_36095, %int128_36096 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38588 = torch.aten.view %36737, %38587 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38588, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_36097 = torch.constant.int 32
    %38589 = torch.aten.mul.int %3026, %int32_36097 : !torch.int, !torch.int -> !torch.int
    %int2_36098 = torch.constant.int 2
    %38590 = torch.aten.mul.int %38589, %int2_36098 : !torch.int, !torch.int -> !torch.int
    %int16_36099 = torch.constant.int 16
    %int1_36100 = torch.constant.int 1
    %int128_36101 = torch.constant.int 128
    %38591 = torch.prim.ListConstruct %38590, %int16_36099, %int1_36100, %int128_36101 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38592 = torch.aten.view %38588, %38591 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38592, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38593 = torch.prim.ListConstruct %38546 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_36102 = torch.constant.bool false
    %38594 = torch.aten.index_put %38592, %38593, %38562, %false_36102 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38594, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_36103 = torch.constant.int 32
    %int2_36104 = torch.constant.int 2
    %int16_36105 = torch.constant.int 16
    %int1_36106 = torch.constant.int 1
    %int128_36107 = torch.constant.int 128
    %38595 = torch.prim.ListConstruct %3026, %int32_36103, %int2_36104, %int16_36105, %int1_36106, %int128_36107 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38596 = torch.aten.view %38594, %38595 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38596, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_36108 = torch.constant.int 131072
    %38597 = torch.prim.ListConstruct %3026, %int131072_36108 : (!torch.int, !torch.int) -> !torch.list<int>
    %38598 = torch.aten.view %38596, %38597 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %38598, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_36109 = torch.constant.int 32
    %int2_36110 = torch.constant.int 2
    %int16_36111 = torch.constant.int 16
    %int1_36112 = torch.constant.int 1
    %int128_36113 = torch.constant.int 128
    %38599 = torch.prim.ListConstruct %3029, %int32_36109, %int2_36110, %int16_36111, %int1_36112, %int128_36113 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38600 = torch.aten.view %36749, %38599 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38600, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_36114 = torch.constant.int 32
    %38601 = torch.aten.mul.int %3029, %int32_36114 : !torch.int, !torch.int -> !torch.int
    %int2_36115 = torch.constant.int 2
    %38602 = torch.aten.mul.int %38601, %int2_36115 : !torch.int, !torch.int -> !torch.int
    %int16_36116 = torch.constant.int 16
    %int1_36117 = torch.constant.int 1
    %int128_36118 = torch.constant.int 128
    %38603 = torch.prim.ListConstruct %38602, %int16_36116, %int1_36117, %int128_36118 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38604 = torch.aten.view %38600, %38603 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38604, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38605 = torch.prim.ListConstruct %38548 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_36119 = torch.constant.bool false
    %38606 = torch.aten.index_put %38604, %38605, %38564, %false_36119 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38606, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_36120 = torch.constant.int 32
    %int2_36121 = torch.constant.int 2
    %int16_36122 = torch.constant.int 16
    %int1_36123 = torch.constant.int 1
    %int128_36124 = torch.constant.int 128
    %38607 = torch.prim.ListConstruct %3029, %int32_36120, %int2_36121, %int16_36122, %int1_36123, %int128_36124 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38608 = torch.aten.view %38606, %38607 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38608, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_36125 = torch.constant.int 131072
    %38609 = torch.prim.ListConstruct %3029, %int131072_36125 : (!torch.int, !torch.int) -> !torch.list<int>
    %38610 = torch.aten.view %38608, %38609 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %38610, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_36126 = torch.constant.int 32
    %int2_36127 = torch.constant.int 2
    %int16_36128 = torch.constant.int 16
    %int1_36129 = torch.constant.int 1
    %int128_36130 = torch.constant.int 128
    %38611 = torch.prim.ListConstruct %3032, %int32_36126, %int2_36127, %int16_36128, %int1_36129, %int128_36130 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38612 = torch.aten.view %36761, %38611 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38612, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_36131 = torch.constant.int 32
    %38613 = torch.aten.mul.int %3032, %int32_36131 : !torch.int, !torch.int -> !torch.int
    %int2_36132 = torch.constant.int 2
    %38614 = torch.aten.mul.int %38613, %int2_36132 : !torch.int, !torch.int -> !torch.int
    %int16_36133 = torch.constant.int 16
    %int1_36134 = torch.constant.int 1
    %int128_36135 = torch.constant.int 128
    %38615 = torch.prim.ListConstruct %38614, %int16_36133, %int1_36134, %int128_36135 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38616 = torch.aten.view %38612, %38615 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38616, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38617 = torch.prim.ListConstruct %38550 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_36136 = torch.constant.bool false
    %38618 = torch.aten.index_put %38616, %38617, %38566, %false_36136 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38618, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_36137 = torch.constant.int 32
    %int2_36138 = torch.constant.int 2
    %int16_36139 = torch.constant.int 16
    %int1_36140 = torch.constant.int 1
    %int128_36141 = torch.constant.int 128
    %38619 = torch.prim.ListConstruct %3032, %int32_36137, %int2_36138, %int16_36139, %int1_36140, %int128_36141 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38620 = torch.aten.view %38618, %38619 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38620, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_36142 = torch.constant.int 131072
    %38621 = torch.prim.ListConstruct %3032, %int131072_36142 : (!torch.int, !torch.int) -> !torch.list<int>
    %38622 = torch.aten.view %38620, %38621 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %38622, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_36143 = torch.constant.int 32
    %int2_36144 = torch.constant.int 2
    %int16_36145 = torch.constant.int 16
    %int1_36146 = torch.constant.int 1
    %int128_36147 = torch.constant.int 128
    %38623 = torch.prim.ListConstruct %3035, %int32_36143, %int2_36144, %int16_36145, %int1_36146, %int128_36147 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38624 = torch.aten.view %36773, %38623 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38624, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_36148 = torch.constant.int 32
    %38625 = torch.aten.mul.int %3035, %int32_36148 : !torch.int, !torch.int -> !torch.int
    %int2_36149 = torch.constant.int 2
    %38626 = torch.aten.mul.int %38625, %int2_36149 : !torch.int, !torch.int -> !torch.int
    %int16_36150 = torch.constant.int 16
    %int1_36151 = torch.constant.int 1
    %int128_36152 = torch.constant.int 128
    %38627 = torch.prim.ListConstruct %38626, %int16_36150, %int1_36151, %int128_36152 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38628 = torch.aten.view %38624, %38627 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38628, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38629 = torch.prim.ListConstruct %38552 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_36153 = torch.constant.bool false
    %38630 = torch.aten.index_put %38628, %38629, %38568, %false_36153 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38630, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_36154 = torch.constant.int 32
    %int2_36155 = torch.constant.int 2
    %int16_36156 = torch.constant.int 16
    %int1_36157 = torch.constant.int 1
    %int128_36158 = torch.constant.int 128
    %38631 = torch.prim.ListConstruct %3035, %int32_36154, %int2_36155, %int16_36156, %int1_36157, %int128_36158 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38632 = torch.aten.view %38630, %38631 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38632, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_36159 = torch.constant.int 131072
    %38633 = torch.prim.ListConstruct %3035, %int131072_36159 : (!torch.int, !torch.int) -> !torch.list<int>
    %38634 = torch.aten.view %38632, %38633 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %38634, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_36160 = torch.constant.int 32
    %int2_36161 = torch.constant.int 2
    %int16_36162 = torch.constant.int 16
    %int1_36163 = torch.constant.int 1
    %int128_36164 = torch.constant.int 128
    %38635 = torch.prim.ListConstruct %3038, %int32_36160, %int2_36161, %int16_36162, %int1_36163, %int128_36164 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38636 = torch.aten.view %36785, %38635 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38636, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_36165 = torch.constant.int 32
    %38637 = torch.aten.mul.int %3038, %int32_36165 : !torch.int, !torch.int -> !torch.int
    %int2_36166 = torch.constant.int 2
    %38638 = torch.aten.mul.int %38637, %int2_36166 : !torch.int, !torch.int -> !torch.int
    %int16_36167 = torch.constant.int 16
    %int1_36168 = torch.constant.int 1
    %int128_36169 = torch.constant.int 128
    %38639 = torch.prim.ListConstruct %38638, %int16_36167, %int1_36168, %int128_36169 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38640 = torch.aten.view %38636, %38639 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38640, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38641 = torch.prim.ListConstruct %38554 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_36170 = torch.constant.bool false
    %38642 = torch.aten.index_put %38640, %38641, %38570, %false_36170 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38642, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_36171 = torch.constant.int 32
    %int2_36172 = torch.constant.int 2
    %int16_36173 = torch.constant.int 16
    %int1_36174 = torch.constant.int 1
    %int128_36175 = torch.constant.int 128
    %38643 = torch.prim.ListConstruct %3038, %int32_36171, %int2_36172, %int16_36173, %int1_36174, %int128_36175 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38644 = torch.aten.view %38642, %38643 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38644, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_36176 = torch.constant.int 131072
    %38645 = torch.prim.ListConstruct %3038, %int131072_36176 : (!torch.int, !torch.int) -> !torch.list<int>
    %38646 = torch.aten.view %38644, %38645 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %38646, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_36177 = torch.constant.int 32
    %int2_36178 = torch.constant.int 2
    %int16_36179 = torch.constant.int 16
    %int1_36180 = torch.constant.int 1
    %int128_36181 = torch.constant.int 128
    %38647 = torch.prim.ListConstruct %3041, %int32_36177, %int2_36178, %int16_36179, %int1_36180, %int128_36181 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38648 = torch.aten.view %36797, %38647 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38648, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_36182 = torch.constant.int 32
    %38649 = torch.aten.mul.int %3041, %int32_36182 : !torch.int, !torch.int -> !torch.int
    %int2_36183 = torch.constant.int 2
    %38650 = torch.aten.mul.int %38649, %int2_36183 : !torch.int, !torch.int -> !torch.int
    %int16_36184 = torch.constant.int 16
    %int1_36185 = torch.constant.int 1
    %int128_36186 = torch.constant.int 128
    %38651 = torch.prim.ListConstruct %38650, %int16_36184, %int1_36185, %int128_36186 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38652 = torch.aten.view %38648, %38651 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38652, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38653 = torch.prim.ListConstruct %38556 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_36187 = torch.constant.bool false
    %38654 = torch.aten.index_put %38652, %38653, %38572, %false_36187 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38654, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_36188 = torch.constant.int 32
    %int2_36189 = torch.constant.int 2
    %int16_36190 = torch.constant.int 16
    %int1_36191 = torch.constant.int 1
    %int128_36192 = torch.constant.int 128
    %38655 = torch.prim.ListConstruct %3041, %int32_36188, %int2_36189, %int16_36190, %int1_36191, %int128_36192 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38656 = torch.aten.view %38654, %38655 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38656, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_36193 = torch.constant.int 131072
    %38657 = torch.prim.ListConstruct %3041, %int131072_36193 : (!torch.int, !torch.int) -> !torch.list<int>
    %38658 = torch.aten.view %38656, %38657 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %38658, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_36194 = torch.constant.int 32
    %int2_36195 = torch.constant.int 2
    %int16_36196 = torch.constant.int 16
    %int1_36197 = torch.constant.int 1
    %int128_36198 = torch.constant.int 128
    %38659 = torch.prim.ListConstruct %3044, %int32_36194, %int2_36195, %int16_36196, %int1_36197, %int128_36198 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38660 = torch.aten.view %36809, %38659 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38660, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_36199 = torch.constant.int 32
    %38661 = torch.aten.mul.int %3044, %int32_36199 : !torch.int, !torch.int -> !torch.int
    %int2_36200 = torch.constant.int 2
    %38662 = torch.aten.mul.int %38661, %int2_36200 : !torch.int, !torch.int -> !torch.int
    %int16_36201 = torch.constant.int 16
    %int1_36202 = torch.constant.int 1
    %int128_36203 = torch.constant.int 128
    %38663 = torch.prim.ListConstruct %38662, %int16_36201, %int1_36202, %int128_36203 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38664 = torch.aten.view %38660, %38663 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38664, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %38665 = torch.prim.ListConstruct %38558 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_36204 = torch.constant.bool false
    %38666 = torch.aten.index_put %38664, %38665, %38574, %false_36204 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %38666, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_36205 = torch.constant.int 32
    %int2_36206 = torch.constant.int 2
    %int16_36207 = torch.constant.int 16
    %int1_36208 = torch.constant.int 1
    %int128_36209 = torch.constant.int 128
    %38667 = torch.prim.ListConstruct %3044, %int32_36205, %int2_36206, %int16_36207, %int1_36208, %int128_36209 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38668 = torch.aten.view %38666, %38667 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %38668, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_36210 = torch.constant.int 131072
    %38669 = torch.prim.ListConstruct %3044, %int131072_36210 : (!torch.int, !torch.int) -> !torch.list<int>
    %38670 = torch.aten.view %38668, %38669 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %38670, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
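    // All eight shard caches are now updated; the rest prepares attention inputs.
    // Each per-shard K tensor [4,?,1,128] gains a broadcast dim at -2, is expanded
    // to [4,?,1,4,128], then flattened to [4,?,4,128]: the single KV head per
    // shard is repeated 4x to match the query heads (GQA-style repetition,
    // judging by the 4:1 head ratio in the shapes).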
    %int-2_36211 = torch.constant.int -2
    %38671 = torch.aten.unsqueeze %38285, %int-2_36211 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36212 = torch.constant.int -2
    %38672 = torch.aten.unsqueeze %38300, %int-2_36212 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36213 = torch.constant.int -2
    %38673 = torch.aten.unsqueeze %38315, %int-2_36213 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36214 = torch.constant.int -2
    %38674 = torch.aten.unsqueeze %38330, %int-2_36214 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36215 = torch.constant.int -2
    %38675 = torch.aten.unsqueeze %38345, %int-2_36215 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36216 = torch.constant.int -2
    %38676 = torch.aten.unsqueeze %38360, %int-2_36216 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36217 = torch.constant.int -2
    %38677 = torch.aten.unsqueeze %38375, %int-2_36217 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36218 = torch.constant.int -2
    %38678 = torch.aten.unsqueeze %38390, %int-2_36218 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int4_36219 = torch.constant.int 4
    %int1_36220 = torch.constant.int 1
    %int4_36221 = torch.constant.int 4
    %int128_36222 = torch.constant.int 128
    %38679 = torch.prim.ListConstruct %int4_36219, %38271, %int1_36220, %int4_36221, %int128_36222 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36223 = torch.constant.bool false
    %38680 = torch.aten.expand %38671, %38679, %false_36223 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36224 = torch.constant.int 4
    %int1_36225 = torch.constant.int 1
    %int4_36226 = torch.constant.int 4
    %int128_36227 = torch.constant.int 128
    %38681 = torch.prim.ListConstruct %int4_36224, %38271, %int1_36225, %int4_36226, %int128_36227 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36228 = torch.constant.bool false
    %38682 = torch.aten.expand %38672, %38681, %false_36228 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36229 = torch.constant.int 4
    %int1_36230 = torch.constant.int 1
    %int4_36231 = torch.constant.int 4
    %int128_36232 = torch.constant.int 128
    %38683 = torch.prim.ListConstruct %int4_36229, %38271, %int1_36230, %int4_36231, %int128_36232 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36233 = torch.constant.bool false
    %38684 = torch.aten.expand %38673, %38683, %false_36233 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36234 = torch.constant.int 4
    %int1_36235 = torch.constant.int 1
    %int4_36236 = torch.constant.int 4
    %int128_36237 = torch.constant.int 128
    %38685 = torch.prim.ListConstruct %int4_36234, %38271, %int1_36235, %int4_36236, %int128_36237 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36238 = torch.constant.bool false
    %38686 = torch.aten.expand %38674, %38685, %false_36238 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36239 = torch.constant.int 4
    %int1_36240 = torch.constant.int 1
    %int4_36241 = torch.constant.int 4
    %int128_36242 = torch.constant.int 128
    %38687 = torch.prim.ListConstruct %int4_36239, %38271, %int1_36240, %int4_36241, %int128_36242 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36243 = torch.constant.bool false
    %38688 = torch.aten.expand %38675, %38687, %false_36243 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36244 = torch.constant.int 4
    %int1_36245 = torch.constant.int 1
    %int4_36246 = torch.constant.int 4
    %int128_36247 = torch.constant.int 128
    %38689 = torch.prim.ListConstruct %int4_36244, %38271, %int1_36245, %int4_36246, %int128_36247 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36248 = torch.constant.bool false
    %38690 = torch.aten.expand %38676, %38689, %false_36248 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36249 = torch.constant.int 4
    %int1_36250 = torch.constant.int 1
    %int4_36251 = torch.constant.int 4
    %int128_36252 = torch.constant.int 128
    %38691 = torch.prim.ListConstruct %int4_36249, %38271, %int1_36250, %int4_36251, %int128_36252 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36253 = torch.constant.bool false
    %38692 = torch.aten.expand %38677, %38691, %false_36253 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36254 = torch.constant.int 4
    %int1_36255 = torch.constant.int 1
    %int4_36256 = torch.constant.int 4
    %int128_36257 = torch.constant.int 128
    %38693 = torch.prim.ListConstruct %int4_36254, %38271, %int1_36255, %int4_36256, %int128_36257 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36258 = torch.constant.bool false
    %38694 = torch.aten.expand %38678, %38693, %false_36258 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36259 = torch.constant.int 4
    %int4_36260 = torch.constant.int 4
    %int128_36261 = torch.constant.int 128
    %38695 = torch.prim.ListConstruct %int4_36259, %38271, %int4_36260, %int128_36261 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38696 = torch.aten.view %38680, %38695 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36262 = torch.constant.int 4
    %int4_36263 = torch.constant.int 4
    %int128_36264 = torch.constant.int 128
    %38697 = torch.prim.ListConstruct %int4_36262, %38271, %int4_36263, %int128_36264 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38698 = torch.aten.view %38682, %38697 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36265 = torch.constant.int 4
    %int4_36266 = torch.constant.int 4
    %int128_36267 = torch.constant.int 128
    %38699 = torch.prim.ListConstruct %int4_36265, %38271, %int4_36266, %int128_36267 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38700 = torch.aten.view %38684, %38699 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36268 = torch.constant.int 4
    %int4_36269 = torch.constant.int 4
    %int128_36270 = torch.constant.int 128
    %38701 = torch.prim.ListConstruct %int4_36268, %38271, %int4_36269, %int128_36270 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38702 = torch.aten.view %38686, %38701 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36271 = torch.constant.int 4
    %int4_36272 = torch.constant.int 4
    %int128_36273 = torch.constant.int 128
    %38703 = torch.prim.ListConstruct %int4_36271, %38271, %int4_36272, %int128_36273 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38704 = torch.aten.view %38688, %38703 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36274 = torch.constant.int 4
    %int4_36275 = torch.constant.int 4
    %int128_36276 = torch.constant.int 128
    %38705 = torch.prim.ListConstruct %int4_36274, %38271, %int4_36275, %int128_36276 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38706 = torch.aten.view %38690, %38705 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36277 = torch.constant.int 4
    %int4_36278 = torch.constant.int 4
    %int128_36279 = torch.constant.int 128
    %38707 = torch.prim.ListConstruct %int4_36277, %38271, %int4_36278, %int128_36279 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38708 = torch.aten.view %38692, %38707 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36280 = torch.constant.int 4
    %int4_36281 = torch.constant.int 4
    %int128_36282 = torch.constant.int 128
    %38709 = torch.prim.ListConstruct %int4_36280, %38271, %int4_36281, %int128_36282 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38710 = torch.aten.view %38694, %38709 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
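    // Same unsqueeze / expand / view sequence for the per-shard V tensors
    // (%38060 .. %38074), again repeating the single value head 4x per shard.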
    %int-2_36283 = torch.constant.int -2
    %38711 = torch.aten.unsqueeze %38060, %int-2_36283 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36284 = torch.constant.int -2
    %38712 = torch.aten.unsqueeze %38062, %int-2_36284 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36285 = torch.constant.int -2
    %38713 = torch.aten.unsqueeze %38064, %int-2_36285 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36286 = torch.constant.int -2
    %38714 = torch.aten.unsqueeze %38066, %int-2_36286 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36287 = torch.constant.int -2
    %38715 = torch.aten.unsqueeze %38068, %int-2_36287 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36288 = torch.constant.int -2
    %38716 = torch.aten.unsqueeze %38070, %int-2_36288 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36289 = torch.constant.int -2
    %38717 = torch.aten.unsqueeze %38072, %int-2_36289 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_36290 = torch.constant.int -2
    %38718 = torch.aten.unsqueeze %38074, %int-2_36290 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %38718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_36291 = torch.constant.int 1
    %38719 = torch.aten.size.int %37984, %int1_36291 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
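    // The dynamic sequence length is re-read from dim 1 of %37984 rather than
    // reusing %38271; per the shape bindings both should equal s0 * 16.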
    %int4_36292 = torch.constant.int 4
    %int1_36293 = torch.constant.int 1
    %int4_36294 = torch.constant.int 4
    %int128_36295 = torch.constant.int 128
    %38720 = torch.prim.ListConstruct %int4_36292, %38719, %int1_36293, %int4_36294, %int128_36295 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36296 = torch.constant.bool false
    %38721 = torch.aten.expand %38711, %38720, %false_36296 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36297 = torch.constant.int 4
    %int1_36298 = torch.constant.int 1
    %int4_36299 = torch.constant.int 4
    %int128_36300 = torch.constant.int 128
    %38722 = torch.prim.ListConstruct %int4_36297, %38719, %int1_36298, %int4_36299, %int128_36300 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36301 = torch.constant.bool false
    %38723 = torch.aten.expand %38712, %38722, %false_36301 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36302 = torch.constant.int 4
    %int1_36303 = torch.constant.int 1
    %int4_36304 = torch.constant.int 4
    %int128_36305 = torch.constant.int 128
    %38724 = torch.prim.ListConstruct %int4_36302, %38719, %int1_36303, %int4_36304, %int128_36305 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36306 = torch.constant.bool false
    %38725 = torch.aten.expand %38713, %38724, %false_36306 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36307 = torch.constant.int 4
    %int1_36308 = torch.constant.int 1
    %int4_36309 = torch.constant.int 4
    %int128_36310 = torch.constant.int 128
    %38726 = torch.prim.ListConstruct %int4_36307, %38719, %int1_36308, %int4_36309, %int128_36310 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36311 = torch.constant.bool false
    %38727 = torch.aten.expand %38714, %38726, %false_36311 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36312 = torch.constant.int 4
    %int1_36313 = torch.constant.int 1
    %int4_36314 = torch.constant.int 4
    %int128_36315 = torch.constant.int 128
    %38728 = torch.prim.ListConstruct %int4_36312, %38719, %int1_36313, %int4_36314, %int128_36315 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36316 = torch.constant.bool false
    %38729 = torch.aten.expand %38715, %38728, %false_36316 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36317 = torch.constant.int 4
    %int1_36318 = torch.constant.int 1
    %int4_36319 = torch.constant.int 4
    %int128_36320 = torch.constant.int 128
    %38730 = torch.prim.ListConstruct %int4_36317, %38719, %int1_36318, %int4_36319, %int128_36320 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36321 = torch.constant.bool false
    %38731 = torch.aten.expand %38716, %38730, %false_36321 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36322 = torch.constant.int 4
    %int1_36323 = torch.constant.int 1
    %int4_36324 = torch.constant.int 4
    %int128_36325 = torch.constant.int 128
    %38732 = torch.prim.ListConstruct %int4_36322, %38719, %int1_36323, %int4_36324, %int128_36325 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36326 = torch.constant.bool false
    %38733 = torch.aten.expand %38717, %38732, %false_36326 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36327 = torch.constant.int 4
    %int1_36328 = torch.constant.int 1
    %int4_36329 = torch.constant.int 4
    %int128_36330 = torch.constant.int 128
    %38734 = torch.prim.ListConstruct %int4_36327, %38719, %int1_36328, %int4_36329, %int128_36330 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_36331 = torch.constant.bool false
    %38735 = torch.aten.expand %38718, %38734, %false_36331 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %38735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_36332 = torch.constant.int 4
    %int4_36333 = torch.constant.int 4
    %int128_36334 = torch.constant.int 128
    %38736 = torch.prim.ListConstruct %int4_36332, %38719, %int4_36333, %int128_36334 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38737 = torch.aten.view %38721, %38736 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36335 = torch.constant.int 4
    %int4_36336 = torch.constant.int 4
    %int128_36337 = torch.constant.int 128
    %38738 = torch.prim.ListConstruct %int4_36335, %38719, %int4_36336, %int128_36337 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38739 = torch.aten.view %38723, %38738 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36338 = torch.constant.int 4
    %int4_36339 = torch.constant.int 4
    %int128_36340 = torch.constant.int 128
    %38740 = torch.prim.ListConstruct %int4_36338, %38719, %int4_36339, %int128_36340 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38741 = torch.aten.view %38725, %38740 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36341 = torch.constant.int 4
    %int4_36342 = torch.constant.int 4
    %int128_36343 = torch.constant.int 128
    %38742 = torch.prim.ListConstruct %int4_36341, %38719, %int4_36342, %int128_36343 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38743 = torch.aten.view %38727, %38742 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36344 = torch.constant.int 4
    %int4_36345 = torch.constant.int 4
    %int128_36346 = torch.constant.int 128
    %38744 = torch.prim.ListConstruct %int4_36344, %38719, %int4_36345, %int128_36346 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38745 = torch.aten.view %38729, %38744 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36347 = torch.constant.int 4
    %int4_36348 = torch.constant.int 4
    %int128_36349 = torch.constant.int 128
    %38746 = torch.prim.ListConstruct %int4_36347, %38719, %int4_36348, %int128_36349 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38747 = torch.aten.view %38731, %38746 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36350 = torch.constant.int 4
    %int4_36351 = torch.constant.int 4
    %int128_36352 = torch.constant.int 128
    %38748 = torch.prim.ListConstruct %int4_36350, %38719, %int4_36351, %int128_36352 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38749 = torch.aten.view %38733, %38748 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_36353 = torch.constant.int 4
    %int4_36354 = torch.constant.int 4
    %int128_36355 = torch.constant.int 128
    %38750 = torch.prim.ListConstruct %int4_36353, %38719, %int4_36354, %int128_36355 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38751 = torch.aten.view %38735, %38750 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
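    // The per-shard [4,?,4,128] tensors (presumably Q: %38127 .. %38232, plus the
    // expanded K and V built above) are transposed to [4,4,?,128], moving heads
    // ahead of the sequence dim for the attention computation that follows.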
    %int1_36356 = torch.constant.int 1
    %int2_36357 = torch.constant.int 2
    %38752 = torch.aten.transpose.int %38127, %int1_36356, %int2_36357 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38752, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36358 = torch.constant.int 1
    %int2_36359 = torch.constant.int 2
    %38753 = torch.aten.transpose.int %38142, %int1_36358, %int2_36359 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38753, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36360 = torch.constant.int 1
    %int2_36361 = torch.constant.int 2
    %38754 = torch.aten.transpose.int %38157, %int1_36360, %int2_36361 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38754, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36362 = torch.constant.int 1
    %int2_36363 = torch.constant.int 2
    %38755 = torch.aten.transpose.int %38172, %int1_36362, %int2_36363 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38755, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36364 = torch.constant.int 1
    %int2_36365 = torch.constant.int 2
    %38756 = torch.aten.transpose.int %38187, %int1_36364, %int2_36365 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38756, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36366 = torch.constant.int 1
    %int2_36367 = torch.constant.int 2
    %38757 = torch.aten.transpose.int %38202, %int1_36366, %int2_36367 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38757, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36368 = torch.constant.int 1
    %int2_36369 = torch.constant.int 2
    %38758 = torch.aten.transpose.int %38217, %int1_36368, %int2_36369 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38758, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36370 = torch.constant.int 1
    %int2_36371 = torch.constant.int 2
    %38759 = torch.aten.transpose.int %38232, %int1_36370, %int2_36371 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38759, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36372 = torch.constant.int 1
    %int2_36373 = torch.constant.int 2
    %38760 = torch.aten.transpose.int %38696, %int1_36372, %int2_36373 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38760, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36374 = torch.constant.int 1
    %int2_36375 = torch.constant.int 2
    %38761 = torch.aten.transpose.int %38698, %int1_36374, %int2_36375 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38761, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36376 = torch.constant.int 1
    %int2_36377 = torch.constant.int 2
    %38762 = torch.aten.transpose.int %38700, %int1_36376, %int2_36377 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38762, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36378 = torch.constant.int 1
    %int2_36379 = torch.constant.int 2
    %38763 = torch.aten.transpose.int %38702, %int1_36378, %int2_36379 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38763, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36380 = torch.constant.int 1
    %int2_36381 = torch.constant.int 2
    %38764 = torch.aten.transpose.int %38704, %int1_36380, %int2_36381 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38764, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36382 = torch.constant.int 1
    %int2_36383 = torch.constant.int 2
    %38765 = torch.aten.transpose.int %38706, %int1_36382, %int2_36383 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38765, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36384 = torch.constant.int 1
    %int2_36385 = torch.constant.int 2
    %38766 = torch.aten.transpose.int %38708, %int1_36384, %int2_36385 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38766, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36386 = torch.constant.int 1
    %int2_36387 = torch.constant.int 2
    %38767 = torch.aten.transpose.int %38710, %int1_36386, %int2_36387 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38767, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
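    // Likewise for the value tensors: [4, seq, 4, 128] -> [4, 4, seq, 128],
    // feeding the third (value) operand of the flash-attention calls.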
    %int1_36388 = torch.constant.int 1
    %int2_36389 = torch.constant.int 2
    %38768 = torch.aten.transpose.int %38737, %int1_36388, %int2_36389 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38768, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36390 = torch.constant.int 1
    %int2_36391 = torch.constant.int 2
    %38769 = torch.aten.transpose.int %38739, %int1_36390, %int2_36391 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38769, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36392 = torch.constant.int 1
    %int2_36393 = torch.constant.int 2
    %38770 = torch.aten.transpose.int %38741, %int1_36392, %int2_36393 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38770, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36394 = torch.constant.int 1
    %int2_36395 = torch.constant.int 2
    %38771 = torch.aten.transpose.int %38743, %int1_36394, %int2_36395 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38771, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36396 = torch.constant.int 1
    %int2_36397 = torch.constant.int 2
    %38772 = torch.aten.transpose.int %38745, %int1_36396, %int2_36397 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38772, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36398 = torch.constant.int 1
    %int2_36399 = torch.constant.int 2
    %38773 = torch.aten.transpose.int %38747, %int1_36398, %int2_36399 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38773, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36400 = torch.constant.int 1
    %int2_36401 = torch.constant.int 2
    %38774 = torch.aten.transpose.int %38749, %int1_36400, %int2_36401 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38774, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_36402 = torch.constant.int 1
    %int2_36403 = torch.constant.int 2
    %38775 = torch.aten.transpose.int %38751, %int1_36402, %int2_36403 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %38775, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
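    // Eight scaled-dot-product flash-attention calls, one per device shard.
    // The operand order follows aten._scaled_dot_product_flash_attention_for_cpu:
    // (query, key, value, dropout_p, is_causal, attn_mask, scale); here dropout_p
    // is 0.0, is_causal is true, and attn_mask/scale are none. Each call yields
    // the attention output [4, 4, seq, 128] (result #0) and an f32 logsumexp
    // [4, 4, seq] (result #1); only #0 is consumed below.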
    %float0.000000e00_36404 = torch.constant.float 0.000000e+00
    %true_36405 = torch.constant.bool true
    %none_36406 = torch.constant.none
    %none_36407 = torch.constant.none
    %38776:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%38752, %38760, %38768, %float0.000000e00_36404, %true_36405, %none_36406, %none_36407) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %38776#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_36408 = torch.constant.float 0.000000e+00
    %true_36409 = torch.constant.bool true
    %none_36410 = torch.constant.none
    %none_36411 = torch.constant.none
    %38777:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%38753, %38761, %38769, %float0.000000e00_36408, %true_36409, %none_36410, %none_36411) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %38777#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_36412 = torch.constant.float 0.000000e+00
    %true_36413 = torch.constant.bool true
    %none_36414 = torch.constant.none
    %none_36415 = torch.constant.none
    %38778:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%38754, %38762, %38770, %float0.000000e00_36412, %true_36413, %none_36414, %none_36415) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %38778#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_36416 = torch.constant.float 0.000000e+00
    %true_36417 = torch.constant.bool true
    %none_36418 = torch.constant.none
    %none_36419 = torch.constant.none
    %38779:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%38755, %38763, %38771, %float0.000000e00_36416, %true_36417, %none_36418, %none_36419) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %38779#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_36420 = torch.constant.float 0.000000e+00
    %true_36421 = torch.constant.bool true
    %none_36422 = torch.constant.none
    %none_36423 = torch.constant.none
    %38780:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%38756, %38764, %38772, %float0.000000e00_36420, %true_36421, %none_36422, %none_36423) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %38780#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_36424 = torch.constant.float 0.000000e+00
    %true_36425 = torch.constant.bool true
    %none_36426 = torch.constant.none
    %none_36427 = torch.constant.none
    %38781:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%38757, %38765, %38773, %float0.000000e00_36424, %true_36425, %none_36426, %none_36427) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %38781#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_36428 = torch.constant.float 0.000000e+00
    %true_36429 = torch.constant.bool true
    %none_36430 = torch.constant.none
    %none_36431 = torch.constant.none
    %38782:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%38758, %38766, %38774, %float0.000000e00_36428, %true_36429, %none_36430, %none_36431) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %38782#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_36432 = torch.constant.float 0.000000e+00
    %true_36433 = torch.constant.bool true
    %none_36434 = torch.constant.none
    %none_36435 = torch.constant.none
    %38783:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%38759, %38767, %38775, %float0.000000e00_36432, %true_36433, %none_36434, %none_36435) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %38783#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
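    // Transpose each attention output back to [batch=4, seq, heads=4, head_dim=128].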
    %int1_36436 = torch.constant.int 1
    %int2_36437 = torch.constant.int 2
    %38784 = torch.aten.transpose.int %38776#0, %int1_36436, %int2_36437 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_36438 = torch.constant.int 1
    %int2_36439 = torch.constant.int 2
    %38785 = torch.aten.transpose.int %38777#0, %int1_36438, %int2_36439 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_36440 = torch.constant.int 1
    %int2_36441 = torch.constant.int 2
    %38786 = torch.aten.transpose.int %38778#0, %int1_36440, %int2_36441 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_36442 = torch.constant.int 1
    %int2_36443 = torch.constant.int 2
    %38787 = torch.aten.transpose.int %38779#0, %int1_36442, %int2_36443 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_36444 = torch.constant.int 1
    %int2_36445 = torch.constant.int 2
    %38788 = torch.aten.transpose.int %38780#0, %int1_36444, %int2_36445 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_36446 = torch.constant.int 1
    %int2_36447 = torch.constant.int 2
    %38789 = torch.aten.transpose.int %38781#0, %int1_36446, %int2_36447 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_36448 = torch.constant.int 1
    %int2_36449 = torch.constant.int 2
    %38790 = torch.aten.transpose.int %38782#0, %int1_36448, %int2_36449 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_36450 = torch.constant.int 1
    %int2_36451 = torch.constant.int 2
    %38791 = torch.aten.transpose.int %38783#0, %int1_36450, %int2_36451 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %38791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
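    // Collapse the head dimensions: view [4, seq, 4, 128] as [4, seq, 512]
    // (512 = 4 heads x 128 head_dim); %38113, %38128, ... carry the dynamic seq
    // extent for each shard.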
    %int4_36452 = torch.constant.int 4
    %int512_36453 = torch.constant.int 512
    %38792 = torch.prim.ListConstruct %int4_36452, %38113, %int512_36453 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38793 = torch.aten.view %38784, %38792 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %38793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_36454 = torch.constant.int 4
    %int512_36455 = torch.constant.int 512
    %38794 = torch.prim.ListConstruct %int4_36454, %38128, %int512_36455 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38795 = torch.aten.view %38785, %38794 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %38795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_36456 = torch.constant.int 4
    %int512_36457 = torch.constant.int 512
    %38796 = torch.prim.ListConstruct %int4_36456, %38143, %int512_36457 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38797 = torch.aten.view %38786, %38796 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %38797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_36458 = torch.constant.int 4
    %int512_36459 = torch.constant.int 512
    %38798 = torch.prim.ListConstruct %int4_36458, %38158, %int512_36459 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38799 = torch.aten.view %38787, %38798 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %38799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_36460 = torch.constant.int 4
    %int512_36461 = torch.constant.int 512
    %38800 = torch.prim.ListConstruct %int4_36460, %38173, %int512_36461 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38801 = torch.aten.view %38788, %38800 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %38801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_36462 = torch.constant.int 4
    %int512_36463 = torch.constant.int 512
    %38802 = torch.prim.ListConstruct %int4_36462, %38188, %int512_36463 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38803 = torch.aten.view %38789, %38802 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %38803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_36464 = torch.constant.int 4
    %int512_36465 = torch.constant.int 512
    %38804 = torch.prim.ListConstruct %int4_36464, %38203, %int512_36465 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38805 = torch.aten.view %38790, %38804 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %38805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_36466 = torch.constant.int 4
    %int512_36467 = torch.constant.int 512
    %38806 = torch.prim.ListConstruct %int4_36466, %38218, %int512_36467 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38807 = torch.aten.view %38791, %38806 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %38807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
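    // Transpose each device's [4096, 512] projection weight shard (%1408..%1415,
    // presumably the attn_output shards) to [512, 4096] for use as the right-hand
    // side of the matmuls below.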
    %int1_36468 = torch.constant.int 1
    %int0_36469 = torch.constant.int 0
    %38808 = torch.prim.ListConstruct %int1_36468, %int0_36469 : (!torch.int, !torch.int) -> !torch.list<int>
    %38809 = torch.aten.permute %1408, %38808 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_36470 = torch.constant.int 1
    %int0_36471 = torch.constant.int 0
    %38810 = torch.prim.ListConstruct %int1_36470, %int0_36471 : (!torch.int, !torch.int) -> !torch.list<int>
    %38811 = torch.aten.permute %1409, %38810 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_36472 = torch.constant.int 1
    %int0_36473 = torch.constant.int 0
    %38812 = torch.prim.ListConstruct %int1_36472, %int0_36473 : (!torch.int, !torch.int) -> !torch.list<int>
    %38813 = torch.aten.permute %1410, %38812 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_36474 = torch.constant.int 1
    %int0_36475 = torch.constant.int 0
    %38814 = torch.prim.ListConstruct %int1_36474, %int0_36475 : (!torch.int, !torch.int) -> !torch.list<int>
    %38815 = torch.aten.permute %1411, %38814 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_36476 = torch.constant.int 1
    %int0_36477 = torch.constant.int 0
    %38816 = torch.prim.ListConstruct %int1_36476, %int0_36477 : (!torch.int, !torch.int) -> !torch.list<int>
    %38817 = torch.aten.permute %1412, %38816 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_36478 = torch.constant.int 1
    %int0_36479 = torch.constant.int 0
    %38818 = torch.prim.ListConstruct %int1_36478, %int0_36479 : (!torch.int, !torch.int) -> !torch.list<int>
    %38819 = torch.aten.permute %1413, %38818 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_36480 = torch.constant.int 1
    %int0_36481 = torch.constant.int 0
    %38820 = torch.prim.ListConstruct %int1_36480, %int0_36481 : (!torch.int, !torch.int) -> !torch.list<int>
    %38821 = torch.aten.permute %1414, %38820 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_36482 = torch.constant.int 1
    %int0_36483 = torch.constant.int 0
    %38822 = torch.prim.ListConstruct %int1_36482, %int0_36483 : (!torch.int, !torch.int) -> !torch.list<int>
    %38823 = torch.aten.permute %1415, %38822 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
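    // Per-device output projection: flatten [4, seq, 512] to [4*seq, 512],
    // matmul with the [512, 4096] weight shard, and view the result back as
    // [4, seq, 4096]. After this, each device holds one partial sum of the
    // full projection over the 4096 model dimension.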
    %int4_36484 = torch.constant.int 4
    %38824 = torch.aten.mul.int %int4_36484, %38113 : !torch.int, !torch.int -> !torch.int
    %int512_36485 = torch.constant.int 512
    %38825 = torch.prim.ListConstruct %38824, %int512_36485 : (!torch.int, !torch.int) -> !torch.list<int>
    %38826 = torch.aten.view %38793, %38825 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %38826, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %38827 = torch.aten.mm %38826, %38809 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %38827, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36486 = torch.constant.int 4
    %int4096_36487 = torch.constant.int 4096
    %38828 = torch.prim.ListConstruct %int4_36486, %38113, %int4096_36487 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38829 = torch.aten.view %38827, %38828 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_36488 = torch.constant.int 4
    %38830 = torch.aten.mul.int %int4_36488, %38128 : !torch.int, !torch.int -> !torch.int
    %int512_36489 = torch.constant.int 512
    %38831 = torch.prim.ListConstruct %38830, %int512_36489 : (!torch.int, !torch.int) -> !torch.list<int>
    %38832 = torch.aten.view %38795, %38831 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %38832, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %38833 = torch.aten.mm %38832, %38811 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %38833, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36490 = torch.constant.int 4
    %int4096_36491 = torch.constant.int 4096
    %38834 = torch.prim.ListConstruct %int4_36490, %38128, %int4096_36491 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38835 = torch.aten.view %38833, %38834 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_36492 = torch.constant.int 4
    %38836 = torch.aten.mul.int %int4_36492, %38143 : !torch.int, !torch.int -> !torch.int
    %int512_36493 = torch.constant.int 512
    %38837 = torch.prim.ListConstruct %38836, %int512_36493 : (!torch.int, !torch.int) -> !torch.list<int>
    %38838 = torch.aten.view %38797, %38837 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %38838, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %38839 = torch.aten.mm %38838, %38813 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %38839, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36494 = torch.constant.int 4
    %int4096_36495 = torch.constant.int 4096
    %38840 = torch.prim.ListConstruct %int4_36494, %38143, %int4096_36495 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38841 = torch.aten.view %38839, %38840 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_36496 = torch.constant.int 4
    %38842 = torch.aten.mul.int %int4_36496, %38158 : !torch.int, !torch.int -> !torch.int
    %int512_36497 = torch.constant.int 512
    %38843 = torch.prim.ListConstruct %38842, %int512_36497 : (!torch.int, !torch.int) -> !torch.list<int>
    %38844 = torch.aten.view %38799, %38843 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %38844, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %38845 = torch.aten.mm %38844, %38815 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %38845, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36498 = torch.constant.int 4
    %int4096_36499 = torch.constant.int 4096
    %38846 = torch.prim.ListConstruct %int4_36498, %38158, %int4096_36499 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38847 = torch.aten.view %38845, %38846 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_36500 = torch.constant.int 4
    %38848 = torch.aten.mul.int %int4_36500, %38173 : !torch.int, !torch.int -> !torch.int
    %int512_36501 = torch.constant.int 512
    %38849 = torch.prim.ListConstruct %38848, %int512_36501 : (!torch.int, !torch.int) -> !torch.list<int>
    %38850 = torch.aten.view %38801, %38849 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %38850, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %38851 = torch.aten.mm %38850, %38817 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %38851, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36502 = torch.constant.int 4
    %int4096_36503 = torch.constant.int 4096
    %38852 = torch.prim.ListConstruct %int4_36502, %38173, %int4096_36503 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38853 = torch.aten.view %38851, %38852 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_36504 = torch.constant.int 4
    %38854 = torch.aten.mul.int %int4_36504, %38188 : !torch.int, !torch.int -> !torch.int
    %int512_36505 = torch.constant.int 512
    %38855 = torch.prim.ListConstruct %38854, %int512_36505 : (!torch.int, !torch.int) -> !torch.list<int>
    %38856 = torch.aten.view %38803, %38855 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %38856, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %38857 = torch.aten.mm %38856, %38819 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %38857, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36506 = torch.constant.int 4
    %int4096_36507 = torch.constant.int 4096
    %38858 = torch.prim.ListConstruct %int4_36506, %38188, %int4096_36507 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38859 = torch.aten.view %38857, %38858 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_36508 = torch.constant.int 4
    %38860 = torch.aten.mul.int %int4_36508, %38203 : !torch.int, !torch.int -> !torch.int
    %int512_36509 = torch.constant.int 512
    %38861 = torch.prim.ListConstruct %38860, %int512_36509 : (!torch.int, !torch.int) -> !torch.list<int>
    %38862 = torch.aten.view %38805, %38861 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %38862, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %38863 = torch.aten.mm %38862, %38821 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %38863, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36510 = torch.constant.int 4
    %int4096_36511 = torch.constant.int 4096
    %38864 = torch.prim.ListConstruct %int4_36510, %38203, %int4096_36511 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38865 = torch.aten.view %38863, %38864 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_36512 = torch.constant.int 4
    %38866 = torch.aten.mul.int %int4_36512, %38218 : !torch.int, !torch.int -> !torch.int
    %int512_36513 = torch.constant.int 512
    %38867 = torch.prim.ListConstruct %38866, %int512_36513 : (!torch.int, !torch.int) -> !torch.list<int>
    %38868 = torch.aten.view %38807, %38867 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %38868, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %38869 = torch.aten.mm %38868, %38823 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %38869, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36514 = torch.constant.int 4
    %int4096_36515 = torch.constant.int 4096
    %38870 = torch.prim.ListConstruct %int4_36514, %38218, %int4096_36515 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %38871 = torch.aten.view %38869, %38870 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
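    // Cross-device reduction of the eight partials: for each device, the seven
    // remote results are copied in with flow.tensor.transfer and all eight are
    // summed, which is effectively an all-reduce expressed as explicit
    // point-to-point transfers plus adds. First, gather-and-sum onto
    // @__device_0; the local partial %38829 needs no transfer.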
    %38872 = torch_c.to_builtin_tensor %38835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36516 = arith.constant 1 : index
    %dim_36517 = tensor.dim %38872, %c1_36516 : tensor<4x?x4096xf16>
    %38873 = flow.tensor.transfer %38872 : tensor<4x?x4096xf16>{%dim_36517} to #hal.device.promise<@__device_0>
    %38874 = torch_c.from_builtin_tensor %38873 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38875 = torch_c.to_builtin_tensor %38841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36518 = arith.constant 1 : index
    %dim_36519 = tensor.dim %38875, %c1_36518 : tensor<4x?x4096xf16>
    %38876 = flow.tensor.transfer %38875 : tensor<4x?x4096xf16>{%dim_36519} to #hal.device.promise<@__device_0>
    %38877 = torch_c.from_builtin_tensor %38876 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38878 = torch_c.to_builtin_tensor %38847 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36520 = arith.constant 1 : index
    %dim_36521 = tensor.dim %38878, %c1_36520 : tensor<4x?x4096xf16>
    %38879 = flow.tensor.transfer %38878 : tensor<4x?x4096xf16>{%dim_36521} to #hal.device.promise<@__device_0>
    %38880 = torch_c.from_builtin_tensor %38879 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38881 = torch_c.to_builtin_tensor %38853 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36522 = arith.constant 1 : index
    %dim_36523 = tensor.dim %38881, %c1_36522 : tensor<4x?x4096xf16>
    %38882 = flow.tensor.transfer %38881 : tensor<4x?x4096xf16>{%dim_36523} to #hal.device.promise<@__device_0>
    %38883 = torch_c.from_builtin_tensor %38882 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38884 = torch_c.to_builtin_tensor %38859 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36524 = arith.constant 1 : index
    %dim_36525 = tensor.dim %38884, %c1_36524 : tensor<4x?x4096xf16>
    %38885 = flow.tensor.transfer %38884 : tensor<4x?x4096xf16>{%dim_36525} to #hal.device.promise<@__device_0>
    %38886 = torch_c.from_builtin_tensor %38885 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38887 = torch_c.to_builtin_tensor %38865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36526 = arith.constant 1 : index
    %dim_36527 = tensor.dim %38887, %c1_36526 : tensor<4x?x4096xf16>
    %38888 = flow.tensor.transfer %38887 : tensor<4x?x4096xf16>{%dim_36527} to #hal.device.promise<@__device_0>
    %38889 = torch_c.from_builtin_tensor %38888 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38890 = torch_c.to_builtin_tensor %38871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36528 = arith.constant 1 : index
    %dim_36529 = tensor.dim %38890, %c1_36528 : tensor<4x?x4096xf16>
    %38891 = flow.tensor.transfer %38890 : tensor<4x?x4096xf16>{%dim_36529} to #hal.device.promise<@__device_0>
    %38892 = torch_c.from_builtin_tensor %38891 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36530 = torch.constant.int 1
    %38893 = torch.aten.add.Tensor %38829, %38874, %int1_36530 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36531 = torch.constant.int 1
    %38894 = torch.aten.add.Tensor %38893, %38877, %int1_36531 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36532 = torch.constant.int 1
    %38895 = torch.aten.add.Tensor %38894, %38880, %int1_36532 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36533 = torch.constant.int 1
    %38896 = torch.aten.add.Tensor %38895, %38883, %int1_36533 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36534 = torch.constant.int 1
    %38897 = torch.aten.add.Tensor %38896, %38886, %int1_36534 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36535 = torch.constant.int 1
    %38898 = torch.aten.add.Tensor %38897, %38889, %int1_36535 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36536 = torch.constant.int 1
    %38899 = torch.aten.add.Tensor %38898, %38892, %int1_36536 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
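    // Gather-and-sum onto @__device_1; the local partial %38835 is added directly.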
    %38900 = torch_c.to_builtin_tensor %38829 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36537 = arith.constant 1 : index
    %dim_36538 = tensor.dim %38900, %c1_36537 : tensor<4x?x4096xf16>
    %38901 = flow.tensor.transfer %38900 : tensor<4x?x4096xf16>{%dim_36538} to #hal.device.promise<@__device_1>
    %38902 = torch_c.from_builtin_tensor %38901 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38903 = torch_c.to_builtin_tensor %38841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36539 = arith.constant 1 : index
    %dim_36540 = tensor.dim %38903, %c1_36539 : tensor<4x?x4096xf16>
    %38904 = flow.tensor.transfer %38903 : tensor<4x?x4096xf16>{%dim_36540} to #hal.device.promise<@__device_1>
    %38905 = torch_c.from_builtin_tensor %38904 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38906 = torch_c.to_builtin_tensor %38847 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36541 = arith.constant 1 : index
    %dim_36542 = tensor.dim %38906, %c1_36541 : tensor<4x?x4096xf16>
    %38907 = flow.tensor.transfer %38906 : tensor<4x?x4096xf16>{%dim_36542} to #hal.device.promise<@__device_1>
    %38908 = torch_c.from_builtin_tensor %38907 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38909 = torch_c.to_builtin_tensor %38853 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36543 = arith.constant 1 : index
    %dim_36544 = tensor.dim %38909, %c1_36543 : tensor<4x?x4096xf16>
    %38910 = flow.tensor.transfer %38909 : tensor<4x?x4096xf16>{%dim_36544} to #hal.device.promise<@__device_1>
    %38911 = torch_c.from_builtin_tensor %38910 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38912 = torch_c.to_builtin_tensor %38859 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36545 = arith.constant 1 : index
    %dim_36546 = tensor.dim %38912, %c1_36545 : tensor<4x?x4096xf16>
    %38913 = flow.tensor.transfer %38912 : tensor<4x?x4096xf16>{%dim_36546} to #hal.device.promise<@__device_1>
    %38914 = torch_c.from_builtin_tensor %38913 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38915 = torch_c.to_builtin_tensor %38865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36547 = arith.constant 1 : index
    %dim_36548 = tensor.dim %38915, %c1_36547 : tensor<4x?x4096xf16>
    %38916 = flow.tensor.transfer %38915 : tensor<4x?x4096xf16>{%dim_36548} to #hal.device.promise<@__device_1>
    %38917 = torch_c.from_builtin_tensor %38916 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38918 = torch_c.to_builtin_tensor %38871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36549 = arith.constant 1 : index
    %dim_36550 = tensor.dim %38918, %c1_36549 : tensor<4x?x4096xf16>
    %38919 = flow.tensor.transfer %38918 : tensor<4x?x4096xf16>{%dim_36550} to #hal.device.promise<@__device_1>
    %38920 = torch_c.from_builtin_tensor %38919 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36551 = torch.constant.int 1
    %38921 = torch.aten.add.Tensor %38902, %38835, %int1_36551 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36552 = torch.constant.int 1
    %38922 = torch.aten.add.Tensor %38921, %38905, %int1_36552 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36553 = torch.constant.int 1
    %38923 = torch.aten.add.Tensor %38922, %38908, %int1_36553 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36554 = torch.constant.int 1
    %38924 = torch.aten.add.Tensor %38923, %38911, %int1_36554 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36555 = torch.constant.int 1
    %38925 = torch.aten.add.Tensor %38924, %38914, %int1_36555 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36556 = torch.constant.int 1
    %38926 = torch.aten.add.Tensor %38925, %38917, %int1_36556 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36557 = torch.constant.int 1
    %38927 = torch.aten.add.Tensor %38926, %38920, %int1_36557 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
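    // Gather-and-sum onto @__device_2; the local partial %38841 is added directly.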
    %38928 = torch_c.to_builtin_tensor %38829 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36558 = arith.constant 1 : index
    %dim_36559 = tensor.dim %38928, %c1_36558 : tensor<4x?x4096xf16>
    %38929 = flow.tensor.transfer %38928 : tensor<4x?x4096xf16>{%dim_36559} to #hal.device.promise<@__device_2>
    %38930 = torch_c.from_builtin_tensor %38929 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38931 = torch_c.to_builtin_tensor %38835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36560 = arith.constant 1 : index
    %dim_36561 = tensor.dim %38931, %c1_36560 : tensor<4x?x4096xf16>
    %38932 = flow.tensor.transfer %38931 : tensor<4x?x4096xf16>{%dim_36561} to #hal.device.promise<@__device_2>
    %38933 = torch_c.from_builtin_tensor %38932 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38934 = torch_c.to_builtin_tensor %38847 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36562 = arith.constant 1 : index
    %dim_36563 = tensor.dim %38934, %c1_36562 : tensor<4x?x4096xf16>
    %38935 = flow.tensor.transfer %38934 : tensor<4x?x4096xf16>{%dim_36563} to #hal.device.promise<@__device_2>
    %38936 = torch_c.from_builtin_tensor %38935 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38937 = torch_c.to_builtin_tensor %38853 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36564 = arith.constant 1 : index
    %dim_36565 = tensor.dim %38937, %c1_36564 : tensor<4x?x4096xf16>
    %38938 = flow.tensor.transfer %38937 : tensor<4x?x4096xf16>{%dim_36565} to #hal.device.promise<@__device_2>
    %38939 = torch_c.from_builtin_tensor %38938 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38940 = torch_c.to_builtin_tensor %38859 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36566 = arith.constant 1 : index
    %dim_36567 = tensor.dim %38940, %c1_36566 : tensor<4x?x4096xf16>
    %38941 = flow.tensor.transfer %38940 : tensor<4x?x4096xf16>{%dim_36567} to #hal.device.promise<@__device_2>
    %38942 = torch_c.from_builtin_tensor %38941 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38943 = torch_c.to_builtin_tensor %38865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36568 = arith.constant 1 : index
    %dim_36569 = tensor.dim %38943, %c1_36568 : tensor<4x?x4096xf16>
    %38944 = flow.tensor.transfer %38943 : tensor<4x?x4096xf16>{%dim_36569} to #hal.device.promise<@__device_2>
    %38945 = torch_c.from_builtin_tensor %38944 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38946 = torch_c.to_builtin_tensor %38871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36570 = arith.constant 1 : index
    %dim_36571 = tensor.dim %38946, %c1_36570 : tensor<4x?x4096xf16>
    %38947 = flow.tensor.transfer %38946 : tensor<4x?x4096xf16>{%dim_36571} to #hal.device.promise<@__device_2>
    %38948 = torch_c.from_builtin_tensor %38947 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36572 = torch.constant.int 1
    %38949 = torch.aten.add.Tensor %38930, %38933, %int1_36572 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36573 = torch.constant.int 1
    %38950 = torch.aten.add.Tensor %38949, %38841, %int1_36573 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36574 = torch.constant.int 1
    %38951 = torch.aten.add.Tensor %38950, %38936, %int1_36574 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36575 = torch.constant.int 1
    %38952 = torch.aten.add.Tensor %38951, %38939, %int1_36575 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36576 = torch.constant.int 1
    %38953 = torch.aten.add.Tensor %38952, %38942, %int1_36576 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36577 = torch.constant.int 1
    %38954 = torch.aten.add.Tensor %38953, %38945, %int1_36577 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36578 = torch.constant.int 1
    %38955 = torch.aten.add.Tensor %38954, %38948, %int1_36578 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
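    // Gather-and-sum onto @__device_3; the local partial %38847 is added directly.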
    %38956 = torch_c.to_builtin_tensor %38829 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36579 = arith.constant 1 : index
    %dim_36580 = tensor.dim %38956, %c1_36579 : tensor<4x?x4096xf16>
    %38957 = flow.tensor.transfer %38956 : tensor<4x?x4096xf16>{%dim_36580} to #hal.device.promise<@__device_3>
    %38958 = torch_c.from_builtin_tensor %38957 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38959 = torch_c.to_builtin_tensor %38835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36581 = arith.constant 1 : index
    %dim_36582 = tensor.dim %38959, %c1_36581 : tensor<4x?x4096xf16>
    %38960 = flow.tensor.transfer %38959 : tensor<4x?x4096xf16>{%dim_36582} to #hal.device.promise<@__device_3>
    %38961 = torch_c.from_builtin_tensor %38960 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38962 = torch_c.to_builtin_tensor %38841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36583 = arith.constant 1 : index
    %dim_36584 = tensor.dim %38962, %c1_36583 : tensor<4x?x4096xf16>
    %38963 = flow.tensor.transfer %38962 : tensor<4x?x4096xf16>{%dim_36584} to #hal.device.promise<@__device_3>
    %38964 = torch_c.from_builtin_tensor %38963 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38965 = torch_c.to_builtin_tensor %38853 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36585 = arith.constant 1 : index
    %dim_36586 = tensor.dim %38965, %c1_36585 : tensor<4x?x4096xf16>
    %38966 = flow.tensor.transfer %38965 : tensor<4x?x4096xf16>{%dim_36586} to #hal.device.promise<@__device_3>
    %38967 = torch_c.from_builtin_tensor %38966 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38968 = torch_c.to_builtin_tensor %38859 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36587 = arith.constant 1 : index
    %dim_36588 = tensor.dim %38968, %c1_36587 : tensor<4x?x4096xf16>
    %38969 = flow.tensor.transfer %38968 : tensor<4x?x4096xf16>{%dim_36588} to #hal.device.promise<@__device_3>
    %38970 = torch_c.from_builtin_tensor %38969 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38971 = torch_c.to_builtin_tensor %38865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36589 = arith.constant 1 : index
    %dim_36590 = tensor.dim %38971, %c1_36589 : tensor<4x?x4096xf16>
    %38972 = flow.tensor.transfer %38971 : tensor<4x?x4096xf16>{%dim_36590} to #hal.device.promise<@__device_3>
    %38973 = torch_c.from_builtin_tensor %38972 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38974 = torch_c.to_builtin_tensor %38871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36591 = arith.constant 1 : index
    %dim_36592 = tensor.dim %38974, %c1_36591 : tensor<4x?x4096xf16>
    %38975 = flow.tensor.transfer %38974 : tensor<4x?x4096xf16>{%dim_36592} to #hal.device.promise<@__device_3>
    %38976 = torch_c.from_builtin_tensor %38975 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36593 = torch.constant.int 1
    %38977 = torch.aten.add.Tensor %38958, %38961, %int1_36593 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36594 = torch.constant.int 1
    %38978 = torch.aten.add.Tensor %38977, %38964, %int1_36594 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36595 = torch.constant.int 1
    %38979 = torch.aten.add.Tensor %38978, %38847, %int1_36595 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36596 = torch.constant.int 1
    %38980 = torch.aten.add.Tensor %38979, %38967, %int1_36596 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36597 = torch.constant.int 1
    %38981 = torch.aten.add.Tensor %38980, %38970, %int1_36597 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36598 = torch.constant.int 1
    %38982 = torch.aten.add.Tensor %38981, %38973, %int1_36598 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36599 = torch.constant.int 1
    %38983 = torch.aten.add.Tensor %38982, %38976, %int1_36599 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
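    // Gather-and-sum onto @__device_4; the local partial %38853 is added directly.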
    %38984 = torch_c.to_builtin_tensor %38829 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36600 = arith.constant 1 : index
    %dim_36601 = tensor.dim %38984, %c1_36600 : tensor<4x?x4096xf16>
    %38985 = flow.tensor.transfer %38984 : tensor<4x?x4096xf16>{%dim_36601} to #hal.device.promise<@__device_4>
    %38986 = torch_c.from_builtin_tensor %38985 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38987 = torch_c.to_builtin_tensor %38835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36602 = arith.constant 1 : index
    %dim_36603 = tensor.dim %38987, %c1_36602 : tensor<4x?x4096xf16>
    %38988 = flow.tensor.transfer %38987 : tensor<4x?x4096xf16>{%dim_36603} to #hal.device.promise<@__device_4>
    %38989 = torch_c.from_builtin_tensor %38988 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38990 = torch_c.to_builtin_tensor %38841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36604 = arith.constant 1 : index
    %dim_36605 = tensor.dim %38990, %c1_36604 : tensor<4x?x4096xf16>
    %38991 = flow.tensor.transfer %38990 : tensor<4x?x4096xf16>{%dim_36605} to #hal.device.promise<@__device_4>
    %38992 = torch_c.from_builtin_tensor %38991 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38993 = torch_c.to_builtin_tensor %38847 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36606 = arith.constant 1 : index
    %dim_36607 = tensor.dim %38993, %c1_36606 : tensor<4x?x4096xf16>
    %38994 = flow.tensor.transfer %38993 : tensor<4x?x4096xf16>{%dim_36607} to #hal.device.promise<@__device_4>
    %38995 = torch_c.from_builtin_tensor %38994 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38996 = torch_c.to_builtin_tensor %38859 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36608 = arith.constant 1 : index
    %dim_36609 = tensor.dim %38996, %c1_36608 : tensor<4x?x4096xf16>
    %38997 = flow.tensor.transfer %38996 : tensor<4x?x4096xf16>{%dim_36609} to #hal.device.promise<@__device_4>
    %38998 = torch_c.from_builtin_tensor %38997 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %38998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %38999 = torch_c.to_builtin_tensor %38865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36610 = arith.constant 1 : index
    %dim_36611 = tensor.dim %38999, %c1_36610 : tensor<4x?x4096xf16>
    %39000 = flow.tensor.transfer %38999 : tensor<4x?x4096xf16>{%dim_36611} to #hal.device.promise<@__device_4>
    %39001 = torch_c.from_builtin_tensor %39000 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39002 = torch_c.to_builtin_tensor %38871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36612 = arith.constant 1 : index
    %dim_36613 = tensor.dim %39002, %c1_36612 : tensor<4x?x4096xf16>
    %39003 = flow.tensor.transfer %39002 : tensor<4x?x4096xf16>{%dim_36613} to #hal.device.promise<@__device_4>
    %39004 = torch_c.from_builtin_tensor %39003 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36614 = torch.constant.int 1
    %39005 = torch.aten.add.Tensor %38986, %38989, %int1_36614 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36615 = torch.constant.int 1
    %39006 = torch.aten.add.Tensor %39005, %38992, %int1_36615 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36616 = torch.constant.int 1
    %39007 = torch.aten.add.Tensor %39006, %38995, %int1_36616 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36617 = torch.constant.int 1
    %39008 = torch.aten.add.Tensor %39007, %38853, %int1_36617 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36618 = torch.constant.int 1
    %39009 = torch.aten.add.Tensor %39008, %38998, %int1_36618 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36619 = torch.constant.int 1
    %39010 = torch.aten.add.Tensor %39009, %39001, %int1_36619 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36620 = torch.constant.int 1
    %39011 = torch.aten.add.Tensor %39010, %39004, %int1_36620 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
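    // Same gather-and-reduce, now targeting @__device_5; the locally resident
    // shard here is %38859, so it is the one added without a transfer.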
    %39012 = torch_c.to_builtin_tensor %38829 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36621 = arith.constant 1 : index
    %dim_36622 = tensor.dim %39012, %c1_36621 : tensor<4x?x4096xf16>
    %39013 = flow.tensor.transfer %39012 : tensor<4x?x4096xf16>{%dim_36622} to #hal.device.promise<@__device_5>
    %39014 = torch_c.from_builtin_tensor %39013 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39015 = torch_c.to_builtin_tensor %38835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36623 = arith.constant 1 : index
    %dim_36624 = tensor.dim %39015, %c1_36623 : tensor<4x?x4096xf16>
    %39016 = flow.tensor.transfer %39015 : tensor<4x?x4096xf16>{%dim_36624} to #hal.device.promise<@__device_5>
    %39017 = torch_c.from_builtin_tensor %39016 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39018 = torch_c.to_builtin_tensor %38841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36625 = arith.constant 1 : index
    %dim_36626 = tensor.dim %39018, %c1_36625 : tensor<4x?x4096xf16>
    %39019 = flow.tensor.transfer %39018 : tensor<4x?x4096xf16>{%dim_36626} to #hal.device.promise<@__device_5>
    %39020 = torch_c.from_builtin_tensor %39019 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39021 = torch_c.to_builtin_tensor %38847 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36627 = arith.constant 1 : index
    %dim_36628 = tensor.dim %39021, %c1_36627 : tensor<4x?x4096xf16>
    %39022 = flow.tensor.transfer %39021 : tensor<4x?x4096xf16>{%dim_36628} to #hal.device.promise<@__device_5>
    %39023 = torch_c.from_builtin_tensor %39022 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39024 = torch_c.to_builtin_tensor %38853 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36629 = arith.constant 1 : index
    %dim_36630 = tensor.dim %39024, %c1_36629 : tensor<4x?x4096xf16>
    %39025 = flow.tensor.transfer %39024 : tensor<4x?x4096xf16>{%dim_36630} to #hal.device.promise<@__device_5>
    %39026 = torch_c.from_builtin_tensor %39025 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39027 = torch_c.to_builtin_tensor %38865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36631 = arith.constant 1 : index
    %dim_36632 = tensor.dim %39027, %c1_36631 : tensor<4x?x4096xf16>
    %39028 = flow.tensor.transfer %39027 : tensor<4x?x4096xf16>{%dim_36632} to #hal.device.promise<@__device_5>
    %39029 = torch_c.from_builtin_tensor %39028 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39030 = torch_c.to_builtin_tensor %38871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36633 = arith.constant 1 : index
    %dim_36634 = tensor.dim %39030, %c1_36633 : tensor<4x?x4096xf16>
    %39031 = flow.tensor.transfer %39030 : tensor<4x?x4096xf16>{%dim_36634} to #hal.device.promise<@__device_5>
    %39032 = torch_c.from_builtin_tensor %39031 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36635 = torch.constant.int 1
    %39033 = torch.aten.add.Tensor %39014, %39017, %int1_36635 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36636 = torch.constant.int 1
    %39034 = torch.aten.add.Tensor %39033, %39020, %int1_36636 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36637 = torch.constant.int 1
    %39035 = torch.aten.add.Tensor %39034, %39023, %int1_36637 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36638 = torch.constant.int 1
    %39036 = torch.aten.add.Tensor %39035, %39026, %int1_36638 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36639 = torch.constant.int 1
    %39037 = torch.aten.add.Tensor %39036, %38859, %int1_36639 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36640 = torch.constant.int 1
    %39038 = torch.aten.add.Tensor %39037, %39029, %int1_36640 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36641 = torch.constant.int 1
    %39039 = torch.aten.add.Tensor %39038, %39032, %int1_36641 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
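    // Gather-and-reduce onto @__device_6 (local shard: %38865).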
    %39040 = torch_c.to_builtin_tensor %38829 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36642 = arith.constant 1 : index
    %dim_36643 = tensor.dim %39040, %c1_36642 : tensor<4x?x4096xf16>
    %39041 = flow.tensor.transfer %39040 : tensor<4x?x4096xf16>{%dim_36643} to #hal.device.promise<@__device_6>
    %39042 = torch_c.from_builtin_tensor %39041 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39043 = torch_c.to_builtin_tensor %38835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36644 = arith.constant 1 : index
    %dim_36645 = tensor.dim %39043, %c1_36644 : tensor<4x?x4096xf16>
    %39044 = flow.tensor.transfer %39043 : tensor<4x?x4096xf16>{%dim_36645} to #hal.device.promise<@__device_6>
    %39045 = torch_c.from_builtin_tensor %39044 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39046 = torch_c.to_builtin_tensor %38841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36646 = arith.constant 1 : index
    %dim_36647 = tensor.dim %39046, %c1_36646 : tensor<4x?x4096xf16>
    %39047 = flow.tensor.transfer %39046 : tensor<4x?x4096xf16>{%dim_36647} to #hal.device.promise<@__device_6>
    %39048 = torch_c.from_builtin_tensor %39047 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39049 = torch_c.to_builtin_tensor %38847 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36648 = arith.constant 1 : index
    %dim_36649 = tensor.dim %39049, %c1_36648 : tensor<4x?x4096xf16>
    %39050 = flow.tensor.transfer %39049 : tensor<4x?x4096xf16>{%dim_36649} to #hal.device.promise<@__device_6>
    %39051 = torch_c.from_builtin_tensor %39050 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39052 = torch_c.to_builtin_tensor %38853 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36650 = arith.constant 1 : index
    %dim_36651 = tensor.dim %39052, %c1_36650 : tensor<4x?x4096xf16>
    %39053 = flow.tensor.transfer %39052 : tensor<4x?x4096xf16>{%dim_36651} to #hal.device.promise<@__device_6>
    %39054 = torch_c.from_builtin_tensor %39053 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39055 = torch_c.to_builtin_tensor %38859 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36652 = arith.constant 1 : index
    %dim_36653 = tensor.dim %39055, %c1_36652 : tensor<4x?x4096xf16>
    %39056 = flow.tensor.transfer %39055 : tensor<4x?x4096xf16>{%dim_36653} to #hal.device.promise<@__device_6>
    %39057 = torch_c.from_builtin_tensor %39056 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39058 = torch_c.to_builtin_tensor %38871 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36654 = arith.constant 1 : index
    %dim_36655 = tensor.dim %39058, %c1_36654 : tensor<4x?x4096xf16>
    %39059 = flow.tensor.transfer %39058 : tensor<4x?x4096xf16>{%dim_36655} to #hal.device.promise<@__device_6>
    %39060 = torch_c.from_builtin_tensor %39059 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36656 = torch.constant.int 1
    %39061 = torch.aten.add.Tensor %39042, %39045, %int1_36656 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36657 = torch.constant.int 1
    %39062 = torch.aten.add.Tensor %39061, %39048, %int1_36657 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36658 = torch.constant.int 1
    %39063 = torch.aten.add.Tensor %39062, %39051, %int1_36658 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36659 = torch.constant.int 1
    %39064 = torch.aten.add.Tensor %39063, %39054, %int1_36659 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36660 = torch.constant.int 1
    %39065 = torch.aten.add.Tensor %39064, %39057, %int1_36660 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36661 = torch.constant.int 1
    %39066 = torch.aten.add.Tensor %39065, %38865, %int1_36661 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36662 = torch.constant.int 1
    %39067 = torch.aten.add.Tensor %39066, %39060, %int1_36662 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
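    // Gather-and-reduce onto @__device_7 (local shard: %38871). After this
    // block every device holds an identical copy of the full sum.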
    %39068 = torch_c.to_builtin_tensor %38829 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36663 = arith.constant 1 : index
    %dim_36664 = tensor.dim %39068, %c1_36663 : tensor<4x?x4096xf16>
    %39069 = flow.tensor.transfer %39068 : tensor<4x?x4096xf16>{%dim_36664} to #hal.device.promise<@__device_7>
    %39070 = torch_c.from_builtin_tensor %39069 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39071 = torch_c.to_builtin_tensor %38835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36665 = arith.constant 1 : index
    %dim_36666 = tensor.dim %39071, %c1_36665 : tensor<4x?x4096xf16>
    %39072 = flow.tensor.transfer %39071 : tensor<4x?x4096xf16>{%dim_36666} to #hal.device.promise<@__device_7>
    %39073 = torch_c.from_builtin_tensor %39072 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39074 = torch_c.to_builtin_tensor %38841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36667 = arith.constant 1 : index
    %dim_36668 = tensor.dim %39074, %c1_36667 : tensor<4x?x4096xf16>
    %39075 = flow.tensor.transfer %39074 : tensor<4x?x4096xf16>{%dim_36668} to #hal.device.promise<@__device_7>
    %39076 = torch_c.from_builtin_tensor %39075 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39077 = torch_c.to_builtin_tensor %38847 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36669 = arith.constant 1 : index
    %dim_36670 = tensor.dim %39077, %c1_36669 : tensor<4x?x4096xf16>
    %39078 = flow.tensor.transfer %39077 : tensor<4x?x4096xf16>{%dim_36670} to #hal.device.promise<@__device_7>
    %39079 = torch_c.from_builtin_tensor %39078 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39080 = torch_c.to_builtin_tensor %38853 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36671 = arith.constant 1 : index
    %dim_36672 = tensor.dim %39080, %c1_36671 : tensor<4x?x4096xf16>
    %39081 = flow.tensor.transfer %39080 : tensor<4x?x4096xf16>{%dim_36672} to #hal.device.promise<@__device_7>
    %39082 = torch_c.from_builtin_tensor %39081 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39083 = torch_c.to_builtin_tensor %38859 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36673 = arith.constant 1 : index
    %dim_36674 = tensor.dim %39083, %c1_36673 : tensor<4x?x4096xf16>
    %39084 = flow.tensor.transfer %39083 : tensor<4x?x4096xf16>{%dim_36674} to #hal.device.promise<@__device_7>
    %39085 = torch_c.from_builtin_tensor %39084 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39086 = torch_c.to_builtin_tensor %38865 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36675 = arith.constant 1 : index
    %dim_36676 = tensor.dim %39086, %c1_36675 : tensor<4x?x4096xf16>
    %39087 = flow.tensor.transfer %39086 : tensor<4x?x4096xf16>{%dim_36676} to #hal.device.promise<@__device_7>
    %39088 = torch_c.from_builtin_tensor %39087 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36677 = torch.constant.int 1
    %39089 = torch.aten.add.Tensor %39070, %39073, %int1_36677 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36678 = torch.constant.int 1
    %39090 = torch.aten.add.Tensor %39089, %39076, %int1_36678 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36679 = torch.constant.int 1
    %39091 = torch.aten.add.Tensor %39090, %39079, %int1_36679 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36680 = torch.constant.int 1
    %39092 = torch.aten.add.Tensor %39091, %39082, %int1_36680 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36681 = torch.constant.int 1
    %39093 = torch.aten.add.Tensor %39092, %39085, %int1_36681 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36682 = torch.constant.int 1
    %39094 = torch.aten.add.Tensor %39093, %39088, %int1_36682 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36683 = torch.constant.int 1
    %39095 = torch.aten.add.Tensor %39094, %38871, %int1_36683 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
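    // Residual connection, replicated per device: each fully reduced sum
    // (%38899, %38927, %38955, %38983, %39011, %39039, %39067, %39095) is
    // added to that device's copy of the residual stream (%37755 ... %37762).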
    %int1_36684 = torch.constant.int 1
    %39096 = torch.aten.add.Tensor %37755, %38899, %int1_36684 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36685 = torch.constant.int 1
    %39097 = torch.aten.add.Tensor %37756, %38927, %int1_36685 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36686 = torch.constant.int 1
    %39098 = torch.aten.add.Tensor %37757, %38955, %int1_36686 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36687 = torch.constant.int 1
    %39099 = torch.aten.add.Tensor %37758, %38983, %int1_36687 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36688 = torch.constant.int 1
    %39100 = torch.aten.add.Tensor %37759, %39011, %int1_36688 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36689 = torch.constant.int 1
    %39101 = torch.aten.add.Tensor %37760, %39039, %int1_36689 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36690 = torch.constant.int 1
    %39102 = torch.aten.add.Tensor %37761, %39067, %int1_36690 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36691 = torch.constant.int 1
    %39103 = torch.aten.add.Tensor %37762, %39095, %int1_36691 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
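    // RMSNorm over the hidden dimension, computed redundantly on all eight
    // devices: upcast to f32, square, mean over dim -1 (keepdim), add eps
    // (9.9999997e-06, the f32 rounding of 1e-5), rsqrt, scale the input, then
    // multiply by the per-device norm weight (%1416 ... %1423) and downcast
    // back to f16. In short: y = w * x / sqrt(mean(x^2) + eps).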
    %int6_36692 = torch.constant.int 6
    %39104 = torch.prims.convert_element_type %39096, %int6_36692 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_36693 = torch.constant.int 6
    %39105 = torch.prims.convert_element_type %39097, %int6_36693 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_36694 = torch.constant.int 6
    %39106 = torch.prims.convert_element_type %39098, %int6_36694 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_36695 = torch.constant.int 6
    %39107 = torch.prims.convert_element_type %39099, %int6_36695 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_36696 = torch.constant.int 6
    %39108 = torch.prims.convert_element_type %39100, %int6_36696 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_36697 = torch.constant.int 6
    %39109 = torch.prims.convert_element_type %39101, %int6_36697 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_36698 = torch.constant.int 6
    %39110 = torch.prims.convert_element_type %39102, %int6_36698 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_36699 = torch.constant.int 6
    %39111 = torch.prims.convert_element_type %39103, %int6_36699 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_36700 = torch.constant.int 2
    %39112 = torch.aten.pow.Tensor_Scalar %39104, %int2_36700 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_36701 = torch.constant.int 2
    %39113 = torch.aten.pow.Tensor_Scalar %39105, %int2_36701 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_36702 = torch.constant.int 2
    %39114 = torch.aten.pow.Tensor_Scalar %39106, %int2_36702 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_36703 = torch.constant.int 2
    %39115 = torch.aten.pow.Tensor_Scalar %39107, %int2_36703 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_36704 = torch.constant.int 2
    %39116 = torch.aten.pow.Tensor_Scalar %39108, %int2_36704 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_36705 = torch.constant.int 2
    %39117 = torch.aten.pow.Tensor_Scalar %39109, %int2_36705 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_36706 = torch.constant.int 2
    %39118 = torch.aten.pow.Tensor_Scalar %39110, %int2_36706 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_36707 = torch.constant.int 2
    %39119 = torch.aten.pow.Tensor_Scalar %39111, %int2_36707 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int-1_36708 = torch.constant.int -1
    %39120 = torch.prim.ListConstruct %int-1_36708 : (!torch.int) -> !torch.list<int>
    %true_36709 = torch.constant.bool true
    %none_36710 = torch.constant.none
    %39121 = torch.aten.mean.dim %39112, %39120, %true_36709, %none_36710 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_36711 = torch.constant.int -1
    %39122 = torch.prim.ListConstruct %int-1_36711 : (!torch.int) -> !torch.list<int>
    %true_36712 = torch.constant.bool true
    %none_36713 = torch.constant.none
    %39123 = torch.aten.mean.dim %39113, %39122, %true_36712, %none_36713 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_36714 = torch.constant.int -1
    %39124 = torch.prim.ListConstruct %int-1_36714 : (!torch.int) -> !torch.list<int>
    %true_36715 = torch.constant.bool true
    %none_36716 = torch.constant.none
    %39125 = torch.aten.mean.dim %39114, %39124, %true_36715, %none_36716 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_36717 = torch.constant.int -1
    %39126 = torch.prim.ListConstruct %int-1_36717 : (!torch.int) -> !torch.list<int>
    %true_36718 = torch.constant.bool true
    %none_36719 = torch.constant.none
    %39127 = torch.aten.mean.dim %39115, %39126, %true_36718, %none_36719 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_36720 = torch.constant.int -1
    %39128 = torch.prim.ListConstruct %int-1_36720 : (!torch.int) -> !torch.list<int>
    %true_36721 = torch.constant.bool true
    %none_36722 = torch.constant.none
    %39129 = torch.aten.mean.dim %39116, %39128, %true_36721, %none_36722 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_36723 = torch.constant.int -1
    %39130 = torch.prim.ListConstruct %int-1_36723 : (!torch.int) -> !torch.list<int>
    %true_36724 = torch.constant.bool true
    %none_36725 = torch.constant.none
    %39131 = torch.aten.mean.dim %39117, %39130, %true_36724, %none_36725 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_36726 = torch.constant.int -1
    %39132 = torch.prim.ListConstruct %int-1_36726 : (!torch.int) -> !torch.list<int>
    %true_36727 = torch.constant.bool true
    %none_36728 = torch.constant.none
    %39133 = torch.aten.mean.dim %39118, %39132, %true_36727, %none_36728 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_36729 = torch.constant.int -1
    %39134 = torch.prim.ListConstruct %int-1_36729 : (!torch.int) -> !torch.list<int>
    %true_36730 = torch.constant.bool true
    %none_36731 = torch.constant.none
    %39135 = torch.aten.mean.dim %39119, %39134, %true_36730, %none_36731 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_36732 = torch.constant.float 9.9999997473787516E-6
    %int1_36733 = torch.constant.int 1
    %39136 = torch.aten.add.Scalar %39121, %float9.999990e-06_36732, %int1_36733 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_36734 = torch.constant.float 9.9999997473787516E-6
    %int1_36735 = torch.constant.int 1
    %39137 = torch.aten.add.Scalar %39123, %float9.999990e-06_36734, %int1_36735 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_36736 = torch.constant.float 9.9999997473787516E-6
    %int1_36737 = torch.constant.int 1
    %39138 = torch.aten.add.Scalar %39125, %float9.999990e-06_36736, %int1_36737 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_36738 = torch.constant.float 9.9999997473787516E-6
    %int1_36739 = torch.constant.int 1
    %39139 = torch.aten.add.Scalar %39127, %float9.999990e-06_36738, %int1_36739 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_36740 = torch.constant.float 9.9999997473787516E-6
    %int1_36741 = torch.constant.int 1
    %39140 = torch.aten.add.Scalar %39129, %float9.999990e-06_36740, %int1_36741 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_36742 = torch.constant.float 9.9999997473787516E-6
    %int1_36743 = torch.constant.int 1
    %39141 = torch.aten.add.Scalar %39131, %float9.999990e-06_36742, %int1_36743 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_36744 = torch.constant.float 9.9999997473787516E-6
    %int1_36745 = torch.constant.int 1
    %39142 = torch.aten.add.Scalar %39133, %float9.999990e-06_36744, %int1_36745 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_36746 = torch.constant.float 9.9999997473787516E-6
    %int1_36747 = torch.constant.int 1
    %39143 = torch.aten.add.Scalar %39135, %float9.999990e-06_36746, %int1_36747 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39144 = torch.aten.rsqrt %39136 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39145 = torch.aten.rsqrt %39137 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39146 = torch.aten.rsqrt %39138 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39147 = torch.aten.rsqrt %39139 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39148 = torch.aten.rsqrt %39140 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39149 = torch.aten.rsqrt %39141 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39150 = torch.aten.rsqrt %39142 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39151 = torch.aten.rsqrt %39143 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39152 = torch.aten.mul.Tensor %39104, %39144 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39153 = torch.aten.mul.Tensor %39105, %39145 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39154 = torch.aten.mul.Tensor %39106, %39146 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39155 = torch.aten.mul.Tensor %39107, %39147 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39156 = torch.aten.mul.Tensor %39108, %39148 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39157 = torch.aten.mul.Tensor %39109, %39149 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39158 = torch.aten.mul.Tensor %39110, %39150 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39159 = torch.aten.mul.Tensor %39111, %39151 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39160 = torch.aten.mul.Tensor %1416, %39152 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39161 = torch.aten.mul.Tensor %1417, %39153 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39162 = torch.aten.mul.Tensor %1418, %39154 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39163 = torch.aten.mul.Tensor %1419, %39155 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39164 = torch.aten.mul.Tensor %1420, %39156 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39165 = torch.aten.mul.Tensor %1421, %39157 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39166 = torch.aten.mul.Tensor %1422, %39158 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39167 = torch.aten.mul.Tensor %1423, %39159 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int5_36748 = torch.constant.int 5
    %39168 = torch.prims.convert_element_type %39160, %int5_36748 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_36749 = torch.constant.int 5
    %39169 = torch.prims.convert_element_type %39161, %int5_36749 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_36750 = torch.constant.int 5
    %39170 = torch.prims.convert_element_type %39162, %int5_36750 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_36751 = torch.constant.int 5
    %39171 = torch.prims.convert_element_type %39163, %int5_36751 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_36752 = torch.constant.int 5
    %39172 = torch.prims.convert_element_type %39164, %int5_36752 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_36753 = torch.constant.int 5
    %39173 = torch.prims.convert_element_type %39165, %int5_36753 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_36754 = torch.constant.int 5
    %39174 = torch.prims.convert_element_type %39166, %int5_36754 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_36755 = torch.constant.int 5
    %39175 = torch.prims.convert_element_type %39167, %int5_36755 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
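    // First FFN projection, column-parallel across the 8 devices: each
    // [1792,4096] weight shard (%1424 ... %1431; 1792 being consistent with a
    // 14336-wide FFN split 8 ways) is transposed to [4096,1792], the
    // [4,?,4096] activation is flattened to [4*?,4096] for torch.aten.mm, and
    // the product is reshaped back to [4,?,1792].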
    %int1_36756 = torch.constant.int 1
    %int0_36757 = torch.constant.int 0
    %39176 = torch.prim.ListConstruct %int1_36756, %int0_36757 : (!torch.int, !torch.int) -> !torch.list<int>
    %39177 = torch.aten.permute %1424, %39176 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36758 = torch.constant.int 1
    %int0_36759 = torch.constant.int 0
    %39178 = torch.prim.ListConstruct %int1_36758, %int0_36759 : (!torch.int, !torch.int) -> !torch.list<int>
    %39179 = torch.aten.permute %1425, %39178 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36760 = torch.constant.int 1
    %int0_36761 = torch.constant.int 0
    %39180 = torch.prim.ListConstruct %int1_36760, %int0_36761 : (!torch.int, !torch.int) -> !torch.list<int>
    %39181 = torch.aten.permute %1426, %39180 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36762 = torch.constant.int 1
    %int0_36763 = torch.constant.int 0
    %39182 = torch.prim.ListConstruct %int1_36762, %int0_36763 : (!torch.int, !torch.int) -> !torch.list<int>
    %39183 = torch.aten.permute %1427, %39182 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36764 = torch.constant.int 1
    %int0_36765 = torch.constant.int 0
    %39184 = torch.prim.ListConstruct %int1_36764, %int0_36765 : (!torch.int, !torch.int) -> !torch.list<int>
    %39185 = torch.aten.permute %1428, %39184 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36766 = torch.constant.int 1
    %int0_36767 = torch.constant.int 0
    %39186 = torch.prim.ListConstruct %int1_36766, %int0_36767 : (!torch.int, !torch.int) -> !torch.list<int>
    %39187 = torch.aten.permute %1429, %39186 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36768 = torch.constant.int 1
    %int0_36769 = torch.constant.int 0
    %39188 = torch.prim.ListConstruct %int1_36768, %int0_36769 : (!torch.int, !torch.int) -> !torch.list<int>
    %39189 = torch.aten.permute %1430, %39188 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36770 = torch.constant.int 1
    %int0_36771 = torch.constant.int 0
    %39190 = torch.prim.ListConstruct %int1_36770, %int0_36771 : (!torch.int, !torch.int) -> !torch.list<int>
    %39191 = torch.aten.permute %1431, %39190 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int4_36772 = torch.constant.int 4
    %39192 = torch.aten.mul.int %int4_36772, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36773 = torch.constant.int 4096
    %39193 = torch.prim.ListConstruct %39192, %int4096_36773 : (!torch.int, !torch.int) -> !torch.list<int>
    %39194 = torch.aten.view %39168, %39193 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39194, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39195 = torch.aten.mm %39194, %39177 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39195, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36774 = torch.constant.int 4
    %int1792_36775 = torch.constant.int 1792
    %39196 = torch.prim.ListConstruct %int4_36774, %2482, %int1792_36775 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39197 = torch.aten.view %39195, %39196 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36776 = torch.constant.int 4
    %39198 = torch.aten.mul.int %int4_36776, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36777 = torch.constant.int 4096
    %39199 = torch.prim.ListConstruct %39198, %int4096_36777 : (!torch.int, !torch.int) -> !torch.list<int>
    %39200 = torch.aten.view %39169, %39199 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39200, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39201 = torch.aten.mm %39200, %39179 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39201, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36778 = torch.constant.int 4
    %int1792_36779 = torch.constant.int 1792
    %39202 = torch.prim.ListConstruct %int4_36778, %2482, %int1792_36779 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39203 = torch.aten.view %39201, %39202 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36780 = torch.constant.int 4
    %39204 = torch.aten.mul.int %int4_36780, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36781 = torch.constant.int 4096
    %39205 = torch.prim.ListConstruct %39204, %int4096_36781 : (!torch.int, !torch.int) -> !torch.list<int>
    %39206 = torch.aten.view %39170, %39205 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39206, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39207 = torch.aten.mm %39206, %39181 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39207, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36782 = torch.constant.int 4
    %int1792_36783 = torch.constant.int 1792
    %39208 = torch.prim.ListConstruct %int4_36782, %2482, %int1792_36783 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39209 = torch.aten.view %39207, %39208 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36784 = torch.constant.int 4
    %39210 = torch.aten.mul.int %int4_36784, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36785 = torch.constant.int 4096
    %39211 = torch.prim.ListConstruct %39210, %int4096_36785 : (!torch.int, !torch.int) -> !torch.list<int>
    %39212 = torch.aten.view %39171, %39211 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39212, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39213 = torch.aten.mm %39212, %39183 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39213, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36786 = torch.constant.int 4
    %int1792_36787 = torch.constant.int 1792
    %39214 = torch.prim.ListConstruct %int4_36786, %2482, %int1792_36787 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39215 = torch.aten.view %39213, %39214 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36788 = torch.constant.int 4
    %39216 = torch.aten.mul.int %int4_36788, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36789 = torch.constant.int 4096
    %39217 = torch.prim.ListConstruct %39216, %int4096_36789 : (!torch.int, !torch.int) -> !torch.list<int>
    %39218 = torch.aten.view %39172, %39217 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39218, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39219 = torch.aten.mm %39218, %39185 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39219, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36790 = torch.constant.int 4
    %int1792_36791 = torch.constant.int 1792
    %39220 = torch.prim.ListConstruct %int4_36790, %2482, %int1792_36791 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39221 = torch.aten.view %39219, %39220 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36792 = torch.constant.int 4
    %39222 = torch.aten.mul.int %int4_36792, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36793 = torch.constant.int 4096
    %39223 = torch.prim.ListConstruct %39222, %int4096_36793 : (!torch.int, !torch.int) -> !torch.list<int>
    %39224 = torch.aten.view %39173, %39223 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39224, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39225 = torch.aten.mm %39224, %39187 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39225, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36794 = torch.constant.int 4
    %int1792_36795 = torch.constant.int 1792
    %39226 = torch.prim.ListConstruct %int4_36794, %2482, %int1792_36795 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39227 = torch.aten.view %39225, %39226 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36796 = torch.constant.int 4
    %39228 = torch.aten.mul.int %int4_36796, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36797 = torch.constant.int 4096
    %39229 = torch.prim.ListConstruct %39228, %int4096_36797 : (!torch.int, !torch.int) -> !torch.list<int>
    %39230 = torch.aten.view %39174, %39229 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39230, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39231 = torch.aten.mm %39230, %39189 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39231, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36798 = torch.constant.int 4
    %int1792_36799 = torch.constant.int 1792
    %39232 = torch.prim.ListConstruct %int4_36798, %2482, %int1792_36799 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39233 = torch.aten.view %39231, %39232 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36800 = torch.constant.int 4
    %39234 = torch.aten.mul.int %int4_36800, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36801 = torch.constant.int 4096
    %39235 = torch.prim.ListConstruct %39234, %int4096_36801 : (!torch.int, !torch.int) -> !torch.list<int>
    %39236 = torch.aten.view %39175, %39235 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39236, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39237 = torch.aten.mm %39236, %39191 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39237, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36802 = torch.constant.int 4
    %int1792_36803 = torch.constant.int 1792
    %39238 = torch.prim.ListConstruct %int4_36802, %2482, %int1792_36803 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39239 = torch.aten.view %39237, %39238 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
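    // The eight [4,?,1792] results above complete the per-device gate projections.
    // Next, apply SiLU to each shard's gate output; combined with the up projection
    // and elementwise multiply below, this appears to be a SwiGLU-style feed-forward
    // block replicated across the eight devices.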
    %39240 = torch.aten.silu %39197 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39241 = torch.aten.silu %39203 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39242 = torch.aten.silu %39209 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39243 = torch.aten.silu %39215 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39244 = torch.aten.silu %39221 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39245 = torch.aten.silu %39227 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39246 = torch.aten.silu %39233 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39247 = torch.aten.silu %39239 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
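    // Transpose the eight sharded weights %1432..%1439 ([1792,4096] -> [4096,1792])
    // so each can serve as the RHS of torch.aten.mm below. These are likely the
    // per-device up-projection shards (8 x 1792 = 14336 columns in total).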
    %int1_36804 = torch.constant.int 1
    %int0_36805 = torch.constant.int 0
    %39248 = torch.prim.ListConstruct %int1_36804, %int0_36805 : (!torch.int, !torch.int) -> !torch.list<int>
    %39249 = torch.aten.permute %1432, %39248 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36806 = torch.constant.int 1
    %int0_36807 = torch.constant.int 0
    %39250 = torch.prim.ListConstruct %int1_36806, %int0_36807 : (!torch.int, !torch.int) -> !torch.list<int>
    %39251 = torch.aten.permute %1433, %39250 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36808 = torch.constant.int 1
    %int0_36809 = torch.constant.int 0
    %39252 = torch.prim.ListConstruct %int1_36808, %int0_36809 : (!torch.int, !torch.int) -> !torch.list<int>
    %39253 = torch.aten.permute %1434, %39252 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36810 = torch.constant.int 1
    %int0_36811 = torch.constant.int 0
    %39254 = torch.prim.ListConstruct %int1_36810, %int0_36811 : (!torch.int, !torch.int) -> !torch.list<int>
    %39255 = torch.aten.permute %1435, %39254 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36812 = torch.constant.int 1
    %int0_36813 = torch.constant.int 0
    %39256 = torch.prim.ListConstruct %int1_36812, %int0_36813 : (!torch.int, !torch.int) -> !torch.list<int>
    %39257 = torch.aten.permute %1436, %39256 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36814 = torch.constant.int 1
    %int0_36815 = torch.constant.int 0
    %39258 = torch.prim.ListConstruct %int1_36814, %int0_36815 : (!torch.int, !torch.int) -> !torch.list<int>
    %39259 = torch.aten.permute %1437, %39258 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36816 = torch.constant.int 1
    %int0_36817 = torch.constant.int 0
    %39260 = torch.prim.ListConstruct %int1_36816, %int0_36817 : (!torch.int, !torch.int) -> !torch.list<int>
    %39261 = torch.aten.permute %1438, %39260 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_36818 = torch.constant.int 1
    %int0_36819 = torch.constant.int 0
    %39262 = torch.prim.ListConstruct %int1_36818, %int0_36819 : (!torch.int, !torch.int) -> !torch.list<int>
    %39263 = torch.aten.permute %1439, %39262 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
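    // Per-device up projection: flatten each [4,?,4096] activation to [?,4096],
    // matmul with the transposed shard weight, and reshape back to [4,?,1792].
    // The bind_symbolic_shape ops re-attach the symbolic sequence length
    // (s0 * 16 per batch row, s0 * 64 when flattened across the batch of 4).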
    %int4_36820 = torch.constant.int 4
    %39264 = torch.aten.mul.int %int4_36820, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36821 = torch.constant.int 4096
    %39265 = torch.prim.ListConstruct %39264, %int4096_36821 : (!torch.int, !torch.int) -> !torch.list<int>
    %39266 = torch.aten.view %39168, %39265 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39266, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39267 = torch.aten.mm %39266, %39249 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39267, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36822 = torch.constant.int 4
    %int1792_36823 = torch.constant.int 1792
    %39268 = torch.prim.ListConstruct %int4_36822, %2482, %int1792_36823 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39269 = torch.aten.view %39267, %39268 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36824 = torch.constant.int 4
    %39270 = torch.aten.mul.int %int4_36824, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36825 = torch.constant.int 4096
    %39271 = torch.prim.ListConstruct %39270, %int4096_36825 : (!torch.int, !torch.int) -> !torch.list<int>
    %39272 = torch.aten.view %39169, %39271 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39272, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39273 = torch.aten.mm %39272, %39251 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39273, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36826 = torch.constant.int 4
    %int1792_36827 = torch.constant.int 1792
    %39274 = torch.prim.ListConstruct %int4_36826, %2482, %int1792_36827 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39275 = torch.aten.view %39273, %39274 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36828 = torch.constant.int 4
    %39276 = torch.aten.mul.int %int4_36828, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36829 = torch.constant.int 4096
    %39277 = torch.prim.ListConstruct %39276, %int4096_36829 : (!torch.int, !torch.int) -> !torch.list<int>
    %39278 = torch.aten.view %39170, %39277 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39278, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39279 = torch.aten.mm %39278, %39253 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39279, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36830 = torch.constant.int 4
    %int1792_36831 = torch.constant.int 1792
    %39280 = torch.prim.ListConstruct %int4_36830, %2482, %int1792_36831 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39281 = torch.aten.view %39279, %39280 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36832 = torch.constant.int 4
    %39282 = torch.aten.mul.int %int4_36832, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36833 = torch.constant.int 4096
    %39283 = torch.prim.ListConstruct %39282, %int4096_36833 : (!torch.int, !torch.int) -> !torch.list<int>
    %39284 = torch.aten.view %39171, %39283 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39284, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39285 = torch.aten.mm %39284, %39255 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39285, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36834 = torch.constant.int 4
    %int1792_36835 = torch.constant.int 1792
    %39286 = torch.prim.ListConstruct %int4_36834, %2482, %int1792_36835 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39287 = torch.aten.view %39285, %39286 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36836 = torch.constant.int 4
    %39288 = torch.aten.mul.int %int4_36836, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36837 = torch.constant.int 4096
    %39289 = torch.prim.ListConstruct %39288, %int4096_36837 : (!torch.int, !torch.int) -> !torch.list<int>
    %39290 = torch.aten.view %39172, %39289 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39290, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39291 = torch.aten.mm %39290, %39257 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39291, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36838 = torch.constant.int 4
    %int1792_36839 = torch.constant.int 1792
    %39292 = torch.prim.ListConstruct %int4_36838, %2482, %int1792_36839 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39293 = torch.aten.view %39291, %39292 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36840 = torch.constant.int 4
    %39294 = torch.aten.mul.int %int4_36840, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36841 = torch.constant.int 4096
    %39295 = torch.prim.ListConstruct %39294, %int4096_36841 : (!torch.int, !torch.int) -> !torch.list<int>
    %39296 = torch.aten.view %39173, %39295 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39296, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39297 = torch.aten.mm %39296, %39259 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39297, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36842 = torch.constant.int 4
    %int1792_36843 = torch.constant.int 1792
    %39298 = torch.prim.ListConstruct %int4_36842, %2482, %int1792_36843 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39299 = torch.aten.view %39297, %39298 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36844 = torch.constant.int 4
    %39300 = torch.aten.mul.int %int4_36844, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36845 = torch.constant.int 4096
    %39301 = torch.prim.ListConstruct %39300, %int4096_36845 : (!torch.int, !torch.int) -> !torch.list<int>
    %39302 = torch.aten.view %39174, %39301 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39302, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39303 = torch.aten.mm %39302, %39261 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39303, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36846 = torch.constant.int 4
    %int1792_36847 = torch.constant.int 1792
    %39304 = torch.prim.ListConstruct %int4_36846, %2482, %int1792_36847 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39305 = torch.aten.view %39303, %39304 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_36848 = torch.constant.int 4
    %39306 = torch.aten.mul.int %int4_36848, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_36849 = torch.constant.int 4096
    %39307 = torch.prim.ListConstruct %39306, %int4096_36849 : (!torch.int, !torch.int) -> !torch.list<int>
    %39308 = torch.aten.view %39175, %39307 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39308, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39309 = torch.aten.mm %39308, %39263 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39309, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_36850 = torch.constant.int 4
    %int1792_36851 = torch.constant.int 1792
    %39310 = torch.prim.ListConstruct %int4_36850, %2482, %int1792_36851 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39311 = torch.aten.view %39309, %39310 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
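    // Gate the up projection: elementwise multiply silu(gate) with the up output
    // on each device, yielding eight [4,?,1792] shards.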
    %39312 = torch.aten.mul.Tensor %39240, %39269 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39313 = torch.aten.mul.Tensor %39241, %39275 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39314 = torch.aten.mul.Tensor %39242, %39281 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39315 = torch.aten.mul.Tensor %39243, %39287 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39316 = torch.aten.mul.Tensor %39244, %39293 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39317 = torch.aten.mul.Tensor %39245, %39299 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39318 = torch.aten.mul.Tensor %39246, %39305 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %39319 = torch.aten.mul.Tensor %39247, %39311 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %39319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
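    // Transpose the eight [4096,1792] weights %1440..%1447 to [1792,4096] for the
    // matmuls below (plausibly the sharded down projection back to the model dim).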
    %int1_36852 = torch.constant.int 1
    %int0_36853 = torch.constant.int 0
    %39320 = torch.prim.ListConstruct %int1_36852, %int0_36853 : (!torch.int, !torch.int) -> !torch.list<int>
    %39321 = torch.aten.permute %1440, %39320 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_36854 = torch.constant.int 1
    %int0_36855 = torch.constant.int 0
    %39322 = torch.prim.ListConstruct %int1_36854, %int0_36855 : (!torch.int, !torch.int) -> !torch.list<int>
    %39323 = torch.aten.permute %1441, %39322 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_36856 = torch.constant.int 1
    %int0_36857 = torch.constant.int 0
    %39324 = torch.prim.ListConstruct %int1_36856, %int0_36857 : (!torch.int, !torch.int) -> !torch.list<int>
    %39325 = torch.aten.permute %1442, %39324 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_36858 = torch.constant.int 1
    %int0_36859 = torch.constant.int 0
    %39326 = torch.prim.ListConstruct %int1_36858, %int0_36859 : (!torch.int, !torch.int) -> !torch.list<int>
    %39327 = torch.aten.permute %1443, %39326 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_36860 = torch.constant.int 1
    %int0_36861 = torch.constant.int 0
    %39328 = torch.prim.ListConstruct %int1_36860, %int0_36861 : (!torch.int, !torch.int) -> !torch.list<int>
    %39329 = torch.aten.permute %1444, %39328 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_36862 = torch.constant.int 1
    %int0_36863 = torch.constant.int 0
    %39330 = torch.prim.ListConstruct %int1_36862, %int0_36863 : (!torch.int, !torch.int) -> !torch.list<int>
    %39331 = torch.aten.permute %1445, %39330 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_36864 = torch.constant.int 1
    %int0_36865 = torch.constant.int 0
    %39332 = torch.prim.ListConstruct %int1_36864, %int0_36865 : (!torch.int, !torch.int) -> !torch.list<int>
    %39333 = torch.aten.permute %1446, %39332 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_36866 = torch.constant.int 1
    %int0_36867 = torch.constant.int 0
    %39334 = torch.prim.ListConstruct %int1_36866, %int0_36867 : (!torch.int, !torch.int) -> !torch.list<int>
    %39335 = torch.aten.permute %1447, %39334 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
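    // Per-device down projection: read the dynamic sequence length with
    // aten.size.int, flatten [4,?,1792] -> [?,1792], matmul into [?,4096], and
    // reshape to [4,?,4096]. With row-sharded weights, each device now holds a
    // partial sum of the full FFN output.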
    %int1_36868 = torch.constant.int 1
    %39336 = torch.aten.size.int %39197, %int1_36868 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_36869 = torch.constant.int 4
    %39337 = torch.aten.mul.int %int4_36869, %39336 : !torch.int, !torch.int -> !torch.int
    %int1792_36870 = torch.constant.int 1792
    %39338 = torch.prim.ListConstruct %39337, %int1792_36870 : (!torch.int, !torch.int) -> !torch.list<int>
    %39339 = torch.aten.view %39312, %39338 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39339, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %39340 = torch.aten.mm %39339, %39321 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39340, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36871 = torch.constant.int 4
    %int4096_36872 = torch.constant.int 4096
    %39341 = torch.prim.ListConstruct %int4_36871, %39336, %int4096_36872 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39342 = torch.aten.view %39340, %39341 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36873 = torch.constant.int 1
    %39343 = torch.aten.size.int %39203, %int1_36873 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_36874 = torch.constant.int 4
    %39344 = torch.aten.mul.int %int4_36874, %39343 : !torch.int, !torch.int -> !torch.int
    %int1792_36875 = torch.constant.int 1792
    %39345 = torch.prim.ListConstruct %39344, %int1792_36875 : (!torch.int, !torch.int) -> !torch.list<int>
    %39346 = torch.aten.view %39313, %39345 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39346, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %39347 = torch.aten.mm %39346, %39323 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39347, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36876 = torch.constant.int 4
    %int4096_36877 = torch.constant.int 4096
    %39348 = torch.prim.ListConstruct %int4_36876, %39343, %int4096_36877 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39349 = torch.aten.view %39347, %39348 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36878 = torch.constant.int 1
    %39350 = torch.aten.size.int %39209, %int1_36878 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_36879 = torch.constant.int 4
    %39351 = torch.aten.mul.int %int4_36879, %39350 : !torch.int, !torch.int -> !torch.int
    %int1792_36880 = torch.constant.int 1792
    %39352 = torch.prim.ListConstruct %39351, %int1792_36880 : (!torch.int, !torch.int) -> !torch.list<int>
    %39353 = torch.aten.view %39314, %39352 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39353, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %39354 = torch.aten.mm %39353, %39325 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39354, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36881 = torch.constant.int 4
    %int4096_36882 = torch.constant.int 4096
    %39355 = torch.prim.ListConstruct %int4_36881, %39350, %int4096_36882 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39356 = torch.aten.view %39354, %39355 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36883 = torch.constant.int 1
    %39357 = torch.aten.size.int %39215, %int1_36883 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_36884 = torch.constant.int 4
    %39358 = torch.aten.mul.int %int4_36884, %39357 : !torch.int, !torch.int -> !torch.int
    %int1792_36885 = torch.constant.int 1792
    %39359 = torch.prim.ListConstruct %39358, %int1792_36885 : (!torch.int, !torch.int) -> !torch.list<int>
    %39360 = torch.aten.view %39315, %39359 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39360, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %39361 = torch.aten.mm %39360, %39327 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39361, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36886 = torch.constant.int 4
    %int4096_36887 = torch.constant.int 4096
    %39362 = torch.prim.ListConstruct %int4_36886, %39357, %int4096_36887 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39363 = torch.aten.view %39361, %39362 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36888 = torch.constant.int 1
    %39364 = torch.aten.size.int %39221, %int1_36888 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_36889 = torch.constant.int 4
    %39365 = torch.aten.mul.int %int4_36889, %39364 : !torch.int, !torch.int -> !torch.int
    %int1792_36890 = torch.constant.int 1792
    %39366 = torch.prim.ListConstruct %39365, %int1792_36890 : (!torch.int, !torch.int) -> !torch.list<int>
    %39367 = torch.aten.view %39316, %39366 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39367, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %39368 = torch.aten.mm %39367, %39329 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39368, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36891 = torch.constant.int 4
    %int4096_36892 = torch.constant.int 4096
    %39369 = torch.prim.ListConstruct %int4_36891, %39364, %int4096_36892 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39370 = torch.aten.view %39368, %39369 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36893 = torch.constant.int 1
    %39371 = torch.aten.size.int %39227, %int1_36893 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_36894 = torch.constant.int 4
    %39372 = torch.aten.mul.int %int4_36894, %39371 : !torch.int, !torch.int -> !torch.int
    %int1792_36895 = torch.constant.int 1792
    %39373 = torch.prim.ListConstruct %39372, %int1792_36895 : (!torch.int, !torch.int) -> !torch.list<int>
    %39374 = torch.aten.view %39317, %39373 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39374, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %39375 = torch.aten.mm %39374, %39331 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39375, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36896 = torch.constant.int 4
    %int4096_36897 = torch.constant.int 4096
    %39376 = torch.prim.ListConstruct %int4_36896, %39371, %int4096_36897 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39377 = torch.aten.view %39375, %39376 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36898 = torch.constant.int 1
    %39378 = torch.aten.size.int %39233, %int1_36898 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_36899 = torch.constant.int 4
    %39379 = torch.aten.mul.int %int4_36899, %39378 : !torch.int, !torch.int -> !torch.int
    %int1792_36900 = torch.constant.int 1792
    %39380 = torch.prim.ListConstruct %39379, %int1792_36900 : (!torch.int, !torch.int) -> !torch.list<int>
    %39381 = torch.aten.view %39318, %39380 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39381, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %39382 = torch.aten.mm %39381, %39333 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39382, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36901 = torch.constant.int 4
    %int4096_36902 = torch.constant.int 4096
    %39383 = torch.prim.ListConstruct %int4_36901, %39378, %int4096_36902 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39384 = torch.aten.view %39382, %39383 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36903 = torch.constant.int 1
    %39385 = torch.aten.size.int %39239, %int1_36903 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_36904 = torch.constant.int 4
    %39386 = torch.aten.mul.int %int4_36904, %39385 : !torch.int, !torch.int -> !torch.int
    %int1792_36905 = torch.constant.int 1792
    %39387 = torch.prim.ListConstruct %39386, %int1792_36905 : (!torch.int, !torch.int) -> !torch.list<int>
    %39388 = torch.aten.view %39319, %39387 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %39388, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %39389 = torch.aten.mm %39388, %39335 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39389, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_36906 = torch.constant.int 4
    %int4096_36907 = torch.constant.int 4096
    %39390 = torch.prim.ListConstruct %int4_36906, %39385, %int4096_36907 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39391 = torch.aten.view %39389, %39390 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
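    // Unrolled all-reduce of the eight partial outputs: each device's [4,?,4096]
    // partial is copied to every other device via flow.tensor.transfer and summed
    // there with chained aten.add.Tensor. First, reduce on @__device_0 (local
    // partial %39342 plus the seven transferred remotes).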
    %39392 = torch_c.to_builtin_tensor %39349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36908 = arith.constant 1 : index
    %dim_36909 = tensor.dim %39392, %c1_36908 : tensor<4x?x4096xf16>
    %39393 = flow.tensor.transfer %39392 : tensor<4x?x4096xf16>{%dim_36909} to #hal.device.promise<@__device_0>
    %39394 = torch_c.from_builtin_tensor %39393 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39395 = torch_c.to_builtin_tensor %39356 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36910 = arith.constant 1 : index
    %dim_36911 = tensor.dim %39395, %c1_36910 : tensor<4x?x4096xf16>
    %39396 = flow.tensor.transfer %39395 : tensor<4x?x4096xf16>{%dim_36911} to #hal.device.promise<@__device_0>
    %39397 = torch_c.from_builtin_tensor %39396 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39398 = torch_c.to_builtin_tensor %39363 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36912 = arith.constant 1 : index
    %dim_36913 = tensor.dim %39398, %c1_36912 : tensor<4x?x4096xf16>
    %39399 = flow.tensor.transfer %39398 : tensor<4x?x4096xf16>{%dim_36913} to #hal.device.promise<@__device_0>
    %39400 = torch_c.from_builtin_tensor %39399 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39401 = torch_c.to_builtin_tensor %39370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36914 = arith.constant 1 : index
    %dim_36915 = tensor.dim %39401, %c1_36914 : tensor<4x?x4096xf16>
    %39402 = flow.tensor.transfer %39401 : tensor<4x?x4096xf16>{%dim_36915} to #hal.device.promise<@__device_0>
    %39403 = torch_c.from_builtin_tensor %39402 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39404 = torch_c.to_builtin_tensor %39377 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36916 = arith.constant 1 : index
    %dim_36917 = tensor.dim %39404, %c1_36916 : tensor<4x?x4096xf16>
    %39405 = flow.tensor.transfer %39404 : tensor<4x?x4096xf16>{%dim_36917} to #hal.device.promise<@__device_0>
    %39406 = torch_c.from_builtin_tensor %39405 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39407 = torch_c.to_builtin_tensor %39384 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36918 = arith.constant 1 : index
    %dim_36919 = tensor.dim %39407, %c1_36918 : tensor<4x?x4096xf16>
    %39408 = flow.tensor.transfer %39407 : tensor<4x?x4096xf16>{%dim_36919} to #hal.device.promise<@__device_0>
    %39409 = torch_c.from_builtin_tensor %39408 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39410 = torch_c.to_builtin_tensor %39391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36920 = arith.constant 1 : index
    %dim_36921 = tensor.dim %39410, %c1_36920 : tensor<4x?x4096xf16>
    %39411 = flow.tensor.transfer %39410 : tensor<4x?x4096xf16>{%dim_36921} to #hal.device.promise<@__device_0>
    %39412 = torch_c.from_builtin_tensor %39411 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36922 = torch.constant.int 1
    %39413 = torch.aten.add.Tensor %39342, %39394, %int1_36922 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36923 = torch.constant.int 1
    %39414 = torch.aten.add.Tensor %39413, %39397, %int1_36923 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36924 = torch.constant.int 1
    %39415 = torch.aten.add.Tensor %39414, %39400, %int1_36924 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36925 = torch.constant.int 1
    %39416 = torch.aten.add.Tensor %39415, %39403, %int1_36925 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36926 = torch.constant.int 1
    %39417 = torch.aten.add.Tensor %39416, %39406, %int1_36926 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36927 = torch.constant.int 1
    %39418 = torch.aten.add.Tensor %39417, %39409, %int1_36927 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36928 = torch.constant.int 1
    %39419 = torch.aten.add.Tensor %39418, %39412, %int1_36928 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
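    // Same reduction on @__device_1: transfer the seven remote partials and sum
    // them with the local partial %39349.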
    %39420 = torch_c.to_builtin_tensor %39342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36929 = arith.constant 1 : index
    %dim_36930 = tensor.dim %39420, %c1_36929 : tensor<4x?x4096xf16>
    %39421 = flow.tensor.transfer %39420 : tensor<4x?x4096xf16>{%dim_36930} to #hal.device.promise<@__device_1>
    %39422 = torch_c.from_builtin_tensor %39421 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39423 = torch_c.to_builtin_tensor %39356 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36931 = arith.constant 1 : index
    %dim_36932 = tensor.dim %39423, %c1_36931 : tensor<4x?x4096xf16>
    %39424 = flow.tensor.transfer %39423 : tensor<4x?x4096xf16>{%dim_36932} to #hal.device.promise<@__device_1>
    %39425 = torch_c.from_builtin_tensor %39424 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39426 = torch_c.to_builtin_tensor %39363 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36933 = arith.constant 1 : index
    %dim_36934 = tensor.dim %39426, %c1_36933 : tensor<4x?x4096xf16>
    %39427 = flow.tensor.transfer %39426 : tensor<4x?x4096xf16>{%dim_36934} to #hal.device.promise<@__device_1>
    %39428 = torch_c.from_builtin_tensor %39427 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39429 = torch_c.to_builtin_tensor %39370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36935 = arith.constant 1 : index
    %dim_36936 = tensor.dim %39429, %c1_36935 : tensor<4x?x4096xf16>
    %39430 = flow.tensor.transfer %39429 : tensor<4x?x4096xf16>{%dim_36936} to #hal.device.promise<@__device_1>
    %39431 = torch_c.from_builtin_tensor %39430 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39432 = torch_c.to_builtin_tensor %39377 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36937 = arith.constant 1 : index
    %dim_36938 = tensor.dim %39432, %c1_36937 : tensor<4x?x4096xf16>
    %39433 = flow.tensor.transfer %39432 : tensor<4x?x4096xf16>{%dim_36938} to #hal.device.promise<@__device_1>
    %39434 = torch_c.from_builtin_tensor %39433 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39435 = torch_c.to_builtin_tensor %39384 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36939 = arith.constant 1 : index
    %dim_36940 = tensor.dim %39435, %c1_36939 : tensor<4x?x4096xf16>
    %39436 = flow.tensor.transfer %39435 : tensor<4x?x4096xf16>{%dim_36940} to #hal.device.promise<@__device_1>
    %39437 = torch_c.from_builtin_tensor %39436 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39438 = torch_c.to_builtin_tensor %39391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36941 = arith.constant 1 : index
    %dim_36942 = tensor.dim %39438, %c1_36941 : tensor<4x?x4096xf16>
    %39439 = flow.tensor.transfer %39438 : tensor<4x?x4096xf16>{%dim_36942} to #hal.device.promise<@__device_1>
    %39440 = torch_c.from_builtin_tensor %39439 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36943 = torch.constant.int 1
    %39441 = torch.aten.add.Tensor %39422, %39349, %int1_36943 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36944 = torch.constant.int 1
    %39442 = torch.aten.add.Tensor %39441, %39425, %int1_36944 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36945 = torch.constant.int 1
    %39443 = torch.aten.add.Tensor %39442, %39428, %int1_36945 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36946 = torch.constant.int 1
    %39444 = torch.aten.add.Tensor %39443, %39431, %int1_36946 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36947 = torch.constant.int 1
    %39445 = torch.aten.add.Tensor %39444, %39434, %int1_36947 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36948 = torch.constant.int 1
    %39446 = torch.aten.add.Tensor %39445, %39437, %int1_36948 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36949 = torch.constant.int 1
    %39447 = torch.aten.add.Tensor %39446, %39440, %int1_36949 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
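    // Reduction on @__device_2; the local partial %39356 enters the add chain
    // directly, without a transfer.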
    %39448 = torch_c.to_builtin_tensor %39342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36950 = arith.constant 1 : index
    %dim_36951 = tensor.dim %39448, %c1_36950 : tensor<4x?x4096xf16>
    %39449 = flow.tensor.transfer %39448 : tensor<4x?x4096xf16>{%dim_36951} to #hal.device.promise<@__device_2>
    %39450 = torch_c.from_builtin_tensor %39449 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39451 = torch_c.to_builtin_tensor %39349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36952 = arith.constant 1 : index
    %dim_36953 = tensor.dim %39451, %c1_36952 : tensor<4x?x4096xf16>
    %39452 = flow.tensor.transfer %39451 : tensor<4x?x4096xf16>{%dim_36953} to #hal.device.promise<@__device_2>
    %39453 = torch_c.from_builtin_tensor %39452 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39454 = torch_c.to_builtin_tensor %39363 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36954 = arith.constant 1 : index
    %dim_36955 = tensor.dim %39454, %c1_36954 : tensor<4x?x4096xf16>
    %39455 = flow.tensor.transfer %39454 : tensor<4x?x4096xf16>{%dim_36955} to #hal.device.promise<@__device_2>
    %39456 = torch_c.from_builtin_tensor %39455 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39457 = torch_c.to_builtin_tensor %39370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36956 = arith.constant 1 : index
    %dim_36957 = tensor.dim %39457, %c1_36956 : tensor<4x?x4096xf16>
    %39458 = flow.tensor.transfer %39457 : tensor<4x?x4096xf16>{%dim_36957} to #hal.device.promise<@__device_2>
    %39459 = torch_c.from_builtin_tensor %39458 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39460 = torch_c.to_builtin_tensor %39377 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36958 = arith.constant 1 : index
    %dim_36959 = tensor.dim %39460, %c1_36958 : tensor<4x?x4096xf16>
    %39461 = flow.tensor.transfer %39460 : tensor<4x?x4096xf16>{%dim_36959} to #hal.device.promise<@__device_2>
    %39462 = torch_c.from_builtin_tensor %39461 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39463 = torch_c.to_builtin_tensor %39384 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36960 = arith.constant 1 : index
    %dim_36961 = tensor.dim %39463, %c1_36960 : tensor<4x?x4096xf16>
    %39464 = flow.tensor.transfer %39463 : tensor<4x?x4096xf16>{%dim_36961} to #hal.device.promise<@__device_2>
    %39465 = torch_c.from_builtin_tensor %39464 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39466 = torch_c.to_builtin_tensor %39391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36962 = arith.constant 1 : index
    %dim_36963 = tensor.dim %39466, %c1_36962 : tensor<4x?x4096xf16>
    %39467 = flow.tensor.transfer %39466 : tensor<4x?x4096xf16>{%dim_36963} to #hal.device.promise<@__device_2>
    %39468 = torch_c.from_builtin_tensor %39467 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36964 = torch.constant.int 1
    %39469 = torch.aten.add.Tensor %39450, %39453, %int1_36964 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36965 = torch.constant.int 1
    %39470 = torch.aten.add.Tensor %39469, %39356, %int1_36965 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36966 = torch.constant.int 1
    %39471 = torch.aten.add.Tensor %39470, %39456, %int1_36966 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36967 = torch.constant.int 1
    %39472 = torch.aten.add.Tensor %39471, %39459, %int1_36967 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36968 = torch.constant.int 1
    %39473 = torch.aten.add.Tensor %39472, %39462, %int1_36968 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36969 = torch.constant.int 1
    %39474 = torch.aten.add.Tensor %39473, %39465, %int1_36969 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36970 = torch.constant.int 1
    %39475 = torch.aten.add.Tensor %39474, %39468, %int1_36970 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
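    // Reduction on @__device_3 (local partial %39363 used directly).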
    %39476 = torch_c.to_builtin_tensor %39342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36971 = arith.constant 1 : index
    %dim_36972 = tensor.dim %39476, %c1_36971 : tensor<4x?x4096xf16>
    %39477 = flow.tensor.transfer %39476 : tensor<4x?x4096xf16>{%dim_36972} to #hal.device.promise<@__device_3>
    %39478 = torch_c.from_builtin_tensor %39477 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39479 = torch_c.to_builtin_tensor %39349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36973 = arith.constant 1 : index
    %dim_36974 = tensor.dim %39479, %c1_36973 : tensor<4x?x4096xf16>
    %39480 = flow.tensor.transfer %39479 : tensor<4x?x4096xf16>{%dim_36974} to #hal.device.promise<@__device_3>
    %39481 = torch_c.from_builtin_tensor %39480 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39482 = torch_c.to_builtin_tensor %39356 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36975 = arith.constant 1 : index
    %dim_36976 = tensor.dim %39482, %c1_36975 : tensor<4x?x4096xf16>
    %39483 = flow.tensor.transfer %39482 : tensor<4x?x4096xf16>{%dim_36976} to #hal.device.promise<@__device_3>
    %39484 = torch_c.from_builtin_tensor %39483 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39485 = torch_c.to_builtin_tensor %39370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36977 = arith.constant 1 : index
    %dim_36978 = tensor.dim %39485, %c1_36977 : tensor<4x?x4096xf16>
    %39486 = flow.tensor.transfer %39485 : tensor<4x?x4096xf16>{%dim_36978} to #hal.device.promise<@__device_3>
    %39487 = torch_c.from_builtin_tensor %39486 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39488 = torch_c.to_builtin_tensor %39377 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36979 = arith.constant 1 : index
    %dim_36980 = tensor.dim %39488, %c1_36979 : tensor<4x?x4096xf16>
    %39489 = flow.tensor.transfer %39488 : tensor<4x?x4096xf16>{%dim_36980} to #hal.device.promise<@__device_3>
    %39490 = torch_c.from_builtin_tensor %39489 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39491 = torch_c.to_builtin_tensor %39384 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36981 = arith.constant 1 : index
    %dim_36982 = tensor.dim %39491, %c1_36981 : tensor<4x?x4096xf16>
    %39492 = flow.tensor.transfer %39491 : tensor<4x?x4096xf16>{%dim_36982} to #hal.device.promise<@__device_3>
    %39493 = torch_c.from_builtin_tensor %39492 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39494 = torch_c.to_builtin_tensor %39391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36983 = arith.constant 1 : index
    %dim_36984 = tensor.dim %39494, %c1_36983 : tensor<4x?x4096xf16>
    %39495 = flow.tensor.transfer %39494 : tensor<4x?x4096xf16>{%dim_36984} to #hal.device.promise<@__device_3>
    %39496 = torch_c.from_builtin_tensor %39495 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36985 = torch.constant.int 1
    %39497 = torch.aten.add.Tensor %39478, %39481, %int1_36985 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36986 = torch.constant.int 1
    %39498 = torch.aten.add.Tensor %39497, %39484, %int1_36986 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36987 = torch.constant.int 1
    %39499 = torch.aten.add.Tensor %39498, %39363, %int1_36987 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36988 = torch.constant.int 1
    %39500 = torch.aten.add.Tensor %39499, %39487, %int1_36988 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36989 = torch.constant.int 1
    %39501 = torch.aten.add.Tensor %39500, %39490, %int1_36989 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36990 = torch.constant.int 1
    %39502 = torch.aten.add.Tensor %39501, %39493, %int1_36990 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_36991 = torch.constant.int 1
    %39503 = torch.aten.add.Tensor %39502, %39496, %int1_36991 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
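    // Reduction on @__device_4 (local partial %39370 used directly).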
    %39504 = torch_c.to_builtin_tensor %39342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36992 = arith.constant 1 : index
    %dim_36993 = tensor.dim %39504, %c1_36992 : tensor<4x?x4096xf16>
    %39505 = flow.tensor.transfer %39504 : tensor<4x?x4096xf16>{%dim_36993} to #hal.device.promise<@__device_4>
    %39506 = torch_c.from_builtin_tensor %39505 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39507 = torch_c.to_builtin_tensor %39349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36994 = arith.constant 1 : index
    %dim_36995 = tensor.dim %39507, %c1_36994 : tensor<4x?x4096xf16>
    %39508 = flow.tensor.transfer %39507 : tensor<4x?x4096xf16>{%dim_36995} to #hal.device.promise<@__device_4>
    %39509 = torch_c.from_builtin_tensor %39508 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39510 = torch_c.to_builtin_tensor %39356 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36996 = arith.constant 1 : index
    %dim_36997 = tensor.dim %39510, %c1_36996 : tensor<4x?x4096xf16>
    %39511 = flow.tensor.transfer %39510 : tensor<4x?x4096xf16>{%dim_36997} to #hal.device.promise<@__device_4>
    %39512 = torch_c.from_builtin_tensor %39511 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39513 = torch_c.to_builtin_tensor %39363 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_36998 = arith.constant 1 : index
    %dim_36999 = tensor.dim %39513, %c1_36998 : tensor<4x?x4096xf16>
    %39514 = flow.tensor.transfer %39513 : tensor<4x?x4096xf16>{%dim_36999} to #hal.device.promise<@__device_4>
    %39515 = torch_c.from_builtin_tensor %39514 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39516 = torch_c.to_builtin_tensor %39377 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37000 = arith.constant 1 : index
    %dim_37001 = tensor.dim %39516, %c1_37000 : tensor<4x?x4096xf16>
    %39517 = flow.tensor.transfer %39516 : tensor<4x?x4096xf16>{%dim_37001} to #hal.device.promise<@__device_4>
    %39518 = torch_c.from_builtin_tensor %39517 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39519 = torch_c.to_builtin_tensor %39384 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37002 = arith.constant 1 : index
    %dim_37003 = tensor.dim %39519, %c1_37002 : tensor<4x?x4096xf16>
    %39520 = flow.tensor.transfer %39519 : tensor<4x?x4096xf16>{%dim_37003} to #hal.device.promise<@__device_4>
    %39521 = torch_c.from_builtin_tensor %39520 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39522 = torch_c.to_builtin_tensor %39391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37004 = arith.constant 1 : index
    %dim_37005 = tensor.dim %39522, %c1_37004 : tensor<4x?x4096xf16>
    %39523 = flow.tensor.transfer %39522 : tensor<4x?x4096xf16>{%dim_37005} to #hal.device.promise<@__device_4>
    %39524 = torch_c.from_builtin_tensor %39523 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37006 = torch.constant.int 1
    %39525 = torch.aten.add.Tensor %39506, %39509, %int1_37006 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37007 = torch.constant.int 1
    %39526 = torch.aten.add.Tensor %39525, %39512, %int1_37007 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37008 = torch.constant.int 1
    %39527 = torch.aten.add.Tensor %39526, %39515, %int1_37008 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37009 = torch.constant.int 1
    %39528 = torch.aten.add.Tensor %39527, %39370, %int1_37009 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37010 = torch.constant.int 1
    %39529 = torch.aten.add.Tensor %39528, %39518, %int1_37010 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37011 = torch.constant.int 1
    %39530 = torch.aten.add.Tensor %39529, %39521, %int1_37011 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37012 = torch.constant.int 1
    %39531 = torch.aten.add.Tensor %39530, %39524, %int1_37012 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
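    // Same replicate-and-sum pattern, now targeting @__device_5 (its local
    // partial %39377 is used without a transfer).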
    %39532 = torch_c.to_builtin_tensor %39342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37013 = arith.constant 1 : index
    %dim_37014 = tensor.dim %39532, %c1_37013 : tensor<4x?x4096xf16>
    %39533 = flow.tensor.transfer %39532 : tensor<4x?x4096xf16>{%dim_37014} to #hal.device.promise<@__device_5>
    %39534 = torch_c.from_builtin_tensor %39533 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39535 = torch_c.to_builtin_tensor %39349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37015 = arith.constant 1 : index
    %dim_37016 = tensor.dim %39535, %c1_37015 : tensor<4x?x4096xf16>
    %39536 = flow.tensor.transfer %39535 : tensor<4x?x4096xf16>{%dim_37016} to #hal.device.promise<@__device_5>
    %39537 = torch_c.from_builtin_tensor %39536 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39538 = torch_c.to_builtin_tensor %39356 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37017 = arith.constant 1 : index
    %dim_37018 = tensor.dim %39538, %c1_37017 : tensor<4x?x4096xf16>
    %39539 = flow.tensor.transfer %39538 : tensor<4x?x4096xf16>{%dim_37018} to #hal.device.promise<@__device_5>
    %39540 = torch_c.from_builtin_tensor %39539 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39541 = torch_c.to_builtin_tensor %39363 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37019 = arith.constant 1 : index
    %dim_37020 = tensor.dim %39541, %c1_37019 : tensor<4x?x4096xf16>
    %39542 = flow.tensor.transfer %39541 : tensor<4x?x4096xf16>{%dim_37020} to #hal.device.promise<@__device_5>
    %39543 = torch_c.from_builtin_tensor %39542 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39544 = torch_c.to_builtin_tensor %39370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37021 = arith.constant 1 : index
    %dim_37022 = tensor.dim %39544, %c1_37021 : tensor<4x?x4096xf16>
    %39545 = flow.tensor.transfer %39544 : tensor<4x?x4096xf16>{%dim_37022} to #hal.device.promise<@__device_5>
    %39546 = torch_c.from_builtin_tensor %39545 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39547 = torch_c.to_builtin_tensor %39384 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37023 = arith.constant 1 : index
    %dim_37024 = tensor.dim %39547, %c1_37023 : tensor<4x?x4096xf16>
    %39548 = flow.tensor.transfer %39547 : tensor<4x?x4096xf16>{%dim_37024} to #hal.device.promise<@__device_5>
    %39549 = torch_c.from_builtin_tensor %39548 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39550 = torch_c.to_builtin_tensor %39391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37025 = arith.constant 1 : index
    %dim_37026 = tensor.dim %39550, %c1_37025 : tensor<4x?x4096xf16>
    %39551 = flow.tensor.transfer %39550 : tensor<4x?x4096xf16>{%dim_37026} to #hal.device.promise<@__device_5>
    %39552 = torch_c.from_builtin_tensor %39551 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37027 = torch.constant.int 1
    %39553 = torch.aten.add.Tensor %39534, %39537, %int1_37027 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37028 = torch.constant.int 1
    %39554 = torch.aten.add.Tensor %39553, %39540, %int1_37028 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37029 = torch.constant.int 1
    %39555 = torch.aten.add.Tensor %39554, %39543, %int1_37029 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37030 = torch.constant.int 1
    %39556 = torch.aten.add.Tensor %39555, %39546, %int1_37030 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37031 = torch.constant.int 1
    %39557 = torch.aten.add.Tensor %39556, %39377, %int1_37031 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37032 = torch.constant.int 1
    %39558 = torch.aten.add.Tensor %39557, %39549, %int1_37032 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37033 = torch.constant.int 1
    %39559 = torch.aten.add.Tensor %39558, %39552, %int1_37033 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
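    // Replicate-and-sum again, targeting @__device_6 (local partial %39384).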
    %39560 = torch_c.to_builtin_tensor %39342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37034 = arith.constant 1 : index
    %dim_37035 = tensor.dim %39560, %c1_37034 : tensor<4x?x4096xf16>
    %39561 = flow.tensor.transfer %39560 : tensor<4x?x4096xf16>{%dim_37035} to #hal.device.promise<@__device_6>
    %39562 = torch_c.from_builtin_tensor %39561 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39563 = torch_c.to_builtin_tensor %39349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37036 = arith.constant 1 : index
    %dim_37037 = tensor.dim %39563, %c1_37036 : tensor<4x?x4096xf16>
    %39564 = flow.tensor.transfer %39563 : tensor<4x?x4096xf16>{%dim_37037} to #hal.device.promise<@__device_6>
    %39565 = torch_c.from_builtin_tensor %39564 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39566 = torch_c.to_builtin_tensor %39356 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37038 = arith.constant 1 : index
    %dim_37039 = tensor.dim %39566, %c1_37038 : tensor<4x?x4096xf16>
    %39567 = flow.tensor.transfer %39566 : tensor<4x?x4096xf16>{%dim_37039} to #hal.device.promise<@__device_6>
    %39568 = torch_c.from_builtin_tensor %39567 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39569 = torch_c.to_builtin_tensor %39363 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37040 = arith.constant 1 : index
    %dim_37041 = tensor.dim %39569, %c1_37040 : tensor<4x?x4096xf16>
    %39570 = flow.tensor.transfer %39569 : tensor<4x?x4096xf16>{%dim_37041} to #hal.device.promise<@__device_6>
    %39571 = torch_c.from_builtin_tensor %39570 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39572 = torch_c.to_builtin_tensor %39370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37042 = arith.constant 1 : index
    %dim_37043 = tensor.dim %39572, %c1_37042 : tensor<4x?x4096xf16>
    %39573 = flow.tensor.transfer %39572 : tensor<4x?x4096xf16>{%dim_37043} to #hal.device.promise<@__device_6>
    %39574 = torch_c.from_builtin_tensor %39573 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39575 = torch_c.to_builtin_tensor %39377 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37044 = arith.constant 1 : index
    %dim_37045 = tensor.dim %39575, %c1_37044 : tensor<4x?x4096xf16>
    %39576 = flow.tensor.transfer %39575 : tensor<4x?x4096xf16>{%dim_37045} to #hal.device.promise<@__device_6>
    %39577 = torch_c.from_builtin_tensor %39576 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39578 = torch_c.to_builtin_tensor %39391 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37046 = arith.constant 1 : index
    %dim_37047 = tensor.dim %39578, %c1_37046 : tensor<4x?x4096xf16>
    %39579 = flow.tensor.transfer %39578 : tensor<4x?x4096xf16>{%dim_37047} to #hal.device.promise<@__device_6>
    %39580 = torch_c.from_builtin_tensor %39579 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37048 = torch.constant.int 1
    %39581 = torch.aten.add.Tensor %39562, %39565, %int1_37048 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37049 = torch.constant.int 1
    %39582 = torch.aten.add.Tensor %39581, %39568, %int1_37049 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37050 = torch.constant.int 1
    %39583 = torch.aten.add.Tensor %39582, %39571, %int1_37050 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37051 = torch.constant.int 1
    %39584 = torch.aten.add.Tensor %39583, %39574, %int1_37051 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37052 = torch.constant.int 1
    %39585 = torch.aten.add.Tensor %39584, %39577, %int1_37052 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37053 = torch.constant.int 1
    %39586 = torch.aten.add.Tensor %39585, %39384, %int1_37053 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37054 = torch.constant.int 1
    %39587 = torch.aten.add.Tensor %39586, %39580, %int1_37054 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
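    // Replicate-and-sum again, targeting @__device_7 (local partial %39391).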
    %39588 = torch_c.to_builtin_tensor %39342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37055 = arith.constant 1 : index
    %dim_37056 = tensor.dim %39588, %c1_37055 : tensor<4x?x4096xf16>
    %39589 = flow.tensor.transfer %39588 : tensor<4x?x4096xf16>{%dim_37056} to #hal.device.promise<@__device_7>
    %39590 = torch_c.from_builtin_tensor %39589 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39591 = torch_c.to_builtin_tensor %39349 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37057 = arith.constant 1 : index
    %dim_37058 = tensor.dim %39591, %c1_37057 : tensor<4x?x4096xf16>
    %39592 = flow.tensor.transfer %39591 : tensor<4x?x4096xf16>{%dim_37058} to #hal.device.promise<@__device_7>
    %39593 = torch_c.from_builtin_tensor %39592 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39594 = torch_c.to_builtin_tensor %39356 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37059 = arith.constant 1 : index
    %dim_37060 = tensor.dim %39594, %c1_37059 : tensor<4x?x4096xf16>
    %39595 = flow.tensor.transfer %39594 : tensor<4x?x4096xf16>{%dim_37060} to #hal.device.promise<@__device_7>
    %39596 = torch_c.from_builtin_tensor %39595 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39597 = torch_c.to_builtin_tensor %39363 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37061 = arith.constant 1 : index
    %dim_37062 = tensor.dim %39597, %c1_37061 : tensor<4x?x4096xf16>
    %39598 = flow.tensor.transfer %39597 : tensor<4x?x4096xf16>{%dim_37062} to #hal.device.promise<@__device_7>
    %39599 = torch_c.from_builtin_tensor %39598 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39600 = torch_c.to_builtin_tensor %39370 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37063 = arith.constant 1 : index
    %dim_37064 = tensor.dim %39600, %c1_37063 : tensor<4x?x4096xf16>
    %39601 = flow.tensor.transfer %39600 : tensor<4x?x4096xf16>{%dim_37064} to #hal.device.promise<@__device_7>
    %39602 = torch_c.from_builtin_tensor %39601 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39603 = torch_c.to_builtin_tensor %39377 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37065 = arith.constant 1 : index
    %dim_37066 = tensor.dim %39603, %c1_37065 : tensor<4x?x4096xf16>
    %39604 = flow.tensor.transfer %39603 : tensor<4x?x4096xf16>{%dim_37066} to #hal.device.promise<@__device_7>
    %39605 = torch_c.from_builtin_tensor %39604 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %39606 = torch_c.to_builtin_tensor %39384 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_37067 = arith.constant 1 : index
    %dim_37068 = tensor.dim %39606, %c1_37067 : tensor<4x?x4096xf16>
    %39607 = flow.tensor.transfer %39606 : tensor<4x?x4096xf16>{%dim_37068} to #hal.device.promise<@__device_7>
    %39608 = torch_c.from_builtin_tensor %39607 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37069 = torch.constant.int 1
    %39609 = torch.aten.add.Tensor %39590, %39593, %int1_37069 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37070 = torch.constant.int 1
    %39610 = torch.aten.add.Tensor %39609, %39596, %int1_37070 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37071 = torch.constant.int 1
    %39611 = torch.aten.add.Tensor %39610, %39599, %int1_37071 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37072 = torch.constant.int 1
    %39612 = torch.aten.add.Tensor %39611, %39602, %int1_37072 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37073 = torch.constant.int 1
    %39613 = torch.aten.add.Tensor %39612, %39605, %int1_37073 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37074 = torch.constant.int 1
    %39614 = torch.aten.add.Tensor %39613, %39608, %int1_37074 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37075 = torch.constant.int 1
    %39615 = torch.aten.add.Tensor %39614, %39391, %int1_37075 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
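    // The reduced activation is now materialized on all eight devices
    // (%39419, %39447, %39475 computed above; %39503, %39531, %39559, %39587,
    // %39615 here). The adds below look like the per-device residual (skip)
    // connection with the previous layer output (%39096..%39103).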
    %int1_37076 = torch.constant.int 1
    %39616 = torch.aten.add.Tensor %39096, %39419, %int1_37076 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37077 = torch.constant.int 1
    %39617 = torch.aten.add.Tensor %39097, %39447, %int1_37077 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37078 = torch.constant.int 1
    %39618 = torch.aten.add.Tensor %39098, %39475, %int1_37078 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37079 = torch.constant.int 1
    %39619 = torch.aten.add.Tensor %39099, %39503, %int1_37079 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37080 = torch.constant.int 1
    %39620 = torch.aten.add.Tensor %39100, %39531, %int1_37080 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37081 = torch.constant.int 1
    %39621 = torch.aten.add.Tensor %39101, %39559, %int1_37081 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37082 = torch.constant.int 1
    %39622 = torch.aten.add.Tensor %39102, %39587, %int1_37082 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_37083 = torch.constant.int 1
    %39623 = torch.aten.add.Tensor %39103, %39615, %int1_37083 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
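    // What follows appears to be an RMSNorm computed independently on each
    // device; assuming the standard formulation,
    //   y = x * rsqrt(mean(x^2, dim=-1) + eps) * w,   eps = 1e-5 (rounded to f32),
    // carried out in f32 and cast back to f16 at the end.
    // Step 1: upcast f16 -> f32 (torch dtype code 6 = float32).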
    %int6_37084 = torch.constant.int 6
    %39624 = torch.prims.convert_element_type %39616, %int6_37084 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_37085 = torch.constant.int 6
    %39625 = torch.prims.convert_element_type %39617, %int6_37085 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_37086 = torch.constant.int 6
    %39626 = torch.prims.convert_element_type %39618, %int6_37086 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_37087 = torch.constant.int 6
    %39627 = torch.prims.convert_element_type %39619, %int6_37087 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_37088 = torch.constant.int 6
    %39628 = torch.prims.convert_element_type %39620, %int6_37088 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_37089 = torch.constant.int 6
    %39629 = torch.prims.convert_element_type %39621, %int6_37089 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_37090 = torch.constant.int 6
    %39630 = torch.prims.convert_element_type %39622, %int6_37090 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_37091 = torch.constant.int 6
    %39631 = torch.prims.convert_element_type %39623, %int6_37091 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
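    // Step 2: elementwise square, x^2.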
    %int2_37092 = torch.constant.int 2
    %39632 = torch.aten.pow.Tensor_Scalar %39624, %int2_37092 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_37093 = torch.constant.int 2
    %39633 = torch.aten.pow.Tensor_Scalar %39625, %int2_37093 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_37094 = torch.constant.int 2
    %39634 = torch.aten.pow.Tensor_Scalar %39626, %int2_37094 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_37095 = torch.constant.int 2
    %39635 = torch.aten.pow.Tensor_Scalar %39627, %int2_37095 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_37096 = torch.constant.int 2
    %39636 = torch.aten.pow.Tensor_Scalar %39628, %int2_37096 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_37097 = torch.constant.int 2
    %39637 = torch.aten.pow.Tensor_Scalar %39629, %int2_37097 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_37098 = torch.constant.int 2
    %39638 = torch.aten.pow.Tensor_Scalar %39630, %int2_37098 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_37099 = torch.constant.int 2
    %39639 = torch.aten.pow.Tensor_Scalar %39631, %int2_37099 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
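    // Step 3: mean over the hidden dimension (dim -1, keepdim=true),
    // reducing [4,?,4096] to [4,?,1].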
    %int-1_37100 = torch.constant.int -1
    %39640 = torch.prim.ListConstruct %int-1_37100 : (!torch.int) -> !torch.list<int>
    %true_37101 = torch.constant.bool true
    %none_37102 = torch.constant.none
    %39641 = torch.aten.mean.dim %39632, %39640, %true_37101, %none_37102 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_37103 = torch.constant.int -1
    %39642 = torch.prim.ListConstruct %int-1_37103 : (!torch.int) -> !torch.list<int>
    %true_37104 = torch.constant.bool true
    %none_37105 = torch.constant.none
    %39643 = torch.aten.mean.dim %39633, %39642, %true_37104, %none_37105 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_37106 = torch.constant.int -1
    %39644 = torch.prim.ListConstruct %int-1_37106 : (!torch.int) -> !torch.list<int>
    %true_37107 = torch.constant.bool true
    %none_37108 = torch.constant.none
    %39645 = torch.aten.mean.dim %39634, %39644, %true_37107, %none_37108 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_37109 = torch.constant.int -1
    %39646 = torch.prim.ListConstruct %int-1_37109 : (!torch.int) -> !torch.list<int>
    %true_37110 = torch.constant.bool true
    %none_37111 = torch.constant.none
    %39647 = torch.aten.mean.dim %39635, %39646, %true_37110, %none_37111 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_37112 = torch.constant.int -1
    %39648 = torch.prim.ListConstruct %int-1_37112 : (!torch.int) -> !torch.list<int>
    %true_37113 = torch.constant.bool true
    %none_37114 = torch.constant.none
    %39649 = torch.aten.mean.dim %39636, %39648, %true_37113, %none_37114 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_37115 = torch.constant.int -1
    %39650 = torch.prim.ListConstruct %int-1_37115 : (!torch.int) -> !torch.list<int>
    %true_37116 = torch.constant.bool true
    %none_37117 = torch.constant.none
    %39651 = torch.aten.mean.dim %39637, %39650, %true_37116, %none_37117 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_37118 = torch.constant.int -1
    %39652 = torch.prim.ListConstruct %int-1_37118 : (!torch.int) -> !torch.list<int>
    %true_37119 = torch.constant.bool true
    %none_37120 = torch.constant.none
    %39653 = torch.aten.mean.dim %39638, %39652, %true_37119, %none_37120 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_37121 = torch.constant.int -1
    %39654 = torch.prim.ListConstruct %int-1_37121 : (!torch.int) -> !torch.list<int>
    %true_37122 = torch.constant.bool true
    %none_37123 = torch.constant.none
    %39655 = torch.aten.mean.dim %39639, %39654, %true_37122, %none_37123 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
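    // Step 4: add the epsilon (9.9999997473787516e-6, i.e. 1e-5 in f32).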
    %float9.999990e-06_37124 = torch.constant.float 9.9999997473787516E-6
    %int1_37125 = torch.constant.int 1
    %39656 = torch.aten.add.Scalar %39641, %float9.999990e-06_37124, %int1_37125 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_37126 = torch.constant.float 9.9999997473787516E-6
    %int1_37127 = torch.constant.int 1
    %39657 = torch.aten.add.Scalar %39643, %float9.999990e-06_37126, %int1_37127 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_37128 = torch.constant.float 9.9999997473787516E-6
    %int1_37129 = torch.constant.int 1
    %39658 = torch.aten.add.Scalar %39645, %float9.999990e-06_37128, %int1_37129 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_37130 = torch.constant.float 9.9999997473787516E-6
    %int1_37131 = torch.constant.int 1
    %39659 = torch.aten.add.Scalar %39647, %float9.999990e-06_37130, %int1_37131 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_37132 = torch.constant.float 9.9999997473787516E-6
    %int1_37133 = torch.constant.int 1
    %39660 = torch.aten.add.Scalar %39649, %float9.999990e-06_37132, %int1_37133 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_37134 = torch.constant.float 9.9999997473787516E-6
    %int1_37135 = torch.constant.int 1
    %39661 = torch.aten.add.Scalar %39651, %float9.999990e-06_37134, %int1_37135 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_37136 = torch.constant.float 9.9999997473787516E-6
    %int1_37137 = torch.constant.int 1
    %39662 = torch.aten.add.Scalar %39653, %float9.999990e-06_37136, %int1_37137 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_37138 = torch.constant.float 9.9999997473787516E-6
    %int1_37139 = torch.constant.int 1
    %39663 = torch.aten.add.Scalar %39655, %float9.999990e-06_37138, %int1_37139 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
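    // Step 5: rsqrt of the mean-square estimate.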
    %39664 = torch.aten.rsqrt %39656 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39665 = torch.aten.rsqrt %39657 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39666 = torch.aten.rsqrt %39658 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39667 = torch.aten.rsqrt %39659 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39668 = torch.aten.rsqrt %39660 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39669 = torch.aten.rsqrt %39661 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39670 = torch.aten.rsqrt %39662 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %39671 = torch.aten.rsqrt %39663 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %39671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
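    // Step 6: normalize, x * rsqrt(mean(x^2) + eps).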
    %39672 = torch.aten.mul.Tensor %39624, %39664 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39673 = torch.aten.mul.Tensor %39625, %39665 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39674 = torch.aten.mul.Tensor %39626, %39666 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39675 = torch.aten.mul.Tensor %39627, %39667 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39676 = torch.aten.mul.Tensor %39628, %39668 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39677 = torch.aten.mul.Tensor %39629, %39669 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39678 = torch.aten.mul.Tensor %39630, %39670 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39679 = torch.aten.mul.Tensor %39631, %39671 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
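    // Step 7: scale by the 4096-element norm weights (%1448..%1455), which
    // appear to be the per-device replicas of the same attn_norm parameter.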
    %39680 = torch.aten.mul.Tensor %1448, %39672 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39681 = torch.aten.mul.Tensor %1449, %39673 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39682 = torch.aten.mul.Tensor %1450, %39674 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39683 = torch.aten.mul.Tensor %1451, %39675 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39684 = torch.aten.mul.Tensor %1452, %39676 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39685 = torch.aten.mul.Tensor %1453, %39677 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39686 = torch.aten.mul.Tensor %1454, %39678 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %39687 = torch.aten.mul.Tensor %1455, %39679 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %39687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
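    // Step 8: downcast back to f16 (torch dtype code 5 = float16).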
    %int5_37140 = torch.constant.int 5
    %39688 = torch.prims.convert_element_type %39680, %int5_37140 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_37141 = torch.constant.int 5
    %39689 = torch.prims.convert_element_type %39681, %int5_37141 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_37142 = torch.constant.int 5
    %39690 = torch.prims.convert_element_type %39682, %int5_37142 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_37143 = torch.constant.int 5
    %39691 = torch.prims.convert_element_type %39683, %int5_37143 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_37144 = torch.constant.int 5
    %39692 = torch.prims.convert_element_type %39684, %int5_37144 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_37145 = torch.constant.int 5
    %39693 = torch.prims.convert_element_type %39685, %int5_37145 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_37146 = torch.constant.int 5
    %39694 = torch.prims.convert_element_type %39686, %int5_37146 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_37147 = torch.constant.int 5
    %39695 = torch.prims.convert_element_type %39687, %int5_37147 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %39695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
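    // Next: per-device Q projection. Each weight (%1456..%1463) is a 512x4096
    // shard, consistent with 8-way head sharding (4 heads x head_dim 128 = 512
    // rows per device); it is transposed to 4096x512 for the matmul.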
    %int1_37148 = torch.constant.int 1
    %int0_37149 = torch.constant.int 0
    %39696 = torch.prim.ListConstruct %int1_37148, %int0_37149 : (!torch.int, !torch.int) -> !torch.list<int>
    %39697 = torch.aten.permute %1456, %39696 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_37150 = torch.constant.int 1
    %int0_37151 = torch.constant.int 0
    %39698 = torch.prim.ListConstruct %int1_37150, %int0_37151 : (!torch.int, !torch.int) -> !torch.list<int>
    %39699 = torch.aten.permute %1457, %39698 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_37152 = torch.constant.int 1
    %int0_37153 = torch.constant.int 0
    %39700 = torch.prim.ListConstruct %int1_37152, %int0_37153 : (!torch.int, !torch.int) -> !torch.list<int>
    %39701 = torch.aten.permute %1458, %39700 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_37154 = torch.constant.int 1
    %int0_37155 = torch.constant.int 0
    %39702 = torch.prim.ListConstruct %int1_37154, %int0_37155 : (!torch.int, !torch.int) -> !torch.list<int>
    %39703 = torch.aten.permute %1459, %39702 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_37156 = torch.constant.int 1
    %int0_37157 = torch.constant.int 0
    %39704 = torch.prim.ListConstruct %int1_37156, %int0_37157 : (!torch.int, !torch.int) -> !torch.list<int>
    %39705 = torch.aten.permute %1460, %39704 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_37158 = torch.constant.int 1
    %int0_37159 = torch.constant.int 0
    %39706 = torch.prim.ListConstruct %int1_37158, %int0_37159 : (!torch.int, !torch.int) -> !torch.list<int>
    %39707 = torch.aten.permute %1461, %39706 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_37160 = torch.constant.int 1
    %int0_37161 = torch.constant.int 0
    %39708 = torch.prim.ListConstruct %int1_37160, %int0_37161 : (!torch.int, !torch.int) -> !torch.list<int>
    %39709 = torch.aten.permute %1462, %39708 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_37162 = torch.constant.int 1
    %int0_37163 = torch.constant.int 0
    %39710 = torch.prim.ListConstruct %int1_37162, %int0_37163 : (!torch.int, !torch.int) -> !torch.list<int>
    %39711 = torch.aten.permute %1463, %39710 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
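    // The [4, s0*16, 4096] activations are flattened to [s0*64, 4096]
    // (4 * 16 = 64) so a 2-D torch.aten.mm can be used, then the result is
    // reshaped back to [4, s0*16, 512]. This repeats once per device.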
    %int4_37164 = torch.constant.int 4
    %39712 = torch.aten.mul.int %int4_37164, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37165 = torch.constant.int 4096
    %39713 = torch.prim.ListConstruct %39712, %int4096_37165 : (!torch.int, !torch.int) -> !torch.list<int>
    %39714 = torch.aten.view %39688, %39713 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39714, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39715 = torch.aten.mm %39714, %39697 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %39715, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_37166 = torch.constant.int 4
    %int512_37167 = torch.constant.int 512
    %39716 = torch.prim.ListConstruct %int4_37166, %2482, %int512_37167 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39717 = torch.aten.view %39715, %39716 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %39717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_37168 = torch.constant.int 4
    %39718 = torch.aten.mul.int %int4_37168, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37169 = torch.constant.int 4096
    %39719 = torch.prim.ListConstruct %39718, %int4096_37169 : (!torch.int, !torch.int) -> !torch.list<int>
    %39720 = torch.aten.view %39689, %39719 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39720, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39721 = torch.aten.mm %39720, %39699 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %39721, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_37170 = torch.constant.int 4
    %int512_37171 = torch.constant.int 512
    %39722 = torch.prim.ListConstruct %int4_37170, %2482, %int512_37171 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39723 = torch.aten.view %39721, %39722 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %39723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_37172 = torch.constant.int 4
    %39724 = torch.aten.mul.int %int4_37172, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37173 = torch.constant.int 4096
    %39725 = torch.prim.ListConstruct %39724, %int4096_37173 : (!torch.int, !torch.int) -> !torch.list<int>
    %39726 = torch.aten.view %39690, %39725 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39726, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39727 = torch.aten.mm %39726, %39701 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %39727, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_37174 = torch.constant.int 4
    %int512_37175 = torch.constant.int 512
    %39728 = torch.prim.ListConstruct %int4_37174, %2482, %int512_37175 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39729 = torch.aten.view %39727, %39728 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %39729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_37176 = torch.constant.int 4
    %39730 = torch.aten.mul.int %int4_37176, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37177 = torch.constant.int 4096
    %39731 = torch.prim.ListConstruct %39730, %int4096_37177 : (!torch.int, !torch.int) -> !torch.list<int>
    %39732 = torch.aten.view %39691, %39731 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39732, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39733 = torch.aten.mm %39732, %39703 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %39733, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_37178 = torch.constant.int 4
    %int512_37179 = torch.constant.int 512
    %39734 = torch.prim.ListConstruct %int4_37178, %2482, %int512_37179 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39735 = torch.aten.view %39733, %39734 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %39735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_37180 = torch.constant.int 4
    %39736 = torch.aten.mul.int %int4_37180, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37181 = torch.constant.int 4096
    %39737 = torch.prim.ListConstruct %39736, %int4096_37181 : (!torch.int, !torch.int) -> !torch.list<int>
    %39738 = torch.aten.view %39692, %39737 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39738, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39739 = torch.aten.mm %39738, %39705 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %39739, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_37182 = torch.constant.int 4
    %int512_37183 = torch.constant.int 512
    %39740 = torch.prim.ListConstruct %int4_37182, %2482, %int512_37183 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39741 = torch.aten.view %39739, %39740 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %39741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_37184 = torch.constant.int 4
    %39742 = torch.aten.mul.int %int4_37184, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37185 = torch.constant.int 4096
    %39743 = torch.prim.ListConstruct %39742, %int4096_37185 : (!torch.int, !torch.int) -> !torch.list<int>
    %39744 = torch.aten.view %39693, %39743 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39744, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39745 = torch.aten.mm %39744, %39707 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %39745, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_37186 = torch.constant.int 4
    %int512_37187 = torch.constant.int 512
    %39746 = torch.prim.ListConstruct %int4_37186, %2482, %int512_37187 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39747 = torch.aten.view %39745, %39746 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %39747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_37188 = torch.constant.int 4
    %39748 = torch.aten.mul.int %int4_37188, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37189 = torch.constant.int 4096
    %39749 = torch.prim.ListConstruct %39748, %int4096_37189 : (!torch.int, !torch.int) -> !torch.list<int>
    %39750 = torch.aten.view %39694, %39749 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39750, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39751 = torch.aten.mm %39750, %39709 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %39751, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_37190 = torch.constant.int 4
    %int512_37191 = torch.constant.int 512
    %39752 = torch.prim.ListConstruct %int4_37190, %2482, %int512_37191 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39753 = torch.aten.view %39751, %39752 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %39753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_37192 = torch.constant.int 4
    %39754 = torch.aten.mul.int %int4_37192, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37193 = torch.constant.int 4096
    %39755 = torch.prim.ListConstruct %39754, %int4096_37193 : (!torch.int, !torch.int) -> !torch.list<int>
    %39756 = torch.aten.view %39695, %39755 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39756, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39757 = torch.aten.mm %39756, %39711 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %39757, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_37194 = torch.constant.int 4
    %int512_37195 = torch.constant.int 512
    %39758 = torch.prim.ListConstruct %int4_37194, %2482, %int512_37195 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39759 = torch.aten.view %39757, %39758 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %39759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
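    // Annotation (inferred from shapes, not present in the original dump): the
    // view/mm/view triplets above push each device's [4,?,4096] activations
    // through a transposed [4096,512] weight shard, giving one [4,?,512] result
    // per device (512 = 4 heads x 128, judging by the head-split reshapes further
    // down). The permutes below transpose eight [128,4096] weights for the next,
    // narrower per-device projection (a single 128-wide head per shard).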
    %int1_37196 = torch.constant.int 1
    %int0_37197 = torch.constant.int 0
    %39760 = torch.prim.ListConstruct %int1_37196, %int0_37197 : (!torch.int, !torch.int) -> !torch.list<int>
    %39761 = torch.aten.permute %1464, %39760 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37198 = torch.constant.int 1
    %int0_37199 = torch.constant.int 0
    %39762 = torch.prim.ListConstruct %int1_37198, %int0_37199 : (!torch.int, !torch.int) -> !torch.list<int>
    %39763 = torch.aten.permute %1465, %39762 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37200 = torch.constant.int 1
    %int0_37201 = torch.constant.int 0
    %39764 = torch.prim.ListConstruct %int1_37200, %int0_37201 : (!torch.int, !torch.int) -> !torch.list<int>
    %39765 = torch.aten.permute %1466, %39764 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37202 = torch.constant.int 1
    %int0_37203 = torch.constant.int 0
    %39766 = torch.prim.ListConstruct %int1_37202, %int0_37203 : (!torch.int, !torch.int) -> !torch.list<int>
    %39767 = torch.aten.permute %1467, %39766 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37204 = torch.constant.int 1
    %int0_37205 = torch.constant.int 0
    %39768 = torch.prim.ListConstruct %int1_37204, %int0_37205 : (!torch.int, !torch.int) -> !torch.list<int>
    %39769 = torch.aten.permute %1468, %39768 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37206 = torch.constant.int 1
    %int0_37207 = torch.constant.int 0
    %39770 = torch.prim.ListConstruct %int1_37206, %int0_37207 : (!torch.int, !torch.int) -> !torch.list<int>
    %39771 = torch.aten.permute %1469, %39770 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37208 = torch.constant.int 1
    %int0_37209 = torch.constant.int 0
    %39772 = torch.prim.ListConstruct %int1_37208, %int0_37209 : (!torch.int, !torch.int) -> !torch.list<int>
    %39773 = torch.aten.permute %1470, %39772 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37210 = torch.constant.int 1
    %int0_37211 = torch.constant.int 0
    %39774 = torch.prim.ListConstruct %int1_37210, %int0_37211 : (!torch.int, !torch.int) -> !torch.list<int>
    %39775 = torch.aten.permute %1471, %39774 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_37212 = torch.constant.int 4
    %39776 = torch.aten.mul.int %int4_37212, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37213 = torch.constant.int 4096
    %39777 = torch.prim.ListConstruct %39776, %int4096_37213 : (!torch.int, !torch.int) -> !torch.list<int>
    %39778 = torch.aten.view %39688, %39777 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39778, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39779 = torch.aten.mm %39778, %39761 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39779, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37214 = torch.constant.int 4
    %int128_37215 = torch.constant.int 128
    %39780 = torch.prim.ListConstruct %int4_37214, %2482, %int128_37215 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39781 = torch.aten.view %39779, %39780 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37216 = torch.constant.int 4
    %39782 = torch.aten.mul.int %int4_37216, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37217 = torch.constant.int 4096
    %39783 = torch.prim.ListConstruct %39782, %int4096_37217 : (!torch.int, !torch.int) -> !torch.list<int>
    %39784 = torch.aten.view %39689, %39783 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39784, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39785 = torch.aten.mm %39784, %39763 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39785, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37218 = torch.constant.int 4
    %int128_37219 = torch.constant.int 128
    %39786 = torch.prim.ListConstruct %int4_37218, %2482, %int128_37219 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39787 = torch.aten.view %39785, %39786 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37220 = torch.constant.int 4
    %39788 = torch.aten.mul.int %int4_37220, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37221 = torch.constant.int 4096
    %39789 = torch.prim.ListConstruct %39788, %int4096_37221 : (!torch.int, !torch.int) -> !torch.list<int>
    %39790 = torch.aten.view %39690, %39789 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39790, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39791 = torch.aten.mm %39790, %39765 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39791, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37222 = torch.constant.int 4
    %int128_37223 = torch.constant.int 128
    %39792 = torch.prim.ListConstruct %int4_37222, %2482, %int128_37223 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39793 = torch.aten.view %39791, %39792 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37224 = torch.constant.int 4
    %39794 = torch.aten.mul.int %int4_37224, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37225 = torch.constant.int 4096
    %39795 = torch.prim.ListConstruct %39794, %int4096_37225 : (!torch.int, !torch.int) -> !torch.list<int>
    %39796 = torch.aten.view %39691, %39795 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39796, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39797 = torch.aten.mm %39796, %39767 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39797, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37226 = torch.constant.int 4
    %int128_37227 = torch.constant.int 128
    %39798 = torch.prim.ListConstruct %int4_37226, %2482, %int128_37227 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39799 = torch.aten.view %39797, %39798 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37228 = torch.constant.int 4
    %39800 = torch.aten.mul.int %int4_37228, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37229 = torch.constant.int 4096
    %39801 = torch.prim.ListConstruct %39800, %int4096_37229 : (!torch.int, !torch.int) -> !torch.list<int>
    %39802 = torch.aten.view %39692, %39801 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39802, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39803 = torch.aten.mm %39802, %39769 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39803, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37230 = torch.constant.int 4
    %int128_37231 = torch.constant.int 128
    %39804 = torch.prim.ListConstruct %int4_37230, %2482, %int128_37231 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39805 = torch.aten.view %39803, %39804 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37232 = torch.constant.int 4
    %39806 = torch.aten.mul.int %int4_37232, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37233 = torch.constant.int 4096
    %39807 = torch.prim.ListConstruct %39806, %int4096_37233 : (!torch.int, !torch.int) -> !torch.list<int>
    %39808 = torch.aten.view %39693, %39807 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39808, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39809 = torch.aten.mm %39808, %39771 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39809, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37234 = torch.constant.int 4
    %int128_37235 = torch.constant.int 128
    %39810 = torch.prim.ListConstruct %int4_37234, %2482, %int128_37235 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39811 = torch.aten.view %39809, %39810 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37236 = torch.constant.int 4
    %39812 = torch.aten.mul.int %int4_37236, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37237 = torch.constant.int 4096
    %39813 = torch.prim.ListConstruct %39812, %int4096_37237 : (!torch.int, !torch.int) -> !torch.list<int>
    %39814 = torch.aten.view %39694, %39813 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39814, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39815 = torch.aten.mm %39814, %39773 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39815, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37238 = torch.constant.int 4
    %int128_37239 = torch.constant.int 128
    %39816 = torch.prim.ListConstruct %int4_37238, %2482, %int128_37239 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39817 = torch.aten.view %39815, %39816 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37240 = torch.constant.int 4
    %39818 = torch.aten.mul.int %int4_37240, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37241 = torch.constant.int 4096
    %39819 = torch.prim.ListConstruct %39818, %int4096_37241 : (!torch.int, !torch.int) -> !torch.list<int>
    %39820 = torch.aten.view %39695, %39819 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39820, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39821 = torch.aten.mm %39820, %39775 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39821, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37242 = torch.constant.int 4
    %int128_37243 = torch.constant.int 128
    %39822 = torch.prim.ListConstruct %int4_37242, %2482, %int128_37243 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39823 = torch.aten.view %39821, %39822 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
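    // Annotation: the eight [4,?,128] results above are a second, 128-wide
    // per-device projection. Below, another set of eight [128,4096] weights is
    // transposed for a third projection of the same width. The 512/128/128 split
    // is consistent with sharded Q/K/V projections of a grouped-query attention
    // layer (4 query heads and 1 KV head per device), though the IR itself only
    // records the shapes.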
    %int1_37244 = torch.constant.int 1
    %int0_37245 = torch.constant.int 0
    %39824 = torch.prim.ListConstruct %int1_37244, %int0_37245 : (!torch.int, !torch.int) -> !torch.list<int>
    %39825 = torch.aten.permute %1472, %39824 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37246 = torch.constant.int 1
    %int0_37247 = torch.constant.int 0
    %39826 = torch.prim.ListConstruct %int1_37246, %int0_37247 : (!torch.int, !torch.int) -> !torch.list<int>
    %39827 = torch.aten.permute %1473, %39826 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37248 = torch.constant.int 1
    %int0_37249 = torch.constant.int 0
    %39828 = torch.prim.ListConstruct %int1_37248, %int0_37249 : (!torch.int, !torch.int) -> !torch.list<int>
    %39829 = torch.aten.permute %1474, %39828 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37250 = torch.constant.int 1
    %int0_37251 = torch.constant.int 0
    %39830 = torch.prim.ListConstruct %int1_37250, %int0_37251 : (!torch.int, !torch.int) -> !torch.list<int>
    %39831 = torch.aten.permute %1475, %39830 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37252 = torch.constant.int 1
    %int0_37253 = torch.constant.int 0
    %39832 = torch.prim.ListConstruct %int1_37252, %int0_37253 : (!torch.int, !torch.int) -> !torch.list<int>
    %39833 = torch.aten.permute %1476, %39832 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37254 = torch.constant.int 1
    %int0_37255 = torch.constant.int 0
    %39834 = torch.prim.ListConstruct %int1_37254, %int0_37255 : (!torch.int, !torch.int) -> !torch.list<int>
    %39835 = torch.aten.permute %1477, %39834 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37256 = torch.constant.int 1
    %int0_37257 = torch.constant.int 0
    %39836 = torch.prim.ListConstruct %int1_37256, %int0_37257 : (!torch.int, !torch.int) -> !torch.list<int>
    %39837 = torch.aten.permute %1478, %39836 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_37258 = torch.constant.int 1
    %int0_37259 = torch.constant.int 0
    %39838 = torch.prim.ListConstruct %int1_37258, %int0_37259 : (!torch.int, !torch.int) -> !torch.list<int>
    %39839 = torch.aten.permute %1479, %39838 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_37260 = torch.constant.int 4
    %39840 = torch.aten.mul.int %int4_37260, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37261 = torch.constant.int 4096
    %39841 = torch.prim.ListConstruct %39840, %int4096_37261 : (!torch.int, !torch.int) -> !torch.list<int>
    %39842 = torch.aten.view %39688, %39841 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39842, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39843 = torch.aten.mm %39842, %39825 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39843, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37262 = torch.constant.int 4
    %int128_37263 = torch.constant.int 128
    %39844 = torch.prim.ListConstruct %int4_37262, %2482, %int128_37263 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39845 = torch.aten.view %39843, %39844 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37264 = torch.constant.int 4
    %39846 = torch.aten.mul.int %int4_37264, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37265 = torch.constant.int 4096
    %39847 = torch.prim.ListConstruct %39846, %int4096_37265 : (!torch.int, !torch.int) -> !torch.list<int>
    %39848 = torch.aten.view %39689, %39847 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39848, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39849 = torch.aten.mm %39848, %39827 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39849, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37266 = torch.constant.int 4
    %int128_37267 = torch.constant.int 128
    %39850 = torch.prim.ListConstruct %int4_37266, %2482, %int128_37267 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39851 = torch.aten.view %39849, %39850 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37268 = torch.constant.int 4
    %39852 = torch.aten.mul.int %int4_37268, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37269 = torch.constant.int 4096
    %39853 = torch.prim.ListConstruct %39852, %int4096_37269 : (!torch.int, !torch.int) -> !torch.list<int>
    %39854 = torch.aten.view %39690, %39853 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39854, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39855 = torch.aten.mm %39854, %39829 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39855, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37270 = torch.constant.int 4
    %int128_37271 = torch.constant.int 128
    %39856 = torch.prim.ListConstruct %int4_37270, %2482, %int128_37271 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39857 = torch.aten.view %39855, %39856 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37272 = torch.constant.int 4
    %39858 = torch.aten.mul.int %int4_37272, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37273 = torch.constant.int 4096
    %39859 = torch.prim.ListConstruct %39858, %int4096_37273 : (!torch.int, !torch.int) -> !torch.list<int>
    %39860 = torch.aten.view %39691, %39859 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39860, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39861 = torch.aten.mm %39860, %39831 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39861, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37274 = torch.constant.int 4
    %int128_37275 = torch.constant.int 128
    %39862 = torch.prim.ListConstruct %int4_37274, %2482, %int128_37275 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39863 = torch.aten.view %39861, %39862 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37276 = torch.constant.int 4
    %39864 = torch.aten.mul.int %int4_37276, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37277 = torch.constant.int 4096
    %39865 = torch.prim.ListConstruct %39864, %int4096_37277 : (!torch.int, !torch.int) -> !torch.list<int>
    %39866 = torch.aten.view %39692, %39865 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39866, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39867 = torch.aten.mm %39866, %39833 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39867, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37278 = torch.constant.int 4
    %int128_37279 = torch.constant.int 128
    %39868 = torch.prim.ListConstruct %int4_37278, %2482, %int128_37279 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39869 = torch.aten.view %39867, %39868 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37280 = torch.constant.int 4
    %39870 = torch.aten.mul.int %int4_37280, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37281 = torch.constant.int 4096
    %39871 = torch.prim.ListConstruct %39870, %int4096_37281 : (!torch.int, !torch.int) -> !torch.list<int>
    %39872 = torch.aten.view %39693, %39871 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39872, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39873 = torch.aten.mm %39872, %39835 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39873, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37282 = torch.constant.int 4
    %int128_37283 = torch.constant.int 128
    %39874 = torch.prim.ListConstruct %int4_37282, %2482, %int128_37283 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39875 = torch.aten.view %39873, %39874 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37284 = torch.constant.int 4
    %39876 = torch.aten.mul.int %int4_37284, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37285 = torch.constant.int 4096
    %39877 = torch.prim.ListConstruct %39876, %int4096_37285 : (!torch.int, !torch.int) -> !torch.list<int>
    %39878 = torch.aten.view %39694, %39877 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39878, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39879 = torch.aten.mm %39878, %39837 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39879, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37286 = torch.constant.int 4
    %int128_37287 = torch.constant.int 128
    %39880 = torch.prim.ListConstruct %int4_37286, %2482, %int128_37287 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39881 = torch.aten.view %39879, %39880 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_37288 = torch.constant.int 4
    %39882 = torch.aten.mul.int %int4_37288, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_37289 = torch.constant.int 4096
    %39883 = torch.prim.ListConstruct %39882, %int4096_37289 : (!torch.int, !torch.int) -> !torch.list<int>
    %39884 = torch.aten.view %39695, %39883 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %39884, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %39885 = torch.aten.mm %39884, %39839 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %39885, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_37290 = torch.constant.int 4
    %int128_37291 = torch.constant.int 128
    %39886 = torch.prim.ListConstruct %int4_37290, %2482, %int128_37291 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39887 = torch.aten.view %39885, %39886 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %39887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
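    // Annotation: all three per-device projections are now complete -- eight
    // [4,?,512] tensors and two sets of eight [4,?,128] tensors. The reshapes
    // below split the 512-wide results into explicit heads:
    // [4,?,512] -> [4,?,4,128].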
    %int4_37292 = torch.constant.int 4
    %int4_37293 = torch.constant.int 4
    %int128_37294 = torch.constant.int 128
    %39888 = torch.prim.ListConstruct %int4_37292, %2482, %int4_37293, %int128_37294 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39889 = torch.aten.view %39717, %39888 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %39889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_37295 = torch.constant.int 4
    %int4_37296 = torch.constant.int 4
    %int128_37297 = torch.constant.int 128
    %39890 = torch.prim.ListConstruct %int4_37295, %2482, %int4_37296, %int128_37297 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39891 = torch.aten.view %39723, %39890 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %39891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_37298 = torch.constant.int 4
    %int4_37299 = torch.constant.int 4
    %int128_37300 = torch.constant.int 128
    %39892 = torch.prim.ListConstruct %int4_37298, %2482, %int4_37299, %int128_37300 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39893 = torch.aten.view %39729, %39892 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %39893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_37301 = torch.constant.int 4
    %int4_37302 = torch.constant.int 4
    %int128_37303 = torch.constant.int 128
    %39894 = torch.prim.ListConstruct %int4_37301, %2482, %int4_37302, %int128_37303 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39895 = torch.aten.view %39735, %39894 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %39895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_37304 = torch.constant.int 4
    %int4_37305 = torch.constant.int 4
    %int128_37306 = torch.constant.int 128
    %39896 = torch.prim.ListConstruct %int4_37304, %2482, %int4_37305, %int128_37306 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39897 = torch.aten.view %39741, %39896 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %39897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_37307 = torch.constant.int 4
    %int4_37308 = torch.constant.int 4
    %int128_37309 = torch.constant.int 128
    %39898 = torch.prim.ListConstruct %int4_37307, %2482, %int4_37308, %int128_37309 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39899 = torch.aten.view %39747, %39898 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %39899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_37310 = torch.constant.int 4
    %int4_37311 = torch.constant.int 4
    %int128_37312 = torch.constant.int 128
    %39900 = torch.prim.ListConstruct %int4_37310, %2482, %int4_37311, %int128_37312 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39901 = torch.aten.view %39753, %39900 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %39901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_37313 = torch.constant.int 4
    %int4_37314 = torch.constant.int 4
    %int128_37315 = torch.constant.int 128
    %39902 = torch.prim.ListConstruct %int4_37313, %2482, %int4_37314, %int128_37315 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39903 = torch.aten.view %39759, %39902 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %39903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
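    // Annotation: the 128-wide projections likewise gain an explicit singleton
    // head dimension, [4,?,128] -> [4,?,1,128], i.e. one head per device shard.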
    %int4_37316 = torch.constant.int 4
    %int1_37317 = torch.constant.int 1
    %int128_37318 = torch.constant.int 128
    %39904 = torch.prim.ListConstruct %int4_37316, %2482, %int1_37317, %int128_37318 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39905 = torch.aten.view %39781, %39904 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37319 = torch.constant.int 4
    %int1_37320 = torch.constant.int 1
    %int128_37321 = torch.constant.int 128
    %39906 = torch.prim.ListConstruct %int4_37319, %2482, %int1_37320, %int128_37321 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39907 = torch.aten.view %39787, %39906 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37322 = torch.constant.int 4
    %int1_37323 = torch.constant.int 1
    %int128_37324 = torch.constant.int 128
    %39908 = torch.prim.ListConstruct %int4_37322, %2482, %int1_37323, %int128_37324 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39909 = torch.aten.view %39793, %39908 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37325 = torch.constant.int 4
    %int1_37326 = torch.constant.int 1
    %int128_37327 = torch.constant.int 128
    %39910 = torch.prim.ListConstruct %int4_37325, %2482, %int1_37326, %int128_37327 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39911 = torch.aten.view %39799, %39910 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37328 = torch.constant.int 4
    %int1_37329 = torch.constant.int 1
    %int128_37330 = torch.constant.int 128
    %39912 = torch.prim.ListConstruct %int4_37328, %2482, %int1_37329, %int128_37330 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39913 = torch.aten.view %39805, %39912 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37331 = torch.constant.int 4
    %int1_37332 = torch.constant.int 1
    %int128_37333 = torch.constant.int 128
    %39914 = torch.prim.ListConstruct %int4_37331, %2482, %int1_37332, %int128_37333 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39915 = torch.aten.view %39811, %39914 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37334 = torch.constant.int 4
    %int1_37335 = torch.constant.int 1
    %int128_37336 = torch.constant.int 128
    %39916 = torch.prim.ListConstruct %int4_37334, %2482, %int1_37335, %int128_37336 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39917 = torch.aten.view %39817, %39916 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37337 = torch.constant.int 4
    %int1_37338 = torch.constant.int 1
    %int128_37339 = torch.constant.int 128
    %39918 = torch.prim.ListConstruct %int4_37337, %2482, %int1_37338, %int128_37339 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39919 = torch.aten.view %39823, %39918 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37340 = torch.constant.int 4
    %int1_37341 = torch.constant.int 1
    %int128_37342 = torch.constant.int 128
    %39920 = torch.prim.ListConstruct %int4_37340, %2482, %int1_37341, %int128_37342 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39921 = torch.aten.view %39845, %39920 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37343 = torch.constant.int 4
    %int1_37344 = torch.constant.int 1
    %int128_37345 = torch.constant.int 128
    %39922 = torch.prim.ListConstruct %int4_37343, %2482, %int1_37344, %int128_37345 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39923 = torch.aten.view %39851, %39922 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37346 = torch.constant.int 4
    %int1_37347 = torch.constant.int 1
    %int128_37348 = torch.constant.int 128
    %39924 = torch.prim.ListConstruct %int4_37346, %2482, %int1_37347, %int128_37348 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39925 = torch.aten.view %39857, %39924 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37349 = torch.constant.int 4
    %int1_37350 = torch.constant.int 1
    %int128_37351 = torch.constant.int 128
    %39926 = torch.prim.ListConstruct %int4_37349, %2482, %int1_37350, %int128_37351 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39927 = torch.aten.view %39863, %39926 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37352 = torch.constant.int 4
    %int1_37353 = torch.constant.int 1
    %int128_37354 = torch.constant.int 128
    %39928 = torch.prim.ListConstruct %int4_37352, %2482, %int1_37353, %int128_37354 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39929 = torch.aten.view %39869, %39928 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37355 = torch.constant.int 4
    %int1_37356 = torch.constant.int 1
    %int128_37357 = torch.constant.int 128
    %39930 = torch.prim.ListConstruct %int4_37355, %2482, %int1_37356, %int128_37357 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39931 = torch.aten.view %39875, %39930 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37358 = torch.constant.int 4
    %int1_37359 = torch.constant.int 1
    %int128_37360 = torch.constant.int 128
    %39932 = torch.prim.ListConstruct %int4_37358, %2482, %int1_37359, %int128_37360 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39933 = torch.aten.view %39881, %39932 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_37361 = torch.constant.int 4
    %int1_37362 = torch.constant.int 1
    %int128_37363 = torch.constant.int 128
    %39934 = torch.prim.ListConstruct %int4_37361, %2482, %int1_37362, %int128_37363 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %39935 = torch.aten.view %39887, %39934 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %39935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
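    // Annotation: rotary position-embedding table, built once on the host
    // ("cpu" constants below): positions arange(131072) and inverse frequencies
    // 1 / 500000^(2i/128) for i = 0..63 are combined by an outer product into a
    // [131072,64] complex table cos(p*f) + i*sin(p*f). The base 5.0e5 and the 64
    // frequency pairs match a 128-dim rotary embedding; the mul.Scalar by 1.0 is
    // a no-op scaling factor.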
    %int131072_37364 = torch.constant.int 131072
    %none_37365 = torch.constant.none
    %none_37366 = torch.constant.none
    %cpu_37367 = torch.constant.device "cpu"
    %false_37368 = torch.constant.bool false
    %39936 = torch.aten.arange %int131072_37364, %none_37365, %none_37366, %cpu_37367, %false_37368 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_37369 = torch.constant.int 0
    %int128_37370 = torch.constant.int 128
    %int2_37371 = torch.constant.int 2
    %none_37372 = torch.constant.none
    %none_37373 = torch.constant.none
    %cpu_37374 = torch.constant.device "cpu"
    %false_37375 = torch.constant.bool false
    %39937 = torch.aten.arange.start_step %int0_37369, %int128_37370, %int2_37371, %none_37372, %none_37373, %cpu_37374, %false_37375 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_37376 = torch.constant.int 0
    %int0_37377 = torch.constant.int 0
    %int64_37378 = torch.constant.int 64
    %int1_37379 = torch.constant.int 1
    %39938 = torch.aten.slice.Tensor %39937, %int0_37376, %int0_37377, %int64_37378, %int1_37379 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_37380 = torch.constant.int 6
    %39939 = torch.prims.convert_element_type %39938, %int6_37380 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_37381 = torch.constant.int 128
    %39940 = torch.aten.div.Scalar %39939, %int128_37381 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_37382 = torch.constant.float 5.000000e+05
    %39941 = torch.aten.pow.Scalar %float5.000000e05_37382, %39940 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %39942 = torch.aten.reciprocal %39941 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_37383 = torch.constant.float 1.000000e+00
    %39943 = torch.aten.mul.Scalar %39942, %float1.000000e00_37383 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_37384 = torch.constant.int 131072
    %int1_37385 = torch.constant.int 1
    %39944 = torch.prim.ListConstruct %int131072_37384, %int1_37385 : (!torch.int, !torch.int) -> !torch.list<int>
    %39945 = torch.aten.view %39936, %39944 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %39946 = torch.aten.mul.Tensor %39945, %39943 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %39947 = torch.aten.cos %39946 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %39948 = torch.aten.sin %39946 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %39949 = torch.aten.complex %39947, %39948 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
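    // Annotation: the complex table is materialized as a builtin tensor and
    // copied to each of the eight devices (flow.tensor.transfer to @__device_0
    // ... @__device_7), so every shard can apply the rotary embedding locally.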
    %39950 = torch_c.to_builtin_tensor %39949 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %39951 = flow.tensor.transfer %39950 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %39952 = torch_c.from_builtin_tensor %39951 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %39953 = torch_c.to_builtin_tensor %39949 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %39954 = flow.tensor.transfer %39953 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %39955 = torch_c.from_builtin_tensor %39954 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %39956 = torch_c.to_builtin_tensor %39949 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %39957 = flow.tensor.transfer %39956 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %39958 = torch_c.from_builtin_tensor %39957 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %39959 = torch_c.to_builtin_tensor %39949 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %39960 = flow.tensor.transfer %39959 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %39961 = torch_c.from_builtin_tensor %39960 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %39962 = torch_c.to_builtin_tensor %39949 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %39963 = flow.tensor.transfer %39962 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %39964 = torch_c.from_builtin_tensor %39963 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %39965 = torch_c.to_builtin_tensor %39949 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %39966 = flow.tensor.transfer %39965 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %39967 = torch_c.from_builtin_tensor %39966 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %39968 = torch_c.to_builtin_tensor %39949 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %39969 = flow.tensor.transfer %39968 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %39970 = torch_c.from_builtin_tensor %39969 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %39971 = torch_c.to_builtin_tensor %39949 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %39972 = flow.tensor.transfer %39971 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %39973 = torch_c.from_builtin_tensor %39972 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
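    // Annotation: per-device rotary application. Each block slices the table to
    // the current sequence length (read from dim 1 of the [4,?,512] projection),
    // unsqueezes it to [1,?,1,64], bitcasts the f16 activations
    // [4,?,4,128] -> [4,?,4,64] complex<f16>, multiplies by the table, bitcasts
    // back to [4,?,4,128] f32, and truncates to f16. Device 0 first: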
    %int1_37386 = torch.constant.int 1
    %39974 = torch.aten.size.int %39717, %int1_37386 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_37387 = torch.constant.int 0
    %39975 = torch.aten.add.int %int0_37387, %39974 : !torch.int, !torch.int -> !torch.int
    %int0_37388 = torch.constant.int 0
    %int0_37389 = torch.constant.int 0
    %int1_37390 = torch.constant.int 1
    %39976 = torch.aten.slice.Tensor %39952, %int0_37388, %int0_37389, %39975, %int1_37390 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %39976, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37391 = torch.constant.int 1
    %int0_37392 = torch.constant.int 0
    %int9223372036854775807_37393 = torch.constant.int 9223372036854775807
    %int1_37394 = torch.constant.int 1
    %39977 = torch.aten.slice.Tensor %39976, %int1_37391, %int0_37392, %int9223372036854775807_37393, %int1_37394 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %39977, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37395 = torch.constant.int 0
    %39978 = torch.aten.unsqueeze %39977, %int0_37395 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %39978, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37396 = torch.constant.int 2
    %39979 = torch.aten.unsqueeze %39978, %int2_37396 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %39979, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37397 = torch.constant.int 3
    %int0_37398 = torch.constant.int 0
    %int9223372036854775807_37399 = torch.constant.int 9223372036854775807
    %int1_37400 = torch.constant.int 1
    %39980 = torch.aten.slice.Tensor %39979, %int3_37397, %int0_37398, %int9223372036854775807_37399, %int1_37400 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %39980, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %39981 = torch_c.to_builtin_tensor %39889 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_37401 = arith.constant 1 : index
    %dim_37402 = tensor.dim %39981, %c1_37401 : tensor<4x?x4x128xf16>
    %39982 = flow.tensor.bitcast %39981 : tensor<4x?x4x128xf16>{%dim_37402} -> tensor<4x?x4x64xcomplex<f16>>{%dim_37402}
    %39983 = torch_c.from_builtin_tensor %39982 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %39983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %39984 = torch.aten.mul.Tensor %39983, %39980 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %39984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %39985 = torch_c.to_builtin_tensor %39984 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_37403 = arith.constant 1 : index
    %dim_37404 = tensor.dim %39985, %c1_37403 : tensor<4x?x4x64xcomplex<f32>>
    %39986 = flow.tensor.bitcast %39985 : tensor<4x?x4x64xcomplex<f32>>{%dim_37404} -> tensor<4x?x4x128xf32>{%dim_37404}
    %39987 = torch_c.from_builtin_tensor %39986 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %39987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_37405 = torch.constant.int 5
    %39988 = torch.prims.convert_element_type %39987, %int5_37405 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %39988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
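    // Annotation: the same slice / bitcast / complex-multiply sequence repeats
    // below for the remaining device shards (device 1 onward).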
    %int1_37406 = torch.constant.int 1
    %39989 = torch.aten.size.int %39723, %int1_37406 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_37407 = torch.constant.int 0
    %39990 = torch.aten.add.int %int0_37407, %39989 : !torch.int, !torch.int -> !torch.int
    %int0_37408 = torch.constant.int 0
    %int0_37409 = torch.constant.int 0
    %int1_37410 = torch.constant.int 1
    %39991 = torch.aten.slice.Tensor %39955, %int0_37408, %int0_37409, %39990, %int1_37410 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %39991, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37411 = torch.constant.int 1
    %int0_37412 = torch.constant.int 0
    %int9223372036854775807_37413 = torch.constant.int 9223372036854775807
    %int1_37414 = torch.constant.int 1
    %39992 = torch.aten.slice.Tensor %39991, %int1_37411, %int0_37412, %int9223372036854775807_37413, %int1_37414 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %39992, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37415 = torch.constant.int 0
    %39993 = torch.aten.unsqueeze %39992, %int0_37415 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %39993, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37416 = torch.constant.int 2
    %39994 = torch.aten.unsqueeze %39993, %int2_37416 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %39994, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37417 = torch.constant.int 3
    %int0_37418 = torch.constant.int 0
    %int9223372036854775807_37419 = torch.constant.int 9223372036854775807
    %int1_37420 = torch.constant.int 1
    %39995 = torch.aten.slice.Tensor %39994, %int3_37417, %int0_37418, %int9223372036854775807_37419, %int1_37420 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %39995, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %39996 = torch_c.to_builtin_tensor %39891 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_37421 = arith.constant 1 : index
    %dim_37422 = tensor.dim %39996, %c1_37421 : tensor<4x?x4x128xf16>
    %39997 = flow.tensor.bitcast %39996 : tensor<4x?x4x128xf16>{%dim_37422} -> tensor<4x?x4x64xcomplex<f16>>{%dim_37422}
    %39998 = torch_c.from_builtin_tensor %39997 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %39998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %39999 = torch.aten.mul.Tensor %39998, %39995 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %39999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %40000 = torch_c.to_builtin_tensor %39999 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_37423 = arith.constant 1 : index
    %dim_37424 = tensor.dim %40000, %c1_37423 : tensor<4x?x4x64xcomplex<f32>>
    %40001 = flow.tensor.bitcast %40000 : tensor<4x?x4x64xcomplex<f32>>{%dim_37424} -> tensor<4x?x4x128xf32>{%dim_37424}
    %40002 = torch_c.from_builtin_tensor %40001 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %40002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_37425 = torch.constant.int 5
    %40003 = torch.prims.convert_element_type %40002, %int5_37425 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_37426 = torch.constant.int 1
    %40004 = torch.aten.size.int %39729, %int1_37426 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_37427 = torch.constant.int 0
    %40005 = torch.aten.add.int %int0_37427, %40004 : !torch.int, !torch.int -> !torch.int
    %int0_37428 = torch.constant.int 0
    %int0_37429 = torch.constant.int 0
    %int1_37430 = torch.constant.int 1
    %40006 = torch.aten.slice.Tensor %39958, %int0_37428, %int0_37429, %40005, %int1_37430 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40006, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37431 = torch.constant.int 1
    %int0_37432 = torch.constant.int 0
    %int9223372036854775807_37433 = torch.constant.int 9223372036854775807
    %int1_37434 = torch.constant.int 1
    %40007 = torch.aten.slice.Tensor %40006, %int1_37431, %int0_37432, %int9223372036854775807_37433, %int1_37434 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40007, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37435 = torch.constant.int 0
    %40008 = torch.aten.unsqueeze %40007, %int0_37435 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40008, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37436 = torch.constant.int 2
    %40009 = torch.aten.unsqueeze %40008, %int2_37436 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40009, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37437 = torch.constant.int 3
    %int0_37438 = torch.constant.int 0
    %int9223372036854775807_37439 = torch.constant.int 9223372036854775807
    %int1_37440 = torch.constant.int 1
    %40010 = torch.aten.slice.Tensor %40009, %int3_37437, %int0_37438, %int9223372036854775807_37439, %int1_37440 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40010, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40011 = torch_c.to_builtin_tensor %39893 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_37441 = arith.constant 1 : index
    %dim_37442 = tensor.dim %40011, %c1_37441 : tensor<4x?x4x128xf16>
    %40012 = flow.tensor.bitcast %40011 : tensor<4x?x4x128xf16>{%dim_37442} -> tensor<4x?x4x64xcomplex<f16>>{%dim_37442}
    %40013 = torch_c.from_builtin_tensor %40012 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %40013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %40014 = torch.aten.mul.Tensor %40013, %40010 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %40014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %40015 = torch_c.to_builtin_tensor %40014 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_37443 = arith.constant 1 : index
    %dim_37444 = tensor.dim %40015, %c1_37443 : tensor<4x?x4x64xcomplex<f32>>
    %40016 = flow.tensor.bitcast %40015 : tensor<4x?x4x64xcomplex<f32>>{%dim_37444} -> tensor<4x?x4x128xf32>{%dim_37444}
    %40017 = torch_c.from_builtin_tensor %40016 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %40017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_37445 = torch.constant.int 5
    %40018 = torch.prims.convert_element_type %40017, %int5_37445 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_37446 = torch.constant.int 1
    %40019 = torch.aten.size.int %39735, %int1_37446 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_37447 = torch.constant.int 0
    %40020 = torch.aten.add.int %int0_37447, %40019 : !torch.int, !torch.int -> !torch.int
    %int0_37448 = torch.constant.int 0
    %int0_37449 = torch.constant.int 0
    %int1_37450 = torch.constant.int 1
    %40021 = torch.aten.slice.Tensor %39961, %int0_37448, %int0_37449, %40020, %int1_37450 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40021, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37451 = torch.constant.int 1
    %int0_37452 = torch.constant.int 0
    %int9223372036854775807_37453 = torch.constant.int 9223372036854775807
    %int1_37454 = torch.constant.int 1
    %40022 = torch.aten.slice.Tensor %40021, %int1_37451, %int0_37452, %int9223372036854775807_37453, %int1_37454 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40022, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37455 = torch.constant.int 0
    %40023 = torch.aten.unsqueeze %40022, %int0_37455 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40023, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37456 = torch.constant.int 2
    %40024 = torch.aten.unsqueeze %40023, %int2_37456 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40024, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37457 = torch.constant.int 3
    %int0_37458 = torch.constant.int 0
    %int9223372036854775807_37459 = torch.constant.int 9223372036854775807
    %int1_37460 = torch.constant.int 1
    %40025 = torch.aten.slice.Tensor %40024, %int3_37457, %int0_37458, %int9223372036854775807_37459, %int1_37460 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40025, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40026 = torch_c.to_builtin_tensor %39895 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_37461 = arith.constant 1 : index
    %dim_37462 = tensor.dim %40026, %c1_37461 : tensor<4x?x4x128xf16>
    %40027 = flow.tensor.bitcast %40026 : tensor<4x?x4x128xf16>{%dim_37462} -> tensor<4x?x4x64xcomplex<f16>>{%dim_37462}
    %40028 = torch_c.from_builtin_tensor %40027 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %40028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %40029 = torch.aten.mul.Tensor %40028, %40025 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %40029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %40030 = torch_c.to_builtin_tensor %40029 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_37463 = arith.constant 1 : index
    %dim_37464 = tensor.dim %40030, %c1_37463 : tensor<4x?x4x64xcomplex<f32>>
    %40031 = flow.tensor.bitcast %40030 : tensor<4x?x4x64xcomplex<f32>>{%dim_37464} -> tensor<4x?x4x128xf32>{%dim_37464}
    %40032 = torch_c.from_builtin_tensor %40031 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %40032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_37465 = torch.constant.int 5
    %40033 = torch.prims.convert_element_type %40032, %int5_37465 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_37466 = torch.constant.int 1
    %40034 = torch.aten.size.int %39741, %int1_37466 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_37467 = torch.constant.int 0
    %40035 = torch.aten.add.int %int0_37467, %40034 : !torch.int, !torch.int -> !torch.int
    %int0_37468 = torch.constant.int 0
    %int0_37469 = torch.constant.int 0
    %int1_37470 = torch.constant.int 1
    %40036 = torch.aten.slice.Tensor %39964, %int0_37468, %int0_37469, %40035, %int1_37470 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40036, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37471 = torch.constant.int 1
    %int0_37472 = torch.constant.int 0
    %int9223372036854775807_37473 = torch.constant.int 9223372036854775807
    %int1_37474 = torch.constant.int 1
    %40037 = torch.aten.slice.Tensor %40036, %int1_37471, %int0_37472, %int9223372036854775807_37473, %int1_37474 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40037, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37475 = torch.constant.int 0
    %40038 = torch.aten.unsqueeze %40037, %int0_37475 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40038, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37476 = torch.constant.int 2
    %40039 = torch.aten.unsqueeze %40038, %int2_37476 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40039, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37477 = torch.constant.int 3
    %int0_37478 = torch.constant.int 0
    %int9223372036854775807_37479 = torch.constant.int 9223372036854775807
    %int1_37480 = torch.constant.int 1
    %40040 = torch.aten.slice.Tensor %40039, %int3_37477, %int0_37478, %int9223372036854775807_37479, %int1_37480 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40040, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40041 = torch_c.to_builtin_tensor %39897 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_37481 = arith.constant 1 : index
    %dim_37482 = tensor.dim %40041, %c1_37481 : tensor<4x?x4x128xf16>
    %40042 = flow.tensor.bitcast %40041 : tensor<4x?x4x128xf16>{%dim_37482} -> tensor<4x?x4x64xcomplex<f16>>{%dim_37482}
    %40043 = torch_c.from_builtin_tensor %40042 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %40043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %40044 = torch.aten.mul.Tensor %40043, %40040 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %40044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %40045 = torch_c.to_builtin_tensor %40044 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_37483 = arith.constant 1 : index
    %dim_37484 = tensor.dim %40045, %c1_37483 : tensor<4x?x4x64xcomplex<f32>>
    %40046 = flow.tensor.bitcast %40045 : tensor<4x?x4x64xcomplex<f32>>{%dim_37484} -> tensor<4x?x4x128xf32>{%dim_37484}
    %40047 = torch_c.from_builtin_tensor %40046 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %40047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_37485 = torch.constant.int 5
    %40048 = torch.prims.convert_element_type %40047, %int5_37485 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_37486 = torch.constant.int 1
    %40049 = torch.aten.size.int %39747, %int1_37486 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_37487 = torch.constant.int 0
    %40050 = torch.aten.add.int %int0_37487, %40049 : !torch.int, !torch.int -> !torch.int
    %int0_37488 = torch.constant.int 0
    %int0_37489 = torch.constant.int 0
    %int1_37490 = torch.constant.int 1
    %40051 = torch.aten.slice.Tensor %39967, %int0_37488, %int0_37489, %40050, %int1_37490 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40051, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37491 = torch.constant.int 1
    %int0_37492 = torch.constant.int 0
    %int9223372036854775807_37493 = torch.constant.int 9223372036854775807
    %int1_37494 = torch.constant.int 1
    %40052 = torch.aten.slice.Tensor %40051, %int1_37491, %int0_37492, %int9223372036854775807_37493, %int1_37494 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40052, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37495 = torch.constant.int 0
    %40053 = torch.aten.unsqueeze %40052, %int0_37495 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40053, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37496 = torch.constant.int 2
    %40054 = torch.aten.unsqueeze %40053, %int2_37496 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40054, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37497 = torch.constant.int 3
    %int0_37498 = torch.constant.int 0
    %int9223372036854775807_37499 = torch.constant.int 9223372036854775807
    %int1_37500 = torch.constant.int 1
    %40055 = torch.aten.slice.Tensor %40054, %int3_37497, %int0_37498, %int9223372036854775807_37499, %int1_37500 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40055, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40056 = torch_c.to_builtin_tensor %39899 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_37501 = arith.constant 1 : index
    %dim_37502 = tensor.dim %40056, %c1_37501 : tensor<4x?x4x128xf16>
    %40057 = flow.tensor.bitcast %40056 : tensor<4x?x4x128xf16>{%dim_37502} -> tensor<4x?x4x64xcomplex<f16>>{%dim_37502}
    %40058 = torch_c.from_builtin_tensor %40057 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %40058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %40059 = torch.aten.mul.Tensor %40058, %40055 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %40059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %40060 = torch_c.to_builtin_tensor %40059 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_37503 = arith.constant 1 : index
    %dim_37504 = tensor.dim %40060, %c1_37503 : tensor<4x?x4x64xcomplex<f32>>
    %40061 = flow.tensor.bitcast %40060 : tensor<4x?x4x64xcomplex<f32>>{%dim_37504} -> tensor<4x?x4x128xf32>{%dim_37504}
    %40062 = torch_c.from_builtin_tensor %40061 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %40062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_37505 = torch.constant.int 5
    %40063 = torch.prims.convert_element_type %40062, %int5_37505 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_37506 = torch.constant.int 1
    %40064 = torch.aten.size.int %39753, %int1_37506 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_37507 = torch.constant.int 0
    %40065 = torch.aten.add.int %int0_37507, %40064 : !torch.int, !torch.int -> !torch.int
    %int0_37508 = torch.constant.int 0
    %int0_37509 = torch.constant.int 0
    %int1_37510 = torch.constant.int 1
    %40066 = torch.aten.slice.Tensor %39970, %int0_37508, %int0_37509, %40065, %int1_37510 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40066, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37511 = torch.constant.int 1
    %int0_37512 = torch.constant.int 0
    %int9223372036854775807_37513 = torch.constant.int 9223372036854775807
    %int1_37514 = torch.constant.int 1
    %40067 = torch.aten.slice.Tensor %40066, %int1_37511, %int0_37512, %int9223372036854775807_37513, %int1_37514 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40067, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37515 = torch.constant.int 0
    %40068 = torch.aten.unsqueeze %40067, %int0_37515 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40068, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37516 = torch.constant.int 2
    %40069 = torch.aten.unsqueeze %40068, %int2_37516 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40069, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37517 = torch.constant.int 3
    %int0_37518 = torch.constant.int 0
    %int9223372036854775807_37519 = torch.constant.int 9223372036854775807
    %int1_37520 = torch.constant.int 1
    %40070 = torch.aten.slice.Tensor %40069, %int3_37517, %int0_37518, %int9223372036854775807_37519, %int1_37520 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40070, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40071 = torch_c.to_builtin_tensor %39901 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_37521 = arith.constant 1 : index
    %dim_37522 = tensor.dim %40071, %c1_37521 : tensor<4x?x4x128xf16>
    %40072 = flow.tensor.bitcast %40071 : tensor<4x?x4x128xf16>{%dim_37522} -> tensor<4x?x4x64xcomplex<f16>>{%dim_37522}
    %40073 = torch_c.from_builtin_tensor %40072 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %40073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %40074 = torch.aten.mul.Tensor %40073, %40070 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %40074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %40075 = torch_c.to_builtin_tensor %40074 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_37523 = arith.constant 1 : index
    %dim_37524 = tensor.dim %40075, %c1_37523 : tensor<4x?x4x64xcomplex<f32>>
    %40076 = flow.tensor.bitcast %40075 : tensor<4x?x4x64xcomplex<f32>>{%dim_37524} -> tensor<4x?x4x128xf32>{%dim_37524}
    %40077 = torch_c.from_builtin_tensor %40076 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %40077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_37525 = torch.constant.int 5
    %40078 = torch.prims.convert_element_type %40077, %int5_37525 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_37526 = torch.constant.int 1
    %40079 = torch.aten.size.int %39759, %int1_37526 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_37527 = torch.constant.int 0
    %40080 = torch.aten.add.int %int0_37527, %40079 : !torch.int, !torch.int -> !torch.int
    %int0_37528 = torch.constant.int 0
    %int0_37529 = torch.constant.int 0
    %int1_37530 = torch.constant.int 1
    %40081 = torch.aten.slice.Tensor %39973, %int0_37528, %int0_37529, %40080, %int1_37530 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40081, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37531 = torch.constant.int 1
    %int0_37532 = torch.constant.int 0
    %int9223372036854775807_37533 = torch.constant.int 9223372036854775807
    %int1_37534 = torch.constant.int 1
    %40082 = torch.aten.slice.Tensor %40081, %int1_37531, %int0_37532, %int9223372036854775807_37533, %int1_37534 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40082, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37535 = torch.constant.int 0
    %40083 = torch.aten.unsqueeze %40082, %int0_37535 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40083, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37536 = torch.constant.int 2
    %40084 = torch.aten.unsqueeze %40083, %int2_37536 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40084, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37537 = torch.constant.int 3
    %int0_37538 = torch.constant.int 0
    %int9223372036854775807_37539 = torch.constant.int 9223372036854775807
    %int1_37540 = torch.constant.int 1
    %40085 = torch.aten.slice.Tensor %40084, %int3_37537, %int0_37538, %int9223372036854775807_37539, %int1_37540 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40085, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40086 = torch_c.to_builtin_tensor %39903 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_37541 = arith.constant 1 : index
    %dim_37542 = tensor.dim %40086, %c1_37541 : tensor<4x?x4x128xf16>
    %40087 = flow.tensor.bitcast %40086 : tensor<4x?x4x128xf16>{%dim_37542} -> tensor<4x?x4x64xcomplex<f16>>{%dim_37542}
    %40088 = torch_c.from_builtin_tensor %40087 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %40088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %40089 = torch.aten.mul.Tensor %40088, %40085 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %40089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %40090 = torch_c.to_builtin_tensor %40089 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_37543 = arith.constant 1 : index
    %dim_37544 = tensor.dim %40090, %c1_37543 : tensor<4x?x4x64xcomplex<f32>>
    %40091 = flow.tensor.bitcast %40090 : tensor<4x?x4x64xcomplex<f32>>{%dim_37544} -> tensor<4x?x4x128xf32>{%dim_37544}
    %40092 = torch_c.from_builtin_tensor %40091 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %40092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_37545 = torch.constant.int 5
    %40093 = torch.prims.convert_element_type %40092, %int5_37545 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
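    // Rebuild the rotary frequency table on the host: positions 0..131071
    // against inverse frequencies 500000^(-k/128) for even k = 0, 2, ..., 126,
    // combined as cos + i*sin into a [131072, 64] complex<f32> table.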
    %int131072_37546 = torch.constant.int 131072
    %none_37547 = torch.constant.none
    %none_37548 = torch.constant.none
    %cpu_37549 = torch.constant.device "cpu"
    %false_37550 = torch.constant.bool false
    %40094 = torch.aten.arange %int131072_37546, %none_37547, %none_37548, %cpu_37549, %false_37550 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_37551 = torch.constant.int 0
    %int128_37552 = torch.constant.int 128
    %int2_37553 = torch.constant.int 2
    %none_37554 = torch.constant.none
    %none_37555 = torch.constant.none
    %cpu_37556 = torch.constant.device "cpu"
    %false_37557 = torch.constant.bool false
    %40095 = torch.aten.arange.start_step %int0_37551, %int128_37552, %int2_37553, %none_37554, %none_37555, %cpu_37556, %false_37557 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_37558 = torch.constant.int 0
    %int0_37559 = torch.constant.int 0
    %int64_37560 = torch.constant.int 64
    %int1_37561 = torch.constant.int 1
    %40096 = torch.aten.slice.Tensor %40095, %int0_37558, %int0_37559, %int64_37560, %int1_37561 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_37562 = torch.constant.int 6
    %40097 = torch.prims.convert_element_type %40096, %int6_37562 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_37563 = torch.constant.int 128
    %40098 = torch.aten.div.Scalar %40097, %int128_37563 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_37564 = torch.constant.float 5.000000e+05
    %40099 = torch.aten.pow.Scalar %float5.000000e05_37564, %40098 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %40100 = torch.aten.reciprocal %40099 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_37565 = torch.constant.float 1.000000e+00
    %40101 = torch.aten.mul.Scalar %40100, %float1.000000e00_37565 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_37566 = torch.constant.int 131072
    %int1_37567 = torch.constant.int 1
    %40102 = torch.prim.ListConstruct %int131072_37566, %int1_37567 : (!torch.int, !torch.int) -> !torch.list<int>
    %40103 = torch.aten.view %40094, %40102 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %40104 = torch.aten.mul.Tensor %40103, %40101 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %40105 = torch.aten.cos %40104 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %40106 = torch.aten.sin %40104 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %40107 = torch.aten.complex %40105, %40106 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
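    // Broadcast the recomputed table to all eight devices: each
    // to_builtin_tensor / flow.tensor.transfer / from_builtin_tensor triple
    // materializes a copy of %40107 on one of @__device_0 .. @__device_7.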
    %40108 = torch_c.to_builtin_tensor %40107 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %40109 = flow.tensor.transfer %40108 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %40110 = torch_c.from_builtin_tensor %40109 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %40111 = torch_c.to_builtin_tensor %40107 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %40112 = flow.tensor.transfer %40111 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %40113 = torch_c.from_builtin_tensor %40112 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %40114 = torch_c.to_builtin_tensor %40107 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %40115 = flow.tensor.transfer %40114 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %40116 = torch_c.from_builtin_tensor %40115 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %40117 = torch_c.to_builtin_tensor %40107 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %40118 = flow.tensor.transfer %40117 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %40119 = torch_c.from_builtin_tensor %40118 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %40120 = torch_c.to_builtin_tensor %40107 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %40121 = flow.tensor.transfer %40120 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %40122 = torch_c.from_builtin_tensor %40121 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %40123 = torch_c.to_builtin_tensor %40107 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %40124 = flow.tensor.transfer %40123 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %40125 = torch_c.from_builtin_tensor %40124 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %40126 = torch_c.to_builtin_tensor %40107 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %40127 = flow.tensor.transfer %40126 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %40128 = torch_c.from_builtin_tensor %40127 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %40129 = torch_c.to_builtin_tensor %40107 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %40130 = flow.tensor.transfer %40129 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %40131 = torch_c.from_builtin_tensor %40130 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
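    // Same rotation applied to the second set of shards, one head per device
    // (presumably the KV heads under 8-way sharding): activations here are
    // [4, seq, 1, 128] f16, and the frequency slices broadcast over the
    // singleton head dimension.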
    %int1_37568 = torch.constant.int 1
    %40132 = torch.aten.size.int %39781, %int1_37568 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_37569 = torch.constant.int 0
    %40133 = torch.aten.add.int %int0_37569, %40132 : !torch.int, !torch.int -> !torch.int
    %int0_37570 = torch.constant.int 0
    %int0_37571 = torch.constant.int 0
    %int1_37572 = torch.constant.int 1
    %40134 = torch.aten.slice.Tensor %40110, %int0_37570, %int0_37571, %40133, %int1_37572 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40134, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37573 = torch.constant.int 1
    %int0_37574 = torch.constant.int 0
    %int9223372036854775807_37575 = torch.constant.int 9223372036854775807
    %int1_37576 = torch.constant.int 1
    %40135 = torch.aten.slice.Tensor %40134, %int1_37573, %int0_37574, %int9223372036854775807_37575, %int1_37576 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40135, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37577 = torch.constant.int 0
    %40136 = torch.aten.unsqueeze %40135, %int0_37577 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40136, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37578 = torch.constant.int 2
    %40137 = torch.aten.unsqueeze %40136, %int2_37578 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40137, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37579 = torch.constant.int 3
    %int0_37580 = torch.constant.int 0
    %int9223372036854775807_37581 = torch.constant.int 9223372036854775807
    %int1_37582 = torch.constant.int 1
    %40138 = torch.aten.slice.Tensor %40137, %int3_37579, %int0_37580, %int9223372036854775807_37581, %int1_37582 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40138, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40139 = torch_c.to_builtin_tensor %39905 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_37583 = arith.constant 1 : index
    %dim_37584 = tensor.dim %40139, %c1_37583 : tensor<4x?x1x128xf16>
    %40140 = flow.tensor.bitcast %40139 : tensor<4x?x1x128xf16>{%dim_37584} -> tensor<4x?x1x64xcomplex<f16>>{%dim_37584}
    %40141 = torch_c.from_builtin_tensor %40140 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %40141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %40142 = torch.aten.mul.Tensor %40141, %40138 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %40143 = torch_c.to_builtin_tensor %40142 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_37585 = arith.constant 1 : index
    %dim_37586 = tensor.dim %40143, %c1_37585 : tensor<4x?x1x64xcomplex<f32>>
    %40144 = flow.tensor.bitcast %40143 : tensor<4x?x1x64xcomplex<f32>>{%dim_37586} -> tensor<4x?x1x128xf32>{%dim_37586}
    %40145 = torch_c.from_builtin_tensor %40144 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %40145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_37587 = torch.constant.int 5
    %40146 = torch.prims.convert_element_type %40145, %int5_37587 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %40146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_37588 = torch.constant.int 1
    %40147 = torch.aten.size.int %39787, %int1_37588 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_37589 = torch.constant.int 0
    %40148 = torch.aten.add.int %int0_37589, %40147 : !torch.int, !torch.int -> !torch.int
    %int0_37590 = torch.constant.int 0
    %int0_37591 = torch.constant.int 0
    %int1_37592 = torch.constant.int 1
    %40149 = torch.aten.slice.Tensor %40113, %int0_37590, %int0_37591, %40148, %int1_37592 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40149, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37593 = torch.constant.int 1
    %int0_37594 = torch.constant.int 0
    %int9223372036854775807_37595 = torch.constant.int 9223372036854775807
    %int1_37596 = torch.constant.int 1
    %40150 = torch.aten.slice.Tensor %40149, %int1_37593, %int0_37594, %int9223372036854775807_37595, %int1_37596 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40150, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37597 = torch.constant.int 0
    %40151 = torch.aten.unsqueeze %40150, %int0_37597 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40151, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37598 = torch.constant.int 2
    %40152 = torch.aten.unsqueeze %40151, %int2_37598 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40152, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37599 = torch.constant.int 3
    %int0_37600 = torch.constant.int 0
    %int9223372036854775807_37601 = torch.constant.int 9223372036854775807
    %int1_37602 = torch.constant.int 1
    %40153 = torch.aten.slice.Tensor %40152, %int3_37599, %int0_37600, %int9223372036854775807_37601, %int1_37602 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40153, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40154 = torch_c.to_builtin_tensor %39907 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_37603 = arith.constant 1 : index
    %dim_37604 = tensor.dim %40154, %c1_37603 : tensor<4x?x1x128xf16>
    %40155 = flow.tensor.bitcast %40154 : tensor<4x?x1x128xf16>{%dim_37604} -> tensor<4x?x1x64xcomplex<f16>>{%dim_37604}
    %40156 = torch_c.from_builtin_tensor %40155 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %40156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %40157 = torch.aten.mul.Tensor %40156, %40153 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %40158 = torch_c.to_builtin_tensor %40157 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_37605 = arith.constant 1 : index
    %dim_37606 = tensor.dim %40158, %c1_37605 : tensor<4x?x1x64xcomplex<f32>>
    %40159 = flow.tensor.bitcast %40158 : tensor<4x?x1x64xcomplex<f32>>{%dim_37606} -> tensor<4x?x1x128xf32>{%dim_37606}
    %40160 = torch_c.from_builtin_tensor %40159 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %40160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_37607 = torch.constant.int 5
    %40161 = torch.prims.convert_element_type %40160, %int5_37607 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %40161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_37608 = torch.constant.int 1
    %40162 = torch.aten.size.int %39793, %int1_37608 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_37609 = torch.constant.int 0
    %40163 = torch.aten.add.int %int0_37609, %40162 : !torch.int, !torch.int -> !torch.int
    %int0_37610 = torch.constant.int 0
    %int0_37611 = torch.constant.int 0
    %int1_37612 = torch.constant.int 1
    %40164 = torch.aten.slice.Tensor %40116, %int0_37610, %int0_37611, %40163, %int1_37612 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40164, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37613 = torch.constant.int 1
    %int0_37614 = torch.constant.int 0
    %int9223372036854775807_37615 = torch.constant.int 9223372036854775807
    %int1_37616 = torch.constant.int 1
    %40165 = torch.aten.slice.Tensor %40164, %int1_37613, %int0_37614, %int9223372036854775807_37615, %int1_37616 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40165, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37617 = torch.constant.int 0
    %40166 = torch.aten.unsqueeze %40165, %int0_37617 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40166, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37618 = torch.constant.int 2
    %40167 = torch.aten.unsqueeze %40166, %int2_37618 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40167, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37619 = torch.constant.int 3
    %int0_37620 = torch.constant.int 0
    %int9223372036854775807_37621 = torch.constant.int 9223372036854775807
    %int1_37622 = torch.constant.int 1
    %40168 = torch.aten.slice.Tensor %40167, %int3_37619, %int0_37620, %int9223372036854775807_37621, %int1_37622 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40168, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40169 = torch_c.to_builtin_tensor %39909 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_37623 = arith.constant 1 : index
    %dim_37624 = tensor.dim %40169, %c1_37623 : tensor<4x?x1x128xf16>
    %40170 = flow.tensor.bitcast %40169 : tensor<4x?x1x128xf16>{%dim_37624} -> tensor<4x?x1x64xcomplex<f16>>{%dim_37624}
    %40171 = torch_c.from_builtin_tensor %40170 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %40171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %40172 = torch.aten.mul.Tensor %40171, %40168 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %40173 = torch_c.to_builtin_tensor %40172 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_37625 = arith.constant 1 : index
    %dim_37626 = tensor.dim %40173, %c1_37625 : tensor<4x?x1x64xcomplex<f32>>
    %40174 = flow.tensor.bitcast %40173 : tensor<4x?x1x64xcomplex<f32>>{%dim_37626} -> tensor<4x?x1x128xf32>{%dim_37626}
    %40175 = torch_c.from_builtin_tensor %40174 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %40175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_37627 = torch.constant.int 5
    %40176 = torch.prims.convert_element_type %40175, %int5_37627 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %40176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_37628 = torch.constant.int 1
    %40177 = torch.aten.size.int %39799, %int1_37628 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_37629 = torch.constant.int 0
    %40178 = torch.aten.add.int %int0_37629, %40177 : !torch.int, !torch.int -> !torch.int
    %int0_37630 = torch.constant.int 0
    %int0_37631 = torch.constant.int 0
    %int1_37632 = torch.constant.int 1
    %40179 = torch.aten.slice.Tensor %40119, %int0_37630, %int0_37631, %40178, %int1_37632 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40179, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37633 = torch.constant.int 1
    %int0_37634 = torch.constant.int 0
    %int9223372036854775807_37635 = torch.constant.int 9223372036854775807
    %int1_37636 = torch.constant.int 1
    %40180 = torch.aten.slice.Tensor %40179, %int1_37633, %int0_37634, %int9223372036854775807_37635, %int1_37636 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40180, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37637 = torch.constant.int 0
    %40181 = torch.aten.unsqueeze %40180, %int0_37637 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40181, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37638 = torch.constant.int 2
    %40182 = torch.aten.unsqueeze %40181, %int2_37638 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40182, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37639 = torch.constant.int 3
    %int0_37640 = torch.constant.int 0
    %int9223372036854775807_37641 = torch.constant.int 9223372036854775807
    %int1_37642 = torch.constant.int 1
    %40183 = torch.aten.slice.Tensor %40182, %int3_37639, %int0_37640, %int9223372036854775807_37641, %int1_37642 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40183, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40184 = torch_c.to_builtin_tensor %39911 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_37643 = arith.constant 1 : index
    %dim_37644 = tensor.dim %40184, %c1_37643 : tensor<4x?x1x128xf16>
    %40185 = flow.tensor.bitcast %40184 : tensor<4x?x1x128xf16>{%dim_37644} -> tensor<4x?x1x64xcomplex<f16>>{%dim_37644}
    %40186 = torch_c.from_builtin_tensor %40185 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %40186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %40187 = torch.aten.mul.Tensor %40186, %40183 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %40188 = torch_c.to_builtin_tensor %40187 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_37645 = arith.constant 1 : index
    %dim_37646 = tensor.dim %40188, %c1_37645 : tensor<4x?x1x64xcomplex<f32>>
    %40189 = flow.tensor.bitcast %40188 : tensor<4x?x1x64xcomplex<f32>>{%dim_37646} -> tensor<4x?x1x128xf32>{%dim_37646}
    %40190 = torch_c.from_builtin_tensor %40189 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %40190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_37647 = torch.constant.int 5
    %40191 = torch.prims.convert_element_type %40190, %int5_37647 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %40191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_37648 = torch.constant.int 1
    %40192 = torch.aten.size.int %39805, %int1_37648 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_37649 = torch.constant.int 0
    %40193 = torch.aten.add.int %int0_37649, %40192 : !torch.int, !torch.int -> !torch.int
    %int0_37650 = torch.constant.int 0
    %int0_37651 = torch.constant.int 0
    %int1_37652 = torch.constant.int 1
    %40194 = torch.aten.slice.Tensor %40122, %int0_37650, %int0_37651, %40193, %int1_37652 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40194, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37653 = torch.constant.int 1
    %int0_37654 = torch.constant.int 0
    %int9223372036854775807_37655 = torch.constant.int 9223372036854775807
    %int1_37656 = torch.constant.int 1
    %40195 = torch.aten.slice.Tensor %40194, %int1_37653, %int0_37654, %int9223372036854775807_37655, %int1_37656 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40195, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37657 = torch.constant.int 0
    %40196 = torch.aten.unsqueeze %40195, %int0_37657 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40196, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37658 = torch.constant.int 2
    %40197 = torch.aten.unsqueeze %40196, %int2_37658 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40197, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37659 = torch.constant.int 3
    %int0_37660 = torch.constant.int 0
    %int9223372036854775807_37661 = torch.constant.int 9223372036854775807
    %int1_37662 = torch.constant.int 1
    %40198 = torch.aten.slice.Tensor %40197, %int3_37659, %int0_37660, %int9223372036854775807_37661, %int1_37662 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40198, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40199 = torch_c.to_builtin_tensor %39913 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_37663 = arith.constant 1 : index
    %dim_37664 = tensor.dim %40199, %c1_37663 : tensor<4x?x1x128xf16>
    %40200 = flow.tensor.bitcast %40199 : tensor<4x?x1x128xf16>{%dim_37664} -> tensor<4x?x1x64xcomplex<f16>>{%dim_37664}
    %40201 = torch_c.from_builtin_tensor %40200 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %40201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %40202 = torch.aten.mul.Tensor %40201, %40198 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %40203 = torch_c.to_builtin_tensor %40202 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_37665 = arith.constant 1 : index
    %dim_37666 = tensor.dim %40203, %c1_37665 : tensor<4x?x1x64xcomplex<f32>>
    %40204 = flow.tensor.bitcast %40203 : tensor<4x?x1x64xcomplex<f32>>{%dim_37666} -> tensor<4x?x1x128xf32>{%dim_37666}
    %40205 = torch_c.from_builtin_tensor %40204 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %40205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_37667 = torch.constant.int 5
    %40206 = torch.prims.convert_element_type %40205, %int5_37667 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %40206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_37668 = torch.constant.int 1
    %40207 = torch.aten.size.int %39811, %int1_37668 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_37669 = torch.constant.int 0
    %40208 = torch.aten.add.int %int0_37669, %40207 : !torch.int, !torch.int -> !torch.int
    %int0_37670 = torch.constant.int 0
    %int0_37671 = torch.constant.int 0
    %int1_37672 = torch.constant.int 1
    %40209 = torch.aten.slice.Tensor %40125, %int0_37670, %int0_37671, %40208, %int1_37672 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40209, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37673 = torch.constant.int 1
    %int0_37674 = torch.constant.int 0
    %int9223372036854775807_37675 = torch.constant.int 9223372036854775807
    %int1_37676 = torch.constant.int 1
    %40210 = torch.aten.slice.Tensor %40209, %int1_37673, %int0_37674, %int9223372036854775807_37675, %int1_37676 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40210, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37677 = torch.constant.int 0
    %40211 = torch.aten.unsqueeze %40210, %int0_37677 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40211, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37678 = torch.constant.int 2
    %40212 = torch.aten.unsqueeze %40211, %int2_37678 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40212, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37679 = torch.constant.int 3
    %int0_37680 = torch.constant.int 0
    %int9223372036854775807_37681 = torch.constant.int 9223372036854775807
    %int1_37682 = torch.constant.int 1
    %40213 = torch.aten.slice.Tensor %40212, %int3_37679, %int0_37680, %int9223372036854775807_37681, %int1_37682 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40213, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40214 = torch_c.to_builtin_tensor %39915 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_37683 = arith.constant 1 : index
    %dim_37684 = tensor.dim %40214, %c1_37683 : tensor<4x?x1x128xf16>
    %40215 = flow.tensor.bitcast %40214 : tensor<4x?x1x128xf16>{%dim_37684} -> tensor<4x?x1x64xcomplex<f16>>{%dim_37684}
    %40216 = torch_c.from_builtin_tensor %40215 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %40216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %40217 = torch.aten.mul.Tensor %40216, %40213 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %40218 = torch_c.to_builtin_tensor %40217 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_37685 = arith.constant 1 : index
    %dim_37686 = tensor.dim %40218, %c1_37685 : tensor<4x?x1x64xcomplex<f32>>
    %40219 = flow.tensor.bitcast %40218 : tensor<4x?x1x64xcomplex<f32>>{%dim_37686} -> tensor<4x?x1x128xf32>{%dim_37686}
    %40220 = torch_c.from_builtin_tensor %40219 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %40220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_37687 = torch.constant.int 5
    %40221 = torch.prims.convert_element_type %40220, %int5_37687 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %40221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_37688 = torch.constant.int 1
    %40222 = torch.aten.size.int %39817, %int1_37688 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_37689 = torch.constant.int 0
    %40223 = torch.aten.add.int %int0_37689, %40222 : !torch.int, !torch.int -> !torch.int
    %int0_37690 = torch.constant.int 0
    %int0_37691 = torch.constant.int 0
    %int1_37692 = torch.constant.int 1
    %40224 = torch.aten.slice.Tensor %40128, %int0_37690, %int0_37691, %40223, %int1_37692 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40224, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37693 = torch.constant.int 1
    %int0_37694 = torch.constant.int 0
    %int9223372036854775807_37695 = torch.constant.int 9223372036854775807
    %int1_37696 = torch.constant.int 1
    %40225 = torch.aten.slice.Tensor %40224, %int1_37693, %int0_37694, %int9223372036854775807_37695, %int1_37696 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40225, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37697 = torch.constant.int 0
    %40226 = torch.aten.unsqueeze %40225, %int0_37697 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40226, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37698 = torch.constant.int 2
    %40227 = torch.aten.unsqueeze %40226, %int2_37698 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40227, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37699 = torch.constant.int 3
    %int0_37700 = torch.constant.int 0
    %int9223372036854775807_37701 = torch.constant.int 9223372036854775807
    %int1_37702 = torch.constant.int 1
    %40228 = torch.aten.slice.Tensor %40227, %int3_37699, %int0_37700, %int9223372036854775807_37701, %int1_37702 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40228, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40229 = torch_c.to_builtin_tensor %39917 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_37703 = arith.constant 1 : index
    %dim_37704 = tensor.dim %40229, %c1_37703 : tensor<4x?x1x128xf16>
    %40230 = flow.tensor.bitcast %40229 : tensor<4x?x1x128xf16>{%dim_37704} -> tensor<4x?x1x64xcomplex<f16>>{%dim_37704}
    %40231 = torch_c.from_builtin_tensor %40230 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %40231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %40232 = torch.aten.mul.Tensor %40231, %40228 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %40233 = torch_c.to_builtin_tensor %40232 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_37705 = arith.constant 1 : index
    %dim_37706 = tensor.dim %40233, %c1_37705 : tensor<4x?x1x64xcomplex<f32>>
    %40234 = flow.tensor.bitcast %40233 : tensor<4x?x1x64xcomplex<f32>>{%dim_37706} -> tensor<4x?x1x128xf32>{%dim_37706}
    %40235 = torch_c.from_builtin_tensor %40234 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %40235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_37707 = torch.constant.int 5
    %40236 = torch.prims.convert_element_type %40235, %int5_37707 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %40236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_37708 = torch.constant.int 1
    %40237 = torch.aten.size.int %39823, %int1_37708 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_37709 = torch.constant.int 0
    %40238 = torch.aten.add.int %int0_37709, %40237 : !torch.int, !torch.int -> !torch.int
    %int0_37710 = torch.constant.int 0
    %int0_37711 = torch.constant.int 0
    %int1_37712 = torch.constant.int 1
    %40239 = torch.aten.slice.Tensor %40131, %int0_37710, %int0_37711, %40238, %int1_37712 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40239, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_37713 = torch.constant.int 1
    %int0_37714 = torch.constant.int 0
    %int9223372036854775807_37715 = torch.constant.int 9223372036854775807
    %int1_37716 = torch.constant.int 1
    %40240 = torch.aten.slice.Tensor %40239, %int1_37713, %int0_37714, %int9223372036854775807_37715, %int1_37716 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %40240, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_37717 = torch.constant.int 0
    %40241 = torch.aten.unsqueeze %40240, %int0_37717 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %40241, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_37718 = torch.constant.int 2
    %40242 = torch.aten.unsqueeze %40241, %int2_37718 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40242, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_37719 = torch.constant.int 3
    %int0_37720 = torch.constant.int 0
    %int9223372036854775807_37721 = torch.constant.int 9223372036854775807
    %int1_37722 = torch.constant.int 1
    %40243 = torch.aten.slice.Tensor %40242, %int3_37719, %int0_37720, %int9223372036854775807_37721, %int1_37722 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40243, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %40244 = torch_c.to_builtin_tensor %39919 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_37723 = arith.constant 1 : index
    %dim_37724 = tensor.dim %40244, %c1_37723 : tensor<4x?x1x128xf16>
    %40245 = flow.tensor.bitcast %40244 : tensor<4x?x1x128xf16>{%dim_37724} -> tensor<4x?x1x64xcomplex<f16>>{%dim_37724}
    %40246 = torch_c.from_builtin_tensor %40245 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %40246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %40247 = torch.aten.mul.Tensor %40246, %40243 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %40247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %40248 = torch_c.to_builtin_tensor %40247 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_37725 = arith.constant 1 : index
    %dim_37726 = tensor.dim %40248, %c1_37725 : tensor<4x?x1x64xcomplex<f32>>
    %40249 = flow.tensor.bitcast %40248 : tensor<4x?x1x64xcomplex<f32>>{%dim_37726} -> tensor<4x?x1x128xf32>{%dim_37726}
    %40250 = torch_c.from_builtin_tensor %40249 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %40250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_37727 = torch.constant.int 5
    %40251 = torch.prims.convert_element_type %40250, %int5_37727 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %40251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
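    // Paged KV-cache addressing begins here: each per-device page-id tensor
    // (%2364 .. %2385) is scaled by 64, the number of (layer, K/V) slots per cache
    // page implied by the [?, 32, 2, 16, 1, 128] page layout used further below
    // (32 layers x 2 slots for K and V).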
    %int64_37728 = torch.constant.int 64
    %40252 = torch.aten.mul.Scalar %2364, %int64_37728 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40252, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_37729 = torch.constant.int 64
    %40253 = torch.aten.mul.Scalar %2367, %int64_37729 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40253, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_37730 = torch.constant.int 64
    %40254 = torch.aten.mul.Scalar %2370, %int64_37730 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40254, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_37731 = torch.constant.int 64
    %40255 = torch.aten.mul.Scalar %2373, %int64_37731 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40255, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_37732 = torch.constant.int 64
    %40256 = torch.aten.mul.Scalar %2376, %int64_37732 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40256, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_37733 = torch.constant.int 64
    %40257 = torch.aten.mul.Scalar %2379, %int64_37733 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40257, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_37734 = torch.constant.int 64
    %40258 = torch.aten.mul.Scalar %2382, %int64_37734 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40258, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_37735 = torch.constant.int 64
    %40259 = torch.aten.mul.Scalar %2385, %int64_37735 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40259, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
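    // The scalar 40 added next selects this block's K slot within each page. With two
    // slots per layer, 40 = 2 * 20, which would place this IR in transformer block 20;
    // that is an inference from the layout, not stated in the IR itself. The matching
    // V slot (offset 41) is derived later by adding 1.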
    %int40 = torch.constant.int 40
    %int1_37736 = torch.constant.int 1
    %40260 = torch.aten.add.Scalar %40252, %int40, %int1_37736 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40260, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int40_37737 = torch.constant.int 40
    %int1_37738 = torch.constant.int 1
    %40261 = torch.aten.add.Scalar %40253, %int40_37737, %int1_37738 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40261, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int40_37739 = torch.constant.int 40
    %int1_37740 = torch.constant.int 1
    %40262 = torch.aten.add.Scalar %40254, %int40_37739, %int1_37740 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40262, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int40_37741 = torch.constant.int 40
    %int1_37742 = torch.constant.int 1
    %40263 = torch.aten.add.Scalar %40255, %int40_37741, %int1_37742 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40263, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int40_37743 = torch.constant.int 40
    %int1_37744 = torch.constant.int 1
    %40264 = torch.aten.add.Scalar %40256, %int40_37743, %int1_37744 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40264, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int40_37745 = torch.constant.int 40
    %int1_37746 = torch.constant.int 1
    %40265 = torch.aten.add.Scalar %40257, %int40_37745, %int1_37746 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40265, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int40_37747 = torch.constant.int 40
    %int1_37748 = torch.constant.int 1
    %40266 = torch.aten.add.Scalar %40258, %int40_37747, %int1_37748 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40266, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int40_37749 = torch.constant.int 40
    %int1_37750 = torch.constant.int 1
    %40267 = torch.aten.add.Scalar %40259, %int40_37749, %int1_37750 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40267, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
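    // Reshape the eight rotated (key) shards from [4, seq, 1, 128] to
    // [4, pages, 16, 1, 128]: the sequence axis (seq = pages * 16) is split into
    // pages of 16 tokens each, matching the cache's 16-token page size.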
    %int4_37751 = torch.constant.int 4
    %int16_37752 = torch.constant.int 16
    %int1_37753 = torch.constant.int 1
    %int128_37754 = torch.constant.int 128
    %40268 = torch.prim.ListConstruct %int4_37751, %3095, %int16_37752, %int1_37753, %int128_37754 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40269 = torch.aten.view %40146, %40268 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40269, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37755 = torch.constant.int 4
    %int16_37756 = torch.constant.int 16
    %int1_37757 = torch.constant.int 1
    %int128_37758 = torch.constant.int 128
    %40270 = torch.prim.ListConstruct %int4_37755, %3095, %int16_37756, %int1_37757, %int128_37758 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40271 = torch.aten.view %40161, %40270 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40271, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37759 = torch.constant.int 4
    %int16_37760 = torch.constant.int 16
    %int1_37761 = torch.constant.int 1
    %int128_37762 = torch.constant.int 128
    %40272 = torch.prim.ListConstruct %int4_37759, %3095, %int16_37760, %int1_37761, %int128_37762 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40273 = torch.aten.view %40176, %40272 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40273, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37763 = torch.constant.int 4
    %int16_37764 = torch.constant.int 16
    %int1_37765 = torch.constant.int 1
    %int128_37766 = torch.constant.int 128
    %40274 = torch.prim.ListConstruct %int4_37763, %3095, %int16_37764, %int1_37765, %int128_37766 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40275 = torch.aten.view %40191, %40274 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40275, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37767 = torch.constant.int 4
    %int16_37768 = torch.constant.int 16
    %int1_37769 = torch.constant.int 1
    %int128_37770 = torch.constant.int 128
    %40276 = torch.prim.ListConstruct %int4_37767, %3095, %int16_37768, %int1_37769, %int128_37770 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40277 = torch.aten.view %40206, %40276 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40277, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37771 = torch.constant.int 4
    %int16_37772 = torch.constant.int 16
    %int1_37773 = torch.constant.int 1
    %int128_37774 = torch.constant.int 128
    %40278 = torch.prim.ListConstruct %int4_37771, %3095, %int16_37772, %int1_37773, %int128_37774 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40279 = torch.aten.view %40221, %40278 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40279, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37775 = torch.constant.int 4
    %int16_37776 = torch.constant.int 16
    %int1_37777 = torch.constant.int 1
    %int128_37778 = torch.constant.int 128
    %40280 = torch.prim.ListConstruct %int4_37775, %3095, %int16_37776, %int1_37777, %int128_37778 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40281 = torch.aten.view %40236, %40280 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40281, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37779 = torch.constant.int 4
    %int16_37780 = torch.constant.int 16
    %int1_37781 = torch.constant.int 1
    %int128_37782 = torch.constant.int 128
    %40282 = torch.prim.ListConstruct %int4_37779, %3095, %int16_37780, %int1_37781, %int128_37782 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40283 = torch.aten.view %40251, %40282 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40283, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
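    // Collapse batch and page dims: [4, pages, 16, 1, 128] -> [4 * pages, 16, 1, 128],
    // so each row is one page-sized slab of keys ready for a flat scatter.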
    %int4_37783 = torch.constant.int 4
    %40284 = torch.aten.mul.int %int4_37783, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37784 = torch.constant.int 16
    %int1_37785 = torch.constant.int 1
    %int128_37786 = torch.constant.int 128
    %40285 = torch.prim.ListConstruct %40284, %int16_37784, %int1_37785, %int128_37786 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40286 = torch.aten.view %40269, %40285 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40286, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37787 = torch.constant.int 4
    %40287 = torch.aten.mul.int %int4_37787, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37788 = torch.constant.int 16
    %int1_37789 = torch.constant.int 1
    %int128_37790 = torch.constant.int 128
    %40288 = torch.prim.ListConstruct %40287, %int16_37788, %int1_37789, %int128_37790 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40289 = torch.aten.view %40271, %40288 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40289, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37791 = torch.constant.int 4
    %40290 = torch.aten.mul.int %int4_37791, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37792 = torch.constant.int 16
    %int1_37793 = torch.constant.int 1
    %int128_37794 = torch.constant.int 128
    %40291 = torch.prim.ListConstruct %40290, %int16_37792, %int1_37793, %int128_37794 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40292 = torch.aten.view %40273, %40291 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40292, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37795 = torch.constant.int 4
    %40293 = torch.aten.mul.int %int4_37795, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37796 = torch.constant.int 16
    %int1_37797 = torch.constant.int 1
    %int128_37798 = torch.constant.int 128
    %40294 = torch.prim.ListConstruct %40293, %int16_37796, %int1_37797, %int128_37798 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40295 = torch.aten.view %40275, %40294 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40295, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37799 = torch.constant.int 4
    %40296 = torch.aten.mul.int %int4_37799, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37800 = torch.constant.int 16
    %int1_37801 = torch.constant.int 1
    %int128_37802 = torch.constant.int 128
    %40297 = torch.prim.ListConstruct %40296, %int16_37800, %int1_37801, %int128_37802 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40298 = torch.aten.view %40277, %40297 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40298, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37803 = torch.constant.int 4
    %40299 = torch.aten.mul.int %int4_37803, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37804 = torch.constant.int 16
    %int1_37805 = torch.constant.int 1
    %int128_37806 = torch.constant.int 128
    %40300 = torch.prim.ListConstruct %40299, %int16_37804, %int1_37805, %int128_37806 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40301 = torch.aten.view %40279, %40300 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40301, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37807 = torch.constant.int 4
    %40302 = torch.aten.mul.int %int4_37807, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37808 = torch.constant.int 16
    %int1_37809 = torch.constant.int 1
    %int128_37810 = torch.constant.int 128
    %40303 = torch.prim.ListConstruct %40302, %int16_37808, %int1_37809, %int128_37810 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40304 = torch.aten.view %40281, %40303 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40304, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37811 = torch.constant.int 4
    %40305 = torch.aten.mul.int %int4_37811, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37812 = torch.constant.int 16
    %int1_37813 = torch.constant.int 1
    %int128_37814 = torch.constant.int 128
    %40306 = torch.prim.ListConstruct %40305, %int16_37812, %int1_37813, %int128_37814 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40307 = torch.aten.view %40283, %40306 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40307, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
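    // Flatten the matching K slot-index tensors from [4, pages] to [4 * pages] so they
    // pair one-to-one with the rows produced above.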
    %int4_37815 = torch.constant.int 4
    %40308 = torch.aten.mul.int %int4_37815, %3095 : !torch.int, !torch.int -> !torch.int
    %40309 = torch.prim.ListConstruct %40308 : (!torch.int) -> !torch.list<int>
    %40310 = torch.aten.view %40260, %40309 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40310, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37816 = torch.constant.int 4
    %40311 = torch.aten.mul.int %int4_37816, %3095 : !torch.int, !torch.int -> !torch.int
    %40312 = torch.prim.ListConstruct %40311 : (!torch.int) -> !torch.list<int>
    %40313 = torch.aten.view %40261, %40312 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40313, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37817 = torch.constant.int 4
    %40314 = torch.aten.mul.int %int4_37817, %3095 : !torch.int, !torch.int -> !torch.int
    %40315 = torch.prim.ListConstruct %40314 : (!torch.int) -> !torch.list<int>
    %40316 = torch.aten.view %40262, %40315 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40316, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37818 = torch.constant.int 4
    %40317 = torch.aten.mul.int %int4_37818, %3095 : !torch.int, !torch.int -> !torch.int
    %40318 = torch.prim.ListConstruct %40317 : (!torch.int) -> !torch.list<int>
    %40319 = torch.aten.view %40263, %40318 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40319, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37819 = torch.constant.int 4
    %40320 = torch.aten.mul.int %int4_37819, %3095 : !torch.int, !torch.int -> !torch.int
    %40321 = torch.prim.ListConstruct %40320 : (!torch.int) -> !torch.list<int>
    %40322 = torch.aten.view %40264, %40321 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40322, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37820 = torch.constant.int 4
    %40323 = torch.aten.mul.int %int4_37820, %3095 : !torch.int, !torch.int -> !torch.int
    %40324 = torch.prim.ListConstruct %40323 : (!torch.int) -> !torch.list<int>
    %40325 = torch.aten.view %40265, %40324 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40325, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37821 = torch.constant.int 4
    %40326 = torch.aten.mul.int %int4_37821, %3095 : !torch.int, !torch.int -> !torch.int
    %40327 = torch.prim.ListConstruct %40326 : (!torch.int) -> !torch.list<int>
    %40328 = torch.aten.view %40266, %40327 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40328, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37822 = torch.constant.int 4
    %40329 = torch.aten.mul.int %int4_37822, %3095 : !torch.int, !torch.int -> !torch.int
    %40330 = torch.prim.ListConstruct %40329 : (!torch.int) -> !torch.list<int>
    %40331 = torch.aten.view %40267, %40330 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40331, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
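    // The same view/flatten sequence is now applied to the value activations
    // (%39921 .. %39935). These skip the complex multiply above, consistent with
    // rotary embeddings being applied only to queries and keys, not values.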
    %int4_37823 = torch.constant.int 4
    %int16_37824 = torch.constant.int 16
    %int1_37825 = torch.constant.int 1
    %int128_37826 = torch.constant.int 128
    %40332 = torch.prim.ListConstruct %int4_37823, %3095, %int16_37824, %int1_37825, %int128_37826 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40333 = torch.aten.view %39921, %40332 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40333, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37827 = torch.constant.int 4
    %int16_37828 = torch.constant.int 16
    %int1_37829 = torch.constant.int 1
    %int128_37830 = torch.constant.int 128
    %40334 = torch.prim.ListConstruct %int4_37827, %3095, %int16_37828, %int1_37829, %int128_37830 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40335 = torch.aten.view %39923, %40334 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40335, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37831 = torch.constant.int 4
    %int16_37832 = torch.constant.int 16
    %int1_37833 = torch.constant.int 1
    %int128_37834 = torch.constant.int 128
    %40336 = torch.prim.ListConstruct %int4_37831, %3095, %int16_37832, %int1_37833, %int128_37834 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40337 = torch.aten.view %39925, %40336 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40337, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37835 = torch.constant.int 4
    %int16_37836 = torch.constant.int 16
    %int1_37837 = torch.constant.int 1
    %int128_37838 = torch.constant.int 128
    %40338 = torch.prim.ListConstruct %int4_37835, %3095, %int16_37836, %int1_37837, %int128_37838 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40339 = torch.aten.view %39927, %40338 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40339, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37839 = torch.constant.int 4
    %int16_37840 = torch.constant.int 16
    %int1_37841 = torch.constant.int 1
    %int128_37842 = torch.constant.int 128
    %40340 = torch.prim.ListConstruct %int4_37839, %3095, %int16_37840, %int1_37841, %int128_37842 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40341 = torch.aten.view %39929, %40340 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40341, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37843 = torch.constant.int 4
    %int16_37844 = torch.constant.int 16
    %int1_37845 = torch.constant.int 1
    %int128_37846 = torch.constant.int 128
    %40342 = torch.prim.ListConstruct %int4_37843, %3095, %int16_37844, %int1_37845, %int128_37846 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40343 = torch.aten.view %39931, %40342 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40343, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37847 = torch.constant.int 4
    %int16_37848 = torch.constant.int 16
    %int1_37849 = torch.constant.int 1
    %int128_37850 = torch.constant.int 128
    %40344 = torch.prim.ListConstruct %int4_37847, %3095, %int16_37848, %int1_37849, %int128_37850 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40345 = torch.aten.view %39933, %40344 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40345, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37851 = torch.constant.int 4
    %int16_37852 = torch.constant.int 16
    %int1_37853 = torch.constant.int 1
    %int128_37854 = torch.constant.int 128
    %40346 = torch.prim.ListConstruct %int4_37851, %3095, %int16_37852, %int1_37853, %int128_37854 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40347 = torch.aten.view %39935, %40346 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %40347, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_37855 = torch.constant.int 4
    %40348 = torch.aten.mul.int %int4_37855, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37856 = torch.constant.int 16
    %int1_37857 = torch.constant.int 1
    %int128_37858 = torch.constant.int 128
    %40349 = torch.prim.ListConstruct %40348, %int16_37856, %int1_37857, %int128_37858 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40350 = torch.aten.view %40333, %40349 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40350, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37859 = torch.constant.int 4
    %40351 = torch.aten.mul.int %int4_37859, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37860 = torch.constant.int 16
    %int1_37861 = torch.constant.int 1
    %int128_37862 = torch.constant.int 128
    %40352 = torch.prim.ListConstruct %40351, %int16_37860, %int1_37861, %int128_37862 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40353 = torch.aten.view %40335, %40352 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40353, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37863 = torch.constant.int 4
    %40354 = torch.aten.mul.int %int4_37863, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37864 = torch.constant.int 16
    %int1_37865 = torch.constant.int 1
    %int128_37866 = torch.constant.int 128
    %40355 = torch.prim.ListConstruct %40354, %int16_37864, %int1_37865, %int128_37866 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40356 = torch.aten.view %40337, %40355 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40356, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37867 = torch.constant.int 4
    %40357 = torch.aten.mul.int %int4_37867, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37868 = torch.constant.int 16
    %int1_37869 = torch.constant.int 1
    %int128_37870 = torch.constant.int 128
    %40358 = torch.prim.ListConstruct %40357, %int16_37868, %int1_37869, %int128_37870 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40359 = torch.aten.view %40339, %40358 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40359, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37871 = torch.constant.int 4
    %40360 = torch.aten.mul.int %int4_37871, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37872 = torch.constant.int 16
    %int1_37873 = torch.constant.int 1
    %int128_37874 = torch.constant.int 128
    %40361 = torch.prim.ListConstruct %40360, %int16_37872, %int1_37873, %int128_37874 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40362 = torch.aten.view %40341, %40361 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40362, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37875 = torch.constant.int 4
    %40363 = torch.aten.mul.int %int4_37875, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37876 = torch.constant.int 16
    %int1_37877 = torch.constant.int 1
    %int128_37878 = torch.constant.int 128
    %40364 = torch.prim.ListConstruct %40363, %int16_37876, %int1_37877, %int128_37878 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40365 = torch.aten.view %40343, %40364 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40365, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37879 = torch.constant.int 4
    %40366 = torch.aten.mul.int %int4_37879, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37880 = torch.constant.int 16
    %int1_37881 = torch.constant.int 1
    %int128_37882 = torch.constant.int 128
    %40367 = torch.prim.ListConstruct %40366, %int16_37880, %int1_37881, %int128_37882 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40368 = torch.aten.view %40345, %40367 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40368, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_37883 = torch.constant.int 4
    %40369 = torch.aten.mul.int %int4_37883, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_37884 = torch.constant.int 16
    %int1_37885 = torch.constant.int 1
    %int128_37886 = torch.constant.int 128
    %40370 = torch.prim.ListConstruct %40369, %int16_37884, %int1_37885, %int128_37886 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40371 = torch.aten.view %40347, %40370 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40371, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
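    // Derive the V slot indices by adding 1 to each K slot index: K and V occupy
    // adjacent slots within a page (page * 64 + 2 * layer for K, + 1 for V).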
    %int1_37887 = torch.constant.int 1
    %int1_37888 = torch.constant.int 1
    %40372 = torch.aten.add.Scalar %40260, %int1_37887, %int1_37888 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40372, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_37889 = torch.constant.int 1
    %int1_37890 = torch.constant.int 1
    %40373 = torch.aten.add.Scalar %40261, %int1_37889, %int1_37890 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40373, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_37891 = torch.constant.int 1
    %int1_37892 = torch.constant.int 1
    %40374 = torch.aten.add.Scalar %40262, %int1_37891, %int1_37892 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40374, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_37893 = torch.constant.int 1
    %int1_37894 = torch.constant.int 1
    %40375 = torch.aten.add.Scalar %40263, %int1_37893, %int1_37894 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40375, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_37895 = torch.constant.int 1
    %int1_37896 = torch.constant.int 1
    %40376 = torch.aten.add.Scalar %40264, %int1_37895, %int1_37896 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40376, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_37897 = torch.constant.int 1
    %int1_37898 = torch.constant.int 1
    %40377 = torch.aten.add.Scalar %40265, %int1_37897, %int1_37898 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40377, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_37899 = torch.constant.int 1
    %int1_37900 = torch.constant.int 1
    %40378 = torch.aten.add.Scalar %40266, %int1_37899, %int1_37900 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40378, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_37901 = torch.constant.int 1
    %int1_37902 = torch.constant.int 1
    %40379 = torch.aten.add.Scalar %40267, %int1_37901, %int1_37902 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %40379, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_37903 = torch.constant.int 4
    %40380 = torch.aten.mul.int %int4_37903, %3095 : !torch.int, !torch.int -> !torch.int
    %40381 = torch.prim.ListConstruct %40380 : (!torch.int) -> !torch.list<int>
    %40382 = torch.aten.view %40372, %40381 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40382, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37904 = torch.constant.int 4
    %40383 = torch.aten.mul.int %int4_37904, %3095 : !torch.int, !torch.int -> !torch.int
    %40384 = torch.prim.ListConstruct %40383 : (!torch.int) -> !torch.list<int>
    %40385 = torch.aten.view %40373, %40384 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40385, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37905 = torch.constant.int 4
    %40386 = torch.aten.mul.int %int4_37905, %3095 : !torch.int, !torch.int -> !torch.int
    %40387 = torch.prim.ListConstruct %40386 : (!torch.int) -> !torch.list<int>
    %40388 = torch.aten.view %40374, %40387 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40388, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37906 = torch.constant.int 4
    %40389 = torch.aten.mul.int %int4_37906, %3095 : !torch.int, !torch.int -> !torch.int
    %40390 = torch.prim.ListConstruct %40389 : (!torch.int) -> !torch.list<int>
    %40391 = torch.aten.view %40375, %40390 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40391, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37907 = torch.constant.int 4
    %40392 = torch.aten.mul.int %int4_37907, %3095 : !torch.int, !torch.int -> !torch.int
    %40393 = torch.prim.ListConstruct %40392 : (!torch.int) -> !torch.list<int>
    %40394 = torch.aten.view %40376, %40393 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40394, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37908 = torch.constant.int 4
    %40395 = torch.aten.mul.int %int4_37908, %3095 : !torch.int, !torch.int -> !torch.int
    %40396 = torch.prim.ListConstruct %40395 : (!torch.int) -> !torch.list<int>
    %40397 = torch.aten.view %40377, %40396 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40397, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37909 = torch.constant.int 4
    %40398 = torch.aten.mul.int %int4_37909, %3095 : !torch.int, !torch.int -> !torch.int
    %40399 = torch.prim.ListConstruct %40398 : (!torch.int) -> !torch.list<int>
    %40400 = torch.aten.view %40378, %40399 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40400, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_37910 = torch.constant.int 4
    %40401 = torch.aten.mul.int %int4_37910, %3095 : !torch.int, !torch.int -> !torch.int
    %40402 = torch.prim.ListConstruct %40401 : (!torch.int) -> !torch.list<int>
    %40403 = torch.aten.view %40379, %40402 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40403, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
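    // Concatenate the K and V slot indices (and, just below, the K and V slabs
    // themselves) along dim 0, so that a single index_put per device writes both
    // halves of the cache update at once.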
    %40404 = torch.prim.ListConstruct %40310, %40382 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_37911 = torch.constant.int 0
    %40405 = torch.aten.cat %40404, %int0_37911 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40405, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %40406 = torch.prim.ListConstruct %40313, %40385 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_37912 = torch.constant.int 0
    %40407 = torch.aten.cat %40406, %int0_37912 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40407, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %40408 = torch.prim.ListConstruct %40316, %40388 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_37913 = torch.constant.int 0
    %40409 = torch.aten.cat %40408, %int0_37913 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40409, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %40410 = torch.prim.ListConstruct %40319, %40391 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_37914 = torch.constant.int 0
    %40411 = torch.aten.cat %40410, %int0_37914 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40411, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %40412 = torch.prim.ListConstruct %40322, %40394 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_37915 = torch.constant.int 0
    %40413 = torch.aten.cat %40412, %int0_37915 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40413, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %40414 = torch.prim.ListConstruct %40325, %40397 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_37916 = torch.constant.int 0
    %40415 = torch.aten.cat %40414, %int0_37916 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40415, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %40416 = torch.prim.ListConstruct %40328, %40400 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_37917 = torch.constant.int 0
    %40417 = torch.aten.cat %40416, %int0_37917 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40417, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %40418 = torch.prim.ListConstruct %40331, %40403 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_37918 = torch.constant.int 0
    %40419 = torch.aten.cat %40418, %int0_37918 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %40419, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %40420 = torch.prim.ListConstruct %40286, %40350 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_37919 = torch.constant.int 0
    %40421 = torch.aten.cat %40420, %int0_37919 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40421, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40422 = torch.prim.ListConstruct %40289, %40353 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_37920 = torch.constant.int 0
    %40423 = torch.aten.cat %40422, %int0_37920 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40423, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40424 = torch.prim.ListConstruct %40292, %40356 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_37921 = torch.constant.int 0
    %40425 = torch.aten.cat %40424, %int0_37921 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40425, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40426 = torch.prim.ListConstruct %40295, %40359 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_37922 = torch.constant.int 0
    %40427 = torch.aten.cat %40426, %int0_37922 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40427, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40428 = torch.prim.ListConstruct %40298, %40362 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_37923 = torch.constant.int 0
    %40429 = torch.aten.cat %40428, %int0_37923 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40429, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40430 = torch.prim.ListConstruct %40301, %40365 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_37924 = torch.constant.int 0
    %40431 = torch.aten.cat %40430, %int0_37924 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40431, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40432 = torch.prim.ListConstruct %40304, %40368 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_37925 = torch.constant.int 0
    %40433 = torch.aten.cat %40432, %int0_37925 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40433, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40434 = torch.prim.ListConstruct %40307, %40371 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_37926 = torch.constant.int 0
    %40435 = torch.aten.cat %40434, %int0_37926 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40435, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
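    // Per-device cache update, repeated once per shard below: view the flat
    // [?, 131072] cache as [pages, 32, 2, 16, 1, 128], collapse it to
    // [pages * 64, 16, 1, 128] rows, index_put the concatenated K/V slabs at the
    // computed slot indices (accumulate = false, i.e. overwrite), then view the
    // result back to the original flat shape. As a worked example under the layout
    // above: page id 7 would map to rows 7 * 64 + 40 = 488 (K) and 489 (V).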
    %int32_37927 = torch.constant.int 32
    %int2_37928 = torch.constant.int 2
    %int16_37929 = torch.constant.int 16
    %int1_37930 = torch.constant.int 1
    %int128_37931 = torch.constant.int 128
    %40436 = torch.prim.ListConstruct %3023, %int32_37927, %int2_37928, %int16_37929, %int1_37930, %int128_37931 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40437 = torch.aten.view %38586, %40436 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40437, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_37932 = torch.constant.int 32
    %40438 = torch.aten.mul.int %3023, %int32_37932 : !torch.int, !torch.int -> !torch.int
    %int2_37933 = torch.constant.int 2
    %40439 = torch.aten.mul.int %40438, %int2_37933 : !torch.int, !torch.int -> !torch.int
    %int16_37934 = torch.constant.int 16
    %int1_37935 = torch.constant.int 1
    %int128_37936 = torch.constant.int 128
    %40440 = torch.prim.ListConstruct %40439, %int16_37934, %int1_37935, %int128_37936 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40441 = torch.aten.view %40437, %40440 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40441, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40442 = torch.prim.ListConstruct %40405 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_37937 = torch.constant.bool false
    %40443 = torch.aten.index_put %40441, %40442, %40421, %false_37937 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40443, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_37938 = torch.constant.int 32
    %int2_37939 = torch.constant.int 2
    %int16_37940 = torch.constant.int 16
    %int1_37941 = torch.constant.int 1
    %int128_37942 = torch.constant.int 128
    %40444 = torch.prim.ListConstruct %3023, %int32_37938, %int2_37939, %int16_37940, %int1_37941, %int128_37942 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40445 = torch.aten.view %40443, %40444 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40445, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_37943 = torch.constant.int 131072
    %40446 = torch.prim.ListConstruct %3023, %int131072_37943 : (!torch.int, !torch.int) -> !torch.list<int>
    %40447 = torch.aten.view %40445, %40446 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %40447, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_37944 = torch.constant.int 32
    %int2_37945 = torch.constant.int 2
    %int16_37946 = torch.constant.int 16
    %int1_37947 = torch.constant.int 1
    %int128_37948 = torch.constant.int 128
    %40448 = torch.prim.ListConstruct %3026, %int32_37944, %int2_37945, %int16_37946, %int1_37947, %int128_37948 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40449 = torch.aten.view %38598, %40448 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40449, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_37949 = torch.constant.int 32
    %40450 = torch.aten.mul.int %3026, %int32_37949 : !torch.int, !torch.int -> !torch.int
    %int2_37950 = torch.constant.int 2
    %40451 = torch.aten.mul.int %40450, %int2_37950 : !torch.int, !torch.int -> !torch.int
    %int16_37951 = torch.constant.int 16
    %int1_37952 = torch.constant.int 1
    %int128_37953 = torch.constant.int 128
    %40452 = torch.prim.ListConstruct %40451, %int16_37951, %int1_37952, %int128_37953 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40453 = torch.aten.view %40449, %40452 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40453, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40454 = torch.prim.ListConstruct %40407 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_37954 = torch.constant.bool false
    %40455 = torch.aten.index_put %40453, %40454, %40423, %false_37954 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40455, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_37955 = torch.constant.int 32
    %int2_37956 = torch.constant.int 2
    %int16_37957 = torch.constant.int 16
    %int1_37958 = torch.constant.int 1
    %int128_37959 = torch.constant.int 128
    %40456 = torch.prim.ListConstruct %3026, %int32_37955, %int2_37956, %int16_37957, %int1_37958, %int128_37959 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40457 = torch.aten.view %40455, %40456 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40457, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_37960 = torch.constant.int 131072
    %40458 = torch.prim.ListConstruct %3026, %int131072_37960 : (!torch.int, !torch.int) -> !torch.list<int>
    %40459 = torch.aten.view %40457, %40458 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %40459, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_37961 = torch.constant.int 32
    %int2_37962 = torch.constant.int 2
    %int16_37963 = torch.constant.int 16
    %int1_37964 = torch.constant.int 1
    %int128_37965 = torch.constant.int 128
    %40460 = torch.prim.ListConstruct %3029, %int32_37961, %int2_37962, %int16_37963, %int1_37964, %int128_37965 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40461 = torch.aten.view %38610, %40460 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40461, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_37966 = torch.constant.int 32
    %40462 = torch.aten.mul.int %3029, %int32_37966 : !torch.int, !torch.int -> !torch.int
    %int2_37967 = torch.constant.int 2
    %40463 = torch.aten.mul.int %40462, %int2_37967 : !torch.int, !torch.int -> !torch.int
    %int16_37968 = torch.constant.int 16
    %int1_37969 = torch.constant.int 1
    %int128_37970 = torch.constant.int 128
    %40464 = torch.prim.ListConstruct %40463, %int16_37968, %int1_37969, %int128_37970 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40465 = torch.aten.view %40461, %40464 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40465, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40466 = torch.prim.ListConstruct %40409 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_37971 = torch.constant.bool false
    %40467 = torch.aten.index_put %40465, %40466, %40425, %false_37971 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40467, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_37972 = torch.constant.int 32
    %int2_37973 = torch.constant.int 2
    %int16_37974 = torch.constant.int 16
    %int1_37975 = torch.constant.int 1
    %int128_37976 = torch.constant.int 128
    %40468 = torch.prim.ListConstruct %3029, %int32_37972, %int2_37973, %int16_37974, %int1_37975, %int128_37976 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40469 = torch.aten.view %40467, %40468 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40469, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_37977 = torch.constant.int 131072
    %40470 = torch.prim.ListConstruct %3029, %int131072_37977 : (!torch.int, !torch.int) -> !torch.list<int>
    %40471 = torch.aten.view %40469, %40470 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %40471, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_37978 = torch.constant.int 32
    %int2_37979 = torch.constant.int 2
    %int16_37980 = torch.constant.int 16
    %int1_37981 = torch.constant.int 1
    %int128_37982 = torch.constant.int 128
    %40472 = torch.prim.ListConstruct %3032, %int32_37978, %int2_37979, %int16_37980, %int1_37981, %int128_37982 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40473 = torch.aten.view %38622, %40472 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40473, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_37983 = torch.constant.int 32
    %40474 = torch.aten.mul.int %3032, %int32_37983 : !torch.int, !torch.int -> !torch.int
    %int2_37984 = torch.constant.int 2
    %40475 = torch.aten.mul.int %40474, %int2_37984 : !torch.int, !torch.int -> !torch.int
    %int16_37985 = torch.constant.int 16
    %int1_37986 = torch.constant.int 1
    %int128_37987 = torch.constant.int 128
    %40476 = torch.prim.ListConstruct %40475, %int16_37985, %int1_37986, %int128_37987 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40477 = torch.aten.view %40473, %40476 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40477, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40478 = torch.prim.ListConstruct %40411 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_37988 = torch.constant.bool false
    %40479 = torch.aten.index_put %40477, %40478, %40427, %false_37988 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40479, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_37989 = torch.constant.int 32
    %int2_37990 = torch.constant.int 2
    %int16_37991 = torch.constant.int 16
    %int1_37992 = torch.constant.int 1
    %int128_37993 = torch.constant.int 128
    %40480 = torch.prim.ListConstruct %3032, %int32_37989, %int2_37990, %int16_37991, %int1_37992, %int128_37993 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40481 = torch.aten.view %40479, %40480 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40481, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_37994 = torch.constant.int 131072
    %40482 = torch.prim.ListConstruct %3032, %int131072_37994 : (!torch.int, !torch.int) -> !torch.list<int>
    %40483 = torch.aten.view %40481, %40482 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %40483, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_37995 = torch.constant.int 32
    %int2_37996 = torch.constant.int 2
    %int16_37997 = torch.constant.int 16
    %int1_37998 = torch.constant.int 1
    %int128_37999 = torch.constant.int 128
    %40484 = torch.prim.ListConstruct %3035, %int32_37995, %int2_37996, %int16_37997, %int1_37998, %int128_37999 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40485 = torch.aten.view %38634, %40484 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40485, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_38000 = torch.constant.int 32
    %40486 = torch.aten.mul.int %3035, %int32_38000 : !torch.int, !torch.int -> !torch.int
    %int2_38001 = torch.constant.int 2
    %40487 = torch.aten.mul.int %40486, %int2_38001 : !torch.int, !torch.int -> !torch.int
    %int16_38002 = torch.constant.int 16
    %int1_38003 = torch.constant.int 1
    %int128_38004 = torch.constant.int 128
    %40488 = torch.prim.ListConstruct %40487, %int16_38002, %int1_38003, %int128_38004 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40489 = torch.aten.view %40485, %40488 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40489, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40490 = torch.prim.ListConstruct %40413 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_38005 = torch.constant.bool false
    %40491 = torch.aten.index_put %40489, %40490, %40429, %false_38005 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40491, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_38006 = torch.constant.int 32
    %int2_38007 = torch.constant.int 2
    %int16_38008 = torch.constant.int 16
    %int1_38009 = torch.constant.int 1
    %int128_38010 = torch.constant.int 128
    %40492 = torch.prim.ListConstruct %3035, %int32_38006, %int2_38007, %int16_38008, %int1_38009, %int128_38010 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40493 = torch.aten.view %40491, %40492 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40493, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_38011 = torch.constant.int 131072
    %40494 = torch.prim.ListConstruct %3035, %int131072_38011 : (!torch.int, !torch.int) -> !torch.list<int>
    %40495 = torch.aten.view %40493, %40494 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %40495, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_38012 = torch.constant.int 32
    %int2_38013 = torch.constant.int 2
    %int16_38014 = torch.constant.int 16
    %int1_38015 = torch.constant.int 1
    %int128_38016 = torch.constant.int 128
    %40496 = torch.prim.ListConstruct %3038, %int32_38012, %int2_38013, %int16_38014, %int1_38015, %int128_38016 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40497 = torch.aten.view %38646, %40496 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40497, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_38017 = torch.constant.int 32
    %40498 = torch.aten.mul.int %3038, %int32_38017 : !torch.int, !torch.int -> !torch.int
    %int2_38018 = torch.constant.int 2
    %40499 = torch.aten.mul.int %40498, %int2_38018 : !torch.int, !torch.int -> !torch.int
    %int16_38019 = torch.constant.int 16
    %int1_38020 = torch.constant.int 1
    %int128_38021 = torch.constant.int 128
    %40500 = torch.prim.ListConstruct %40499, %int16_38019, %int1_38020, %int128_38021 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40501 = torch.aten.view %40497, %40500 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40501, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40502 = torch.prim.ListConstruct %40415 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_38022 = torch.constant.bool false
    %40503 = torch.aten.index_put %40501, %40502, %40431, %false_38022 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40503, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_38023 = torch.constant.int 32
    %int2_38024 = torch.constant.int 2
    %int16_38025 = torch.constant.int 16
    %int1_38026 = torch.constant.int 1
    %int128_38027 = torch.constant.int 128
    %40504 = torch.prim.ListConstruct %3038, %int32_38023, %int2_38024, %int16_38025, %int1_38026, %int128_38027 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40505 = torch.aten.view %40503, %40504 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40505, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_38028 = torch.constant.int 131072
    %40506 = torch.prim.ListConstruct %3038, %int131072_38028 : (!torch.int, !torch.int) -> !torch.list<int>
    %40507 = torch.aten.view %40505, %40506 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %40507, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_38029 = torch.constant.int 32
    %int2_38030 = torch.constant.int 2
    %int16_38031 = torch.constant.int 16
    %int1_38032 = torch.constant.int 1
    %int128_38033 = torch.constant.int 128
    %40508 = torch.prim.ListConstruct %3041, %int32_38029, %int2_38030, %int16_38031, %int1_38032, %int128_38033 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40509 = torch.aten.view %38658, %40508 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40509, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_38034 = torch.constant.int 32
    %40510 = torch.aten.mul.int %3041, %int32_38034 : !torch.int, !torch.int -> !torch.int
    %int2_38035 = torch.constant.int 2
    %40511 = torch.aten.mul.int %40510, %int2_38035 : !torch.int, !torch.int -> !torch.int
    %int16_38036 = torch.constant.int 16
    %int1_38037 = torch.constant.int 1
    %int128_38038 = torch.constant.int 128
    %40512 = torch.prim.ListConstruct %40511, %int16_38036, %int1_38037, %int128_38038 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40513 = torch.aten.view %40509, %40512 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40513, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40514 = torch.prim.ListConstruct %40417 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_38039 = torch.constant.bool false
    %40515 = torch.aten.index_put %40513, %40514, %40433, %false_38039 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40515, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_38040 = torch.constant.int 32
    %int2_38041 = torch.constant.int 2
    %int16_38042 = torch.constant.int 16
    %int1_38043 = torch.constant.int 1
    %int128_38044 = torch.constant.int 128
    %40516 = torch.prim.ListConstruct %3041, %int32_38040, %int2_38041, %int16_38042, %int1_38043, %int128_38044 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40517 = torch.aten.view %40515, %40516 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40517, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_38045 = torch.constant.int 131072
    %40518 = torch.prim.ListConstruct %3041, %int131072_38045 : (!torch.int, !torch.int) -> !torch.list<int>
    %40519 = torch.aten.view %40517, %40518 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %40519, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_38046 = torch.constant.int 32
    %int2_38047 = torch.constant.int 2
    %int16_38048 = torch.constant.int 16
    %int1_38049 = torch.constant.int 1
    %int128_38050 = torch.constant.int 128
    %40520 = torch.prim.ListConstruct %3044, %int32_38046, %int2_38047, %int16_38048, %int1_38049, %int128_38050 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40521 = torch.aten.view %38670, %40520 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40521, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_38051 = torch.constant.int 32
    %40522 = torch.aten.mul.int %3044, %int32_38051 : !torch.int, !torch.int -> !torch.int
    %int2_38052 = torch.constant.int 2
    %40523 = torch.aten.mul.int %40522, %int2_38052 : !torch.int, !torch.int -> !torch.int
    %int16_38053 = torch.constant.int 16
    %int1_38054 = torch.constant.int 1
    %int128_38055 = torch.constant.int 128
    %40524 = torch.prim.ListConstruct %40523, %int16_38053, %int1_38054, %int128_38055 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40525 = torch.aten.view %40521, %40524 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40525, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %40526 = torch.prim.ListConstruct %40419 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_38056 = torch.constant.bool false
    %40527 = torch.aten.index_put %40525, %40526, %40435, %false_38056 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %40527, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_38057 = torch.constant.int 32
    %int2_38058 = torch.constant.int 2
    %int16_38059 = torch.constant.int 16
    %int1_38060 = torch.constant.int 1
    %int128_38061 = torch.constant.int 128
    %40528 = torch.prim.ListConstruct %3044, %int32_38057, %int2_38058, %int16_38059, %int1_38060, %int128_38061 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40529 = torch.aten.view %40527, %40528 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %40529, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_38062 = torch.constant.int 131072
    %40530 = torch.prim.ListConstruct %3044, %int131072_38062 : (!torch.int, !torch.int) -> !torch.list<int>
    %40531 = torch.aten.view %40529, %40530 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %40531, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
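    // Cache write-back is finished for the shards handled here. The tensors
    // %40146 ... %40251 (shape [4,?,1,128]; one kv head per shard) are now
    // prepared for grouped-query attention: unsqueeze at dim -2 inserts a
    // broadcast axis, [4,?,1,128] -> [4,?,1,1,128]. They feed the key operand
    // of the attention calls further below.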
    %int-2_38063 = torch.constant.int -2
    %40532 = torch.aten.unsqueeze %40146, %int-2_38063 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38064 = torch.constant.int -2
    %40533 = torch.aten.unsqueeze %40161, %int-2_38064 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38065 = torch.constant.int -2
    %40534 = torch.aten.unsqueeze %40176, %int-2_38065 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38066 = torch.constant.int -2
    %40535 = torch.aten.unsqueeze %40191, %int-2_38066 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38067 = torch.constant.int -2
    %40536 = torch.aten.unsqueeze %40206, %int-2_38067 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38068 = torch.constant.int -2
    %40537 = torch.aten.unsqueeze %40221, %int-2_38068 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38069 = torch.constant.int -2
    %40538 = torch.aten.unsqueeze %40236, %int-2_38069 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38070 = torch.constant.int -2
    %40539 = torch.aten.unsqueeze %40251, %int-2_38070 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
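    // Broadcast each shard's single kv head across the 4 query heads it
    // serves: expand to [4,?,1,4,128] (expand produces a broadcast view; no
    // data is copied).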
    %int4_38071 = torch.constant.int 4
    %int1_38072 = torch.constant.int 1
    %int4_38073 = torch.constant.int 4
    %int128_38074 = torch.constant.int 128
    %40540 = torch.prim.ListConstruct %int4_38071, %40132, %int1_38072, %int4_38073, %int128_38074 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38075 = torch.constant.bool false
    %40541 = torch.aten.expand %40532, %40540, %false_38075 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38076 = torch.constant.int 4
    %int1_38077 = torch.constant.int 1
    %int4_38078 = torch.constant.int 4
    %int128_38079 = torch.constant.int 128
    %40542 = torch.prim.ListConstruct %int4_38076, %40132, %int1_38077, %int4_38078, %int128_38079 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38080 = torch.constant.bool false
    %40543 = torch.aten.expand %40533, %40542, %false_38080 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38081 = torch.constant.int 4
    %int1_38082 = torch.constant.int 1
    %int4_38083 = torch.constant.int 4
    %int128_38084 = torch.constant.int 128
    %40544 = torch.prim.ListConstruct %int4_38081, %40132, %int1_38082, %int4_38083, %int128_38084 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38085 = torch.constant.bool false
    %40545 = torch.aten.expand %40534, %40544, %false_38085 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38086 = torch.constant.int 4
    %int1_38087 = torch.constant.int 1
    %int4_38088 = torch.constant.int 4
    %int128_38089 = torch.constant.int 128
    %40546 = torch.prim.ListConstruct %int4_38086, %40132, %int1_38087, %int4_38088, %int128_38089 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38090 = torch.constant.bool false
    %40547 = torch.aten.expand %40535, %40546, %false_38090 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38091 = torch.constant.int 4
    %int1_38092 = torch.constant.int 1
    %int4_38093 = torch.constant.int 4
    %int128_38094 = torch.constant.int 128
    %40548 = torch.prim.ListConstruct %int4_38091, %40132, %int1_38092, %int4_38093, %int128_38094 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38095 = torch.constant.bool false
    %40549 = torch.aten.expand %40536, %40548, %false_38095 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38096 = torch.constant.int 4
    %int1_38097 = torch.constant.int 1
    %int4_38098 = torch.constant.int 4
    %int128_38099 = torch.constant.int 128
    %40550 = torch.prim.ListConstruct %int4_38096, %40132, %int1_38097, %int4_38098, %int128_38099 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38100 = torch.constant.bool false
    %40551 = torch.aten.expand %40537, %40550, %false_38100 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38101 = torch.constant.int 4
    %int1_38102 = torch.constant.int 1
    %int4_38103 = torch.constant.int 4
    %int128_38104 = torch.constant.int 128
    %40552 = torch.prim.ListConstruct %int4_38101, %40132, %int1_38102, %int4_38103, %int128_38104 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38105 = torch.constant.bool false
    %40553 = torch.aten.expand %40538, %40552, %false_38105 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38106 = torch.constant.int 4
    %int1_38107 = torch.constant.int 1
    %int4_38108 = torch.constant.int 4
    %int128_38109 = torch.constant.int 128
    %40554 = torch.prim.ListConstruct %int4_38106, %40132, %int1_38107, %int4_38108, %int128_38109 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38110 = torch.constant.bool false
    %40555 = torch.aten.expand %40539, %40554, %false_38110 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
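    // Collapse the inserted axis, [4,?,1,4,128] -> [4,?,4,128], so the keys
    // match the per-shard query head count.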
    %int4_38111 = torch.constant.int 4
    %int4_38112 = torch.constant.int 4
    %int128_38113 = torch.constant.int 128
    %40556 = torch.prim.ListConstruct %int4_38111, %40132, %int4_38112, %int128_38113 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40557 = torch.aten.view %40541, %40556 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38114 = torch.constant.int 4
    %int4_38115 = torch.constant.int 4
    %int128_38116 = torch.constant.int 128
    %40558 = torch.prim.ListConstruct %int4_38114, %40132, %int4_38115, %int128_38116 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40559 = torch.aten.view %40543, %40558 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38117 = torch.constant.int 4
    %int4_38118 = torch.constant.int 4
    %int128_38119 = torch.constant.int 128
    %40560 = torch.prim.ListConstruct %int4_38117, %40132, %int4_38118, %int128_38119 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40561 = torch.aten.view %40545, %40560 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38120 = torch.constant.int 4
    %int4_38121 = torch.constant.int 4
    %int128_38122 = torch.constant.int 128
    %40562 = torch.prim.ListConstruct %int4_38120, %40132, %int4_38121, %int128_38122 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40563 = torch.aten.view %40547, %40562 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38123 = torch.constant.int 4
    %int4_38124 = torch.constant.int 4
    %int128_38125 = torch.constant.int 128
    %40564 = torch.prim.ListConstruct %int4_38123, %40132, %int4_38124, %int128_38125 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40565 = torch.aten.view %40549, %40564 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38126 = torch.constant.int 4
    %int4_38127 = torch.constant.int 4
    %int128_38128 = torch.constant.int 128
    %40566 = torch.prim.ListConstruct %int4_38126, %40132, %int4_38127, %int128_38128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40567 = torch.aten.view %40551, %40566 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38129 = torch.constant.int 4
    %int4_38130 = torch.constant.int 4
    %int128_38131 = torch.constant.int 128
    %40568 = torch.prim.ListConstruct %int4_38129, %40132, %int4_38130, %int128_38131 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40569 = torch.aten.view %40553, %40568 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38132 = torch.constant.int 4
    %int4_38133 = torch.constant.int 4
    %int128_38134 = torch.constant.int 128
    %40570 = torch.prim.ListConstruct %int4_38132, %40132, %int4_38133, %int128_38134 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40571 = torch.aten.view %40555, %40570 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
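    // The same unsqueeze/expand/collapse sequence now runs on %39921 ...
    // %39935, which feed the value operand of the attention calls below.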
    %int-2_38135 = torch.constant.int -2
    %40572 = torch.aten.unsqueeze %39921, %int-2_38135 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38136 = torch.constant.int -2
    %40573 = torch.aten.unsqueeze %39923, %int-2_38136 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38137 = torch.constant.int -2
    %40574 = torch.aten.unsqueeze %39925, %int-2_38137 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38138 = torch.constant.int -2
    %40575 = torch.aten.unsqueeze %39927, %int-2_38138 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38139 = torch.constant.int -2
    %40576 = torch.aten.unsqueeze %39929, %int-2_38139 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38140 = torch.constant.int -2
    %40577 = torch.aten.unsqueeze %39931, %int-2_38140 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38141 = torch.constant.int -2
    %40578 = torch.aten.unsqueeze %39933, %int-2_38141 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_38142 = torch.constant.int -2
    %40579 = torch.aten.unsqueeze %39935, %int-2_38142 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %40579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_38143 = torch.constant.int 1
    %40580 = torch.aten.size.int %39845, %int1_38143 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
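    // %40580 re-derives the dynamic token count from dim 1 of %39845 so the
    // expand shapes below stay tied to the runtime size, mirroring %40132 on
    // the key side.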
    %int4_38144 = torch.constant.int 4
    %int1_38145 = torch.constant.int 1
    %int4_38146 = torch.constant.int 4
    %int128_38147 = torch.constant.int 128
    %40581 = torch.prim.ListConstruct %int4_38144, %40580, %int1_38145, %int4_38146, %int128_38147 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38148 = torch.constant.bool false
    %40582 = torch.aten.expand %40572, %40581, %false_38148 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38149 = torch.constant.int 4
    %int1_38150 = torch.constant.int 1
    %int4_38151 = torch.constant.int 4
    %int128_38152 = torch.constant.int 128
    %40583 = torch.prim.ListConstruct %int4_38149, %40580, %int1_38150, %int4_38151, %int128_38152 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38153 = torch.constant.bool false
    %40584 = torch.aten.expand %40573, %40583, %false_38153 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38154 = torch.constant.int 4
    %int1_38155 = torch.constant.int 1
    %int4_38156 = torch.constant.int 4
    %int128_38157 = torch.constant.int 128
    %40585 = torch.prim.ListConstruct %int4_38154, %40580, %int1_38155, %int4_38156, %int128_38157 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38158 = torch.constant.bool false
    %40586 = torch.aten.expand %40574, %40585, %false_38158 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38159 = torch.constant.int 4
    %int1_38160 = torch.constant.int 1
    %int4_38161 = torch.constant.int 4
    %int128_38162 = torch.constant.int 128
    %40587 = torch.prim.ListConstruct %int4_38159, %40580, %int1_38160, %int4_38161, %int128_38162 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38163 = torch.constant.bool false
    %40588 = torch.aten.expand %40575, %40587, %false_38163 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38164 = torch.constant.int 4
    %int1_38165 = torch.constant.int 1
    %int4_38166 = torch.constant.int 4
    %int128_38167 = torch.constant.int 128
    %40589 = torch.prim.ListConstruct %int4_38164, %40580, %int1_38165, %int4_38166, %int128_38167 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38168 = torch.constant.bool false
    %40590 = torch.aten.expand %40576, %40589, %false_38168 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38169 = torch.constant.int 4
    %int1_38170 = torch.constant.int 1
    %int4_38171 = torch.constant.int 4
    %int128_38172 = torch.constant.int 128
    %40591 = torch.prim.ListConstruct %int4_38169, %40580, %int1_38170, %int4_38171, %int128_38172 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38173 = torch.constant.bool false
    %40592 = torch.aten.expand %40577, %40591, %false_38173 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38174 = torch.constant.int 4
    %int1_38175 = torch.constant.int 1
    %int4_38176 = torch.constant.int 4
    %int128_38177 = torch.constant.int 128
    %40593 = torch.prim.ListConstruct %int4_38174, %40580, %int1_38175, %int4_38176, %int128_38177 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38178 = torch.constant.bool false
    %40594 = torch.aten.expand %40578, %40593, %false_38178 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38179 = torch.constant.int 4
    %int1_38180 = torch.constant.int 1
    %int4_38181 = torch.constant.int 4
    %int128_38182 = torch.constant.int 128
    %40595 = torch.prim.ListConstruct %int4_38179, %40580, %int1_38180, %int4_38181, %int128_38182 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_38183 = torch.constant.bool false
    %40596 = torch.aten.expand %40579, %40595, %false_38183 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %40596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_38184 = torch.constant.int 4
    %int4_38185 = torch.constant.int 4
    %int128_38186 = torch.constant.int 128
    %40597 = torch.prim.ListConstruct %int4_38184, %40580, %int4_38185, %int128_38186 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40598 = torch.aten.view %40582, %40597 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38187 = torch.constant.int 4
    %int4_38188 = torch.constant.int 4
    %int128_38189 = torch.constant.int 128
    %40599 = torch.prim.ListConstruct %int4_38187, %40580, %int4_38188, %int128_38189 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40600 = torch.aten.view %40584, %40599 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38190 = torch.constant.int 4
    %int4_38191 = torch.constant.int 4
    %int128_38192 = torch.constant.int 128
    %40601 = torch.prim.ListConstruct %int4_38190, %40580, %int4_38191, %int128_38192 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40602 = torch.aten.view %40586, %40601 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38193 = torch.constant.int 4
    %int4_38194 = torch.constant.int 4
    %int128_38195 = torch.constant.int 128
    %40603 = torch.prim.ListConstruct %int4_38193, %40580, %int4_38194, %int128_38195 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40604 = torch.aten.view %40588, %40603 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38196 = torch.constant.int 4
    %int4_38197 = torch.constant.int 4
    %int128_38198 = torch.constant.int 128
    %40605 = torch.prim.ListConstruct %int4_38196, %40580, %int4_38197, %int128_38198 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40606 = torch.aten.view %40590, %40605 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38199 = torch.constant.int 4
    %int4_38200 = torch.constant.int 4
    %int128_38201 = torch.constant.int 128
    %40607 = torch.prim.ListConstruct %int4_38199, %40580, %int4_38200, %int128_38201 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40608 = torch.aten.view %40592, %40607 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38202 = torch.constant.int 4
    %int4_38203 = torch.constant.int 4
    %int128_38204 = torch.constant.int 128
    %40609 = torch.prim.ListConstruct %int4_38202, %40580, %int4_38203, %int128_38204 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40610 = torch.aten.view %40594, %40609 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_38205 = torch.constant.int 4
    %int4_38206 = torch.constant.int 4
    %int128_38207 = torch.constant.int 128
    %40611 = torch.prim.ListConstruct %int4_38205, %40580, %int4_38206, %int128_38207 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40612 = torch.aten.view %40596, %40611 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
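    // Swap the head and token axes ahead of attention: transpose dims 1 and 2
    // on the per-shard q (%39988 ...), k (%40557 ...), and v (%40598 ...)
    // tensors, [4,?,4,128] -> [4,4,?,128], i.e. (batch, heads, tokens, dim).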
    %int1_38208 = torch.constant.int 1
    %int2_38209 = torch.constant.int 2
    %40613 = torch.aten.transpose.int %39988, %int1_38208, %int2_38209 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40613, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38210 = torch.constant.int 1
    %int2_38211 = torch.constant.int 2
    %40614 = torch.aten.transpose.int %40003, %int1_38210, %int2_38211 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40614, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38212 = torch.constant.int 1
    %int2_38213 = torch.constant.int 2
    %40615 = torch.aten.transpose.int %40018, %int1_38212, %int2_38213 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40615, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38214 = torch.constant.int 1
    %int2_38215 = torch.constant.int 2
    %40616 = torch.aten.transpose.int %40033, %int1_38214, %int2_38215 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40616, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38216 = torch.constant.int 1
    %int2_38217 = torch.constant.int 2
    %40617 = torch.aten.transpose.int %40048, %int1_38216, %int2_38217 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40617, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38218 = torch.constant.int 1
    %int2_38219 = torch.constant.int 2
    %40618 = torch.aten.transpose.int %40063, %int1_38218, %int2_38219 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40618, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38220 = torch.constant.int 1
    %int2_38221 = torch.constant.int 2
    %40619 = torch.aten.transpose.int %40078, %int1_38220, %int2_38221 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40619, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38222 = torch.constant.int 1
    %int2_38223 = torch.constant.int 2
    %40620 = torch.aten.transpose.int %40093, %int1_38222, %int2_38223 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40620, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38224 = torch.constant.int 1
    %int2_38225 = torch.constant.int 2
    %40621 = torch.aten.transpose.int %40557, %int1_38224, %int2_38225 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40621, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38226 = torch.constant.int 1
    %int2_38227 = torch.constant.int 2
    %40622 = torch.aten.transpose.int %40559, %int1_38226, %int2_38227 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40622, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38228 = torch.constant.int 1
    %int2_38229 = torch.constant.int 2
    %40623 = torch.aten.transpose.int %40561, %int1_38228, %int2_38229 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40623, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38230 = torch.constant.int 1
    %int2_38231 = torch.constant.int 2
    %40624 = torch.aten.transpose.int %40563, %int1_38230, %int2_38231 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40624, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38232 = torch.constant.int 1
    %int2_38233 = torch.constant.int 2
    %40625 = torch.aten.transpose.int %40565, %int1_38232, %int2_38233 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40625, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38234 = torch.constant.int 1
    %int2_38235 = torch.constant.int 2
    %40626 = torch.aten.transpose.int %40567, %int1_38234, %int2_38235 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40626, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38236 = torch.constant.int 1
    %int2_38237 = torch.constant.int 2
    %40627 = torch.aten.transpose.int %40569, %int1_38236, %int2_38237 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40627, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38238 = torch.constant.int 1
    %int2_38239 = torch.constant.int 2
    %40628 = torch.aten.transpose.int %40571, %int1_38238, %int2_38239 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40628, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38240 = torch.constant.int 1
    %int2_38241 = torch.constant.int 2
    %40629 = torch.aten.transpose.int %40598, %int1_38240, %int2_38241 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40629, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38242 = torch.constant.int 1
    %int2_38243 = torch.constant.int 2
    %40630 = torch.aten.transpose.int %40600, %int1_38242, %int2_38243 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40630, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38244 = torch.constant.int 1
    %int2_38245 = torch.constant.int 2
    %40631 = torch.aten.transpose.int %40602, %int1_38244, %int2_38245 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40631, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38246 = torch.constant.int 1
    %int2_38247 = torch.constant.int 2
    %40632 = torch.aten.transpose.int %40604, %int1_38246, %int2_38247 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40632, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38248 = torch.constant.int 1
    %int2_38249 = torch.constant.int 2
    %40633 = torch.aten.transpose.int %40606, %int1_38248, %int2_38249 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40633, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38250 = torch.constant.int 1
    %int2_38251 = torch.constant.int 2
    %40634 = torch.aten.transpose.int %40608, %int1_38250, %int2_38251 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40634, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38252 = torch.constant.int 1
    %int2_38253 = torch.constant.int 2
    %40635 = torch.aten.transpose.int %40610, %int1_38252, %int2_38253 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40635, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_38254 = torch.constant.int 1
    %int2_38255 = torch.constant.int 2
    %40636 = torch.aten.transpose.int %40612, %int1_38254, %int2_38255 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %40636, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
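    // One flash-attention call per device shard. The arguments after q/k/v
    // are dropout_p = 0.0, is_causal = true, and none for both attn_mask and
    // scale, so the default 1/sqrt(128) scaling applies. Each call also
    // returns f32 logsumexp statistics (result #1), which are not consumed in
    // this region.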
    %float0.000000e00_38256 = torch.constant.float 0.000000e+00
    %true_38257 = torch.constant.bool true
    %none_38258 = torch.constant.none
    %none_38259 = torch.constant.none
    %40637:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%40613, %40621, %40629, %float0.000000e00_38256, %true_38257, %none_38258, %none_38259) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %40637#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_38260 = torch.constant.float 0.000000e+00
    %true_38261 = torch.constant.bool true
    %none_38262 = torch.constant.none
    %none_38263 = torch.constant.none
    %40638:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%40614, %40622, %40630, %float0.000000e00_38260, %true_38261, %none_38262, %none_38263) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %40638#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_38264 = torch.constant.float 0.000000e+00
    %true_38265 = torch.constant.bool true
    %none_38266 = torch.constant.none
    %none_38267 = torch.constant.none
    %40639:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%40615, %40623, %40631, %float0.000000e00_38264, %true_38265, %none_38266, %none_38267) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %40639#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_38268 = torch.constant.float 0.000000e+00
    %true_38269 = torch.constant.bool true
    %none_38270 = torch.constant.none
    %none_38271 = torch.constant.none
    %40640:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%40616, %40624, %40632, %float0.000000e00_38268, %true_38269, %none_38270, %none_38271) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %40640#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_38272 = torch.constant.float 0.000000e+00
    %true_38273 = torch.constant.bool true
    %none_38274 = torch.constant.none
    %none_38275 = torch.constant.none
    %40641:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%40617, %40625, %40633, %float0.000000e00_38272, %true_38273, %none_38274, %none_38275) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %40641#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_38276 = torch.constant.float 0.000000e+00
    %true_38277 = torch.constant.bool true
    %none_38278 = torch.constant.none
    %none_38279 = torch.constant.none
    %40642:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%40618, %40626, %40634, %float0.000000e00_38276, %true_38277, %none_38278, %none_38279) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %40642#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_38280 = torch.constant.float 0.000000e+00
    %true_38281 = torch.constant.bool true
    %none_38282 = torch.constant.none
    %none_38283 = torch.constant.none
    %40643:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%40619, %40627, %40635, %float0.000000e00_38280, %true_38281, %none_38282, %none_38283) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %40643#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_38284 = torch.constant.float 0.000000e+00
    %true_38285 = torch.constant.bool true
    %none_38286 = torch.constant.none
    %none_38287 = torch.constant.none
    %40644:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%40620, %40628, %40636, %float0.000000e00_38284, %true_38285, %none_38286, %none_38287) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %40644#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
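    // Undo the earlier transpose on each attention output:
    // [4,4,?,128] -> [4,?,4,128].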
    %int1_38288 = torch.constant.int 1
    %int2_38289 = torch.constant.int 2
    %40645 = torch.aten.transpose.int %40637#0, %int1_38288, %int2_38289 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_38290 = torch.constant.int 1
    %int2_38291 = torch.constant.int 2
    %40646 = torch.aten.transpose.int %40638#0, %int1_38290, %int2_38291 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_38292 = torch.constant.int 1
    %int2_38293 = torch.constant.int 2
    %40647 = torch.aten.transpose.int %40639#0, %int1_38292, %int2_38293 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_38294 = torch.constant.int 1
    %int2_38295 = torch.constant.int 2
    %40648 = torch.aten.transpose.int %40640#0, %int1_38294, %int2_38295 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_38296 = torch.constant.int 1
    %int2_38297 = torch.constant.int 2
    %40649 = torch.aten.transpose.int %40641#0, %int1_38296, %int2_38297 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_38298 = torch.constant.int 1
    %int2_38299 = torch.constant.int 2
    %40650 = torch.aten.transpose.int %40642#0, %int1_38298, %int2_38299 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_38300 = torch.constant.int 1
    %int2_38301 = torch.constant.int 2
    %40651 = torch.aten.transpose.int %40643#0, %int1_38300, %int2_38301 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_38302 = torch.constant.int 1
    %int2_38303 = torch.constant.int 2
    %40652 = torch.aten.transpose.int %40644#0, %int1_38302, %int2_38303 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %40652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
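    // Fold the per-shard heads back into the feature axis:
    // [4,?,4,128] -> [4,?,512] (4 heads x 128 dims = 512 features).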
    %int4_38304 = torch.constant.int 4
    %int512_38305 = torch.constant.int 512
    %40653 = torch.prim.ListConstruct %int4_38304, %39974, %int512_38305 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40654 = torch.aten.view %40645, %40653 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %40654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_38306 = torch.constant.int 4
    %int512_38307 = torch.constant.int 512
    %40655 = torch.prim.ListConstruct %int4_38306, %39989, %int512_38307 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40656 = torch.aten.view %40646, %40655 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %40656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_38308 = torch.constant.int 4
    %int512_38309 = torch.constant.int 512
    %40657 = torch.prim.ListConstruct %int4_38308, %40004, %int512_38309 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40658 = torch.aten.view %40647, %40657 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %40658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_38310 = torch.constant.int 4
    %int512_38311 = torch.constant.int 512
    %40659 = torch.prim.ListConstruct %int4_38310, %40019, %int512_38311 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40660 = torch.aten.view %40648, %40659 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %40660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_38312 = torch.constant.int 4
    %int512_38313 = torch.constant.int 512
    %40661 = torch.prim.ListConstruct %int4_38312, %40034, %int512_38313 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40662 = torch.aten.view %40649, %40661 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %40662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_38314 = torch.constant.int 4
    %int512_38315 = torch.constant.int 512
    %40663 = torch.prim.ListConstruct %int4_38314, %40049, %int512_38315 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40664 = torch.aten.view %40650, %40663 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %40664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_38316 = torch.constant.int 4
    %int512_38317 = torch.constant.int 512
    %40665 = torch.prim.ListConstruct %int4_38316, %40064, %int512_38317 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40666 = torch.aten.view %40651, %40665 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %40666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_38318 = torch.constant.int 4
    %int512_38319 = torch.constant.int 512
    %40667 = torch.prim.ListConstruct %int4_38318, %40079, %int512_38319 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40668 = torch.aten.view %40652, %40667 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %40668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
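    // %1480 through %1487 are presumably the eight attn_output weight shards;
    // each [4096, 512] shard is permuted to [512, 4096] for use as a matmul RHS.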
    %int1_38320 = torch.constant.int 1
    %int0_38321 = torch.constant.int 0
    %40669 = torch.prim.ListConstruct %int1_38320, %int0_38321 : (!torch.int, !torch.int) -> !torch.list<int>
    %40670 = torch.aten.permute %1480, %40669 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_38322 = torch.constant.int 1
    %int0_38323 = torch.constant.int 0
    %40671 = torch.prim.ListConstruct %int1_38322, %int0_38323 : (!torch.int, !torch.int) -> !torch.list<int>
    %40672 = torch.aten.permute %1481, %40671 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_38324 = torch.constant.int 1
    %int0_38325 = torch.constant.int 0
    %40673 = torch.prim.ListConstruct %int1_38324, %int0_38325 : (!torch.int, !torch.int) -> !torch.list<int>
    %40674 = torch.aten.permute %1482, %40673 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_38326 = torch.constant.int 1
    %int0_38327 = torch.constant.int 0
    %40675 = torch.prim.ListConstruct %int1_38326, %int0_38327 : (!torch.int, !torch.int) -> !torch.list<int>
    %40676 = torch.aten.permute %1483, %40675 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_38328 = torch.constant.int 1
    %int0_38329 = torch.constant.int 0
    %40677 = torch.prim.ListConstruct %int1_38328, %int0_38329 : (!torch.int, !torch.int) -> !torch.list<int>
    %40678 = torch.aten.permute %1484, %40677 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_38330 = torch.constant.int 1
    %int0_38331 = torch.constant.int 0
    %40679 = torch.prim.ListConstruct %int1_38330, %int0_38331 : (!torch.int, !torch.int) -> !torch.list<int>
    %40680 = torch.aten.permute %1485, %40679 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_38332 = torch.constant.int 1
    %int0_38333 = torch.constant.int 0
    %40681 = torch.prim.ListConstruct %int1_38332, %int0_38333 : (!torch.int, !torch.int) -> !torch.list<int>
    %40682 = torch.aten.permute %1486, %40681 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_38334 = torch.constant.int 1
    %int0_38335 = torch.constant.int 0
    %40683 = torch.prim.ListConstruct %int1_38334, %int0_38335 : (!torch.int, !torch.int) -> !torch.list<int>
    %40684 = torch.aten.permute %1487, %40683 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
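    // Per-device output projection: flatten [4, ?, 512] to [4*?, 512], matmul
    // against the permuted [512, 4096] weight shard, then view the product back
    // as [4, ?, 4096]. This block repeats once per device.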
    %int4_38336 = torch.constant.int 4
    %40685 = torch.aten.mul.int %int4_38336, %39974 : !torch.int, !torch.int -> !torch.int
    %int512_38337 = torch.constant.int 512
    %40686 = torch.prim.ListConstruct %40685, %int512_38337 : (!torch.int, !torch.int) -> !torch.list<int>
    %40687 = torch.aten.view %40654, %40686 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %40687, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %40688 = torch.aten.mm %40687, %40670 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %40688, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38338 = torch.constant.int 4
    %int4096_38339 = torch.constant.int 4096
    %40689 = torch.prim.ListConstruct %int4_38338, %39974, %int4096_38339 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40690 = torch.aten.view %40688, %40689 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_38340 = torch.constant.int 4
    %40691 = torch.aten.mul.int %int4_38340, %39989 : !torch.int, !torch.int -> !torch.int
    %int512_38341 = torch.constant.int 512
    %40692 = torch.prim.ListConstruct %40691, %int512_38341 : (!torch.int, !torch.int) -> !torch.list<int>
    %40693 = torch.aten.view %40656, %40692 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %40693, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %40694 = torch.aten.mm %40693, %40672 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %40694, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38342 = torch.constant.int 4
    %int4096_38343 = torch.constant.int 4096
    %40695 = torch.prim.ListConstruct %int4_38342, %39989, %int4096_38343 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40696 = torch.aten.view %40694, %40695 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_38344 = torch.constant.int 4
    %40697 = torch.aten.mul.int %int4_38344, %40004 : !torch.int, !torch.int -> !torch.int
    %int512_38345 = torch.constant.int 512
    %40698 = torch.prim.ListConstruct %40697, %int512_38345 : (!torch.int, !torch.int) -> !torch.list<int>
    %40699 = torch.aten.view %40658, %40698 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %40699, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %40700 = torch.aten.mm %40699, %40674 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %40700, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38346 = torch.constant.int 4
    %int4096_38347 = torch.constant.int 4096
    %40701 = torch.prim.ListConstruct %int4_38346, %40004, %int4096_38347 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40702 = torch.aten.view %40700, %40701 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_38348 = torch.constant.int 4
    %40703 = torch.aten.mul.int %int4_38348, %40019 : !torch.int, !torch.int -> !torch.int
    %int512_38349 = torch.constant.int 512
    %40704 = torch.prim.ListConstruct %40703, %int512_38349 : (!torch.int, !torch.int) -> !torch.list<int>
    %40705 = torch.aten.view %40660, %40704 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %40705, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %40706 = torch.aten.mm %40705, %40676 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %40706, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38350 = torch.constant.int 4
    %int4096_38351 = torch.constant.int 4096
    %40707 = torch.prim.ListConstruct %int4_38350, %40019, %int4096_38351 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40708 = torch.aten.view %40706, %40707 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_38352 = torch.constant.int 4
    %40709 = torch.aten.mul.int %int4_38352, %40034 : !torch.int, !torch.int -> !torch.int
    %int512_38353 = torch.constant.int 512
    %40710 = torch.prim.ListConstruct %40709, %int512_38353 : (!torch.int, !torch.int) -> !torch.list<int>
    %40711 = torch.aten.view %40662, %40710 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %40711, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %40712 = torch.aten.mm %40711, %40678 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %40712, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38354 = torch.constant.int 4
    %int4096_38355 = torch.constant.int 4096
    %40713 = torch.prim.ListConstruct %int4_38354, %40034, %int4096_38355 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40714 = torch.aten.view %40712, %40713 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_38356 = torch.constant.int 4
    %40715 = torch.aten.mul.int %int4_38356, %40049 : !torch.int, !torch.int -> !torch.int
    %int512_38357 = torch.constant.int 512
    %40716 = torch.prim.ListConstruct %40715, %int512_38357 : (!torch.int, !torch.int) -> !torch.list<int>
    %40717 = torch.aten.view %40664, %40716 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %40717, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %40718 = torch.aten.mm %40717, %40680 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %40718, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38358 = torch.constant.int 4
    %int4096_38359 = torch.constant.int 4096
    %40719 = torch.prim.ListConstruct %int4_38358, %40049, %int4096_38359 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40720 = torch.aten.view %40718, %40719 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_38360 = torch.constant.int 4
    %40721 = torch.aten.mul.int %int4_38360, %40064 : !torch.int, !torch.int -> !torch.int
    %int512_38361 = torch.constant.int 512
    %40722 = torch.prim.ListConstruct %40721, %int512_38361 : (!torch.int, !torch.int) -> !torch.list<int>
    %40723 = torch.aten.view %40666, %40722 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %40723, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %40724 = torch.aten.mm %40723, %40682 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %40724, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38362 = torch.constant.int 4
    %int4096_38363 = torch.constant.int 4096
    %40725 = torch.prim.ListConstruct %int4_38362, %40064, %int4096_38363 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40726 = torch.aten.view %40724, %40725 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_38364 = torch.constant.int 4
    %40727 = torch.aten.mul.int %int4_38364, %40079 : !torch.int, !torch.int -> !torch.int
    %int512_38365 = torch.constant.int 512
    %40728 = torch.prim.ListConstruct %40727, %int512_38365 : (!torch.int, !torch.int) -> !torch.list<int>
    %40729 = torch.aten.view %40668, %40728 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %40729, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %40730 = torch.aten.mm %40729, %40684 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %40730, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38366 = torch.constant.int 4
    %int4096_38367 = torch.constant.int 4096
    %40731 = torch.prim.ListConstruct %int4_38366, %40079, %int4096_38367 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %40732 = torch.aten.view %40730, %40731 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
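    // What follows appears to be a naive all-reduce of the eight partial
    // projections (%40690, %40696, %40702, %40708, %40714, %40720, %40726,
    // %40732): each device pulls in the seven remote partials via
    // flow.tensor.transfer and sums them with its local one, starting with
    // @__device_0 (local partial %40690).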
    %40733 = torch_c.to_builtin_tensor %40696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38368 = arith.constant 1 : index
    %dim_38369 = tensor.dim %40733, %c1_38368 : tensor<4x?x4096xf16>
    %40734 = flow.tensor.transfer %40733 : tensor<4x?x4096xf16>{%dim_38369} to #hal.device.promise<@__device_0>
    %40735 = torch_c.from_builtin_tensor %40734 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40736 = torch_c.to_builtin_tensor %40702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38370 = arith.constant 1 : index
    %dim_38371 = tensor.dim %40736, %c1_38370 : tensor<4x?x4096xf16>
    %40737 = flow.tensor.transfer %40736 : tensor<4x?x4096xf16>{%dim_38371} to #hal.device.promise<@__device_0>
    %40738 = torch_c.from_builtin_tensor %40737 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40739 = torch_c.to_builtin_tensor %40708 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38372 = arith.constant 1 : index
    %dim_38373 = tensor.dim %40739, %c1_38372 : tensor<4x?x4096xf16>
    %40740 = flow.tensor.transfer %40739 : tensor<4x?x4096xf16>{%dim_38373} to #hal.device.promise<@__device_0>
    %40741 = torch_c.from_builtin_tensor %40740 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40742 = torch_c.to_builtin_tensor %40714 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38374 = arith.constant 1 : index
    %dim_38375 = tensor.dim %40742, %c1_38374 : tensor<4x?x4096xf16>
    %40743 = flow.tensor.transfer %40742 : tensor<4x?x4096xf16>{%dim_38375} to #hal.device.promise<@__device_0>
    %40744 = torch_c.from_builtin_tensor %40743 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40745 = torch_c.to_builtin_tensor %40720 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38376 = arith.constant 1 : index
    %dim_38377 = tensor.dim %40745, %c1_38376 : tensor<4x?x4096xf16>
    %40746 = flow.tensor.transfer %40745 : tensor<4x?x4096xf16>{%dim_38377} to #hal.device.promise<@__device_0>
    %40747 = torch_c.from_builtin_tensor %40746 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40748 = torch_c.to_builtin_tensor %40726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38378 = arith.constant 1 : index
    %dim_38379 = tensor.dim %40748, %c1_38378 : tensor<4x?x4096xf16>
    %40749 = flow.tensor.transfer %40748 : tensor<4x?x4096xf16>{%dim_38379} to #hal.device.promise<@__device_0>
    %40750 = torch_c.from_builtin_tensor %40749 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40751 = torch_c.to_builtin_tensor %40732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38380 = arith.constant 1 : index
    %dim_38381 = tensor.dim %40751, %c1_38380 : tensor<4x?x4096xf16>
    %40752 = flow.tensor.transfer %40751 : tensor<4x?x4096xf16>{%dim_38381} to #hal.device.promise<@__device_0>
    %40753 = torch_c.from_builtin_tensor %40752 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38382 = torch.constant.int 1
    %40754 = torch.aten.add.Tensor %40690, %40735, %int1_38382 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38383 = torch.constant.int 1
    %40755 = torch.aten.add.Tensor %40754, %40738, %int1_38383 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38384 = torch.constant.int 1
    %40756 = torch.aten.add.Tensor %40755, %40741, %int1_38384 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38385 = torch.constant.int 1
    %40757 = torch.aten.add.Tensor %40756, %40744, %int1_38385 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38386 = torch.constant.int 1
    %40758 = torch.aten.add.Tensor %40757, %40747, %int1_38386 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38387 = torch.constant.int 1
    %40759 = torch.aten.add.Tensor %40758, %40750, %int1_38387 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38388 = torch.constant.int 1
    %40760 = torch.aten.add.Tensor %40759, %40753, %int1_38388 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
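    // %40760 is the reduced sum on @__device_0. The same gather-and-add pattern
    // now runs for @__device_1, whose local partial is %40696.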
    %40761 = torch_c.to_builtin_tensor %40690 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38389 = arith.constant 1 : index
    %dim_38390 = tensor.dim %40761, %c1_38389 : tensor<4x?x4096xf16>
    %40762 = flow.tensor.transfer %40761 : tensor<4x?x4096xf16>{%dim_38390} to #hal.device.promise<@__device_1>
    %40763 = torch_c.from_builtin_tensor %40762 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40764 = torch_c.to_builtin_tensor %40702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38391 = arith.constant 1 : index
    %dim_38392 = tensor.dim %40764, %c1_38391 : tensor<4x?x4096xf16>
    %40765 = flow.tensor.transfer %40764 : tensor<4x?x4096xf16>{%dim_38392} to #hal.device.promise<@__device_1>
    %40766 = torch_c.from_builtin_tensor %40765 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40767 = torch_c.to_builtin_tensor %40708 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38393 = arith.constant 1 : index
    %dim_38394 = tensor.dim %40767, %c1_38393 : tensor<4x?x4096xf16>
    %40768 = flow.tensor.transfer %40767 : tensor<4x?x4096xf16>{%dim_38394} to #hal.device.promise<@__device_1>
    %40769 = torch_c.from_builtin_tensor %40768 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40770 = torch_c.to_builtin_tensor %40714 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38395 = arith.constant 1 : index
    %dim_38396 = tensor.dim %40770, %c1_38395 : tensor<4x?x4096xf16>
    %40771 = flow.tensor.transfer %40770 : tensor<4x?x4096xf16>{%dim_38396} to #hal.device.promise<@__device_1>
    %40772 = torch_c.from_builtin_tensor %40771 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40773 = torch_c.to_builtin_tensor %40720 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38397 = arith.constant 1 : index
    %dim_38398 = tensor.dim %40773, %c1_38397 : tensor<4x?x4096xf16>
    %40774 = flow.tensor.transfer %40773 : tensor<4x?x4096xf16>{%dim_38398} to #hal.device.promise<@__device_1>
    %40775 = torch_c.from_builtin_tensor %40774 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40776 = torch_c.to_builtin_tensor %40726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38399 = arith.constant 1 : index
    %dim_38400 = tensor.dim %40776, %c1_38399 : tensor<4x?x4096xf16>
    %40777 = flow.tensor.transfer %40776 : tensor<4x?x4096xf16>{%dim_38400} to #hal.device.promise<@__device_1>
    %40778 = torch_c.from_builtin_tensor %40777 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40779 = torch_c.to_builtin_tensor %40732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38401 = arith.constant 1 : index
    %dim_38402 = tensor.dim %40779, %c1_38401 : tensor<4x?x4096xf16>
    %40780 = flow.tensor.transfer %40779 : tensor<4x?x4096xf16>{%dim_38402} to #hal.device.promise<@__device_1>
    %40781 = torch_c.from_builtin_tensor %40780 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38403 = torch.constant.int 1
    %40782 = torch.aten.add.Tensor %40763, %40696, %int1_38403 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38404 = torch.constant.int 1
    %40783 = torch.aten.add.Tensor %40782, %40766, %int1_38404 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38405 = torch.constant.int 1
    %40784 = torch.aten.add.Tensor %40783, %40769, %int1_38405 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38406 = torch.constant.int 1
    %40785 = torch.aten.add.Tensor %40784, %40772, %int1_38406 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38407 = torch.constant.int 1
    %40786 = torch.aten.add.Tensor %40785, %40775, %int1_38407 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38408 = torch.constant.int 1
    %40787 = torch.aten.add.Tensor %40786, %40778, %int1_38408 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38409 = torch.constant.int 1
    %40788 = torch.aten.add.Tensor %40787, %40781, %int1_38409 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
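    // %40788 is the reduced sum on @__device_1; next, the reduction for
    // @__device_2 (local partial %40702).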
    %40789 = torch_c.to_builtin_tensor %40690 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38410 = arith.constant 1 : index
    %dim_38411 = tensor.dim %40789, %c1_38410 : tensor<4x?x4096xf16>
    %40790 = flow.tensor.transfer %40789 : tensor<4x?x4096xf16>{%dim_38411} to #hal.device.promise<@__device_2>
    %40791 = torch_c.from_builtin_tensor %40790 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40792 = torch_c.to_builtin_tensor %40696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38412 = arith.constant 1 : index
    %dim_38413 = tensor.dim %40792, %c1_38412 : tensor<4x?x4096xf16>
    %40793 = flow.tensor.transfer %40792 : tensor<4x?x4096xf16>{%dim_38413} to #hal.device.promise<@__device_2>
    %40794 = torch_c.from_builtin_tensor %40793 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40795 = torch_c.to_builtin_tensor %40708 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38414 = arith.constant 1 : index
    %dim_38415 = tensor.dim %40795, %c1_38414 : tensor<4x?x4096xf16>
    %40796 = flow.tensor.transfer %40795 : tensor<4x?x4096xf16>{%dim_38415} to #hal.device.promise<@__device_2>
    %40797 = torch_c.from_builtin_tensor %40796 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40798 = torch_c.to_builtin_tensor %40714 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38416 = arith.constant 1 : index
    %dim_38417 = tensor.dim %40798, %c1_38416 : tensor<4x?x4096xf16>
    %40799 = flow.tensor.transfer %40798 : tensor<4x?x4096xf16>{%dim_38417} to #hal.device.promise<@__device_2>
    %40800 = torch_c.from_builtin_tensor %40799 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40801 = torch_c.to_builtin_tensor %40720 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38418 = arith.constant 1 : index
    %dim_38419 = tensor.dim %40801, %c1_38418 : tensor<4x?x4096xf16>
    %40802 = flow.tensor.transfer %40801 : tensor<4x?x4096xf16>{%dim_38419} to #hal.device.promise<@__device_2>
    %40803 = torch_c.from_builtin_tensor %40802 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40804 = torch_c.to_builtin_tensor %40726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38420 = arith.constant 1 : index
    %dim_38421 = tensor.dim %40804, %c1_38420 : tensor<4x?x4096xf16>
    %40805 = flow.tensor.transfer %40804 : tensor<4x?x4096xf16>{%dim_38421} to #hal.device.promise<@__device_2>
    %40806 = torch_c.from_builtin_tensor %40805 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40807 = torch_c.to_builtin_tensor %40732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38422 = arith.constant 1 : index
    %dim_38423 = tensor.dim %40807, %c1_38422 : tensor<4x?x4096xf16>
    %40808 = flow.tensor.transfer %40807 : tensor<4x?x4096xf16>{%dim_38423} to #hal.device.promise<@__device_2>
    %40809 = torch_c.from_builtin_tensor %40808 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38424 = torch.constant.int 1
    %40810 = torch.aten.add.Tensor %40791, %40794, %int1_38424 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38425 = torch.constant.int 1
    %40811 = torch.aten.add.Tensor %40810, %40702, %int1_38425 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38426 = torch.constant.int 1
    %40812 = torch.aten.add.Tensor %40811, %40797, %int1_38426 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38427 = torch.constant.int 1
    %40813 = torch.aten.add.Tensor %40812, %40800, %int1_38427 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38428 = torch.constant.int 1
    %40814 = torch.aten.add.Tensor %40813, %40803, %int1_38428 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38429 = torch.constant.int 1
    %40815 = torch.aten.add.Tensor %40814, %40806, %int1_38429 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38430 = torch.constant.int 1
    %40816 = torch.aten.add.Tensor %40815, %40809, %int1_38430 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
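    // Reduction for @__device_3 (local partial %40708).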
    %40817 = torch_c.to_builtin_tensor %40690 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38431 = arith.constant 1 : index
    %dim_38432 = tensor.dim %40817, %c1_38431 : tensor<4x?x4096xf16>
    %40818 = flow.tensor.transfer %40817 : tensor<4x?x4096xf16>{%dim_38432} to #hal.device.promise<@__device_3>
    %40819 = torch_c.from_builtin_tensor %40818 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40820 = torch_c.to_builtin_tensor %40696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38433 = arith.constant 1 : index
    %dim_38434 = tensor.dim %40820, %c1_38433 : tensor<4x?x4096xf16>
    %40821 = flow.tensor.transfer %40820 : tensor<4x?x4096xf16>{%dim_38434} to #hal.device.promise<@__device_3>
    %40822 = torch_c.from_builtin_tensor %40821 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40823 = torch_c.to_builtin_tensor %40702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38435 = arith.constant 1 : index
    %dim_38436 = tensor.dim %40823, %c1_38435 : tensor<4x?x4096xf16>
    %40824 = flow.tensor.transfer %40823 : tensor<4x?x4096xf16>{%dim_38436} to #hal.device.promise<@__device_3>
    %40825 = torch_c.from_builtin_tensor %40824 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40826 = torch_c.to_builtin_tensor %40714 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38437 = arith.constant 1 : index
    %dim_38438 = tensor.dim %40826, %c1_38437 : tensor<4x?x4096xf16>
    %40827 = flow.tensor.transfer %40826 : tensor<4x?x4096xf16>{%dim_38438} to #hal.device.promise<@__device_3>
    %40828 = torch_c.from_builtin_tensor %40827 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40829 = torch_c.to_builtin_tensor %40720 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38439 = arith.constant 1 : index
    %dim_38440 = tensor.dim %40829, %c1_38439 : tensor<4x?x4096xf16>
    %40830 = flow.tensor.transfer %40829 : tensor<4x?x4096xf16>{%dim_38440} to #hal.device.promise<@__device_3>
    %40831 = torch_c.from_builtin_tensor %40830 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40832 = torch_c.to_builtin_tensor %40726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38441 = arith.constant 1 : index
    %dim_38442 = tensor.dim %40832, %c1_38441 : tensor<4x?x4096xf16>
    %40833 = flow.tensor.transfer %40832 : tensor<4x?x4096xf16>{%dim_38442} to #hal.device.promise<@__device_3>
    %40834 = torch_c.from_builtin_tensor %40833 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40835 = torch_c.to_builtin_tensor %40732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38443 = arith.constant 1 : index
    %dim_38444 = tensor.dim %40835, %c1_38443 : tensor<4x?x4096xf16>
    %40836 = flow.tensor.transfer %40835 : tensor<4x?x4096xf16>{%dim_38444} to #hal.device.promise<@__device_3>
    %40837 = torch_c.from_builtin_tensor %40836 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38445 = torch.constant.int 1
    %40838 = torch.aten.add.Tensor %40819, %40822, %int1_38445 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38446 = torch.constant.int 1
    %40839 = torch.aten.add.Tensor %40838, %40825, %int1_38446 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38447 = torch.constant.int 1
    %40840 = torch.aten.add.Tensor %40839, %40708, %int1_38447 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38448 = torch.constant.int 1
    %40841 = torch.aten.add.Tensor %40840, %40828, %int1_38448 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38449 = torch.constant.int 1
    %40842 = torch.aten.add.Tensor %40841, %40831, %int1_38449 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38450 = torch.constant.int 1
    %40843 = torch.aten.add.Tensor %40842, %40834, %int1_38450 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38451 = torch.constant.int 1
    %40844 = torch.aten.add.Tensor %40843, %40837, %int1_38451 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
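    // Reduction for @__device_4 (local partial %40714).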
    %40845 = torch_c.to_builtin_tensor %40690 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38452 = arith.constant 1 : index
    %dim_38453 = tensor.dim %40845, %c1_38452 : tensor<4x?x4096xf16>
    %40846 = flow.tensor.transfer %40845 : tensor<4x?x4096xf16>{%dim_38453} to #hal.device.promise<@__device_4>
    %40847 = torch_c.from_builtin_tensor %40846 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40848 = torch_c.to_builtin_tensor %40696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38454 = arith.constant 1 : index
    %dim_38455 = tensor.dim %40848, %c1_38454 : tensor<4x?x4096xf16>
    %40849 = flow.tensor.transfer %40848 : tensor<4x?x4096xf16>{%dim_38455} to #hal.device.promise<@__device_4>
    %40850 = torch_c.from_builtin_tensor %40849 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40851 = torch_c.to_builtin_tensor %40702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38456 = arith.constant 1 : index
    %dim_38457 = tensor.dim %40851, %c1_38456 : tensor<4x?x4096xf16>
    %40852 = flow.tensor.transfer %40851 : tensor<4x?x4096xf16>{%dim_38457} to #hal.device.promise<@__device_4>
    %40853 = torch_c.from_builtin_tensor %40852 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40854 = torch_c.to_builtin_tensor %40708 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38458 = arith.constant 1 : index
    %dim_38459 = tensor.dim %40854, %c1_38458 : tensor<4x?x4096xf16>
    %40855 = flow.tensor.transfer %40854 : tensor<4x?x4096xf16>{%dim_38459} to #hal.device.promise<@__device_4>
    %40856 = torch_c.from_builtin_tensor %40855 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40857 = torch_c.to_builtin_tensor %40720 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38460 = arith.constant 1 : index
    %dim_38461 = tensor.dim %40857, %c1_38460 : tensor<4x?x4096xf16>
    %40858 = flow.tensor.transfer %40857 : tensor<4x?x4096xf16>{%dim_38461} to #hal.device.promise<@__device_4>
    %40859 = torch_c.from_builtin_tensor %40858 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40860 = torch_c.to_builtin_tensor %40726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38462 = arith.constant 1 : index
    %dim_38463 = tensor.dim %40860, %c1_38462 : tensor<4x?x4096xf16>
    %40861 = flow.tensor.transfer %40860 : tensor<4x?x4096xf16>{%dim_38463} to #hal.device.promise<@__device_4>
    %40862 = torch_c.from_builtin_tensor %40861 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40863 = torch_c.to_builtin_tensor %40732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38464 = arith.constant 1 : index
    %dim_38465 = tensor.dim %40863, %c1_38464 : tensor<4x?x4096xf16>
    %40864 = flow.tensor.transfer %40863 : tensor<4x?x4096xf16>{%dim_38465} to #hal.device.promise<@__device_4>
    %40865 = torch_c.from_builtin_tensor %40864 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38466 = torch.constant.int 1
    %40866 = torch.aten.add.Tensor %40847, %40850, %int1_38466 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38467 = torch.constant.int 1
    %40867 = torch.aten.add.Tensor %40866, %40853, %int1_38467 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38468 = torch.constant.int 1
    %40868 = torch.aten.add.Tensor %40867, %40856, %int1_38468 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38469 = torch.constant.int 1
    %40869 = torch.aten.add.Tensor %40868, %40714, %int1_38469 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38470 = torch.constant.int 1
    %40870 = torch.aten.add.Tensor %40869, %40859, %int1_38470 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38471 = torch.constant.int 1
    %40871 = torch.aten.add.Tensor %40870, %40862, %int1_38471 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38472 = torch.constant.int 1
    %40872 = torch.aten.add.Tensor %40871, %40865, %int1_38472 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
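    // Reduction for @__device_5 (local partial %40720).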
    %40873 = torch_c.to_builtin_tensor %40690 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38473 = arith.constant 1 : index
    %dim_38474 = tensor.dim %40873, %c1_38473 : tensor<4x?x4096xf16>
    %40874 = flow.tensor.transfer %40873 : tensor<4x?x4096xf16>{%dim_38474} to #hal.device.promise<@__device_5>
    %40875 = torch_c.from_builtin_tensor %40874 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40876 = torch_c.to_builtin_tensor %40696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38475 = arith.constant 1 : index
    %dim_38476 = tensor.dim %40876, %c1_38475 : tensor<4x?x4096xf16>
    %40877 = flow.tensor.transfer %40876 : tensor<4x?x4096xf16>{%dim_38476} to #hal.device.promise<@__device_5>
    %40878 = torch_c.from_builtin_tensor %40877 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40879 = torch_c.to_builtin_tensor %40702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38477 = arith.constant 1 : index
    %dim_38478 = tensor.dim %40879, %c1_38477 : tensor<4x?x4096xf16>
    %40880 = flow.tensor.transfer %40879 : tensor<4x?x4096xf16>{%dim_38478} to #hal.device.promise<@__device_5>
    %40881 = torch_c.from_builtin_tensor %40880 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40882 = torch_c.to_builtin_tensor %40708 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38479 = arith.constant 1 : index
    %dim_38480 = tensor.dim %40882, %c1_38479 : tensor<4x?x4096xf16>
    %40883 = flow.tensor.transfer %40882 : tensor<4x?x4096xf16>{%dim_38480} to #hal.device.promise<@__device_5>
    %40884 = torch_c.from_builtin_tensor %40883 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40885 = torch_c.to_builtin_tensor %40714 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38481 = arith.constant 1 : index
    %dim_38482 = tensor.dim %40885, %c1_38481 : tensor<4x?x4096xf16>
    %40886 = flow.tensor.transfer %40885 : tensor<4x?x4096xf16>{%dim_38482} to #hal.device.promise<@__device_5>
    %40887 = torch_c.from_builtin_tensor %40886 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40888 = torch_c.to_builtin_tensor %40726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38483 = arith.constant 1 : index
    %dim_38484 = tensor.dim %40888, %c1_38483 : tensor<4x?x4096xf16>
    %40889 = flow.tensor.transfer %40888 : tensor<4x?x4096xf16>{%dim_38484} to #hal.device.promise<@__device_5>
    %40890 = torch_c.from_builtin_tensor %40889 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40891 = torch_c.to_builtin_tensor %40732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38485 = arith.constant 1 : index
    %dim_38486 = tensor.dim %40891, %c1_38485 : tensor<4x?x4096xf16>
    %40892 = flow.tensor.transfer %40891 : tensor<4x?x4096xf16>{%dim_38486} to #hal.device.promise<@__device_5>
    %40893 = torch_c.from_builtin_tensor %40892 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38487 = torch.constant.int 1
    %40894 = torch.aten.add.Tensor %40875, %40878, %int1_38487 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38488 = torch.constant.int 1
    %40895 = torch.aten.add.Tensor %40894, %40881, %int1_38488 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38489 = torch.constant.int 1
    %40896 = torch.aten.add.Tensor %40895, %40884, %int1_38489 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38490 = torch.constant.int 1
    %40897 = torch.aten.add.Tensor %40896, %40887, %int1_38490 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38491 = torch.constant.int 1
    %40898 = torch.aten.add.Tensor %40897, %40720, %int1_38491 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38492 = torch.constant.int 1
    %40899 = torch.aten.add.Tensor %40898, %40890, %int1_38492 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38493 = torch.constant.int 1
    %40900 = torch.aten.add.Tensor %40899, %40893, %int1_38493 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
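    // Reduction for @__device_6 (local partial %40726).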
    %40901 = torch_c.to_builtin_tensor %40690 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38494 = arith.constant 1 : index
    %dim_38495 = tensor.dim %40901, %c1_38494 : tensor<4x?x4096xf16>
    %40902 = flow.tensor.transfer %40901 : tensor<4x?x4096xf16>{%dim_38495} to #hal.device.promise<@__device_6>
    %40903 = torch_c.from_builtin_tensor %40902 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40904 = torch_c.to_builtin_tensor %40696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38496 = arith.constant 1 : index
    %dim_38497 = tensor.dim %40904, %c1_38496 : tensor<4x?x4096xf16>
    %40905 = flow.tensor.transfer %40904 : tensor<4x?x4096xf16>{%dim_38497} to #hal.device.promise<@__device_6>
    %40906 = torch_c.from_builtin_tensor %40905 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40907 = torch_c.to_builtin_tensor %40702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38498 = arith.constant 1 : index
    %dim_38499 = tensor.dim %40907, %c1_38498 : tensor<4x?x4096xf16>
    %40908 = flow.tensor.transfer %40907 : tensor<4x?x4096xf16>{%dim_38499} to #hal.device.promise<@__device_6>
    %40909 = torch_c.from_builtin_tensor %40908 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40910 = torch_c.to_builtin_tensor %40708 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38500 = arith.constant 1 : index
    %dim_38501 = tensor.dim %40910, %c1_38500 : tensor<4x?x4096xf16>
    %40911 = flow.tensor.transfer %40910 : tensor<4x?x4096xf16>{%dim_38501} to #hal.device.promise<@__device_6>
    %40912 = torch_c.from_builtin_tensor %40911 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40913 = torch_c.to_builtin_tensor %40714 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38502 = arith.constant 1 : index
    %dim_38503 = tensor.dim %40913, %c1_38502 : tensor<4x?x4096xf16>
    %40914 = flow.tensor.transfer %40913 : tensor<4x?x4096xf16>{%dim_38503} to #hal.device.promise<@__device_6>
    %40915 = torch_c.from_builtin_tensor %40914 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40916 = torch_c.to_builtin_tensor %40720 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38504 = arith.constant 1 : index
    %dim_38505 = tensor.dim %40916, %c1_38504 : tensor<4x?x4096xf16>
    %40917 = flow.tensor.transfer %40916 : tensor<4x?x4096xf16>{%dim_38505} to #hal.device.promise<@__device_6>
    %40918 = torch_c.from_builtin_tensor %40917 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40919 = torch_c.to_builtin_tensor %40732 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38506 = arith.constant 1 : index
    %dim_38507 = tensor.dim %40919, %c1_38506 : tensor<4x?x4096xf16>
    %40920 = flow.tensor.transfer %40919 : tensor<4x?x4096xf16>{%dim_38507} to #hal.device.promise<@__device_6>
    %40921 = torch_c.from_builtin_tensor %40920 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38508 = torch.constant.int 1
    %40922 = torch.aten.add.Tensor %40903, %40906, %int1_38508 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38509 = torch.constant.int 1
    %40923 = torch.aten.add.Tensor %40922, %40909, %int1_38509 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38510 = torch.constant.int 1
    %40924 = torch.aten.add.Tensor %40923, %40912, %int1_38510 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38511 = torch.constant.int 1
    %40925 = torch.aten.add.Tensor %40924, %40915, %int1_38511 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38512 = torch.constant.int 1
    %40926 = torch.aten.add.Tensor %40925, %40918, %int1_38512 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38513 = torch.constant.int 1
    %40927 = torch.aten.add.Tensor %40926, %40726, %int1_38513 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38514 = torch.constant.int 1
    %40928 = torch.aten.add.Tensor %40927, %40921, %int1_38514 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
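    // Same gather, now targeting @__device_7: transfer the other devices' partials ahead of the local reduction.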
    %40929 = torch_c.to_builtin_tensor %40690 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38515 = arith.constant 1 : index
    %dim_38516 = tensor.dim %40929, %c1_38515 : tensor<4x?x4096xf16>
    %40930 = flow.tensor.transfer %40929 : tensor<4x?x4096xf16>{%dim_38516} to #hal.device.promise<@__device_7>
    %40931 = torch_c.from_builtin_tensor %40930 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40932 = torch_c.to_builtin_tensor %40696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38517 = arith.constant 1 : index
    %dim_38518 = tensor.dim %40932, %c1_38517 : tensor<4x?x4096xf16>
    %40933 = flow.tensor.transfer %40932 : tensor<4x?x4096xf16>{%dim_38518} to #hal.device.promise<@__device_7>
    %40934 = torch_c.from_builtin_tensor %40933 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40935 = torch_c.to_builtin_tensor %40702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38519 = arith.constant 1 : index
    %dim_38520 = tensor.dim %40935, %c1_38519 : tensor<4x?x4096xf16>
    %40936 = flow.tensor.transfer %40935 : tensor<4x?x4096xf16>{%dim_38520} to #hal.device.promise<@__device_7>
    %40937 = torch_c.from_builtin_tensor %40936 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40938 = torch_c.to_builtin_tensor %40708 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38521 = arith.constant 1 : index
    %dim_38522 = tensor.dim %40938, %c1_38521 : tensor<4x?x4096xf16>
    %40939 = flow.tensor.transfer %40938 : tensor<4x?x4096xf16>{%dim_38522} to #hal.device.promise<@__device_7>
    %40940 = torch_c.from_builtin_tensor %40939 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40941 = torch_c.to_builtin_tensor %40714 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38523 = arith.constant 1 : index
    %dim_38524 = tensor.dim %40941, %c1_38523 : tensor<4x?x4096xf16>
    %40942 = flow.tensor.transfer %40941 : tensor<4x?x4096xf16>{%dim_38524} to #hal.device.promise<@__device_7>
    %40943 = torch_c.from_builtin_tensor %40942 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40944 = torch_c.to_builtin_tensor %40720 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38525 = arith.constant 1 : index
    %dim_38526 = tensor.dim %40944, %c1_38525 : tensor<4x?x4096xf16>
    %40945 = flow.tensor.transfer %40944 : tensor<4x?x4096xf16>{%dim_38526} to #hal.device.promise<@__device_7>
    %40946 = torch_c.from_builtin_tensor %40945 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %40947 = torch_c.to_builtin_tensor %40726 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38527 = arith.constant 1 : index
    %dim_38528 = tensor.dim %40947, %c1_38527 : tensor<4x?x4096xf16>
    %40948 = flow.tensor.transfer %40947 : tensor<4x?x4096xf16>{%dim_38528} to #hal.device.promise<@__device_7>
    %40949 = torch_c.from_builtin_tensor %40948 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
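    // Chained reduction of the eight partials on @__device_7.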
    %int1_38529 = torch.constant.int 1
    %40950 = torch.aten.add.Tensor %40931, %40934, %int1_38529 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38530 = torch.constant.int 1
    %40951 = torch.aten.add.Tensor %40950, %40937, %int1_38530 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38531 = torch.constant.int 1
    %40952 = torch.aten.add.Tensor %40951, %40940, %int1_38531 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38532 = torch.constant.int 1
    %40953 = torch.aten.add.Tensor %40952, %40943, %int1_38532 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38533 = torch.constant.int 1
    %40954 = torch.aten.add.Tensor %40953, %40946, %int1_38533 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38534 = torch.constant.int 1
    %40955 = torch.aten.add.Tensor %40954, %40949, %int1_38534 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38535 = torch.constant.int 1
    %40956 = torch.aten.add.Tensor %40955, %40732, %int1_38535 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
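    // Per-device residual connection: each reduced output (%40760 ... %40956) is added to the corresponding residual stream (%39616 ... %39623).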
    %int1_38536 = torch.constant.int 1
    %40957 = torch.aten.add.Tensor %39616, %40760, %int1_38536 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38537 = torch.constant.int 1
    %40958 = torch.aten.add.Tensor %39617, %40788, %int1_38537 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38538 = torch.constant.int 1
    %40959 = torch.aten.add.Tensor %39618, %40816, %int1_38538 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38539 = torch.constant.int 1
    %40960 = torch.aten.add.Tensor %39619, %40844, %int1_38539 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38540 = torch.constant.int 1
    %40961 = torch.aten.add.Tensor %39620, %40872, %int1_38540 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38541 = torch.constant.int 1
    %40962 = torch.aten.add.Tensor %39621, %40900, %int1_38541 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38542 = torch.constant.int 1
    %40963 = torch.aten.add.Tensor %39622, %40928, %int1_38542 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38543 = torch.constant.int 1
    %40964 = torch.aten.add.Tensor %39623, %40956, %int1_38543 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %40964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
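    // What follows matches an RMSNorm, replicated on all eight devices: first upcast f16 -> f32 (torch dtype code 6) for stable statistics.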
    %int6_38544 = torch.constant.int 6
    %40965 = torch.prims.convert_element_type %40957, %int6_38544 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38545 = torch.constant.int 6
    %40966 = torch.prims.convert_element_type %40958, %int6_38545 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38546 = torch.constant.int 6
    %40967 = torch.prims.convert_element_type %40959, %int6_38546 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38547 = torch.constant.int 6
    %40968 = torch.prims.convert_element_type %40960, %int6_38547 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38548 = torch.constant.int 6
    %40969 = torch.prims.convert_element_type %40961, %int6_38548 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38549 = torch.constant.int 6
    %40970 = torch.prims.convert_element_type %40962, %int6_38549 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38550 = torch.constant.int 6
    %40971 = torch.prims.convert_element_type %40963, %int6_38550 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38551 = torch.constant.int 6
    %40972 = torch.prims.convert_element_type %40964, %int6_38551 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
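    // Square each element: x^2.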
    %int2_38552 = torch.constant.int 2
    %40973 = torch.aten.pow.Tensor_Scalar %40965, %int2_38552 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38553 = torch.constant.int 2
    %40974 = torch.aten.pow.Tensor_Scalar %40966, %int2_38553 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38554 = torch.constant.int 2
    %40975 = torch.aten.pow.Tensor_Scalar %40967, %int2_38554 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38555 = torch.constant.int 2
    %40976 = torch.aten.pow.Tensor_Scalar %40968, %int2_38555 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38556 = torch.constant.int 2
    %40977 = torch.aten.pow.Tensor_Scalar %40969, %int2_38556 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38557 = torch.constant.int 2
    %40978 = torch.aten.pow.Tensor_Scalar %40970, %int2_38557 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38558 = torch.constant.int 2
    %40979 = torch.aten.pow.Tensor_Scalar %40971, %int2_38558 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38559 = torch.constant.int 2
    %40980 = torch.aten.pow.Tensor_Scalar %40972, %int2_38559 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %40980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
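    // Mean of x^2 over the hidden dimension (dim -1, keepdim = true) -> [4,?,1].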
    %int-1_38560 = torch.constant.int -1
    %40981 = torch.prim.ListConstruct %int-1_38560 : (!torch.int) -> !torch.list<int>
    %true_38561 = torch.constant.bool true
    %none_38562 = torch.constant.none
    %40982 = torch.aten.mean.dim %40973, %40981, %true_38561, %none_38562 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %40982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38563 = torch.constant.int -1
    %40983 = torch.prim.ListConstruct %int-1_38563 : (!torch.int) -> !torch.list<int>
    %true_38564 = torch.constant.bool true
    %none_38565 = torch.constant.none
    %40984 = torch.aten.mean.dim %40974, %40983, %true_38564, %none_38565 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %40984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38566 = torch.constant.int -1
    %40985 = torch.prim.ListConstruct %int-1_38566 : (!torch.int) -> !torch.list<int>
    %true_38567 = torch.constant.bool true
    %none_38568 = torch.constant.none
    %40986 = torch.aten.mean.dim %40975, %40985, %true_38567, %none_38568 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %40986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38569 = torch.constant.int -1
    %40987 = torch.prim.ListConstruct %int-1_38569 : (!torch.int) -> !torch.list<int>
    %true_38570 = torch.constant.bool true
    %none_38571 = torch.constant.none
    %40988 = torch.aten.mean.dim %40976, %40987, %true_38570, %none_38571 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %40988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38572 = torch.constant.int -1
    %40989 = torch.prim.ListConstruct %int-1_38572 : (!torch.int) -> !torch.list<int>
    %true_38573 = torch.constant.bool true
    %none_38574 = torch.constant.none
    %40990 = torch.aten.mean.dim %40977, %40989, %true_38573, %none_38574 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %40990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38575 = torch.constant.int -1
    %40991 = torch.prim.ListConstruct %int-1_38575 : (!torch.int) -> !torch.list<int>
    %true_38576 = torch.constant.bool true
    %none_38577 = torch.constant.none
    %40992 = torch.aten.mean.dim %40978, %40991, %true_38576, %none_38577 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %40992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38578 = torch.constant.int -1
    %40993 = torch.prim.ListConstruct %int-1_38578 : (!torch.int) -> !torch.list<int>
    %true_38579 = torch.constant.bool true
    %none_38580 = torch.constant.none
    %40994 = torch.aten.mean.dim %40979, %40993, %true_38579, %none_38580 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %40994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38581 = torch.constant.int -1
    %40995 = torch.prim.ListConstruct %int-1_38581 : (!torch.int) -> !torch.list<int>
    %true_38582 = torch.constant.bool true
    %none_38583 = torch.constant.none
    %40996 = torch.aten.mean.dim %40980, %40995, %true_38582, %none_38583 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %40996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
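    // Add epsilon (~1e-5) to the mean of squares.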
    %float9.999990e-06_38584 = torch.constant.float 9.9999997473787516E-6
    %int1_38585 = torch.constant.int 1
    %40997 = torch.aten.add.Scalar %40982, %float9.999990e-06_38584, %int1_38585 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %40997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38586 = torch.constant.float 9.9999997473787516E-6
    %int1_38587 = torch.constant.int 1
    %40998 = torch.aten.add.Scalar %40984, %float9.999990e-06_38586, %int1_38587 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %40998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38588 = torch.constant.float 9.9999997473787516E-6
    %int1_38589 = torch.constant.int 1
    %40999 = torch.aten.add.Scalar %40986, %float9.999990e-06_38588, %int1_38589 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %40999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38590 = torch.constant.float 9.9999997473787516E-6
    %int1_38591 = torch.constant.int 1
    %41000 = torch.aten.add.Scalar %40988, %float9.999990e-06_38590, %int1_38591 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38592 = torch.constant.float 9.9999997473787516E-6
    %int1_38593 = torch.constant.int 1
    %41001 = torch.aten.add.Scalar %40990, %float9.999990e-06_38592, %int1_38593 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38594 = torch.constant.float 9.9999997473787516E-6
    %int1_38595 = torch.constant.int 1
    %41002 = torch.aten.add.Scalar %40992, %float9.999990e-06_38594, %int1_38595 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38596 = torch.constant.float 9.9999997473787516E-6
    %int1_38597 = torch.constant.int 1
    %41003 = torch.aten.add.Scalar %40994, %float9.999990e-06_38596, %int1_38597 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38598 = torch.constant.float 9.9999997473787516E-6
    %int1_38599 = torch.constant.int 1
    %41004 = torch.aten.add.Scalar %40996, %float9.999990e-06_38598, %int1_38599 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
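    // rsqrt(mean(x^2) + eps).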
    %41005 = torch.aten.rsqrt %40997 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41006 = torch.aten.rsqrt %40998 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41007 = torch.aten.rsqrt %40999 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41008 = torch.aten.rsqrt %41000 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41009 = torch.aten.rsqrt %41001 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41010 = torch.aten.rsqrt %41002 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41011 = torch.aten.rsqrt %41003 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41012 = torch.aten.rsqrt %41004 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
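    // Normalize: x * rsqrt(mean(x^2) + eps).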
    %41013 = torch.aten.mul.Tensor %40965, %41005 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41014 = torch.aten.mul.Tensor %40966, %41006 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41015 = torch.aten.mul.Tensor %40967, %41007 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41016 = torch.aten.mul.Tensor %40968, %41008 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41017 = torch.aten.mul.Tensor %40969, %41009 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41018 = torch.aten.mul.Tensor %40970, %41010 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41019 = torch.aten.mul.Tensor %40971, %41011 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41020 = torch.aten.mul.Tensor %40972, %41012 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
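    // Scale by the per-device norm-weight replicas (%1488 ... %1495); given the MLP that follows, these are presumably the blk.*.ffn_norm.weight globals.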
    %41021 = torch.aten.mul.Tensor %1488, %41013 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41022 = torch.aten.mul.Tensor %1489, %41014 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41023 = torch.aten.mul.Tensor %1490, %41015 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41024 = torch.aten.mul.Tensor %1491, %41016 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41025 = torch.aten.mul.Tensor %1492, %41017 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41026 = torch.aten.mul.Tensor %1493, %41018 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41027 = torch.aten.mul.Tensor %1494, %41019 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41028 = torch.aten.mul.Tensor %1495, %41020 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
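    // Downcast the normalized activations back to f16 (torch dtype code 5).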
    %int5_38600 = torch.constant.int 5
    %41029 = torch.prims.convert_element_type %41021, %int5_38600 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38601 = torch.constant.int 5
    %41030 = torch.prims.convert_element_type %41022, %int5_38601 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38602 = torch.constant.int 5
    %41031 = torch.prims.convert_element_type %41023, %int5_38602 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38603 = torch.constant.int 5
    %41032 = torch.prims.convert_element_type %41024, %int5_38603 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38604 = torch.constant.int 5
    %41033 = torch.prims.convert_element_type %41025, %int5_38604 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38605 = torch.constant.int 5
    %41034 = torch.prims.convert_element_type %41026, %int5_38605 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38606 = torch.constant.int 5
    %41035 = torch.prims.convert_element_type %41027, %int5_38606 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38607 = torch.constant.int 5
    %41036 = torch.prims.convert_element_type %41028, %int5_38607 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
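    // FFN gate projection: transpose each [1792,4096] weight shard to [4096,1792] for the matmul; 1792 = 14336/8, consistent with an 8-way column shard of the FFN intermediate dimension.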
    %int1_38608 = torch.constant.int 1
    %int0_38609 = torch.constant.int 0
    %41037 = torch.prim.ListConstruct %int1_38608, %int0_38609 : (!torch.int, !torch.int) -> !torch.list<int>
    %41038 = torch.aten.permute %1496, %41037 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38610 = torch.constant.int 1
    %int0_38611 = torch.constant.int 0
    %41039 = torch.prim.ListConstruct %int1_38610, %int0_38611 : (!torch.int, !torch.int) -> !torch.list<int>
    %41040 = torch.aten.permute %1497, %41039 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38612 = torch.constant.int 1
    %int0_38613 = torch.constant.int 0
    %41041 = torch.prim.ListConstruct %int1_38612, %int0_38613 : (!torch.int, !torch.int) -> !torch.list<int>
    %41042 = torch.aten.permute %1498, %41041 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38614 = torch.constant.int 1
    %int0_38615 = torch.constant.int 0
    %41043 = torch.prim.ListConstruct %int1_38614, %int0_38615 : (!torch.int, !torch.int) -> !torch.list<int>
    %41044 = torch.aten.permute %1499, %41043 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38616 = torch.constant.int 1
    %int0_38617 = torch.constant.int 0
    %41045 = torch.prim.ListConstruct %int1_38616, %int0_38617 : (!torch.int, !torch.int) -> !torch.list<int>
    %41046 = torch.aten.permute %1500, %41045 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38618 = torch.constant.int 1
    %int0_38619 = torch.constant.int 0
    %41047 = torch.prim.ListConstruct %int1_38618, %int0_38619 : (!torch.int, !torch.int) -> !torch.list<int>
    %41048 = torch.aten.permute %1501, %41047 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38620 = torch.constant.int 1
    %int0_38621 = torch.constant.int 0
    %41049 = torch.prim.ListConstruct %int1_38620, %int0_38621 : (!torch.int, !torch.int) -> !torch.list<int>
    %41050 = torch.aten.permute %1502, %41049 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38622 = torch.constant.int 1
    %int0_38623 = torch.constant.int 0
    %41051 = torch.prim.ListConstruct %int1_38622, %int0_38623 : (!torch.int, !torch.int) -> !torch.list<int>
    %41052 = torch.aten.permute %1503, %41051 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
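    // For each device: flatten [4,?,4096] -> [4*?,4096], matmul with the transposed shard, then reshape back to [4,?,1792].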
    %int4_38624 = torch.constant.int 4
    %41053 = torch.aten.mul.int %int4_38624, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38625 = torch.constant.int 4096
    %41054 = torch.prim.ListConstruct %41053, %int4096_38625 : (!torch.int, !torch.int) -> !torch.list<int>
    %41055 = torch.aten.view %41029, %41054 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41055, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41056 = torch.aten.mm %41055, %41038 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41056, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38626 = torch.constant.int 4
    %int1792_38627 = torch.constant.int 1792
    %41057 = torch.prim.ListConstruct %int4_38626, %2482, %int1792_38627 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41058 = torch.aten.view %41056, %41057 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38628 = torch.constant.int 4
    %41059 = torch.aten.mul.int %int4_38628, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38629 = torch.constant.int 4096
    %41060 = torch.prim.ListConstruct %41059, %int4096_38629 : (!torch.int, !torch.int) -> !torch.list<int>
    %41061 = torch.aten.view %41030, %41060 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41061, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41062 = torch.aten.mm %41061, %41040 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41062, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38630 = torch.constant.int 4
    %int1792_38631 = torch.constant.int 1792
    %41063 = torch.prim.ListConstruct %int4_38630, %2482, %int1792_38631 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41064 = torch.aten.view %41062, %41063 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38632 = torch.constant.int 4
    %41065 = torch.aten.mul.int %int4_38632, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38633 = torch.constant.int 4096
    %41066 = torch.prim.ListConstruct %41065, %int4096_38633 : (!torch.int, !torch.int) -> !torch.list<int>
    %41067 = torch.aten.view %41031, %41066 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41067, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41068 = torch.aten.mm %41067, %41042 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41068, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38634 = torch.constant.int 4
    %int1792_38635 = torch.constant.int 1792
    %41069 = torch.prim.ListConstruct %int4_38634, %2482, %int1792_38635 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41070 = torch.aten.view %41068, %41069 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38636 = torch.constant.int 4
    %41071 = torch.aten.mul.int %int4_38636, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38637 = torch.constant.int 4096
    %41072 = torch.prim.ListConstruct %41071, %int4096_38637 : (!torch.int, !torch.int) -> !torch.list<int>
    %41073 = torch.aten.view %41032, %41072 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41073, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41074 = torch.aten.mm %41073, %41044 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41074, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38638 = torch.constant.int 4
    %int1792_38639 = torch.constant.int 1792
    %41075 = torch.prim.ListConstruct %int4_38638, %2482, %int1792_38639 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41076 = torch.aten.view %41074, %41075 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38640 = torch.constant.int 4
    %41077 = torch.aten.mul.int %int4_38640, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38641 = torch.constant.int 4096
    %41078 = torch.prim.ListConstruct %41077, %int4096_38641 : (!torch.int, !torch.int) -> !torch.list<int>
    %41079 = torch.aten.view %41033, %41078 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41079, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41080 = torch.aten.mm %41079, %41046 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41080, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38642 = torch.constant.int 4
    %int1792_38643 = torch.constant.int 1792
    %41081 = torch.prim.ListConstruct %int4_38642, %2482, %int1792_38643 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41082 = torch.aten.view %41080, %41081 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38644 = torch.constant.int 4
    %41083 = torch.aten.mul.int %int4_38644, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38645 = torch.constant.int 4096
    %41084 = torch.prim.ListConstruct %41083, %int4096_38645 : (!torch.int, !torch.int) -> !torch.list<int>
    %41085 = torch.aten.view %41034, %41084 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41085, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41086 = torch.aten.mm %41085, %41048 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41086, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38646 = torch.constant.int 4
    %int1792_38647 = torch.constant.int 1792
    %41087 = torch.prim.ListConstruct %int4_38646, %2482, %int1792_38647 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41088 = torch.aten.view %41086, %41087 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38648 = torch.constant.int 4
    %41089 = torch.aten.mul.int %int4_38648, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38649 = torch.constant.int 4096
    %41090 = torch.prim.ListConstruct %41089, %int4096_38649 : (!torch.int, !torch.int) -> !torch.list<int>
    %41091 = torch.aten.view %41035, %41090 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41091, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41092 = torch.aten.mm %41091, %41050 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41092, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38650 = torch.constant.int 4
    %int1792_38651 = torch.constant.int 1792
    %41093 = torch.prim.ListConstruct %int4_38650, %2482, %int1792_38651 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41094 = torch.aten.view %41092, %41093 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38652 = torch.constant.int 4
    %41095 = torch.aten.mul.int %int4_38652, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38653 = torch.constant.int 4096
    %41096 = torch.prim.ListConstruct %41095, %int4096_38653 : (!torch.int, !torch.int) -> !torch.list<int>
    %41097 = torch.aten.view %41036, %41096 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41097, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41098 = torch.aten.mm %41097, %41052 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41098, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38654 = torch.constant.int 4
    %int1792_38655 = torch.constant.int 1792
    %41099 = torch.prim.ListConstruct %int4_38654, %2482, %int1792_38655 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41100 = torch.aten.view %41098, %41099 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
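    // SiLU activation on each device's gate projection.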
    %41101 = torch.aten.silu %41058 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41102 = torch.aten.silu %41064 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41103 = torch.aten.silu %41070 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41104 = torch.aten.silu %41076 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41105 = torch.aten.silu %41082 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41106 = torch.aten.silu %41088 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41107 = torch.aten.silu %41094 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41108 = torch.aten.silu %41100 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
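    // FFN up projection: same transpose + flatten/mm/reshape pattern with the second set of [1792,4096] weight shards (%1504 ... %1511).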
    %int1_38656 = torch.constant.int 1
    %int0_38657 = torch.constant.int 0
    %41109 = torch.prim.ListConstruct %int1_38656, %int0_38657 : (!torch.int, !torch.int) -> !torch.list<int>
    %41110 = torch.aten.permute %1504, %41109 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38658 = torch.constant.int 1
    %int0_38659 = torch.constant.int 0
    %41111 = torch.prim.ListConstruct %int1_38658, %int0_38659 : (!torch.int, !torch.int) -> !torch.list<int>
    %41112 = torch.aten.permute %1505, %41111 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38660 = torch.constant.int 1
    %int0_38661 = torch.constant.int 0
    %41113 = torch.prim.ListConstruct %int1_38660, %int0_38661 : (!torch.int, !torch.int) -> !torch.list<int>
    %41114 = torch.aten.permute %1506, %41113 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38662 = torch.constant.int 1
    %int0_38663 = torch.constant.int 0
    %41115 = torch.prim.ListConstruct %int1_38662, %int0_38663 : (!torch.int, !torch.int) -> !torch.list<int>
    %41116 = torch.aten.permute %1507, %41115 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38664 = torch.constant.int 1
    %int0_38665 = torch.constant.int 0
    %41117 = torch.prim.ListConstruct %int1_38664, %int0_38665 : (!torch.int, !torch.int) -> !torch.list<int>
    %41118 = torch.aten.permute %1508, %41117 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38666 = torch.constant.int 1
    %int0_38667 = torch.constant.int 0
    %41119 = torch.prim.ListConstruct %int1_38666, %int0_38667 : (!torch.int, !torch.int) -> !torch.list<int>
    %41120 = torch.aten.permute %1509, %41119 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38668 = torch.constant.int 1
    %int0_38669 = torch.constant.int 0
    %41121 = torch.prim.ListConstruct %int1_38668, %int0_38669 : (!torch.int, !torch.int) -> !torch.list<int>
    %41122 = torch.aten.permute %1510, %41121 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_38670 = torch.constant.int 1
    %int0_38671 = torch.constant.int 0
    %41123 = torch.prim.ListConstruct %int1_38670, %int0_38671 : (!torch.int, !torch.int) -> !torch.list<int>
    %41124 = torch.aten.permute %1511, %41123 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int4_38672 = torch.constant.int 4
    %41125 = torch.aten.mul.int %int4_38672, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38673 = torch.constant.int 4096
    %41126 = torch.prim.ListConstruct %41125, %int4096_38673 : (!torch.int, !torch.int) -> !torch.list<int>
    %41127 = torch.aten.view %41029, %41126 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41127, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41128 = torch.aten.mm %41127, %41110 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41128, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38674 = torch.constant.int 4
    %int1792_38675 = torch.constant.int 1792
    %41129 = torch.prim.ListConstruct %int4_38674, %2482, %int1792_38675 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41130 = torch.aten.view %41128, %41129 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38676 = torch.constant.int 4
    %41131 = torch.aten.mul.int %int4_38676, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38677 = torch.constant.int 4096
    %41132 = torch.prim.ListConstruct %41131, %int4096_38677 : (!torch.int, !torch.int) -> !torch.list<int>
    %41133 = torch.aten.view %41030, %41132 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41133, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41134 = torch.aten.mm %41133, %41112 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41134, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38678 = torch.constant.int 4
    %int1792_38679 = torch.constant.int 1792
    %41135 = torch.prim.ListConstruct %int4_38678, %2482, %int1792_38679 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41136 = torch.aten.view %41134, %41135 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38680 = torch.constant.int 4
    %41137 = torch.aten.mul.int %int4_38680, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38681 = torch.constant.int 4096
    %41138 = torch.prim.ListConstruct %41137, %int4096_38681 : (!torch.int, !torch.int) -> !torch.list<int>
    %41139 = torch.aten.view %41031, %41138 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41139, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41140 = torch.aten.mm %41139, %41114 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41140, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38682 = torch.constant.int 4
    %int1792_38683 = torch.constant.int 1792
    %41141 = torch.prim.ListConstruct %int4_38682, %2482, %int1792_38683 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41142 = torch.aten.view %41140, %41141 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38684 = torch.constant.int 4
    %41143 = torch.aten.mul.int %int4_38684, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38685 = torch.constant.int 4096
    %41144 = torch.prim.ListConstruct %41143, %int4096_38685 : (!torch.int, !torch.int) -> !torch.list<int>
    %41145 = torch.aten.view %41032, %41144 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41145, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41146 = torch.aten.mm %41145, %41116 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41146, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38686 = torch.constant.int 4
    %int1792_38687 = torch.constant.int 1792
    %41147 = torch.prim.ListConstruct %int4_38686, %2482, %int1792_38687 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41148 = torch.aten.view %41146, %41147 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38688 = torch.constant.int 4
    %41149 = torch.aten.mul.int %int4_38688, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38689 = torch.constant.int 4096
    %41150 = torch.prim.ListConstruct %41149, %int4096_38689 : (!torch.int, !torch.int) -> !torch.list<int>
    %41151 = torch.aten.view %41033, %41150 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41151, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41152 = torch.aten.mm %41151, %41118 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41152, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38690 = torch.constant.int 4
    %int1792_38691 = torch.constant.int 1792
    %41153 = torch.prim.ListConstruct %int4_38690, %2482, %int1792_38691 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41154 = torch.aten.view %41152, %41153 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38692 = torch.constant.int 4
    %41155 = torch.aten.mul.int %int4_38692, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38693 = torch.constant.int 4096
    %41156 = torch.prim.ListConstruct %41155, %int4096_38693 : (!torch.int, !torch.int) -> !torch.list<int>
    %41157 = torch.aten.view %41034, %41156 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41157, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41158 = torch.aten.mm %41157, %41120 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41158, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38694 = torch.constant.int 4
    %int1792_38695 = torch.constant.int 1792
    %41159 = torch.prim.ListConstruct %int4_38694, %2482, %int1792_38695 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41160 = torch.aten.view %41158, %41159 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38696 = torch.constant.int 4
    %41161 = torch.aten.mul.int %int4_38696, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38697 = torch.constant.int 4096
    %41162 = torch.prim.ListConstruct %41161, %int4096_38697 : (!torch.int, !torch.int) -> !torch.list<int>
    %41163 = torch.aten.view %41035, %41162 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41163, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41164 = torch.aten.mm %41163, %41122 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41164, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38698 = torch.constant.int 4
    %int1792_38699 = torch.constant.int 1792
    %41165 = torch.prim.ListConstruct %int4_38698, %2482, %int1792_38699 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41166 = torch.aten.view %41164, %41165 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_38700 = torch.constant.int 4
    %41167 = torch.aten.mul.int %int4_38700, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_38701 = torch.constant.int 4096
    %41168 = torch.prim.ListConstruct %41167, %int4096_38701 : (!torch.int, !torch.int) -> !torch.list<int>
    %41169 = torch.aten.view %41036, %41168 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41169, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41170 = torch.aten.mm %41169, %41124 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41170, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_38702 = torch.constant.int 4
    %int1792_38703 = torch.constant.int 1792
    %41171 = torch.prim.ListConstruct %int4_38702, %2482, %int1792_38703 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41172 = torch.aten.view %41170, %41171 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
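    // Elementwise gating: multiply the previously activated projections
    // (%41101-%41108, defined above) with the corresponding up-projections, one
    // [4, ?, 1792] product per device shard (SwiGLU-style gate * up, by all
    // appearances).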
    %41173 = torch.aten.mul.Tensor %41101, %41130 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41174 = torch.aten.mul.Tensor %41102, %41136 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41175 = torch.aten.mul.Tensor %41103, %41142 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41176 = torch.aten.mul.Tensor %41104, %41148 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41177 = torch.aten.mul.Tensor %41105, %41154 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41178 = torch.aten.mul.Tensor %41106, %41160 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41179 = torch.aten.mul.Tensor %41107, %41166 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %41180 = torch.aten.mul.Tensor %41108, %41172 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %41180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
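    // Transpose each device's down-projection weight shard (%1512-%1519) from
    // [4096, 1792] to [1792, 4096] so it can serve as the RHS of torch.aten.mm.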
    %int1_38704 = torch.constant.int 1
    %int0_38705 = torch.constant.int 0
    %41181 = torch.prim.ListConstruct %int1_38704, %int0_38705 : (!torch.int, !torch.int) -> !torch.list<int>
    %41182 = torch.aten.permute %1512, %41181 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_38706 = torch.constant.int 1
    %int0_38707 = torch.constant.int 0
    %41183 = torch.prim.ListConstruct %int1_38706, %int0_38707 : (!torch.int, !torch.int) -> !torch.list<int>
    %41184 = torch.aten.permute %1513, %41183 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_38708 = torch.constant.int 1
    %int0_38709 = torch.constant.int 0
    %41185 = torch.prim.ListConstruct %int1_38708, %int0_38709 : (!torch.int, !torch.int) -> !torch.list<int>
    %41186 = torch.aten.permute %1514, %41185 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_38710 = torch.constant.int 1
    %int0_38711 = torch.constant.int 0
    %41187 = torch.prim.ListConstruct %int1_38710, %int0_38711 : (!torch.int, !torch.int) -> !torch.list<int>
    %41188 = torch.aten.permute %1515, %41187 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_38712 = torch.constant.int 1
    %int0_38713 = torch.constant.int 0
    %41189 = torch.prim.ListConstruct %int1_38712, %int0_38713 : (!torch.int, !torch.int) -> !torch.list<int>
    %41190 = torch.aten.permute %1516, %41189 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_38714 = torch.constant.int 1
    %int0_38715 = torch.constant.int 0
    %41191 = torch.prim.ListConstruct %int1_38714, %int0_38715 : (!torch.int, !torch.int) -> !torch.list<int>
    %41192 = torch.aten.permute %1517, %41191 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_38716 = torch.constant.int 1
    %int0_38717 = torch.constant.int 0
    %41193 = torch.prim.ListConstruct %int1_38716, %int0_38717 : (!torch.int, !torch.int) -> !torch.list<int>
    %41194 = torch.aten.permute %1518, %41193 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_38718 = torch.constant.int 1
    %int0_38719 = torch.constant.int 0
    %41195 = torch.prim.ListConstruct %int1_38718, %int0_38719 : (!torch.int, !torch.int) -> !torch.list<int>
    %41196 = torch.aten.permute %1519, %41195 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
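    // Down-projection per shard: flatten the gated [4, ?, 1792] tensor to
    // [4*?, 1792], matmul with the transposed weight to produce [4*?, 4096], and
    // restore the [4, ?, 4096] shape. Each result (%41203, %41210, ..., %41252) is
    // one device's partial sum of the full FFN output.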
    %int1_38720 = torch.constant.int 1
    %41197 = torch.aten.size.int %41058, %int1_38720 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_38721 = torch.constant.int 4
    %41198 = torch.aten.mul.int %int4_38721, %41197 : !torch.int, !torch.int -> !torch.int
    %int1792_38722 = torch.constant.int 1792
    %41199 = torch.prim.ListConstruct %41198, %int1792_38722 : (!torch.int, !torch.int) -> !torch.list<int>
    %41200 = torch.aten.view %41173, %41199 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41200, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %41201 = torch.aten.mm %41200, %41182 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41201, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38723 = torch.constant.int 4
    %int4096_38724 = torch.constant.int 4096
    %41202 = torch.prim.ListConstruct %int4_38723, %41197, %int4096_38724 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41203 = torch.aten.view %41201, %41202 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38725 = torch.constant.int 1
    %41204 = torch.aten.size.int %41064, %int1_38725 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_38726 = torch.constant.int 4
    %41205 = torch.aten.mul.int %int4_38726, %41204 : !torch.int, !torch.int -> !torch.int
    %int1792_38727 = torch.constant.int 1792
    %41206 = torch.prim.ListConstruct %41205, %int1792_38727 : (!torch.int, !torch.int) -> !torch.list<int>
    %41207 = torch.aten.view %41174, %41206 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41207, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %41208 = torch.aten.mm %41207, %41184 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41208, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38728 = torch.constant.int 4
    %int4096_38729 = torch.constant.int 4096
    %41209 = torch.prim.ListConstruct %int4_38728, %41204, %int4096_38729 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41210 = torch.aten.view %41208, %41209 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38730 = torch.constant.int 1
    %41211 = torch.aten.size.int %41070, %int1_38730 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_38731 = torch.constant.int 4
    %41212 = torch.aten.mul.int %int4_38731, %41211 : !torch.int, !torch.int -> !torch.int
    %int1792_38732 = torch.constant.int 1792
    %41213 = torch.prim.ListConstruct %41212, %int1792_38732 : (!torch.int, !torch.int) -> !torch.list<int>
    %41214 = torch.aten.view %41175, %41213 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41214, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %41215 = torch.aten.mm %41214, %41186 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41215, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38733 = torch.constant.int 4
    %int4096_38734 = torch.constant.int 4096
    %41216 = torch.prim.ListConstruct %int4_38733, %41211, %int4096_38734 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41217 = torch.aten.view %41215, %41216 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38735 = torch.constant.int 1
    %41218 = torch.aten.size.int %41076, %int1_38735 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_38736 = torch.constant.int 4
    %41219 = torch.aten.mul.int %int4_38736, %41218 : !torch.int, !torch.int -> !torch.int
    %int1792_38737 = torch.constant.int 1792
    %41220 = torch.prim.ListConstruct %41219, %int1792_38737 : (!torch.int, !torch.int) -> !torch.list<int>
    %41221 = torch.aten.view %41176, %41220 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41221, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %41222 = torch.aten.mm %41221, %41188 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41222, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38738 = torch.constant.int 4
    %int4096_38739 = torch.constant.int 4096
    %41223 = torch.prim.ListConstruct %int4_38738, %41218, %int4096_38739 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41224 = torch.aten.view %41222, %41223 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38740 = torch.constant.int 1
    %41225 = torch.aten.size.int %41082, %int1_38740 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_38741 = torch.constant.int 4
    %41226 = torch.aten.mul.int %int4_38741, %41225 : !torch.int, !torch.int -> !torch.int
    %int1792_38742 = torch.constant.int 1792
    %41227 = torch.prim.ListConstruct %41226, %int1792_38742 : (!torch.int, !torch.int) -> !torch.list<int>
    %41228 = torch.aten.view %41177, %41227 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41228, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %41229 = torch.aten.mm %41228, %41190 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41229, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38743 = torch.constant.int 4
    %int4096_38744 = torch.constant.int 4096
    %41230 = torch.prim.ListConstruct %int4_38743, %41225, %int4096_38744 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41231 = torch.aten.view %41229, %41230 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38745 = torch.constant.int 1
    %41232 = torch.aten.size.int %41088, %int1_38745 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_38746 = torch.constant.int 4
    %41233 = torch.aten.mul.int %int4_38746, %41232 : !torch.int, !torch.int -> !torch.int
    %int1792_38747 = torch.constant.int 1792
    %41234 = torch.prim.ListConstruct %41233, %int1792_38747 : (!torch.int, !torch.int) -> !torch.list<int>
    %41235 = torch.aten.view %41178, %41234 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41235, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %41236 = torch.aten.mm %41235, %41192 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41236, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38748 = torch.constant.int 4
    %int4096_38749 = torch.constant.int 4096
    %41237 = torch.prim.ListConstruct %int4_38748, %41232, %int4096_38749 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41238 = torch.aten.view %41236, %41237 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38750 = torch.constant.int 1
    %41239 = torch.aten.size.int %41094, %int1_38750 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_38751 = torch.constant.int 4
    %41240 = torch.aten.mul.int %int4_38751, %41239 : !torch.int, !torch.int -> !torch.int
    %int1792_38752 = torch.constant.int 1792
    %41241 = torch.prim.ListConstruct %41240, %int1792_38752 : (!torch.int, !torch.int) -> !torch.list<int>
    %41242 = torch.aten.view %41179, %41241 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41242, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %41243 = torch.aten.mm %41242, %41194 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41243, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38753 = torch.constant.int 4
    %int4096_38754 = torch.constant.int 4096
    %41244 = torch.prim.ListConstruct %int4_38753, %41239, %int4096_38754 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41245 = torch.aten.view %41243, %41244 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38755 = torch.constant.int 1
    %41246 = torch.aten.size.int %41100, %int1_38755 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_38756 = torch.constant.int 4
    %41247 = torch.aten.mul.int %int4_38756, %41246 : !torch.int, !torch.int -> !torch.int
    %int1792_38757 = torch.constant.int 1792
    %41248 = torch.prim.ListConstruct %41247, %int1792_38757 : (!torch.int, !torch.int) -> !torch.list<int>
    %41249 = torch.aten.view %41180, %41248 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %41249, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %41250 = torch.aten.mm %41249, %41196 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41250, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_38758 = torch.constant.int 4
    %int4096_38759 = torch.constant.int 4096
    %41251 = torch.prim.ListConstruct %int4_38758, %41246, %int4096_38759 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41252 = torch.aten.view %41250, %41251 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
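    // Unrolled all-reduce (sum) onto @__device_0: each remote partial is bridged to
    // a builtin tensor, its dynamic dim 1 captured with tensor.dim, moved with
    // flow.tensor.transfer, and bridged back; the eight partials are then summed
    // with a chain of add.Tensor ops.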
    %41253 = torch_c.to_builtin_tensor %41210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38760 = arith.constant 1 : index
    %dim_38761 = tensor.dim %41253, %c1_38760 : tensor<4x?x4096xf16>
    %41254 = flow.tensor.transfer %41253 : tensor<4x?x4096xf16>{%dim_38761} to #hal.device.promise<@__device_0>
    %41255 = torch_c.from_builtin_tensor %41254 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41256 = torch_c.to_builtin_tensor %41217 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38762 = arith.constant 1 : index
    %dim_38763 = tensor.dim %41256, %c1_38762 : tensor<4x?x4096xf16>
    %41257 = flow.tensor.transfer %41256 : tensor<4x?x4096xf16>{%dim_38763} to #hal.device.promise<@__device_0>
    %41258 = torch_c.from_builtin_tensor %41257 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41259 = torch_c.to_builtin_tensor %41224 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38764 = arith.constant 1 : index
    %dim_38765 = tensor.dim %41259, %c1_38764 : tensor<4x?x4096xf16>
    %41260 = flow.tensor.transfer %41259 : tensor<4x?x4096xf16>{%dim_38765} to #hal.device.promise<@__device_0>
    %41261 = torch_c.from_builtin_tensor %41260 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41262 = torch_c.to_builtin_tensor %41231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38766 = arith.constant 1 : index
    %dim_38767 = tensor.dim %41262, %c1_38766 : tensor<4x?x4096xf16>
    %41263 = flow.tensor.transfer %41262 : tensor<4x?x4096xf16>{%dim_38767} to #hal.device.promise<@__device_0>
    %41264 = torch_c.from_builtin_tensor %41263 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41265 = torch_c.to_builtin_tensor %41238 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38768 = arith.constant 1 : index
    %dim_38769 = tensor.dim %41265, %c1_38768 : tensor<4x?x4096xf16>
    %41266 = flow.tensor.transfer %41265 : tensor<4x?x4096xf16>{%dim_38769} to #hal.device.promise<@__device_0>
    %41267 = torch_c.from_builtin_tensor %41266 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41268 = torch_c.to_builtin_tensor %41245 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38770 = arith.constant 1 : index
    %dim_38771 = tensor.dim %41268, %c1_38770 : tensor<4x?x4096xf16>
    %41269 = flow.tensor.transfer %41268 : tensor<4x?x4096xf16>{%dim_38771} to #hal.device.promise<@__device_0>
    %41270 = torch_c.from_builtin_tensor %41269 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41271 = torch_c.to_builtin_tensor %41252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38772 = arith.constant 1 : index
    %dim_38773 = tensor.dim %41271, %c1_38772 : tensor<4x?x4096xf16>
    %41272 = flow.tensor.transfer %41271 : tensor<4x?x4096xf16>{%dim_38773} to #hal.device.promise<@__device_0>
    %41273 = torch_c.from_builtin_tensor %41272 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38774 = torch.constant.int 1
    %41274 = torch.aten.add.Tensor %41203, %41255, %int1_38774 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38775 = torch.constant.int 1
    %41275 = torch.aten.add.Tensor %41274, %41258, %int1_38775 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38776 = torch.constant.int 1
    %41276 = torch.aten.add.Tensor %41275, %41261, %int1_38776 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38777 = torch.constant.int 1
    %41277 = torch.aten.add.Tensor %41276, %41264, %int1_38777 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38778 = torch.constant.int 1
    %41278 = torch.aten.add.Tensor %41277, %41267, %int1_38778 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38779 = torch.constant.int 1
    %41279 = torch.aten.add.Tensor %41278, %41270, %int1_38779 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38780 = torch.constant.int 1
    %41280 = torch.aten.add.Tensor %41279, %41273, %int1_38780 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
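    // Same reduction targeting @__device_1; the device-local partial (%41210) is
    // added directly while the other seven partials are transferred in.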
    %41281 = torch_c.to_builtin_tensor %41203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38781 = arith.constant 1 : index
    %dim_38782 = tensor.dim %41281, %c1_38781 : tensor<4x?x4096xf16>
    %41282 = flow.tensor.transfer %41281 : tensor<4x?x4096xf16>{%dim_38782} to #hal.device.promise<@__device_1>
    %41283 = torch_c.from_builtin_tensor %41282 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41284 = torch_c.to_builtin_tensor %41217 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38783 = arith.constant 1 : index
    %dim_38784 = tensor.dim %41284, %c1_38783 : tensor<4x?x4096xf16>
    %41285 = flow.tensor.transfer %41284 : tensor<4x?x4096xf16>{%dim_38784} to #hal.device.promise<@__device_1>
    %41286 = torch_c.from_builtin_tensor %41285 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41287 = torch_c.to_builtin_tensor %41224 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38785 = arith.constant 1 : index
    %dim_38786 = tensor.dim %41287, %c1_38785 : tensor<4x?x4096xf16>
    %41288 = flow.tensor.transfer %41287 : tensor<4x?x4096xf16>{%dim_38786} to #hal.device.promise<@__device_1>
    %41289 = torch_c.from_builtin_tensor %41288 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41290 = torch_c.to_builtin_tensor %41231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38787 = arith.constant 1 : index
    %dim_38788 = tensor.dim %41290, %c1_38787 : tensor<4x?x4096xf16>
    %41291 = flow.tensor.transfer %41290 : tensor<4x?x4096xf16>{%dim_38788} to #hal.device.promise<@__device_1>
    %41292 = torch_c.from_builtin_tensor %41291 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41293 = torch_c.to_builtin_tensor %41238 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38789 = arith.constant 1 : index
    %dim_38790 = tensor.dim %41293, %c1_38789 : tensor<4x?x4096xf16>
    %41294 = flow.tensor.transfer %41293 : tensor<4x?x4096xf16>{%dim_38790} to #hal.device.promise<@__device_1>
    %41295 = torch_c.from_builtin_tensor %41294 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41296 = torch_c.to_builtin_tensor %41245 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38791 = arith.constant 1 : index
    %dim_38792 = tensor.dim %41296, %c1_38791 : tensor<4x?x4096xf16>
    %41297 = flow.tensor.transfer %41296 : tensor<4x?x4096xf16>{%dim_38792} to #hal.device.promise<@__device_1>
    %41298 = torch_c.from_builtin_tensor %41297 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41299 = torch_c.to_builtin_tensor %41252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38793 = arith.constant 1 : index
    %dim_38794 = tensor.dim %41299, %c1_38793 : tensor<4x?x4096xf16>
    %41300 = flow.tensor.transfer %41299 : tensor<4x?x4096xf16>{%dim_38794} to #hal.device.promise<@__device_1>
    %41301 = torch_c.from_builtin_tensor %41300 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38795 = torch.constant.int 1
    %41302 = torch.aten.add.Tensor %41283, %41210, %int1_38795 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38796 = torch.constant.int 1
    %41303 = torch.aten.add.Tensor %41302, %41286, %int1_38796 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38797 = torch.constant.int 1
    %41304 = torch.aten.add.Tensor %41303, %41289, %int1_38797 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38798 = torch.constant.int 1
    %41305 = torch.aten.add.Tensor %41304, %41292, %int1_38798 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38799 = torch.constant.int 1
    %41306 = torch.aten.add.Tensor %41305, %41295, %int1_38799 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38800 = torch.constant.int 1
    %41307 = torch.aten.add.Tensor %41306, %41298, %int1_38800 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38801 = torch.constant.int 1
    %41308 = torch.aten.add.Tensor %41307, %41301, %int1_38801 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
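    // Reduction replicated for @__device_2 (local partial %41217).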
    %41309 = torch_c.to_builtin_tensor %41203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38802 = arith.constant 1 : index
    %dim_38803 = tensor.dim %41309, %c1_38802 : tensor<4x?x4096xf16>
    %41310 = flow.tensor.transfer %41309 : tensor<4x?x4096xf16>{%dim_38803} to #hal.device.promise<@__device_2>
    %41311 = torch_c.from_builtin_tensor %41310 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41312 = torch_c.to_builtin_tensor %41210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38804 = arith.constant 1 : index
    %dim_38805 = tensor.dim %41312, %c1_38804 : tensor<4x?x4096xf16>
    %41313 = flow.tensor.transfer %41312 : tensor<4x?x4096xf16>{%dim_38805} to #hal.device.promise<@__device_2>
    %41314 = torch_c.from_builtin_tensor %41313 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41315 = torch_c.to_builtin_tensor %41224 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38806 = arith.constant 1 : index
    %dim_38807 = tensor.dim %41315, %c1_38806 : tensor<4x?x4096xf16>
    %41316 = flow.tensor.transfer %41315 : tensor<4x?x4096xf16>{%dim_38807} to #hal.device.promise<@__device_2>
    %41317 = torch_c.from_builtin_tensor %41316 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41318 = torch_c.to_builtin_tensor %41231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38808 = arith.constant 1 : index
    %dim_38809 = tensor.dim %41318, %c1_38808 : tensor<4x?x4096xf16>
    %41319 = flow.tensor.transfer %41318 : tensor<4x?x4096xf16>{%dim_38809} to #hal.device.promise<@__device_2>
    %41320 = torch_c.from_builtin_tensor %41319 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41321 = torch_c.to_builtin_tensor %41238 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38810 = arith.constant 1 : index
    %dim_38811 = tensor.dim %41321, %c1_38810 : tensor<4x?x4096xf16>
    %41322 = flow.tensor.transfer %41321 : tensor<4x?x4096xf16>{%dim_38811} to #hal.device.promise<@__device_2>
    %41323 = torch_c.from_builtin_tensor %41322 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41324 = torch_c.to_builtin_tensor %41245 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38812 = arith.constant 1 : index
    %dim_38813 = tensor.dim %41324, %c1_38812 : tensor<4x?x4096xf16>
    %41325 = flow.tensor.transfer %41324 : tensor<4x?x4096xf16>{%dim_38813} to #hal.device.promise<@__device_2>
    %41326 = torch_c.from_builtin_tensor %41325 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41327 = torch_c.to_builtin_tensor %41252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38814 = arith.constant 1 : index
    %dim_38815 = tensor.dim %41327, %c1_38814 : tensor<4x?x4096xf16>
    %41328 = flow.tensor.transfer %41327 : tensor<4x?x4096xf16>{%dim_38815} to #hal.device.promise<@__device_2>
    %41329 = torch_c.from_builtin_tensor %41328 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38816 = torch.constant.int 1
    %41330 = torch.aten.add.Tensor %41311, %41314, %int1_38816 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38817 = torch.constant.int 1
    %41331 = torch.aten.add.Tensor %41330, %41217, %int1_38817 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38818 = torch.constant.int 1
    %41332 = torch.aten.add.Tensor %41331, %41317, %int1_38818 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38819 = torch.constant.int 1
    %41333 = torch.aten.add.Tensor %41332, %41320, %int1_38819 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38820 = torch.constant.int 1
    %41334 = torch.aten.add.Tensor %41333, %41323, %int1_38820 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38821 = torch.constant.int 1
    %41335 = torch.aten.add.Tensor %41334, %41326, %int1_38821 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38822 = torch.constant.int 1
    %41336 = torch.aten.add.Tensor %41335, %41329, %int1_38822 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
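    // Reduction replicated for @__device_3 (local partial %41224).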
    %41337 = torch_c.to_builtin_tensor %41203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38823 = arith.constant 1 : index
    %dim_38824 = tensor.dim %41337, %c1_38823 : tensor<4x?x4096xf16>
    %41338 = flow.tensor.transfer %41337 : tensor<4x?x4096xf16>{%dim_38824} to #hal.device.promise<@__device_3>
    %41339 = torch_c.from_builtin_tensor %41338 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41340 = torch_c.to_builtin_tensor %41210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38825 = arith.constant 1 : index
    %dim_38826 = tensor.dim %41340, %c1_38825 : tensor<4x?x4096xf16>
    %41341 = flow.tensor.transfer %41340 : tensor<4x?x4096xf16>{%dim_38826} to #hal.device.promise<@__device_3>
    %41342 = torch_c.from_builtin_tensor %41341 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41343 = torch_c.to_builtin_tensor %41217 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38827 = arith.constant 1 : index
    %dim_38828 = tensor.dim %41343, %c1_38827 : tensor<4x?x4096xf16>
    %41344 = flow.tensor.transfer %41343 : tensor<4x?x4096xf16>{%dim_38828} to #hal.device.promise<@__device_3>
    %41345 = torch_c.from_builtin_tensor %41344 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41346 = torch_c.to_builtin_tensor %41231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38829 = arith.constant 1 : index
    %dim_38830 = tensor.dim %41346, %c1_38829 : tensor<4x?x4096xf16>
    %41347 = flow.tensor.transfer %41346 : tensor<4x?x4096xf16>{%dim_38830} to #hal.device.promise<@__device_3>
    %41348 = torch_c.from_builtin_tensor %41347 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41349 = torch_c.to_builtin_tensor %41238 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38831 = arith.constant 1 : index
    %dim_38832 = tensor.dim %41349, %c1_38831 : tensor<4x?x4096xf16>
    %41350 = flow.tensor.transfer %41349 : tensor<4x?x4096xf16>{%dim_38832} to #hal.device.promise<@__device_3>
    %41351 = torch_c.from_builtin_tensor %41350 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41352 = torch_c.to_builtin_tensor %41245 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38833 = arith.constant 1 : index
    %dim_38834 = tensor.dim %41352, %c1_38833 : tensor<4x?x4096xf16>
    %41353 = flow.tensor.transfer %41352 : tensor<4x?x4096xf16>{%dim_38834} to #hal.device.promise<@__device_3>
    %41354 = torch_c.from_builtin_tensor %41353 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41355 = torch_c.to_builtin_tensor %41252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38835 = arith.constant 1 : index
    %dim_38836 = tensor.dim %41355, %c1_38835 : tensor<4x?x4096xf16>
    %41356 = flow.tensor.transfer %41355 : tensor<4x?x4096xf16>{%dim_38836} to #hal.device.promise<@__device_3>
    %41357 = torch_c.from_builtin_tensor %41356 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38837 = torch.constant.int 1
    %41358 = torch.aten.add.Tensor %41339, %41342, %int1_38837 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38838 = torch.constant.int 1
    %41359 = torch.aten.add.Tensor %41358, %41345, %int1_38838 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38839 = torch.constant.int 1
    %41360 = torch.aten.add.Tensor %41359, %41224, %int1_38839 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38840 = torch.constant.int 1
    %41361 = torch.aten.add.Tensor %41360, %41348, %int1_38840 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38841 = torch.constant.int 1
    %41362 = torch.aten.add.Tensor %41361, %41351, %int1_38841 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38842 = torch.constant.int 1
    %41363 = torch.aten.add.Tensor %41362, %41354, %int1_38842 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38843 = torch.constant.int 1
    %41364 = torch.aten.add.Tensor %41363, %41357, %int1_38843 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
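    // Reduction replicated for @__device_4 (local partial %41231).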
    %41365 = torch_c.to_builtin_tensor %41203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38844 = arith.constant 1 : index
    %dim_38845 = tensor.dim %41365, %c1_38844 : tensor<4x?x4096xf16>
    %41366 = flow.tensor.transfer %41365 : tensor<4x?x4096xf16>{%dim_38845} to #hal.device.promise<@__device_4>
    %41367 = torch_c.from_builtin_tensor %41366 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41368 = torch_c.to_builtin_tensor %41210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38846 = arith.constant 1 : index
    %dim_38847 = tensor.dim %41368, %c1_38846 : tensor<4x?x4096xf16>
    %41369 = flow.tensor.transfer %41368 : tensor<4x?x4096xf16>{%dim_38847} to #hal.device.promise<@__device_4>
    %41370 = torch_c.from_builtin_tensor %41369 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41371 = torch_c.to_builtin_tensor %41217 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38848 = arith.constant 1 : index
    %dim_38849 = tensor.dim %41371, %c1_38848 : tensor<4x?x4096xf16>
    %41372 = flow.tensor.transfer %41371 : tensor<4x?x4096xf16>{%dim_38849} to #hal.device.promise<@__device_4>
    %41373 = torch_c.from_builtin_tensor %41372 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41374 = torch_c.to_builtin_tensor %41224 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38850 = arith.constant 1 : index
    %dim_38851 = tensor.dim %41374, %c1_38850 : tensor<4x?x4096xf16>
    %41375 = flow.tensor.transfer %41374 : tensor<4x?x4096xf16>{%dim_38851} to #hal.device.promise<@__device_4>
    %41376 = torch_c.from_builtin_tensor %41375 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41377 = torch_c.to_builtin_tensor %41238 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38852 = arith.constant 1 : index
    %dim_38853 = tensor.dim %41377, %c1_38852 : tensor<4x?x4096xf16>
    %41378 = flow.tensor.transfer %41377 : tensor<4x?x4096xf16>{%dim_38853} to #hal.device.promise<@__device_4>
    %41379 = torch_c.from_builtin_tensor %41378 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41380 = torch_c.to_builtin_tensor %41245 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38854 = arith.constant 1 : index
    %dim_38855 = tensor.dim %41380, %c1_38854 : tensor<4x?x4096xf16>
    %41381 = flow.tensor.transfer %41380 : tensor<4x?x4096xf16>{%dim_38855} to #hal.device.promise<@__device_4>
    %41382 = torch_c.from_builtin_tensor %41381 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41383 = torch_c.to_builtin_tensor %41252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38856 = arith.constant 1 : index
    %dim_38857 = tensor.dim %41383, %c1_38856 : tensor<4x?x4096xf16>
    %41384 = flow.tensor.transfer %41383 : tensor<4x?x4096xf16>{%dim_38857} to #hal.device.promise<@__device_4>
    %41385 = torch_c.from_builtin_tensor %41384 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38858 = torch.constant.int 1
    %41386 = torch.aten.add.Tensor %41367, %41370, %int1_38858 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38859 = torch.constant.int 1
    %41387 = torch.aten.add.Tensor %41386, %41373, %int1_38859 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38860 = torch.constant.int 1
    %41388 = torch.aten.add.Tensor %41387, %41376, %int1_38860 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38861 = torch.constant.int 1
    %41389 = torch.aten.add.Tensor %41388, %41231, %int1_38861 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38862 = torch.constant.int 1
    %41390 = torch.aten.add.Tensor %41389, %41379, %int1_38862 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38863 = torch.constant.int 1
    %41391 = torch.aten.add.Tensor %41390, %41382, %int1_38863 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38864 = torch.constant.int 1
    %41392 = torch.aten.add.Tensor %41391, %41385, %int1_38864 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
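    // Reduction replicated for @__device_5 (local partial %41238).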
    %41393 = torch_c.to_builtin_tensor %41203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38865 = arith.constant 1 : index
    %dim_38866 = tensor.dim %41393, %c1_38865 : tensor<4x?x4096xf16>
    %41394 = flow.tensor.transfer %41393 : tensor<4x?x4096xf16>{%dim_38866} to #hal.device.promise<@__device_5>
    %41395 = torch_c.from_builtin_tensor %41394 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41396 = torch_c.to_builtin_tensor %41210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38867 = arith.constant 1 : index
    %dim_38868 = tensor.dim %41396, %c1_38867 : tensor<4x?x4096xf16>
    %41397 = flow.tensor.transfer %41396 : tensor<4x?x4096xf16>{%dim_38868} to #hal.device.promise<@__device_5>
    %41398 = torch_c.from_builtin_tensor %41397 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41399 = torch_c.to_builtin_tensor %41217 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38869 = arith.constant 1 : index
    %dim_38870 = tensor.dim %41399, %c1_38869 : tensor<4x?x4096xf16>
    %41400 = flow.tensor.transfer %41399 : tensor<4x?x4096xf16>{%dim_38870} to #hal.device.promise<@__device_5>
    %41401 = torch_c.from_builtin_tensor %41400 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41402 = torch_c.to_builtin_tensor %41224 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38871 = arith.constant 1 : index
    %dim_38872 = tensor.dim %41402, %c1_38871 : tensor<4x?x4096xf16>
    %41403 = flow.tensor.transfer %41402 : tensor<4x?x4096xf16>{%dim_38872} to #hal.device.promise<@__device_5>
    %41404 = torch_c.from_builtin_tensor %41403 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41405 = torch_c.to_builtin_tensor %41231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38873 = arith.constant 1 : index
    %dim_38874 = tensor.dim %41405, %c1_38873 : tensor<4x?x4096xf16>
    %41406 = flow.tensor.transfer %41405 : tensor<4x?x4096xf16>{%dim_38874} to #hal.device.promise<@__device_5>
    %41407 = torch_c.from_builtin_tensor %41406 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41408 = torch_c.to_builtin_tensor %41245 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38875 = arith.constant 1 : index
    %dim_38876 = tensor.dim %41408, %c1_38875 : tensor<4x?x4096xf16>
    %41409 = flow.tensor.transfer %41408 : tensor<4x?x4096xf16>{%dim_38876} to #hal.device.promise<@__device_5>
    %41410 = torch_c.from_builtin_tensor %41409 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41411 = torch_c.to_builtin_tensor %41252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38877 = arith.constant 1 : index
    %dim_38878 = tensor.dim %41411, %c1_38877 : tensor<4x?x4096xf16>
    %41412 = flow.tensor.transfer %41411 : tensor<4x?x4096xf16>{%dim_38878} to #hal.device.promise<@__device_5>
    %41413 = torch_c.from_builtin_tensor %41412 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38879 = torch.constant.int 1
    %41414 = torch.aten.add.Tensor %41395, %41398, %int1_38879 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38880 = torch.constant.int 1
    %41415 = torch.aten.add.Tensor %41414, %41401, %int1_38880 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38881 = torch.constant.int 1
    %41416 = torch.aten.add.Tensor %41415, %41404, %int1_38881 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38882 = torch.constant.int 1
    %41417 = torch.aten.add.Tensor %41416, %41407, %int1_38882 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38883 = torch.constant.int 1
    %41418 = torch.aten.add.Tensor %41417, %41238, %int1_38883 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38884 = torch.constant.int 1
    %41419 = torch.aten.add.Tensor %41418, %41410, %int1_38884 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38885 = torch.constant.int 1
    %41420 = torch.aten.add.Tensor %41419, %41413, %int1_38885 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
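    // Reduction replicated for @__device_6 (local partial %41245).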
    %41421 = torch_c.to_builtin_tensor %41203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38886 = arith.constant 1 : index
    %dim_38887 = tensor.dim %41421, %c1_38886 : tensor<4x?x4096xf16>
    %41422 = flow.tensor.transfer %41421 : tensor<4x?x4096xf16>{%dim_38887} to #hal.device.promise<@__device_6>
    %41423 = torch_c.from_builtin_tensor %41422 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41424 = torch_c.to_builtin_tensor %41210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38888 = arith.constant 1 : index
    %dim_38889 = tensor.dim %41424, %c1_38888 : tensor<4x?x4096xf16>
    %41425 = flow.tensor.transfer %41424 : tensor<4x?x4096xf16>{%dim_38889} to #hal.device.promise<@__device_6>
    %41426 = torch_c.from_builtin_tensor %41425 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41427 = torch_c.to_builtin_tensor %41217 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38890 = arith.constant 1 : index
    %dim_38891 = tensor.dim %41427, %c1_38890 : tensor<4x?x4096xf16>
    %41428 = flow.tensor.transfer %41427 : tensor<4x?x4096xf16>{%dim_38891} to #hal.device.promise<@__device_6>
    %41429 = torch_c.from_builtin_tensor %41428 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41430 = torch_c.to_builtin_tensor %41224 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38892 = arith.constant 1 : index
    %dim_38893 = tensor.dim %41430, %c1_38892 : tensor<4x?x4096xf16>
    %41431 = flow.tensor.transfer %41430 : tensor<4x?x4096xf16>{%dim_38893} to #hal.device.promise<@__device_6>
    %41432 = torch_c.from_builtin_tensor %41431 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41433 = torch_c.to_builtin_tensor %41231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38894 = arith.constant 1 : index
    %dim_38895 = tensor.dim %41433, %c1_38894 : tensor<4x?x4096xf16>
    %41434 = flow.tensor.transfer %41433 : tensor<4x?x4096xf16>{%dim_38895} to #hal.device.promise<@__device_6>
    %41435 = torch_c.from_builtin_tensor %41434 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41436 = torch_c.to_builtin_tensor %41238 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38896 = arith.constant 1 : index
    %dim_38897 = tensor.dim %41436, %c1_38896 : tensor<4x?x4096xf16>
    %41437 = flow.tensor.transfer %41436 : tensor<4x?x4096xf16>{%dim_38897} to #hal.device.promise<@__device_6>
    %41438 = torch_c.from_builtin_tensor %41437 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41439 = torch_c.to_builtin_tensor %41252 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38898 = arith.constant 1 : index
    %dim_38899 = tensor.dim %41439, %c1_38898 : tensor<4x?x4096xf16>
    %41440 = flow.tensor.transfer %41439 : tensor<4x?x4096xf16>{%dim_38899} to #hal.device.promise<@__device_6>
    %41441 = torch_c.from_builtin_tensor %41440 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38900 = torch.constant.int 1
    %41442 = torch.aten.add.Tensor %41423, %41426, %int1_38900 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38901 = torch.constant.int 1
    %41443 = torch.aten.add.Tensor %41442, %41429, %int1_38901 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38902 = torch.constant.int 1
    %41444 = torch.aten.add.Tensor %41443, %41432, %int1_38902 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38903 = torch.constant.int 1
    %41445 = torch.aten.add.Tensor %41444, %41435, %int1_38903 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38904 = torch.constant.int 1
    %41446 = torch.aten.add.Tensor %41445, %41438, %int1_38904 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38905 = torch.constant.int 1
    %41447 = torch.aten.add.Tensor %41446, %41245, %int1_38905 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38906 = torch.constant.int 1
    %41448 = torch.aten.add.Tensor %41447, %41441, %int1_38906 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
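    // Same transfer-and-accumulate reduction, this time materializing the full sum
    // on @__device_7 (here %41252 is the device-local partial).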
    %41449 = torch_c.to_builtin_tensor %41203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38907 = arith.constant 1 : index
    %dim_38908 = tensor.dim %41449, %c1_38907 : tensor<4x?x4096xf16>
    %41450 = flow.tensor.transfer %41449 : tensor<4x?x4096xf16>{%dim_38908} to #hal.device.promise<@__device_7>
    %41451 = torch_c.from_builtin_tensor %41450 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41452 = torch_c.to_builtin_tensor %41210 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38909 = arith.constant 1 : index
    %dim_38910 = tensor.dim %41452, %c1_38909 : tensor<4x?x4096xf16>
    %41453 = flow.tensor.transfer %41452 : tensor<4x?x4096xf16>{%dim_38910} to #hal.device.promise<@__device_7>
    %41454 = torch_c.from_builtin_tensor %41453 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41455 = torch_c.to_builtin_tensor %41217 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38911 = arith.constant 1 : index
    %dim_38912 = tensor.dim %41455, %c1_38911 : tensor<4x?x4096xf16>
    %41456 = flow.tensor.transfer %41455 : tensor<4x?x4096xf16>{%dim_38912} to #hal.device.promise<@__device_7>
    %41457 = torch_c.from_builtin_tensor %41456 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41458 = torch_c.to_builtin_tensor %41224 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38913 = arith.constant 1 : index
    %dim_38914 = tensor.dim %41458, %c1_38913 : tensor<4x?x4096xf16>
    %41459 = flow.tensor.transfer %41458 : tensor<4x?x4096xf16>{%dim_38914} to #hal.device.promise<@__device_7>
    %41460 = torch_c.from_builtin_tensor %41459 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41461 = torch_c.to_builtin_tensor %41231 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38915 = arith.constant 1 : index
    %dim_38916 = tensor.dim %41461, %c1_38915 : tensor<4x?x4096xf16>
    %41462 = flow.tensor.transfer %41461 : tensor<4x?x4096xf16>{%dim_38916} to #hal.device.promise<@__device_7>
    %41463 = torch_c.from_builtin_tensor %41462 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41464 = torch_c.to_builtin_tensor %41238 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38917 = arith.constant 1 : index
    %dim_38918 = tensor.dim %41464, %c1_38917 : tensor<4x?x4096xf16>
    %41465 = flow.tensor.transfer %41464 : tensor<4x?x4096xf16>{%dim_38918} to #hal.device.promise<@__device_7>
    %41466 = torch_c.from_builtin_tensor %41465 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %41467 = torch_c.to_builtin_tensor %41245 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_38919 = arith.constant 1 : index
    %dim_38920 = tensor.dim %41467, %c1_38919 : tensor<4x?x4096xf16>
    %41468 = flow.tensor.transfer %41467 : tensor<4x?x4096xf16>{%dim_38920} to #hal.device.promise<@__device_7>
    %41469 = torch_c.from_builtin_tensor %41468 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38921 = torch.constant.int 1
    %41470 = torch.aten.add.Tensor %41451, %41454, %int1_38921 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38922 = torch.constant.int 1
    %41471 = torch.aten.add.Tensor %41470, %41457, %int1_38922 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38923 = torch.constant.int 1
    %41472 = torch.aten.add.Tensor %41471, %41460, %int1_38923 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38924 = torch.constant.int 1
    %41473 = torch.aten.add.Tensor %41472, %41463, %int1_38924 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38925 = torch.constant.int 1
    %41474 = torch.aten.add.Tensor %41473, %41466, %int1_38925 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38926 = torch.constant.int 1
    %41475 = torch.aten.add.Tensor %41474, %41469, %int1_38926 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38927 = torch.constant.int 1
    %41476 = torch.aten.add.Tensor %41475, %41252, %int1_38927 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
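    // Residual connection: add each device's reduced result (%41280 ... %41476)
    // into its local copy of the [4,?,4096] hidden state.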
    %int1_38928 = torch.constant.int 1
    %41477 = torch.aten.add.Tensor %40957, %41280, %int1_38928 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38929 = torch.constant.int 1
    %41478 = torch.aten.add.Tensor %40958, %41308, %int1_38929 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38930 = torch.constant.int 1
    %41479 = torch.aten.add.Tensor %40959, %41336, %int1_38930 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38931 = torch.constant.int 1
    %41480 = torch.aten.add.Tensor %40960, %41364, %int1_38931 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38932 = torch.constant.int 1
    %41481 = torch.aten.add.Tensor %40961, %41392, %int1_38932 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38933 = torch.constant.int 1
    %41482 = torch.aten.add.Tensor %40962, %41420, %int1_38933 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38934 = torch.constant.int 1
    %41483 = torch.aten.add.Tensor %40963, %41448, %int1_38934 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_38935 = torch.constant.int 1
    %41484 = torch.aten.add.Tensor %40964, %41476, %int1_38935 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
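    // RMSNorm, replicated on all eight devices. First upcast the f16 hidden state
    // to f32 (dtype code 6) so the statistics are computed in full precision:
    //   y = x * rsqrt(mean(x^2, dim=-1) + eps) * weight,  eps ~= 1e-5.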
    %int6_38936 = torch.constant.int 6
    %41485 = torch.prims.convert_element_type %41477, %int6_38936 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38937 = torch.constant.int 6
    %41486 = torch.prims.convert_element_type %41478, %int6_38937 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38938 = torch.constant.int 6
    %41487 = torch.prims.convert_element_type %41479, %int6_38938 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38939 = torch.constant.int 6
    %41488 = torch.prims.convert_element_type %41480, %int6_38939 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38940 = torch.constant.int 6
    %41489 = torch.prims.convert_element_type %41481, %int6_38940 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38941 = torch.constant.int 6
    %41490 = torch.prims.convert_element_type %41482, %int6_38941 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38942 = torch.constant.int 6
    %41491 = torch.prims.convert_element_type %41483, %int6_38942 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_38943 = torch.constant.int 6
    %41492 = torch.prims.convert_element_type %41484, %int6_38943 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
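    // Square the activations element-wise (x^2).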
    %int2_38944 = torch.constant.int 2
    %41493 = torch.aten.pow.Tensor_Scalar %41485, %int2_38944 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38945 = torch.constant.int 2
    %41494 = torch.aten.pow.Tensor_Scalar %41486, %int2_38945 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38946 = torch.constant.int 2
    %41495 = torch.aten.pow.Tensor_Scalar %41487, %int2_38946 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38947 = torch.constant.int 2
    %41496 = torch.aten.pow.Tensor_Scalar %41488, %int2_38947 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38948 = torch.constant.int 2
    %41497 = torch.aten.pow.Tensor_Scalar %41489, %int2_38948 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38949 = torch.constant.int 2
    %41498 = torch.aten.pow.Tensor_Scalar %41490, %int2_38949 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38950 = torch.constant.int 2
    %41499 = torch.aten.pow.Tensor_Scalar %41491, %int2_38950 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_38951 = torch.constant.int 2
    %41500 = torch.aten.pow.Tensor_Scalar %41492, %int2_38951 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
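    // Mean of the squares over the last (hidden) dimension with keepdim=true,
    // yielding one scalar per token: [4,?,4096] -> [4,?,1].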
    %int-1_38952 = torch.constant.int -1
    %41501 = torch.prim.ListConstruct %int-1_38952 : (!torch.int) -> !torch.list<int>
    %true_38953 = torch.constant.bool true
    %none_38954 = torch.constant.none
    %41502 = torch.aten.mean.dim %41493, %41501, %true_38953, %none_38954 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38955 = torch.constant.int -1
    %41503 = torch.prim.ListConstruct %int-1_38955 : (!torch.int) -> !torch.list<int>
    %true_38956 = torch.constant.bool true
    %none_38957 = torch.constant.none
    %41504 = torch.aten.mean.dim %41494, %41503, %true_38956, %none_38957 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38958 = torch.constant.int -1
    %41505 = torch.prim.ListConstruct %int-1_38958 : (!torch.int) -> !torch.list<int>
    %true_38959 = torch.constant.bool true
    %none_38960 = torch.constant.none
    %41506 = torch.aten.mean.dim %41495, %41505, %true_38959, %none_38960 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38961 = torch.constant.int -1
    %41507 = torch.prim.ListConstruct %int-1_38961 : (!torch.int) -> !torch.list<int>
    %true_38962 = torch.constant.bool true
    %none_38963 = torch.constant.none
    %41508 = torch.aten.mean.dim %41496, %41507, %true_38962, %none_38963 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38964 = torch.constant.int -1
    %41509 = torch.prim.ListConstruct %int-1_38964 : (!torch.int) -> !torch.list<int>
    %true_38965 = torch.constant.bool true
    %none_38966 = torch.constant.none
    %41510 = torch.aten.mean.dim %41497, %41509, %true_38965, %none_38966 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38967 = torch.constant.int -1
    %41511 = torch.prim.ListConstruct %int-1_38967 : (!torch.int) -> !torch.list<int>
    %true_38968 = torch.constant.bool true
    %none_38969 = torch.constant.none
    %41512 = torch.aten.mean.dim %41498, %41511, %true_38968, %none_38969 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38970 = torch.constant.int -1
    %41513 = torch.prim.ListConstruct %int-1_38970 : (!torch.int) -> !torch.list<int>
    %true_38971 = torch.constant.bool true
    %none_38972 = torch.constant.none
    %41514 = torch.aten.mean.dim %41499, %41513, %true_38971, %none_38972 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_38973 = torch.constant.int -1
    %41515 = torch.prim.ListConstruct %int-1_38973 : (!torch.int) -> !torch.list<int>
    %true_38974 = torch.constant.bool true
    %none_38975 = torch.constant.none
    %41516 = torch.aten.mean.dim %41500, %41515, %true_38974, %none_38975 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
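    // Add the epsilon (~1e-5) for numerical stability.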
    %float9.999990e-06_38976 = torch.constant.float 9.9999997473787516E-6
    %int1_38977 = torch.constant.int 1
    %41517 = torch.aten.add.Scalar %41502, %float9.999990e-06_38976, %int1_38977 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38978 = torch.constant.float 9.9999997473787516E-6
    %int1_38979 = torch.constant.int 1
    %41518 = torch.aten.add.Scalar %41504, %float9.999990e-06_38978, %int1_38979 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38980 = torch.constant.float 9.9999997473787516E-6
    %int1_38981 = torch.constant.int 1
    %41519 = torch.aten.add.Scalar %41506, %float9.999990e-06_38980, %int1_38981 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38982 = torch.constant.float 9.9999997473787516E-6
    %int1_38983 = torch.constant.int 1
    %41520 = torch.aten.add.Scalar %41508, %float9.999990e-06_38982, %int1_38983 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38984 = torch.constant.float 9.9999997473787516E-6
    %int1_38985 = torch.constant.int 1
    %41521 = torch.aten.add.Scalar %41510, %float9.999990e-06_38984, %int1_38985 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38986 = torch.constant.float 9.9999997473787516E-6
    %int1_38987 = torch.constant.int 1
    %41522 = torch.aten.add.Scalar %41512, %float9.999990e-06_38986, %int1_38987 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38988 = torch.constant.float 9.9999997473787516E-6
    %int1_38989 = torch.constant.int 1
    %41523 = torch.aten.add.Scalar %41514, %float9.999990e-06_38988, %int1_38989 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_38990 = torch.constant.float 9.9999997473787516E-6
    %int1_38991 = torch.constant.int 1
    %41524 = torch.aten.add.Scalar %41516, %float9.999990e-06_38990, %int1_38991 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
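    // Reciprocal square root of the stabilized mean square.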
    %41525 = torch.aten.rsqrt %41517 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41526 = torch.aten.rsqrt %41518 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41527 = torch.aten.rsqrt %41519 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41528 = torch.aten.rsqrt %41520 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41529 = torch.aten.rsqrt %41521 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41530 = torch.aten.rsqrt %41522 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41531 = torch.aten.rsqrt %41523 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %41532 = torch.aten.rsqrt %41524 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %41532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
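    // Normalize: multiply the f32 activations by the broadcast [4,?,1] rsqrt factor.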
    %41533 = torch.aten.mul.Tensor %41485, %41525 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41534 = torch.aten.mul.Tensor %41486, %41526 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41535 = torch.aten.mul.Tensor %41487, %41527 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41536 = torch.aten.mul.Tensor %41488, %41528 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41537 = torch.aten.mul.Tensor %41489, %41529 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41538 = torch.aten.mul.Tensor %41490, %41530 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41539 = torch.aten.mul.Tensor %41491, %41531 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41540 = torch.aten.mul.Tensor %41492, %41532 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
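    // Scale by the per-device replica of the [4096] norm weight (%1520 ... %1527).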
    %41541 = torch.aten.mul.Tensor %1520, %41533 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41542 = torch.aten.mul.Tensor %1521, %41534 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41543 = torch.aten.mul.Tensor %1522, %41535 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41544 = torch.aten.mul.Tensor %1523, %41536 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41545 = torch.aten.mul.Tensor %1524, %41537 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41546 = torch.aten.mul.Tensor %1525, %41538 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41547 = torch.aten.mul.Tensor %1526, %41539 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %41548 = torch.aten.mul.Tensor %1527, %41540 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %41548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
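    // Downcast the normalized activations back to f16 (dtype code 5).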
    %int5_38992 = torch.constant.int 5
    %41549 = torch.prims.convert_element_type %41541, %int5_38992 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38993 = torch.constant.int 5
    %41550 = torch.prims.convert_element_type %41542, %int5_38993 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38994 = torch.constant.int 5
    %41551 = torch.prims.convert_element_type %41543, %int5_38994 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38995 = torch.constant.int 5
    %41552 = torch.prims.convert_element_type %41544, %int5_38995 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38996 = torch.constant.int 5
    %41553 = torch.prims.convert_element_type %41545, %int5_38996 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38997 = torch.constant.int 5
    %41554 = torch.prims.convert_element_type %41546, %int5_38997 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38998 = torch.constant.int 5
    %41555 = torch.prims.convert_element_type %41547, %int5_38998 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_38999 = torch.constant.int 5
    %41556 = torch.prims.convert_element_type %41548, %int5_38999 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %41556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
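    // Transpose each [512,4096] projection shard to [4096,512] for the matmuls.
    // 512 = 4096/8 output columns per device, consistent with column-sharded
    // attention Q-projection weights.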
    %int1_39000 = torch.constant.int 1
    %int0_39001 = torch.constant.int 0
    %41557 = torch.prim.ListConstruct %int1_39000, %int0_39001 : (!torch.int, !torch.int) -> !torch.list<int>
    %41558 = torch.aten.permute %1528, %41557 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_39002 = torch.constant.int 1
    %int0_39003 = torch.constant.int 0
    %41559 = torch.prim.ListConstruct %int1_39002, %int0_39003 : (!torch.int, !torch.int) -> !torch.list<int>
    %41560 = torch.aten.permute %1529, %41559 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_39004 = torch.constant.int 1
    %int0_39005 = torch.constant.int 0
    %41561 = torch.prim.ListConstruct %int1_39004, %int0_39005 : (!torch.int, !torch.int) -> !torch.list<int>
    %41562 = torch.aten.permute %1530, %41561 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_39006 = torch.constant.int 1
    %int0_39007 = torch.constant.int 0
    %41563 = torch.prim.ListConstruct %int1_39006, %int0_39007 : (!torch.int, !torch.int) -> !torch.list<int>
    %41564 = torch.aten.permute %1531, %41563 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_39008 = torch.constant.int 1
    %int0_39009 = torch.constant.int 0
    %41565 = torch.prim.ListConstruct %int1_39008, %int0_39009 : (!torch.int, !torch.int) -> !torch.list<int>
    %41566 = torch.aten.permute %1532, %41565 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_39010 = torch.constant.int 1
    %int0_39011 = torch.constant.int 0
    %41567 = torch.prim.ListConstruct %int1_39010, %int0_39011 : (!torch.int, !torch.int) -> !torch.list<int>
    %41568 = torch.aten.permute %1533, %41567 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_39012 = torch.constant.int 1
    %int0_39013 = torch.constant.int 0
    %41569 = torch.prim.ListConstruct %int1_39012, %int0_39013 : (!torch.int, !torch.int) -> !torch.list<int>
    %41570 = torch.aten.permute %1534, %41569 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_39014 = torch.constant.int 1
    %int0_39015 = torch.constant.int 0
    %41571 = torch.prim.ListConstruct %int1_39014, %int0_39015 : (!torch.int, !torch.int) -> !torch.list<int>
    %41572 = torch.aten.permute %1535, %41571 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
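    // Per-shard projection: flatten [4,?,4096] to [4*?,4096], torch.aten.mm against
    // the transposed shard, then reshape the result back to [4,?,512].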
    %int4_39016 = torch.constant.int 4
    %41573 = torch.aten.mul.int %int4_39016, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39017 = torch.constant.int 4096
    %41574 = torch.prim.ListConstruct %41573, %int4096_39017 : (!torch.int, !torch.int) -> !torch.list<int>
    %41575 = torch.aten.view %41549, %41574 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41575, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41576 = torch.aten.mm %41575, %41558 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %41576, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_39018 = torch.constant.int 4
    %int512_39019 = torch.constant.int 512
    %41577 = torch.prim.ListConstruct %int4_39018, %2482, %int512_39019 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41578 = torch.aten.view %41576, %41577 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %41578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_39020 = torch.constant.int 4
    %41579 = torch.aten.mul.int %int4_39020, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39021 = torch.constant.int 4096
    %41580 = torch.prim.ListConstruct %41579, %int4096_39021 : (!torch.int, !torch.int) -> !torch.list<int>
    %41581 = torch.aten.view %41550, %41580 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41581, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41582 = torch.aten.mm %41581, %41560 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %41582, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_39022 = torch.constant.int 4
    %int512_39023 = torch.constant.int 512
    %41583 = torch.prim.ListConstruct %int4_39022, %2482, %int512_39023 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41584 = torch.aten.view %41582, %41583 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %41584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_39024 = torch.constant.int 4
    %41585 = torch.aten.mul.int %int4_39024, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39025 = torch.constant.int 4096
    %41586 = torch.prim.ListConstruct %41585, %int4096_39025 : (!torch.int, !torch.int) -> !torch.list<int>
    %41587 = torch.aten.view %41551, %41586 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41587, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41588 = torch.aten.mm %41587, %41562 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %41588, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_39026 = torch.constant.int 4
    %int512_39027 = torch.constant.int 512
    %41589 = torch.prim.ListConstruct %int4_39026, %2482, %int512_39027 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41590 = torch.aten.view %41588, %41589 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %41590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_39028 = torch.constant.int 4
    %41591 = torch.aten.mul.int %int4_39028, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39029 = torch.constant.int 4096
    %41592 = torch.prim.ListConstruct %41591, %int4096_39029 : (!torch.int, !torch.int) -> !torch.list<int>
    %41593 = torch.aten.view %41552, %41592 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41593, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41594 = torch.aten.mm %41593, %41564 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %41594, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_39030 = torch.constant.int 4
    %int512_39031 = torch.constant.int 512
    %41595 = torch.prim.ListConstruct %int4_39030, %2482, %int512_39031 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41596 = torch.aten.view %41594, %41595 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %41596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_39032 = torch.constant.int 4
    %41597 = torch.aten.mul.int %int4_39032, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39033 = torch.constant.int 4096
    %41598 = torch.prim.ListConstruct %41597, %int4096_39033 : (!torch.int, !torch.int) -> !torch.list<int>
    %41599 = torch.aten.view %41553, %41598 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41599, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41600 = torch.aten.mm %41599, %41566 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %41600, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_39034 = torch.constant.int 4
    %int512_39035 = torch.constant.int 512
    %41601 = torch.prim.ListConstruct %int4_39034, %2482, %int512_39035 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41602 = torch.aten.view %41600, %41601 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %41602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_39036 = torch.constant.int 4
    %41603 = torch.aten.mul.int %int4_39036, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39037 = torch.constant.int 4096
    %41604 = torch.prim.ListConstruct %41603, %int4096_39037 : (!torch.int, !torch.int) -> !torch.list<int>
    %41605 = torch.aten.view %41554, %41604 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41605, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41606 = torch.aten.mm %41605, %41568 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %41606, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_39038 = torch.constant.int 4
    %int512_39039 = torch.constant.int 512
    %41607 = torch.prim.ListConstruct %int4_39038, %2482, %int512_39039 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41608 = torch.aten.view %41606, %41607 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %41608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_39040 = torch.constant.int 4
    %41609 = torch.aten.mul.int %int4_39040, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39041 = torch.constant.int 4096
    %41610 = torch.prim.ListConstruct %41609, %int4096_39041 : (!torch.int, !torch.int) -> !torch.list<int>
    %41611 = torch.aten.view %41555, %41610 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41611, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41612 = torch.aten.mm %41611, %41570 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %41612, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_39042 = torch.constant.int 4
    %int512_39043 = torch.constant.int 512
    %41613 = torch.prim.ListConstruct %int4_39042, %2482, %int512_39043 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41614 = torch.aten.view %41612, %41613 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %41614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_39044 = torch.constant.int 4
    %41615 = torch.aten.mul.int %int4_39044, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39045 = torch.constant.int 4096
    %41616 = torch.prim.ListConstruct %41615, %int4096_39045 : (!torch.int, !torch.int) -> !torch.list<int>
    %41617 = torch.aten.view %41556, %41616 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41617, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41618 = torch.aten.mm %41617, %41572 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %41618, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_39046 = torch.constant.int 4
    %int512_39047 = torch.constant.int 512
    %41619 = torch.prim.ListConstruct %int4_39046, %2482, %int512_39047 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41620 = torch.aten.view %41618, %41619 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %41620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
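    // Second projection set: [128,4096] shards transposed to [4096,128]. The
    // 128-wide output per device is consistent with one 128-dim KV head each,
    // presumably the K projection under grouped-query attention.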
    %int1_39048 = torch.constant.int 1
    %int0_39049 = torch.constant.int 0
    %41621 = torch.prim.ListConstruct %int1_39048, %int0_39049 : (!torch.int, !torch.int) -> !torch.list<int>
    %41622 = torch.aten.permute %1536, %41621 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39050 = torch.constant.int 1
    %int0_39051 = torch.constant.int 0
    %41623 = torch.prim.ListConstruct %int1_39050, %int0_39051 : (!torch.int, !torch.int) -> !torch.list<int>
    %41624 = torch.aten.permute %1537, %41623 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39052 = torch.constant.int 1
    %int0_39053 = torch.constant.int 0
    %41625 = torch.prim.ListConstruct %int1_39052, %int0_39053 : (!torch.int, !torch.int) -> !torch.list<int>
    %41626 = torch.aten.permute %1538, %41625 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39054 = torch.constant.int 1
    %int0_39055 = torch.constant.int 0
    %41627 = torch.prim.ListConstruct %int1_39054, %int0_39055 : (!torch.int, !torch.int) -> !torch.list<int>
    %41628 = torch.aten.permute %1539, %41627 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39056 = torch.constant.int 1
    %int0_39057 = torch.constant.int 0
    %41629 = torch.prim.ListConstruct %int1_39056, %int0_39057 : (!torch.int, !torch.int) -> !torch.list<int>
    %41630 = torch.aten.permute %1540, %41629 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39058 = torch.constant.int 1
    %int0_39059 = torch.constant.int 0
    %41631 = torch.prim.ListConstruct %int1_39058, %int0_39059 : (!torch.int, !torch.int) -> !torch.list<int>
    %41632 = torch.aten.permute %1541, %41631 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39060 = torch.constant.int 1
    %int0_39061 = torch.constant.int 0
    %41633 = torch.prim.ListConstruct %int1_39060, %int0_39061 : (!torch.int, !torch.int) -> !torch.list<int>
    %41634 = torch.aten.permute %1542, %41633 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39062 = torch.constant.int 1
    %int0_39063 = torch.constant.int 0
    %41635 = torch.prim.ListConstruct %int1_39062, %int0_39063 : (!torch.int, !torch.int) -> !torch.list<int>
    %41636 = torch.aten.permute %1543, %41635 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
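    // Same flatten-mm-reshape pattern, producing a [4,?,128] slice per device.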
    %int4_39064 = torch.constant.int 4
    %41637 = torch.aten.mul.int %int4_39064, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39065 = torch.constant.int 4096
    %41638 = torch.prim.ListConstruct %41637, %int4096_39065 : (!torch.int, !torch.int) -> !torch.list<int>
    %41639 = torch.aten.view %41549, %41638 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41639, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41640 = torch.aten.mm %41639, %41622 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41640, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39066 = torch.constant.int 4
    %int128_39067 = torch.constant.int 128
    %41641 = torch.prim.ListConstruct %int4_39066, %2482, %int128_39067 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41642 = torch.aten.view %41640, %41641 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39068 = torch.constant.int 4
    %41643 = torch.aten.mul.int %int4_39068, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39069 = torch.constant.int 4096
    %41644 = torch.prim.ListConstruct %41643, %int4096_39069 : (!torch.int, !torch.int) -> !torch.list<int>
    %41645 = torch.aten.view %41550, %41644 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41645, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41646 = torch.aten.mm %41645, %41624 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41646, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39070 = torch.constant.int 4
    %int128_39071 = torch.constant.int 128
    %41647 = torch.prim.ListConstruct %int4_39070, %2482, %int128_39071 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41648 = torch.aten.view %41646, %41647 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39072 = torch.constant.int 4
    %41649 = torch.aten.mul.int %int4_39072, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39073 = torch.constant.int 4096
    %41650 = torch.prim.ListConstruct %41649, %int4096_39073 : (!torch.int, !torch.int) -> !torch.list<int>
    %41651 = torch.aten.view %41551, %41650 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41651, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41652 = torch.aten.mm %41651, %41626 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41652, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39074 = torch.constant.int 4
    %int128_39075 = torch.constant.int 128
    %41653 = torch.prim.ListConstruct %int4_39074, %2482, %int128_39075 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41654 = torch.aten.view %41652, %41653 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39076 = torch.constant.int 4
    %41655 = torch.aten.mul.int %int4_39076, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39077 = torch.constant.int 4096
    %41656 = torch.prim.ListConstruct %41655, %int4096_39077 : (!torch.int, !torch.int) -> !torch.list<int>
    %41657 = torch.aten.view %41552, %41656 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41657, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41658 = torch.aten.mm %41657, %41628 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41658, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39078 = torch.constant.int 4
    %int128_39079 = torch.constant.int 128
    %41659 = torch.prim.ListConstruct %int4_39078, %2482, %int128_39079 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41660 = torch.aten.view %41658, %41659 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39080 = torch.constant.int 4
    %41661 = torch.aten.mul.int %int4_39080, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39081 = torch.constant.int 4096
    %41662 = torch.prim.ListConstruct %41661, %int4096_39081 : (!torch.int, !torch.int) -> !torch.list<int>
    %41663 = torch.aten.view %41553, %41662 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41663, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41664 = torch.aten.mm %41663, %41630 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41664, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39082 = torch.constant.int 4
    %int128_39083 = torch.constant.int 128
    %41665 = torch.prim.ListConstruct %int4_39082, %2482, %int128_39083 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41666 = torch.aten.view %41664, %41665 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39084 = torch.constant.int 4
    %41667 = torch.aten.mul.int %int4_39084, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39085 = torch.constant.int 4096
    %41668 = torch.prim.ListConstruct %41667, %int4096_39085 : (!torch.int, !torch.int) -> !torch.list<int>
    %41669 = torch.aten.view %41554, %41668 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41669, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41670 = torch.aten.mm %41669, %41632 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41670, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39086 = torch.constant.int 4
    %int128_39087 = torch.constant.int 128
    %41671 = torch.prim.ListConstruct %int4_39086, %2482, %int128_39087 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41672 = torch.aten.view %41670, %41671 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39088 = torch.constant.int 4
    %41673 = torch.aten.mul.int %int4_39088, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39089 = torch.constant.int 4096
    %41674 = torch.prim.ListConstruct %41673, %int4096_39089 : (!torch.int, !torch.int) -> !torch.list<int>
    %41675 = torch.aten.view %41555, %41674 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41675, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41676 = torch.aten.mm %41675, %41634 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41676, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39090 = torch.constant.int 4
    %int128_39091 = torch.constant.int 128
    %41677 = torch.prim.ListConstruct %int4_39090, %2482, %int128_39091 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41678 = torch.aten.view %41676, %41677 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39092 = torch.constant.int 4
    %41679 = torch.aten.mul.int %int4_39092, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39093 = torch.constant.int 4096
    %41680 = torch.prim.ListConstruct %41679, %int4096_39093 : (!torch.int, !torch.int) -> !torch.list<int>
    %41681 = torch.aten.view %41556, %41680 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41681, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41682 = torch.aten.mm %41681, %41636 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41682, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39094 = torch.constant.int 4
    %int128_39095 = torch.constant.int 128
    %41683 = torch.prim.ListConstruct %int4_39094, %2482, %int128_39095 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41684 = torch.aten.view %41682, %41683 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
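    // The eight permutes below transpose the [128,4096] weight shards %1544-%1551
    // to [4096,128] so they can serve as the right-hand operand of torch.aten.mm.
    // (Annotation; the role of these shards is inferred from the surrounding IR.)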
    %int1_39096 = torch.constant.int 1
    %int0_39097 = torch.constant.int 0
    %41685 = torch.prim.ListConstruct %int1_39096, %int0_39097 : (!torch.int, !torch.int) -> !torch.list<int>
    %41686 = torch.aten.permute %1544, %41685 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39098 = torch.constant.int 1
    %int0_39099 = torch.constant.int 0
    %41687 = torch.prim.ListConstruct %int1_39098, %int0_39099 : (!torch.int, !torch.int) -> !torch.list<int>
    %41688 = torch.aten.permute %1545, %41687 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39100 = torch.constant.int 1
    %int0_39101 = torch.constant.int 0
    %41689 = torch.prim.ListConstruct %int1_39100, %int0_39101 : (!torch.int, !torch.int) -> !torch.list<int>
    %41690 = torch.aten.permute %1546, %41689 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39102 = torch.constant.int 1
    %int0_39103 = torch.constant.int 0
    %41691 = torch.prim.ListConstruct %int1_39102, %int0_39103 : (!torch.int, !torch.int) -> !torch.list<int>
    %41692 = torch.aten.permute %1547, %41691 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39104 = torch.constant.int 1
    %int0_39105 = torch.constant.int 0
    %41693 = torch.prim.ListConstruct %int1_39104, %int0_39105 : (!torch.int, !torch.int) -> !torch.list<int>
    %41694 = torch.aten.permute %1548, %41693 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39106 = torch.constant.int 1
    %int0_39107 = torch.constant.int 0
    %41695 = torch.prim.ListConstruct %int1_39106, %int0_39107 : (!torch.int, !torch.int) -> !torch.list<int>
    %41696 = torch.aten.permute %1549, %41695 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39108 = torch.constant.int 1
    %int0_39109 = torch.constant.int 0
    %41697 = torch.prim.ListConstruct %int1_39108, %int0_39109 : (!torch.int, !torch.int) -> !torch.list<int>
    %41698 = torch.aten.permute %1550, %41697 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_39110 = torch.constant.int 1
    %int0_39111 = torch.constant.int 0
    %41699 = torch.prim.ListConstruct %int1_39110, %int0_39111 : (!torch.int, !torch.int) -> !torch.list<int>
    %41700 = torch.aten.permute %1551, %41699 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
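    // The same flatten -> mm -> reshape pattern as above, now against the freshly
    // transposed shards %41686-%41700: one [?,4096] x [4096,128] matmul per
    // device, reshaped to [4,?,128]. (Annotation inferred from the IR.)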
    %int4_39112 = torch.constant.int 4
    %41701 = torch.aten.mul.int %int4_39112, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39113 = torch.constant.int 4096
    %41702 = torch.prim.ListConstruct %41701, %int4096_39113 : (!torch.int, !torch.int) -> !torch.list<int>
    %41703 = torch.aten.view %41549, %41702 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41703, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41704 = torch.aten.mm %41703, %41686 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41704, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39114 = torch.constant.int 4
    %int128_39115 = torch.constant.int 128
    %41705 = torch.prim.ListConstruct %int4_39114, %2482, %int128_39115 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41706 = torch.aten.view %41704, %41705 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39116 = torch.constant.int 4
    %41707 = torch.aten.mul.int %int4_39116, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39117 = torch.constant.int 4096
    %41708 = torch.prim.ListConstruct %41707, %int4096_39117 : (!torch.int, !torch.int) -> !torch.list<int>
    %41709 = torch.aten.view %41550, %41708 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41709, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41710 = torch.aten.mm %41709, %41688 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41710, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39118 = torch.constant.int 4
    %int128_39119 = torch.constant.int 128
    %41711 = torch.prim.ListConstruct %int4_39118, %2482, %int128_39119 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41712 = torch.aten.view %41710, %41711 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39120 = torch.constant.int 4
    %41713 = torch.aten.mul.int %int4_39120, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39121 = torch.constant.int 4096
    %41714 = torch.prim.ListConstruct %41713, %int4096_39121 : (!torch.int, !torch.int) -> !torch.list<int>
    %41715 = torch.aten.view %41551, %41714 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41715, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41716 = torch.aten.mm %41715, %41690 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41716, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39122 = torch.constant.int 4
    %int128_39123 = torch.constant.int 128
    %41717 = torch.prim.ListConstruct %int4_39122, %2482, %int128_39123 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41718 = torch.aten.view %41716, %41717 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39124 = torch.constant.int 4
    %41719 = torch.aten.mul.int %int4_39124, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39125 = torch.constant.int 4096
    %41720 = torch.prim.ListConstruct %41719, %int4096_39125 : (!torch.int, !torch.int) -> !torch.list<int>
    %41721 = torch.aten.view %41552, %41720 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41721, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41722 = torch.aten.mm %41721, %41692 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41722, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39126 = torch.constant.int 4
    %int128_39127 = torch.constant.int 128
    %41723 = torch.prim.ListConstruct %int4_39126, %2482, %int128_39127 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41724 = torch.aten.view %41722, %41723 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39128 = torch.constant.int 4
    %41725 = torch.aten.mul.int %int4_39128, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39129 = torch.constant.int 4096
    %41726 = torch.prim.ListConstruct %41725, %int4096_39129 : (!torch.int, !torch.int) -> !torch.list<int>
    %41727 = torch.aten.view %41553, %41726 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41727, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41728 = torch.aten.mm %41727, %41694 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41728, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39130 = torch.constant.int 4
    %int128_39131 = torch.constant.int 128
    %41729 = torch.prim.ListConstruct %int4_39130, %2482, %int128_39131 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41730 = torch.aten.view %41728, %41729 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39132 = torch.constant.int 4
    %41731 = torch.aten.mul.int %int4_39132, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39133 = torch.constant.int 4096
    %41732 = torch.prim.ListConstruct %41731, %int4096_39133 : (!torch.int, !torch.int) -> !torch.list<int>
    %41733 = torch.aten.view %41554, %41732 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41733, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41734 = torch.aten.mm %41733, %41696 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41734, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39134 = torch.constant.int 4
    %int128_39135 = torch.constant.int 128
    %41735 = torch.prim.ListConstruct %int4_39134, %2482, %int128_39135 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41736 = torch.aten.view %41734, %41735 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39136 = torch.constant.int 4
    %41737 = torch.aten.mul.int %int4_39136, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39137 = torch.constant.int 4096
    %41738 = torch.prim.ListConstruct %41737, %int4096_39137 : (!torch.int, !torch.int) -> !torch.list<int>
    %41739 = torch.aten.view %41555, %41738 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41739, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41740 = torch.aten.mm %41739, %41698 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41740, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39138 = torch.constant.int 4
    %int128_39139 = torch.constant.int 128
    %41741 = torch.prim.ListConstruct %int4_39138, %2482, %int128_39139 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41742 = torch.aten.view %41740, %41741 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_39140 = torch.constant.int 4
    %41743 = torch.aten.mul.int %int4_39140, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_39141 = torch.constant.int 4096
    %41744 = torch.prim.ListConstruct %41743, %int4096_39141 : (!torch.int, !torch.int) -> !torch.list<int>
    %41745 = torch.aten.view %41556, %41744 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %41745, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %41746 = torch.aten.mm %41745, %41700 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %41746, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_39142 = torch.constant.int 4
    %int128_39143 = torch.constant.int 128
    %41747 = torch.prim.ListConstruct %int4_39142, %2482, %int128_39143 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41748 = torch.aten.view %41746, %41747 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %41748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
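    // Annotation (interpretation from the shapes only): the views below split each
    // [4,?,512] per-device projection into explicit attention heads, [4,?,4,128] —
    // consistent with four 128-dim heads per shard.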
    %int4_39144 = torch.constant.int 4
    %int4_39145 = torch.constant.int 4
    %int128_39146 = torch.constant.int 128
    %41749 = torch.prim.ListConstruct %int4_39144, %2482, %int4_39145, %int128_39146 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41750 = torch.aten.view %41578, %41749 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39147 = torch.constant.int 4
    %int4_39148 = torch.constant.int 4
    %int128_39149 = torch.constant.int 128
    %41751 = torch.prim.ListConstruct %int4_39147, %2482, %int4_39148, %int128_39149 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41752 = torch.aten.view %41584, %41751 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39150 = torch.constant.int 4
    %int4_39151 = torch.constant.int 4
    %int128_39152 = torch.constant.int 128
    %41753 = torch.prim.ListConstruct %int4_39150, %2482, %int4_39151, %int128_39152 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41754 = torch.aten.view %41590, %41753 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39153 = torch.constant.int 4
    %int4_39154 = torch.constant.int 4
    %int128_39155 = torch.constant.int 128
    %41755 = torch.prim.ListConstruct %int4_39153, %2482, %int4_39154, %int128_39155 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41756 = torch.aten.view %41596, %41755 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39156 = torch.constant.int 4
    %int4_39157 = torch.constant.int 4
    %int128_39158 = torch.constant.int 128
    %41757 = torch.prim.ListConstruct %int4_39156, %2482, %int4_39157, %int128_39158 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41758 = torch.aten.view %41602, %41757 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39159 = torch.constant.int 4
    %int4_39160 = torch.constant.int 4
    %int128_39161 = torch.constant.int 128
    %41759 = torch.prim.ListConstruct %int4_39159, %2482, %int4_39160, %int128_39161 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41760 = torch.aten.view %41608, %41759 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39162 = torch.constant.int 4
    %int4_39163 = torch.constant.int 4
    %int128_39164 = torch.constant.int 128
    %41761 = torch.prim.ListConstruct %int4_39162, %2482, %int4_39163, %int128_39164 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41762 = torch.aten.view %41614, %41761 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39165 = torch.constant.int 4
    %int4_39166 = torch.constant.int 4
    %int128_39167 = torch.constant.int 128
    %41763 = torch.prim.ListConstruct %int4_39165, %2482, %int4_39166, %int128_39167 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41764 = torch.aten.view %41620, %41763 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
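    // The [4,?,128] projections below get an explicit singleton head axis,
    // [4,?,1,128] — consistent with one 128-dim head per device shard.
    // (Annotation inferred from the shapes.)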
    %int4_39168 = torch.constant.int 4
    %int1_39169 = torch.constant.int 1
    %int128_39170 = torch.constant.int 128
    %41765 = torch.prim.ListConstruct %int4_39168, %2482, %int1_39169, %int128_39170 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41766 = torch.aten.view %41642, %41765 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39171 = torch.constant.int 4
    %int1_39172 = torch.constant.int 1
    %int128_39173 = torch.constant.int 128
    %41767 = torch.prim.ListConstruct %int4_39171, %2482, %int1_39172, %int128_39173 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41768 = torch.aten.view %41648, %41767 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39174 = torch.constant.int 4
    %int1_39175 = torch.constant.int 1
    %int128_39176 = torch.constant.int 128
    %41769 = torch.prim.ListConstruct %int4_39174, %2482, %int1_39175, %int128_39176 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41770 = torch.aten.view %41654, %41769 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39177 = torch.constant.int 4
    %int1_39178 = torch.constant.int 1
    %int128_39179 = torch.constant.int 128
    %41771 = torch.prim.ListConstruct %int4_39177, %2482, %int1_39178, %int128_39179 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41772 = torch.aten.view %41660, %41771 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39180 = torch.constant.int 4
    %int1_39181 = torch.constant.int 1
    %int128_39182 = torch.constant.int 128
    %41773 = torch.prim.ListConstruct %int4_39180, %2482, %int1_39181, %int128_39182 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41774 = torch.aten.view %41666, %41773 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39183 = torch.constant.int 4
    %int1_39184 = torch.constant.int 1
    %int128_39185 = torch.constant.int 128
    %41775 = torch.prim.ListConstruct %int4_39183, %2482, %int1_39184, %int128_39185 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41776 = torch.aten.view %41672, %41775 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39186 = torch.constant.int 4
    %int1_39187 = torch.constant.int 1
    %int128_39188 = torch.constant.int 128
    %41777 = torch.prim.ListConstruct %int4_39186, %2482, %int1_39187, %int128_39188 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41778 = torch.aten.view %41678, %41777 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39189 = torch.constant.int 4
    %int1_39190 = torch.constant.int 1
    %int128_39191 = torch.constant.int 128
    %41779 = torch.prim.ListConstruct %int4_39189, %2482, %int1_39190, %int128_39191 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41780 = torch.aten.view %41684, %41779 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
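    // The same singleton-head reshape, applied to the second group of per-device
    // projections (%41706-%41748).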
    %int4_39192 = torch.constant.int 4
    %int1_39193 = torch.constant.int 1
    %int128_39194 = torch.constant.int 128
    %41781 = torch.prim.ListConstruct %int4_39192, %2482, %int1_39193, %int128_39194 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41782 = torch.aten.view %41706, %41781 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39195 = torch.constant.int 4
    %int1_39196 = torch.constant.int 1
    %int128_39197 = torch.constant.int 128
    %41783 = torch.prim.ListConstruct %int4_39195, %2482, %int1_39196, %int128_39197 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41784 = torch.aten.view %41712, %41783 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39198 = torch.constant.int 4
    %int1_39199 = torch.constant.int 1
    %int128_39200 = torch.constant.int 128
    %41785 = torch.prim.ListConstruct %int4_39198, %2482, %int1_39199, %int128_39200 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41786 = torch.aten.view %41718, %41785 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39201 = torch.constant.int 4
    %int1_39202 = torch.constant.int 1
    %int128_39203 = torch.constant.int 128
    %41787 = torch.prim.ListConstruct %int4_39201, %2482, %int1_39202, %int128_39203 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41788 = torch.aten.view %41724, %41787 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39204 = torch.constant.int 4
    %int1_39205 = torch.constant.int 1
    %int128_39206 = torch.constant.int 128
    %41789 = torch.prim.ListConstruct %int4_39204, %2482, %int1_39205, %int128_39206 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41790 = torch.aten.view %41730, %41789 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39207 = torch.constant.int 4
    %int1_39208 = torch.constant.int 1
    %int128_39209 = torch.constant.int 128
    %41791 = torch.prim.ListConstruct %int4_39207, %2482, %int1_39208, %int128_39209 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41792 = torch.aten.view %41736, %41791 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39210 = torch.constant.int 4
    %int1_39211 = torch.constant.int 1
    %int128_39212 = torch.constant.int 128
    %41793 = torch.prim.ListConstruct %int4_39210, %2482, %int1_39211, %int128_39212 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41794 = torch.aten.view %41742, %41793 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_39213 = torch.constant.int 4
    %int1_39214 = torch.constant.int 1
    %int128_39215 = torch.constant.int 128
    %41795 = torch.prim.ListConstruct %int4_39213, %2482, %int1_39214, %int128_39215 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %41796 = torch.aten.view %41748, %41795 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %41796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
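    // Annotation (inferred from the IR; matches the usual rotary-embedding
    // construction): positions arange(131072) are combined with per-channel
    // frequencies 500000^(-2i/128), i = 0..63, and the resulting angles are packed
    // as cos + i*sin into a [131072,64] complex<f32> table, computed on the host
    // ("cpu").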
    %int131072_39216 = torch.constant.int 131072
    %none_39217 = torch.constant.none
    %none_39218 = torch.constant.none
    %cpu_39219 = torch.constant.device "cpu"
    %false_39220 = torch.constant.bool false
    %41797 = torch.aten.arange %int131072_39216, %none_39217, %none_39218, %cpu_39219, %false_39220 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_39221 = torch.constant.int 0
    %int128_39222 = torch.constant.int 128
    %int2_39223 = torch.constant.int 2
    %none_39224 = torch.constant.none
    %none_39225 = torch.constant.none
    %cpu_39226 = torch.constant.device "cpu"
    %false_39227 = torch.constant.bool false
    %41798 = torch.aten.arange.start_step %int0_39221, %int128_39222, %int2_39223, %none_39224, %none_39225, %cpu_39226, %false_39227 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_39228 = torch.constant.int 0
    %int0_39229 = torch.constant.int 0
    %int64_39230 = torch.constant.int 64
    %int1_39231 = torch.constant.int 1
    %41799 = torch.aten.slice.Tensor %41798, %int0_39228, %int0_39229, %int64_39230, %int1_39231 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_39232 = torch.constant.int 6
    %41800 = torch.prims.convert_element_type %41799, %int6_39232 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_39233 = torch.constant.int 128
    %41801 = torch.aten.div.Scalar %41800, %int128_39233 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_39234 = torch.constant.float 5.000000e+05
    %41802 = torch.aten.pow.Scalar %float5.000000e05_39234, %41801 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %41803 = torch.aten.reciprocal %41802 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_39235 = torch.constant.float 1.000000e+00
    %41804 = torch.aten.mul.Scalar %41803, %float1.000000e00_39235 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_39236 = torch.constant.int 131072
    %int1_39237 = torch.constant.int 1
    %41805 = torch.prim.ListConstruct %int131072_39236, %int1_39237 : (!torch.int, !torch.int) -> !torch.list<int>
    %41806 = torch.aten.view %41797, %41805 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %41807 = torch.aten.mul.Tensor %41806, %41804 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %41808 = torch.aten.cos %41807 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %41809 = torch.aten.sin %41807 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %41810 = torch.aten.complex %41808, %41809 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
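    // The complex table is replicated to all eight devices via
    // flow.tensor.transfer; each device receives its own copy for the rotary
    // application that follows.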
    %41811 = torch_c.to_builtin_tensor %41810 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41812 = flow.tensor.transfer %41811 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %41813 = torch_c.from_builtin_tensor %41812 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41814 = torch_c.to_builtin_tensor %41810 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41815 = flow.tensor.transfer %41814 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %41816 = torch_c.from_builtin_tensor %41815 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41817 = torch_c.to_builtin_tensor %41810 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41818 = flow.tensor.transfer %41817 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %41819 = torch_c.from_builtin_tensor %41818 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41820 = torch_c.to_builtin_tensor %41810 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41821 = flow.tensor.transfer %41820 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %41822 = torch_c.from_builtin_tensor %41821 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41823 = torch_c.to_builtin_tensor %41810 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41824 = flow.tensor.transfer %41823 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %41825 = torch_c.from_builtin_tensor %41824 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41826 = torch_c.to_builtin_tensor %41810 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41827 = flow.tensor.transfer %41826 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %41828 = torch_c.from_builtin_tensor %41827 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41829 = torch_c.to_builtin_tensor %41810 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41830 = flow.tensor.transfer %41829 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %41831 = torch_c.from_builtin_tensor %41830 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41832 = torch_c.to_builtin_tensor %41810 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41833 = flow.tensor.transfer %41832 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %41834 = torch_c.from_builtin_tensor %41833 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
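    // Per-device rotary application (annotation inferred from the IR): slice the
    // table to the current sequence length, broadcast it to [1,?,1,64], bitcast
    // the [4,?,4,128] f16 activations to [4,?,4,64] complex<f16>, multiply
    // elementwise by the table, bitcast back to [4,?,4,128] f32, and truncate
    // to f16.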
    %int1_39238 = torch.constant.int 1
    %41835 = torch.aten.size.int %41578, %int1_39238 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_39239 = torch.constant.int 0
    %41836 = torch.aten.add.int %int0_39239, %41835 : !torch.int, !torch.int -> !torch.int
    %int0_39240 = torch.constant.int 0
    %int0_39241 = torch.constant.int 0
    %int1_39242 = torch.constant.int 1
    %41837 = torch.aten.slice.Tensor %41813, %int0_39240, %int0_39241, %41836, %int1_39242 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41837, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39243 = torch.constant.int 1
    %int0_39244 = torch.constant.int 0
    %int9223372036854775807_39245 = torch.constant.int 9223372036854775807
    %int1_39246 = torch.constant.int 1
    %41838 = torch.aten.slice.Tensor %41837, %int1_39243, %int0_39244, %int9223372036854775807_39245, %int1_39246 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41838, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39247 = torch.constant.int 0
    %41839 = torch.aten.unsqueeze %41838, %int0_39247 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %41839, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39248 = torch.constant.int 2
    %41840 = torch.aten.unsqueeze %41839, %int2_39248 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41840, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39249 = torch.constant.int 3
    %int0_39250 = torch.constant.int 0
    %int9223372036854775807_39251 = torch.constant.int 9223372036854775807
    %int1_39252 = torch.constant.int 1
    %41841 = torch.aten.slice.Tensor %41840, %int3_39249, %int0_39250, %int9223372036854775807_39251, %int1_39252 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41841, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %41842 = torch_c.to_builtin_tensor %41750 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_39253 = arith.constant 1 : index
    %dim_39254 = tensor.dim %41842, %c1_39253 : tensor<4x?x4x128xf16>
    %41843 = flow.tensor.bitcast %41842 : tensor<4x?x4x128xf16>{%dim_39254} -> tensor<4x?x4x64xcomplex<f16>>{%dim_39254}
    %41844 = torch_c.from_builtin_tensor %41843 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %41844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %41845 = torch.aten.mul.Tensor %41844, %41841 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %41845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %41846 = torch_c.to_builtin_tensor %41845 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_39255 = arith.constant 1 : index
    %dim_39256 = tensor.dim %41846, %c1_39255 : tensor<4x?x4x64xcomplex<f32>>
    %41847 = flow.tensor.bitcast %41846 : tensor<4x?x4x64xcomplex<f32>>{%dim_39256} -> tensor<4x?x4x128xf32>{%dim_39256}
    %41848 = torch_c.from_builtin_tensor %41847 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %41848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_39257 = torch.constant.int 5
    %41849 = torch.prims.convert_element_type %41848, %int5_39257 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
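    // The same slice / bitcast / complex-multiply / bitcast / truncate sequence
    // repeats below once per remaining device shard.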
    %int1_39258 = torch.constant.int 1
    %41850 = torch.aten.size.int %41584, %int1_39258 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_39259 = torch.constant.int 0
    %41851 = torch.aten.add.int %int0_39259, %41850 : !torch.int, !torch.int -> !torch.int
    %int0_39260 = torch.constant.int 0
    %int0_39261 = torch.constant.int 0
    %int1_39262 = torch.constant.int 1
    %41852 = torch.aten.slice.Tensor %41816, %int0_39260, %int0_39261, %41851, %int1_39262 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41852, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39263 = torch.constant.int 1
    %int0_39264 = torch.constant.int 0
    %int9223372036854775807_39265 = torch.constant.int 9223372036854775807
    %int1_39266 = torch.constant.int 1
    %41853 = torch.aten.slice.Tensor %41852, %int1_39263, %int0_39264, %int9223372036854775807_39265, %int1_39266 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41853, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39267 = torch.constant.int 0
    %41854 = torch.aten.unsqueeze %41853, %int0_39267 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %41854, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39268 = torch.constant.int 2
    %41855 = torch.aten.unsqueeze %41854, %int2_39268 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41855, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39269 = torch.constant.int 3
    %int0_39270 = torch.constant.int 0
    %int9223372036854775807_39271 = torch.constant.int 9223372036854775807
    %int1_39272 = torch.constant.int 1
    %41856 = torch.aten.slice.Tensor %41855, %int3_39269, %int0_39270, %int9223372036854775807_39271, %int1_39272 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41856, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %41857 = torch_c.to_builtin_tensor %41752 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_39273 = arith.constant 1 : index
    %dim_39274 = tensor.dim %41857, %c1_39273 : tensor<4x?x4x128xf16>
    %41858 = flow.tensor.bitcast %41857 : tensor<4x?x4x128xf16>{%dim_39274} -> tensor<4x?x4x64xcomplex<f16>>{%dim_39274}
    %41859 = torch_c.from_builtin_tensor %41858 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %41859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %41860 = torch.aten.mul.Tensor %41859, %41856 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %41860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %41861 = torch_c.to_builtin_tensor %41860 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_39275 = arith.constant 1 : index
    %dim_39276 = tensor.dim %41861, %c1_39275 : tensor<4x?x4x64xcomplex<f32>>
    %41862 = flow.tensor.bitcast %41861 : tensor<4x?x4x64xcomplex<f32>>{%dim_39276} -> tensor<4x?x4x128xf32>{%dim_39276}
    %41863 = torch_c.from_builtin_tensor %41862 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %41863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_39277 = torch.constant.int 5
    %41864 = torch.prims.convert_element_type %41863, %int5_39277 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_39278 = torch.constant.int 1
    %41865 = torch.aten.size.int %41590, %int1_39278 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_39279 = torch.constant.int 0
    %41866 = torch.aten.add.int %int0_39279, %41865 : !torch.int, !torch.int -> !torch.int
    %int0_39280 = torch.constant.int 0
    %int0_39281 = torch.constant.int 0
    %int1_39282 = torch.constant.int 1
    %41867 = torch.aten.slice.Tensor %41819, %int0_39280, %int0_39281, %41866, %int1_39282 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41867, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39283 = torch.constant.int 1
    %int0_39284 = torch.constant.int 0
    %int9223372036854775807_39285 = torch.constant.int 9223372036854775807
    %int1_39286 = torch.constant.int 1
    %41868 = torch.aten.slice.Tensor %41867, %int1_39283, %int0_39284, %int9223372036854775807_39285, %int1_39286 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41868, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39287 = torch.constant.int 0
    %41869 = torch.aten.unsqueeze %41868, %int0_39287 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %41869, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39288 = torch.constant.int 2
    %41870 = torch.aten.unsqueeze %41869, %int2_39288 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41870, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39289 = torch.constant.int 3
    %int0_39290 = torch.constant.int 0
    %int9223372036854775807_39291 = torch.constant.int 9223372036854775807
    %int1_39292 = torch.constant.int 1
    %41871 = torch.aten.slice.Tensor %41870, %int3_39289, %int0_39290, %int9223372036854775807_39291, %int1_39292 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41871, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %41872 = torch_c.to_builtin_tensor %41754 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_39293 = arith.constant 1 : index
    %dim_39294 = tensor.dim %41872, %c1_39293 : tensor<4x?x4x128xf16>
    %41873 = flow.tensor.bitcast %41872 : tensor<4x?x4x128xf16>{%dim_39294} -> tensor<4x?x4x64xcomplex<f16>>{%dim_39294}
    %41874 = torch_c.from_builtin_tensor %41873 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %41874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %41875 = torch.aten.mul.Tensor %41874, %41871 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %41875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %41876 = torch_c.to_builtin_tensor %41875 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_39295 = arith.constant 1 : index
    %dim_39296 = tensor.dim %41876, %c1_39295 : tensor<4x?x4x64xcomplex<f32>>
    %41877 = flow.tensor.bitcast %41876 : tensor<4x?x4x64xcomplex<f32>>{%dim_39296} -> tensor<4x?x4x128xf32>{%dim_39296}
    %41878 = torch_c.from_builtin_tensor %41877 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %41878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_39297 = torch.constant.int 5
    %41879 = torch.prims.convert_element_type %41878, %int5_39297 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_39298 = torch.constant.int 1
    %41880 = torch.aten.size.int %41596, %int1_39298 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_39299 = torch.constant.int 0
    %41881 = torch.aten.add.int %int0_39299, %41880 : !torch.int, !torch.int -> !torch.int
    %int0_39300 = torch.constant.int 0
    %int0_39301 = torch.constant.int 0
    %int1_39302 = torch.constant.int 1
    %41882 = torch.aten.slice.Tensor %41822, %int0_39300, %int0_39301, %41881, %int1_39302 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41882, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39303 = torch.constant.int 1
    %int0_39304 = torch.constant.int 0
    %int9223372036854775807_39305 = torch.constant.int 9223372036854775807
    %int1_39306 = torch.constant.int 1
    %41883 = torch.aten.slice.Tensor %41882, %int1_39303, %int0_39304, %int9223372036854775807_39305, %int1_39306 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41883, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39307 = torch.constant.int 0
    %41884 = torch.aten.unsqueeze %41883, %int0_39307 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %41884, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39308 = torch.constant.int 2
    %41885 = torch.aten.unsqueeze %41884, %int2_39308 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41885, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39309 = torch.constant.int 3
    %int0_39310 = torch.constant.int 0
    %int9223372036854775807_39311 = torch.constant.int 9223372036854775807
    %int1_39312 = torch.constant.int 1
    %41886 = torch.aten.slice.Tensor %41885, %int3_39309, %int0_39310, %int9223372036854775807_39311, %int1_39312 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41886, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %41887 = torch_c.to_builtin_tensor %41756 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_39313 = arith.constant 1 : index
    %dim_39314 = tensor.dim %41887, %c1_39313 : tensor<4x?x4x128xf16>
    %41888 = flow.tensor.bitcast %41887 : tensor<4x?x4x128xf16>{%dim_39314} -> tensor<4x?x4x64xcomplex<f16>>{%dim_39314}
    %41889 = torch_c.from_builtin_tensor %41888 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %41889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %41890 = torch.aten.mul.Tensor %41889, %41886 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %41890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %41891 = torch_c.to_builtin_tensor %41890 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_39315 = arith.constant 1 : index
    %dim_39316 = tensor.dim %41891, %c1_39315 : tensor<4x?x4x64xcomplex<f32>>
    %41892 = flow.tensor.bitcast %41891 : tensor<4x?x4x64xcomplex<f32>>{%dim_39316} -> tensor<4x?x4x128xf32>{%dim_39316}
    %41893 = torch_c.from_builtin_tensor %41892 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %41893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_39317 = torch.constant.int 5
    %41894 = torch.prims.convert_element_type %41893, %int5_39317 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_39318 = torch.constant.int 1
    %41895 = torch.aten.size.int %41602, %int1_39318 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_39319 = torch.constant.int 0
    %41896 = torch.aten.add.int %int0_39319, %41895 : !torch.int, !torch.int -> !torch.int
    %int0_39320 = torch.constant.int 0
    %int0_39321 = torch.constant.int 0
    %int1_39322 = torch.constant.int 1
    %41897 = torch.aten.slice.Tensor %41825, %int0_39320, %int0_39321, %41896, %int1_39322 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41897, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39323 = torch.constant.int 1
    %int0_39324 = torch.constant.int 0
    %int9223372036854775807_39325 = torch.constant.int 9223372036854775807
    %int1_39326 = torch.constant.int 1
    %41898 = torch.aten.slice.Tensor %41897, %int1_39323, %int0_39324, %int9223372036854775807_39325, %int1_39326 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41898, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39327 = torch.constant.int 0
    %41899 = torch.aten.unsqueeze %41898, %int0_39327 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %41899, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39328 = torch.constant.int 2
    %41900 = torch.aten.unsqueeze %41899, %int2_39328 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41900, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39329 = torch.constant.int 3
    %int0_39330 = torch.constant.int 0
    %int9223372036854775807_39331 = torch.constant.int 9223372036854775807
    %int1_39332 = torch.constant.int 1
    %41901 = torch.aten.slice.Tensor %41900, %int3_39329, %int0_39330, %int9223372036854775807_39331, %int1_39332 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41901, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %41902 = torch_c.to_builtin_tensor %41758 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_39333 = arith.constant 1 : index
    %dim_39334 = tensor.dim %41902, %c1_39333 : tensor<4x?x4x128xf16>
    %41903 = flow.tensor.bitcast %41902 : tensor<4x?x4x128xf16>{%dim_39334} -> tensor<4x?x4x64xcomplex<f16>>{%dim_39334}
    %41904 = torch_c.from_builtin_tensor %41903 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %41904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %41905 = torch.aten.mul.Tensor %41904, %41901 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %41905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %41906 = torch_c.to_builtin_tensor %41905 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_39335 = arith.constant 1 : index
    %dim_39336 = tensor.dim %41906, %c1_39335 : tensor<4x?x4x64xcomplex<f32>>
    %41907 = flow.tensor.bitcast %41906 : tensor<4x?x4x64xcomplex<f32>>{%dim_39336} -> tensor<4x?x4x128xf32>{%dim_39336}
    %41908 = torch_c.from_builtin_tensor %41907 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %41908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_39337 = torch.constant.int 5
    %41909 = torch.prims.convert_element_type %41908, %int5_39337 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_39338 = torch.constant.int 1
    %41910 = torch.aten.size.int %41608, %int1_39338 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_39339 = torch.constant.int 0
    %41911 = torch.aten.add.int %int0_39339, %41910 : !torch.int, !torch.int -> !torch.int
    %int0_39340 = torch.constant.int 0
    %int0_39341 = torch.constant.int 0
    %int1_39342 = torch.constant.int 1
    %41912 = torch.aten.slice.Tensor %41828, %int0_39340, %int0_39341, %41911, %int1_39342 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41912, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39343 = torch.constant.int 1
    %int0_39344 = torch.constant.int 0
    %int9223372036854775807_39345 = torch.constant.int 9223372036854775807
    %int1_39346 = torch.constant.int 1
    %41913 = torch.aten.slice.Tensor %41912, %int1_39343, %int0_39344, %int9223372036854775807_39345, %int1_39346 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41913, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39347 = torch.constant.int 0
    %41914 = torch.aten.unsqueeze %41913, %int0_39347 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %41914, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39348 = torch.constant.int 2
    %41915 = torch.aten.unsqueeze %41914, %int2_39348 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41915, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39349 = torch.constant.int 3
    %int0_39350 = torch.constant.int 0
    %int9223372036854775807_39351 = torch.constant.int 9223372036854775807
    %int1_39352 = torch.constant.int 1
    %41916 = torch.aten.slice.Tensor %41915, %int3_39349, %int0_39350, %int9223372036854775807_39351, %int1_39352 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41916, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %41917 = torch_c.to_builtin_tensor %41760 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_39353 = arith.constant 1 : index
    %dim_39354 = tensor.dim %41917, %c1_39353 : tensor<4x?x4x128xf16>
    %41918 = flow.tensor.bitcast %41917 : tensor<4x?x4x128xf16>{%dim_39354} -> tensor<4x?x4x64xcomplex<f16>>{%dim_39354}
    %41919 = torch_c.from_builtin_tensor %41918 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %41919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %41920 = torch.aten.mul.Tensor %41919, %41916 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %41920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %41921 = torch_c.to_builtin_tensor %41920 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_39355 = arith.constant 1 : index
    %dim_39356 = tensor.dim %41921, %c1_39355 : tensor<4x?x4x64xcomplex<f32>>
    %41922 = flow.tensor.bitcast %41921 : tensor<4x?x4x64xcomplex<f32>>{%dim_39356} -> tensor<4x?x4x128xf32>{%dim_39356}
    %41923 = torch_c.from_builtin_tensor %41922 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %41923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_39357 = torch.constant.int 5
    %41924 = torch.prims.convert_element_type %41923, %int5_39357 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
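    // The block above applies rotary position embedding (RoPE) to one device's
    // query-head shard: the precomputed [131072, 64] complex<f32> table is sliced
    // to the current sequence length, broadcast to [1, s0*16, 1, 64], the f16
    // [4, ?, 4, 128] activations are bitcast to complex<f16> (interleaved re/im
    // pairs), multiplied, bitcast back, and truncated to f16. A rough PyTorch
    // sketch of the same step (hypothetical names `q`, `table`, `seq_len`; not
    // from the source):
    //   q_c  = torch.view_as_complex(q.to(torch.float32).reshape(4, -1, 4, 64, 2))
    //   rope = table[:seq_len].reshape(1, -1, 1, 64)
    //   q    = torch.view_as_real(q_c * rope).reshape(4, -1, 4, 128).to(torch.float16)
    // The same pattern repeats below for the remaining query shards.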
    %int1_39358 = torch.constant.int 1
    %41925 = torch.aten.size.int %41614, %int1_39358 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_39359 = torch.constant.int 0
    %41926 = torch.aten.add.int %int0_39359, %41925 : !torch.int, !torch.int -> !torch.int
    %int0_39360 = torch.constant.int 0
    %int0_39361 = torch.constant.int 0
    %int1_39362 = torch.constant.int 1
    %41927 = torch.aten.slice.Tensor %41831, %int0_39360, %int0_39361, %41926, %int1_39362 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41927, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39363 = torch.constant.int 1
    %int0_39364 = torch.constant.int 0
    %int9223372036854775807_39365 = torch.constant.int 9223372036854775807
    %int1_39366 = torch.constant.int 1
    %41928 = torch.aten.slice.Tensor %41927, %int1_39363, %int0_39364, %int9223372036854775807_39365, %int1_39366 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41928, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39367 = torch.constant.int 0
    %41929 = torch.aten.unsqueeze %41928, %int0_39367 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %41929, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39368 = torch.constant.int 2
    %41930 = torch.aten.unsqueeze %41929, %int2_39368 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41930, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39369 = torch.constant.int 3
    %int0_39370 = torch.constant.int 0
    %int9223372036854775807_39371 = torch.constant.int 9223372036854775807
    %int1_39372 = torch.constant.int 1
    %41931 = torch.aten.slice.Tensor %41930, %int3_39369, %int0_39370, %int9223372036854775807_39371, %int1_39372 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41931, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %41932 = torch_c.to_builtin_tensor %41762 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_39373 = arith.constant 1 : index
    %dim_39374 = tensor.dim %41932, %c1_39373 : tensor<4x?x4x128xf16>
    %41933 = flow.tensor.bitcast %41932 : tensor<4x?x4x128xf16>{%dim_39374} -> tensor<4x?x4x64xcomplex<f16>>{%dim_39374}
    %41934 = torch_c.from_builtin_tensor %41933 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %41934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %41935 = torch.aten.mul.Tensor %41934, %41931 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %41935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %41936 = torch_c.to_builtin_tensor %41935 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_39375 = arith.constant 1 : index
    %dim_39376 = tensor.dim %41936, %c1_39375 : tensor<4x?x4x64xcomplex<f32>>
    %41937 = flow.tensor.bitcast %41936 : tensor<4x?x4x64xcomplex<f32>>{%dim_39376} -> tensor<4x?x4x128xf32>{%dim_39376}
    %41938 = torch_c.from_builtin_tensor %41937 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %41938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_39377 = torch.constant.int 5
    %41939 = torch.prims.convert_element_type %41938, %int5_39377 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_39378 = torch.constant.int 1
    %41940 = torch.aten.size.int %41620, %int1_39378 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_39379 = torch.constant.int 0
    %41941 = torch.aten.add.int %int0_39379, %41940 : !torch.int, !torch.int -> !torch.int
    %int0_39380 = torch.constant.int 0
    %int0_39381 = torch.constant.int 0
    %int1_39382 = torch.constant.int 1
    %41942 = torch.aten.slice.Tensor %41834, %int0_39380, %int0_39381, %41941, %int1_39382 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41942, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39383 = torch.constant.int 1
    %int0_39384 = torch.constant.int 0
    %int9223372036854775807_39385 = torch.constant.int 9223372036854775807
    %int1_39386 = torch.constant.int 1
    %41943 = torch.aten.slice.Tensor %41942, %int1_39383, %int0_39384, %int9223372036854775807_39385, %int1_39386 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41943, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39387 = torch.constant.int 0
    %41944 = torch.aten.unsqueeze %41943, %int0_39387 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %41944, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39388 = torch.constant.int 2
    %41945 = torch.aten.unsqueeze %41944, %int2_39388 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41945, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39389 = torch.constant.int 3
    %int0_39390 = torch.constant.int 0
    %int9223372036854775807_39391 = torch.constant.int 9223372036854775807
    %int1_39392 = torch.constant.int 1
    %41946 = torch.aten.slice.Tensor %41945, %int3_39389, %int0_39390, %int9223372036854775807_39391, %int1_39392 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41946, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %41947 = torch_c.to_builtin_tensor %41764 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_39393 = arith.constant 1 : index
    %dim_39394 = tensor.dim %41947, %c1_39393 : tensor<4x?x4x128xf16>
    %41948 = flow.tensor.bitcast %41947 : tensor<4x?x4x128xf16>{%dim_39394} -> tensor<4x?x4x64xcomplex<f16>>{%dim_39394}
    %41949 = torch_c.from_builtin_tensor %41948 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %41949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %41950 = torch.aten.mul.Tensor %41949, %41946 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %41950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %41951 = torch_c.to_builtin_tensor %41950 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_39395 = arith.constant 1 : index
    %dim_39396 = tensor.dim %41951, %c1_39395 : tensor<4x?x4x64xcomplex<f32>>
    %41952 = flow.tensor.bitcast %41951 : tensor<4x?x4x64xcomplex<f32>>{%dim_39396} -> tensor<4x?x4x128xf32>{%dim_39396}
    %41953 = torch_c.from_builtin_tensor %41952 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %41953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_39397 = torch.constant.int 5
    %41954 = torch.prims.convert_element_type %41953, %int5_39397 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %41954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
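    // Next, the rotary table is rebuilt from scratch on the host rather than
    // reusing an earlier value: positions arange(131072), even channel indices
    // arange(0, 128, 2) sliced to 64 entries, inverse frequencies derived from a
    // rope_theta of 5.0e+05, then an outer product with the positions and a
    // cos/sin pair packed into complex<f32>.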
    %int131072_39398 = torch.constant.int 131072
    %none_39399 = torch.constant.none
    %none_39400 = torch.constant.none
    %cpu_39401 = torch.constant.device "cpu"
    %false_39402 = torch.constant.bool false
    %41955 = torch.aten.arange %int131072_39398, %none_39399, %none_39400, %cpu_39401, %false_39402 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_39403 = torch.constant.int 0
    %int128_39404 = torch.constant.int 128
    %int2_39405 = torch.constant.int 2
    %none_39406 = torch.constant.none
    %none_39407 = torch.constant.none
    %cpu_39408 = torch.constant.device "cpu"
    %false_39409 = torch.constant.bool false
    %41956 = torch.aten.arange.start_step %int0_39403, %int128_39404, %int2_39405, %none_39406, %none_39407, %cpu_39408, %false_39409 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_39410 = torch.constant.int 0
    %int0_39411 = torch.constant.int 0
    %int64_39412 = torch.constant.int 64
    %int1_39413 = torch.constant.int 1
    %41957 = torch.aten.slice.Tensor %41956, %int0_39410, %int0_39411, %int64_39412, %int1_39413 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_39414 = torch.constant.int 6
    %41958 = torch.prims.convert_element_type %41957, %int6_39414 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_39415 = torch.constant.int 128
    %41959 = torch.aten.div.Scalar %41958, %int128_39415 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_39416 = torch.constant.float 5.000000e+05
    %41960 = torch.aten.pow.Scalar %float5.000000e05_39416, %41959 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %41961 = torch.aten.reciprocal %41960 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_39417 = torch.constant.float 1.000000e+00
    %41962 = torch.aten.mul.Scalar %41961, %float1.000000e00_39417 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_39418 = torch.constant.int 131072
    %int1_39419 = torch.constant.int 1
    %41963 = torch.prim.ListConstruct %int131072_39418, %int1_39419 : (!torch.int, !torch.int) -> !torch.list<int>
    %41964 = torch.aten.view %41955, %41963 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %41965 = torch.aten.mul.Tensor %41964, %41962 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %41966 = torch.aten.cos %41965 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %41967 = torch.aten.sin %41965 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %41968 = torch.aten.complex %41966, %41967 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
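    // A minimal PyTorch sketch of the table construction above (names are
    // illustrative, not from the source):
    //   pos      = torch.arange(131072)                      # %41955
    //   idx      = torch.arange(0, 128, 2)[:64].float()      # %41956..%41958
    //   inv_freq = (1.0 / (5.0e5 ** (idx / 128))) * 1.0      # %41959..%41962
    //   freqs    = pos.view(131072, 1) * inv_freq            # %41964..%41965
    //   table    = torch.complex(freqs.cos(), freqs.sin())   # %41966..%41968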
    %41969 = torch_c.to_builtin_tensor %41968 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41970 = flow.tensor.transfer %41969 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %41971 = torch_c.from_builtin_tensor %41970 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41972 = torch_c.to_builtin_tensor %41968 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41973 = flow.tensor.transfer %41972 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %41974 = torch_c.from_builtin_tensor %41973 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41975 = torch_c.to_builtin_tensor %41968 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41976 = flow.tensor.transfer %41975 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %41977 = torch_c.from_builtin_tensor %41976 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41978 = torch_c.to_builtin_tensor %41968 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41979 = flow.tensor.transfer %41978 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %41980 = torch_c.from_builtin_tensor %41979 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41981 = torch_c.to_builtin_tensor %41968 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41982 = flow.tensor.transfer %41981 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %41983 = torch_c.from_builtin_tensor %41982 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41984 = torch_c.to_builtin_tensor %41968 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41985 = flow.tensor.transfer %41984 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %41986 = torch_c.from_builtin_tensor %41985 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41987 = torch_c.to_builtin_tensor %41968 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41988 = flow.tensor.transfer %41987 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %41989 = torch_c.from_builtin_tensor %41988 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %41990 = torch_c.to_builtin_tensor %41968 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %41991 = flow.tensor.transfer %41990 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %41992 = torch_c.from_builtin_tensor %41991 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
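    // The host-built table is then replicated: one flow.tensor.transfer per
    // device promise (@__device_0 ... @__device_7), yielding eight per-device
    // copies (%41971, %41974, ..., %41992) consumed by the key-head RoPE below.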
    %int1_39420 = torch.constant.int 1
    %41993 = torch.aten.size.int %41642, %int1_39420 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_39421 = torch.constant.int 0
    %41994 = torch.aten.add.int %int0_39421, %41993 : !torch.int, !torch.int -> !torch.int
    %int0_39422 = torch.constant.int 0
    %int0_39423 = torch.constant.int 0
    %int1_39424 = torch.constant.int 1
    %41995 = torch.aten.slice.Tensor %41971, %int0_39422, %int0_39423, %41994, %int1_39424 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41995, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39425 = torch.constant.int 1
    %int0_39426 = torch.constant.int 0
    %int9223372036854775807_39427 = torch.constant.int 9223372036854775807
    %int1_39428 = torch.constant.int 1
    %41996 = torch.aten.slice.Tensor %41995, %int1_39425, %int0_39426, %int9223372036854775807_39427, %int1_39428 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %41996, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39429 = torch.constant.int 0
    %41997 = torch.aten.unsqueeze %41996, %int0_39429 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %41997, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39430 = torch.constant.int 2
    %41998 = torch.aten.unsqueeze %41997, %int2_39430 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41998, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39431 = torch.constant.int 3
    %int0_39432 = torch.constant.int 0
    %int9223372036854775807_39433 = torch.constant.int 9223372036854775807
    %int1_39434 = torch.constant.int 1
    %41999 = torch.aten.slice.Tensor %41998, %int3_39431, %int0_39432, %int9223372036854775807_39433, %int1_39434 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %41999, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %42000 = torch_c.to_builtin_tensor %41766 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_39435 = arith.constant 1 : index
    %dim_39436 = tensor.dim %42000, %c1_39435 : tensor<4x?x1x128xf16>
    %42001 = flow.tensor.bitcast %42000 : tensor<4x?x1x128xf16>{%dim_39436} -> tensor<4x?x1x64xcomplex<f16>>{%dim_39436}
    %42002 = torch_c.from_builtin_tensor %42001 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %42002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %42003 = torch.aten.mul.Tensor %42002, %41999 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %42004 = torch_c.to_builtin_tensor %42003 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_39437 = arith.constant 1 : index
    %dim_39438 = tensor.dim %42004, %c1_39437 : tensor<4x?x1x64xcomplex<f32>>
    %42005 = flow.tensor.bitcast %42004 : tensor<4x?x1x64xcomplex<f32>>{%dim_39438} -> tensor<4x?x1x128xf32>{%dim_39438}
    %42006 = torch_c.from_builtin_tensor %42005 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %42006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_39439 = torch.constant.int 5
    %42007 = torch.prims.convert_element_type %42006, %int5_39439 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %42007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
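    // Same RoPE pattern as for the query shards, now applied to the single
    // KV head per device ([4, ?, 1, 128] f16), using that device's copy of the
    // table. This repeats for the remaining seven devices below.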
    %int1_39440 = torch.constant.int 1
    %42008 = torch.aten.size.int %41648, %int1_39440 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_39441 = torch.constant.int 0
    %42009 = torch.aten.add.int %int0_39441, %42008 : !torch.int, !torch.int -> !torch.int
    %int0_39442 = torch.constant.int 0
    %int0_39443 = torch.constant.int 0
    %int1_39444 = torch.constant.int 1
    %42010 = torch.aten.slice.Tensor %41974, %int0_39442, %int0_39443, %42009, %int1_39444 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42010, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39445 = torch.constant.int 1
    %int0_39446 = torch.constant.int 0
    %int9223372036854775807_39447 = torch.constant.int 9223372036854775807
    %int1_39448 = torch.constant.int 1
    %42011 = torch.aten.slice.Tensor %42010, %int1_39445, %int0_39446, %int9223372036854775807_39447, %int1_39448 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42011, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39449 = torch.constant.int 0
    %42012 = torch.aten.unsqueeze %42011, %int0_39449 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %42012, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39450 = torch.constant.int 2
    %42013 = torch.aten.unsqueeze %42012, %int2_39450 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42013, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39451 = torch.constant.int 3
    %int0_39452 = torch.constant.int 0
    %int9223372036854775807_39453 = torch.constant.int 9223372036854775807
    %int1_39454 = torch.constant.int 1
    %42014 = torch.aten.slice.Tensor %42013, %int3_39451, %int0_39452, %int9223372036854775807_39453, %int1_39454 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42014, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %42015 = torch_c.to_builtin_tensor %41768 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_39455 = arith.constant 1 : index
    %dim_39456 = tensor.dim %42015, %c1_39455 : tensor<4x?x1x128xf16>
    %42016 = flow.tensor.bitcast %42015 : tensor<4x?x1x128xf16>{%dim_39456} -> tensor<4x?x1x64xcomplex<f16>>{%dim_39456}
    %42017 = torch_c.from_builtin_tensor %42016 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %42017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %42018 = torch.aten.mul.Tensor %42017, %42014 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %42019 = torch_c.to_builtin_tensor %42018 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_39457 = arith.constant 1 : index
    %dim_39458 = tensor.dim %42019, %c1_39457 : tensor<4x?x1x64xcomplex<f32>>
    %42020 = flow.tensor.bitcast %42019 : tensor<4x?x1x64xcomplex<f32>>{%dim_39458} -> tensor<4x?x1x128xf32>{%dim_39458}
    %42021 = torch_c.from_builtin_tensor %42020 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %42021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_39459 = torch.constant.int 5
    %42022 = torch.prims.convert_element_type %42021, %int5_39459 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %42022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_39460 = torch.constant.int 1
    %42023 = torch.aten.size.int %41654, %int1_39460 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_39461 = torch.constant.int 0
    %42024 = torch.aten.add.int %int0_39461, %42023 : !torch.int, !torch.int -> !torch.int
    %int0_39462 = torch.constant.int 0
    %int0_39463 = torch.constant.int 0
    %int1_39464 = torch.constant.int 1
    %42025 = torch.aten.slice.Tensor %41977, %int0_39462, %int0_39463, %42024, %int1_39464 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42025, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39465 = torch.constant.int 1
    %int0_39466 = torch.constant.int 0
    %int9223372036854775807_39467 = torch.constant.int 9223372036854775807
    %int1_39468 = torch.constant.int 1
    %42026 = torch.aten.slice.Tensor %42025, %int1_39465, %int0_39466, %int9223372036854775807_39467, %int1_39468 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42026, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39469 = torch.constant.int 0
    %42027 = torch.aten.unsqueeze %42026, %int0_39469 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %42027, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39470 = torch.constant.int 2
    %42028 = torch.aten.unsqueeze %42027, %int2_39470 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42028, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39471 = torch.constant.int 3
    %int0_39472 = torch.constant.int 0
    %int9223372036854775807_39473 = torch.constant.int 9223372036854775807
    %int1_39474 = torch.constant.int 1
    %42029 = torch.aten.slice.Tensor %42028, %int3_39471, %int0_39472, %int9223372036854775807_39473, %int1_39474 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42029, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %42030 = torch_c.to_builtin_tensor %41770 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_39475 = arith.constant 1 : index
    %dim_39476 = tensor.dim %42030, %c1_39475 : tensor<4x?x1x128xf16>
    %42031 = flow.tensor.bitcast %42030 : tensor<4x?x1x128xf16>{%dim_39476} -> tensor<4x?x1x64xcomplex<f16>>{%dim_39476}
    %42032 = torch_c.from_builtin_tensor %42031 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %42032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %42033 = torch.aten.mul.Tensor %42032, %42029 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %42034 = torch_c.to_builtin_tensor %42033 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_39477 = arith.constant 1 : index
    %dim_39478 = tensor.dim %42034, %c1_39477 : tensor<4x?x1x64xcomplex<f32>>
    %42035 = flow.tensor.bitcast %42034 : tensor<4x?x1x64xcomplex<f32>>{%dim_39478} -> tensor<4x?x1x128xf32>{%dim_39478}
    %42036 = torch_c.from_builtin_tensor %42035 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %42036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_39479 = torch.constant.int 5
    %42037 = torch.prims.convert_element_type %42036, %int5_39479 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %42037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_39480 = torch.constant.int 1
    %42038 = torch.aten.size.int %41660, %int1_39480 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_39481 = torch.constant.int 0
    %42039 = torch.aten.add.int %int0_39481, %42038 : !torch.int, !torch.int -> !torch.int
    %int0_39482 = torch.constant.int 0
    %int0_39483 = torch.constant.int 0
    %int1_39484 = torch.constant.int 1
    %42040 = torch.aten.slice.Tensor %41980, %int0_39482, %int0_39483, %42039, %int1_39484 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42040, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39485 = torch.constant.int 1
    %int0_39486 = torch.constant.int 0
    %int9223372036854775807_39487 = torch.constant.int 9223372036854775807
    %int1_39488 = torch.constant.int 1
    %42041 = torch.aten.slice.Tensor %42040, %int1_39485, %int0_39486, %int9223372036854775807_39487, %int1_39488 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42041, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39489 = torch.constant.int 0
    %42042 = torch.aten.unsqueeze %42041, %int0_39489 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %42042, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39490 = torch.constant.int 2
    %42043 = torch.aten.unsqueeze %42042, %int2_39490 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42043, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39491 = torch.constant.int 3
    %int0_39492 = torch.constant.int 0
    %int9223372036854775807_39493 = torch.constant.int 9223372036854775807
    %int1_39494 = torch.constant.int 1
    %42044 = torch.aten.slice.Tensor %42043, %int3_39491, %int0_39492, %int9223372036854775807_39493, %int1_39494 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42044, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %42045 = torch_c.to_builtin_tensor %41772 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_39495 = arith.constant 1 : index
    %dim_39496 = tensor.dim %42045, %c1_39495 : tensor<4x?x1x128xf16>
    %42046 = flow.tensor.bitcast %42045 : tensor<4x?x1x128xf16>{%dim_39496} -> tensor<4x?x1x64xcomplex<f16>>{%dim_39496}
    %42047 = torch_c.from_builtin_tensor %42046 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %42047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %42048 = torch.aten.mul.Tensor %42047, %42044 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %42049 = torch_c.to_builtin_tensor %42048 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_39497 = arith.constant 1 : index
    %dim_39498 = tensor.dim %42049, %c1_39497 : tensor<4x?x1x64xcomplex<f32>>
    %42050 = flow.tensor.bitcast %42049 : tensor<4x?x1x64xcomplex<f32>>{%dim_39498} -> tensor<4x?x1x128xf32>{%dim_39498}
    %42051 = torch_c.from_builtin_tensor %42050 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %42051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_39499 = torch.constant.int 5
    %42052 = torch.prims.convert_element_type %42051, %int5_39499 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %42052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_39500 = torch.constant.int 1
    %42053 = torch.aten.size.int %41666, %int1_39500 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_39501 = torch.constant.int 0
    %42054 = torch.aten.add.int %int0_39501, %42053 : !torch.int, !torch.int -> !torch.int
    %int0_39502 = torch.constant.int 0
    %int0_39503 = torch.constant.int 0
    %int1_39504 = torch.constant.int 1
    %42055 = torch.aten.slice.Tensor %41983, %int0_39502, %int0_39503, %42054, %int1_39504 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42055, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39505 = torch.constant.int 1
    %int0_39506 = torch.constant.int 0
    %int9223372036854775807_39507 = torch.constant.int 9223372036854775807
    %int1_39508 = torch.constant.int 1
    %42056 = torch.aten.slice.Tensor %42055, %int1_39505, %int0_39506, %int9223372036854775807_39507, %int1_39508 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42056, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39509 = torch.constant.int 0
    %42057 = torch.aten.unsqueeze %42056, %int0_39509 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %42057, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39510 = torch.constant.int 2
    %42058 = torch.aten.unsqueeze %42057, %int2_39510 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42058, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39511 = torch.constant.int 3
    %int0_39512 = torch.constant.int 0
    %int9223372036854775807_39513 = torch.constant.int 9223372036854775807
    %int1_39514 = torch.constant.int 1
    %42059 = torch.aten.slice.Tensor %42058, %int3_39511, %int0_39512, %int9223372036854775807_39513, %int1_39514 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42059, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %42060 = torch_c.to_builtin_tensor %41774 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_39515 = arith.constant 1 : index
    %dim_39516 = tensor.dim %42060, %c1_39515 : tensor<4x?x1x128xf16>
    %42061 = flow.tensor.bitcast %42060 : tensor<4x?x1x128xf16>{%dim_39516} -> tensor<4x?x1x64xcomplex<f16>>{%dim_39516}
    %42062 = torch_c.from_builtin_tensor %42061 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %42062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %42063 = torch.aten.mul.Tensor %42062, %42059 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %42064 = torch_c.to_builtin_tensor %42063 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_39517 = arith.constant 1 : index
    %dim_39518 = tensor.dim %42064, %c1_39517 : tensor<4x?x1x64xcomplex<f32>>
    %42065 = flow.tensor.bitcast %42064 : tensor<4x?x1x64xcomplex<f32>>{%dim_39518} -> tensor<4x?x1x128xf32>{%dim_39518}
    %42066 = torch_c.from_builtin_tensor %42065 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %42066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_39519 = torch.constant.int 5
    %42067 = torch.prims.convert_element_type %42066, %int5_39519 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %42067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_39520 = torch.constant.int 1
    %42068 = torch.aten.size.int %41672, %int1_39520 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_39521 = torch.constant.int 0
    %42069 = torch.aten.add.int %int0_39521, %42068 : !torch.int, !torch.int -> !torch.int
    %int0_39522 = torch.constant.int 0
    %int0_39523 = torch.constant.int 0
    %int1_39524 = torch.constant.int 1
    %42070 = torch.aten.slice.Tensor %41986, %int0_39522, %int0_39523, %42069, %int1_39524 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42070, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39525 = torch.constant.int 1
    %int0_39526 = torch.constant.int 0
    %int9223372036854775807_39527 = torch.constant.int 9223372036854775807
    %int1_39528 = torch.constant.int 1
    %42071 = torch.aten.slice.Tensor %42070, %int1_39525, %int0_39526, %int9223372036854775807_39527, %int1_39528 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42071, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39529 = torch.constant.int 0
    %42072 = torch.aten.unsqueeze %42071, %int0_39529 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %42072, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39530 = torch.constant.int 2
    %42073 = torch.aten.unsqueeze %42072, %int2_39530 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42073, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39531 = torch.constant.int 3
    %int0_39532 = torch.constant.int 0
    %int9223372036854775807_39533 = torch.constant.int 9223372036854775807
    %int1_39534 = torch.constant.int 1
    %42074 = torch.aten.slice.Tensor %42073, %int3_39531, %int0_39532, %int9223372036854775807_39533, %int1_39534 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42074, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %42075 = torch_c.to_builtin_tensor %41776 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_39535 = arith.constant 1 : index
    %dim_39536 = tensor.dim %42075, %c1_39535 : tensor<4x?x1x128xf16>
    %42076 = flow.tensor.bitcast %42075 : tensor<4x?x1x128xf16>{%dim_39536} -> tensor<4x?x1x64xcomplex<f16>>{%dim_39536}
    %42077 = torch_c.from_builtin_tensor %42076 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %42077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %42078 = torch.aten.mul.Tensor %42077, %42074 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %42079 = torch_c.to_builtin_tensor %42078 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_39537 = arith.constant 1 : index
    %dim_39538 = tensor.dim %42079, %c1_39537 : tensor<4x?x1x64xcomplex<f32>>
    %42080 = flow.tensor.bitcast %42079 : tensor<4x?x1x64xcomplex<f32>>{%dim_39538} -> tensor<4x?x1x128xf32>{%dim_39538}
    %42081 = torch_c.from_builtin_tensor %42080 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %42081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_39539 = torch.constant.int 5
    %42082 = torch.prims.convert_element_type %42081, %int5_39539 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %42082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_39540 = torch.constant.int 1
    %42083 = torch.aten.size.int %41678, %int1_39540 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_39541 = torch.constant.int 0
    %42084 = torch.aten.add.int %int0_39541, %42083 : !torch.int, !torch.int -> !torch.int
    %int0_39542 = torch.constant.int 0
    %int0_39543 = torch.constant.int 0
    %int1_39544 = torch.constant.int 1
    %42085 = torch.aten.slice.Tensor %41989, %int0_39542, %int0_39543, %42084, %int1_39544 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42085, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39545 = torch.constant.int 1
    %int0_39546 = torch.constant.int 0
    %int9223372036854775807_39547 = torch.constant.int 9223372036854775807
    %int1_39548 = torch.constant.int 1
    %42086 = torch.aten.slice.Tensor %42085, %int1_39545, %int0_39546, %int9223372036854775807_39547, %int1_39548 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42086, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39549 = torch.constant.int 0
    %42087 = torch.aten.unsqueeze %42086, %int0_39549 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %42087, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39550 = torch.constant.int 2
    %42088 = torch.aten.unsqueeze %42087, %int2_39550 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42088, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39551 = torch.constant.int 3
    %int0_39552 = torch.constant.int 0
    %int9223372036854775807_39553 = torch.constant.int 9223372036854775807
    %int1_39554 = torch.constant.int 1
    %42089 = torch.aten.slice.Tensor %42088, %int3_39551, %int0_39552, %int9223372036854775807_39553, %int1_39554 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42089, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %42090 = torch_c.to_builtin_tensor %41778 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_39555 = arith.constant 1 : index
    %dim_39556 = tensor.dim %42090, %c1_39555 : tensor<4x?x1x128xf16>
    %42091 = flow.tensor.bitcast %42090 : tensor<4x?x1x128xf16>{%dim_39556} -> tensor<4x?x1x64xcomplex<f16>>{%dim_39556}
    %42092 = torch_c.from_builtin_tensor %42091 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %42092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %42093 = torch.aten.mul.Tensor %42092, %42089 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %42094 = torch_c.to_builtin_tensor %42093 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_39557 = arith.constant 1 : index
    %dim_39558 = tensor.dim %42094, %c1_39557 : tensor<4x?x1x64xcomplex<f32>>
    %42095 = flow.tensor.bitcast %42094 : tensor<4x?x1x64xcomplex<f32>>{%dim_39558} -> tensor<4x?x1x128xf32>{%dim_39558}
    %42096 = torch_c.from_builtin_tensor %42095 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %42096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_39559 = torch.constant.int 5
    %42097 = torch.prims.convert_element_type %42096, %int5_39559 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %42097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_39560 = torch.constant.int 1
    %42098 = torch.aten.size.int %41684, %int1_39560 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_39561 = torch.constant.int 0
    %42099 = torch.aten.add.int %int0_39561, %42098 : !torch.int, !torch.int -> !torch.int
    %int0_39562 = torch.constant.int 0
    %int0_39563 = torch.constant.int 0
    %int1_39564 = torch.constant.int 1
    %42100 = torch.aten.slice.Tensor %41992, %int0_39562, %int0_39563, %42099, %int1_39564 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42100, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_39565 = torch.constant.int 1
    %int0_39566 = torch.constant.int 0
    %int9223372036854775807_39567 = torch.constant.int 9223372036854775807
    %int1_39568 = torch.constant.int 1
    %42101 = torch.aten.slice.Tensor %42100, %int1_39565, %int0_39566, %int9223372036854775807_39567, %int1_39568 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %42101, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_39569 = torch.constant.int 0
    %42102 = torch.aten.unsqueeze %42101, %int0_39569 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %42102, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_39570 = torch.constant.int 2
    %42103 = torch.aten.unsqueeze %42102, %int2_39570 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42103, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_39571 = torch.constant.int 3
    %int0_39572 = torch.constant.int 0
    %int9223372036854775807_39573 = torch.constant.int 9223372036854775807
    %int1_39574 = torch.constant.int 1
    %42104 = torch.aten.slice.Tensor %42103, %int3_39571, %int0_39572, %int9223372036854775807_39573, %int1_39574 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42104, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %42105 = torch_c.to_builtin_tensor %41780 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_39575 = arith.constant 1 : index
    %dim_39576 = tensor.dim %42105, %c1_39575 : tensor<4x?x1x128xf16>
    %42106 = flow.tensor.bitcast %42105 : tensor<4x?x1x128xf16>{%dim_39576} -> tensor<4x?x1x64xcomplex<f16>>{%dim_39576}
    %42107 = torch_c.from_builtin_tensor %42106 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %42107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %42108 = torch.aten.mul.Tensor %42107, %42104 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %42108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %42109 = torch_c.to_builtin_tensor %42108 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_39577 = arith.constant 1 : index
    %dim_39578 = tensor.dim %42109, %c1_39577 : tensor<4x?x1x64xcomplex<f32>>
    %42110 = flow.tensor.bitcast %42109 : tensor<4x?x1x64xcomplex<f32>>{%dim_39578} -> tensor<4x?x1x128xf32>{%dim_39578}
    %42111 = torch_c.from_builtin_tensor %42110 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %42111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_39579 = torch.constant.int 5
    %42112 = torch.prims.convert_element_type %42111, %int5_39579 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %42112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
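    // With all eight key shards rotated, the code turns to KV-cache addressing:
    // each device's page-id tensor (%2364, %2367, ..., %2385) is scaled by 64.
    // That stride would be consistent with 32 transformer blocks x 2 (K and V)
    // planes per cache page, though the layout is inferred from the constants
    // here rather than stated in the IR.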
    %int64_39580 = torch.constant.int 64
    %42113 = torch.aten.mul.Scalar %2364, %int64_39580 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42113, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_39581 = torch.constant.int 64
    %42114 = torch.aten.mul.Scalar %2367, %int64_39581 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42114, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_39582 = torch.constant.int 64
    %42115 = torch.aten.mul.Scalar %2370, %int64_39582 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42115, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_39583 = torch.constant.int 64
    %42116 = torch.aten.mul.Scalar %2373, %int64_39583 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42116, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_39584 = torch.constant.int 64
    %42117 = torch.aten.mul.Scalar %2376, %int64_39584 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42117, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_39585 = torch.constant.int 64
    %42118 = torch.aten.mul.Scalar %2379, %int64_39585 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42118, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_39586 = torch.constant.int 64
    %42119 = torch.aten.mul.Scalar %2382, %int64_39586 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42119, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_39587 = torch.constant.int 64
    %42120 = torch.aten.mul.Scalar %2385, %int64_39587 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42120, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
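    // An offset of 42 is then added to every scaled page id. Under the
    // [block, K/V] interleaving read above, 42 = 2 * 21 would select the K
    // plane of transformer block 21; again an inference from the constants,
    // not something the IR spells out.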
    %int42 = torch.constant.int 42
    %int1_39588 = torch.constant.int 1
    %42121 = torch.aten.add.Scalar %42113, %int42, %int1_39588 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42121, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int42_39589 = torch.constant.int 42
    %int1_39590 = torch.constant.int 1
    %42122 = torch.aten.add.Scalar %42114, %int42_39589, %int1_39590 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42122, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int42_39591 = torch.constant.int 42
    %int1_39592 = torch.constant.int 1
    %42123 = torch.aten.add.Scalar %42115, %int42_39591, %int1_39592 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42123, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int42_39593 = torch.constant.int 42
    %int1_39594 = torch.constant.int 1
    %42124 = torch.aten.add.Scalar %42116, %int42_39593, %int1_39594 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42124, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int42_39595 = torch.constant.int 42
    %int1_39596 = torch.constant.int 1
    %42125 = torch.aten.add.Scalar %42117, %int42_39595, %int1_39596 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42125, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int42_39597 = torch.constant.int 42
    %int1_39598 = torch.constant.int 1
    %42126 = torch.aten.add.Scalar %42118, %int42_39597, %int1_39598 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42126, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int42_39599 = torch.constant.int 42
    %int1_39600 = torch.constant.int 1
    %42127 = torch.aten.add.Scalar %42119, %int42_39599, %int1_39600 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42127, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int42_39601 = torch.constant.int 42
    %int1_39602 = torch.constant.int 1
    %42128 = torch.aten.add.Scalar %42120, %int42_39601, %int1_39602 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42128, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
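    // The rotated key shards are reshaped from [4, ?, 1, 128] to
    // [4, ?, 16, 1, 128]: 16 tokens per cache page (the s0*16 factor seen in the
    // symbolic shapes), one KV head, 128 channels, one view per device.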
    %int4_39603 = torch.constant.int 4
    %int16_39604 = torch.constant.int 16
    %int1_39605 = torch.constant.int 1
    %int128_39606 = torch.constant.int 128
    %42129 = torch.prim.ListConstruct %int4_39603, %3095, %int16_39604, %int1_39605, %int128_39606 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42130 = torch.aten.view %42007, %42129 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42130, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39607 = torch.constant.int 4
    %int16_39608 = torch.constant.int 16
    %int1_39609 = torch.constant.int 1
    %int128_39610 = torch.constant.int 128
    %42131 = torch.prim.ListConstruct %int4_39607, %3095, %int16_39608, %int1_39609, %int128_39610 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42132 = torch.aten.view %42022, %42131 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42132, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39611 = torch.constant.int 4
    %int16_39612 = torch.constant.int 16
    %int1_39613 = torch.constant.int 1
    %int128_39614 = torch.constant.int 128
    %42133 = torch.prim.ListConstruct %int4_39611, %3095, %int16_39612, %int1_39613, %int128_39614 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42134 = torch.aten.view %42037, %42133 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42134, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39615 = torch.constant.int 4
    %int16_39616 = torch.constant.int 16
    %int1_39617 = torch.constant.int 1
    %int128_39618 = torch.constant.int 128
    %42135 = torch.prim.ListConstruct %int4_39615, %3095, %int16_39616, %int1_39617, %int128_39618 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42136 = torch.aten.view %42052, %42135 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42136, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39619 = torch.constant.int 4
    %int16_39620 = torch.constant.int 16
    %int1_39621 = torch.constant.int 1
    %int128_39622 = torch.constant.int 128
    %42137 = torch.prim.ListConstruct %int4_39619, %3095, %int16_39620, %int1_39621, %int128_39622 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42138 = torch.aten.view %42067, %42137 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42138, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39623 = torch.constant.int 4
    %int16_39624 = torch.constant.int 16
    %int1_39625 = torch.constant.int 1
    %int128_39626 = torch.constant.int 128
    %42139 = torch.prim.ListConstruct %int4_39623, %3095, %int16_39624, %int1_39625, %int128_39626 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42140 = torch.aten.view %42082, %42139 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42140, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39627 = torch.constant.int 4
    %int16_39628 = torch.constant.int 16
    %int1_39629 = torch.constant.int 1
    %int128_39630 = torch.constant.int 128
    %42141 = torch.prim.ListConstruct %int4_39627, %3095, %int16_39628, %int1_39629, %int128_39630 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42142 = torch.aten.view %42097, %42141 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42142, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39631 = torch.constant.int 4
    %int16_39632 = torch.constant.int 16
    %int1_39633 = torch.constant.int 1
    %int128_39634 = torch.constant.int 128
    %42143 = torch.prim.ListConstruct %int4_39631, %3095, %int16_39632, %int1_39633, %int128_39634 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42144 = torch.aten.view %42112, %42143 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42144, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
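    // Collapse the batch dimension of each K page view: [4,?,16,1,128] -> [4*?,16,1,128].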
    %int4_39635 = torch.constant.int 4
    %42145 = torch.aten.mul.int %int4_39635, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39636 = torch.constant.int 16
    %int1_39637 = torch.constant.int 1
    %int128_39638 = torch.constant.int 128
    %42146 = torch.prim.ListConstruct %42145, %int16_39636, %int1_39637, %int128_39638 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42147 = torch.aten.view %42130, %42146 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42147, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39639 = torch.constant.int 4
    %42148 = torch.aten.mul.int %int4_39639, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39640 = torch.constant.int 16
    %int1_39641 = torch.constant.int 1
    %int128_39642 = torch.constant.int 128
    %42149 = torch.prim.ListConstruct %42148, %int16_39640, %int1_39641, %int128_39642 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42150 = torch.aten.view %42132, %42149 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42150, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39643 = torch.constant.int 4
    %42151 = torch.aten.mul.int %int4_39643, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39644 = torch.constant.int 16
    %int1_39645 = torch.constant.int 1
    %int128_39646 = torch.constant.int 128
    %42152 = torch.prim.ListConstruct %42151, %int16_39644, %int1_39645, %int128_39646 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42153 = torch.aten.view %42134, %42152 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42153, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39647 = torch.constant.int 4
    %42154 = torch.aten.mul.int %int4_39647, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39648 = torch.constant.int 16
    %int1_39649 = torch.constant.int 1
    %int128_39650 = torch.constant.int 128
    %42155 = torch.prim.ListConstruct %42154, %int16_39648, %int1_39649, %int128_39650 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42156 = torch.aten.view %42136, %42155 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42156, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39651 = torch.constant.int 4
    %42157 = torch.aten.mul.int %int4_39651, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39652 = torch.constant.int 16
    %int1_39653 = torch.constant.int 1
    %int128_39654 = torch.constant.int 128
    %42158 = torch.prim.ListConstruct %42157, %int16_39652, %int1_39653, %int128_39654 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42159 = torch.aten.view %42138, %42158 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42159, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39655 = torch.constant.int 4
    %42160 = torch.aten.mul.int %int4_39655, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39656 = torch.constant.int 16
    %int1_39657 = torch.constant.int 1
    %int128_39658 = torch.constant.int 128
    %42161 = torch.prim.ListConstruct %42160, %int16_39656, %int1_39657, %int128_39658 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42162 = torch.aten.view %42140, %42161 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42162, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39659 = torch.constant.int 4
    %42163 = torch.aten.mul.int %int4_39659, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39660 = torch.constant.int 16
    %int1_39661 = torch.constant.int 1
    %int128_39662 = torch.constant.int 128
    %42164 = torch.prim.ListConstruct %42163, %int16_39660, %int1_39661, %int128_39662 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42165 = torch.aten.view %42142, %42164 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42165, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39663 = torch.constant.int 4
    %42166 = torch.aten.mul.int %int4_39663, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39664 = torch.constant.int 16
    %int1_39665 = torch.constant.int 1
    %int128_39666 = torch.constant.int 128
    %42167 = torch.prim.ListConstruct %42166, %int16_39664, %int1_39665, %int128_39666 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42168 = torch.aten.view %42144, %42167 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42168, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
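    // Flatten the eight [4,?] K slot-index tensors to rank-1 [4*?]; these feed the
    // index_put scatters further down.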
    %int4_39667 = torch.constant.int 4
    %42169 = torch.aten.mul.int %int4_39667, %3095 : !torch.int, !torch.int -> !torch.int
    %42170 = torch.prim.ListConstruct %42169 : (!torch.int) -> !torch.list<int>
    %42171 = torch.aten.view %42121, %42170 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42171, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39668 = torch.constant.int 4
    %42172 = torch.aten.mul.int %int4_39668, %3095 : !torch.int, !torch.int -> !torch.int
    %42173 = torch.prim.ListConstruct %42172 : (!torch.int) -> !torch.list<int>
    %42174 = torch.aten.view %42122, %42173 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42174, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39669 = torch.constant.int 4
    %42175 = torch.aten.mul.int %int4_39669, %3095 : !torch.int, !torch.int -> !torch.int
    %42176 = torch.prim.ListConstruct %42175 : (!torch.int) -> !torch.list<int>
    %42177 = torch.aten.view %42123, %42176 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42177, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39670 = torch.constant.int 4
    %42178 = torch.aten.mul.int %int4_39670, %3095 : !torch.int, !torch.int -> !torch.int
    %42179 = torch.prim.ListConstruct %42178 : (!torch.int) -> !torch.list<int>
    %42180 = torch.aten.view %42124, %42179 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42180, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39671 = torch.constant.int 4
    %42181 = torch.aten.mul.int %int4_39671, %3095 : !torch.int, !torch.int -> !torch.int
    %42182 = torch.prim.ListConstruct %42181 : (!torch.int) -> !torch.list<int>
    %42183 = torch.aten.view %42125, %42182 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42183, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39672 = torch.constant.int 4
    %42184 = torch.aten.mul.int %int4_39672, %3095 : !torch.int, !torch.int -> !torch.int
    %42185 = torch.prim.ListConstruct %42184 : (!torch.int) -> !torch.list<int>
    %42186 = torch.aten.view %42126, %42185 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42186, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39673 = torch.constant.int 4
    %42187 = torch.aten.mul.int %int4_39673, %3095 : !torch.int, !torch.int -> !torch.int
    %42188 = torch.prim.ListConstruct %42187 : (!torch.int) -> !torch.list<int>
    %42189 = torch.aten.view %42127, %42188 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42189, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39674 = torch.constant.int 4
    %42190 = torch.aten.mul.int %int4_39674, %3095 : !torch.int, !torch.int -> !torch.int
    %42191 = torch.prim.ListConstruct %42190 : (!torch.int) -> !torch.list<int>
    %42192 = torch.aten.view %42128, %42191 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42192, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
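    // The same paged reshape for the eight tensors written at slot +1 (presumably the V
    // halves, %41782..%41796): [4,?,1,128] -> [4,?,16,1,128].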
    %int4_39675 = torch.constant.int 4
    %int16_39676 = torch.constant.int 16
    %int1_39677 = torch.constant.int 1
    %int128_39678 = torch.constant.int 128
    %42193 = torch.prim.ListConstruct %int4_39675, %3095, %int16_39676, %int1_39677, %int128_39678 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42194 = torch.aten.view %41782, %42193 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42194, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39679 = torch.constant.int 4
    %int16_39680 = torch.constant.int 16
    %int1_39681 = torch.constant.int 1
    %int128_39682 = torch.constant.int 128
    %42195 = torch.prim.ListConstruct %int4_39679, %3095, %int16_39680, %int1_39681, %int128_39682 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42196 = torch.aten.view %41784, %42195 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42196, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39683 = torch.constant.int 4
    %int16_39684 = torch.constant.int 16
    %int1_39685 = torch.constant.int 1
    %int128_39686 = torch.constant.int 128
    %42197 = torch.prim.ListConstruct %int4_39683, %3095, %int16_39684, %int1_39685, %int128_39686 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42198 = torch.aten.view %41786, %42197 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42198, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39687 = torch.constant.int 4
    %int16_39688 = torch.constant.int 16
    %int1_39689 = torch.constant.int 1
    %int128_39690 = torch.constant.int 128
    %42199 = torch.prim.ListConstruct %int4_39687, %3095, %int16_39688, %int1_39689, %int128_39690 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42200 = torch.aten.view %41788, %42199 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42200, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39691 = torch.constant.int 4
    %int16_39692 = torch.constant.int 16
    %int1_39693 = torch.constant.int 1
    %int128_39694 = torch.constant.int 128
    %42201 = torch.prim.ListConstruct %int4_39691, %3095, %int16_39692, %int1_39693, %int128_39694 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42202 = torch.aten.view %41790, %42201 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42202, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39695 = torch.constant.int 4
    %int16_39696 = torch.constant.int 16
    %int1_39697 = torch.constant.int 1
    %int128_39698 = torch.constant.int 128
    %42203 = torch.prim.ListConstruct %int4_39695, %3095, %int16_39696, %int1_39697, %int128_39698 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42204 = torch.aten.view %41792, %42203 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42204, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39699 = torch.constant.int 4
    %int16_39700 = torch.constant.int 16
    %int1_39701 = torch.constant.int 1
    %int128_39702 = torch.constant.int 128
    %42205 = torch.prim.ListConstruct %int4_39699, %3095, %int16_39700, %int1_39701, %int128_39702 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42206 = torch.aten.view %41794, %42205 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42206, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_39703 = torch.constant.int 4
    %int16_39704 = torch.constant.int 16
    %int1_39705 = torch.constant.int 1
    %int128_39706 = torch.constant.int 128
    %42207 = torch.prim.ListConstruct %int4_39703, %3095, %int16_39704, %int1_39705, %int128_39706 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42208 = torch.aten.view %41796, %42207 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %42208, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
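    // Collapse the batch dimension of the V page views as well: [4,?,16,1,128] -> [4*?,16,1,128].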
    %int4_39707 = torch.constant.int 4
    %42209 = torch.aten.mul.int %int4_39707, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39708 = torch.constant.int 16
    %int1_39709 = torch.constant.int 1
    %int128_39710 = torch.constant.int 128
    %42210 = torch.prim.ListConstruct %42209, %int16_39708, %int1_39709, %int128_39710 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42211 = torch.aten.view %42194, %42210 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42211, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39711 = torch.constant.int 4
    %42212 = torch.aten.mul.int %int4_39711, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39712 = torch.constant.int 16
    %int1_39713 = torch.constant.int 1
    %int128_39714 = torch.constant.int 128
    %42213 = torch.prim.ListConstruct %42212, %int16_39712, %int1_39713, %int128_39714 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42214 = torch.aten.view %42196, %42213 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42214, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39715 = torch.constant.int 4
    %42215 = torch.aten.mul.int %int4_39715, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39716 = torch.constant.int 16
    %int1_39717 = torch.constant.int 1
    %int128_39718 = torch.constant.int 128
    %42216 = torch.prim.ListConstruct %42215, %int16_39716, %int1_39717, %int128_39718 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42217 = torch.aten.view %42198, %42216 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42217, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39719 = torch.constant.int 4
    %42218 = torch.aten.mul.int %int4_39719, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39720 = torch.constant.int 16
    %int1_39721 = torch.constant.int 1
    %int128_39722 = torch.constant.int 128
    %42219 = torch.prim.ListConstruct %42218, %int16_39720, %int1_39721, %int128_39722 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42220 = torch.aten.view %42200, %42219 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42220, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39723 = torch.constant.int 4
    %42221 = torch.aten.mul.int %int4_39723, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39724 = torch.constant.int 16
    %int1_39725 = torch.constant.int 1
    %int128_39726 = torch.constant.int 128
    %42222 = torch.prim.ListConstruct %42221, %int16_39724, %int1_39725, %int128_39726 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42223 = torch.aten.view %42202, %42222 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42223, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39727 = torch.constant.int 4
    %42224 = torch.aten.mul.int %int4_39727, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39728 = torch.constant.int 16
    %int1_39729 = torch.constant.int 1
    %int128_39730 = torch.constant.int 128
    %42225 = torch.prim.ListConstruct %42224, %int16_39728, %int1_39729, %int128_39730 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42226 = torch.aten.view %42204, %42225 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42226, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39731 = torch.constant.int 4
    %42227 = torch.aten.mul.int %int4_39731, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39732 = torch.constant.int 16
    %int1_39733 = torch.constant.int 1
    %int128_39734 = torch.constant.int 128
    %42228 = torch.prim.ListConstruct %42227, %int16_39732, %int1_39733, %int128_39734 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42229 = torch.aten.view %42206, %42228 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42229, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_39735 = torch.constant.int 4
    %42230 = torch.aten.mul.int %int4_39735, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_39736 = torch.constant.int 16
    %int1_39737 = torch.constant.int 1
    %int128_39738 = torch.constant.int 128
    %42231 = torch.prim.ListConstruct %42230, %int16_39736, %int1_39737, %int128_39738 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42232 = torch.aten.view %42208, %42231 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42232, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
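    // Offset each slot index by +1 to address the V subblock adjacent to the K subblock
    // indexed at +42 above.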
    %int1_39739 = torch.constant.int 1
    %int1_39740 = torch.constant.int 1
    %42233 = torch.aten.add.Scalar %42121, %int1_39739, %int1_39740 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42233, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_39741 = torch.constant.int 1
    %int1_39742 = torch.constant.int 1
    %42234 = torch.aten.add.Scalar %42122, %int1_39741, %int1_39742 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42234, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_39743 = torch.constant.int 1
    %int1_39744 = torch.constant.int 1
    %42235 = torch.aten.add.Scalar %42123, %int1_39743, %int1_39744 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42235, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_39745 = torch.constant.int 1
    %int1_39746 = torch.constant.int 1
    %42236 = torch.aten.add.Scalar %42124, %int1_39745, %int1_39746 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42236, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_39747 = torch.constant.int 1
    %int1_39748 = torch.constant.int 1
    %42237 = torch.aten.add.Scalar %42125, %int1_39747, %int1_39748 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42237, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_39749 = torch.constant.int 1
    %int1_39750 = torch.constant.int 1
    %42238 = torch.aten.add.Scalar %42126, %int1_39749, %int1_39750 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42238, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_39751 = torch.constant.int 1
    %int1_39752 = torch.constant.int 1
    %42239 = torch.aten.add.Scalar %42127, %int1_39751, %int1_39752 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42239, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_39753 = torch.constant.int 1
    %int1_39754 = torch.constant.int 1
    %42240 = torch.aten.add.Scalar %42128, %int1_39753, %int1_39754 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %42240, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
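    // Flatten the +1 (V) slot indices to rank-1, mirroring the K indices above.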
    %int4_39755 = torch.constant.int 4
    %42241 = torch.aten.mul.int %int4_39755, %3095 : !torch.int, !torch.int -> !torch.int
    %42242 = torch.prim.ListConstruct %42241 : (!torch.int) -> !torch.list<int>
    %42243 = torch.aten.view %42233, %42242 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42243, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39756 = torch.constant.int 4
    %42244 = torch.aten.mul.int %int4_39756, %3095 : !torch.int, !torch.int -> !torch.int
    %42245 = torch.prim.ListConstruct %42244 : (!torch.int) -> !torch.list<int>
    %42246 = torch.aten.view %42234, %42245 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42246, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39757 = torch.constant.int 4
    %42247 = torch.aten.mul.int %int4_39757, %3095 : !torch.int, !torch.int -> !torch.int
    %42248 = torch.prim.ListConstruct %42247 : (!torch.int) -> !torch.list<int>
    %42249 = torch.aten.view %42235, %42248 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42249, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39758 = torch.constant.int 4
    %42250 = torch.aten.mul.int %int4_39758, %3095 : !torch.int, !torch.int -> !torch.int
    %42251 = torch.prim.ListConstruct %42250 : (!torch.int) -> !torch.list<int>
    %42252 = torch.aten.view %42236, %42251 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42252, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39759 = torch.constant.int 4
    %42253 = torch.aten.mul.int %int4_39759, %3095 : !torch.int, !torch.int -> !torch.int
    %42254 = torch.prim.ListConstruct %42253 : (!torch.int) -> !torch.list<int>
    %42255 = torch.aten.view %42237, %42254 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42255, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39760 = torch.constant.int 4
    %42256 = torch.aten.mul.int %int4_39760, %3095 : !torch.int, !torch.int -> !torch.int
    %42257 = torch.prim.ListConstruct %42256 : (!torch.int) -> !torch.list<int>
    %42258 = torch.aten.view %42238, %42257 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42258, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39761 = torch.constant.int 4
    %42259 = torch.aten.mul.int %int4_39761, %3095 : !torch.int, !torch.int -> !torch.int
    %42260 = torch.prim.ListConstruct %42259 : (!torch.int) -> !torch.list<int>
    %42261 = torch.aten.view %42239, %42260 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42261, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_39762 = torch.constant.int 4
    %42262 = torch.aten.mul.int %int4_39762, %3095 : !torch.int, !torch.int -> !torch.int
    %42263 = torch.prim.ListConstruct %42262 : (!torch.int) -> !torch.list<int>
    %42264 = torch.aten.view %42240, %42263 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42264, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
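    // Concatenate the K and V slot indices per device so a single index_put can scatter
    // both halves of the cache write at once.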
    %42265 = torch.prim.ListConstruct %42171, %42243 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_39763 = torch.constant.int 0
    %42266 = torch.aten.cat %42265, %int0_39763 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42266, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %42267 = torch.prim.ListConstruct %42174, %42246 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_39764 = torch.constant.int 0
    %42268 = torch.aten.cat %42267, %int0_39764 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42268, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %42269 = torch.prim.ListConstruct %42177, %42249 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_39765 = torch.constant.int 0
    %42270 = torch.aten.cat %42269, %int0_39765 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42270, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %42271 = torch.prim.ListConstruct %42180, %42252 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_39766 = torch.constant.int 0
    %42272 = torch.aten.cat %42271, %int0_39766 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42272, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %42273 = torch.prim.ListConstruct %42183, %42255 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_39767 = torch.constant.int 0
    %42274 = torch.aten.cat %42273, %int0_39767 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42274, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %42275 = torch.prim.ListConstruct %42186, %42258 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_39768 = torch.constant.int 0
    %42276 = torch.aten.cat %42275, %int0_39768 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42276, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %42277 = torch.prim.ListConstruct %42189, %42261 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_39769 = torch.constant.int 0
    %42278 = torch.aten.cat %42277, %int0_39769 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42278, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %42279 = torch.prim.ListConstruct %42192, %42264 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_39770 = torch.constant.int 0
    %42280 = torch.aten.cat %42279, %int0_39770 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %42280, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
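    // Concatenate the matching K and V payloads in the same order as their indices.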
    %42281 = torch.prim.ListConstruct %42147, %42211 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_39771 = torch.constant.int 0
    %42282 = torch.aten.cat %42281, %int0_39771 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42282, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42283 = torch.prim.ListConstruct %42150, %42214 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_39772 = torch.constant.int 0
    %42284 = torch.aten.cat %42283, %int0_39772 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42284, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42285 = torch.prim.ListConstruct %42153, %42217 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_39773 = torch.constant.int 0
    %42286 = torch.aten.cat %42285, %int0_39773 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42286, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42287 = torch.prim.ListConstruct %42156, %42220 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_39774 = torch.constant.int 0
    %42288 = torch.aten.cat %42287, %int0_39774 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42288, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42289 = torch.prim.ListConstruct %42159, %42223 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_39775 = torch.constant.int 0
    %42290 = torch.aten.cat %42289, %int0_39775 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42290, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42291 = torch.prim.ListConstruct %42162, %42226 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_39776 = torch.constant.int 0
    %42292 = torch.aten.cat %42291, %int0_39776 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42292, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42293 = torch.prim.ListConstruct %42165, %42229 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_39777 = torch.constant.int 0
    %42294 = torch.aten.cat %42293, %int0_39777 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42294, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42295 = torch.prim.ListConstruct %42168, %42232 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_39778 = torch.constant.int 0
    %42296 = torch.aten.cat %42295, %int0_39778 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42296, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
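    // Paged KV-cache update, repeated once per shard: view the flat cache column
    // ([?,131072]) as pages [?,32,2,16,1,128], flatten to subblock rows [?,16,1,128],
    // scatter the concatenated K/V payload with index_put (accumulate = false), then
    // view back to [?,131072].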
    %int32_39779 = torch.constant.int 32
    %int2_39780 = torch.constant.int 2
    %int16_39781 = torch.constant.int 16
    %int1_39782 = torch.constant.int 1
    %int128_39783 = torch.constant.int 128
    %42297 = torch.prim.ListConstruct %3023, %int32_39779, %int2_39780, %int16_39781, %int1_39782, %int128_39783 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42298 = torch.aten.view %40447, %42297 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42298, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_39784 = torch.constant.int 32
    %42299 = torch.aten.mul.int %3023, %int32_39784 : !torch.int, !torch.int -> !torch.int
    %int2_39785 = torch.constant.int 2
    %42300 = torch.aten.mul.int %42299, %int2_39785 : !torch.int, !torch.int -> !torch.int
    %int16_39786 = torch.constant.int 16
    %int1_39787 = torch.constant.int 1
    %int128_39788 = torch.constant.int 128
    %42301 = torch.prim.ListConstruct %42300, %int16_39786, %int1_39787, %int128_39788 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42302 = torch.aten.view %42298, %42301 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42302, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42303 = torch.prim.ListConstruct %42266 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_39789 = torch.constant.bool false
    %42304 = torch.aten.index_put %42302, %42303, %42282, %false_39789 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42304, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_39790 = torch.constant.int 32
    %int2_39791 = torch.constant.int 2
    %int16_39792 = torch.constant.int 16
    %int1_39793 = torch.constant.int 1
    %int128_39794 = torch.constant.int 128
    %42305 = torch.prim.ListConstruct %3023, %int32_39790, %int2_39791, %int16_39792, %int1_39793, %int128_39794 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42306 = torch.aten.view %42304, %42305 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42306, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_39795 = torch.constant.int 131072
    %42307 = torch.prim.ListConstruct %3023, %int131072_39795 : (!torch.int, !torch.int) -> !torch.list<int>
    %42308 = torch.aten.view %42306, %42307 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %42308, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_39796 = torch.constant.int 32
    %int2_39797 = torch.constant.int 2
    %int16_39798 = torch.constant.int 16
    %int1_39799 = torch.constant.int 1
    %int128_39800 = torch.constant.int 128
    %42309 = torch.prim.ListConstruct %3026, %int32_39796, %int2_39797, %int16_39798, %int1_39799, %int128_39800 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42310 = torch.aten.view %40459, %42309 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42310, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_39801 = torch.constant.int 32
    %42311 = torch.aten.mul.int %3026, %int32_39801 : !torch.int, !torch.int -> !torch.int
    %int2_39802 = torch.constant.int 2
    %42312 = torch.aten.mul.int %42311, %int2_39802 : !torch.int, !torch.int -> !torch.int
    %int16_39803 = torch.constant.int 16
    %int1_39804 = torch.constant.int 1
    %int128_39805 = torch.constant.int 128
    %42313 = torch.prim.ListConstruct %42312, %int16_39803, %int1_39804, %int128_39805 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42314 = torch.aten.view %42310, %42313 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42314, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42315 = torch.prim.ListConstruct %42268 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_39806 = torch.constant.bool false
    %42316 = torch.aten.index_put %42314, %42315, %42284, %false_39806 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42316, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_39807 = torch.constant.int 32
    %int2_39808 = torch.constant.int 2
    %int16_39809 = torch.constant.int 16
    %int1_39810 = torch.constant.int 1
    %int128_39811 = torch.constant.int 128
    %42317 = torch.prim.ListConstruct %3026, %int32_39807, %int2_39808, %int16_39809, %int1_39810, %int128_39811 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42318 = torch.aten.view %42316, %42317 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42318, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_39812 = torch.constant.int 131072
    %42319 = torch.prim.ListConstruct %3026, %int131072_39812 : (!torch.int, !torch.int) -> !torch.list<int>
    %42320 = torch.aten.view %42318, %42319 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %42320, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_39813 = torch.constant.int 32
    %int2_39814 = torch.constant.int 2
    %int16_39815 = torch.constant.int 16
    %int1_39816 = torch.constant.int 1
    %int128_39817 = torch.constant.int 128
    %42321 = torch.prim.ListConstruct %3029, %int32_39813, %int2_39814, %int16_39815, %int1_39816, %int128_39817 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42322 = torch.aten.view %40471, %42321 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42322, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_39818 = torch.constant.int 32
    %42323 = torch.aten.mul.int %3029, %int32_39818 : !torch.int, !torch.int -> !torch.int
    %int2_39819 = torch.constant.int 2
    %42324 = torch.aten.mul.int %42323, %int2_39819 : !torch.int, !torch.int -> !torch.int
    %int16_39820 = torch.constant.int 16
    %int1_39821 = torch.constant.int 1
    %int128_39822 = torch.constant.int 128
    %42325 = torch.prim.ListConstruct %42324, %int16_39820, %int1_39821, %int128_39822 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42326 = torch.aten.view %42322, %42325 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42326, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42327 = torch.prim.ListConstruct %42270 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_39823 = torch.constant.bool false
    %42328 = torch.aten.index_put %42326, %42327, %42286, %false_39823 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42328, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_39824 = torch.constant.int 32
    %int2_39825 = torch.constant.int 2
    %int16_39826 = torch.constant.int 16
    %int1_39827 = torch.constant.int 1
    %int128_39828 = torch.constant.int 128
    %42329 = torch.prim.ListConstruct %3029, %int32_39824, %int2_39825, %int16_39826, %int1_39827, %int128_39828 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42330 = torch.aten.view %42328, %42329 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42330, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_39829 = torch.constant.int 131072
    %42331 = torch.prim.ListConstruct %3029, %int131072_39829 : (!torch.int, !torch.int) -> !torch.list<int>
    %42332 = torch.aten.view %42330, %42331 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %42332, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_39830 = torch.constant.int 32
    %int2_39831 = torch.constant.int 2
    %int16_39832 = torch.constant.int 16
    %int1_39833 = torch.constant.int 1
    %int128_39834 = torch.constant.int 128
    %42333 = torch.prim.ListConstruct %3032, %int32_39830, %int2_39831, %int16_39832, %int1_39833, %int128_39834 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42334 = torch.aten.view %40483, %42333 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42334, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_39835 = torch.constant.int 32
    %42335 = torch.aten.mul.int %3032, %int32_39835 : !torch.int, !torch.int -> !torch.int
    %int2_39836 = torch.constant.int 2
    %42336 = torch.aten.mul.int %42335, %int2_39836 : !torch.int, !torch.int -> !torch.int
    %int16_39837 = torch.constant.int 16
    %int1_39838 = torch.constant.int 1
    %int128_39839 = torch.constant.int 128
    %42337 = torch.prim.ListConstruct %42336, %int16_39837, %int1_39838, %int128_39839 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42338 = torch.aten.view %42334, %42337 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42338, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42339 = torch.prim.ListConstruct %42272 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_39840 = torch.constant.bool false
    %42340 = torch.aten.index_put %42338, %42339, %42288, %false_39840 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42340, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_39841 = torch.constant.int 32
    %int2_39842 = torch.constant.int 2
    %int16_39843 = torch.constant.int 16
    %int1_39844 = torch.constant.int 1
    %int128_39845 = torch.constant.int 128
    %42341 = torch.prim.ListConstruct %3032, %int32_39841, %int2_39842, %int16_39843, %int1_39844, %int128_39845 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42342 = torch.aten.view %42340, %42341 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42342, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_39846 = torch.constant.int 131072
    %42343 = torch.prim.ListConstruct %3032, %int131072_39846 : (!torch.int, !torch.int) -> !torch.list<int>
    %42344 = torch.aten.view %42342, %42343 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %42344, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_39847 = torch.constant.int 32
    %int2_39848 = torch.constant.int 2
    %int16_39849 = torch.constant.int 16
    %int1_39850 = torch.constant.int 1
    %int128_39851 = torch.constant.int 128
    %42345 = torch.prim.ListConstruct %3035, %int32_39847, %int2_39848, %int16_39849, %int1_39850, %int128_39851 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42346 = torch.aten.view %40495, %42345 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42346, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_39852 = torch.constant.int 32
    %42347 = torch.aten.mul.int %3035, %int32_39852 : !torch.int, !torch.int -> !torch.int
    %int2_39853 = torch.constant.int 2
    %42348 = torch.aten.mul.int %42347, %int2_39853 : !torch.int, !torch.int -> !torch.int
    %int16_39854 = torch.constant.int 16
    %int1_39855 = torch.constant.int 1
    %int128_39856 = torch.constant.int 128
    %42349 = torch.prim.ListConstruct %42348, %int16_39854, %int1_39855, %int128_39856 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42350 = torch.aten.view %42346, %42349 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42350, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42351 = torch.prim.ListConstruct %42274 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_39857 = torch.constant.bool false
    %42352 = torch.aten.index_put %42350, %42351, %42290, %false_39857 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42352, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_39858 = torch.constant.int 32
    %int2_39859 = torch.constant.int 2
    %int16_39860 = torch.constant.int 16
    %int1_39861 = torch.constant.int 1
    %int128_39862 = torch.constant.int 128
    %42353 = torch.prim.ListConstruct %3035, %int32_39858, %int2_39859, %int16_39860, %int1_39861, %int128_39862 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42354 = torch.aten.view %42352, %42353 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42354, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_39863 = torch.constant.int 131072
    %42355 = torch.prim.ListConstruct %3035, %int131072_39863 : (!torch.int, !torch.int) -> !torch.list<int>
    %42356 = torch.aten.view %42354, %42355 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %42356, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_39864 = torch.constant.int 32
    %int2_39865 = torch.constant.int 2
    %int16_39866 = torch.constant.int 16
    %int1_39867 = torch.constant.int 1
    %int128_39868 = torch.constant.int 128
    %42357 = torch.prim.ListConstruct %3038, %int32_39864, %int2_39865, %int16_39866, %int1_39867, %int128_39868 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42358 = torch.aten.view %40507, %42357 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42358, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_39869 = torch.constant.int 32
    %42359 = torch.aten.mul.int %3038, %int32_39869 : !torch.int, !torch.int -> !torch.int
    %int2_39870 = torch.constant.int 2
    %42360 = torch.aten.mul.int %42359, %int2_39870 : !torch.int, !torch.int -> !torch.int
    %int16_39871 = torch.constant.int 16
    %int1_39872 = torch.constant.int 1
    %int128_39873 = torch.constant.int 128
    %42361 = torch.prim.ListConstruct %42360, %int16_39871, %int1_39872, %int128_39873 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42362 = torch.aten.view %42358, %42361 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42362, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42363 = torch.prim.ListConstruct %42276 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_39874 = torch.constant.bool false
    %42364 = torch.aten.index_put %42362, %42363, %42292, %false_39874 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42364, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_39875 = torch.constant.int 32
    %int2_39876 = torch.constant.int 2
    %int16_39877 = torch.constant.int 16
    %int1_39878 = torch.constant.int 1
    %int128_39879 = torch.constant.int 128
    %42365 = torch.prim.ListConstruct %3038, %int32_39875, %int2_39876, %int16_39877, %int1_39878, %int128_39879 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42366 = torch.aten.view %42364, %42365 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42366, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_39880 = torch.constant.int 131072
    %42367 = torch.prim.ListConstruct %3038, %int131072_39880 : (!torch.int, !torch.int) -> !torch.list<int>
    %42368 = torch.aten.view %42366, %42367 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %42368, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_39881 = torch.constant.int 32
    %int2_39882 = torch.constant.int 2
    %int16_39883 = torch.constant.int 16
    %int1_39884 = torch.constant.int 1
    %int128_39885 = torch.constant.int 128
    %42369 = torch.prim.ListConstruct %3041, %int32_39881, %int2_39882, %int16_39883, %int1_39884, %int128_39885 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42370 = torch.aten.view %40519, %42369 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42370, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_39886 = torch.constant.int 32
    %42371 = torch.aten.mul.int %3041, %int32_39886 : !torch.int, !torch.int -> !torch.int
    %int2_39887 = torch.constant.int 2
    %42372 = torch.aten.mul.int %42371, %int2_39887 : !torch.int, !torch.int -> !torch.int
    %int16_39888 = torch.constant.int 16
    %int1_39889 = torch.constant.int 1
    %int128_39890 = torch.constant.int 128
    %42373 = torch.prim.ListConstruct %42372, %int16_39888, %int1_39889, %int128_39890 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42374 = torch.aten.view %42370, %42373 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42374, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42375 = torch.prim.ListConstruct %42278 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_39891 = torch.constant.bool false
    %42376 = torch.aten.index_put %42374, %42375, %42294, %false_39891 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42376, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_39892 = torch.constant.int 32
    %int2_39893 = torch.constant.int 2
    %int16_39894 = torch.constant.int 16
    %int1_39895 = torch.constant.int 1
    %int128_39896 = torch.constant.int 128
    %42377 = torch.prim.ListConstruct %3041, %int32_39892, %int2_39893, %int16_39894, %int1_39895, %int128_39896 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42378 = torch.aten.view %42376, %42377 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42378, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_39897 = torch.constant.int 131072
    %42379 = torch.prim.ListConstruct %3041, %int131072_39897 : (!torch.int, !torch.int) -> !torch.list<int>
    %42380 = torch.aten.view %42378, %42379 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %42380, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_39898 = torch.constant.int 32
    %int2_39899 = torch.constant.int 2
    %int16_39900 = torch.constant.int 16
    %int1_39901 = torch.constant.int 1
    %int128_39902 = torch.constant.int 128
    %42381 = torch.prim.ListConstruct %3044, %int32_39898, %int2_39899, %int16_39900, %int1_39901, %int128_39902 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42382 = torch.aten.view %40531, %42381 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42382, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_39903 = torch.constant.int 32
    %42383 = torch.aten.mul.int %3044, %int32_39903 : !torch.int, !torch.int -> !torch.int
    %int2_39904 = torch.constant.int 2
    %42384 = torch.aten.mul.int %42383, %int2_39904 : !torch.int, !torch.int -> !torch.int
    %int16_39905 = torch.constant.int 16
    %int1_39906 = torch.constant.int 1
    %int128_39907 = torch.constant.int 128
    %42385 = torch.prim.ListConstruct %42384, %int16_39905, %int1_39906, %int128_39907 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42386 = torch.aten.view %42382, %42385 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42386, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %42387 = torch.prim.ListConstruct %42280 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_39908 = torch.constant.bool false
    %42388 = torch.aten.index_put %42386, %42387, %42296, %false_39908 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %42388, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_39909 = torch.constant.int 32
    %int2_39910 = torch.constant.int 2
    %int16_39911 = torch.constant.int 16
    %int1_39912 = torch.constant.int 1
    %int128_39913 = torch.constant.int 128
    %42389 = torch.prim.ListConstruct %3044, %int32_39909, %int2_39910, %int16_39911, %int1_39912, %int128_39913 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42390 = torch.aten.view %42388, %42389 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %42390, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_39914 = torch.constant.int 131072
    %42391 = torch.prim.ListConstruct %3044, %int131072_39914 : (!torch.int, !torch.int) -> !torch.list<int>
    %42392 = torch.aten.view %42390, %42391 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %42392, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
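    // Attention begins: unsqueeze the eight per-device key tensors (%42007 .. %42112, each
    // [4,?,1,128] -- batch 4, dynamic seq, 1 KV head, head_dim 128) at dim -2, inserting
    // the broadcast axis used for the grouped-query expansion below.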
    %int-2_39915 = torch.constant.int -2
    %42393 = torch.aten.unsqueeze %42007, %int-2_39915 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39916 = torch.constant.int -2
    %42394 = torch.aten.unsqueeze %42022, %int-2_39916 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39917 = torch.constant.int -2
    %42395 = torch.aten.unsqueeze %42037, %int-2_39917 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39918 = torch.constant.int -2
    %42396 = torch.aten.unsqueeze %42052, %int-2_39918 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39919 = torch.constant.int -2
    %42397 = torch.aten.unsqueeze %42067, %int-2_39919 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39920 = torch.constant.int -2
    %42398 = torch.aten.unsqueeze %42082, %int-2_39920 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39921 = torch.constant.int -2
    %42399 = torch.aten.unsqueeze %42097, %int-2_39921 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39922 = torch.constant.int -2
    %42400 = torch.aten.unsqueeze %42112, %int-2_39922 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
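    // Expand each key along the new axis from 1 to 4, replicating the single KV head across
    // the 4 query heads it serves -- the repeat_kv step of grouped-query attention. Rough
    // PyTorch equivalent: k.unsqueeze(-2).expand(4, seq, 1, 4, 128).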
    %int4_39923 = torch.constant.int 4
    %int1_39924 = torch.constant.int 1
    %int4_39925 = torch.constant.int 4
    %int128_39926 = torch.constant.int 128
    %42401 = torch.prim.ListConstruct %int4_39923, %41993, %int1_39924, %int4_39925, %int128_39926 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_39927 = torch.constant.bool false
    %42402 = torch.aten.expand %42393, %42401, %false_39927 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_39928 = torch.constant.int 4
    %int1_39929 = torch.constant.int 1
    %int4_39930 = torch.constant.int 4
    %int128_39931 = torch.constant.int 128
    %42403 = torch.prim.ListConstruct %int4_39928, %41993, %int1_39929, %int4_39930, %int128_39931 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_39932 = torch.constant.bool false
    %42404 = torch.aten.expand %42394, %42403, %false_39932 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_39933 = torch.constant.int 4
    %int1_39934 = torch.constant.int 1
    %int4_39935 = torch.constant.int 4
    %int128_39936 = torch.constant.int 128
    %42405 = torch.prim.ListConstruct %int4_39933, %41993, %int1_39934, %int4_39935, %int128_39936 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_39937 = torch.constant.bool false
    %42406 = torch.aten.expand %42395, %42405, %false_39937 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_39938 = torch.constant.int 4
    %int1_39939 = torch.constant.int 1
    %int4_39940 = torch.constant.int 4
    %int128_39941 = torch.constant.int 128
    %42407 = torch.prim.ListConstruct %int4_39938, %41993, %int1_39939, %int4_39940, %int128_39941 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_39942 = torch.constant.bool false
    %42408 = torch.aten.expand %42396, %42407, %false_39942 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_39943 = torch.constant.int 4
    %int1_39944 = torch.constant.int 1
    %int4_39945 = torch.constant.int 4
    %int128_39946 = torch.constant.int 128
    %42409 = torch.prim.ListConstruct %int4_39943, %41993, %int1_39944, %int4_39945, %int128_39946 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_39947 = torch.constant.bool false
    %42410 = torch.aten.expand %42397, %42409, %false_39947 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_39948 = torch.constant.int 4
    %int1_39949 = torch.constant.int 1
    %int4_39950 = torch.constant.int 4
    %int128_39951 = torch.constant.int 128
    %42411 = torch.prim.ListConstruct %int4_39948, %41993, %int1_39949, %int4_39950, %int128_39951 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_39952 = torch.constant.bool false
    %42412 = torch.aten.expand %42398, %42411, %false_39952 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_39953 = torch.constant.int 4
    %int1_39954 = torch.constant.int 1
    %int4_39955 = torch.constant.int 4
    %int128_39956 = torch.constant.int 128
    %42413 = torch.prim.ListConstruct %int4_39953, %41993, %int1_39954, %int4_39955, %int128_39956 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_39957 = torch.constant.bool false
    %42414 = torch.aten.expand %42399, %42413, %false_39957 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_39958 = torch.constant.int 4
    %int1_39959 = torch.constant.int 1
    %int4_39960 = torch.constant.int 4
    %int128_39961 = torch.constant.int 128
    %42415 = torch.prim.ListConstruct %int4_39958, %41993, %int1_39959, %int4_39960, %int128_39961 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_39962 = torch.constant.bool false
    %42416 = torch.aten.expand %42400, %42415, %false_39962 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
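    // Collapse the kv-head and repeat axes: [4,?,1,4,128] -> [4,?,4,128] for each shard.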
    %int4_39963 = torch.constant.int 4
    %int4_39964 = torch.constant.int 4
    %int128_39965 = torch.constant.int 128
    %42417 = torch.prim.ListConstruct %int4_39963, %41993, %int4_39964, %int128_39965 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42418 = torch.aten.view %42402, %42417 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39966 = torch.constant.int 4
    %int4_39967 = torch.constant.int 4
    %int128_39968 = torch.constant.int 128
    %42419 = torch.prim.ListConstruct %int4_39966, %41993, %int4_39967, %int128_39968 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42420 = torch.aten.view %42404, %42419 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39969 = torch.constant.int 4
    %int4_39970 = torch.constant.int 4
    %int128_39971 = torch.constant.int 128
    %42421 = torch.prim.ListConstruct %int4_39969, %41993, %int4_39970, %int128_39971 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42422 = torch.aten.view %42406, %42421 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39972 = torch.constant.int 4
    %int4_39973 = torch.constant.int 4
    %int128_39974 = torch.constant.int 128
    %42423 = torch.prim.ListConstruct %int4_39972, %41993, %int4_39973, %int128_39974 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42424 = torch.aten.view %42408, %42423 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39975 = torch.constant.int 4
    %int4_39976 = torch.constant.int 4
    %int128_39977 = torch.constant.int 128
    %42425 = torch.prim.ListConstruct %int4_39975, %41993, %int4_39976, %int128_39977 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42426 = torch.aten.view %42410, %42425 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39978 = torch.constant.int 4
    %int4_39979 = torch.constant.int 4
    %int128_39980 = torch.constant.int 128
    %42427 = torch.prim.ListConstruct %int4_39978, %41993, %int4_39979, %int128_39980 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42428 = torch.aten.view %42412, %42427 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39981 = torch.constant.int 4
    %int4_39982 = torch.constant.int 4
    %int128_39983 = torch.constant.int 128
    %42429 = torch.prim.ListConstruct %int4_39981, %41993, %int4_39982, %int128_39983 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42430 = torch.aten.view %42414, %42429 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_39984 = torch.constant.int 4
    %int4_39985 = torch.constant.int 4
    %int128_39986 = torch.constant.int 128
    %42431 = torch.prim.ListConstruct %int4_39984, %41993, %int4_39985, %int128_39986 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42432 = torch.aten.view %42416, %42431 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
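    // Same treatment for the eight per-device value tensors (%41782 .. %41796): insert the
    // broadcast axis at dim -2.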
    %int-2_39987 = torch.constant.int -2
    %42433 = torch.aten.unsqueeze %41782, %int-2_39987 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39988 = torch.constant.int -2
    %42434 = torch.aten.unsqueeze %41784, %int-2_39988 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39989 = torch.constant.int -2
    %42435 = torch.aten.unsqueeze %41786, %int-2_39989 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39990 = torch.constant.int -2
    %42436 = torch.aten.unsqueeze %41788, %int-2_39990 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39991 = torch.constant.int -2
    %42437 = torch.aten.unsqueeze %41790, %int-2_39991 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39992 = torch.constant.int -2
    %42438 = torch.aten.unsqueeze %41792, %int-2_39992 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39993 = torch.constant.int -2
    %42439 = torch.aten.unsqueeze %41794, %int-2_39993 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_39994 = torch.constant.int -2
    %42440 = torch.aten.unsqueeze %41796, %int-2_39994 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %42440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
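    // Fetch the dynamic sequence length once (%42441 = size(%41706, dim 1)) and expand each
    // value tensor from 1 to 4 heads, mirroring the key-side expansion.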
    %int1_39995 = torch.constant.int 1
    %42441 = torch.aten.size.int %41706, %int1_39995 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_39996 = torch.constant.int 4
    %int1_39997 = torch.constant.int 1
    %int4_39998 = torch.constant.int 4
    %int128_39999 = torch.constant.int 128
    %42442 = torch.prim.ListConstruct %int4_39996, %42441, %int1_39997, %int4_39998, %int128_39999 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_40000 = torch.constant.bool false
    %42443 = torch.aten.expand %42433, %42442, %false_40000 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_40001 = torch.constant.int 4
    %int1_40002 = torch.constant.int 1
    %int4_40003 = torch.constant.int 4
    %int128_40004 = torch.constant.int 128
    %42444 = torch.prim.ListConstruct %int4_40001, %42441, %int1_40002, %int4_40003, %int128_40004 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_40005 = torch.constant.bool false
    %42445 = torch.aten.expand %42434, %42444, %false_40005 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_40006 = torch.constant.int 4
    %int1_40007 = torch.constant.int 1
    %int4_40008 = torch.constant.int 4
    %int128_40009 = torch.constant.int 128
    %42446 = torch.prim.ListConstruct %int4_40006, %42441, %int1_40007, %int4_40008, %int128_40009 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_40010 = torch.constant.bool false
    %42447 = torch.aten.expand %42435, %42446, %false_40010 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_40011 = torch.constant.int 4
    %int1_40012 = torch.constant.int 1
    %int4_40013 = torch.constant.int 4
    %int128_40014 = torch.constant.int 128
    %42448 = torch.prim.ListConstruct %int4_40011, %42441, %int1_40012, %int4_40013, %int128_40014 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_40015 = torch.constant.bool false
    %42449 = torch.aten.expand %42436, %42448, %false_40015 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_40016 = torch.constant.int 4
    %int1_40017 = torch.constant.int 1
    %int4_40018 = torch.constant.int 4
    %int128_40019 = torch.constant.int 128
    %42450 = torch.prim.ListConstruct %int4_40016, %42441, %int1_40017, %int4_40018, %int128_40019 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_40020 = torch.constant.bool false
    %42451 = torch.aten.expand %42437, %42450, %false_40020 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_40021 = torch.constant.int 4
    %int1_40022 = torch.constant.int 1
    %int4_40023 = torch.constant.int 4
    %int128_40024 = torch.constant.int 128
    %42452 = torch.prim.ListConstruct %int4_40021, %42441, %int1_40022, %int4_40023, %int128_40024 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_40025 = torch.constant.bool false
    %42453 = torch.aten.expand %42438, %42452, %false_40025 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_40026 = torch.constant.int 4
    %int1_40027 = torch.constant.int 1
    %int4_40028 = torch.constant.int 4
    %int128_40029 = torch.constant.int 128
    %42454 = torch.prim.ListConstruct %int4_40026, %42441, %int1_40027, %int4_40028, %int128_40029 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_40030 = torch.constant.bool false
    %42455 = torch.aten.expand %42439, %42454, %false_40030 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_40031 = torch.constant.int 4
    %int1_40032 = torch.constant.int 1
    %int4_40033 = torch.constant.int 4
    %int128_40034 = torch.constant.int 128
    %42456 = torch.prim.ListConstruct %int4_40031, %42441, %int1_40032, %int4_40033, %int128_40034 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_40035 = torch.constant.bool false
    %42457 = torch.aten.expand %42440, %42456, %false_40035 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %42457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
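    // Flatten the expanded values to [4,?,4,128] per shard, matching the key layout.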
    %int4_40036 = torch.constant.int 4
    %int4_40037 = torch.constant.int 4
    %int128_40038 = torch.constant.int 128
    %42458 = torch.prim.ListConstruct %int4_40036, %42441, %int4_40037, %int128_40038 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42459 = torch.aten.view %42443, %42458 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_40039 = torch.constant.int 4
    %int4_40040 = torch.constant.int 4
    %int128_40041 = torch.constant.int 128
    %42460 = torch.prim.ListConstruct %int4_40039, %42441, %int4_40040, %int128_40041 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42461 = torch.aten.view %42445, %42460 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_40042 = torch.constant.int 4
    %int4_40043 = torch.constant.int 4
    %int128_40044 = torch.constant.int 128
    %42462 = torch.prim.ListConstruct %int4_40042, %42441, %int4_40043, %int128_40044 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42463 = torch.aten.view %42447, %42462 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_40045 = torch.constant.int 4
    %int4_40046 = torch.constant.int 4
    %int128_40047 = torch.constant.int 128
    %42464 = torch.prim.ListConstruct %int4_40045, %42441, %int4_40046, %int128_40047 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42465 = torch.aten.view %42449, %42464 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_40048 = torch.constant.int 4
    %int4_40049 = torch.constant.int 4
    %int128_40050 = torch.constant.int 128
    %42466 = torch.prim.ListConstruct %int4_40048, %42441, %int4_40049, %int128_40050 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42467 = torch.aten.view %42451, %42466 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_40051 = torch.constant.int 4
    %int4_40052 = torch.constant.int 4
    %int128_40053 = torch.constant.int 128
    %42468 = torch.prim.ListConstruct %int4_40051, %42441, %int4_40052, %int128_40053 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42469 = torch.aten.view %42453, %42468 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_40054 = torch.constant.int 4
    %int4_40055 = torch.constant.int 4
    %int128_40056 = torch.constant.int 128
    %42470 = torch.prim.ListConstruct %int4_40054, %42441, %int4_40055, %int128_40056 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42471 = torch.aten.view %42455, %42470 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_40057 = torch.constant.int 4
    %int4_40058 = torch.constant.int 4
    %int128_40059 = torch.constant.int 128
    %42472 = torch.prim.ListConstruct %int4_40057, %42441, %int4_40058, %int128_40059 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42473 = torch.aten.view %42457, %42472 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
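    // Transpose the per-shard queries (%41849 .. %41954) from [batch, seq, heads, head_dim]
    // to [batch, heads, seq, head_dim], the layout the flash-attention op expects.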
    %int1_40060 = torch.constant.int 1
    %int2_40061 = torch.constant.int 2
    %42474 = torch.aten.transpose.int %41849, %int1_40060, %int2_40061 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42474, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40062 = torch.constant.int 1
    %int2_40063 = torch.constant.int 2
    %42475 = torch.aten.transpose.int %41864, %int1_40062, %int2_40063 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42475, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40064 = torch.constant.int 1
    %int2_40065 = torch.constant.int 2
    %42476 = torch.aten.transpose.int %41879, %int1_40064, %int2_40065 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42476, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40066 = torch.constant.int 1
    %int2_40067 = torch.constant.int 2
    %42477 = torch.aten.transpose.int %41894, %int1_40066, %int2_40067 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42477, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40068 = torch.constant.int 1
    %int2_40069 = torch.constant.int 2
    %42478 = torch.aten.transpose.int %41909, %int1_40068, %int2_40069 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42478, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40070 = torch.constant.int 1
    %int2_40071 = torch.constant.int 2
    %42479 = torch.aten.transpose.int %41924, %int1_40070, %int2_40071 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42479, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40072 = torch.constant.int 1
    %int2_40073 = torch.constant.int 2
    %42480 = torch.aten.transpose.int %41939, %int1_40072, %int2_40073 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42480, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40074 = torch.constant.int 1
    %int2_40075 = torch.constant.int 2
    %42481 = torch.aten.transpose.int %41954, %int1_40074, %int2_40075 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42481, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
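    // The repeated keys get the same [4,?,4,128] -> [4,4,?,128] transpose ...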
    %int1_40076 = torch.constant.int 1
    %int2_40077 = torch.constant.int 2
    %42482 = torch.aten.transpose.int %42418, %int1_40076, %int2_40077 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42482, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40078 = torch.constant.int 1
    %int2_40079 = torch.constant.int 2
    %42483 = torch.aten.transpose.int %42420, %int1_40078, %int2_40079 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42483, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40080 = torch.constant.int 1
    %int2_40081 = torch.constant.int 2
    %42484 = torch.aten.transpose.int %42422, %int1_40080, %int2_40081 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42484, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40082 = torch.constant.int 1
    %int2_40083 = torch.constant.int 2
    %42485 = torch.aten.transpose.int %42424, %int1_40082, %int2_40083 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42485, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40084 = torch.constant.int 1
    %int2_40085 = torch.constant.int 2
    %42486 = torch.aten.transpose.int %42426, %int1_40084, %int2_40085 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42486, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40086 = torch.constant.int 1
    %int2_40087 = torch.constant.int 2
    %42487 = torch.aten.transpose.int %42428, %int1_40086, %int2_40087 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42487, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40088 = torch.constant.int 1
    %int2_40089 = torch.constant.int 2
    %42488 = torch.aten.transpose.int %42430, %int1_40088, %int2_40089 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42488, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40090 = torch.constant.int 1
    %int2_40091 = torch.constant.int 2
    %42489 = torch.aten.transpose.int %42432, %int1_40090, %int2_40091 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42489, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
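    // ... and likewise the values.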
    %int1_40092 = torch.constant.int 1
    %int2_40093 = torch.constant.int 2
    %42490 = torch.aten.transpose.int %42459, %int1_40092, %int2_40093 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42490, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40094 = torch.constant.int 1
    %int2_40095 = torch.constant.int 2
    %42491 = torch.aten.transpose.int %42461, %int1_40094, %int2_40095 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42491, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40096 = torch.constant.int 1
    %int2_40097 = torch.constant.int 2
    %42492 = torch.aten.transpose.int %42463, %int1_40096, %int2_40097 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42492, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40098 = torch.constant.int 1
    %int2_40099 = torch.constant.int 2
    %42493 = torch.aten.transpose.int %42465, %int1_40098, %int2_40099 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42493, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40100 = torch.constant.int 1
    %int2_40101 = torch.constant.int 2
    %42494 = torch.aten.transpose.int %42467, %int1_40100, %int2_40101 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42494, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40102 = torch.constant.int 1
    %int2_40103 = torch.constant.int 2
    %42495 = torch.aten.transpose.int %42469, %int1_40102, %int2_40103 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42495, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40104 = torch.constant.int 1
    %int2_40105 = torch.constant.int 2
    %42496 = torch.aten.transpose.int %42471, %int1_40104, %int2_40105 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42496, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_40106 = torch.constant.int 1
    %int2_40107 = torch.constant.int 2
    %42497 = torch.aten.transpose.int %42473, %int1_40106, %int2_40107 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %42497, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
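    // Eight flash-attention calls, one per device shard: dropout_p = 0.0, is_causal = true,
    // and no explicit mask or scale override (both none). Each call returns
    // (output, logsumexp); only the output (result #0) is consumed below. Roughly
    // F.scaled_dot_product_attention(q, k, v, is_causal=True) per shard, in PyTorch terms.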
    %float0.000000e00_40108 = torch.constant.float 0.000000e+00
    %true_40109 = torch.constant.bool true
    %none_40110 = torch.constant.none
    %none_40111 = torch.constant.none
    %42498:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%42474, %42482, %42490, %float0.000000e00_40108, %true_40109, %none_40110, %none_40111) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %42498#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_40112 = torch.constant.float 0.000000e+00
    %true_40113 = torch.constant.bool true
    %none_40114 = torch.constant.none
    %none_40115 = torch.constant.none
    %42499:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%42475, %42483, %42491, %float0.000000e00_40112, %true_40113, %none_40114, %none_40115) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %42499#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_40116 = torch.constant.float 0.000000e+00
    %true_40117 = torch.constant.bool true
    %none_40118 = torch.constant.none
    %none_40119 = torch.constant.none
    %42500:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%42476, %42484, %42492, %float0.000000e00_40116, %true_40117, %none_40118, %none_40119) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %42500#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_40120 = torch.constant.float 0.000000e+00
    %true_40121 = torch.constant.bool true
    %none_40122 = torch.constant.none
    %none_40123 = torch.constant.none
    %42501:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%42477, %42485, %42493, %float0.000000e00_40120, %true_40121, %none_40122, %none_40123) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %42501#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_40124 = torch.constant.float 0.000000e+00
    %true_40125 = torch.constant.bool true
    %none_40126 = torch.constant.none
    %none_40127 = torch.constant.none
    %42502:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%42478, %42486, %42494, %float0.000000e00_40124, %true_40125, %none_40126, %none_40127) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %42502#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_40128 = torch.constant.float 0.000000e+00
    %true_40129 = torch.constant.bool true
    %none_40130 = torch.constant.none
    %none_40131 = torch.constant.none
    %42503:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%42479, %42487, %42495, %float0.000000e00_40128, %true_40129, %none_40130, %none_40131) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %42503#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_40132 = torch.constant.float 0.000000e+00
    %true_40133 = torch.constant.bool true
    %none_40134 = torch.constant.none
    %none_40135 = torch.constant.none
    %42504:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%42480, %42488, %42496, %float0.000000e00_40132, %true_40133, %none_40134, %none_40135) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %42504#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_40136 = torch.constant.float 0.000000e+00
    %true_40137 = torch.constant.bool true
    %none_40138 = torch.constant.none
    %none_40139 = torch.constant.none
    %42505:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%42481, %42489, %42497, %float0.000000e00_40136, %true_40137, %none_40138, %none_40139) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %42505#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
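    // Transpose each attention output back to [batch, seq, heads, head_dim].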
    %int1_40140 = torch.constant.int 1
    %int2_40141 = torch.constant.int 2
    %42506 = torch.aten.transpose.int %42498#0, %int1_40140, %int2_40141 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_40142 = torch.constant.int 1
    %int2_40143 = torch.constant.int 2
    %42507 = torch.aten.transpose.int %42499#0, %int1_40142, %int2_40143 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_40144 = torch.constant.int 1
    %int2_40145 = torch.constant.int 2
    %42508 = torch.aten.transpose.int %42500#0, %int1_40144, %int2_40145 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_40146 = torch.constant.int 1
    %int2_40147 = torch.constant.int 2
    %42509 = torch.aten.transpose.int %42501#0, %int1_40146, %int2_40147 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_40148 = torch.constant.int 1
    %int2_40149 = torch.constant.int 2
    %42510 = torch.aten.transpose.int %42502#0, %int1_40148, %int2_40149 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_40150 = torch.constant.int 1
    %int2_40151 = torch.constant.int 2
    %42511 = torch.aten.transpose.int %42503#0, %int1_40150, %int2_40151 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_40152 = torch.constant.int 1
    %int2_40153 = torch.constant.int 2
    %42512 = torch.aten.transpose.int %42504#0, %int1_40152, %int2_40153 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_40154 = torch.constant.int 1
    %int2_40155 = torch.constant.int 2
    %42513 = torch.aten.transpose.int %42505#0, %int1_40154, %int2_40155 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %42513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
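    // Merge heads: view each output as [4, seq, 512] (4 heads * head_dim 128 = 512), using
    // the per-shard dynamic sequence lengths (%41835, %41850, ... %41940).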
    %int4_40156 = torch.constant.int 4
    %int512_40157 = torch.constant.int 512
    %42514 = torch.prim.ListConstruct %int4_40156, %41835, %int512_40157 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42515 = torch.aten.view %42506, %42514 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %42515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40158 = torch.constant.int 4
    %int512_40159 = torch.constant.int 512
    %42516 = torch.prim.ListConstruct %int4_40158, %41850, %int512_40159 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42517 = torch.aten.view %42507, %42516 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %42517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40160 = torch.constant.int 4
    %int512_40161 = torch.constant.int 512
    %42518 = torch.prim.ListConstruct %int4_40160, %41865, %int512_40161 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42519 = torch.aten.view %42508, %42518 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %42519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40162 = torch.constant.int 4
    %int512_40163 = torch.constant.int 512
    %42520 = torch.prim.ListConstruct %int4_40162, %41880, %int512_40163 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42521 = torch.aten.view %42509, %42520 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %42521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40164 = torch.constant.int 4
    %int512_40165 = torch.constant.int 512
    %42522 = torch.prim.ListConstruct %int4_40164, %41895, %int512_40165 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42523 = torch.aten.view %42510, %42522 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %42523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40166 = torch.constant.int 4
    %int512_40167 = torch.constant.int 512
    %42524 = torch.prim.ListConstruct %int4_40166, %41910, %int512_40167 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42525 = torch.aten.view %42511, %42524 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %42525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40168 = torch.constant.int 4
    %int512_40169 = torch.constant.int 512
    %42526 = torch.prim.ListConstruct %int4_40168, %41925, %int512_40169 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42527 = torch.aten.view %42512, %42526 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %42527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40170 = torch.constant.int 4
    %int512_40171 = torch.constant.int 512
    %42528 = torch.prim.ListConstruct %int4_40170, %41940, %int512_40171 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42529 = torch.aten.view %42513, %42528 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %42529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
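    // Transpose the eight output-projection weight shards (%1552 .. %1559) from [4096,512]
    // to [512,4096]. The full weight is split along its 512-wide reduction dimension, one
    // slice per device -- which matches the row-parallel output projection of
    // tensor-parallel attention.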
    %int1_40172 = torch.constant.int 1
    %int0_40173 = torch.constant.int 0
    %42530 = torch.prim.ListConstruct %int1_40172, %int0_40173 : (!torch.int, !torch.int) -> !torch.list<int>
    %42531 = torch.aten.permute %1552, %42530 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_40174 = torch.constant.int 1
    %int0_40175 = torch.constant.int 0
    %42532 = torch.prim.ListConstruct %int1_40174, %int0_40175 : (!torch.int, !torch.int) -> !torch.list<int>
    %42533 = torch.aten.permute %1553, %42532 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_40176 = torch.constant.int 1
    %int0_40177 = torch.constant.int 0
    %42534 = torch.prim.ListConstruct %int1_40176, %int0_40177 : (!torch.int, !torch.int) -> !torch.list<int>
    %42535 = torch.aten.permute %1554, %42534 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_40178 = torch.constant.int 1
    %int0_40179 = torch.constant.int 0
    %42536 = torch.prim.ListConstruct %int1_40178, %int0_40179 : (!torch.int, !torch.int) -> !torch.list<int>
    %42537 = torch.aten.permute %1555, %42536 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_40180 = torch.constant.int 1
    %int0_40181 = torch.constant.int 0
    %42538 = torch.prim.ListConstruct %int1_40180, %int0_40181 : (!torch.int, !torch.int) -> !torch.list<int>
    %42539 = torch.aten.permute %1556, %42538 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_40182 = torch.constant.int 1
    %int0_40183 = torch.constant.int 0
    %42540 = torch.prim.ListConstruct %int1_40182, %int0_40183 : (!torch.int, !torch.int) -> !torch.list<int>
    %42541 = torch.aten.permute %1557, %42540 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_40184 = torch.constant.int 1
    %int0_40185 = torch.constant.int 0
    %42542 = torch.prim.ListConstruct %int1_40184, %int0_40185 : (!torch.int, !torch.int) -> !torch.list<int>
    %42543 = torch.aten.permute %1558, %42542 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_40186 = torch.constant.int 1
    %int0_40187 = torch.constant.int 0
    %42544 = torch.prim.ListConstruct %int1_40186, %int0_40187 : (!torch.int, !torch.int) -> !torch.list<int>
    %42545 = torch.aten.permute %1559, %42544 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
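    // Per-shard output projection: flatten to [4*seq, 512], matmul with the [512,4096]
    // weight slice, and reshape to [4,?,4096]. Each device now holds a partial sum of the
    // full 4096-wide projection; the partials still have to be reduced across devices.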
    %int4_40188 = torch.constant.int 4
    %42546 = torch.aten.mul.int %int4_40188, %41835 : !torch.int, !torch.int -> !torch.int
    %int512_40189 = torch.constant.int 512
    %42547 = torch.prim.ListConstruct %42546, %int512_40189 : (!torch.int, !torch.int) -> !torch.list<int>
    %42548 = torch.aten.view %42515, %42547 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %42548, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %42549 = torch.aten.mm %42548, %42531 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42549, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40190 = torch.constant.int 4
    %int4096_40191 = torch.constant.int 4096
    %42550 = torch.prim.ListConstruct %int4_40190, %41835, %int4096_40191 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42551 = torch.aten.view %42549, %42550 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_40192 = torch.constant.int 4
    %42552 = torch.aten.mul.int %int4_40192, %41850 : !torch.int, !torch.int -> !torch.int
    %int512_40193 = torch.constant.int 512
    %42553 = torch.prim.ListConstruct %42552, %int512_40193 : (!torch.int, !torch.int) -> !torch.list<int>
    %42554 = torch.aten.view %42517, %42553 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %42554, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %42555 = torch.aten.mm %42554, %42533 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42555, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40194 = torch.constant.int 4
    %int4096_40195 = torch.constant.int 4096
    %42556 = torch.prim.ListConstruct %int4_40194, %41850, %int4096_40195 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42557 = torch.aten.view %42555, %42556 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_40196 = torch.constant.int 4
    %42558 = torch.aten.mul.int %int4_40196, %41865 : !torch.int, !torch.int -> !torch.int
    %int512_40197 = torch.constant.int 512
    %42559 = torch.prim.ListConstruct %42558, %int512_40197 : (!torch.int, !torch.int) -> !torch.list<int>
    %42560 = torch.aten.view %42519, %42559 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %42560, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %42561 = torch.aten.mm %42560, %42535 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42561, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40198 = torch.constant.int 4
    %int4096_40199 = torch.constant.int 4096
    %42562 = torch.prim.ListConstruct %int4_40198, %41865, %int4096_40199 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42563 = torch.aten.view %42561, %42562 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_40200 = torch.constant.int 4
    %42564 = torch.aten.mul.int %int4_40200, %41880 : !torch.int, !torch.int -> !torch.int
    %int512_40201 = torch.constant.int 512
    %42565 = torch.prim.ListConstruct %42564, %int512_40201 : (!torch.int, !torch.int) -> !torch.list<int>
    %42566 = torch.aten.view %42521, %42565 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %42566, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %42567 = torch.aten.mm %42566, %42537 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42567, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40202 = torch.constant.int 4
    %int4096_40203 = torch.constant.int 4096
    %42568 = torch.prim.ListConstruct %int4_40202, %41880, %int4096_40203 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42569 = torch.aten.view %42567, %42568 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_40204 = torch.constant.int 4
    %42570 = torch.aten.mul.int %int4_40204, %41895 : !torch.int, !torch.int -> !torch.int
    %int512_40205 = torch.constant.int 512
    %42571 = torch.prim.ListConstruct %42570, %int512_40205 : (!torch.int, !torch.int) -> !torch.list<int>
    %42572 = torch.aten.view %42523, %42571 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %42572, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %42573 = torch.aten.mm %42572, %42539 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42573, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40206 = torch.constant.int 4
    %int4096_40207 = torch.constant.int 4096
    %42574 = torch.prim.ListConstruct %int4_40206, %41895, %int4096_40207 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42575 = torch.aten.view %42573, %42574 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_40208 = torch.constant.int 4
    %42576 = torch.aten.mul.int %int4_40208, %41910 : !torch.int, !torch.int -> !torch.int
    %int512_40209 = torch.constant.int 512
    %42577 = torch.prim.ListConstruct %42576, %int512_40209 : (!torch.int, !torch.int) -> !torch.list<int>
    %42578 = torch.aten.view %42525, %42577 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %42578, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %42579 = torch.aten.mm %42578, %42541 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42579, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40210 = torch.constant.int 4
    %int4096_40211 = torch.constant.int 4096
    %42580 = torch.prim.ListConstruct %int4_40210, %41910, %int4096_40211 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42581 = torch.aten.view %42579, %42580 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_40212 = torch.constant.int 4
    %42582 = torch.aten.mul.int %int4_40212, %41925 : !torch.int, !torch.int -> !torch.int
    %int512_40213 = torch.constant.int 512
    %42583 = torch.prim.ListConstruct %42582, %int512_40213 : (!torch.int, !torch.int) -> !torch.list<int>
    %42584 = torch.aten.view %42527, %42583 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %42584, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %42585 = torch.aten.mm %42584, %42543 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42585, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40214 = torch.constant.int 4
    %int4096_40215 = torch.constant.int 4096
    %42586 = torch.prim.ListConstruct %int4_40214, %41925, %int4096_40215 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42587 = torch.aten.view %42585, %42586 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_40216 = torch.constant.int 4
    %42588 = torch.aten.mul.int %int4_40216, %41940 : !torch.int, !torch.int -> !torch.int
    %int512_40217 = torch.constant.int 512
    %42589 = torch.prim.ListConstruct %42588, %int512_40217 : (!torch.int, !torch.int) -> !torch.list<int>
    %42590 = torch.aten.view %42529, %42589 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %42590, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %42591 = torch.aten.mm %42590, %42545 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42591, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40218 = torch.constant.int 4
    %int4096_40219 = torch.constant.int 4096
    %42592 = torch.prim.ListConstruct %int4_40218, %41940, %int4096_40219 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42593 = torch.aten.view %42591, %42592 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
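    // Unrolled all-reduce, replica 1 of 8: transfer the seven remote partials to @__device_0
    // (the local partial %42551 needs no transfer) and accumulate them with a chain of adds.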
    %42594 = torch_c.to_builtin_tensor %42557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40220 = arith.constant 1 : index
    %dim_40221 = tensor.dim %42594, %c1_40220 : tensor<4x?x4096xf16>
    %42595 = flow.tensor.transfer %42594 : tensor<4x?x4096xf16>{%dim_40221} to #hal.device.promise<@__device_0>
    %42596 = torch_c.from_builtin_tensor %42595 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42597 = torch_c.to_builtin_tensor %42563 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40222 = arith.constant 1 : index
    %dim_40223 = tensor.dim %42597, %c1_40222 : tensor<4x?x4096xf16>
    %42598 = flow.tensor.transfer %42597 : tensor<4x?x4096xf16>{%dim_40223} to #hal.device.promise<@__device_0>
    %42599 = torch_c.from_builtin_tensor %42598 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42600 = torch_c.to_builtin_tensor %42569 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40224 = arith.constant 1 : index
    %dim_40225 = tensor.dim %42600, %c1_40224 : tensor<4x?x4096xf16>
    %42601 = flow.tensor.transfer %42600 : tensor<4x?x4096xf16>{%dim_40225} to #hal.device.promise<@__device_0>
    %42602 = torch_c.from_builtin_tensor %42601 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42603 = torch_c.to_builtin_tensor %42575 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40226 = arith.constant 1 : index
    %dim_40227 = tensor.dim %42603, %c1_40226 : tensor<4x?x4096xf16>
    %42604 = flow.tensor.transfer %42603 : tensor<4x?x4096xf16>{%dim_40227} to #hal.device.promise<@__device_0>
    %42605 = torch_c.from_builtin_tensor %42604 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42606 = torch_c.to_builtin_tensor %42581 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40228 = arith.constant 1 : index
    %dim_40229 = tensor.dim %42606, %c1_40228 : tensor<4x?x4096xf16>
    %42607 = flow.tensor.transfer %42606 : tensor<4x?x4096xf16>{%dim_40229} to #hal.device.promise<@__device_0>
    %42608 = torch_c.from_builtin_tensor %42607 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42609 = torch_c.to_builtin_tensor %42587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40230 = arith.constant 1 : index
    %dim_40231 = tensor.dim %42609, %c1_40230 : tensor<4x?x4096xf16>
    %42610 = flow.tensor.transfer %42609 : tensor<4x?x4096xf16>{%dim_40231} to #hal.device.promise<@__device_0>
    %42611 = torch_c.from_builtin_tensor %42610 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42612 = torch_c.to_builtin_tensor %42593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40232 = arith.constant 1 : index
    %dim_40233 = tensor.dim %42612, %c1_40232 : tensor<4x?x4096xf16>
    %42613 = flow.tensor.transfer %42612 : tensor<4x?x4096xf16>{%dim_40233} to #hal.device.promise<@__device_0>
    %42614 = torch_c.from_builtin_tensor %42613 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40234 = torch.constant.int 1
    %42615 = torch.aten.add.Tensor %42551, %42596, %int1_40234 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40235 = torch.constant.int 1
    %42616 = torch.aten.add.Tensor %42615, %42599, %int1_40235 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40236 = torch.constant.int 1
    %42617 = torch.aten.add.Tensor %42616, %42602, %int1_40236 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40237 = torch.constant.int 1
    %42618 = torch.aten.add.Tensor %42617, %42605, %int1_40237 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40238 = torch.constant.int 1
    %42619 = torch.aten.add.Tensor %42618, %42608, %int1_40238 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40239 = torch.constant.int 1
    %42620 = torch.aten.add.Tensor %42619, %42611, %int1_40239 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40240 = torch.constant.int 1
    %42621 = torch.aten.add.Tensor %42620, %42614, %int1_40240 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
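    // Same reduction replicated on @__device_1; its local partial %42557 is added in place
    // at the matching position, so only seven transfers are issued.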
    %42622 = torch_c.to_builtin_tensor %42551 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40241 = arith.constant 1 : index
    %dim_40242 = tensor.dim %42622, %c1_40241 : tensor<4x?x4096xf16>
    %42623 = flow.tensor.transfer %42622 : tensor<4x?x4096xf16>{%dim_40242} to #hal.device.promise<@__device_1>
    %42624 = torch_c.from_builtin_tensor %42623 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42625 = torch_c.to_builtin_tensor %42563 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40243 = arith.constant 1 : index
    %dim_40244 = tensor.dim %42625, %c1_40243 : tensor<4x?x4096xf16>
    %42626 = flow.tensor.transfer %42625 : tensor<4x?x4096xf16>{%dim_40244} to #hal.device.promise<@__device_1>
    %42627 = torch_c.from_builtin_tensor %42626 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42628 = torch_c.to_builtin_tensor %42569 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40245 = arith.constant 1 : index
    %dim_40246 = tensor.dim %42628, %c1_40245 : tensor<4x?x4096xf16>
    %42629 = flow.tensor.transfer %42628 : tensor<4x?x4096xf16>{%dim_40246} to #hal.device.promise<@__device_1>
    %42630 = torch_c.from_builtin_tensor %42629 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42631 = torch_c.to_builtin_tensor %42575 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40247 = arith.constant 1 : index
    %dim_40248 = tensor.dim %42631, %c1_40247 : tensor<4x?x4096xf16>
    %42632 = flow.tensor.transfer %42631 : tensor<4x?x4096xf16>{%dim_40248} to #hal.device.promise<@__device_1>
    %42633 = torch_c.from_builtin_tensor %42632 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42634 = torch_c.to_builtin_tensor %42581 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40249 = arith.constant 1 : index
    %dim_40250 = tensor.dim %42634, %c1_40249 : tensor<4x?x4096xf16>
    %42635 = flow.tensor.transfer %42634 : tensor<4x?x4096xf16>{%dim_40250} to #hal.device.promise<@__device_1>
    %42636 = torch_c.from_builtin_tensor %42635 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42637 = torch_c.to_builtin_tensor %42587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40251 = arith.constant 1 : index
    %dim_40252 = tensor.dim %42637, %c1_40251 : tensor<4x?x4096xf16>
    %42638 = flow.tensor.transfer %42637 : tensor<4x?x4096xf16>{%dim_40252} to #hal.device.promise<@__device_1>
    %42639 = torch_c.from_builtin_tensor %42638 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42640 = torch_c.to_builtin_tensor %42593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40253 = arith.constant 1 : index
    %dim_40254 = tensor.dim %42640, %c1_40253 : tensor<4x?x4096xf16>
    %42641 = flow.tensor.transfer %42640 : tensor<4x?x4096xf16>{%dim_40254} to #hal.device.promise<@__device_1>
    %42642 = torch_c.from_builtin_tensor %42641 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40255 = torch.constant.int 1
    %42643 = torch.aten.add.Tensor %42624, %42557, %int1_40255 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40256 = torch.constant.int 1
    %42644 = torch.aten.add.Tensor %42643, %42627, %int1_40256 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40257 = torch.constant.int 1
    %42645 = torch.aten.add.Tensor %42644, %42630, %int1_40257 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40258 = torch.constant.int 1
    %42646 = torch.aten.add.Tensor %42645, %42633, %int1_40258 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40259 = torch.constant.int 1
    %42647 = torch.aten.add.Tensor %42646, %42636, %int1_40259 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40260 = torch.constant.int 1
    %42648 = torch.aten.add.Tensor %42647, %42639, %int1_40260 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40261 = torch.constant.int 1
    %42649 = torch.aten.add.Tensor %42648, %42642, %int1_40261 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
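    // Reduction replica for @__device_2 (local partial %42563 used directly, no transfer).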
    %42650 = torch_c.to_builtin_tensor %42551 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40262 = arith.constant 1 : index
    %dim_40263 = tensor.dim %42650, %c1_40262 : tensor<4x?x4096xf16>
    %42651 = flow.tensor.transfer %42650 : tensor<4x?x4096xf16>{%dim_40263} to #hal.device.promise<@__device_2>
    %42652 = torch_c.from_builtin_tensor %42651 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42653 = torch_c.to_builtin_tensor %42557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40264 = arith.constant 1 : index
    %dim_40265 = tensor.dim %42653, %c1_40264 : tensor<4x?x4096xf16>
    %42654 = flow.tensor.transfer %42653 : tensor<4x?x4096xf16>{%dim_40265} to #hal.device.promise<@__device_2>
    %42655 = torch_c.from_builtin_tensor %42654 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42656 = torch_c.to_builtin_tensor %42569 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40266 = arith.constant 1 : index
    %dim_40267 = tensor.dim %42656, %c1_40266 : tensor<4x?x4096xf16>
    %42657 = flow.tensor.transfer %42656 : tensor<4x?x4096xf16>{%dim_40267} to #hal.device.promise<@__device_2>
    %42658 = torch_c.from_builtin_tensor %42657 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42659 = torch_c.to_builtin_tensor %42575 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40268 = arith.constant 1 : index
    %dim_40269 = tensor.dim %42659, %c1_40268 : tensor<4x?x4096xf16>
    %42660 = flow.tensor.transfer %42659 : tensor<4x?x4096xf16>{%dim_40269} to #hal.device.promise<@__device_2>
    %42661 = torch_c.from_builtin_tensor %42660 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42662 = torch_c.to_builtin_tensor %42581 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40270 = arith.constant 1 : index
    %dim_40271 = tensor.dim %42662, %c1_40270 : tensor<4x?x4096xf16>
    %42663 = flow.tensor.transfer %42662 : tensor<4x?x4096xf16>{%dim_40271} to #hal.device.promise<@__device_2>
    %42664 = torch_c.from_builtin_tensor %42663 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42665 = torch_c.to_builtin_tensor %42587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40272 = arith.constant 1 : index
    %dim_40273 = tensor.dim %42665, %c1_40272 : tensor<4x?x4096xf16>
    %42666 = flow.tensor.transfer %42665 : tensor<4x?x4096xf16>{%dim_40273} to #hal.device.promise<@__device_2>
    %42667 = torch_c.from_builtin_tensor %42666 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42668 = torch_c.to_builtin_tensor %42593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40274 = arith.constant 1 : index
    %dim_40275 = tensor.dim %42668, %c1_40274 : tensor<4x?x4096xf16>
    %42669 = flow.tensor.transfer %42668 : tensor<4x?x4096xf16>{%dim_40275} to #hal.device.promise<@__device_2>
    %42670 = torch_c.from_builtin_tensor %42669 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40276 = torch.constant.int 1
    %42671 = torch.aten.add.Tensor %42652, %42655, %int1_40276 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40277 = torch.constant.int 1
    %42672 = torch.aten.add.Tensor %42671, %42563, %int1_40277 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40278 = torch.constant.int 1
    %42673 = torch.aten.add.Tensor %42672, %42658, %int1_40278 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40279 = torch.constant.int 1
    %42674 = torch.aten.add.Tensor %42673, %42661, %int1_40279 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40280 = torch.constant.int 1
    %42675 = torch.aten.add.Tensor %42674, %42664, %int1_40280 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40281 = torch.constant.int 1
    %42676 = torch.aten.add.Tensor %42675, %42667, %int1_40281 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40282 = torch.constant.int 1
    %42677 = torch.aten.add.Tensor %42676, %42670, %int1_40282 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
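    // Reduction replica for @__device_3 (local partial %42569 used directly, no transfer).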
    %42678 = torch_c.to_builtin_tensor %42551 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40283 = arith.constant 1 : index
    %dim_40284 = tensor.dim %42678, %c1_40283 : tensor<4x?x4096xf16>
    %42679 = flow.tensor.transfer %42678 : tensor<4x?x4096xf16>{%dim_40284} to #hal.device.promise<@__device_3>
    %42680 = torch_c.from_builtin_tensor %42679 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42681 = torch_c.to_builtin_tensor %42557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40285 = arith.constant 1 : index
    %dim_40286 = tensor.dim %42681, %c1_40285 : tensor<4x?x4096xf16>
    %42682 = flow.tensor.transfer %42681 : tensor<4x?x4096xf16>{%dim_40286} to #hal.device.promise<@__device_3>
    %42683 = torch_c.from_builtin_tensor %42682 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42684 = torch_c.to_builtin_tensor %42563 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40287 = arith.constant 1 : index
    %dim_40288 = tensor.dim %42684, %c1_40287 : tensor<4x?x4096xf16>
    %42685 = flow.tensor.transfer %42684 : tensor<4x?x4096xf16>{%dim_40288} to #hal.device.promise<@__device_3>
    %42686 = torch_c.from_builtin_tensor %42685 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42687 = torch_c.to_builtin_tensor %42575 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40289 = arith.constant 1 : index
    %dim_40290 = tensor.dim %42687, %c1_40289 : tensor<4x?x4096xf16>
    %42688 = flow.tensor.transfer %42687 : tensor<4x?x4096xf16>{%dim_40290} to #hal.device.promise<@__device_3>
    %42689 = torch_c.from_builtin_tensor %42688 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42690 = torch_c.to_builtin_tensor %42581 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40291 = arith.constant 1 : index
    %dim_40292 = tensor.dim %42690, %c1_40291 : tensor<4x?x4096xf16>
    %42691 = flow.tensor.transfer %42690 : tensor<4x?x4096xf16>{%dim_40292} to #hal.device.promise<@__device_3>
    %42692 = torch_c.from_builtin_tensor %42691 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42693 = torch_c.to_builtin_tensor %42587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40293 = arith.constant 1 : index
    %dim_40294 = tensor.dim %42693, %c1_40293 : tensor<4x?x4096xf16>
    %42694 = flow.tensor.transfer %42693 : tensor<4x?x4096xf16>{%dim_40294} to #hal.device.promise<@__device_3>
    %42695 = torch_c.from_builtin_tensor %42694 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42696 = torch_c.to_builtin_tensor %42593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40295 = arith.constant 1 : index
    %dim_40296 = tensor.dim %42696, %c1_40295 : tensor<4x?x4096xf16>
    %42697 = flow.tensor.transfer %42696 : tensor<4x?x4096xf16>{%dim_40296} to #hal.device.promise<@__device_3>
    %42698 = torch_c.from_builtin_tensor %42697 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40297 = torch.constant.int 1
    %42699 = torch.aten.add.Tensor %42680, %42683, %int1_40297 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40298 = torch.constant.int 1
    %42700 = torch.aten.add.Tensor %42699, %42686, %int1_40298 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40299 = torch.constant.int 1
    %42701 = torch.aten.add.Tensor %42700, %42569, %int1_40299 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40300 = torch.constant.int 1
    %42702 = torch.aten.add.Tensor %42701, %42689, %int1_40300 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40301 = torch.constant.int 1
    %42703 = torch.aten.add.Tensor %42702, %42692, %int1_40301 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40302 = torch.constant.int 1
    %42704 = torch.aten.add.Tensor %42703, %42695, %int1_40302 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40303 = torch.constant.int 1
    %42705 = torch.aten.add.Tensor %42704, %42698, %int1_40303 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
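    // Reduction replica for @__device_4 (local partial %42575 used directly, no transfer).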
    %42706 = torch_c.to_builtin_tensor %42551 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40304 = arith.constant 1 : index
    %dim_40305 = tensor.dim %42706, %c1_40304 : tensor<4x?x4096xf16>
    %42707 = flow.tensor.transfer %42706 : tensor<4x?x4096xf16>{%dim_40305} to #hal.device.promise<@__device_4>
    %42708 = torch_c.from_builtin_tensor %42707 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42709 = torch_c.to_builtin_tensor %42557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40306 = arith.constant 1 : index
    %dim_40307 = tensor.dim %42709, %c1_40306 : tensor<4x?x4096xf16>
    %42710 = flow.tensor.transfer %42709 : tensor<4x?x4096xf16>{%dim_40307} to #hal.device.promise<@__device_4>
    %42711 = torch_c.from_builtin_tensor %42710 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42712 = torch_c.to_builtin_tensor %42563 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40308 = arith.constant 1 : index
    %dim_40309 = tensor.dim %42712, %c1_40308 : tensor<4x?x4096xf16>
    %42713 = flow.tensor.transfer %42712 : tensor<4x?x4096xf16>{%dim_40309} to #hal.device.promise<@__device_4>
    %42714 = torch_c.from_builtin_tensor %42713 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42715 = torch_c.to_builtin_tensor %42569 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40310 = arith.constant 1 : index
    %dim_40311 = tensor.dim %42715, %c1_40310 : tensor<4x?x4096xf16>
    %42716 = flow.tensor.transfer %42715 : tensor<4x?x4096xf16>{%dim_40311} to #hal.device.promise<@__device_4>
    %42717 = torch_c.from_builtin_tensor %42716 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42718 = torch_c.to_builtin_tensor %42581 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40312 = arith.constant 1 : index
    %dim_40313 = tensor.dim %42718, %c1_40312 : tensor<4x?x4096xf16>
    %42719 = flow.tensor.transfer %42718 : tensor<4x?x4096xf16>{%dim_40313} to #hal.device.promise<@__device_4>
    %42720 = torch_c.from_builtin_tensor %42719 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42721 = torch_c.to_builtin_tensor %42587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40314 = arith.constant 1 : index
    %dim_40315 = tensor.dim %42721, %c1_40314 : tensor<4x?x4096xf16>
    %42722 = flow.tensor.transfer %42721 : tensor<4x?x4096xf16>{%dim_40315} to #hal.device.promise<@__device_4>
    %42723 = torch_c.from_builtin_tensor %42722 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42724 = torch_c.to_builtin_tensor %42593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40316 = arith.constant 1 : index
    %dim_40317 = tensor.dim %42724, %c1_40316 : tensor<4x?x4096xf16>
    %42725 = flow.tensor.transfer %42724 : tensor<4x?x4096xf16>{%dim_40317} to #hal.device.promise<@__device_4>
    %42726 = torch_c.from_builtin_tensor %42725 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40318 = torch.constant.int 1
    %42727 = torch.aten.add.Tensor %42708, %42711, %int1_40318 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40319 = torch.constant.int 1
    %42728 = torch.aten.add.Tensor %42727, %42714, %int1_40319 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40320 = torch.constant.int 1
    %42729 = torch.aten.add.Tensor %42728, %42717, %int1_40320 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40321 = torch.constant.int 1
    %42730 = torch.aten.add.Tensor %42729, %42575, %int1_40321 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40322 = torch.constant.int 1
    %42731 = torch.aten.add.Tensor %42730, %42720, %int1_40322 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40323 = torch.constant.int 1
    %42732 = torch.aten.add.Tensor %42731, %42723, %int1_40323 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40324 = torch.constant.int 1
    %42733 = torch.aten.add.Tensor %42732, %42726, %int1_40324 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
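    // Reduction replica for @__device_5 (local partial %42581 used directly, no transfer).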
    %42734 = torch_c.to_builtin_tensor %42551 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40325 = arith.constant 1 : index
    %dim_40326 = tensor.dim %42734, %c1_40325 : tensor<4x?x4096xf16>
    %42735 = flow.tensor.transfer %42734 : tensor<4x?x4096xf16>{%dim_40326} to #hal.device.promise<@__device_5>
    %42736 = torch_c.from_builtin_tensor %42735 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42737 = torch_c.to_builtin_tensor %42557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40327 = arith.constant 1 : index
    %dim_40328 = tensor.dim %42737, %c1_40327 : tensor<4x?x4096xf16>
    %42738 = flow.tensor.transfer %42737 : tensor<4x?x4096xf16>{%dim_40328} to #hal.device.promise<@__device_5>
    %42739 = torch_c.from_builtin_tensor %42738 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42740 = torch_c.to_builtin_tensor %42563 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40329 = arith.constant 1 : index
    %dim_40330 = tensor.dim %42740, %c1_40329 : tensor<4x?x4096xf16>
    %42741 = flow.tensor.transfer %42740 : tensor<4x?x4096xf16>{%dim_40330} to #hal.device.promise<@__device_5>
    %42742 = torch_c.from_builtin_tensor %42741 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42743 = torch_c.to_builtin_tensor %42569 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40331 = arith.constant 1 : index
    %dim_40332 = tensor.dim %42743, %c1_40331 : tensor<4x?x4096xf16>
    %42744 = flow.tensor.transfer %42743 : tensor<4x?x4096xf16>{%dim_40332} to #hal.device.promise<@__device_5>
    %42745 = torch_c.from_builtin_tensor %42744 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42746 = torch_c.to_builtin_tensor %42575 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40333 = arith.constant 1 : index
    %dim_40334 = tensor.dim %42746, %c1_40333 : tensor<4x?x4096xf16>
    %42747 = flow.tensor.transfer %42746 : tensor<4x?x4096xf16>{%dim_40334} to #hal.device.promise<@__device_5>
    %42748 = torch_c.from_builtin_tensor %42747 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42749 = torch_c.to_builtin_tensor %42587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40335 = arith.constant 1 : index
    %dim_40336 = tensor.dim %42749, %c1_40335 : tensor<4x?x4096xf16>
    %42750 = flow.tensor.transfer %42749 : tensor<4x?x4096xf16>{%dim_40336} to #hal.device.promise<@__device_5>
    %42751 = torch_c.from_builtin_tensor %42750 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42752 = torch_c.to_builtin_tensor %42593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40337 = arith.constant 1 : index
    %dim_40338 = tensor.dim %42752, %c1_40337 : tensor<4x?x4096xf16>
    %42753 = flow.tensor.transfer %42752 : tensor<4x?x4096xf16>{%dim_40338} to #hal.device.promise<@__device_5>
    %42754 = torch_c.from_builtin_tensor %42753 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40339 = torch.constant.int 1
    %42755 = torch.aten.add.Tensor %42736, %42739, %int1_40339 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40340 = torch.constant.int 1
    %42756 = torch.aten.add.Tensor %42755, %42742, %int1_40340 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40341 = torch.constant.int 1
    %42757 = torch.aten.add.Tensor %42756, %42745, %int1_40341 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40342 = torch.constant.int 1
    %42758 = torch.aten.add.Tensor %42757, %42748, %int1_40342 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40343 = torch.constant.int 1
    %42759 = torch.aten.add.Tensor %42758, %42581, %int1_40343 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40344 = torch.constant.int 1
    %42760 = torch.aten.add.Tensor %42759, %42751, %int1_40344 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40345 = torch.constant.int 1
    %42761 = torch.aten.add.Tensor %42760, %42754, %int1_40345 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
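    // Reduction replica for @__device_6 (local partial %42587 used directly, no transfer).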
    %42762 = torch_c.to_builtin_tensor %42551 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40346 = arith.constant 1 : index
    %dim_40347 = tensor.dim %42762, %c1_40346 : tensor<4x?x4096xf16>
    %42763 = flow.tensor.transfer %42762 : tensor<4x?x4096xf16>{%dim_40347} to #hal.device.promise<@__device_6>
    %42764 = torch_c.from_builtin_tensor %42763 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42765 = torch_c.to_builtin_tensor %42557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40348 = arith.constant 1 : index
    %dim_40349 = tensor.dim %42765, %c1_40348 : tensor<4x?x4096xf16>
    %42766 = flow.tensor.transfer %42765 : tensor<4x?x4096xf16>{%dim_40349} to #hal.device.promise<@__device_6>
    %42767 = torch_c.from_builtin_tensor %42766 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42768 = torch_c.to_builtin_tensor %42563 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40350 = arith.constant 1 : index
    %dim_40351 = tensor.dim %42768, %c1_40350 : tensor<4x?x4096xf16>
    %42769 = flow.tensor.transfer %42768 : tensor<4x?x4096xf16>{%dim_40351} to #hal.device.promise<@__device_6>
    %42770 = torch_c.from_builtin_tensor %42769 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42771 = torch_c.to_builtin_tensor %42569 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40352 = arith.constant 1 : index
    %dim_40353 = tensor.dim %42771, %c1_40352 : tensor<4x?x4096xf16>
    %42772 = flow.tensor.transfer %42771 : tensor<4x?x4096xf16>{%dim_40353} to #hal.device.promise<@__device_6>
    %42773 = torch_c.from_builtin_tensor %42772 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42774 = torch_c.to_builtin_tensor %42575 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40354 = arith.constant 1 : index
    %dim_40355 = tensor.dim %42774, %c1_40354 : tensor<4x?x4096xf16>
    %42775 = flow.tensor.transfer %42774 : tensor<4x?x4096xf16>{%dim_40355} to #hal.device.promise<@__device_6>
    %42776 = torch_c.from_builtin_tensor %42775 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42777 = torch_c.to_builtin_tensor %42581 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40356 = arith.constant 1 : index
    %dim_40357 = tensor.dim %42777, %c1_40356 : tensor<4x?x4096xf16>
    %42778 = flow.tensor.transfer %42777 : tensor<4x?x4096xf16>{%dim_40357} to #hal.device.promise<@__device_6>
    %42779 = torch_c.from_builtin_tensor %42778 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42780 = torch_c.to_builtin_tensor %42593 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40358 = arith.constant 1 : index
    %dim_40359 = tensor.dim %42780, %c1_40358 : tensor<4x?x4096xf16>
    %42781 = flow.tensor.transfer %42780 : tensor<4x?x4096xf16>{%dim_40359} to #hal.device.promise<@__device_6>
    %42782 = torch_c.from_builtin_tensor %42781 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40360 = torch.constant.int 1
    %42783 = torch.aten.add.Tensor %42764, %42767, %int1_40360 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40361 = torch.constant.int 1
    %42784 = torch.aten.add.Tensor %42783, %42770, %int1_40361 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40362 = torch.constant.int 1
    %42785 = torch.aten.add.Tensor %42784, %42773, %int1_40362 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40363 = torch.constant.int 1
    %42786 = torch.aten.add.Tensor %42785, %42776, %int1_40363 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40364 = torch.constant.int 1
    %42787 = torch.aten.add.Tensor %42786, %42779, %int1_40364 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40365 = torch.constant.int 1
    %42788 = torch.aten.add.Tensor %42787, %42587, %int1_40365 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40366 = torch.constant.int 1
    %42789 = torch.aten.add.Tensor %42788, %42782, %int1_40366 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
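    // Reduction replica for @__device_7 (local partial %42593 used directly, no transfer).
    // At this point every device holds the same reduced sum (%42621, %42649, ..., %42817).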
    %42790 = torch_c.to_builtin_tensor %42551 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40367 = arith.constant 1 : index
    %dim_40368 = tensor.dim %42790, %c1_40367 : tensor<4x?x4096xf16>
    %42791 = flow.tensor.transfer %42790 : tensor<4x?x4096xf16>{%dim_40368} to #hal.device.promise<@__device_7>
    %42792 = torch_c.from_builtin_tensor %42791 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42793 = torch_c.to_builtin_tensor %42557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40369 = arith.constant 1 : index
    %dim_40370 = tensor.dim %42793, %c1_40369 : tensor<4x?x4096xf16>
    %42794 = flow.tensor.transfer %42793 : tensor<4x?x4096xf16>{%dim_40370} to #hal.device.promise<@__device_7>
    %42795 = torch_c.from_builtin_tensor %42794 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42796 = torch_c.to_builtin_tensor %42563 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40371 = arith.constant 1 : index
    %dim_40372 = tensor.dim %42796, %c1_40371 : tensor<4x?x4096xf16>
    %42797 = flow.tensor.transfer %42796 : tensor<4x?x4096xf16>{%dim_40372} to #hal.device.promise<@__device_7>
    %42798 = torch_c.from_builtin_tensor %42797 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42799 = torch_c.to_builtin_tensor %42569 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40373 = arith.constant 1 : index
    %dim_40374 = tensor.dim %42799, %c1_40373 : tensor<4x?x4096xf16>
    %42800 = flow.tensor.transfer %42799 : tensor<4x?x4096xf16>{%dim_40374} to #hal.device.promise<@__device_7>
    %42801 = torch_c.from_builtin_tensor %42800 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42802 = torch_c.to_builtin_tensor %42575 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40375 = arith.constant 1 : index
    %dim_40376 = tensor.dim %42802, %c1_40375 : tensor<4x?x4096xf16>
    %42803 = flow.tensor.transfer %42802 : tensor<4x?x4096xf16>{%dim_40376} to #hal.device.promise<@__device_7>
    %42804 = torch_c.from_builtin_tensor %42803 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42805 = torch_c.to_builtin_tensor %42581 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40377 = arith.constant 1 : index
    %dim_40378 = tensor.dim %42805, %c1_40377 : tensor<4x?x4096xf16>
    %42806 = flow.tensor.transfer %42805 : tensor<4x?x4096xf16>{%dim_40378} to #hal.device.promise<@__device_7>
    %42807 = torch_c.from_builtin_tensor %42806 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %42808 = torch_c.to_builtin_tensor %42587 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40379 = arith.constant 1 : index
    %dim_40380 = tensor.dim %42808, %c1_40379 : tensor<4x?x4096xf16>
    %42809 = flow.tensor.transfer %42808 : tensor<4x?x4096xf16>{%dim_40380} to #hal.device.promise<@__device_7>
    %42810 = torch_c.from_builtin_tensor %42809 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40381 = torch.constant.int 1
    %42811 = torch.aten.add.Tensor %42792, %42795, %int1_40381 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40382 = torch.constant.int 1
    %42812 = torch.aten.add.Tensor %42811, %42798, %int1_40382 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40383 = torch.constant.int 1
    %42813 = torch.aten.add.Tensor %42812, %42801, %int1_40383 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40384 = torch.constant.int 1
    %42814 = torch.aten.add.Tensor %42813, %42804, %int1_40384 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40385 = torch.constant.int 1
    %42815 = torch.aten.add.Tensor %42814, %42807, %int1_40385 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40386 = torch.constant.int 1
    %42816 = torch.aten.add.Tensor %42815, %42810, %int1_40386 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40387 = torch.constant.int 1
    %42817 = torch.aten.add.Tensor %42816, %42593, %int1_40387 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
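    // Add each device's copy of the reduced sum to that device's residual stream
    // (%41477 ... %41484), consistent with a transformer residual connection around the block.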
    %int1_40388 = torch.constant.int 1
    %42818 = torch.aten.add.Tensor %41477, %42621, %int1_40388 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40389 = torch.constant.int 1
    %42819 = torch.aten.add.Tensor %41478, %42649, %int1_40389 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40390 = torch.constant.int 1
    %42820 = torch.aten.add.Tensor %41479, %42677, %int1_40390 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40391 = torch.constant.int 1
    %42821 = torch.aten.add.Tensor %41480, %42705, %int1_40391 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40392 = torch.constant.int 1
    %42822 = torch.aten.add.Tensor %41481, %42733, %int1_40392 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40393 = torch.constant.int 1
    %42823 = torch.aten.add.Tensor %41482, %42761, %int1_40393 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40394 = torch.constant.int 1
    %42824 = torch.aten.add.Tensor %41483, %42789, %int1_40394 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40395 = torch.constant.int 1
    %42825 = torch.aten.add.Tensor %41484, %42817, %int1_40395 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
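    // Start of what appears to be RMSNorm over the residual, replicated per device:
    // upcast to f32 (torch dtype code 6), square elementwise, then take the mean over
    // the last (hidden) dimension with keepdim = true.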
    %int6_40396 = torch.constant.int 6
    %42826 = torch.prims.convert_element_type %42818, %int6_40396 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40397 = torch.constant.int 6
    %42827 = torch.prims.convert_element_type %42819, %int6_40397 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40398 = torch.constant.int 6
    %42828 = torch.prims.convert_element_type %42820, %int6_40398 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40399 = torch.constant.int 6
    %42829 = torch.prims.convert_element_type %42821, %int6_40399 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40400 = torch.constant.int 6
    %42830 = torch.prims.convert_element_type %42822, %int6_40400 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40401 = torch.constant.int 6
    %42831 = torch.prims.convert_element_type %42823, %int6_40401 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40402 = torch.constant.int 6
    %42832 = torch.prims.convert_element_type %42824, %int6_40402 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40403 = torch.constant.int 6
    %42833 = torch.prims.convert_element_type %42825, %int6_40403 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
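    // Elementwise square (x^2) of each upcast copy.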
    %int2_40404 = torch.constant.int 2
    %42834 = torch.aten.pow.Tensor_Scalar %42826, %int2_40404 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40405 = torch.constant.int 2
    %42835 = torch.aten.pow.Tensor_Scalar %42827, %int2_40405 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40406 = torch.constant.int 2
    %42836 = torch.aten.pow.Tensor_Scalar %42828, %int2_40406 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40407 = torch.constant.int 2
    %42837 = torch.aten.pow.Tensor_Scalar %42829, %int2_40407 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40408 = torch.constant.int 2
    %42838 = torch.aten.pow.Tensor_Scalar %42830, %int2_40408 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40409 = torch.constant.int 2
    %42839 = torch.aten.pow.Tensor_Scalar %42831, %int2_40409 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40410 = torch.constant.int 2
    %42840 = torch.aten.pow.Tensor_Scalar %42832, %int2_40410 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40411 = torch.constant.int 2
    %42841 = torch.aten.pow.Tensor_Scalar %42833, %int2_40411 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
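    // Mean of the squares over the last (hidden) dimension, keepdim=true -> [4,?,1].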
    %int-1_40412 = torch.constant.int -1
    %42842 = torch.prim.ListConstruct %int-1_40412 : (!torch.int) -> !torch.list<int>
    %true_40413 = torch.constant.bool true
    %none_40414 = torch.constant.none
    %42843 = torch.aten.mean.dim %42834, %42842, %true_40413, %none_40414 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40415 = torch.constant.int -1
    %42844 = torch.prim.ListConstruct %int-1_40415 : (!torch.int) -> !torch.list<int>
    %true_40416 = torch.constant.bool true
    %none_40417 = torch.constant.none
    %42845 = torch.aten.mean.dim %42835, %42844, %true_40416, %none_40417 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40418 = torch.constant.int -1
    %42846 = torch.prim.ListConstruct %int-1_40418 : (!torch.int) -> !torch.list<int>
    %true_40419 = torch.constant.bool true
    %none_40420 = torch.constant.none
    %42847 = torch.aten.mean.dim %42836, %42846, %true_40419, %none_40420 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40421 = torch.constant.int -1
    %42848 = torch.prim.ListConstruct %int-1_40421 : (!torch.int) -> !torch.list<int>
    %true_40422 = torch.constant.bool true
    %none_40423 = torch.constant.none
    %42849 = torch.aten.mean.dim %42837, %42848, %true_40422, %none_40423 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40424 = torch.constant.int -1
    %42850 = torch.prim.ListConstruct %int-1_40424 : (!torch.int) -> !torch.list<int>
    %true_40425 = torch.constant.bool true
    %none_40426 = torch.constant.none
    %42851 = torch.aten.mean.dim %42838, %42850, %true_40425, %none_40426 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40427 = torch.constant.int -1
    %42852 = torch.prim.ListConstruct %int-1_40427 : (!torch.int) -> !torch.list<int>
    %true_40428 = torch.constant.bool true
    %none_40429 = torch.constant.none
    %42853 = torch.aten.mean.dim %42839, %42852, %true_40428, %none_40429 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40430 = torch.constant.int -1
    %42854 = torch.prim.ListConstruct %int-1_40430 : (!torch.int) -> !torch.list<int>
    %true_40431 = torch.constant.bool true
    %none_40432 = torch.constant.none
    %42855 = torch.aten.mean.dim %42840, %42854, %true_40431, %none_40432 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40433 = torch.constant.int -1
    %42856 = torch.prim.ListConstruct %int-1_40433 : (!torch.int) -> !torch.list<int>
    %true_40434 = torch.constant.bool true
    %none_40435 = torch.constant.none
    %42857 = torch.aten.mean.dim %42841, %42856, %true_40434, %none_40435 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
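    // Add the epsilon (~1e-5) before the reciprocal square root, for numerical stability.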
    %float9.999990e-06_40436 = torch.constant.float 9.9999997473787516E-6
    %int1_40437 = torch.constant.int 1
    %42858 = torch.aten.add.Scalar %42843, %float9.999990e-06_40436, %int1_40437 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40438 = torch.constant.float 9.9999997473787516E-6
    %int1_40439 = torch.constant.int 1
    %42859 = torch.aten.add.Scalar %42845, %float9.999990e-06_40438, %int1_40439 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40440 = torch.constant.float 9.9999997473787516E-6
    %int1_40441 = torch.constant.int 1
    %42860 = torch.aten.add.Scalar %42847, %float9.999990e-06_40440, %int1_40441 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40442 = torch.constant.float 9.9999997473787516E-6
    %int1_40443 = torch.constant.int 1
    %42861 = torch.aten.add.Scalar %42849, %float9.999990e-06_40442, %int1_40443 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40444 = torch.constant.float 9.9999997473787516E-6
    %int1_40445 = torch.constant.int 1
    %42862 = torch.aten.add.Scalar %42851, %float9.999990e-06_40444, %int1_40445 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40446 = torch.constant.float 9.9999997473787516E-6
    %int1_40447 = torch.constant.int 1
    %42863 = torch.aten.add.Scalar %42853, %float9.999990e-06_40446, %int1_40447 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40448 = torch.constant.float 9.9999997473787516E-6
    %int1_40449 = torch.constant.int 1
    %42864 = torch.aten.add.Scalar %42855, %float9.999990e-06_40448, %int1_40449 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40450 = torch.constant.float 9.9999997473787516E-6
    %int1_40451 = torch.constant.int 1
    %42865 = torch.aten.add.Scalar %42857, %float9.999990e-06_40450, %int1_40451 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
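    // rsqrt of the stabilized mean-square gives the per-row inverse RMS.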
    %42866 = torch.aten.rsqrt %42858 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %42867 = torch.aten.rsqrt %42859 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %42868 = torch.aten.rsqrt %42860 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %42869 = torch.aten.rsqrt %42861 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %42870 = torch.aten.rsqrt %42862 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %42871 = torch.aten.rsqrt %42863 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %42872 = torch.aten.rsqrt %42864 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %42873 = torch.aten.rsqrt %42865 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %42873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
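    // Normalize: multiply each upcast hidden state by its inverse RMS (broadcast over the hidden dim).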
    %42874 = torch.aten.mul.Tensor %42826, %42866 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42875 = torch.aten.mul.Tensor %42827, %42867 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42876 = torch.aten.mul.Tensor %42828, %42868 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42877 = torch.aten.mul.Tensor %42829, %42869 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42878 = torch.aten.mul.Tensor %42830, %42870 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42879 = torch.aten.mul.Tensor %42831, %42871 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42880 = torch.aten.mul.Tensor %42832, %42872 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42881 = torch.aten.mul.Tensor %42833, %42873 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
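    // Scale by the norm weight vectors %1560..%1567 ([4096] f32 each) - by the naming pattern of
    // the globals, these are most likely per-device replicas of the same norm weight.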
    %42882 = torch.aten.mul.Tensor %1560, %42874 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42883 = torch.aten.mul.Tensor %1561, %42875 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42884 = torch.aten.mul.Tensor %1562, %42876 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42885 = torch.aten.mul.Tensor %1563, %42877 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42886 = torch.aten.mul.Tensor %1564, %42878 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42887 = torch.aten.mul.Tensor %1565, %42879 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42888 = torch.aten.mul.Tensor %1566, %42880 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %42889 = torch.aten.mul.Tensor %1567, %42881 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %42889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
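    // Downcast the normalized activations back to f16 (torch dtype code 5).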
    %int5_40452 = torch.constant.int 5
    %42890 = torch.prims.convert_element_type %42882, %int5_40452 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40453 = torch.constant.int 5
    %42891 = torch.prims.convert_element_type %42883, %int5_40453 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40454 = torch.constant.int 5
    %42892 = torch.prims.convert_element_type %42884, %int5_40454 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40455 = torch.constant.int 5
    %42893 = torch.prims.convert_element_type %42885, %int5_40455 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40456 = torch.constant.int 5
    %42894 = torch.prims.convert_element_type %42886, %int5_40456 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40457 = torch.constant.int 5
    %42895 = torch.prims.convert_element_type %42887, %int5_40457 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40458 = torch.constant.int 5
    %42896 = torch.prims.convert_element_type %42888, %int5_40458 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40459 = torch.constant.int 5
    %42897 = torch.prims.convert_element_type %42889, %int5_40459 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %42897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
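    // FFN gate projection setup: transpose each shard's gate weight [1792,4096] -> [4096,1792].
    // The 1792 column count is consistent with an FFN inner dimension split eight ways
    // (8 * 1792 = 14336) - an inference from the shapes, not stated in the IR.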
    %int1_40460 = torch.constant.int 1
    %int0_40461 = torch.constant.int 0
    %42898 = torch.prim.ListConstruct %int1_40460, %int0_40461 : (!torch.int, !torch.int) -> !torch.list<int>
    %42899 = torch.aten.permute %1568, %42898 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40462 = torch.constant.int 1
    %int0_40463 = torch.constant.int 0
    %42900 = torch.prim.ListConstruct %int1_40462, %int0_40463 : (!torch.int, !torch.int) -> !torch.list<int>
    %42901 = torch.aten.permute %1569, %42900 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40464 = torch.constant.int 1
    %int0_40465 = torch.constant.int 0
    %42902 = torch.prim.ListConstruct %int1_40464, %int0_40465 : (!torch.int, !torch.int) -> !torch.list<int>
    %42903 = torch.aten.permute %1570, %42902 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40466 = torch.constant.int 1
    %int0_40467 = torch.constant.int 0
    %42904 = torch.prim.ListConstruct %int1_40466, %int0_40467 : (!torch.int, !torch.int) -> !torch.list<int>
    %42905 = torch.aten.permute %1571, %42904 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40468 = torch.constant.int 1
    %int0_40469 = torch.constant.int 0
    %42906 = torch.prim.ListConstruct %int1_40468, %int0_40469 : (!torch.int, !torch.int) -> !torch.list<int>
    %42907 = torch.aten.permute %1572, %42906 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40470 = torch.constant.int 1
    %int0_40471 = torch.constant.int 0
    %42908 = torch.prim.ListConstruct %int1_40470, %int0_40471 : (!torch.int, !torch.int) -> !torch.list<int>
    %42909 = torch.aten.permute %1573, %42908 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40472 = torch.constant.int 1
    %int0_40473 = torch.constant.int 0
    %42910 = torch.prim.ListConstruct %int1_40472, %int0_40473 : (!torch.int, !torch.int) -> !torch.list<int>
    %42911 = torch.aten.permute %1574, %42910 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40474 = torch.constant.int 1
    %int0_40475 = torch.constant.int 0
    %42912 = torch.prim.ListConstruct %int1_40474, %int0_40475 : (!torch.int, !torch.int) -> !torch.list<int>
    %42913 = torch.aten.permute %1575, %42912 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
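    // Gate matmuls, one per shard: flatten [4,?,4096] -> [4*?,4096], multiply by the transposed
    // gate weight, and reshape back to [4,?,1792].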
    %int4_40476 = torch.constant.int 4
    %42914 = torch.aten.mul.int %int4_40476, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40477 = torch.constant.int 4096
    %42915 = torch.prim.ListConstruct %42914, %int4096_40477 : (!torch.int, !torch.int) -> !torch.list<int>
    %42916 = torch.aten.view %42890, %42915 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42916, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %42917 = torch.aten.mm %42916, %42899 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %42917, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40478 = torch.constant.int 4
    %int1792_40479 = torch.constant.int 1792
    %42918 = torch.prim.ListConstruct %int4_40478, %2482, %int1792_40479 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42919 = torch.aten.view %42917, %42918 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40480 = torch.constant.int 4
    %42920 = torch.aten.mul.int %int4_40480, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40481 = torch.constant.int 4096
    %42921 = torch.prim.ListConstruct %42920, %int4096_40481 : (!torch.int, !torch.int) -> !torch.list<int>
    %42922 = torch.aten.view %42891, %42921 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42922, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %42923 = torch.aten.mm %42922, %42901 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %42923, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40482 = torch.constant.int 4
    %int1792_40483 = torch.constant.int 1792
    %42924 = torch.prim.ListConstruct %int4_40482, %2482, %int1792_40483 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42925 = torch.aten.view %42923, %42924 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40484 = torch.constant.int 4
    %42926 = torch.aten.mul.int %int4_40484, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40485 = torch.constant.int 4096
    %42927 = torch.prim.ListConstruct %42926, %int4096_40485 : (!torch.int, !torch.int) -> !torch.list<int>
    %42928 = torch.aten.view %42892, %42927 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42928, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %42929 = torch.aten.mm %42928, %42903 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %42929, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40486 = torch.constant.int 4
    %int1792_40487 = torch.constant.int 1792
    %42930 = torch.prim.ListConstruct %int4_40486, %2482, %int1792_40487 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42931 = torch.aten.view %42929, %42930 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40488 = torch.constant.int 4
    %42932 = torch.aten.mul.int %int4_40488, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40489 = torch.constant.int 4096
    %42933 = torch.prim.ListConstruct %42932, %int4096_40489 : (!torch.int, !torch.int) -> !torch.list<int>
    %42934 = torch.aten.view %42893, %42933 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42934, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %42935 = torch.aten.mm %42934, %42905 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %42935, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40490 = torch.constant.int 4
    %int1792_40491 = torch.constant.int 1792
    %42936 = torch.prim.ListConstruct %int4_40490, %2482, %int1792_40491 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42937 = torch.aten.view %42935, %42936 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40492 = torch.constant.int 4
    %42938 = torch.aten.mul.int %int4_40492, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40493 = torch.constant.int 4096
    %42939 = torch.prim.ListConstruct %42938, %int4096_40493 : (!torch.int, !torch.int) -> !torch.list<int>
    %42940 = torch.aten.view %42894, %42939 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42940, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %42941 = torch.aten.mm %42940, %42907 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %42941, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40494 = torch.constant.int 4
    %int1792_40495 = torch.constant.int 1792
    %42942 = torch.prim.ListConstruct %int4_40494, %2482, %int1792_40495 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42943 = torch.aten.view %42941, %42942 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40496 = torch.constant.int 4
    %42944 = torch.aten.mul.int %int4_40496, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40497 = torch.constant.int 4096
    %42945 = torch.prim.ListConstruct %42944, %int4096_40497 : (!torch.int, !torch.int) -> !torch.list<int>
    %42946 = torch.aten.view %42895, %42945 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42946, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %42947 = torch.aten.mm %42946, %42909 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %42947, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40498 = torch.constant.int 4
    %int1792_40499 = torch.constant.int 1792
    %42948 = torch.prim.ListConstruct %int4_40498, %2482, %int1792_40499 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42949 = torch.aten.view %42947, %42948 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40500 = torch.constant.int 4
    %42950 = torch.aten.mul.int %int4_40500, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40501 = torch.constant.int 4096
    %42951 = torch.prim.ListConstruct %42950, %int4096_40501 : (!torch.int, !torch.int) -> !torch.list<int>
    %42952 = torch.aten.view %42896, %42951 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42952, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %42953 = torch.aten.mm %42952, %42911 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %42953, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40502 = torch.constant.int 4
    %int1792_40503 = torch.constant.int 1792
    %42954 = torch.prim.ListConstruct %int4_40502, %2482, %int1792_40503 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42955 = torch.aten.view %42953, %42954 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40504 = torch.constant.int 4
    %42956 = torch.aten.mul.int %int4_40504, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40505 = torch.constant.int 4096
    %42957 = torch.prim.ListConstruct %42956, %int4096_40505 : (!torch.int, !torch.int) -> !torch.list<int>
    %42958 = torch.aten.view %42897, %42957 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42958, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %42959 = torch.aten.mm %42958, %42913 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %42959, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40506 = torch.constant.int 4
    %int1792_40507 = torch.constant.int 1792
    %42960 = torch.prim.ListConstruct %int4_40506, %2482, %int1792_40507 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42961 = torch.aten.view %42959, %42960 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
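    // SiLU activation on each shard's gate projection.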
    %42962 = torch.aten.silu %42919 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %42963 = torch.aten.silu %42925 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %42964 = torch.aten.silu %42931 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %42965 = torch.aten.silu %42937 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %42966 = torch.aten.silu %42943 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %42967 = torch.aten.silu %42949 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %42968 = torch.aten.silu %42955 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %42969 = torch.aten.silu %42961 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
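    // FFN up projection: the same [1792,4096] -> [4096,1792] transpose, applied to the per-shard
    // up weights %1576..%1583.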
    %int1_40508 = torch.constant.int 1
    %int0_40509 = torch.constant.int 0
    %42970 = torch.prim.ListConstruct %int1_40508, %int0_40509 : (!torch.int, !torch.int) -> !torch.list<int>
    %42971 = torch.aten.permute %1576, %42970 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40510 = torch.constant.int 1
    %int0_40511 = torch.constant.int 0
    %42972 = torch.prim.ListConstruct %int1_40510, %int0_40511 : (!torch.int, !torch.int) -> !torch.list<int>
    %42973 = torch.aten.permute %1577, %42972 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40512 = torch.constant.int 1
    %int0_40513 = torch.constant.int 0
    %42974 = torch.prim.ListConstruct %int1_40512, %int0_40513 : (!torch.int, !torch.int) -> !torch.list<int>
    %42975 = torch.aten.permute %1578, %42974 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40514 = torch.constant.int 1
    %int0_40515 = torch.constant.int 0
    %42976 = torch.prim.ListConstruct %int1_40514, %int0_40515 : (!torch.int, !torch.int) -> !torch.list<int>
    %42977 = torch.aten.permute %1579, %42976 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40516 = torch.constant.int 1
    %int0_40517 = torch.constant.int 0
    %42978 = torch.prim.ListConstruct %int1_40516, %int0_40517 : (!torch.int, !torch.int) -> !torch.list<int>
    %42979 = torch.aten.permute %1580, %42978 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40518 = torch.constant.int 1
    %int0_40519 = torch.constant.int 0
    %42980 = torch.prim.ListConstruct %int1_40518, %int0_40519 : (!torch.int, !torch.int) -> !torch.list<int>
    %42981 = torch.aten.permute %1581, %42980 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40520 = torch.constant.int 1
    %int0_40521 = torch.constant.int 0
    %42982 = torch.prim.ListConstruct %int1_40520, %int0_40521 : (!torch.int, !torch.int) -> !torch.list<int>
    %42983 = torch.aten.permute %1582, %42982 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_40522 = torch.constant.int 1
    %int0_40523 = torch.constant.int 0
    %42984 = torch.prim.ListConstruct %int1_40522, %int0_40523 : (!torch.int, !torch.int) -> !torch.list<int>
    %42985 = torch.aten.permute %1583, %42984 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
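    // Up-projection matmuls, mirroring the gate path: flatten, matmul, reshape to [4,?,1792].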
    %int4_40524 = torch.constant.int 4
    %42986 = torch.aten.mul.int %int4_40524, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40525 = torch.constant.int 4096
    %42987 = torch.prim.ListConstruct %42986, %int4096_40525 : (!torch.int, !torch.int) -> !torch.list<int>
    %42988 = torch.aten.view %42890, %42987 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42988, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %42989 = torch.aten.mm %42988, %42971 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %42989, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40526 = torch.constant.int 4
    %int1792_40527 = torch.constant.int 1792
    %42990 = torch.prim.ListConstruct %int4_40526, %2482, %int1792_40527 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42991 = torch.aten.view %42989, %42990 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40528 = torch.constant.int 4
    %42992 = torch.aten.mul.int %int4_40528, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40529 = torch.constant.int 4096
    %42993 = torch.prim.ListConstruct %42992, %int4096_40529 : (!torch.int, !torch.int) -> !torch.list<int>
    %42994 = torch.aten.view %42891, %42993 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %42994, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %42995 = torch.aten.mm %42994, %42973 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %42995, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40530 = torch.constant.int 4
    %int1792_40531 = torch.constant.int 1792
    %42996 = torch.prim.ListConstruct %int4_40530, %2482, %int1792_40531 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %42997 = torch.aten.view %42995, %42996 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %42997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40532 = torch.constant.int 4
    %42998 = torch.aten.mul.int %int4_40532, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40533 = torch.constant.int 4096
    %42999 = torch.prim.ListConstruct %42998, %int4096_40533 : (!torch.int, !torch.int) -> !torch.list<int>
    %43000 = torch.aten.view %42892, %42999 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43000, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43001 = torch.aten.mm %43000, %42975 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43001, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40534 = torch.constant.int 4
    %int1792_40535 = torch.constant.int 1792
    %43002 = torch.prim.ListConstruct %int4_40534, %2482, %int1792_40535 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43003 = torch.aten.view %43001, %43002 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40536 = torch.constant.int 4
    %43004 = torch.aten.mul.int %int4_40536, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40537 = torch.constant.int 4096
    %43005 = torch.prim.ListConstruct %43004, %int4096_40537 : (!torch.int, !torch.int) -> !torch.list<int>
    %43006 = torch.aten.view %42893, %43005 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43006, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43007 = torch.aten.mm %43006, %42977 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43007, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40538 = torch.constant.int 4
    %int1792_40539 = torch.constant.int 1792
    %43008 = torch.prim.ListConstruct %int4_40538, %2482, %int1792_40539 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43009 = torch.aten.view %43007, %43008 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40540 = torch.constant.int 4
    %43010 = torch.aten.mul.int %int4_40540, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40541 = torch.constant.int 4096
    %43011 = torch.prim.ListConstruct %43010, %int4096_40541 : (!torch.int, !torch.int) -> !torch.list<int>
    %43012 = torch.aten.view %42894, %43011 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43012, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43013 = torch.aten.mm %43012, %42979 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43013, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40542 = torch.constant.int 4
    %int1792_40543 = torch.constant.int 1792
    %43014 = torch.prim.ListConstruct %int4_40542, %2482, %int1792_40543 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43015 = torch.aten.view %43013, %43014 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40544 = torch.constant.int 4
    %43016 = torch.aten.mul.int %int4_40544, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40545 = torch.constant.int 4096
    %43017 = torch.prim.ListConstruct %43016, %int4096_40545 : (!torch.int, !torch.int) -> !torch.list<int>
    %43018 = torch.aten.view %42895, %43017 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43018, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43019 = torch.aten.mm %43018, %42981 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43019, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40546 = torch.constant.int 4
    %int1792_40547 = torch.constant.int 1792
    %43020 = torch.prim.ListConstruct %int4_40546, %2482, %int1792_40547 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43021 = torch.aten.view %43019, %43020 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40548 = torch.constant.int 4
    %43022 = torch.aten.mul.int %int4_40548, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40549 = torch.constant.int 4096
    %43023 = torch.prim.ListConstruct %43022, %int4096_40549 : (!torch.int, !torch.int) -> !torch.list<int>
    %43024 = torch.aten.view %42896, %43023 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43024, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43025 = torch.aten.mm %43024, %42983 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43025, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40550 = torch.constant.int 4
    %int1792_40551 = torch.constant.int 1792
    %43026 = torch.prim.ListConstruct %int4_40550, %2482, %int1792_40551 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43027 = torch.aten.view %43025, %43026 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_40552 = torch.constant.int 4
    %43028 = torch.aten.mul.int %int4_40552, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40553 = torch.constant.int 4096
    %43029 = torch.prim.ListConstruct %43028, %int4096_40553 : (!torch.int, !torch.int) -> !torch.list<int>
    %43030 = torch.aten.view %42897, %43029 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43030, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43031 = torch.aten.mm %43030, %42985 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43031, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_40554 = torch.constant.int 4
    %int1792_40555 = torch.constant.int 1792
    %43032 = torch.prim.ListConstruct %int4_40554, %2482, %int1792_40555 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43033 = torch.aten.view %43031, %43032 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
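    // SwiGLU combine: elementwise silu(gate) * up, per shard.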
    %43034 = torch.aten.mul.Tensor %42962, %42991 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %43035 = torch.aten.mul.Tensor %42963, %42997 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %43036 = torch.aten.mul.Tensor %42964, %43003 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %43037 = torch.aten.mul.Tensor %42965, %43009 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %43038 = torch.aten.mul.Tensor %42966, %43015 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %43039 = torch.aten.mul.Tensor %42967, %43021 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %43040 = torch.aten.mul.Tensor %42968, %43027 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %43041 = torch.aten.mul.Tensor %42969, %43033 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %43041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
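    // FFN down projection: transpose the per-shard down weights [4096,1792] -> [1792,4096].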
    %int1_40556 = torch.constant.int 1
    %int0_40557 = torch.constant.int 0
    %43042 = torch.prim.ListConstruct %int1_40556, %int0_40557 : (!torch.int, !torch.int) -> !torch.list<int>
    %43043 = torch.aten.permute %1584, %43042 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_40558 = torch.constant.int 1
    %int0_40559 = torch.constant.int 0
    %43044 = torch.prim.ListConstruct %int1_40558, %int0_40559 : (!torch.int, !torch.int) -> !torch.list<int>
    %43045 = torch.aten.permute %1585, %43044 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_40560 = torch.constant.int 1
    %int0_40561 = torch.constant.int 0
    %43046 = torch.prim.ListConstruct %int1_40560, %int0_40561 : (!torch.int, !torch.int) -> !torch.list<int>
    %43047 = torch.aten.permute %1586, %43046 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_40562 = torch.constant.int 1
    %int0_40563 = torch.constant.int 0
    %43048 = torch.prim.ListConstruct %int1_40562, %int0_40563 : (!torch.int, !torch.int) -> !torch.list<int>
    %43049 = torch.aten.permute %1587, %43048 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_40564 = torch.constant.int 1
    %int0_40565 = torch.constant.int 0
    %43050 = torch.prim.ListConstruct %int1_40564, %int0_40565 : (!torch.int, !torch.int) -> !torch.list<int>
    %43051 = torch.aten.permute %1588, %43050 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_40566 = torch.constant.int 1
    %int0_40567 = torch.constant.int 0
    %43052 = torch.prim.ListConstruct %int1_40566, %int0_40567 : (!torch.int, !torch.int) -> !torch.list<int>
    %43053 = torch.aten.permute %1589, %43052 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_40568 = torch.constant.int 1
    %int0_40569 = torch.constant.int 0
    %43054 = torch.prim.ListConstruct %int1_40568, %int0_40569 : (!torch.int, !torch.int) -> !torch.list<int>
    %43055 = torch.aten.permute %1590, %43054 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_40570 = torch.constant.int 1
    %int0_40571 = torch.constant.int 0
    %43056 = torch.prim.ListConstruct %int1_40570, %int0_40571 : (!torch.int, !torch.int) -> !torch.list<int>
    %43057 = torch.aten.permute %1591, %43056 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
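    // Down-projection matmuls: flatten [4,?,1792] -> [4*?,1792], multiply back up to the 4096-wide
    // hidden size, and reshape to [4,?,4096]. Since each shard holds only 1/8 of the FFN inner dim,
    // these are presumably partial results to be reduced across devices later (not visible here).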
    %int1_40572 = torch.constant.int 1
    %43058 = torch.aten.size.int %42919, %int1_40572 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_40573 = torch.constant.int 4
    %43059 = torch.aten.mul.int %int4_40573, %43058 : !torch.int, !torch.int -> !torch.int
    %int1792_40574 = torch.constant.int 1792
    %43060 = torch.prim.ListConstruct %43059, %int1792_40574 : (!torch.int, !torch.int) -> !torch.list<int>
    %43061 = torch.aten.view %43034, %43060 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43061, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %43062 = torch.aten.mm %43061, %43043 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43062, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40575 = torch.constant.int 4
    %int4096_40576 = torch.constant.int 4096
    %43063 = torch.prim.ListConstruct %int4_40575, %43058, %int4096_40576 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43064 = torch.aten.view %43062, %43063 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40577 = torch.constant.int 1
    %43065 = torch.aten.size.int %42925, %int1_40577 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_40578 = torch.constant.int 4
    %43066 = torch.aten.mul.int %int4_40578, %43065 : !torch.int, !torch.int -> !torch.int
    %int1792_40579 = torch.constant.int 1792
    %43067 = torch.prim.ListConstruct %43066, %int1792_40579 : (!torch.int, !torch.int) -> !torch.list<int>
    %43068 = torch.aten.view %43035, %43067 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43068, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %43069 = torch.aten.mm %43068, %43045 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43069, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40580 = torch.constant.int 4
    %int4096_40581 = torch.constant.int 4096
    %43070 = torch.prim.ListConstruct %int4_40580, %43065, %int4096_40581 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43071 = torch.aten.view %43069, %43070 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40582 = torch.constant.int 1
    %43072 = torch.aten.size.int %42931, %int1_40582 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_40583 = torch.constant.int 4
    %43073 = torch.aten.mul.int %int4_40583, %43072 : !torch.int, !torch.int -> !torch.int
    %int1792_40584 = torch.constant.int 1792
    %43074 = torch.prim.ListConstruct %43073, %int1792_40584 : (!torch.int, !torch.int) -> !torch.list<int>
    %43075 = torch.aten.view %43036, %43074 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43075, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %43076 = torch.aten.mm %43075, %43047 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43076, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40585 = torch.constant.int 4
    %int4096_40586 = torch.constant.int 4096
    %43077 = torch.prim.ListConstruct %int4_40585, %43072, %int4096_40586 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43078 = torch.aten.view %43076, %43077 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40587 = torch.constant.int 1
    %43079 = torch.aten.size.int %42937, %int1_40587 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_40588 = torch.constant.int 4
    %43080 = torch.aten.mul.int %int4_40588, %43079 : !torch.int, !torch.int -> !torch.int
    %int1792_40589 = torch.constant.int 1792
    %43081 = torch.prim.ListConstruct %43080, %int1792_40589 : (!torch.int, !torch.int) -> !torch.list<int>
    %43082 = torch.aten.view %43037, %43081 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43082, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %43083 = torch.aten.mm %43082, %43049 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43083, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40590 = torch.constant.int 4
    %int4096_40591 = torch.constant.int 4096
    %43084 = torch.prim.ListConstruct %int4_40590, %43079, %int4096_40591 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43085 = torch.aten.view %43083, %43084 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40592 = torch.constant.int 1
    %43086 = torch.aten.size.int %42943, %int1_40592 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_40593 = torch.constant.int 4
    %43087 = torch.aten.mul.int %int4_40593, %43086 : !torch.int, !torch.int -> !torch.int
    %int1792_40594 = torch.constant.int 1792
    %43088 = torch.prim.ListConstruct %43087, %int1792_40594 : (!torch.int, !torch.int) -> !torch.list<int>
    %43089 = torch.aten.view %43038, %43088 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43089, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %43090 = torch.aten.mm %43089, %43051 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43090, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40595 = torch.constant.int 4
    %int4096_40596 = torch.constant.int 4096
    %43091 = torch.prim.ListConstruct %int4_40595, %43086, %int4096_40596 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43092 = torch.aten.view %43090, %43091 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40597 = torch.constant.int 1
    %43093 = torch.aten.size.int %42949, %int1_40597 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_40598 = torch.constant.int 4
    %43094 = torch.aten.mul.int %int4_40598, %43093 : !torch.int, !torch.int -> !torch.int
    %int1792_40599 = torch.constant.int 1792
    %43095 = torch.prim.ListConstruct %43094, %int1792_40599 : (!torch.int, !torch.int) -> !torch.list<int>
    %43096 = torch.aten.view %43039, %43095 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43096, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %43097 = torch.aten.mm %43096, %43053 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43097, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40600 = torch.constant.int 4
    %int4096_40601 = torch.constant.int 4096
    %43098 = torch.prim.ListConstruct %int4_40600, %43093, %int4096_40601 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43099 = torch.aten.view %43097, %43098 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40602 = torch.constant.int 1
    %43100 = torch.aten.size.int %42955, %int1_40602 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_40603 = torch.constant.int 4
    %43101 = torch.aten.mul.int %int4_40603, %43100 : !torch.int, !torch.int -> !torch.int
    %int1792_40604 = torch.constant.int 1792
    %43102 = torch.prim.ListConstruct %43101, %int1792_40604 : (!torch.int, !torch.int) -> !torch.list<int>
    %43103 = torch.aten.view %43040, %43102 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43103, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %43104 = torch.aten.mm %43103, %43055 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43104, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40605 = torch.constant.int 4
    %int4096_40606 = torch.constant.int 4096
    %43105 = torch.prim.ListConstruct %int4_40605, %43100, %int4096_40606 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43106 = torch.aten.view %43104, %43105 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40607 = torch.constant.int 1
    %43107 = torch.aten.size.int %42961, %int1_40607 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_40608 = torch.constant.int 4
    %43108 = torch.aten.mul.int %int4_40608, %43107 : !torch.int, !torch.int -> !torch.int
    %int1792_40609 = torch.constant.int 1792
    %43109 = torch.prim.ListConstruct %43108, %int1792_40609 : (!torch.int, !torch.int) -> !torch.list<int>
    %43110 = torch.aten.view %43041, %43109 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %43110, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %43111 = torch.aten.mm %43110, %43057 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43111, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_40610 = torch.constant.int 4
    %int4096_40611 = torch.constant.int 4096
    %43112 = torch.prim.ListConstruct %int4_40610, %43107, %int4096_40611 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43113 = torch.aten.view %43111, %43112 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
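    // The eight per-device partials (%43064, %43071, %43078, %43085, %43092,
    // %43099, %43106, %43113) are now combined in what amounts to an all-reduce
    // spelled out eagerly: for each destination device, the seven remote partials
    // are copied in with flow.tensor.transfer and summed with a chain of
    // torch.aten.add.Tensor ops. The block below reduces onto @__device_0
    // (local partial %43064), producing %43141.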
    %43114 = torch_c.to_builtin_tensor %43071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40612 = arith.constant 1 : index
    %dim_40613 = tensor.dim %43114, %c1_40612 : tensor<4x?x4096xf16>
    %43115 = flow.tensor.transfer %43114 : tensor<4x?x4096xf16>{%dim_40613} to #hal.device.promise<@__device_0>
    %43116 = torch_c.from_builtin_tensor %43115 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43117 = torch_c.to_builtin_tensor %43078 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40614 = arith.constant 1 : index
    %dim_40615 = tensor.dim %43117, %c1_40614 : tensor<4x?x4096xf16>
    %43118 = flow.tensor.transfer %43117 : tensor<4x?x4096xf16>{%dim_40615} to #hal.device.promise<@__device_0>
    %43119 = torch_c.from_builtin_tensor %43118 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43120 = torch_c.to_builtin_tensor %43085 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40616 = arith.constant 1 : index
    %dim_40617 = tensor.dim %43120, %c1_40616 : tensor<4x?x4096xf16>
    %43121 = flow.tensor.transfer %43120 : tensor<4x?x4096xf16>{%dim_40617} to #hal.device.promise<@__device_0>
    %43122 = torch_c.from_builtin_tensor %43121 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43123 = torch_c.to_builtin_tensor %43092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40618 = arith.constant 1 : index
    %dim_40619 = tensor.dim %43123, %c1_40618 : tensor<4x?x4096xf16>
    %43124 = flow.tensor.transfer %43123 : tensor<4x?x4096xf16>{%dim_40619} to #hal.device.promise<@__device_0>
    %43125 = torch_c.from_builtin_tensor %43124 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43126 = torch_c.to_builtin_tensor %43099 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40620 = arith.constant 1 : index
    %dim_40621 = tensor.dim %43126, %c1_40620 : tensor<4x?x4096xf16>
    %43127 = flow.tensor.transfer %43126 : tensor<4x?x4096xf16>{%dim_40621} to #hal.device.promise<@__device_0>
    %43128 = torch_c.from_builtin_tensor %43127 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43129 = torch_c.to_builtin_tensor %43106 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40622 = arith.constant 1 : index
    %dim_40623 = tensor.dim %43129, %c1_40622 : tensor<4x?x4096xf16>
    %43130 = flow.tensor.transfer %43129 : tensor<4x?x4096xf16>{%dim_40623} to #hal.device.promise<@__device_0>
    %43131 = torch_c.from_builtin_tensor %43130 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43132 = torch_c.to_builtin_tensor %43113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40624 = arith.constant 1 : index
    %dim_40625 = tensor.dim %43132, %c1_40624 : tensor<4x?x4096xf16>
    %43133 = flow.tensor.transfer %43132 : tensor<4x?x4096xf16>{%dim_40625} to #hal.device.promise<@__device_0>
    %43134 = torch_c.from_builtin_tensor %43133 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40626 = torch.constant.int 1
    %43135 = torch.aten.add.Tensor %43064, %43116, %int1_40626 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40627 = torch.constant.int 1
    %43136 = torch.aten.add.Tensor %43135, %43119, %int1_40627 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40628 = torch.constant.int 1
    %43137 = torch.aten.add.Tensor %43136, %43122, %int1_40628 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40629 = torch.constant.int 1
    %43138 = torch.aten.add.Tensor %43137, %43125, %int1_40629 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40630 = torch.constant.int 1
    %43139 = torch.aten.add.Tensor %43138, %43128, %int1_40630 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40631 = torch.constant.int 1
    %43140 = torch.aten.add.Tensor %43139, %43131, %int1_40631 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40632 = torch.constant.int 1
    %43141 = torch.aten.add.Tensor %43140, %43134, %int1_40632 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
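    // Same reduction onto @__device_1 (local partial %43071); result: %43169.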
    %43142 = torch_c.to_builtin_tensor %43064 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40633 = arith.constant 1 : index
    %dim_40634 = tensor.dim %43142, %c1_40633 : tensor<4x?x4096xf16>
    %43143 = flow.tensor.transfer %43142 : tensor<4x?x4096xf16>{%dim_40634} to #hal.device.promise<@__device_1>
    %43144 = torch_c.from_builtin_tensor %43143 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43145 = torch_c.to_builtin_tensor %43078 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40635 = arith.constant 1 : index
    %dim_40636 = tensor.dim %43145, %c1_40635 : tensor<4x?x4096xf16>
    %43146 = flow.tensor.transfer %43145 : tensor<4x?x4096xf16>{%dim_40636} to #hal.device.promise<@__device_1>
    %43147 = torch_c.from_builtin_tensor %43146 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43148 = torch_c.to_builtin_tensor %43085 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40637 = arith.constant 1 : index
    %dim_40638 = tensor.dim %43148, %c1_40637 : tensor<4x?x4096xf16>
    %43149 = flow.tensor.transfer %43148 : tensor<4x?x4096xf16>{%dim_40638} to #hal.device.promise<@__device_1>
    %43150 = torch_c.from_builtin_tensor %43149 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43151 = torch_c.to_builtin_tensor %43092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40639 = arith.constant 1 : index
    %dim_40640 = tensor.dim %43151, %c1_40639 : tensor<4x?x4096xf16>
    %43152 = flow.tensor.transfer %43151 : tensor<4x?x4096xf16>{%dim_40640} to #hal.device.promise<@__device_1>
    %43153 = torch_c.from_builtin_tensor %43152 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43154 = torch_c.to_builtin_tensor %43099 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40641 = arith.constant 1 : index
    %dim_40642 = tensor.dim %43154, %c1_40641 : tensor<4x?x4096xf16>
    %43155 = flow.tensor.transfer %43154 : tensor<4x?x4096xf16>{%dim_40642} to #hal.device.promise<@__device_1>
    %43156 = torch_c.from_builtin_tensor %43155 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43157 = torch_c.to_builtin_tensor %43106 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40643 = arith.constant 1 : index
    %dim_40644 = tensor.dim %43157, %c1_40643 : tensor<4x?x4096xf16>
    %43158 = flow.tensor.transfer %43157 : tensor<4x?x4096xf16>{%dim_40644} to #hal.device.promise<@__device_1>
    %43159 = torch_c.from_builtin_tensor %43158 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43160 = torch_c.to_builtin_tensor %43113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40645 = arith.constant 1 : index
    %dim_40646 = tensor.dim %43160, %c1_40645 : tensor<4x?x4096xf16>
    %43161 = flow.tensor.transfer %43160 : tensor<4x?x4096xf16>{%dim_40646} to #hal.device.promise<@__device_1>
    %43162 = torch_c.from_builtin_tensor %43161 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40647 = torch.constant.int 1
    %43163 = torch.aten.add.Tensor %43144, %43071, %int1_40647 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40648 = torch.constant.int 1
    %43164 = torch.aten.add.Tensor %43163, %43147, %int1_40648 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40649 = torch.constant.int 1
    %43165 = torch.aten.add.Tensor %43164, %43150, %int1_40649 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40650 = torch.constant.int 1
    %43166 = torch.aten.add.Tensor %43165, %43153, %int1_40650 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40651 = torch.constant.int 1
    %43167 = torch.aten.add.Tensor %43166, %43156, %int1_40651 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40652 = torch.constant.int 1
    %43168 = torch.aten.add.Tensor %43167, %43159, %int1_40652 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40653 = torch.constant.int 1
    %43169 = torch.aten.add.Tensor %43168, %43162, %int1_40653 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
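    // Same reduction onto @__device_2 (local partial %43078); result: %43197.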
    %43170 = torch_c.to_builtin_tensor %43064 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40654 = arith.constant 1 : index
    %dim_40655 = tensor.dim %43170, %c1_40654 : tensor<4x?x4096xf16>
    %43171 = flow.tensor.transfer %43170 : tensor<4x?x4096xf16>{%dim_40655} to #hal.device.promise<@__device_2>
    %43172 = torch_c.from_builtin_tensor %43171 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43173 = torch_c.to_builtin_tensor %43071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40656 = arith.constant 1 : index
    %dim_40657 = tensor.dim %43173, %c1_40656 : tensor<4x?x4096xf16>
    %43174 = flow.tensor.transfer %43173 : tensor<4x?x4096xf16>{%dim_40657} to #hal.device.promise<@__device_2>
    %43175 = torch_c.from_builtin_tensor %43174 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43176 = torch_c.to_builtin_tensor %43085 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40658 = arith.constant 1 : index
    %dim_40659 = tensor.dim %43176, %c1_40658 : tensor<4x?x4096xf16>
    %43177 = flow.tensor.transfer %43176 : tensor<4x?x4096xf16>{%dim_40659} to #hal.device.promise<@__device_2>
    %43178 = torch_c.from_builtin_tensor %43177 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43179 = torch_c.to_builtin_tensor %43092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40660 = arith.constant 1 : index
    %dim_40661 = tensor.dim %43179, %c1_40660 : tensor<4x?x4096xf16>
    %43180 = flow.tensor.transfer %43179 : tensor<4x?x4096xf16>{%dim_40661} to #hal.device.promise<@__device_2>
    %43181 = torch_c.from_builtin_tensor %43180 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43182 = torch_c.to_builtin_tensor %43099 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40662 = arith.constant 1 : index
    %dim_40663 = tensor.dim %43182, %c1_40662 : tensor<4x?x4096xf16>
    %43183 = flow.tensor.transfer %43182 : tensor<4x?x4096xf16>{%dim_40663} to #hal.device.promise<@__device_2>
    %43184 = torch_c.from_builtin_tensor %43183 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43185 = torch_c.to_builtin_tensor %43106 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40664 = arith.constant 1 : index
    %dim_40665 = tensor.dim %43185, %c1_40664 : tensor<4x?x4096xf16>
    %43186 = flow.tensor.transfer %43185 : tensor<4x?x4096xf16>{%dim_40665} to #hal.device.promise<@__device_2>
    %43187 = torch_c.from_builtin_tensor %43186 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43188 = torch_c.to_builtin_tensor %43113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40666 = arith.constant 1 : index
    %dim_40667 = tensor.dim %43188, %c1_40666 : tensor<4x?x4096xf16>
    %43189 = flow.tensor.transfer %43188 : tensor<4x?x4096xf16>{%dim_40667} to #hal.device.promise<@__device_2>
    %43190 = torch_c.from_builtin_tensor %43189 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40668 = torch.constant.int 1
    %43191 = torch.aten.add.Tensor %43172, %43175, %int1_40668 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40669 = torch.constant.int 1
    %43192 = torch.aten.add.Tensor %43191, %43078, %int1_40669 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40670 = torch.constant.int 1
    %43193 = torch.aten.add.Tensor %43192, %43178, %int1_40670 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40671 = torch.constant.int 1
    %43194 = torch.aten.add.Tensor %43193, %43181, %int1_40671 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40672 = torch.constant.int 1
    %43195 = torch.aten.add.Tensor %43194, %43184, %int1_40672 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40673 = torch.constant.int 1
    %43196 = torch.aten.add.Tensor %43195, %43187, %int1_40673 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40674 = torch.constant.int 1
    %43197 = torch.aten.add.Tensor %43196, %43190, %int1_40674 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
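    // Same reduction onto @__device_3 (local partial %43085); result: %43225.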
    %43198 = torch_c.to_builtin_tensor %43064 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40675 = arith.constant 1 : index
    %dim_40676 = tensor.dim %43198, %c1_40675 : tensor<4x?x4096xf16>
    %43199 = flow.tensor.transfer %43198 : tensor<4x?x4096xf16>{%dim_40676} to #hal.device.promise<@__device_3>
    %43200 = torch_c.from_builtin_tensor %43199 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43201 = torch_c.to_builtin_tensor %43071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40677 = arith.constant 1 : index
    %dim_40678 = tensor.dim %43201, %c1_40677 : tensor<4x?x4096xf16>
    %43202 = flow.tensor.transfer %43201 : tensor<4x?x4096xf16>{%dim_40678} to #hal.device.promise<@__device_3>
    %43203 = torch_c.from_builtin_tensor %43202 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43204 = torch_c.to_builtin_tensor %43078 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40679 = arith.constant 1 : index
    %dim_40680 = tensor.dim %43204, %c1_40679 : tensor<4x?x4096xf16>
    %43205 = flow.tensor.transfer %43204 : tensor<4x?x4096xf16>{%dim_40680} to #hal.device.promise<@__device_3>
    %43206 = torch_c.from_builtin_tensor %43205 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43207 = torch_c.to_builtin_tensor %43092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40681 = arith.constant 1 : index
    %dim_40682 = tensor.dim %43207, %c1_40681 : tensor<4x?x4096xf16>
    %43208 = flow.tensor.transfer %43207 : tensor<4x?x4096xf16>{%dim_40682} to #hal.device.promise<@__device_3>
    %43209 = torch_c.from_builtin_tensor %43208 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43210 = torch_c.to_builtin_tensor %43099 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40683 = arith.constant 1 : index
    %dim_40684 = tensor.dim %43210, %c1_40683 : tensor<4x?x4096xf16>
    %43211 = flow.tensor.transfer %43210 : tensor<4x?x4096xf16>{%dim_40684} to #hal.device.promise<@__device_3>
    %43212 = torch_c.from_builtin_tensor %43211 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43213 = torch_c.to_builtin_tensor %43106 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40685 = arith.constant 1 : index
    %dim_40686 = tensor.dim %43213, %c1_40685 : tensor<4x?x4096xf16>
    %43214 = flow.tensor.transfer %43213 : tensor<4x?x4096xf16>{%dim_40686} to #hal.device.promise<@__device_3>
    %43215 = torch_c.from_builtin_tensor %43214 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43216 = torch_c.to_builtin_tensor %43113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40687 = arith.constant 1 : index
    %dim_40688 = tensor.dim %43216, %c1_40687 : tensor<4x?x4096xf16>
    %43217 = flow.tensor.transfer %43216 : tensor<4x?x4096xf16>{%dim_40688} to #hal.device.promise<@__device_3>
    %43218 = torch_c.from_builtin_tensor %43217 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40689 = torch.constant.int 1
    %43219 = torch.aten.add.Tensor %43200, %43203, %int1_40689 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40690 = torch.constant.int 1
    %43220 = torch.aten.add.Tensor %43219, %43206, %int1_40690 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40691 = torch.constant.int 1
    %43221 = torch.aten.add.Tensor %43220, %43085, %int1_40691 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40692 = torch.constant.int 1
    %43222 = torch.aten.add.Tensor %43221, %43209, %int1_40692 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40693 = torch.constant.int 1
    %43223 = torch.aten.add.Tensor %43222, %43212, %int1_40693 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40694 = torch.constant.int 1
    %43224 = torch.aten.add.Tensor %43223, %43215, %int1_40694 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40695 = torch.constant.int 1
    %43225 = torch.aten.add.Tensor %43224, %43218, %int1_40695 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
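    // Same reduction onto @__device_4 (local partial %43092); result: %43253.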
    %43226 = torch_c.to_builtin_tensor %43064 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40696 = arith.constant 1 : index
    %dim_40697 = tensor.dim %43226, %c1_40696 : tensor<4x?x4096xf16>
    %43227 = flow.tensor.transfer %43226 : tensor<4x?x4096xf16>{%dim_40697} to #hal.device.promise<@__device_4>
    %43228 = torch_c.from_builtin_tensor %43227 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43229 = torch_c.to_builtin_tensor %43071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40698 = arith.constant 1 : index
    %dim_40699 = tensor.dim %43229, %c1_40698 : tensor<4x?x4096xf16>
    %43230 = flow.tensor.transfer %43229 : tensor<4x?x4096xf16>{%dim_40699} to #hal.device.promise<@__device_4>
    %43231 = torch_c.from_builtin_tensor %43230 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43232 = torch_c.to_builtin_tensor %43078 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40700 = arith.constant 1 : index
    %dim_40701 = tensor.dim %43232, %c1_40700 : tensor<4x?x4096xf16>
    %43233 = flow.tensor.transfer %43232 : tensor<4x?x4096xf16>{%dim_40701} to #hal.device.promise<@__device_4>
    %43234 = torch_c.from_builtin_tensor %43233 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43235 = torch_c.to_builtin_tensor %43085 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40702 = arith.constant 1 : index
    %dim_40703 = tensor.dim %43235, %c1_40702 : tensor<4x?x4096xf16>
    %43236 = flow.tensor.transfer %43235 : tensor<4x?x4096xf16>{%dim_40703} to #hal.device.promise<@__device_4>
    %43237 = torch_c.from_builtin_tensor %43236 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43238 = torch_c.to_builtin_tensor %43099 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40704 = arith.constant 1 : index
    %dim_40705 = tensor.dim %43238, %c1_40704 : tensor<4x?x4096xf16>
    %43239 = flow.tensor.transfer %43238 : tensor<4x?x4096xf16>{%dim_40705} to #hal.device.promise<@__device_4>
    %43240 = torch_c.from_builtin_tensor %43239 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43241 = torch_c.to_builtin_tensor %43106 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40706 = arith.constant 1 : index
    %dim_40707 = tensor.dim %43241, %c1_40706 : tensor<4x?x4096xf16>
    %43242 = flow.tensor.transfer %43241 : tensor<4x?x4096xf16>{%dim_40707} to #hal.device.promise<@__device_4>
    %43243 = torch_c.from_builtin_tensor %43242 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43244 = torch_c.to_builtin_tensor %43113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40708 = arith.constant 1 : index
    %dim_40709 = tensor.dim %43244, %c1_40708 : tensor<4x?x4096xf16>
    %43245 = flow.tensor.transfer %43244 : tensor<4x?x4096xf16>{%dim_40709} to #hal.device.promise<@__device_4>
    %43246 = torch_c.from_builtin_tensor %43245 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40710 = torch.constant.int 1
    %43247 = torch.aten.add.Tensor %43228, %43231, %int1_40710 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40711 = torch.constant.int 1
    %43248 = torch.aten.add.Tensor %43247, %43234, %int1_40711 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40712 = torch.constant.int 1
    %43249 = torch.aten.add.Tensor %43248, %43237, %int1_40712 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40713 = torch.constant.int 1
    %43250 = torch.aten.add.Tensor %43249, %43092, %int1_40713 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40714 = torch.constant.int 1
    %43251 = torch.aten.add.Tensor %43250, %43240, %int1_40714 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40715 = torch.constant.int 1
    %43252 = torch.aten.add.Tensor %43251, %43243, %int1_40715 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40716 = torch.constant.int 1
    %43253 = torch.aten.add.Tensor %43252, %43246, %int1_40716 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
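    // Same reduction onto @__device_5 (local partial %43099); result: %43281.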
    %43254 = torch_c.to_builtin_tensor %43064 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40717 = arith.constant 1 : index
    %dim_40718 = tensor.dim %43254, %c1_40717 : tensor<4x?x4096xf16>
    %43255 = flow.tensor.transfer %43254 : tensor<4x?x4096xf16>{%dim_40718} to #hal.device.promise<@__device_5>
    %43256 = torch_c.from_builtin_tensor %43255 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43257 = torch_c.to_builtin_tensor %43071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40719 = arith.constant 1 : index
    %dim_40720 = tensor.dim %43257, %c1_40719 : tensor<4x?x4096xf16>
    %43258 = flow.tensor.transfer %43257 : tensor<4x?x4096xf16>{%dim_40720} to #hal.device.promise<@__device_5>
    %43259 = torch_c.from_builtin_tensor %43258 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43260 = torch_c.to_builtin_tensor %43078 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40721 = arith.constant 1 : index
    %dim_40722 = tensor.dim %43260, %c1_40721 : tensor<4x?x4096xf16>
    %43261 = flow.tensor.transfer %43260 : tensor<4x?x4096xf16>{%dim_40722} to #hal.device.promise<@__device_5>
    %43262 = torch_c.from_builtin_tensor %43261 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43263 = torch_c.to_builtin_tensor %43085 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40723 = arith.constant 1 : index
    %dim_40724 = tensor.dim %43263, %c1_40723 : tensor<4x?x4096xf16>
    %43264 = flow.tensor.transfer %43263 : tensor<4x?x4096xf16>{%dim_40724} to #hal.device.promise<@__device_5>
    %43265 = torch_c.from_builtin_tensor %43264 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43266 = torch_c.to_builtin_tensor %43092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40725 = arith.constant 1 : index
    %dim_40726 = tensor.dim %43266, %c1_40725 : tensor<4x?x4096xf16>
    %43267 = flow.tensor.transfer %43266 : tensor<4x?x4096xf16>{%dim_40726} to #hal.device.promise<@__device_5>
    %43268 = torch_c.from_builtin_tensor %43267 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43269 = torch_c.to_builtin_tensor %43106 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40727 = arith.constant 1 : index
    %dim_40728 = tensor.dim %43269, %c1_40727 : tensor<4x?x4096xf16>
    %43270 = flow.tensor.transfer %43269 : tensor<4x?x4096xf16>{%dim_40728} to #hal.device.promise<@__device_5>
    %43271 = torch_c.from_builtin_tensor %43270 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43272 = torch_c.to_builtin_tensor %43113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40729 = arith.constant 1 : index
    %dim_40730 = tensor.dim %43272, %c1_40729 : tensor<4x?x4096xf16>
    %43273 = flow.tensor.transfer %43272 : tensor<4x?x4096xf16>{%dim_40730} to #hal.device.promise<@__device_5>
    %43274 = torch_c.from_builtin_tensor %43273 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40731 = torch.constant.int 1
    %43275 = torch.aten.add.Tensor %43256, %43259, %int1_40731 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40732 = torch.constant.int 1
    %43276 = torch.aten.add.Tensor %43275, %43262, %int1_40732 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40733 = torch.constant.int 1
    %43277 = torch.aten.add.Tensor %43276, %43265, %int1_40733 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40734 = torch.constant.int 1
    %43278 = torch.aten.add.Tensor %43277, %43268, %int1_40734 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40735 = torch.constant.int 1
    %43279 = torch.aten.add.Tensor %43278, %43099, %int1_40735 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40736 = torch.constant.int 1
    %43280 = torch.aten.add.Tensor %43279, %43271, %int1_40736 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40737 = torch.constant.int 1
    %43281 = torch.aten.add.Tensor %43280, %43274, %int1_40737 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
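    // Same reduction onto @__device_6 (local partial %43106); result: %43309.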
    %43282 = torch_c.to_builtin_tensor %43064 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40738 = arith.constant 1 : index
    %dim_40739 = tensor.dim %43282, %c1_40738 : tensor<4x?x4096xf16>
    %43283 = flow.tensor.transfer %43282 : tensor<4x?x4096xf16>{%dim_40739} to #hal.device.promise<@__device_6>
    %43284 = torch_c.from_builtin_tensor %43283 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43285 = torch_c.to_builtin_tensor %43071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40740 = arith.constant 1 : index
    %dim_40741 = tensor.dim %43285, %c1_40740 : tensor<4x?x4096xf16>
    %43286 = flow.tensor.transfer %43285 : tensor<4x?x4096xf16>{%dim_40741} to #hal.device.promise<@__device_6>
    %43287 = torch_c.from_builtin_tensor %43286 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43288 = torch_c.to_builtin_tensor %43078 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40742 = arith.constant 1 : index
    %dim_40743 = tensor.dim %43288, %c1_40742 : tensor<4x?x4096xf16>
    %43289 = flow.tensor.transfer %43288 : tensor<4x?x4096xf16>{%dim_40743} to #hal.device.promise<@__device_6>
    %43290 = torch_c.from_builtin_tensor %43289 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43291 = torch_c.to_builtin_tensor %43085 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40744 = arith.constant 1 : index
    %dim_40745 = tensor.dim %43291, %c1_40744 : tensor<4x?x4096xf16>
    %43292 = flow.tensor.transfer %43291 : tensor<4x?x4096xf16>{%dim_40745} to #hal.device.promise<@__device_6>
    %43293 = torch_c.from_builtin_tensor %43292 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43294 = torch_c.to_builtin_tensor %43092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40746 = arith.constant 1 : index
    %dim_40747 = tensor.dim %43294, %c1_40746 : tensor<4x?x4096xf16>
    %43295 = flow.tensor.transfer %43294 : tensor<4x?x4096xf16>{%dim_40747} to #hal.device.promise<@__device_6>
    %43296 = torch_c.from_builtin_tensor %43295 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43297 = torch_c.to_builtin_tensor %43099 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40748 = arith.constant 1 : index
    %dim_40749 = tensor.dim %43297, %c1_40748 : tensor<4x?x4096xf16>
    %43298 = flow.tensor.transfer %43297 : tensor<4x?x4096xf16>{%dim_40749} to #hal.device.promise<@__device_6>
    %43299 = torch_c.from_builtin_tensor %43298 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43300 = torch_c.to_builtin_tensor %43113 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40750 = arith.constant 1 : index
    %dim_40751 = tensor.dim %43300, %c1_40750 : tensor<4x?x4096xf16>
    %43301 = flow.tensor.transfer %43300 : tensor<4x?x4096xf16>{%dim_40751} to #hal.device.promise<@__device_6>
    %43302 = torch_c.from_builtin_tensor %43301 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40752 = torch.constant.int 1
    %43303 = torch.aten.add.Tensor %43284, %43287, %int1_40752 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40753 = torch.constant.int 1
    %43304 = torch.aten.add.Tensor %43303, %43290, %int1_40753 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40754 = torch.constant.int 1
    %43305 = torch.aten.add.Tensor %43304, %43293, %int1_40754 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40755 = torch.constant.int 1
    %43306 = torch.aten.add.Tensor %43305, %43296, %int1_40755 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40756 = torch.constant.int 1
    %43307 = torch.aten.add.Tensor %43306, %43299, %int1_40756 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40757 = torch.constant.int 1
    %43308 = torch.aten.add.Tensor %43307, %43106, %int1_40757 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40758 = torch.constant.int 1
    %43309 = torch.aten.add.Tensor %43308, %43302, %int1_40758 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
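    // Same reduction onto @__device_7 (local partial %43113); result: %43337.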
    %43310 = torch_c.to_builtin_tensor %43064 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40759 = arith.constant 1 : index
    %dim_40760 = tensor.dim %43310, %c1_40759 : tensor<4x?x4096xf16>
    %43311 = flow.tensor.transfer %43310 : tensor<4x?x4096xf16>{%dim_40760} to #hal.device.promise<@__device_7>
    %43312 = torch_c.from_builtin_tensor %43311 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43313 = torch_c.to_builtin_tensor %43071 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40761 = arith.constant 1 : index
    %dim_40762 = tensor.dim %43313, %c1_40761 : tensor<4x?x4096xf16>
    %43314 = flow.tensor.transfer %43313 : tensor<4x?x4096xf16>{%dim_40762} to #hal.device.promise<@__device_7>
    %43315 = torch_c.from_builtin_tensor %43314 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43316 = torch_c.to_builtin_tensor %43078 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40763 = arith.constant 1 : index
    %dim_40764 = tensor.dim %43316, %c1_40763 : tensor<4x?x4096xf16>
    %43317 = flow.tensor.transfer %43316 : tensor<4x?x4096xf16>{%dim_40764} to #hal.device.promise<@__device_7>
    %43318 = torch_c.from_builtin_tensor %43317 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43319 = torch_c.to_builtin_tensor %43085 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40765 = arith.constant 1 : index
    %dim_40766 = tensor.dim %43319, %c1_40765 : tensor<4x?x4096xf16>
    %43320 = flow.tensor.transfer %43319 : tensor<4x?x4096xf16>{%dim_40766} to #hal.device.promise<@__device_7>
    %43321 = torch_c.from_builtin_tensor %43320 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43322 = torch_c.to_builtin_tensor %43092 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40767 = arith.constant 1 : index
    %dim_40768 = tensor.dim %43322, %c1_40767 : tensor<4x?x4096xf16>
    %43323 = flow.tensor.transfer %43322 : tensor<4x?x4096xf16>{%dim_40768} to #hal.device.promise<@__device_7>
    %43324 = torch_c.from_builtin_tensor %43323 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43325 = torch_c.to_builtin_tensor %43099 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40769 = arith.constant 1 : index
    %dim_40770 = tensor.dim %43325, %c1_40769 : tensor<4x?x4096xf16>
    %43326 = flow.tensor.transfer %43325 : tensor<4x?x4096xf16>{%dim_40770} to #hal.device.promise<@__device_7>
    %43327 = torch_c.from_builtin_tensor %43326 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %43328 = torch_c.to_builtin_tensor %43106 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_40771 = arith.constant 1 : index
    %dim_40772 = tensor.dim %43328, %c1_40771 : tensor<4x?x4096xf16>
    %43329 = flow.tensor.transfer %43328 : tensor<4x?x4096xf16>{%dim_40772} to #hal.device.promise<@__device_7>
    %43330 = torch_c.from_builtin_tensor %43329 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40773 = torch.constant.int 1
    %43331 = torch.aten.add.Tensor %43312, %43315, %int1_40773 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40774 = torch.constant.int 1
    %43332 = torch.aten.add.Tensor %43331, %43318, %int1_40774 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40775 = torch.constant.int 1
    %43333 = torch.aten.add.Tensor %43332, %43321, %int1_40775 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40776 = torch.constant.int 1
    %43334 = torch.aten.add.Tensor %43333, %43324, %int1_40776 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40777 = torch.constant.int 1
    %43335 = torch.aten.add.Tensor %43334, %43327, %int1_40777 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40778 = torch.constant.int 1
    %43336 = torch.aten.add.Tensor %43335, %43330, %int1_40778 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40779 = torch.constant.int 1
    %43337 = torch.aten.add.Tensor %43336, %43113, %int1_40779 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
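    // What follows appears to be the residual connection: each device adds its
    // fully reduced FFN output (%43141 ... %43337) to its copy of the residual
    // stream (%42818 ... %42825), yielding %43338 ... %43345.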
    %int1_40780 = torch.constant.int 1
    %43338 = torch.aten.add.Tensor %42818, %43141, %int1_40780 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40781 = torch.constant.int 1
    %43339 = torch.aten.add.Tensor %42819, %43169, %int1_40781 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40782 = torch.constant.int 1
    %43340 = torch.aten.add.Tensor %42820, %43197, %int1_40782 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40783 = torch.constant.int 1
    %43341 = torch.aten.add.Tensor %42821, %43225, %int1_40783 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40784 = torch.constant.int 1
    %43342 = torch.aten.add.Tensor %42822, %43253, %int1_40784 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40785 = torch.constant.int 1
    %43343 = torch.aten.add.Tensor %42823, %43281, %int1_40785 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40786 = torch.constant.int 1
    %43344 = torch.aten.add.Tensor %42824, %43309, %int1_40786 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_40787 = torch.constant.int 1
    %43345 = torch.aten.add.Tensor %42825, %43337, %int1_40787 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
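    // Start of the next normalization, RMSNorm-style: each per-device residual is
    // upcast to f32 (torch dtype code 6) before statistics are computed.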
    %int6_40788 = torch.constant.int 6
    %43346 = torch.prims.convert_element_type %43338, %int6_40788 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40789 = torch.constant.int 6
    %43347 = torch.prims.convert_element_type %43339, %int6_40789 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40790 = torch.constant.int 6
    %43348 = torch.prims.convert_element_type %43340, %int6_40790 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40791 = torch.constant.int 6
    %43349 = torch.prims.convert_element_type %43341, %int6_40791 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40792 = torch.constant.int 6
    %43350 = torch.prims.convert_element_type %43342, %int6_40792 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43350, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40793 = torch.constant.int 6
    %43351 = torch.prims.convert_element_type %43343, %int6_40793 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40794 = torch.constant.int 6
    %43352 = torch.prims.convert_element_type %43344, %int6_40794 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_40795 = torch.constant.int 6
    %43353 = torch.prims.convert_element_type %43345, %int6_40795 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
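    // Square each element: x^2 (first step of the mean-of-squares).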
    %int2_40796 = torch.constant.int 2
    %43354 = torch.aten.pow.Tensor_Scalar %43346, %int2_40796 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40797 = torch.constant.int 2
    %43355 = torch.aten.pow.Tensor_Scalar %43347, %int2_40797 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40798 = torch.constant.int 2
    %43356 = torch.aten.pow.Tensor_Scalar %43348, %int2_40798 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40799 = torch.constant.int 2
    %43357 = torch.aten.pow.Tensor_Scalar %43349, %int2_40799 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40800 = torch.constant.int 2
    %43358 = torch.aten.pow.Tensor_Scalar %43350, %int2_40800 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40801 = torch.constant.int 2
    %43359 = torch.aten.pow.Tensor_Scalar %43351, %int2_40801 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40802 = torch.constant.int 2
    %43360 = torch.aten.pow.Tensor_Scalar %43352, %int2_40802 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_40803 = torch.constant.int 2
    %43361 = torch.aten.pow.Tensor_Scalar %43353, %int2_40803 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
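    // Mean of squares over the hidden dimension (dim -1, keepdim = true).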
    %int-1_40804 = torch.constant.int -1
    %43362 = torch.prim.ListConstruct %int-1_40804 : (!torch.int) -> !torch.list<int>
    %true_40805 = torch.constant.bool true
    %none_40806 = torch.constant.none
    %43363 = torch.aten.mean.dim %43354, %43362, %true_40805, %none_40806 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40807 = torch.constant.int -1
    %43364 = torch.prim.ListConstruct %int-1_40807 : (!torch.int) -> !torch.list<int>
    %true_40808 = torch.constant.bool true
    %none_40809 = torch.constant.none
    %43365 = torch.aten.mean.dim %43355, %43364, %true_40808, %none_40809 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40810 = torch.constant.int -1
    %43366 = torch.prim.ListConstruct %int-1_40810 : (!torch.int) -> !torch.list<int>
    %true_40811 = torch.constant.bool true
    %none_40812 = torch.constant.none
    %43367 = torch.aten.mean.dim %43356, %43366, %true_40811, %none_40812 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40813 = torch.constant.int -1
    %43368 = torch.prim.ListConstruct %int-1_40813 : (!torch.int) -> !torch.list<int>
    %true_40814 = torch.constant.bool true
    %none_40815 = torch.constant.none
    %43369 = torch.aten.mean.dim %43357, %43368, %true_40814, %none_40815 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40816 = torch.constant.int -1
    %43370 = torch.prim.ListConstruct %int-1_40816 : (!torch.int) -> !torch.list<int>
    %true_40817 = torch.constant.bool true
    %none_40818 = torch.constant.none
    %43371 = torch.aten.mean.dim %43358, %43370, %true_40817, %none_40818 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40819 = torch.constant.int -1
    %43372 = torch.prim.ListConstruct %int-1_40819 : (!torch.int) -> !torch.list<int>
    %true_40820 = torch.constant.bool true
    %none_40821 = torch.constant.none
    %43373 = torch.aten.mean.dim %43359, %43372, %true_40820, %none_40821 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40822 = torch.constant.int -1
    %43374 = torch.prim.ListConstruct %int-1_40822 : (!torch.int) -> !torch.list<int>
    %true_40823 = torch.constant.bool true
    %none_40824 = torch.constant.none
    %43375 = torch.aten.mean.dim %43360, %43374, %true_40823, %none_40824 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_40825 = torch.constant.int -1
    %43376 = torch.prim.ListConstruct %int-1_40825 : (!torch.int) -> !torch.list<int>
    %true_40826 = torch.constant.bool true
    %none_40827 = torch.constant.none
    %43377 = torch.aten.mean.dim %43361, %43376, %true_40826, %none_40827 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
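    // Add the variance epsilon (~1.0e-5) for numerical stability.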
    %float9.999990e-06_40828 = torch.constant.float 9.9999997473787516E-6
    %int1_40829 = torch.constant.int 1
    %43378 = torch.aten.add.Scalar %43363, %float9.999990e-06_40828, %int1_40829 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40830 = torch.constant.float 9.9999997473787516E-6
    %int1_40831 = torch.constant.int 1
    %43379 = torch.aten.add.Scalar %43365, %float9.999990e-06_40830, %int1_40831 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40832 = torch.constant.float 9.9999997473787516E-6
    %int1_40833 = torch.constant.int 1
    %43380 = torch.aten.add.Scalar %43367, %float9.999990e-06_40832, %int1_40833 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40834 = torch.constant.float 9.9999997473787516E-6
    %int1_40835 = torch.constant.int 1
    %43381 = torch.aten.add.Scalar %43369, %float9.999990e-06_40834, %int1_40835 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40836 = torch.constant.float 9.9999997473787516E-6
    %int1_40837 = torch.constant.int 1
    %43382 = torch.aten.add.Scalar %43371, %float9.999990e-06_40836, %int1_40837 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40838 = torch.constant.float 9.9999997473787516E-6
    %int1_40839 = torch.constant.int 1
    %43383 = torch.aten.add.Scalar %43373, %float9.999990e-06_40838, %int1_40839 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40840 = torch.constant.float 9.9999997473787516E-6
    %int1_40841 = torch.constant.int 1
    %43384 = torch.aten.add.Scalar %43375, %float9.999990e-06_40840, %int1_40841 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_40842 = torch.constant.float 9.9999997473787516E-6
    %int1_40843 = torch.constant.int 1
    %43385 = torch.aten.add.Scalar %43377, %float9.999990e-06_40842, %int1_40843 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
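    // Reciprocal square root of (mean(x^2) + eps).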
    %43386 = torch.aten.rsqrt %43378 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %43387 = torch.aten.rsqrt %43379 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %43388 = torch.aten.rsqrt %43380 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %43389 = torch.aten.rsqrt %43381 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %43390 = torch.aten.rsqrt %43382 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %43391 = torch.aten.rsqrt %43383 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %43392 = torch.aten.rsqrt %43384 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %43393 = torch.aten.rsqrt %43385 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %43393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
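    // Normalize: x * rsqrt(mean(x^2) + eps).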
    %43394 = torch.aten.mul.Tensor %43346, %43386 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43395 = torch.aten.mul.Tensor %43347, %43387 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43396 = torch.aten.mul.Tensor %43348, %43388 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43397 = torch.aten.mul.Tensor %43349, %43389 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43398 = torch.aten.mul.Tensor %43350, %43390 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43399 = torch.aten.mul.Tensor %43351, %43391 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43400 = torch.aten.mul.Tensor %43352, %43392 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43401 = torch.aten.mul.Tensor %43353, %43393 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
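    // Scale by the learned norm weight (%1592..%1599, one replica per device).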
    %43402 = torch.aten.mul.Tensor %1592, %43394 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43403 = torch.aten.mul.Tensor %1593, %43395 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43404 = torch.aten.mul.Tensor %1594, %43396 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43405 = torch.aten.mul.Tensor %1595, %43397 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43406 = torch.aten.mul.Tensor %1596, %43398 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43407 = torch.aten.mul.Tensor %1597, %43399 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43408 = torch.aten.mul.Tensor %1598, %43400 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %43409 = torch.aten.mul.Tensor %1599, %43401 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %43409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
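    // Cast the normalized activations back to f16 (torch dtype code 5).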
    %int5_40844 = torch.constant.int 5
    %43410 = torch.prims.convert_element_type %43402, %int5_40844 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40845 = torch.constant.int 5
    %43411 = torch.prims.convert_element_type %43403, %int5_40845 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40846 = torch.constant.int 5
    %43412 = torch.prims.convert_element_type %43404, %int5_40846 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40847 = torch.constant.int 5
    %43413 = torch.prims.convert_element_type %43405, %int5_40847 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40848 = torch.constant.int 5
    %43414 = torch.prims.convert_element_type %43406, %int5_40848 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40849 = torch.constant.int 5
    %43415 = torch.prims.convert_element_type %43407, %int5_40849 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40850 = torch.constant.int 5
    %43416 = torch.prims.convert_element_type %43408, %int5_40850 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_40851 = torch.constant.int 5
    %43417 = torch.prims.convert_element_type %43409, %int5_40851 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %43417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
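    // Transpose the eight [512,4096] projection weight shards to [4096,512]
    // for the matmuls below. The 512 columns per shard suggest 4 heads of
    // 128 dims on each of the 8 devices (presumably the Q projection).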
    %int1_40852 = torch.constant.int 1
    %int0_40853 = torch.constant.int 0
    %43418 = torch.prim.ListConstruct %int1_40852, %int0_40853 : (!torch.int, !torch.int) -> !torch.list<int>
    %43419 = torch.aten.permute %1600, %43418 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_40854 = torch.constant.int 1
    %int0_40855 = torch.constant.int 0
    %43420 = torch.prim.ListConstruct %int1_40854, %int0_40855 : (!torch.int, !torch.int) -> !torch.list<int>
    %43421 = torch.aten.permute %1601, %43420 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_40856 = torch.constant.int 1
    %int0_40857 = torch.constant.int 0
    %43422 = torch.prim.ListConstruct %int1_40856, %int0_40857 : (!torch.int, !torch.int) -> !torch.list<int>
    %43423 = torch.aten.permute %1602, %43422 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_40858 = torch.constant.int 1
    %int0_40859 = torch.constant.int 0
    %43424 = torch.prim.ListConstruct %int1_40858, %int0_40859 : (!torch.int, !torch.int) -> !torch.list<int>
    %43425 = torch.aten.permute %1603, %43424 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_40860 = torch.constant.int 1
    %int0_40861 = torch.constant.int 0
    %43426 = torch.prim.ListConstruct %int1_40860, %int0_40861 : (!torch.int, !torch.int) -> !torch.list<int>
    %43427 = torch.aten.permute %1604, %43426 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_40862 = torch.constant.int 1
    %int0_40863 = torch.constant.int 0
    %43428 = torch.prim.ListConstruct %int1_40862, %int0_40863 : (!torch.int, !torch.int) -> !torch.list<int>
    %43429 = torch.aten.permute %1605, %43428 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_40864 = torch.constant.int 1
    %int0_40865 = torch.constant.int 0
    %43430 = torch.prim.ListConstruct %int1_40864, %int0_40865 : (!torch.int, !torch.int) -> !torch.list<int>
    %43431 = torch.aten.permute %1606, %43430 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_40866 = torch.constant.int 1
    %int0_40867 = torch.constant.int 0
    %43432 = torch.prim.ListConstruct %int1_40866, %int0_40867 : (!torch.int, !torch.int) -> !torch.list<int>
    %43433 = torch.aten.permute %1607, %43432 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
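    // Per-shard projection: flatten [4,?,4096] to [4*?,4096], matmul against
    // the transposed [4096,512] weight, then view back to [4,?,512].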
    %int4_40868 = torch.constant.int 4
    %43434 = torch.aten.mul.int %int4_40868, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40869 = torch.constant.int 4096
    %43435 = torch.prim.ListConstruct %43434, %int4096_40869 : (!torch.int, !torch.int) -> !torch.list<int>
    %43436 = torch.aten.view %43410, %43435 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43436, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43437 = torch.aten.mm %43436, %43419 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %43437, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_40870 = torch.constant.int 4
    %int512_40871 = torch.constant.int 512
    %43438 = torch.prim.ListConstruct %int4_40870, %2482, %int512_40871 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43439 = torch.aten.view %43437, %43438 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %43439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40872 = torch.constant.int 4
    %43440 = torch.aten.mul.int %int4_40872, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40873 = torch.constant.int 4096
    %43441 = torch.prim.ListConstruct %43440, %int4096_40873 : (!torch.int, !torch.int) -> !torch.list<int>
    %43442 = torch.aten.view %43411, %43441 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43442, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43443 = torch.aten.mm %43442, %43421 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %43443, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_40874 = torch.constant.int 4
    %int512_40875 = torch.constant.int 512
    %43444 = torch.prim.ListConstruct %int4_40874, %2482, %int512_40875 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43445 = torch.aten.view %43443, %43444 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %43445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40876 = torch.constant.int 4
    %43446 = torch.aten.mul.int %int4_40876, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40877 = torch.constant.int 4096
    %43447 = torch.prim.ListConstruct %43446, %int4096_40877 : (!torch.int, !torch.int) -> !torch.list<int>
    %43448 = torch.aten.view %43412, %43447 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43448, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43449 = torch.aten.mm %43448, %43423 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %43449, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_40878 = torch.constant.int 4
    %int512_40879 = torch.constant.int 512
    %43450 = torch.prim.ListConstruct %int4_40878, %2482, %int512_40879 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43451 = torch.aten.view %43449, %43450 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %43451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40880 = torch.constant.int 4
    %43452 = torch.aten.mul.int %int4_40880, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40881 = torch.constant.int 4096
    %43453 = torch.prim.ListConstruct %43452, %int4096_40881 : (!torch.int, !torch.int) -> !torch.list<int>
    %43454 = torch.aten.view %43413, %43453 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43454, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43455 = torch.aten.mm %43454, %43425 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %43455, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_40882 = torch.constant.int 4
    %int512_40883 = torch.constant.int 512
    %43456 = torch.prim.ListConstruct %int4_40882, %2482, %int512_40883 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43457 = torch.aten.view %43455, %43456 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %43457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40884 = torch.constant.int 4
    %43458 = torch.aten.mul.int %int4_40884, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40885 = torch.constant.int 4096
    %43459 = torch.prim.ListConstruct %43458, %int4096_40885 : (!torch.int, !torch.int) -> !torch.list<int>
    %43460 = torch.aten.view %43414, %43459 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43460, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43461 = torch.aten.mm %43460, %43427 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %43461, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_40886 = torch.constant.int 4
    %int512_40887 = torch.constant.int 512
    %43462 = torch.prim.ListConstruct %int4_40886, %2482, %int512_40887 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43463 = torch.aten.view %43461, %43462 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %43463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40888 = torch.constant.int 4
    %43464 = torch.aten.mul.int %int4_40888, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40889 = torch.constant.int 4096
    %43465 = torch.prim.ListConstruct %43464, %int4096_40889 : (!torch.int, !torch.int) -> !torch.list<int>
    %43466 = torch.aten.view %43415, %43465 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43466, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43467 = torch.aten.mm %43466, %43429 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %43467, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_40890 = torch.constant.int 4
    %int512_40891 = torch.constant.int 512
    %43468 = torch.prim.ListConstruct %int4_40890, %2482, %int512_40891 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43469 = torch.aten.view %43467, %43468 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %43469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40892 = torch.constant.int 4
    %43470 = torch.aten.mul.int %int4_40892, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40893 = torch.constant.int 4096
    %43471 = torch.prim.ListConstruct %43470, %int4096_40893 : (!torch.int, !torch.int) -> !torch.list<int>
    %43472 = torch.aten.view %43416, %43471 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43472, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43473 = torch.aten.mm %43472, %43431 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %43473, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_40894 = torch.constant.int 4
    %int512_40895 = torch.constant.int 512
    %43474 = torch.prim.ListConstruct %int4_40894, %2482, %int512_40895 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43475 = torch.aten.view %43473, %43474 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %43475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_40896 = torch.constant.int 4
    %43476 = torch.aten.mul.int %int4_40896, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40897 = torch.constant.int 4096
    %43477 = torch.prim.ListConstruct %43476, %int4096_40897 : (!torch.int, !torch.int) -> !torch.list<int>
    %43478 = torch.aten.view %43417, %43477 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43478, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43479 = torch.aten.mm %43478, %43433 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %43479, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_40898 = torch.constant.int 4
    %int512_40899 = torch.constant.int 512
    %43480 = torch.prim.ListConstruct %int4_40898, %2482, %int512_40899 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43481 = torch.aten.view %43479, %43480 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %43481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
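    // Transpose the eight [128,4096] weight shards to [4096,128]; 128 columns
    // per shard is one 128-dim head per device (presumably the K projection).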
    %int1_40900 = torch.constant.int 1
    %int0_40901 = torch.constant.int 0
    %43482 = torch.prim.ListConstruct %int1_40900, %int0_40901 : (!torch.int, !torch.int) -> !torch.list<int>
    %43483 = torch.aten.permute %1608, %43482 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40902 = torch.constant.int 1
    %int0_40903 = torch.constant.int 0
    %43484 = torch.prim.ListConstruct %int1_40902, %int0_40903 : (!torch.int, !torch.int) -> !torch.list<int>
    %43485 = torch.aten.permute %1609, %43484 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40904 = torch.constant.int 1
    %int0_40905 = torch.constant.int 0
    %43486 = torch.prim.ListConstruct %int1_40904, %int0_40905 : (!torch.int, !torch.int) -> !torch.list<int>
    %43487 = torch.aten.permute %1610, %43486 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40906 = torch.constant.int 1
    %int0_40907 = torch.constant.int 0
    %43488 = torch.prim.ListConstruct %int1_40906, %int0_40907 : (!torch.int, !torch.int) -> !torch.list<int>
    %43489 = torch.aten.permute %1611, %43488 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40908 = torch.constant.int 1
    %int0_40909 = torch.constant.int 0
    %43490 = torch.prim.ListConstruct %int1_40908, %int0_40909 : (!torch.int, !torch.int) -> !torch.list<int>
    %43491 = torch.aten.permute %1612, %43490 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40910 = torch.constant.int 1
    %int0_40911 = torch.constant.int 0
    %43492 = torch.prim.ListConstruct %int1_40910, %int0_40911 : (!torch.int, !torch.int) -> !torch.list<int>
    %43493 = torch.aten.permute %1613, %43492 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40912 = torch.constant.int 1
    %int0_40913 = torch.constant.int 0
    %43494 = torch.prim.ListConstruct %int1_40912, %int0_40913 : (!torch.int, !torch.int) -> !torch.list<int>
    %43495 = torch.aten.permute %1614, %43494 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40914 = torch.constant.int 1
    %int0_40915 = torch.constant.int 0
    %43496 = torch.prim.ListConstruct %int1_40914, %int0_40915 : (!torch.int, !torch.int) -> !torch.list<int>
    %43497 = torch.aten.permute %1615, %43496 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
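    // Per-shard projection to [4,?,128]: flatten, matmul, view back, as above.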
    %int4_40916 = torch.constant.int 4
    %43498 = torch.aten.mul.int %int4_40916, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40917 = torch.constant.int 4096
    %43499 = torch.prim.ListConstruct %43498, %int4096_40917 : (!torch.int, !torch.int) -> !torch.list<int>
    %43500 = torch.aten.view %43410, %43499 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43500, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43501 = torch.aten.mm %43500, %43483 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43501, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40918 = torch.constant.int 4
    %int128_40919 = torch.constant.int 128
    %43502 = torch.prim.ListConstruct %int4_40918, %2482, %int128_40919 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43503 = torch.aten.view %43501, %43502 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40920 = torch.constant.int 4
    %43504 = torch.aten.mul.int %int4_40920, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40921 = torch.constant.int 4096
    %43505 = torch.prim.ListConstruct %43504, %int4096_40921 : (!torch.int, !torch.int) -> !torch.list<int>
    %43506 = torch.aten.view %43411, %43505 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43506, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43507 = torch.aten.mm %43506, %43485 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43507, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40922 = torch.constant.int 4
    %int128_40923 = torch.constant.int 128
    %43508 = torch.prim.ListConstruct %int4_40922, %2482, %int128_40923 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43509 = torch.aten.view %43507, %43508 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40924 = torch.constant.int 4
    %43510 = torch.aten.mul.int %int4_40924, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40925 = torch.constant.int 4096
    %43511 = torch.prim.ListConstruct %43510, %int4096_40925 : (!torch.int, !torch.int) -> !torch.list<int>
    %43512 = torch.aten.view %43412, %43511 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43512, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43513 = torch.aten.mm %43512, %43487 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43513, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40926 = torch.constant.int 4
    %int128_40927 = torch.constant.int 128
    %43514 = torch.prim.ListConstruct %int4_40926, %2482, %int128_40927 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43515 = torch.aten.view %43513, %43514 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40928 = torch.constant.int 4
    %43516 = torch.aten.mul.int %int4_40928, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40929 = torch.constant.int 4096
    %43517 = torch.prim.ListConstruct %43516, %int4096_40929 : (!torch.int, !torch.int) -> !torch.list<int>
    %43518 = torch.aten.view %43413, %43517 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43518, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43519 = torch.aten.mm %43518, %43489 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43519, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40930 = torch.constant.int 4
    %int128_40931 = torch.constant.int 128
    %43520 = torch.prim.ListConstruct %int4_40930, %2482, %int128_40931 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43521 = torch.aten.view %43519, %43520 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40932 = torch.constant.int 4
    %43522 = torch.aten.mul.int %int4_40932, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40933 = torch.constant.int 4096
    %43523 = torch.prim.ListConstruct %43522, %int4096_40933 : (!torch.int, !torch.int) -> !torch.list<int>
    %43524 = torch.aten.view %43414, %43523 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43524, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43525 = torch.aten.mm %43524, %43491 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43525, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40934 = torch.constant.int 4
    %int128_40935 = torch.constant.int 128
    %43526 = torch.prim.ListConstruct %int4_40934, %2482, %int128_40935 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43527 = torch.aten.view %43525, %43526 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40936 = torch.constant.int 4
    %43528 = torch.aten.mul.int %int4_40936, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40937 = torch.constant.int 4096
    %43529 = torch.prim.ListConstruct %43528, %int4096_40937 : (!torch.int, !torch.int) -> !torch.list<int>
    %43530 = torch.aten.view %43415, %43529 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43530, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43531 = torch.aten.mm %43530, %43493 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43531, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40938 = torch.constant.int 4
    %int128_40939 = torch.constant.int 128
    %43532 = torch.prim.ListConstruct %int4_40938, %2482, %int128_40939 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43533 = torch.aten.view %43531, %43532 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40940 = torch.constant.int 4
    %43534 = torch.aten.mul.int %int4_40940, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40941 = torch.constant.int 4096
    %43535 = torch.prim.ListConstruct %43534, %int4096_40941 : (!torch.int, !torch.int) -> !torch.list<int>
    %43536 = torch.aten.view %43416, %43535 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43536, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43537 = torch.aten.mm %43536, %43495 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43537, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40942 = torch.constant.int 4
    %int128_40943 = torch.constant.int 128
    %43538 = torch.prim.ListConstruct %int4_40942, %2482, %int128_40943 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43539 = torch.aten.view %43537, %43538 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40944 = torch.constant.int 4
    %43540 = torch.aten.mul.int %int4_40944, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40945 = torch.constant.int 4096
    %43541 = torch.prim.ListConstruct %43540, %int4096_40945 : (!torch.int, !torch.int) -> !torch.list<int>
    %43542 = torch.aten.view %43417, %43541 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43542, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43543 = torch.aten.mm %43542, %43497 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43543, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40946 = torch.constant.int 4
    %int128_40947 = torch.constant.int 128
    %43544 = torch.prim.ListConstruct %int4_40946, %2482, %int128_40947 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43545 = torch.aten.view %43543, %43544 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
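    // A second set of eight [128,4096] shards transposed to [4096,128]
    // (presumably the V projection, by analogy with the block above).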
    %int1_40948 = torch.constant.int 1
    %int0_40949 = torch.constant.int 0
    %43546 = torch.prim.ListConstruct %int1_40948, %int0_40949 : (!torch.int, !torch.int) -> !torch.list<int>
    %43547 = torch.aten.permute %1616, %43546 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40950 = torch.constant.int 1
    %int0_40951 = torch.constant.int 0
    %43548 = torch.prim.ListConstruct %int1_40950, %int0_40951 : (!torch.int, !torch.int) -> !torch.list<int>
    %43549 = torch.aten.permute %1617, %43548 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40952 = torch.constant.int 1
    %int0_40953 = torch.constant.int 0
    %43550 = torch.prim.ListConstruct %int1_40952, %int0_40953 : (!torch.int, !torch.int) -> !torch.list<int>
    %43551 = torch.aten.permute %1618, %43550 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40954 = torch.constant.int 1
    %int0_40955 = torch.constant.int 0
    %43552 = torch.prim.ListConstruct %int1_40954, %int0_40955 : (!torch.int, !torch.int) -> !torch.list<int>
    %43553 = torch.aten.permute %1619, %43552 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40956 = torch.constant.int 1
    %int0_40957 = torch.constant.int 0
    %43554 = torch.prim.ListConstruct %int1_40956, %int0_40957 : (!torch.int, !torch.int) -> !torch.list<int>
    %43555 = torch.aten.permute %1620, %43554 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40958 = torch.constant.int 1
    %int0_40959 = torch.constant.int 0
    %43556 = torch.prim.ListConstruct %int1_40958, %int0_40959 : (!torch.int, !torch.int) -> !torch.list<int>
    %43557 = torch.aten.permute %1621, %43556 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40960 = torch.constant.int 1
    %int0_40961 = torch.constant.int 0
    %43558 = torch.prim.ListConstruct %int1_40960, %int0_40961 : (!torch.int, !torch.int) -> !torch.list<int>
    %43559 = torch.aten.permute %1622, %43558 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_40962 = torch.constant.int 1
    %int0_40963 = torch.constant.int 0
    %43560 = torch.prim.ListConstruct %int1_40962, %int0_40963 : (!torch.int, !torch.int) -> !torch.list<int>
    %43561 = torch.aten.permute %1623, %43560 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
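    // Per-shard projection of the same normalized activations to [4,?,128].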
    %int4_40964 = torch.constant.int 4
    %43562 = torch.aten.mul.int %int4_40964, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40965 = torch.constant.int 4096
    %43563 = torch.prim.ListConstruct %43562, %int4096_40965 : (!torch.int, !torch.int) -> !torch.list<int>
    %43564 = torch.aten.view %43410, %43563 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43564, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43565 = torch.aten.mm %43564, %43547 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43565, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40966 = torch.constant.int 4
    %int128_40967 = torch.constant.int 128
    %43566 = torch.prim.ListConstruct %int4_40966, %2482, %int128_40967 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43567 = torch.aten.view %43565, %43566 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40968 = torch.constant.int 4
    %43568 = torch.aten.mul.int %int4_40968, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40969 = torch.constant.int 4096
    %43569 = torch.prim.ListConstruct %43568, %int4096_40969 : (!torch.int, !torch.int) -> !torch.list<int>
    %43570 = torch.aten.view %43411, %43569 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43570, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43571 = torch.aten.mm %43570, %43549 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43571, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40970 = torch.constant.int 4
    %int128_40971 = torch.constant.int 128
    %43572 = torch.prim.ListConstruct %int4_40970, %2482, %int128_40971 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43573 = torch.aten.view %43571, %43572 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40972 = torch.constant.int 4
    %43574 = torch.aten.mul.int %int4_40972, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40973 = torch.constant.int 4096
    %43575 = torch.prim.ListConstruct %43574, %int4096_40973 : (!torch.int, !torch.int) -> !torch.list<int>
    %43576 = torch.aten.view %43412, %43575 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43576, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43577 = torch.aten.mm %43576, %43551 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43577, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40974 = torch.constant.int 4
    %int128_40975 = torch.constant.int 128
    %43578 = torch.prim.ListConstruct %int4_40974, %2482, %int128_40975 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43579 = torch.aten.view %43577, %43578 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40976 = torch.constant.int 4
    %43580 = torch.aten.mul.int %int4_40976, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40977 = torch.constant.int 4096
    %43581 = torch.prim.ListConstruct %43580, %int4096_40977 : (!torch.int, !torch.int) -> !torch.list<int>
    %43582 = torch.aten.view %43413, %43581 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43582, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43583 = torch.aten.mm %43582, %43553 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43583, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40978 = torch.constant.int 4
    %int128_40979 = torch.constant.int 128
    %43584 = torch.prim.ListConstruct %int4_40978, %2482, %int128_40979 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43585 = torch.aten.view %43583, %43584 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40980 = torch.constant.int 4
    %43586 = torch.aten.mul.int %int4_40980, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40981 = torch.constant.int 4096
    %43587 = torch.prim.ListConstruct %43586, %int4096_40981 : (!torch.int, !torch.int) -> !torch.list<int>
    %43588 = torch.aten.view %43414, %43587 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43588, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43589 = torch.aten.mm %43588, %43555 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43589, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40982 = torch.constant.int 4
    %int128_40983 = torch.constant.int 128
    %43590 = torch.prim.ListConstruct %int4_40982, %2482, %int128_40983 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43591 = torch.aten.view %43589, %43590 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40984 = torch.constant.int 4
    %43592 = torch.aten.mul.int %int4_40984, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40985 = torch.constant.int 4096
    %43593 = torch.prim.ListConstruct %43592, %int4096_40985 : (!torch.int, !torch.int) -> !torch.list<int>
    %43594 = torch.aten.view %43415, %43593 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43594, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43595 = torch.aten.mm %43594, %43557 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43595, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40986 = torch.constant.int 4
    %int128_40987 = torch.constant.int 128
    %43596 = torch.prim.ListConstruct %int4_40986, %2482, %int128_40987 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43597 = torch.aten.view %43595, %43596 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40988 = torch.constant.int 4
    %43598 = torch.aten.mul.int %int4_40988, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40989 = torch.constant.int 4096
    %43599 = torch.prim.ListConstruct %43598, %int4096_40989 : (!torch.int, !torch.int) -> !torch.list<int>
    %43600 = torch.aten.view %43416, %43599 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43600, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43601 = torch.aten.mm %43600, %43559 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43601, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40990 = torch.constant.int 4
    %int128_40991 = torch.constant.int 128
    %43602 = torch.prim.ListConstruct %int4_40990, %2482, %int128_40991 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43603 = torch.aten.view %43601, %43602 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40992 = torch.constant.int 4
    %43604 = torch.aten.mul.int %int4_40992, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_40993 = torch.constant.int 4096
    %43605 = torch.prim.ListConstruct %43604, %int4096_40993 : (!torch.int, !torch.int) -> !torch.list<int>
    %43606 = torch.aten.view %43417, %43605 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %43606, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %43607 = torch.aten.mm %43606, %43561 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %43607, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_40994 = torch.constant.int 4
    %int128_40995 = torch.constant.int 128
    %43608 = torch.prim.ListConstruct %int4_40994, %2482, %int128_40995 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43609 = torch.aten.view %43607, %43608 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %43609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_40996 = torch.constant.int 4
    %int4_40997 = torch.constant.int 4
    %int128_40998 = torch.constant.int 128
    %43610 = torch.prim.ListConstruct %int4_40996, %2482, %int4_40997, %int128_40998 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43611 = torch.aten.view %43439, %43610 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_40999 = torch.constant.int 4
    %int4_41000 = torch.constant.int 4
    %int128_41001 = torch.constant.int 128
    %43612 = torch.prim.ListConstruct %int4_40999, %2482, %int4_41000, %int128_41001 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43613 = torch.aten.view %43445, %43612 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41002 = torch.constant.int 4
    %int4_41003 = torch.constant.int 4
    %int128_41004 = torch.constant.int 128
    %43614 = torch.prim.ListConstruct %int4_41002, %2482, %int4_41003, %int128_41004 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43615 = torch.aten.view %43451, %43614 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41005 = torch.constant.int 4
    %int4_41006 = torch.constant.int 4
    %int128_41007 = torch.constant.int 128
    %43616 = torch.prim.ListConstruct %int4_41005, %2482, %int4_41006, %int128_41007 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43617 = torch.aten.view %43457, %43616 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41008 = torch.constant.int 4
    %int4_41009 = torch.constant.int 4
    %int128_41010 = torch.constant.int 128
    %43618 = torch.prim.ListConstruct %int4_41008, %2482, %int4_41009, %int128_41010 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43619 = torch.aten.view %43463, %43618 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41011 = torch.constant.int 4
    %int4_41012 = torch.constant.int 4
    %int128_41013 = torch.constant.int 128
    %43620 = torch.prim.ListConstruct %int4_41011, %2482, %int4_41012, %int128_41013 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43621 = torch.aten.view %43469, %43620 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41014 = torch.constant.int 4
    %int4_41015 = torch.constant.int 4
    %int128_41016 = torch.constant.int 128
    %43622 = torch.prim.ListConstruct %int4_41014, %2482, %int4_41015, %int128_41016 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43623 = torch.aten.view %43475, %43622 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41017 = torch.constant.int 4
    %int4_41018 = torch.constant.int 4
    %int128_41019 = torch.constant.int 128
    %43624 = torch.prim.ListConstruct %int4_41017, %2482, %int4_41018, %int128_41019 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43625 = torch.aten.view %43481, %43624 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
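    // The per-device [4,?,128] projections are likewise reshaped to
    // [4, seq, 1, 128]: one KV head per device, consistent with 8-way-sharded
    // grouped-query attention (4 query heads vs. 1 key/value head per shard).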
    %int4_41020 = torch.constant.int 4
    %int1_41021 = torch.constant.int 1
    %int128_41022 = torch.constant.int 128
    %43626 = torch.prim.ListConstruct %int4_41020, %2482, %int1_41021, %int128_41022 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43627 = torch.aten.view %43503, %43626 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41023 = torch.constant.int 4
    %int1_41024 = torch.constant.int 1
    %int128_41025 = torch.constant.int 128
    %43628 = torch.prim.ListConstruct %int4_41023, %2482, %int1_41024, %int128_41025 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43629 = torch.aten.view %43509, %43628 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41026 = torch.constant.int 4
    %int1_41027 = torch.constant.int 1
    %int128_41028 = torch.constant.int 128
    %43630 = torch.prim.ListConstruct %int4_41026, %2482, %int1_41027, %int128_41028 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43631 = torch.aten.view %43515, %43630 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41029 = torch.constant.int 4
    %int1_41030 = torch.constant.int 1
    %int128_41031 = torch.constant.int 128
    %43632 = torch.prim.ListConstruct %int4_41029, %2482, %int1_41030, %int128_41031 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43633 = torch.aten.view %43521, %43632 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41032 = torch.constant.int 4
    %int1_41033 = torch.constant.int 1
    %int128_41034 = torch.constant.int 128
    %43634 = torch.prim.ListConstruct %int4_41032, %2482, %int1_41033, %int128_41034 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43635 = torch.aten.view %43527, %43634 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41035 = torch.constant.int 4
    %int1_41036 = torch.constant.int 1
    %int128_41037 = torch.constant.int 128
    %43636 = torch.prim.ListConstruct %int4_41035, %2482, %int1_41036, %int128_41037 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43637 = torch.aten.view %43533, %43636 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41038 = torch.constant.int 4
    %int1_41039 = torch.constant.int 1
    %int128_41040 = torch.constant.int 128
    %43638 = torch.prim.ListConstruct %int4_41038, %2482, %int1_41039, %int128_41040 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43639 = torch.aten.view %43539, %43638 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41041 = torch.constant.int 4
    %int1_41042 = torch.constant.int 1
    %int128_41043 = torch.constant.int 128
    %43640 = torch.prim.ListConstruct %int4_41041, %2482, %int1_41042, %int128_41043 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43641 = torch.aten.view %43545, %43640 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41044 = torch.constant.int 4
    %int1_41045 = torch.constant.int 1
    %int128_41046 = torch.constant.int 128
    %43642 = torch.prim.ListConstruct %int4_41044, %2482, %int1_41045, %int128_41046 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43643 = torch.aten.view %43567, %43642 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41047 = torch.constant.int 4
    %int1_41048 = torch.constant.int 1
    %int128_41049 = torch.constant.int 128
    %43644 = torch.prim.ListConstruct %int4_41047, %2482, %int1_41048, %int128_41049 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43645 = torch.aten.view %43573, %43644 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41050 = torch.constant.int 4
    %int1_41051 = torch.constant.int 1
    %int128_41052 = torch.constant.int 128
    %43646 = torch.prim.ListConstruct %int4_41050, %2482, %int1_41051, %int128_41052 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43647 = torch.aten.view %43579, %43646 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41053 = torch.constant.int 4
    %int1_41054 = torch.constant.int 1
    %int128_41055 = torch.constant.int 128
    %43648 = torch.prim.ListConstruct %int4_41053, %2482, %int1_41054, %int128_41055 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43649 = torch.aten.view %43585, %43648 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41056 = torch.constant.int 4
    %int1_41057 = torch.constant.int 1
    %int128_41058 = torch.constant.int 128
    %43650 = torch.prim.ListConstruct %int4_41056, %2482, %int1_41057, %int128_41058 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43651 = torch.aten.view %43591, %43650 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41059 = torch.constant.int 4
    %int1_41060 = torch.constant.int 1
    %int128_41061 = torch.constant.int 128
    %43652 = torch.prim.ListConstruct %int4_41059, %2482, %int1_41060, %int128_41061 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43653 = torch.aten.view %43597, %43652 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41062 = torch.constant.int 4
    %int1_41063 = torch.constant.int 1
    %int128_41064 = torch.constant.int 128
    %43654 = torch.prim.ListConstruct %int4_41062, %2482, %int1_41063, %int128_41064 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43655 = torch.aten.view %43603, %43654 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_41065 = torch.constant.int 4
    %int1_41066 = torch.constant.int 1
    %int128_41067 = torch.constant.int 128
    %43656 = torch.prim.ListConstruct %int4_41065, %2482, %int1_41066, %int128_41067 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43657 = torch.aten.view %43609, %43656 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
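    // Rotary position embedding (RoPE) table, computed on the host ("cpu"):
    //   p          = arange(131072)                              (max positions)
    //   inv_freq_i = 1 / 500000^(2i/128),  i = 0..63
    //   table[p,i] = cos(p*inv_freq_i) + j*sin(p*inv_freq_i)     : complex<f32>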
    %int131072_41068 = torch.constant.int 131072
    %none_41069 = torch.constant.none
    %none_41070 = torch.constant.none
    %cpu_41071 = torch.constant.device "cpu"
    %false_41072 = torch.constant.bool false
    %43658 = torch.aten.arange %int131072_41068, %none_41069, %none_41070, %cpu_41071, %false_41072 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_41073 = torch.constant.int 0
    %int128_41074 = torch.constant.int 128
    %int2_41075 = torch.constant.int 2
    %none_41076 = torch.constant.none
    %none_41077 = torch.constant.none
    %cpu_41078 = torch.constant.device "cpu"
    %false_41079 = torch.constant.bool false
    %43659 = torch.aten.arange.start_step %int0_41073, %int128_41074, %int2_41075, %none_41076, %none_41077, %cpu_41078, %false_41079 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_41080 = torch.constant.int 0
    %int0_41081 = torch.constant.int 0
    %int64_41082 = torch.constant.int 64
    %int1_41083 = torch.constant.int 1
    %43660 = torch.aten.slice.Tensor %43659, %int0_41080, %int0_41081, %int64_41082, %int1_41083 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_41084 = torch.constant.int 6
    %43661 = torch.prims.convert_element_type %43660, %int6_41084 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_41085 = torch.constant.int 128
    %43662 = torch.aten.div.Scalar %43661, %int128_41085 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_41086 = torch.constant.float 5.000000e+05
    %43663 = torch.aten.pow.Scalar %float5.000000e05_41086, %43662 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %43664 = torch.aten.reciprocal %43663 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_41087 = torch.constant.float 1.000000e+00
    %43665 = torch.aten.mul.Scalar %43664, %float1.000000e00_41087 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_41088 = torch.constant.int 131072
    %int1_41089 = torch.constant.int 1
    %43666 = torch.prim.ListConstruct %int131072_41088, %int1_41089 : (!torch.int, !torch.int) -> !torch.list<int>
    %43667 = torch.aten.view %43658, %43666 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %43668 = torch.aten.mul.Tensor %43667, %43665 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %43669 = torch.aten.cos %43668 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %43670 = torch.aten.sin %43668 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %43671 = torch.aten.complex %43669, %43670 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
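    // Replicate the complex table to all eight devices; each flow.tensor.transfer
    // below materializes a device-local copy of the same [131072,64] table.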
    %43672 = torch_c.to_builtin_tensor %43671 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43673 = flow.tensor.transfer %43672 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %43674 = torch_c.from_builtin_tensor %43673 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43675 = torch_c.to_builtin_tensor %43671 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43676 = flow.tensor.transfer %43675 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %43677 = torch_c.from_builtin_tensor %43676 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43678 = torch_c.to_builtin_tensor %43671 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43679 = flow.tensor.transfer %43678 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %43680 = torch_c.from_builtin_tensor %43679 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43681 = torch_c.to_builtin_tensor %43671 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43682 = flow.tensor.transfer %43681 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %43683 = torch_c.from_builtin_tensor %43682 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43684 = torch_c.to_builtin_tensor %43671 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43685 = flow.tensor.transfer %43684 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %43686 = torch_c.from_builtin_tensor %43685 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43687 = torch_c.to_builtin_tensor %43671 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43688 = flow.tensor.transfer %43687 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %43689 = torch_c.from_builtin_tensor %43688 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43690 = torch_c.to_builtin_tensor %43671 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43691 = flow.tensor.transfer %43690 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %43692 = torch_c.from_builtin_tensor %43691 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43693 = torch_c.to_builtin_tensor %43671 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43694 = flow.tensor.transfer %43693 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %43695 = torch_c.from_builtin_tensor %43694 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
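    // Apply RoPE to the 4-head tensors, one device at a time: slice the local
    // table to the live sequence length, shape it to [1, seq, 1, 64], bitcast
    // the f16 input [4,?,4,128] to complex<f16> [4,?,4,64] (adjacent element
    // pairs become re/im), multiply for the rotation, bitcast back to f32
    // [4,?,4,128], and truncate to f16. Roughly, in PyTorch terms:
    //   out = view_as_real(view_as_complex(x.reshape(4, s, 4, 64, 2)) * table)
    // The identical sequence repeats for devices 0 through 7 below.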
    %int1_41090 = torch.constant.int 1
    %43696 = torch.aten.size.int %43439, %int1_41090 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_41091 = torch.constant.int 0
    %43697 = torch.aten.add.int %int0_41091, %43696 : !torch.int, !torch.int -> !torch.int
    %int0_41092 = torch.constant.int 0
    %int0_41093 = torch.constant.int 0
    %int1_41094 = torch.constant.int 1
    %43698 = torch.aten.slice.Tensor %43674, %int0_41092, %int0_41093, %43697, %int1_41094 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43698, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41095 = torch.constant.int 1
    %int0_41096 = torch.constant.int 0
    %int9223372036854775807_41097 = torch.constant.int 9223372036854775807
    %int1_41098 = torch.constant.int 1
    %43699 = torch.aten.slice.Tensor %43698, %int1_41095, %int0_41096, %int9223372036854775807_41097, %int1_41098 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43699, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41099 = torch.constant.int 0
    %43700 = torch.aten.unsqueeze %43699, %int0_41099 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43700, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41100 = torch.constant.int 2
    %43701 = torch.aten.unsqueeze %43700, %int2_41100 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43701, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41101 = torch.constant.int 3
    %int0_41102 = torch.constant.int 0
    %int9223372036854775807_41103 = torch.constant.int 9223372036854775807
    %int1_41104 = torch.constant.int 1
    %43702 = torch.aten.slice.Tensor %43701, %int3_41101, %int0_41102, %int9223372036854775807_41103, %int1_41104 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43702, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43703 = torch_c.to_builtin_tensor %43611 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_41105 = arith.constant 1 : index
    %dim_41106 = tensor.dim %43703, %c1_41105 : tensor<4x?x4x128xf16>
    %43704 = flow.tensor.bitcast %43703 : tensor<4x?x4x128xf16>{%dim_41106} -> tensor<4x?x4x64xcomplex<f16>>{%dim_41106}
    %43705 = torch_c.from_builtin_tensor %43704 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %43705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %43706 = torch.aten.mul.Tensor %43705, %43702 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %43706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %43707 = torch_c.to_builtin_tensor %43706 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_41107 = arith.constant 1 : index
    %dim_41108 = tensor.dim %43707, %c1_41107 : tensor<4x?x4x64xcomplex<f32>>
    %43708 = flow.tensor.bitcast %43707 : tensor<4x?x4x64xcomplex<f32>>{%dim_41108} -> tensor<4x?x4x128xf32>{%dim_41108}
    %43709 = torch_c.from_builtin_tensor %43708 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %43709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_41109 = torch.constant.int 5
    %43710 = torch.prims.convert_element_type %43709, %int5_41109 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_41110 = torch.constant.int 1
    %43711 = torch.aten.size.int %43445, %int1_41110 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_41111 = torch.constant.int 0
    %43712 = torch.aten.add.int %int0_41111, %43711 : !torch.int, !torch.int -> !torch.int
    %int0_41112 = torch.constant.int 0
    %int0_41113 = torch.constant.int 0
    %int1_41114 = torch.constant.int 1
    %43713 = torch.aten.slice.Tensor %43677, %int0_41112, %int0_41113, %43712, %int1_41114 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43713, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41115 = torch.constant.int 1
    %int0_41116 = torch.constant.int 0
    %int9223372036854775807_41117 = torch.constant.int 9223372036854775807
    %int1_41118 = torch.constant.int 1
    %43714 = torch.aten.slice.Tensor %43713, %int1_41115, %int0_41116, %int9223372036854775807_41117, %int1_41118 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43714, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41119 = torch.constant.int 0
    %43715 = torch.aten.unsqueeze %43714, %int0_41119 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43715, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41120 = torch.constant.int 2
    %43716 = torch.aten.unsqueeze %43715, %int2_41120 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43716, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41121 = torch.constant.int 3
    %int0_41122 = torch.constant.int 0
    %int9223372036854775807_41123 = torch.constant.int 9223372036854775807
    %int1_41124 = torch.constant.int 1
    %43717 = torch.aten.slice.Tensor %43716, %int3_41121, %int0_41122, %int9223372036854775807_41123, %int1_41124 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43717, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43718 = torch_c.to_builtin_tensor %43613 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_41125 = arith.constant 1 : index
    %dim_41126 = tensor.dim %43718, %c1_41125 : tensor<4x?x4x128xf16>
    %43719 = flow.tensor.bitcast %43718 : tensor<4x?x4x128xf16>{%dim_41126} -> tensor<4x?x4x64xcomplex<f16>>{%dim_41126}
    %43720 = torch_c.from_builtin_tensor %43719 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %43720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %43721 = torch.aten.mul.Tensor %43720, %43717 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %43721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %43722 = torch_c.to_builtin_tensor %43721 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_41127 = arith.constant 1 : index
    %dim_41128 = tensor.dim %43722, %c1_41127 : tensor<4x?x4x64xcomplex<f32>>
    %43723 = flow.tensor.bitcast %43722 : tensor<4x?x4x64xcomplex<f32>>{%dim_41128} -> tensor<4x?x4x128xf32>{%dim_41128}
    %43724 = torch_c.from_builtin_tensor %43723 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %43724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_41129 = torch.constant.int 5
    %43725 = torch.prims.convert_element_type %43724, %int5_41129 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_41130 = torch.constant.int 1
    %43726 = torch.aten.size.int %43451, %int1_41130 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_41131 = torch.constant.int 0
    %43727 = torch.aten.add.int %int0_41131, %43726 : !torch.int, !torch.int -> !torch.int
    %int0_41132 = torch.constant.int 0
    %int0_41133 = torch.constant.int 0
    %int1_41134 = torch.constant.int 1
    %43728 = torch.aten.slice.Tensor %43680, %int0_41132, %int0_41133, %43727, %int1_41134 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43728, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41135 = torch.constant.int 1
    %int0_41136 = torch.constant.int 0
    %int9223372036854775807_41137 = torch.constant.int 9223372036854775807
    %int1_41138 = torch.constant.int 1
    %43729 = torch.aten.slice.Tensor %43728, %int1_41135, %int0_41136, %int9223372036854775807_41137, %int1_41138 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43729, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41139 = torch.constant.int 0
    %43730 = torch.aten.unsqueeze %43729, %int0_41139 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43730, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41140 = torch.constant.int 2
    %43731 = torch.aten.unsqueeze %43730, %int2_41140 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43731, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41141 = torch.constant.int 3
    %int0_41142 = torch.constant.int 0
    %int9223372036854775807_41143 = torch.constant.int 9223372036854775807
    %int1_41144 = torch.constant.int 1
    %43732 = torch.aten.slice.Tensor %43731, %int3_41141, %int0_41142, %int9223372036854775807_41143, %int1_41144 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43732, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43733 = torch_c.to_builtin_tensor %43615 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_41145 = arith.constant 1 : index
    %dim_41146 = tensor.dim %43733, %c1_41145 : tensor<4x?x4x128xf16>
    %43734 = flow.tensor.bitcast %43733 : tensor<4x?x4x128xf16>{%dim_41146} -> tensor<4x?x4x64xcomplex<f16>>{%dim_41146}
    %43735 = torch_c.from_builtin_tensor %43734 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %43735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %43736 = torch.aten.mul.Tensor %43735, %43732 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %43736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %43737 = torch_c.to_builtin_tensor %43736 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_41147 = arith.constant 1 : index
    %dim_41148 = tensor.dim %43737, %c1_41147 : tensor<4x?x4x64xcomplex<f32>>
    %43738 = flow.tensor.bitcast %43737 : tensor<4x?x4x64xcomplex<f32>>{%dim_41148} -> tensor<4x?x4x128xf32>{%dim_41148}
    %43739 = torch_c.from_builtin_tensor %43738 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %43739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_41149 = torch.constant.int 5
    %43740 = torch.prims.convert_element_type %43739, %int5_41149 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_41150 = torch.constant.int 1
    %43741 = torch.aten.size.int %43457, %int1_41150 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_41151 = torch.constant.int 0
    %43742 = torch.aten.add.int %int0_41151, %43741 : !torch.int, !torch.int -> !torch.int
    %int0_41152 = torch.constant.int 0
    %int0_41153 = torch.constant.int 0
    %int1_41154 = torch.constant.int 1
    %43743 = torch.aten.slice.Tensor %43683, %int0_41152, %int0_41153, %43742, %int1_41154 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43743, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41155 = torch.constant.int 1
    %int0_41156 = torch.constant.int 0
    %int9223372036854775807_41157 = torch.constant.int 9223372036854775807
    %int1_41158 = torch.constant.int 1
    %43744 = torch.aten.slice.Tensor %43743, %int1_41155, %int0_41156, %int9223372036854775807_41157, %int1_41158 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43744, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41159 = torch.constant.int 0
    %43745 = torch.aten.unsqueeze %43744, %int0_41159 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43745, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41160 = torch.constant.int 2
    %43746 = torch.aten.unsqueeze %43745, %int2_41160 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43746, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41161 = torch.constant.int 3
    %int0_41162 = torch.constant.int 0
    %int9223372036854775807_41163 = torch.constant.int 9223372036854775807
    %int1_41164 = torch.constant.int 1
    %43747 = torch.aten.slice.Tensor %43746, %int3_41161, %int0_41162, %int9223372036854775807_41163, %int1_41164 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43747, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43748 = torch_c.to_builtin_tensor %43617 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_41165 = arith.constant 1 : index
    %dim_41166 = tensor.dim %43748, %c1_41165 : tensor<4x?x4x128xf16>
    %43749 = flow.tensor.bitcast %43748 : tensor<4x?x4x128xf16>{%dim_41166} -> tensor<4x?x4x64xcomplex<f16>>{%dim_41166}
    %43750 = torch_c.from_builtin_tensor %43749 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %43750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %43751 = torch.aten.mul.Tensor %43750, %43747 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %43751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %43752 = torch_c.to_builtin_tensor %43751 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_41167 = arith.constant 1 : index
    %dim_41168 = tensor.dim %43752, %c1_41167 : tensor<4x?x4x64xcomplex<f32>>
    %43753 = flow.tensor.bitcast %43752 : tensor<4x?x4x64xcomplex<f32>>{%dim_41168} -> tensor<4x?x4x128xf32>{%dim_41168}
    %43754 = torch_c.from_builtin_tensor %43753 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %43754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_41169 = torch.constant.int 5
    %43755 = torch.prims.convert_element_type %43754, %int5_41169 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_41170 = torch.constant.int 1
    %43756 = torch.aten.size.int %43463, %int1_41170 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_41171 = torch.constant.int 0
    %43757 = torch.aten.add.int %int0_41171, %43756 : !torch.int, !torch.int -> !torch.int
    %int0_41172 = torch.constant.int 0
    %int0_41173 = torch.constant.int 0
    %int1_41174 = torch.constant.int 1
    %43758 = torch.aten.slice.Tensor %43686, %int0_41172, %int0_41173, %43757, %int1_41174 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43758, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41175 = torch.constant.int 1
    %int0_41176 = torch.constant.int 0
    %int9223372036854775807_41177 = torch.constant.int 9223372036854775807
    %int1_41178 = torch.constant.int 1
    %43759 = torch.aten.slice.Tensor %43758, %int1_41175, %int0_41176, %int9223372036854775807_41177, %int1_41178 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43759, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41179 = torch.constant.int 0
    %43760 = torch.aten.unsqueeze %43759, %int0_41179 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43760, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41180 = torch.constant.int 2
    %43761 = torch.aten.unsqueeze %43760, %int2_41180 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43761, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41181 = torch.constant.int 3
    %int0_41182 = torch.constant.int 0
    %int9223372036854775807_41183 = torch.constant.int 9223372036854775807
    %int1_41184 = torch.constant.int 1
    %43762 = torch.aten.slice.Tensor %43761, %int3_41181, %int0_41182, %int9223372036854775807_41183, %int1_41184 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43762, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43763 = torch_c.to_builtin_tensor %43619 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_41185 = arith.constant 1 : index
    %dim_41186 = tensor.dim %43763, %c1_41185 : tensor<4x?x4x128xf16>
    %43764 = flow.tensor.bitcast %43763 : tensor<4x?x4x128xf16>{%dim_41186} -> tensor<4x?x4x64xcomplex<f16>>{%dim_41186}
    %43765 = torch_c.from_builtin_tensor %43764 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %43765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %43766 = torch.aten.mul.Tensor %43765, %43762 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %43766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %43767 = torch_c.to_builtin_tensor %43766 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_41187 = arith.constant 1 : index
    %dim_41188 = tensor.dim %43767, %c1_41187 : tensor<4x?x4x64xcomplex<f32>>
    %43768 = flow.tensor.bitcast %43767 : tensor<4x?x4x64xcomplex<f32>>{%dim_41188} -> tensor<4x?x4x128xf32>{%dim_41188}
    %43769 = torch_c.from_builtin_tensor %43768 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %43769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_41189 = torch.constant.int 5
    %43770 = torch.prims.convert_element_type %43769, %int5_41189 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_41190 = torch.constant.int 1
    %43771 = torch.aten.size.int %43469, %int1_41190 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_41191 = torch.constant.int 0
    %43772 = torch.aten.add.int %int0_41191, %43771 : !torch.int, !torch.int -> !torch.int
    %int0_41192 = torch.constant.int 0
    %int0_41193 = torch.constant.int 0
    %int1_41194 = torch.constant.int 1
    %43773 = torch.aten.slice.Tensor %43689, %int0_41192, %int0_41193, %43772, %int1_41194 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43773, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41195 = torch.constant.int 1
    %int0_41196 = torch.constant.int 0
    %int9223372036854775807_41197 = torch.constant.int 9223372036854775807
    %int1_41198 = torch.constant.int 1
    %43774 = torch.aten.slice.Tensor %43773, %int1_41195, %int0_41196, %int9223372036854775807_41197, %int1_41198 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43774, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41199 = torch.constant.int 0
    %43775 = torch.aten.unsqueeze %43774, %int0_41199 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43775, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41200 = torch.constant.int 2
    %43776 = torch.aten.unsqueeze %43775, %int2_41200 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43776, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41201 = torch.constant.int 3
    %int0_41202 = torch.constant.int 0
    %int9223372036854775807_41203 = torch.constant.int 9223372036854775807
    %int1_41204 = torch.constant.int 1
    %43777 = torch.aten.slice.Tensor %43776, %int3_41201, %int0_41202, %int9223372036854775807_41203, %int1_41204 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43777, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43778 = torch_c.to_builtin_tensor %43621 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_41205 = arith.constant 1 : index
    %dim_41206 = tensor.dim %43778, %c1_41205 : tensor<4x?x4x128xf16>
    %43779 = flow.tensor.bitcast %43778 : tensor<4x?x4x128xf16>{%dim_41206} -> tensor<4x?x4x64xcomplex<f16>>{%dim_41206}
    %43780 = torch_c.from_builtin_tensor %43779 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %43780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %43781 = torch.aten.mul.Tensor %43780, %43777 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %43781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %43782 = torch_c.to_builtin_tensor %43781 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_41207 = arith.constant 1 : index
    %dim_41208 = tensor.dim %43782, %c1_41207 : tensor<4x?x4x64xcomplex<f32>>
    %43783 = flow.tensor.bitcast %43782 : tensor<4x?x4x64xcomplex<f32>>{%dim_41208} -> tensor<4x?x4x128xf32>{%dim_41208}
    %43784 = torch_c.from_builtin_tensor %43783 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %43784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_41209 = torch.constant.int 5
    %43785 = torch.prims.convert_element_type %43784, %int5_41209 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_41210 = torch.constant.int 1
    %43786 = torch.aten.size.int %43475, %int1_41210 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_41211 = torch.constant.int 0
    %43787 = torch.aten.add.int %int0_41211, %43786 : !torch.int, !torch.int -> !torch.int
    %int0_41212 = torch.constant.int 0
    %int0_41213 = torch.constant.int 0
    %int1_41214 = torch.constant.int 1
    %43788 = torch.aten.slice.Tensor %43692, %int0_41212, %int0_41213, %43787, %int1_41214 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43788, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41215 = torch.constant.int 1
    %int0_41216 = torch.constant.int 0
    %int9223372036854775807_41217 = torch.constant.int 9223372036854775807
    %int1_41218 = torch.constant.int 1
    %43789 = torch.aten.slice.Tensor %43788, %int1_41215, %int0_41216, %int9223372036854775807_41217, %int1_41218 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43789, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41219 = torch.constant.int 0
    %43790 = torch.aten.unsqueeze %43789, %int0_41219 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43790, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41220 = torch.constant.int 2
    %43791 = torch.aten.unsqueeze %43790, %int2_41220 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43791, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41221 = torch.constant.int 3
    %int0_41222 = torch.constant.int 0
    %int9223372036854775807_41223 = torch.constant.int 9223372036854775807
    %int1_41224 = torch.constant.int 1
    %43792 = torch.aten.slice.Tensor %43791, %int3_41221, %int0_41222, %int9223372036854775807_41223, %int1_41224 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43792, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43793 = torch_c.to_builtin_tensor %43623 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_41225 = arith.constant 1 : index
    %dim_41226 = tensor.dim %43793, %c1_41225 : tensor<4x?x4x128xf16>
    %43794 = flow.tensor.bitcast %43793 : tensor<4x?x4x128xf16>{%dim_41226} -> tensor<4x?x4x64xcomplex<f16>>{%dim_41226}
    %43795 = torch_c.from_builtin_tensor %43794 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %43795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %43796 = torch.aten.mul.Tensor %43795, %43792 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %43796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %43797 = torch_c.to_builtin_tensor %43796 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_41227 = arith.constant 1 : index
    %dim_41228 = tensor.dim %43797, %c1_41227 : tensor<4x?x4x64xcomplex<f32>>
    %43798 = flow.tensor.bitcast %43797 : tensor<4x?x4x64xcomplex<f32>>{%dim_41228} -> tensor<4x?x4x128xf32>{%dim_41228}
    %43799 = torch_c.from_builtin_tensor %43798 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %43799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_41229 = torch.constant.int 5
    %43800 = torch.prims.convert_element_type %43799, %int5_41229 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_41230 = torch.constant.int 1
    %43801 = torch.aten.size.int %43481, %int1_41230 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_41231 = torch.constant.int 0
    %43802 = torch.aten.add.int %int0_41231, %43801 : !torch.int, !torch.int -> !torch.int
    %int0_41232 = torch.constant.int 0
    %int0_41233 = torch.constant.int 0
    %int1_41234 = torch.constant.int 1
    %43803 = torch.aten.slice.Tensor %43695, %int0_41232, %int0_41233, %43802, %int1_41234 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43803, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41235 = torch.constant.int 1
    %int0_41236 = torch.constant.int 0
    %int9223372036854775807_41237 = torch.constant.int 9223372036854775807
    %int1_41238 = torch.constant.int 1
    %43804 = torch.aten.slice.Tensor %43803, %int1_41235, %int0_41236, %int9223372036854775807_41237, %int1_41238 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43804, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41239 = torch.constant.int 0
    %43805 = torch.aten.unsqueeze %43804, %int0_41239 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43805, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41240 = torch.constant.int 2
    %43806 = torch.aten.unsqueeze %43805, %int2_41240 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43806, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41241 = torch.constant.int 3
    %int0_41242 = torch.constant.int 0
    %int9223372036854775807_41243 = torch.constant.int 9223372036854775807
    %int1_41244 = torch.constant.int 1
    %43807 = torch.aten.slice.Tensor %43806, %int3_41241, %int0_41242, %int9223372036854775807_41243, %int1_41244 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43807, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43808 = torch_c.to_builtin_tensor %43625 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_41245 = arith.constant 1 : index
    %dim_41246 = tensor.dim %43808, %c1_41245 : tensor<4x?x4x128xf16>
    %43809 = flow.tensor.bitcast %43808 : tensor<4x?x4x128xf16>{%dim_41246} -> tensor<4x?x4x64xcomplex<f16>>{%dim_41246}
    %43810 = torch_c.from_builtin_tensor %43809 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %43810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %43811 = torch.aten.mul.Tensor %43810, %43807 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %43811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %43812 = torch_c.to_builtin_tensor %43811 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_41247 = arith.constant 1 : index
    %dim_41248 = tensor.dim %43812, %c1_41247 : tensor<4x?x4x64xcomplex<f32>>
    %43813 = flow.tensor.bitcast %43812 : tensor<4x?x4x64xcomplex<f32>>{%dim_41248} -> tensor<4x?x4x128xf32>{%dim_41248}
    %43814 = torch_c.from_builtin_tensor %43813 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %43814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_41249 = torch.constant.int 5
    %43815 = torch.prims.convert_element_type %43814, %int5_41249 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %43815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
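    // The RoPE table is now rebuilt from scratch (the same arange/pow/cos/sin
    // sequence as above) and re-broadcast to every device before being applied
    // to the single-head [4,?,1,128] tensors; nothing is reused from the first
    // table.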
    %int131072_41250 = torch.constant.int 131072
    %none_41251 = torch.constant.none
    %none_41252 = torch.constant.none
    %cpu_41253 = torch.constant.device "cpu"
    %false_41254 = torch.constant.bool false
    %43816 = torch.aten.arange %int131072_41250, %none_41251, %none_41252, %cpu_41253, %false_41254 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_41255 = torch.constant.int 0
    %int128_41256 = torch.constant.int 128
    %int2_41257 = torch.constant.int 2
    %none_41258 = torch.constant.none
    %none_41259 = torch.constant.none
    %cpu_41260 = torch.constant.device "cpu"
    %false_41261 = torch.constant.bool false
    %43817 = torch.aten.arange.start_step %int0_41255, %int128_41256, %int2_41257, %none_41258, %none_41259, %cpu_41260, %false_41261 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_41262 = torch.constant.int 0
    %int0_41263 = torch.constant.int 0
    %int64_41264 = torch.constant.int 64
    %int1_41265 = torch.constant.int 1
    %43818 = torch.aten.slice.Tensor %43817, %int0_41262, %int0_41263, %int64_41264, %int1_41265 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_41266 = torch.constant.int 6
    %43819 = torch.prims.convert_element_type %43818, %int6_41266 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_41267 = torch.constant.int 128
    %43820 = torch.aten.div.Scalar %43819, %int128_41267 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_41268 = torch.constant.float 5.000000e+05
    %43821 = torch.aten.pow.Scalar %float5.000000e05_41268, %43820 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %43822 = torch.aten.reciprocal %43821 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_41269 = torch.constant.float 1.000000e+00
    %43823 = torch.aten.mul.Scalar %43822, %float1.000000e00_41269 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_41270 = torch.constant.int 131072
    %int1_41271 = torch.constant.int 1
    %43824 = torch.prim.ListConstruct %int131072_41270, %int1_41271 : (!torch.int, !torch.int) -> !torch.list<int>
    %43825 = torch.aten.view %43816, %43824 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %43826 = torch.aten.mul.Tensor %43825, %43823 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %43827 = torch.aten.cos %43826 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %43828 = torch.aten.sin %43826 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %43829 = torch.aten.complex %43827, %43828 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
    %43830 = torch_c.to_builtin_tensor %43829 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43831 = flow.tensor.transfer %43830 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %43832 = torch_c.from_builtin_tensor %43831 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43833 = torch_c.to_builtin_tensor %43829 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43834 = flow.tensor.transfer %43833 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %43835 = torch_c.from_builtin_tensor %43834 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43836 = torch_c.to_builtin_tensor %43829 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43837 = flow.tensor.transfer %43836 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %43838 = torch_c.from_builtin_tensor %43837 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43839 = torch_c.to_builtin_tensor %43829 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43840 = flow.tensor.transfer %43839 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %43841 = torch_c.from_builtin_tensor %43840 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43842 = torch_c.to_builtin_tensor %43829 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43843 = flow.tensor.transfer %43842 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %43844 = torch_c.from_builtin_tensor %43843 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43845 = torch_c.to_builtin_tensor %43829 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43846 = flow.tensor.transfer %43845 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %43847 = torch_c.from_builtin_tensor %43846 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43848 = torch_c.to_builtin_tensor %43829 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43849 = flow.tensor.transfer %43848 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %43850 = torch_c.from_builtin_tensor %43849 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %43851 = torch_c.to_builtin_tensor %43829 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %43852 = flow.tensor.transfer %43851 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %43853 = torch_c.from_builtin_tensor %43852 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
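    // Device 0: apply rotary embedding to a [4, seq, 1, 128] f16 tensor by slicing the table to the
    // current sequence length, reshaping it to [1, seq, 1, 64], bitcasting the activations to
    // complex<f16>, complex-multiplying, bitcasting back to [4, seq, 1, 128] f32, and truncating
    // to f16. The same pattern repeats once per device below.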
    %int1_41272 = torch.constant.int 1
    %43854 = torch.aten.size.int %43503, %int1_41272 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_41273 = torch.constant.int 0
    %43855 = torch.aten.add.int %int0_41273, %43854 : !torch.int, !torch.int -> !torch.int
    %int0_41274 = torch.constant.int 0
    %int0_41275 = torch.constant.int 0
    %int1_41276 = torch.constant.int 1
    %43856 = torch.aten.slice.Tensor %43832, %int0_41274, %int0_41275, %43855, %int1_41276 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43856, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41277 = torch.constant.int 1
    %int0_41278 = torch.constant.int 0
    %int9223372036854775807_41279 = torch.constant.int 9223372036854775807
    %int1_41280 = torch.constant.int 1
    %43857 = torch.aten.slice.Tensor %43856, %int1_41277, %int0_41278, %int9223372036854775807_41279, %int1_41280 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43857, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41281 = torch.constant.int 0
    %43858 = torch.aten.unsqueeze %43857, %int0_41281 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43858, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41282 = torch.constant.int 2
    %43859 = torch.aten.unsqueeze %43858, %int2_41282 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43859, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41283 = torch.constant.int 3
    %int0_41284 = torch.constant.int 0
    %int9223372036854775807_41285 = torch.constant.int 9223372036854775807
    %int1_41286 = torch.constant.int 1
    %43860 = torch.aten.slice.Tensor %43859, %int3_41283, %int0_41284, %int9223372036854775807_41285, %int1_41286 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43860, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43861 = torch_c.to_builtin_tensor %43627 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_41287 = arith.constant 1 : index
    %dim_41288 = tensor.dim %43861, %c1_41287 : tensor<4x?x1x128xf16>
    %43862 = flow.tensor.bitcast %43861 : tensor<4x?x1x128xf16>{%dim_41288} -> tensor<4x?x1x64xcomplex<f16>>{%dim_41288}
    %43863 = torch_c.from_builtin_tensor %43862 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %43863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %43864 = torch.aten.mul.Tensor %43863, %43860 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %43865 = torch_c.to_builtin_tensor %43864 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_41289 = arith.constant 1 : index
    %dim_41290 = tensor.dim %43865, %c1_41289 : tensor<4x?x1x64xcomplex<f32>>
    %43866 = flow.tensor.bitcast %43865 : tensor<4x?x1x64xcomplex<f32>>{%dim_41290} -> tensor<4x?x1x128xf32>{%dim_41290}
    %43867 = torch_c.from_builtin_tensor %43866 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %43867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_41291 = torch.constant.int 5
    %43868 = torch.prims.convert_element_type %43867, %int5_41291 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
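    // Device 1 replica (table %43835): same rotary-embedding pattern as device 0.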
    %int1_41292 = torch.constant.int 1
    %43869 = torch.aten.size.int %43509, %int1_41292 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_41293 = torch.constant.int 0
    %43870 = torch.aten.add.int %int0_41293, %43869 : !torch.int, !torch.int -> !torch.int
    %int0_41294 = torch.constant.int 0
    %int0_41295 = torch.constant.int 0
    %int1_41296 = torch.constant.int 1
    %43871 = torch.aten.slice.Tensor %43835, %int0_41294, %int0_41295, %43870, %int1_41296 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43871, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41297 = torch.constant.int 1
    %int0_41298 = torch.constant.int 0
    %int9223372036854775807_41299 = torch.constant.int 9223372036854775807
    %int1_41300 = torch.constant.int 1
    %43872 = torch.aten.slice.Tensor %43871, %int1_41297, %int0_41298, %int9223372036854775807_41299, %int1_41300 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43872, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41301 = torch.constant.int 0
    %43873 = torch.aten.unsqueeze %43872, %int0_41301 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43873, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41302 = torch.constant.int 2
    %43874 = torch.aten.unsqueeze %43873, %int2_41302 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43874, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41303 = torch.constant.int 3
    %int0_41304 = torch.constant.int 0
    %int9223372036854775807_41305 = torch.constant.int 9223372036854775807
    %int1_41306 = torch.constant.int 1
    %43875 = torch.aten.slice.Tensor %43874, %int3_41303, %int0_41304, %int9223372036854775807_41305, %int1_41306 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43875, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43876 = torch_c.to_builtin_tensor %43629 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_41307 = arith.constant 1 : index
    %dim_41308 = tensor.dim %43876, %c1_41307 : tensor<4x?x1x128xf16>
    %43877 = flow.tensor.bitcast %43876 : tensor<4x?x1x128xf16>{%dim_41308} -> tensor<4x?x1x64xcomplex<f16>>{%dim_41308}
    %43878 = torch_c.from_builtin_tensor %43877 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %43878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %43879 = torch.aten.mul.Tensor %43878, %43875 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %43880 = torch_c.to_builtin_tensor %43879 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_41309 = arith.constant 1 : index
    %dim_41310 = tensor.dim %43880, %c1_41309 : tensor<4x?x1x64xcomplex<f32>>
    %43881 = flow.tensor.bitcast %43880 : tensor<4x?x1x64xcomplex<f32>>{%dim_41310} -> tensor<4x?x1x128xf32>{%dim_41310}
    %43882 = torch_c.from_builtin_tensor %43881 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %43882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_41311 = torch.constant.int 5
    %43883 = torch.prims.convert_element_type %43882, %int5_41311 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
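    // Device 2 replica (table %43838): same rotary-embedding pattern as device 0.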
    %int1_41312 = torch.constant.int 1
    %43884 = torch.aten.size.int %43515, %int1_41312 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_41313 = torch.constant.int 0
    %43885 = torch.aten.add.int %int0_41313, %43884 : !torch.int, !torch.int -> !torch.int
    %int0_41314 = torch.constant.int 0
    %int0_41315 = torch.constant.int 0
    %int1_41316 = torch.constant.int 1
    %43886 = torch.aten.slice.Tensor %43838, %int0_41314, %int0_41315, %43885, %int1_41316 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43886, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41317 = torch.constant.int 1
    %int0_41318 = torch.constant.int 0
    %int9223372036854775807_41319 = torch.constant.int 9223372036854775807
    %int1_41320 = torch.constant.int 1
    %43887 = torch.aten.slice.Tensor %43886, %int1_41317, %int0_41318, %int9223372036854775807_41319, %int1_41320 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43887, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41321 = torch.constant.int 0
    %43888 = torch.aten.unsqueeze %43887, %int0_41321 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43888, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41322 = torch.constant.int 2
    %43889 = torch.aten.unsqueeze %43888, %int2_41322 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43889, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41323 = torch.constant.int 3
    %int0_41324 = torch.constant.int 0
    %int9223372036854775807_41325 = torch.constant.int 9223372036854775807
    %int1_41326 = torch.constant.int 1
    %43890 = torch.aten.slice.Tensor %43889, %int3_41323, %int0_41324, %int9223372036854775807_41325, %int1_41326 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43890, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43891 = torch_c.to_builtin_tensor %43631 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_41327 = arith.constant 1 : index
    %dim_41328 = tensor.dim %43891, %c1_41327 : tensor<4x?x1x128xf16>
    %43892 = flow.tensor.bitcast %43891 : tensor<4x?x1x128xf16>{%dim_41328} -> tensor<4x?x1x64xcomplex<f16>>{%dim_41328}
    %43893 = torch_c.from_builtin_tensor %43892 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %43893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %43894 = torch.aten.mul.Tensor %43893, %43890 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %43895 = torch_c.to_builtin_tensor %43894 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_41329 = arith.constant 1 : index
    %dim_41330 = tensor.dim %43895, %c1_41329 : tensor<4x?x1x64xcomplex<f32>>
    %43896 = flow.tensor.bitcast %43895 : tensor<4x?x1x64xcomplex<f32>>{%dim_41330} -> tensor<4x?x1x128xf32>{%dim_41330}
    %43897 = torch_c.from_builtin_tensor %43896 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %43897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_41331 = torch.constant.int 5
    %43898 = torch.prims.convert_element_type %43897, %int5_41331 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
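    // Device 3 replica (table %43841): same rotary-embedding pattern as device 0.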
    %int1_41332 = torch.constant.int 1
    %43899 = torch.aten.size.int %43521, %int1_41332 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_41333 = torch.constant.int 0
    %43900 = torch.aten.add.int %int0_41333, %43899 : !torch.int, !torch.int -> !torch.int
    %int0_41334 = torch.constant.int 0
    %int0_41335 = torch.constant.int 0
    %int1_41336 = torch.constant.int 1
    %43901 = torch.aten.slice.Tensor %43841, %int0_41334, %int0_41335, %43900, %int1_41336 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43901, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41337 = torch.constant.int 1
    %int0_41338 = torch.constant.int 0
    %int9223372036854775807_41339 = torch.constant.int 9223372036854775807
    %int1_41340 = torch.constant.int 1
    %43902 = torch.aten.slice.Tensor %43901, %int1_41337, %int0_41338, %int9223372036854775807_41339, %int1_41340 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43902, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41341 = torch.constant.int 0
    %43903 = torch.aten.unsqueeze %43902, %int0_41341 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43903, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41342 = torch.constant.int 2
    %43904 = torch.aten.unsqueeze %43903, %int2_41342 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43904, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41343 = torch.constant.int 3
    %int0_41344 = torch.constant.int 0
    %int9223372036854775807_41345 = torch.constant.int 9223372036854775807
    %int1_41346 = torch.constant.int 1
    %43905 = torch.aten.slice.Tensor %43904, %int3_41343, %int0_41344, %int9223372036854775807_41345, %int1_41346 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43905, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43906 = torch_c.to_builtin_tensor %43633 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_41347 = arith.constant 1 : index
    %dim_41348 = tensor.dim %43906, %c1_41347 : tensor<4x?x1x128xf16>
    %43907 = flow.tensor.bitcast %43906 : tensor<4x?x1x128xf16>{%dim_41348} -> tensor<4x?x1x64xcomplex<f16>>{%dim_41348}
    %43908 = torch_c.from_builtin_tensor %43907 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %43908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %43909 = torch.aten.mul.Tensor %43908, %43905 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %43910 = torch_c.to_builtin_tensor %43909 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_41349 = arith.constant 1 : index
    %dim_41350 = tensor.dim %43910, %c1_41349 : tensor<4x?x1x64xcomplex<f32>>
    %43911 = flow.tensor.bitcast %43910 : tensor<4x?x1x64xcomplex<f32>>{%dim_41350} -> tensor<4x?x1x128xf32>{%dim_41350}
    %43912 = torch_c.from_builtin_tensor %43911 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %43912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_41351 = torch.constant.int 5
    %43913 = torch.prims.convert_element_type %43912, %int5_41351 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
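    // Device 4 replica (table %43844): same rotary-embedding pattern as device 0.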
    %int1_41352 = torch.constant.int 1
    %43914 = torch.aten.size.int %43527, %int1_41352 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_41353 = torch.constant.int 0
    %43915 = torch.aten.add.int %int0_41353, %43914 : !torch.int, !torch.int -> !torch.int
    %int0_41354 = torch.constant.int 0
    %int0_41355 = torch.constant.int 0
    %int1_41356 = torch.constant.int 1
    %43916 = torch.aten.slice.Tensor %43844, %int0_41354, %int0_41355, %43915, %int1_41356 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43916, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41357 = torch.constant.int 1
    %int0_41358 = torch.constant.int 0
    %int9223372036854775807_41359 = torch.constant.int 9223372036854775807
    %int1_41360 = torch.constant.int 1
    %43917 = torch.aten.slice.Tensor %43916, %int1_41357, %int0_41358, %int9223372036854775807_41359, %int1_41360 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43917, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41361 = torch.constant.int 0
    %43918 = torch.aten.unsqueeze %43917, %int0_41361 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43918, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41362 = torch.constant.int 2
    %43919 = torch.aten.unsqueeze %43918, %int2_41362 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43919, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41363 = torch.constant.int 3
    %int0_41364 = torch.constant.int 0
    %int9223372036854775807_41365 = torch.constant.int 9223372036854775807
    %int1_41366 = torch.constant.int 1
    %43920 = torch.aten.slice.Tensor %43919, %int3_41363, %int0_41364, %int9223372036854775807_41365, %int1_41366 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43920, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43921 = torch_c.to_builtin_tensor %43635 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_41367 = arith.constant 1 : index
    %dim_41368 = tensor.dim %43921, %c1_41367 : tensor<4x?x1x128xf16>
    %43922 = flow.tensor.bitcast %43921 : tensor<4x?x1x128xf16>{%dim_41368} -> tensor<4x?x1x64xcomplex<f16>>{%dim_41368}
    %43923 = torch_c.from_builtin_tensor %43922 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %43923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %43924 = torch.aten.mul.Tensor %43923, %43920 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %43925 = torch_c.to_builtin_tensor %43924 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_41369 = arith.constant 1 : index
    %dim_41370 = tensor.dim %43925, %c1_41369 : tensor<4x?x1x64xcomplex<f32>>
    %43926 = flow.tensor.bitcast %43925 : tensor<4x?x1x64xcomplex<f32>>{%dim_41370} -> tensor<4x?x1x128xf32>{%dim_41370}
    %43927 = torch_c.from_builtin_tensor %43926 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %43927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_41371 = torch.constant.int 5
    %43928 = torch.prims.convert_element_type %43927, %int5_41371 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
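    // Device 5 replica (table %43847): same rotary-embedding pattern as device 0.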
    %int1_41372 = torch.constant.int 1
    %43929 = torch.aten.size.int %43533, %int1_41372 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_41373 = torch.constant.int 0
    %43930 = torch.aten.add.int %int0_41373, %43929 : !torch.int, !torch.int -> !torch.int
    %int0_41374 = torch.constant.int 0
    %int0_41375 = torch.constant.int 0
    %int1_41376 = torch.constant.int 1
    %43931 = torch.aten.slice.Tensor %43847, %int0_41374, %int0_41375, %43930, %int1_41376 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43931, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41377 = torch.constant.int 1
    %int0_41378 = torch.constant.int 0
    %int9223372036854775807_41379 = torch.constant.int 9223372036854775807
    %int1_41380 = torch.constant.int 1
    %43932 = torch.aten.slice.Tensor %43931, %int1_41377, %int0_41378, %int9223372036854775807_41379, %int1_41380 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43932, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41381 = torch.constant.int 0
    %43933 = torch.aten.unsqueeze %43932, %int0_41381 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43933, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41382 = torch.constant.int 2
    %43934 = torch.aten.unsqueeze %43933, %int2_41382 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43934, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41383 = torch.constant.int 3
    %int0_41384 = torch.constant.int 0
    %int9223372036854775807_41385 = torch.constant.int 9223372036854775807
    %int1_41386 = torch.constant.int 1
    %43935 = torch.aten.slice.Tensor %43934, %int3_41383, %int0_41384, %int9223372036854775807_41385, %int1_41386 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43935, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43936 = torch_c.to_builtin_tensor %43637 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_41387 = arith.constant 1 : index
    %dim_41388 = tensor.dim %43936, %c1_41387 : tensor<4x?x1x128xf16>
    %43937 = flow.tensor.bitcast %43936 : tensor<4x?x1x128xf16>{%dim_41388} -> tensor<4x?x1x64xcomplex<f16>>{%dim_41388}
    %43938 = torch_c.from_builtin_tensor %43937 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %43938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %43939 = torch.aten.mul.Tensor %43938, %43935 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %43940 = torch_c.to_builtin_tensor %43939 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_41389 = arith.constant 1 : index
    %dim_41390 = tensor.dim %43940, %c1_41389 : tensor<4x?x1x64xcomplex<f32>>
    %43941 = flow.tensor.bitcast %43940 : tensor<4x?x1x64xcomplex<f32>>{%dim_41390} -> tensor<4x?x1x128xf32>{%dim_41390}
    %43942 = torch_c.from_builtin_tensor %43941 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %43942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_41391 = torch.constant.int 5
    %43943 = torch.prims.convert_element_type %43942, %int5_41391 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
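    // Device 6 replica (table %43850): same rotary-embedding pattern as device 0.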
    %int1_41392 = torch.constant.int 1
    %43944 = torch.aten.size.int %43539, %int1_41392 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_41393 = torch.constant.int 0
    %43945 = torch.aten.add.int %int0_41393, %43944 : !torch.int, !torch.int -> !torch.int
    %int0_41394 = torch.constant.int 0
    %int0_41395 = torch.constant.int 0
    %int1_41396 = torch.constant.int 1
    %43946 = torch.aten.slice.Tensor %43850, %int0_41394, %int0_41395, %43945, %int1_41396 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43946, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41397 = torch.constant.int 1
    %int0_41398 = torch.constant.int 0
    %int9223372036854775807_41399 = torch.constant.int 9223372036854775807
    %int1_41400 = torch.constant.int 1
    %43947 = torch.aten.slice.Tensor %43946, %int1_41397, %int0_41398, %int9223372036854775807_41399, %int1_41400 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43947, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41401 = torch.constant.int 0
    %43948 = torch.aten.unsqueeze %43947, %int0_41401 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43948, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41402 = torch.constant.int 2
    %43949 = torch.aten.unsqueeze %43948, %int2_41402 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43949, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41403 = torch.constant.int 3
    %int0_41404 = torch.constant.int 0
    %int9223372036854775807_41405 = torch.constant.int 9223372036854775807
    %int1_41406 = torch.constant.int 1
    %43950 = torch.aten.slice.Tensor %43949, %int3_41403, %int0_41404, %int9223372036854775807_41405, %int1_41406 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43950, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43951 = torch_c.to_builtin_tensor %43639 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_41407 = arith.constant 1 : index
    %dim_41408 = tensor.dim %43951, %c1_41407 : tensor<4x?x1x128xf16>
    %43952 = flow.tensor.bitcast %43951 : tensor<4x?x1x128xf16>{%dim_41408} -> tensor<4x?x1x64xcomplex<f16>>{%dim_41408}
    %43953 = torch_c.from_builtin_tensor %43952 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %43953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %43954 = torch.aten.mul.Tensor %43953, %43950 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %43955 = torch_c.to_builtin_tensor %43954 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_41409 = arith.constant 1 : index
    %dim_41410 = tensor.dim %43955, %c1_41409 : tensor<4x?x1x64xcomplex<f32>>
    %43956 = flow.tensor.bitcast %43955 : tensor<4x?x1x64xcomplex<f32>>{%dim_41410} -> tensor<4x?x1x128xf32>{%dim_41410}
    %43957 = torch_c.from_builtin_tensor %43956 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %43957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_41411 = torch.constant.int 5
    %43958 = torch.prims.convert_element_type %43957, %int5_41411 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
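    // Device 7 replica (table %43853): same rotary-embedding pattern as device 0.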
    %int1_41412 = torch.constant.int 1
    %43959 = torch.aten.size.int %43545, %int1_41412 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_41413 = torch.constant.int 0
    %43960 = torch.aten.add.int %int0_41413, %43959 : !torch.int, !torch.int -> !torch.int
    %int0_41414 = torch.constant.int 0
    %int0_41415 = torch.constant.int 0
    %int1_41416 = torch.constant.int 1
    %43961 = torch.aten.slice.Tensor %43853, %int0_41414, %int0_41415, %43960, %int1_41416 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43961, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_41417 = torch.constant.int 1
    %int0_41418 = torch.constant.int 0
    %int9223372036854775807_41419 = torch.constant.int 9223372036854775807
    %int1_41420 = torch.constant.int 1
    %43962 = torch.aten.slice.Tensor %43961, %int1_41417, %int0_41418, %int9223372036854775807_41419, %int1_41420 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %43962, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_41421 = torch.constant.int 0
    %43963 = torch.aten.unsqueeze %43962, %int0_41421 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %43963, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_41422 = torch.constant.int 2
    %43964 = torch.aten.unsqueeze %43963, %int2_41422 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43964, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_41423 = torch.constant.int 3
    %int0_41424 = torch.constant.int 0
    %int9223372036854775807_41425 = torch.constant.int 9223372036854775807
    %int1_41426 = torch.constant.int 1
    %43965 = torch.aten.slice.Tensor %43964, %int3_41423, %int0_41424, %int9223372036854775807_41425, %int1_41426 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43965, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %43966 = torch_c.to_builtin_tensor %43641 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_41427 = arith.constant 1 : index
    %dim_41428 = tensor.dim %43966, %c1_41427 : tensor<4x?x1x128xf16>
    %43967 = flow.tensor.bitcast %43966 : tensor<4x?x1x128xf16>{%dim_41428} -> tensor<4x?x1x64xcomplex<f16>>{%dim_41428}
    %43968 = torch_c.from_builtin_tensor %43967 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %43968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %43969 = torch.aten.mul.Tensor %43968, %43965 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %43969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %43970 = torch_c.to_builtin_tensor %43969 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_41429 = arith.constant 1 : index
    %dim_41430 = tensor.dim %43970, %c1_41429 : tensor<4x?x1x64xcomplex<f32>>
    %43971 = flow.tensor.bitcast %43970 : tensor<4x?x1x64xcomplex<f32>>{%dim_41430} -> tensor<4x?x1x128xf32>{%dim_41430}
    %43972 = torch_c.from_builtin_tensor %43971 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %43972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_41431 = torch.constant.int 5
    %43973 = torch.prims.convert_element_type %43972, %int5_41431 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %43973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
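    // Page-index arithmetic, repeated per device: scale the [4, seq] page-id tensors
    // (%2364 .. %2385) by a stride of 64, then add a constant offset of 44. This looks like the
    // flat slot index for this attention block's entries in a paged KV cache, though the exact
    // cache layout is not stated in the IR.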
    %int64_41432 = torch.constant.int 64
    %43974 = torch.aten.mul.Scalar %2364, %int64_41432 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43974, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_41433 = torch.constant.int 64
    %43975 = torch.aten.mul.Scalar %2367, %int64_41433 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43975, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_41434 = torch.constant.int 64
    %43976 = torch.aten.mul.Scalar %2370, %int64_41434 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43976, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_41435 = torch.constant.int 64
    %43977 = torch.aten.mul.Scalar %2373, %int64_41435 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43977, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_41436 = torch.constant.int 64
    %43978 = torch.aten.mul.Scalar %2376, %int64_41436 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43978, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_41437 = torch.constant.int 64
    %43979 = torch.aten.mul.Scalar %2379, %int64_41437 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43979, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_41438 = torch.constant.int 64
    %43980 = torch.aten.mul.Scalar %2382, %int64_41438 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43980, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_41439 = torch.constant.int 64
    %43981 = torch.aten.mul.Scalar %2385, %int64_41439 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43981, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int44 = torch.constant.int 44
    %int1_41440 = torch.constant.int 1
    %43982 = torch.aten.add.Scalar %43974, %int44, %int1_41440 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43982, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int44_41441 = torch.constant.int 44
    %int1_41442 = torch.constant.int 1
    %43983 = torch.aten.add.Scalar %43975, %int44_41441, %int1_41442 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43983, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int44_41443 = torch.constant.int 44
    %int1_41444 = torch.constant.int 1
    %43984 = torch.aten.add.Scalar %43976, %int44_41443, %int1_41444 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43984, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int44_41445 = torch.constant.int 44
    %int1_41446 = torch.constant.int 1
    %43985 = torch.aten.add.Scalar %43977, %int44_41445, %int1_41446 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43985, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int44_41447 = torch.constant.int 44
    %int1_41448 = torch.constant.int 1
    %43986 = torch.aten.add.Scalar %43978, %int44_41447, %int1_41448 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43986, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int44_41449 = torch.constant.int 44
    %int1_41450 = torch.constant.int 1
    %43987 = torch.aten.add.Scalar %43979, %int44_41449, %int1_41450 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43987, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int44_41451 = torch.constant.int 44
    %int1_41452 = torch.constant.int 1
    %43988 = torch.aten.add.Scalar %43980, %int44_41451, %int1_41452 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43988, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int44_41453 = torch.constant.int 44
    %int1_41454 = torch.constant.int 1
    %43989 = torch.aten.add.Scalar %43981, %int44_41453, %int1_41454 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %43989, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
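    // Reshape each rotated [4, seq, 1, 128] tensor to the paged layout [4, pages, 16, 1, 128]
    // (16 tokens per page, seq = pages * 16); batch and page dims are collapsed below.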
    %int4_41455 = torch.constant.int 4
    %int16_41456 = torch.constant.int 16
    %int1_41457 = torch.constant.int 1
    %int128_41458 = torch.constant.int 128
    %43990 = torch.prim.ListConstruct %int4_41455, %3095, %int16_41456, %int1_41457, %int128_41458 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43991 = torch.aten.view %43868, %43990 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %43991, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41459 = torch.constant.int 4
    %int16_41460 = torch.constant.int 16
    %int1_41461 = torch.constant.int 1
    %int128_41462 = torch.constant.int 128
    %43992 = torch.prim.ListConstruct %int4_41459, %3095, %int16_41460, %int1_41461, %int128_41462 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43993 = torch.aten.view %43883, %43992 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %43993, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41463 = torch.constant.int 4
    %int16_41464 = torch.constant.int 16
    %int1_41465 = torch.constant.int 1
    %int128_41466 = torch.constant.int 128
    %43994 = torch.prim.ListConstruct %int4_41463, %3095, %int16_41464, %int1_41465, %int128_41466 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43995 = torch.aten.view %43898, %43994 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %43995, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41467 = torch.constant.int 4
    %int16_41468 = torch.constant.int 16
    %int1_41469 = torch.constant.int 1
    %int128_41470 = torch.constant.int 128
    %43996 = torch.prim.ListConstruct %int4_41467, %3095, %int16_41468, %int1_41469, %int128_41470 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43997 = torch.aten.view %43913, %43996 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %43997, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41471 = torch.constant.int 4
    %int16_41472 = torch.constant.int 16
    %int1_41473 = torch.constant.int 1
    %int128_41474 = torch.constant.int 128
    %43998 = torch.prim.ListConstruct %int4_41471, %3095, %int16_41472, %int1_41473, %int128_41474 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %43999 = torch.aten.view %43928, %43998 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %43999, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41475 = torch.constant.int 4
    %int16_41476 = torch.constant.int 16
    %int1_41477 = torch.constant.int 1
    %int128_41478 = torch.constant.int 128
    %44000 = torch.prim.ListConstruct %int4_41475, %3095, %int16_41476, %int1_41477, %int128_41478 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44001 = torch.aten.view %43943, %44000 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %44001, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41479 = torch.constant.int 4
    %int16_41480 = torch.constant.int 16
    %int1_41481 = torch.constant.int 1
    %int128_41482 = torch.constant.int 128
    %44002 = torch.prim.ListConstruct %int4_41479, %3095, %int16_41480, %int1_41481, %int128_41482 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44003 = torch.aten.view %43958, %44002 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %44003, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41483 = torch.constant.int 4
    %int16_41484 = torch.constant.int 16
    %int1_41485 = torch.constant.int 1
    %int128_41486 = torch.constant.int 128
    %44004 = torch.prim.ListConstruct %int4_41483, %3095, %int16_41484, %int1_41485, %int128_41486 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44005 = torch.aten.view %43973, %44004 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %44005, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
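    // Collapse batch and page dimensions: [4, pages, 16, 1, 128] -> [4 * pages, 16, 1, 128].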
    %int4_41487 = torch.constant.int 4
    %44006 = torch.aten.mul.int %int4_41487, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41488 = torch.constant.int 16
    %int1_41489 = torch.constant.int 1
    %int128_41490 = torch.constant.int 128
    %44007 = torch.prim.ListConstruct %44006, %int16_41488, %int1_41489, %int128_41490 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44008 = torch.aten.view %43991, %44007 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44008, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41491 = torch.constant.int 4
    %44009 = torch.aten.mul.int %int4_41491, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41492 = torch.constant.int 16
    %int1_41493 = torch.constant.int 1
    %int128_41494 = torch.constant.int 128
    %44010 = torch.prim.ListConstruct %44009, %int16_41492, %int1_41493, %int128_41494 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44011 = torch.aten.view %43993, %44010 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44011, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41495 = torch.constant.int 4
    %44012 = torch.aten.mul.int %int4_41495, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41496 = torch.constant.int 16
    %int1_41497 = torch.constant.int 1
    %int128_41498 = torch.constant.int 128
    %44013 = torch.prim.ListConstruct %44012, %int16_41496, %int1_41497, %int128_41498 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44014 = torch.aten.view %43995, %44013 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44014, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41499 = torch.constant.int 4
    %44015 = torch.aten.mul.int %int4_41499, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41500 = torch.constant.int 16
    %int1_41501 = torch.constant.int 1
    %int128_41502 = torch.constant.int 128
    %44016 = torch.prim.ListConstruct %44015, %int16_41500, %int1_41501, %int128_41502 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44017 = torch.aten.view %43997, %44016 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44017, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41503 = torch.constant.int 4
    %44018 = torch.aten.mul.int %int4_41503, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41504 = torch.constant.int 16
    %int1_41505 = torch.constant.int 1
    %int128_41506 = torch.constant.int 128
    %44019 = torch.prim.ListConstruct %44018, %int16_41504, %int1_41505, %int128_41506 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44020 = torch.aten.view %43999, %44019 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44020, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41507 = torch.constant.int 4
    %44021 = torch.aten.mul.int %int4_41507, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41508 = torch.constant.int 16
    %int1_41509 = torch.constant.int 1
    %int128_41510 = torch.constant.int 128
    %44022 = torch.prim.ListConstruct %44021, %int16_41508, %int1_41509, %int128_41510 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44023 = torch.aten.view %44001, %44022 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44023, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41511 = torch.constant.int 4
    %44024 = torch.aten.mul.int %int4_41511, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41512 = torch.constant.int 16
    %int1_41513 = torch.constant.int 1
    %int128_41514 = torch.constant.int 128
    %44025 = torch.prim.ListConstruct %44024, %int16_41512, %int1_41513, %int128_41514 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44026 = torch.aten.view %44003, %44025 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44026, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41515 = torch.constant.int 4
    %44027 = torch.aten.mul.int %int4_41515, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41516 = torch.constant.int 16
    %int1_41517 = torch.constant.int 1
    %int128_41518 = torch.constant.int 128
    %44028 = torch.prim.ListConstruct %44027, %int16_41516, %int1_41517, %int128_41518 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44029 = torch.aten.view %44005, %44028 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44029, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
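    // Flatten the [4, pages] index tensors to [4 * pages] to match the flattened data layout.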
    %int4_41519 = torch.constant.int 4
    %44030 = torch.aten.mul.int %int4_41519, %3095 : !torch.int, !torch.int -> !torch.int
    %44031 = torch.prim.ListConstruct %44030 : (!torch.int) -> !torch.list<int>
    %44032 = torch.aten.view %43982, %44031 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44032, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41520 = torch.constant.int 4
    %44033 = torch.aten.mul.int %int4_41520, %3095 : !torch.int, !torch.int -> !torch.int
    %44034 = torch.prim.ListConstruct %44033 : (!torch.int) -> !torch.list<int>
    %44035 = torch.aten.view %43983, %44034 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44035, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41521 = torch.constant.int 4
    %44036 = torch.aten.mul.int %int4_41521, %3095 : !torch.int, !torch.int -> !torch.int
    %44037 = torch.prim.ListConstruct %44036 : (!torch.int) -> !torch.list<int>
    %44038 = torch.aten.view %43984, %44037 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44038, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41522 = torch.constant.int 4
    %44039 = torch.aten.mul.int %int4_41522, %3095 : !torch.int, !torch.int -> !torch.int
    %44040 = torch.prim.ListConstruct %44039 : (!torch.int) -> !torch.list<int>
    %44041 = torch.aten.view %43985, %44040 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44041, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41523 = torch.constant.int 4
    %44042 = torch.aten.mul.int %int4_41523, %3095 : !torch.int, !torch.int -> !torch.int
    %44043 = torch.prim.ListConstruct %44042 : (!torch.int) -> !torch.list<int>
    %44044 = torch.aten.view %43986, %44043 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44044, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41524 = torch.constant.int 4
    %44045 = torch.aten.mul.int %int4_41524, %3095 : !torch.int, !torch.int -> !torch.int
    %44046 = torch.prim.ListConstruct %44045 : (!torch.int) -> !torch.list<int>
    %44047 = torch.aten.view %43987, %44046 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44047, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41525 = torch.constant.int 4
    %44048 = torch.aten.mul.int %int4_41525, %3095 : !torch.int, !torch.int -> !torch.int
    %44049 = torch.prim.ListConstruct %44048 : (!torch.int) -> !torch.list<int>
    %44050 = torch.aten.view %43988, %44049 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44050, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41526 = torch.constant.int 4
    %44051 = torch.aten.mul.int %int4_41526, %3095 : !torch.int, !torch.int -> !torch.int
    %44052 = torch.prim.ListConstruct %44051 : (!torch.int) -> !torch.list<int>
    %44053 = torch.aten.view %43989, %44052 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44053, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
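    // The same paged reshape applied to %43643 .. %43657, presumably the per-device value
    // projections, which do not pass through the rotary embedding above.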
    %int4_41527 = torch.constant.int 4
    %int16_41528 = torch.constant.int 16
    %int1_41529 = torch.constant.int 1
    %int128_41530 = torch.constant.int 128
    %44054 = torch.prim.ListConstruct %int4_41527, %3095, %int16_41528, %int1_41529, %int128_41530 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44055 = torch.aten.view %43643, %44054 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %44055, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41531 = torch.constant.int 4
    %int16_41532 = torch.constant.int 16
    %int1_41533 = torch.constant.int 1
    %int128_41534 = torch.constant.int 128
    %44056 = torch.prim.ListConstruct %int4_41531, %3095, %int16_41532, %int1_41533, %int128_41534 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44057 = torch.aten.view %43645, %44056 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %44057, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41535 = torch.constant.int 4
    %int16_41536 = torch.constant.int 16
    %int1_41537 = torch.constant.int 1
    %int128_41538 = torch.constant.int 128
    %44058 = torch.prim.ListConstruct %int4_41535, %3095, %int16_41536, %int1_41537, %int128_41538 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44059 = torch.aten.view %43647, %44058 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %44059, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41539 = torch.constant.int 4
    %int16_41540 = torch.constant.int 16
    %int1_41541 = torch.constant.int 1
    %int128_41542 = torch.constant.int 128
    %44060 = torch.prim.ListConstruct %int4_41539, %3095, %int16_41540, %int1_41541, %int128_41542 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44061 = torch.aten.view %43649, %44060 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %44061, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41543 = torch.constant.int 4
    %int16_41544 = torch.constant.int 16
    %int1_41545 = torch.constant.int 1
    %int128_41546 = torch.constant.int 128
    %44062 = torch.prim.ListConstruct %int4_41543, %3095, %int16_41544, %int1_41545, %int128_41546 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44063 = torch.aten.view %43651, %44062 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %44063, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41547 = torch.constant.int 4
    %int16_41548 = torch.constant.int 16
    %int1_41549 = torch.constant.int 1
    %int128_41550 = torch.constant.int 128
    %44064 = torch.prim.ListConstruct %int4_41547, %3095, %int16_41548, %int1_41549, %int128_41550 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44065 = torch.aten.view %43653, %44064 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %44065, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41551 = torch.constant.int 4
    %int16_41552 = torch.constant.int 16
    %int1_41553 = torch.constant.int 1
    %int128_41554 = torch.constant.int 128
    %44066 = torch.prim.ListConstruct %int4_41551, %3095, %int16_41552, %int1_41553, %int128_41554 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44067 = torch.aten.view %43655, %44066 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %44067, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_41555 = torch.constant.int 4
    %int16_41556 = torch.constant.int 16
    %int1_41557 = torch.constant.int 1
    %int128_41558 = torch.constant.int 128
    %44068 = torch.prim.ListConstruct %int4_41555, %3095, %int16_41556, %int1_41557, %int128_41558 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44069 = torch.aten.view %43657, %44068 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %44069, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
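    // Collapse the batch dim so the pages can be scattered with one index
    // list per shard: [4, s0, 16, 1, 128] -> [4*s0, 16, 1, 128].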
    %int4_41559 = torch.constant.int 4
    %44070 = torch.aten.mul.int %int4_41559, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41560 = torch.constant.int 16
    %int1_41561 = torch.constant.int 1
    %int128_41562 = torch.constant.int 128
    %44071 = torch.prim.ListConstruct %44070, %int16_41560, %int1_41561, %int128_41562 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44072 = torch.aten.view %44055, %44071 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44072, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41563 = torch.constant.int 4
    %44073 = torch.aten.mul.int %int4_41563, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41564 = torch.constant.int 16
    %int1_41565 = torch.constant.int 1
    %int128_41566 = torch.constant.int 128
    %44074 = torch.prim.ListConstruct %44073, %int16_41564, %int1_41565, %int128_41566 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44075 = torch.aten.view %44057, %44074 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44075, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41567 = torch.constant.int 4
    %44076 = torch.aten.mul.int %int4_41567, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41568 = torch.constant.int 16
    %int1_41569 = torch.constant.int 1
    %int128_41570 = torch.constant.int 128
    %44077 = torch.prim.ListConstruct %44076, %int16_41568, %int1_41569, %int128_41570 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44078 = torch.aten.view %44059, %44077 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44078, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41571 = torch.constant.int 4
    %44079 = torch.aten.mul.int %int4_41571, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41572 = torch.constant.int 16
    %int1_41573 = torch.constant.int 1
    %int128_41574 = torch.constant.int 128
    %44080 = torch.prim.ListConstruct %44079, %int16_41572, %int1_41573, %int128_41574 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44081 = torch.aten.view %44061, %44080 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44081, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41575 = torch.constant.int 4
    %44082 = torch.aten.mul.int %int4_41575, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41576 = torch.constant.int 16
    %int1_41577 = torch.constant.int 1
    %int128_41578 = torch.constant.int 128
    %44083 = torch.prim.ListConstruct %44082, %int16_41576, %int1_41577, %int128_41578 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44084 = torch.aten.view %44063, %44083 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44084, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41579 = torch.constant.int 4
    %44085 = torch.aten.mul.int %int4_41579, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41580 = torch.constant.int 16
    %int1_41581 = torch.constant.int 1
    %int128_41582 = torch.constant.int 128
    %44086 = torch.prim.ListConstruct %44085, %int16_41580, %int1_41581, %int128_41582 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44087 = torch.aten.view %44065, %44086 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44087, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41583 = torch.constant.int 4
    %44088 = torch.aten.mul.int %int4_41583, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41584 = torch.constant.int 16
    %int1_41585 = torch.constant.int 1
    %int128_41586 = torch.constant.int 128
    %44089 = torch.prim.ListConstruct %44088, %int16_41584, %int1_41585, %int128_41586 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44090 = torch.aten.view %44067, %44089 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44090, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_41587 = torch.constant.int 4
    %44091 = torch.aten.mul.int %int4_41587, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_41588 = torch.constant.int 16
    %int1_41589 = torch.constant.int 1
    %int128_41590 = torch.constant.int 128
    %44092 = torch.prim.ListConstruct %44091, %int16_41588, %int1_41589, %int128_41590 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44093 = torch.aten.view %44069, %44092 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44093, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
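    // Second half of the write indices: add 1 to each page index. In the
    // flattened [pages*64, 16, 1, 128] cache view below, consecutive rows
    // appear to be the K and V partitions of the same (page, block) slot,
    // so index+1 addresses the V slot next to each K slot.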
    %int1_41591 = torch.constant.int 1
    %int1_41592 = torch.constant.int 1
    %44094 = torch.aten.add.Scalar %43982, %int1_41591, %int1_41592 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %44094, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_41593 = torch.constant.int 1
    %int1_41594 = torch.constant.int 1
    %44095 = torch.aten.add.Scalar %43983, %int1_41593, %int1_41594 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %44095, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_41595 = torch.constant.int 1
    %int1_41596 = torch.constant.int 1
    %44096 = torch.aten.add.Scalar %43984, %int1_41595, %int1_41596 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %44096, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_41597 = torch.constant.int 1
    %int1_41598 = torch.constant.int 1
    %44097 = torch.aten.add.Scalar %43985, %int1_41597, %int1_41598 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %44097, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_41599 = torch.constant.int 1
    %int1_41600 = torch.constant.int 1
    %44098 = torch.aten.add.Scalar %43986, %int1_41599, %int1_41600 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %44098, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_41601 = torch.constant.int 1
    %int1_41602 = torch.constant.int 1
    %44099 = torch.aten.add.Scalar %43987, %int1_41601, %int1_41602 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %44099, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_41603 = torch.constant.int 1
    %int1_41604 = torch.constant.int 1
    %44100 = torch.aten.add.Scalar %43988, %int1_41603, %int1_41604 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %44100, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_41605 = torch.constant.int 1
    %int1_41606 = torch.constant.int 1
    %44101 = torch.aten.add.Scalar %43989, %int1_41605, %int1_41606 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %44101, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
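    // Flatten the offset index tensors to match the flattened cache rows:
    // [4, s0] -> [4*s0].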
    %int4_41607 = torch.constant.int 4
    %44102 = torch.aten.mul.int %int4_41607, %3095 : !torch.int, !torch.int -> !torch.int
    %44103 = torch.prim.ListConstruct %44102 : (!torch.int) -> !torch.list<int>
    %44104 = torch.aten.view %44094, %44103 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44104, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41608 = torch.constant.int 4
    %44105 = torch.aten.mul.int %int4_41608, %3095 : !torch.int, !torch.int -> !torch.int
    %44106 = torch.prim.ListConstruct %44105 : (!torch.int) -> !torch.list<int>
    %44107 = torch.aten.view %44095, %44106 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44107, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41609 = torch.constant.int 4
    %44108 = torch.aten.mul.int %int4_41609, %3095 : !torch.int, !torch.int -> !torch.int
    %44109 = torch.prim.ListConstruct %44108 : (!torch.int) -> !torch.list<int>
    %44110 = torch.aten.view %44096, %44109 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44110, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41610 = torch.constant.int 4
    %44111 = torch.aten.mul.int %int4_41610, %3095 : !torch.int, !torch.int -> !torch.int
    %44112 = torch.prim.ListConstruct %44111 : (!torch.int) -> !torch.list<int>
    %44113 = torch.aten.view %44097, %44112 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44113, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41611 = torch.constant.int 4
    %44114 = torch.aten.mul.int %int4_41611, %3095 : !torch.int, !torch.int -> !torch.int
    %44115 = torch.prim.ListConstruct %44114 : (!torch.int) -> !torch.list<int>
    %44116 = torch.aten.view %44098, %44115 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44116, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41612 = torch.constant.int 4
    %44117 = torch.aten.mul.int %int4_41612, %3095 : !torch.int, !torch.int -> !torch.int
    %44118 = torch.prim.ListConstruct %44117 : (!torch.int) -> !torch.list<int>
    %44119 = torch.aten.view %44099, %44118 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44119, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41613 = torch.constant.int 4
    %44120 = torch.aten.mul.int %int4_41613, %3095 : !torch.int, !torch.int -> !torch.int
    %44121 = torch.prim.ListConstruct %44120 : (!torch.int) -> !torch.list<int>
    %44122 = torch.aten.view %44100, %44121 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44122, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_41614 = torch.constant.int 4
    %44123 = torch.aten.mul.int %int4_41614, %3095 : !torch.int, !torch.int -> !torch.int
    %44124 = torch.prim.ListConstruct %44123 : (!torch.int) -> !torch.list<int>
    %44125 = torch.aten.view %44101, %44124 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44125, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
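    // Fuse both index halves per shard into a single [8*s0] index vector
    // (4*s0 base indices ++ 4*s0 offset-by-one indices).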
    %44126 = torch.prim.ListConstruct %44032, %44104 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_41615 = torch.constant.int 0
    %44127 = torch.aten.cat %44126, %int0_41615 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44127, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %44128 = torch.prim.ListConstruct %44035, %44107 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_41616 = torch.constant.int 0
    %44129 = torch.aten.cat %44128, %int0_41616 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44129, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %44130 = torch.prim.ListConstruct %44038, %44110 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_41617 = torch.constant.int 0
    %44131 = torch.aten.cat %44130, %int0_41617 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44131, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %44132 = torch.prim.ListConstruct %44041, %44113 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_41618 = torch.constant.int 0
    %44133 = torch.aten.cat %44132, %int0_41618 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44133, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %44134 = torch.prim.ListConstruct %44044, %44116 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_41619 = torch.constant.int 0
    %44135 = torch.aten.cat %44134, %int0_41619 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44135, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %44136 = torch.prim.ListConstruct %44047, %44119 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_41620 = torch.constant.int 0
    %44137 = torch.aten.cat %44136, %int0_41620 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44137, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %44138 = torch.prim.ListConstruct %44050, %44122 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_41621 = torch.constant.int 0
    %44139 = torch.aten.cat %44138, %int0_41621 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44139, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %44140 = torch.prim.ListConstruct %44053, %44125 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_41622 = torch.constant.int 0
    %44141 = torch.aten.cat %44140, %int0_41622 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %44141, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
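    // Likewise fuse the paged update values per shard: two
    // [4*s0, 16, 1, 128] slabs -> [8*s0, 16, 1, 128], lining up row-for-row
    // with the fused index vectors above.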
    %44142 = torch.prim.ListConstruct %44008, %44072 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_41623 = torch.constant.int 0
    %44143 = torch.aten.cat %44142, %int0_41623 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44143, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44144 = torch.prim.ListConstruct %44011, %44075 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_41624 = torch.constant.int 0
    %44145 = torch.aten.cat %44144, %int0_41624 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44145, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44146 = torch.prim.ListConstruct %44014, %44078 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_41625 = torch.constant.int 0
    %44147 = torch.aten.cat %44146, %int0_41625 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44147, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44148 = torch.prim.ListConstruct %44017, %44081 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_41626 = torch.constant.int 0
    %44149 = torch.aten.cat %44148, %int0_41626 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44149, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44150 = torch.prim.ListConstruct %44020, %44084 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_41627 = torch.constant.int 0
    %44151 = torch.aten.cat %44150, %int0_41627 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44151, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44152 = torch.prim.ListConstruct %44023, %44087 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_41628 = torch.constant.int 0
    %44153 = torch.aten.cat %44152, %int0_41628 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44153, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44154 = torch.prim.ListConstruct %44026, %44090 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_41629 = torch.constant.int 0
    %44155 = torch.aten.cat %44154, %int0_41629 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44155, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44156 = torch.prim.ListConstruct %44029, %44093 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_41630 = torch.constant.int 0
    %44157 = torch.aten.cat %44156, %int0_41630 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44157, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
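    // Scatter the fused update into each shard's cache. Per shard: view the
    // flat cache [pages, 131072] as [pages, 32, 2, 16, 1, 128] (presumably
    // 32 transformer blocks x {K,V} x 16 tokens/page x 1 head x 128 dims;
    // 32*2*16*1*128 = 131072), flatten to [pages*64, 16, 1, 128], index_put
    // at the fused indices, then view back to the flat layout. Shard 0: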
    %int32_41631 = torch.constant.int 32
    %int2_41632 = torch.constant.int 2
    %int16_41633 = torch.constant.int 16
    %int1_41634 = torch.constant.int 1
    %int128_41635 = torch.constant.int 128
    %44158 = torch.prim.ListConstruct %3023, %int32_41631, %int2_41632, %int16_41633, %int1_41634, %int128_41635 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44159 = torch.aten.view %42308, %44158 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44159, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_41636 = torch.constant.int 32
    %44160 = torch.aten.mul.int %3023, %int32_41636 : !torch.int, !torch.int -> !torch.int
    %int2_41637 = torch.constant.int 2
    %44161 = torch.aten.mul.int %44160, %int2_41637 : !torch.int, !torch.int -> !torch.int
    %int16_41638 = torch.constant.int 16
    %int1_41639 = torch.constant.int 1
    %int128_41640 = torch.constant.int 128
    %44162 = torch.prim.ListConstruct %44161, %int16_41638, %int1_41639, %int128_41640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44163 = torch.aten.view %44159, %44162 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44163, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44164 = torch.prim.ListConstruct %44127 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_41641 = torch.constant.bool false
    %44165 = torch.aten.index_put %44163, %44164, %44143, %false_41641 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44165, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_41642 = torch.constant.int 32
    %int2_41643 = torch.constant.int 2
    %int16_41644 = torch.constant.int 16
    %int1_41645 = torch.constant.int 1
    %int128_41646 = torch.constant.int 128
    %44166 = torch.prim.ListConstruct %3023, %int32_41642, %int2_41643, %int16_41644, %int1_41645, %int128_41646 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44167 = torch.aten.view %44165, %44166 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44167, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_41647 = torch.constant.int 131072
    %44168 = torch.prim.ListConstruct %3023, %int131072_41647 : (!torch.int, !torch.int) -> !torch.list<int>
    %44169 = torch.aten.view %44167, %44168 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %44169, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
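    // Shard 1: same scatter pattern applied to cache slab %42320.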
    %int32_41648 = torch.constant.int 32
    %int2_41649 = torch.constant.int 2
    %int16_41650 = torch.constant.int 16
    %int1_41651 = torch.constant.int 1
    %int128_41652 = torch.constant.int 128
    %44170 = torch.prim.ListConstruct %3026, %int32_41648, %int2_41649, %int16_41650, %int1_41651, %int128_41652 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44171 = torch.aten.view %42320, %44170 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44171, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_41653 = torch.constant.int 32
    %44172 = torch.aten.mul.int %3026, %int32_41653 : !torch.int, !torch.int -> !torch.int
    %int2_41654 = torch.constant.int 2
    %44173 = torch.aten.mul.int %44172, %int2_41654 : !torch.int, !torch.int -> !torch.int
    %int16_41655 = torch.constant.int 16
    %int1_41656 = torch.constant.int 1
    %int128_41657 = torch.constant.int 128
    %44174 = torch.prim.ListConstruct %44173, %int16_41655, %int1_41656, %int128_41657 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44175 = torch.aten.view %44171, %44174 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44175, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44176 = torch.prim.ListConstruct %44129 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_41658 = torch.constant.bool false
    %44177 = torch.aten.index_put %44175, %44176, %44145, %false_41658 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44177, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_41659 = torch.constant.int 32
    %int2_41660 = torch.constant.int 2
    %int16_41661 = torch.constant.int 16
    %int1_41662 = torch.constant.int 1
    %int128_41663 = torch.constant.int 128
    %44178 = torch.prim.ListConstruct %3026, %int32_41659, %int2_41660, %int16_41661, %int1_41662, %int128_41663 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44179 = torch.aten.view %44177, %44178 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44179, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_41664 = torch.constant.int 131072
    %44180 = torch.prim.ListConstruct %3026, %int131072_41664 : (!torch.int, !torch.int) -> !torch.list<int>
    %44181 = torch.aten.view %44179, %44180 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %44181, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
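    // Shard 2: same scatter pattern applied to cache slab %42332.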
    %int32_41665 = torch.constant.int 32
    %int2_41666 = torch.constant.int 2
    %int16_41667 = torch.constant.int 16
    %int1_41668 = torch.constant.int 1
    %int128_41669 = torch.constant.int 128
    %44182 = torch.prim.ListConstruct %3029, %int32_41665, %int2_41666, %int16_41667, %int1_41668, %int128_41669 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44183 = torch.aten.view %42332, %44182 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44183, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_41670 = torch.constant.int 32
    %44184 = torch.aten.mul.int %3029, %int32_41670 : !torch.int, !torch.int -> !torch.int
    %int2_41671 = torch.constant.int 2
    %44185 = torch.aten.mul.int %44184, %int2_41671 : !torch.int, !torch.int -> !torch.int
    %int16_41672 = torch.constant.int 16
    %int1_41673 = torch.constant.int 1
    %int128_41674 = torch.constant.int 128
    %44186 = torch.prim.ListConstruct %44185, %int16_41672, %int1_41673, %int128_41674 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44187 = torch.aten.view %44183, %44186 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44187, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44188 = torch.prim.ListConstruct %44131 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_41675 = torch.constant.bool false
    %44189 = torch.aten.index_put %44187, %44188, %44147, %false_41675 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44189, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_41676 = torch.constant.int 32
    %int2_41677 = torch.constant.int 2
    %int16_41678 = torch.constant.int 16
    %int1_41679 = torch.constant.int 1
    %int128_41680 = torch.constant.int 128
    %44190 = torch.prim.ListConstruct %3029, %int32_41676, %int2_41677, %int16_41678, %int1_41679, %int128_41680 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44191 = torch.aten.view %44189, %44190 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44191, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_41681 = torch.constant.int 131072
    %44192 = torch.prim.ListConstruct %3029, %int131072_41681 : (!torch.int, !torch.int) -> !torch.list<int>
    %44193 = torch.aten.view %44191, %44192 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %44193, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
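    // Shard 3: same scatter pattern applied to cache slab %42344.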
    %int32_41682 = torch.constant.int 32
    %int2_41683 = torch.constant.int 2
    %int16_41684 = torch.constant.int 16
    %int1_41685 = torch.constant.int 1
    %int128_41686 = torch.constant.int 128
    %44194 = torch.prim.ListConstruct %3032, %int32_41682, %int2_41683, %int16_41684, %int1_41685, %int128_41686 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44195 = torch.aten.view %42344, %44194 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44195, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_41687 = torch.constant.int 32
    %44196 = torch.aten.mul.int %3032, %int32_41687 : !torch.int, !torch.int -> !torch.int
    %int2_41688 = torch.constant.int 2
    %44197 = torch.aten.mul.int %44196, %int2_41688 : !torch.int, !torch.int -> !torch.int
    %int16_41689 = torch.constant.int 16
    %int1_41690 = torch.constant.int 1
    %int128_41691 = torch.constant.int 128
    %44198 = torch.prim.ListConstruct %44197, %int16_41689, %int1_41690, %int128_41691 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44199 = torch.aten.view %44195, %44198 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44199, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44200 = torch.prim.ListConstruct %44133 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_41692 = torch.constant.bool false
    %44201 = torch.aten.index_put %44199, %44200, %44149, %false_41692 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44201, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_41693 = torch.constant.int 32
    %int2_41694 = torch.constant.int 2
    %int16_41695 = torch.constant.int 16
    %int1_41696 = torch.constant.int 1
    %int128_41697 = torch.constant.int 128
    %44202 = torch.prim.ListConstruct %3032, %int32_41693, %int2_41694, %int16_41695, %int1_41696, %int128_41697 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44203 = torch.aten.view %44201, %44202 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44203, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_41698 = torch.constant.int 131072
    %44204 = torch.prim.ListConstruct %3032, %int131072_41698 : (!torch.int, !torch.int) -> !torch.list<int>
    %44205 = torch.aten.view %44203, %44204 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %44205, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
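    // Shard 4: same scatter pattern applied to cache slab %42356.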
    %int32_41699 = torch.constant.int 32
    %int2_41700 = torch.constant.int 2
    %int16_41701 = torch.constant.int 16
    %int1_41702 = torch.constant.int 1
    %int128_41703 = torch.constant.int 128
    %44206 = torch.prim.ListConstruct %3035, %int32_41699, %int2_41700, %int16_41701, %int1_41702, %int128_41703 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44207 = torch.aten.view %42356, %44206 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44207, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_41704 = torch.constant.int 32
    %44208 = torch.aten.mul.int %3035, %int32_41704 : !torch.int, !torch.int -> !torch.int
    %int2_41705 = torch.constant.int 2
    %44209 = torch.aten.mul.int %44208, %int2_41705 : !torch.int, !torch.int -> !torch.int
    %int16_41706 = torch.constant.int 16
    %int1_41707 = torch.constant.int 1
    %int128_41708 = torch.constant.int 128
    %44210 = torch.prim.ListConstruct %44209, %int16_41706, %int1_41707, %int128_41708 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44211 = torch.aten.view %44207, %44210 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44211, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44212 = torch.prim.ListConstruct %44135 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_41709 = torch.constant.bool false
    %44213 = torch.aten.index_put %44211, %44212, %44151, %false_41709 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44213, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_41710 = torch.constant.int 32
    %int2_41711 = torch.constant.int 2
    %int16_41712 = torch.constant.int 16
    %int1_41713 = torch.constant.int 1
    %int128_41714 = torch.constant.int 128
    %44214 = torch.prim.ListConstruct %3035, %int32_41710, %int2_41711, %int16_41712, %int1_41713, %int128_41714 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44215 = torch.aten.view %44213, %44214 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44215, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_41715 = torch.constant.int 131072
    %44216 = torch.prim.ListConstruct %3035, %int131072_41715 : (!torch.int, !torch.int) -> !torch.list<int>
    %44217 = torch.aten.view %44215, %44216 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %44217, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
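    // Shard 5: same scatter pattern applied to cache slab %42368.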
    %int32_41716 = torch.constant.int 32
    %int2_41717 = torch.constant.int 2
    %int16_41718 = torch.constant.int 16
    %int1_41719 = torch.constant.int 1
    %int128_41720 = torch.constant.int 128
    %44218 = torch.prim.ListConstruct %3038, %int32_41716, %int2_41717, %int16_41718, %int1_41719, %int128_41720 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44219 = torch.aten.view %42368, %44218 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44219, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_41721 = torch.constant.int 32
    %44220 = torch.aten.mul.int %3038, %int32_41721 : !torch.int, !torch.int -> !torch.int
    %int2_41722 = torch.constant.int 2
    %44221 = torch.aten.mul.int %44220, %int2_41722 : !torch.int, !torch.int -> !torch.int
    %int16_41723 = torch.constant.int 16
    %int1_41724 = torch.constant.int 1
    %int128_41725 = torch.constant.int 128
    %44222 = torch.prim.ListConstruct %44221, %int16_41723, %int1_41724, %int128_41725 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44223 = torch.aten.view %44219, %44222 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44223, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44224 = torch.prim.ListConstruct %44137 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_41726 = torch.constant.bool false
    %44225 = torch.aten.index_put %44223, %44224, %44153, %false_41726 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44225, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_41727 = torch.constant.int 32
    %int2_41728 = torch.constant.int 2
    %int16_41729 = torch.constant.int 16
    %int1_41730 = torch.constant.int 1
    %int128_41731 = torch.constant.int 128
    %44226 = torch.prim.ListConstruct %3038, %int32_41727, %int2_41728, %int16_41729, %int1_41730, %int128_41731 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44227 = torch.aten.view %44225, %44226 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44227, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_41732 = torch.constant.int 131072
    %44228 = torch.prim.ListConstruct %3038, %int131072_41732 : (!torch.int, !torch.int) -> !torch.list<int>
    %44229 = torch.aten.view %44227, %44228 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %44229, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
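    // Shard 6: same scatter pattern applied to cache slab %42380.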
    %int32_41733 = torch.constant.int 32
    %int2_41734 = torch.constant.int 2
    %int16_41735 = torch.constant.int 16
    %int1_41736 = torch.constant.int 1
    %int128_41737 = torch.constant.int 128
    %44230 = torch.prim.ListConstruct %3041, %int32_41733, %int2_41734, %int16_41735, %int1_41736, %int128_41737 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44231 = torch.aten.view %42380, %44230 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44231, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_41738 = torch.constant.int 32
    %44232 = torch.aten.mul.int %3041, %int32_41738 : !torch.int, !torch.int -> !torch.int
    %int2_41739 = torch.constant.int 2
    %44233 = torch.aten.mul.int %44232, %int2_41739 : !torch.int, !torch.int -> !torch.int
    %int16_41740 = torch.constant.int 16
    %int1_41741 = torch.constant.int 1
    %int128_41742 = torch.constant.int 128
    %44234 = torch.prim.ListConstruct %44233, %int16_41740, %int1_41741, %int128_41742 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44235 = torch.aten.view %44231, %44234 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44235, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44236 = torch.prim.ListConstruct %44139 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_41743 = torch.constant.bool false
    %44237 = torch.aten.index_put %44235, %44236, %44155, %false_41743 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44237, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_41744 = torch.constant.int 32
    %int2_41745 = torch.constant.int 2
    %int16_41746 = torch.constant.int 16
    %int1_41747 = torch.constant.int 1
    %int128_41748 = torch.constant.int 128
    %44238 = torch.prim.ListConstruct %3041, %int32_41744, %int2_41745, %int16_41746, %int1_41747, %int128_41748 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44239 = torch.aten.view %44237, %44238 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44239, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_41749 = torch.constant.int 131072
    %44240 = torch.prim.ListConstruct %3041, %int131072_41749 : (!torch.int, !torch.int) -> !torch.list<int>
    %44241 = torch.aten.view %44239, %44240 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %44241, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
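    // Shard 7: same scatter pattern applied to cache slab %42392.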
    %int32_41750 = torch.constant.int 32
    %int2_41751 = torch.constant.int 2
    %int16_41752 = torch.constant.int 16
    %int1_41753 = torch.constant.int 1
    %int128_41754 = torch.constant.int 128
    %44242 = torch.prim.ListConstruct %3044, %int32_41750, %int2_41751, %int16_41752, %int1_41753, %int128_41754 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44243 = torch.aten.view %42392, %44242 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44243, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_41755 = torch.constant.int 32
    %44244 = torch.aten.mul.int %3044, %int32_41755 : !torch.int, !torch.int -> !torch.int
    %int2_41756 = torch.constant.int 2
    %44245 = torch.aten.mul.int %44244, %int2_41756 : !torch.int, !torch.int -> !torch.int
    %int16_41757 = torch.constant.int 16
    %int1_41758 = torch.constant.int 1
    %int128_41759 = torch.constant.int 128
    %44246 = torch.prim.ListConstruct %44245, %int16_41757, %int1_41758, %int128_41759 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44247 = torch.aten.view %44243, %44246 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44247, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %44248 = torch.prim.ListConstruct %44141 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_41760 = torch.constant.bool false
    %44249 = torch.aten.index_put %44247, %44248, %44157, %false_41760 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %44249, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_41761 = torch.constant.int 32
    %int2_41762 = torch.constant.int 2
    %int16_41763 = torch.constant.int 16
    %int1_41764 = torch.constant.int 1
    %int128_41765 = torch.constant.int 128
    %44250 = torch.prim.ListConstruct %3044, %int32_41761, %int2_41762, %int16_41763, %int1_41764, %int128_41765 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44251 = torch.aten.view %44249, %44250 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %44251, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_41766 = torch.constant.int 131072
    %44252 = torch.prim.ListConstruct %3044, %int131072_41766 : (!torch.int, !torch.int) -> !torch.list<int>
    %44253 = torch.aten.view %44251, %44252 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %44253, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
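    // Cache writes done. The ops below prepare per-shard tensors for
    // attention: unsqueeze [4, 16*s0, 1, 128] -> [4, 16*s0, 1, 1, 128]
    // ahead of the head broadcast that follows.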
    %int-2_41767 = torch.constant.int -2
    %44254 = torch.aten.unsqueeze %43868, %int-2_41767 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41768 = torch.constant.int -2
    %44255 = torch.aten.unsqueeze %43883, %int-2_41768 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41769 = torch.constant.int -2
    %44256 = torch.aten.unsqueeze %43898, %int-2_41769 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41770 = torch.constant.int -2
    %44257 = torch.aten.unsqueeze %43913, %int-2_41770 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41771 = torch.constant.int -2
    %44258 = torch.aten.unsqueeze %43928, %int-2_41771 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41772 = torch.constant.int -2
    %44259 = torch.aten.unsqueeze %43943, %int-2_41772 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41773 = torch.constant.int -2
    %44260 = torch.aten.unsqueeze %43958, %int-2_41773 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41774 = torch.constant.int -2
    %44261 = torch.aten.unsqueeze %43973, %int-2_41774 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
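    // Broadcast the single KV head across 4 heads per shard:
    // [4, 16*s0, 1, 1, 128] -> [4, 16*s0, 1, 4, 128] via a zero-copy expand.
    // The factor of 4 is consistent with grouped-query attention (4 query
    // heads per KV head per shard), though the IR does not say so.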
    %int4_41775 = torch.constant.int 4
    %int1_41776 = torch.constant.int 1
    %int4_41777 = torch.constant.int 4
    %int128_41778 = torch.constant.int 128
    %44262 = torch.prim.ListConstruct %int4_41775, %43854, %int1_41776, %int4_41777, %int128_41778 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41779 = torch.constant.bool false
    %44263 = torch.aten.expand %44254, %44262, %false_41779 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44263, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41780 = torch.constant.int 4
    %int1_41781 = torch.constant.int 1
    %int4_41782 = torch.constant.int 4
    %int128_41783 = torch.constant.int 128
    %44264 = torch.prim.ListConstruct %int4_41780, %43854, %int1_41781, %int4_41782, %int128_41783 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41784 = torch.constant.bool false
    %44265 = torch.aten.expand %44255, %44264, %false_41784 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41785 = torch.constant.int 4
    %int1_41786 = torch.constant.int 1
    %int4_41787 = torch.constant.int 4
    %int128_41788 = torch.constant.int 128
    %44266 = torch.prim.ListConstruct %int4_41785, %43854, %int1_41786, %int4_41787, %int128_41788 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41789 = torch.constant.bool false
    %44267 = torch.aten.expand %44256, %44266, %false_41789 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41790 = torch.constant.int 4
    %int1_41791 = torch.constant.int 1
    %int4_41792 = torch.constant.int 4
    %int128_41793 = torch.constant.int 128
    %44268 = torch.prim.ListConstruct %int4_41790, %43854, %int1_41791, %int4_41792, %int128_41793 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41794 = torch.constant.bool false
    %44269 = torch.aten.expand %44257, %44268, %false_41794 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41795 = torch.constant.int 4
    %int1_41796 = torch.constant.int 1
    %int4_41797 = torch.constant.int 4
    %int128_41798 = torch.constant.int 128
    %44270 = torch.prim.ListConstruct %int4_41795, %43854, %int1_41796, %int4_41797, %int128_41798 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41799 = torch.constant.bool false
    %44271 = torch.aten.expand %44258, %44270, %false_41799 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41800 = torch.constant.int 4
    %int1_41801 = torch.constant.int 1
    %int4_41802 = torch.constant.int 4
    %int128_41803 = torch.constant.int 128
    %44272 = torch.prim.ListConstruct %int4_41800, %43854, %int1_41801, %int4_41802, %int128_41803 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41804 = torch.constant.bool false
    %44273 = torch.aten.expand %44259, %44272, %false_41804 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41805 = torch.constant.int 4
    %int1_41806 = torch.constant.int 1
    %int4_41807 = torch.constant.int 4
    %int128_41808 = torch.constant.int 128
    %44274 = torch.prim.ListConstruct %int4_41805, %43854, %int1_41806, %int4_41807, %int128_41808 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41809 = torch.constant.bool false
    %44275 = torch.aten.expand %44260, %44274, %false_41809 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41810 = torch.constant.int 4
    %int1_41811 = torch.constant.int 1
    %int4_41812 = torch.constant.int 4
    %int128_41813 = torch.constant.int 128
    %44276 = torch.prim.ListConstruct %int4_41810, %43854, %int1_41811, %int4_41812, %int128_41813 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41814 = torch.constant.bool false
    %44277 = torch.aten.expand %44261, %44276, %false_41814 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
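    // Collapse the broadcast dims back out:
    // [4, 16*s0, 1, 4, 128] -> [4, 16*s0, 4, 128].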
    %int4_41815 = torch.constant.int 4
    %int4_41816 = torch.constant.int 4
    %int128_41817 = torch.constant.int 128
    %44278 = torch.prim.ListConstruct %int4_41815, %43854, %int4_41816, %int128_41817 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44279 = torch.aten.view %44263, %44278 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41818 = torch.constant.int 4
    %int4_41819 = torch.constant.int 4
    %int128_41820 = torch.constant.int 128
    %44280 = torch.prim.ListConstruct %int4_41818, %43854, %int4_41819, %int128_41820 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44281 = torch.aten.view %44265, %44280 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41821 = torch.constant.int 4
    %int4_41822 = torch.constant.int 4
    %int128_41823 = torch.constant.int 128
    %44282 = torch.prim.ListConstruct %int4_41821, %43854, %int4_41822, %int128_41823 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44283 = torch.aten.view %44267, %44282 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41824 = torch.constant.int 4
    %int4_41825 = torch.constant.int 4
    %int128_41826 = torch.constant.int 128
    %44284 = torch.prim.ListConstruct %int4_41824, %43854, %int4_41825, %int128_41826 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44285 = torch.aten.view %44269, %44284 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41827 = torch.constant.int 4
    %int4_41828 = torch.constant.int 4
    %int128_41829 = torch.constant.int 128
    %44286 = torch.prim.ListConstruct %int4_41827, %43854, %int4_41828, %int128_41829 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44287 = torch.aten.view %44271, %44286 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41830 = torch.constant.int 4
    %int4_41831 = torch.constant.int 4
    %int128_41832 = torch.constant.int 128
    %44288 = torch.prim.ListConstruct %int4_41830, %43854, %int4_41831, %int128_41832 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44289 = torch.aten.view %44273, %44288 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41833 = torch.constant.int 4
    %int4_41834 = torch.constant.int 4
    %int128_41835 = torch.constant.int 128
    %44290 = torch.prim.ListConstruct %int4_41833, %43854, %int4_41834, %int128_41835 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44291 = torch.aten.view %44275, %44290 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41836 = torch.constant.int 4
    %int4_41837 = torch.constant.int 4
    %int128_41838 = torch.constant.int 128
    %44292 = torch.prim.ListConstruct %int4_41836, %43854, %int4_41837, %int128_41838 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44293 = torch.aten.view %44277, %44292 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
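    // The eight views above collapse the broadcast key tensors from [4, ?, 1, 4, 128] to
    // [4, ?, 4, 128]: one KV head per shard repeated across the 4 query heads, which reads
    // as a GQA-style repeat_kv. The unsqueeze -> expand -> view sequence below applies the
    // same broadcast to the value tensors.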
    %int-2_41839 = torch.constant.int -2
    %44294 = torch.aten.unsqueeze %43643, %int-2_41839 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41840 = torch.constant.int -2
    %44295 = torch.aten.unsqueeze %43645, %int-2_41840 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41841 = torch.constant.int -2
    %44296 = torch.aten.unsqueeze %43647, %int-2_41841 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41842 = torch.constant.int -2
    %44297 = torch.aten.unsqueeze %43649, %int-2_41842 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41843 = torch.constant.int -2
    %44298 = torch.aten.unsqueeze %43651, %int-2_41843 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41844 = torch.constant.int -2
    %44299 = torch.aten.unsqueeze %43653, %int-2_41844 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41845 = torch.constant.int -2
    %44300 = torch.aten.unsqueeze %43655, %int-2_41845 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_41846 = torch.constant.int -2
    %44301 = torch.aten.unsqueeze %43657, %int-2_41846 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %44301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
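    // Each value tensor gains a singleton repeat axis ([4, ?, 1, 128] -> [4, ?, 1, 1, 128]);
    // the expands below broadcast that axis to the 4 query heads, sized with the dynamic
    // sequence length queried from %43567. The trailing `false` is aten.expand's `implicit`
    // operand.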
    %int1_41847 = torch.constant.int 1
    %44302 = torch.aten.size.int %43567, %int1_41847 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_41848 = torch.constant.int 4
    %int1_41849 = torch.constant.int 1
    %int4_41850 = torch.constant.int 4
    %int128_41851 = torch.constant.int 128
    %44303 = torch.prim.ListConstruct %int4_41848, %44302, %int1_41849, %int4_41850, %int128_41851 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41852 = torch.constant.bool false
    %44304 = torch.aten.expand %44294, %44303, %false_41852 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41853 = torch.constant.int 4
    %int1_41854 = torch.constant.int 1
    %int4_41855 = torch.constant.int 4
    %int128_41856 = torch.constant.int 128
    %44305 = torch.prim.ListConstruct %int4_41853, %44302, %int1_41854, %int4_41855, %int128_41856 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41857 = torch.constant.bool false
    %44306 = torch.aten.expand %44295, %44305, %false_41857 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41858 = torch.constant.int 4
    %int1_41859 = torch.constant.int 1
    %int4_41860 = torch.constant.int 4
    %int128_41861 = torch.constant.int 128
    %44307 = torch.prim.ListConstruct %int4_41858, %44302, %int1_41859, %int4_41860, %int128_41861 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41862 = torch.constant.bool false
    %44308 = torch.aten.expand %44296, %44307, %false_41862 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41863 = torch.constant.int 4
    %int1_41864 = torch.constant.int 1
    %int4_41865 = torch.constant.int 4
    %int128_41866 = torch.constant.int 128
    %44309 = torch.prim.ListConstruct %int4_41863, %44302, %int1_41864, %int4_41865, %int128_41866 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41867 = torch.constant.bool false
    %44310 = torch.aten.expand %44297, %44309, %false_41867 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41868 = torch.constant.int 4
    %int1_41869 = torch.constant.int 1
    %int4_41870 = torch.constant.int 4
    %int128_41871 = torch.constant.int 128
    %44311 = torch.prim.ListConstruct %int4_41868, %44302, %int1_41869, %int4_41870, %int128_41871 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41872 = torch.constant.bool false
    %44312 = torch.aten.expand %44298, %44311, %false_41872 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41873 = torch.constant.int 4
    %int1_41874 = torch.constant.int 1
    %int4_41875 = torch.constant.int 4
    %int128_41876 = torch.constant.int 128
    %44313 = torch.prim.ListConstruct %int4_41873, %44302, %int1_41874, %int4_41875, %int128_41876 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41877 = torch.constant.bool false
    %44314 = torch.aten.expand %44299, %44313, %false_41877 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41878 = torch.constant.int 4
    %int1_41879 = torch.constant.int 1
    %int4_41880 = torch.constant.int 4
    %int128_41881 = torch.constant.int 128
    %44315 = torch.prim.ListConstruct %int4_41878, %44302, %int1_41879, %int4_41880, %int128_41881 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41882 = torch.constant.bool false
    %44316 = torch.aten.expand %44300, %44315, %false_41882 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41883 = torch.constant.int 4
    %int1_41884 = torch.constant.int 1
    %int4_41885 = torch.constant.int 4
    %int128_41886 = torch.constant.int 128
    %44317 = torch.prim.ListConstruct %int4_41883, %44302, %int1_41884, %int4_41885, %int128_41886 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_41887 = torch.constant.bool false
    %44318 = torch.aten.expand %44301, %44317, %false_41887 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %44318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_41888 = torch.constant.int 4
    %int4_41889 = torch.constant.int 4
    %int128_41890 = torch.constant.int 128
    %44319 = torch.prim.ListConstruct %int4_41888, %44302, %int4_41889, %int128_41890 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44320 = torch.aten.view %44304, %44319 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41891 = torch.constant.int 4
    %int4_41892 = torch.constant.int 4
    %int128_41893 = torch.constant.int 128
    %44321 = torch.prim.ListConstruct %int4_41891, %44302, %int4_41892, %int128_41893 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44322 = torch.aten.view %44306, %44321 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41894 = torch.constant.int 4
    %int4_41895 = torch.constant.int 4
    %int128_41896 = torch.constant.int 128
    %44323 = torch.prim.ListConstruct %int4_41894, %44302, %int4_41895, %int128_41896 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44324 = torch.aten.view %44308, %44323 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41897 = torch.constant.int 4
    %int4_41898 = torch.constant.int 4
    %int128_41899 = torch.constant.int 128
    %44325 = torch.prim.ListConstruct %int4_41897, %44302, %int4_41898, %int128_41899 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44326 = torch.aten.view %44310, %44325 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41900 = torch.constant.int 4
    %int4_41901 = torch.constant.int 4
    %int128_41902 = torch.constant.int 128
    %44327 = torch.prim.ListConstruct %int4_41900, %44302, %int4_41901, %int128_41902 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44328 = torch.aten.view %44312, %44327 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41903 = torch.constant.int 4
    %int4_41904 = torch.constant.int 4
    %int128_41905 = torch.constant.int 128
    %44329 = torch.prim.ListConstruct %int4_41903, %44302, %int4_41904, %int128_41905 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44330 = torch.aten.view %44314, %44329 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41906 = torch.constant.int 4
    %int4_41907 = torch.constant.int 4
    %int128_41908 = torch.constant.int 128
    %44331 = torch.prim.ListConstruct %int4_41906, %44302, %int4_41907, %int128_41908 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44332 = torch.aten.view %44316, %44331 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_41909 = torch.constant.int 4
    %int4_41910 = torch.constant.int 4
    %int128_41911 = torch.constant.int 128
    %44333 = torch.prim.ListConstruct %int4_41909, %44302, %int4_41910, %int128_41911 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44334 = torch.aten.view %44318, %44333 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
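    // Transpose Q (%43710..%43815), K (%44279..%44293), and V (%44320..%44334) on every
    // device from [batch, seq, heads, head_dim] to [batch, heads, seq, head_dim], the
    // layout scaled dot-product attention expects.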
    %int1_41912 = torch.constant.int 1
    %int2_41913 = torch.constant.int 2
    %44335 = torch.aten.transpose.int %43710, %int1_41912, %int2_41913 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44335, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41914 = torch.constant.int 1
    %int2_41915 = torch.constant.int 2
    %44336 = torch.aten.transpose.int %43725, %int1_41914, %int2_41915 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44336, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41916 = torch.constant.int 1
    %int2_41917 = torch.constant.int 2
    %44337 = torch.aten.transpose.int %43740, %int1_41916, %int2_41917 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44337, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41918 = torch.constant.int 1
    %int2_41919 = torch.constant.int 2
    %44338 = torch.aten.transpose.int %43755, %int1_41918, %int2_41919 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44338, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41920 = torch.constant.int 1
    %int2_41921 = torch.constant.int 2
    %44339 = torch.aten.transpose.int %43770, %int1_41920, %int2_41921 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44339, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41922 = torch.constant.int 1
    %int2_41923 = torch.constant.int 2
    %44340 = torch.aten.transpose.int %43785, %int1_41922, %int2_41923 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44340, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41924 = torch.constant.int 1
    %int2_41925 = torch.constant.int 2
    %44341 = torch.aten.transpose.int %43800, %int1_41924, %int2_41925 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44341, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41926 = torch.constant.int 1
    %int2_41927 = torch.constant.int 2
    %44342 = torch.aten.transpose.int %43815, %int1_41926, %int2_41927 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44342, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41928 = torch.constant.int 1
    %int2_41929 = torch.constant.int 2
    %44343 = torch.aten.transpose.int %44279, %int1_41928, %int2_41929 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44343, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41930 = torch.constant.int 1
    %int2_41931 = torch.constant.int 2
    %44344 = torch.aten.transpose.int %44281, %int1_41930, %int2_41931 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44344, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41932 = torch.constant.int 1
    %int2_41933 = torch.constant.int 2
    %44345 = torch.aten.transpose.int %44283, %int1_41932, %int2_41933 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44345, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41934 = torch.constant.int 1
    %int2_41935 = torch.constant.int 2
    %44346 = torch.aten.transpose.int %44285, %int1_41934, %int2_41935 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44346, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41936 = torch.constant.int 1
    %int2_41937 = torch.constant.int 2
    %44347 = torch.aten.transpose.int %44287, %int1_41936, %int2_41937 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44347, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41938 = torch.constant.int 1
    %int2_41939 = torch.constant.int 2
    %44348 = torch.aten.transpose.int %44289, %int1_41938, %int2_41939 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44348, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41940 = torch.constant.int 1
    %int2_41941 = torch.constant.int 2
    %44349 = torch.aten.transpose.int %44291, %int1_41940, %int2_41941 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44349, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41942 = torch.constant.int 1
    %int2_41943 = torch.constant.int 2
    %44350 = torch.aten.transpose.int %44293, %int1_41942, %int2_41943 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44350, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41944 = torch.constant.int 1
    %int2_41945 = torch.constant.int 2
    %44351 = torch.aten.transpose.int %44320, %int1_41944, %int2_41945 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44351, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41946 = torch.constant.int 1
    %int2_41947 = torch.constant.int 2
    %44352 = torch.aten.transpose.int %44322, %int1_41946, %int2_41947 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44352, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41948 = torch.constant.int 1
    %int2_41949 = torch.constant.int 2
    %44353 = torch.aten.transpose.int %44324, %int1_41948, %int2_41949 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44353, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41950 = torch.constant.int 1
    %int2_41951 = torch.constant.int 2
    %44354 = torch.aten.transpose.int %44326, %int1_41950, %int2_41951 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44354, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41952 = torch.constant.int 1
    %int2_41953 = torch.constant.int 2
    %44355 = torch.aten.transpose.int %44328, %int1_41952, %int2_41953 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44355, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41954 = torch.constant.int 1
    %int2_41955 = torch.constant.int 2
    %44356 = torch.aten.transpose.int %44330, %int1_41954, %int2_41955 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44356, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41956 = torch.constant.int 1
    %int2_41957 = torch.constant.int 2
    %44357 = torch.aten.transpose.int %44332, %int1_41956, %int2_41957 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44357, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_41958 = torch.constant.int 1
    %int2_41959 = torch.constant.int 2
    %44358 = torch.aten.transpose.int %44334, %int1_41958, %int2_41959 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %44358, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
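    // One flash-attention call per device: dropout_p = 0.0, is_causal = true, no explicit
    // attention mask or scale override. Result #0 is the attention output; result #1 (the
    // f32 logsumexp) is not consumed in this part of the module.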
    %float0.000000e00_41960 = torch.constant.float 0.000000e+00
    %true_41961 = torch.constant.bool true
    %none_41962 = torch.constant.none
    %none_41963 = torch.constant.none
    %44359:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%44335, %44343, %44351, %float0.000000e00_41960, %true_41961, %none_41962, %none_41963) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>)
    torch.bind_symbolic_shape %44359#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_41964 = torch.constant.float 0.000000e+00
    %true_41965 = torch.constant.bool true
    %none_41966 = torch.constant.none
    %none_41967 = torch.constant.none
    %44360:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%44336, %44344, %44352, %float0.000000e00_41964, %true_41965, %none_41966, %none_41967) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>)
    torch.bind_symbolic_shape %44360#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_41968 = torch.constant.float 0.000000e+00
    %true_41969 = torch.constant.bool true
    %none_41970 = torch.constant.none
    %none_41971 = torch.constant.none
    %44361:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%44337, %44345, %44353, %float0.000000e00_41968, %true_41969, %none_41970, %none_41971) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>)
    torch.bind_symbolic_shape %44361#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_41972 = torch.constant.float 0.000000e+00
    %true_41973 = torch.constant.bool true
    %none_41974 = torch.constant.none
    %none_41975 = torch.constant.none
    %44362:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%44338, %44346, %44354, %float0.000000e00_41972, %true_41973, %none_41974, %none_41975) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>)
    torch.bind_symbolic_shape %44362#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_41976 = torch.constant.float 0.000000e+00
    %true_41977 = torch.constant.bool true
    %none_41978 = torch.constant.none
    %none_41979 = torch.constant.none
    %44363:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%44339, %44347, %44355, %float0.000000e00_41976, %true_41977, %none_41978, %none_41979) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>)
    torch.bind_symbolic_shape %44363#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_41980 = torch.constant.float 0.000000e+00
    %true_41981 = torch.constant.bool true
    %none_41982 = torch.constant.none
    %none_41983 = torch.constant.none
    %44364:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%44340, %44348, %44356, %float0.000000e00_41980, %true_41981, %none_41982, %none_41983) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>)
    torch.bind_symbolic_shape %44364#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_41984 = torch.constant.float 0.000000e+00
    %true_41985 = torch.constant.bool true
    %none_41986 = torch.constant.none
    %none_41987 = torch.constant.none
    %44365:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%44341, %44349, %44357, %float0.000000e00_41984, %true_41985, %none_41986, %none_41987) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>)
    torch.bind_symbolic_shape %44365#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_41988 = torch.constant.float 0.000000e+00
    %true_41989 = torch.constant.bool true
    %none_41990 = torch.constant.none
    %none_41991 = torch.constant.none
    %44366:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%44342, %44350, %44358, %float0.000000e00_41988, %true_41989, %none_41990, %none_41991) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>)
    torch.bind_symbolic_shape %44366#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
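    // Transpose each attention output back to [batch, seq, heads, head_dim] before the
    // heads are flattened.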
    %int1_41992 = torch.constant.int 1
    %int2_41993 = torch.constant.int 2
    %44367 = torch.aten.transpose.int %44359#0, %int1_41992, %int2_41993 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_41994 = torch.constant.int 1
    %int2_41995 = torch.constant.int 2
    %44368 = torch.aten.transpose.int %44360#0, %int1_41994, %int2_41995 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_41996 = torch.constant.int 1
    %int2_41997 = torch.constant.int 2
    %44369 = torch.aten.transpose.int %44361#0, %int1_41996, %int2_41997 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_41998 = torch.constant.int 1
    %int2_41999 = torch.constant.int 2
    %44370 = torch.aten.transpose.int %44362#0, %int1_41998, %int2_41999 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_42000 = torch.constant.int 1
    %int2_42001 = torch.constant.int 2
    %44371 = torch.aten.transpose.int %44363#0, %int1_42000, %int2_42001 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_42002 = torch.constant.int 1
    %int2_42003 = torch.constant.int 2
    %44372 = torch.aten.transpose.int %44364#0, %int1_42002, %int2_42003 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_42004 = torch.constant.int 1
    %int2_42005 = torch.constant.int 2
    %44373 = torch.aten.transpose.int %44365#0, %int1_42004, %int2_42005 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_42006 = torch.constant.int 1
    %int2_42007 = torch.constant.int 2
    %44374 = torch.aten.transpose.int %44366#0, %int1_42006, %int2_42007 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %44374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
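    // Flatten the per-shard heads (4 x 128 = 512) so each device holds a [4, ?, 512] slice
    // of the attention output.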
    %int4_42008 = torch.constant.int 4
    %int512_42009 = torch.constant.int 512
    %44375 = torch.prim.ListConstruct %int4_42008, %43696, %int512_42009 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44376 = torch.aten.view %44367, %44375 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %44376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42010 = torch.constant.int 4
    %int512_42011 = torch.constant.int 512
    %44377 = torch.prim.ListConstruct %int4_42010, %43711, %int512_42011 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44378 = torch.aten.view %44368, %44377 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %44378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42012 = torch.constant.int 4
    %int512_42013 = torch.constant.int 512
    %44379 = torch.prim.ListConstruct %int4_42012, %43726, %int512_42013 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44380 = torch.aten.view %44369, %44379 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %44380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42014 = torch.constant.int 4
    %int512_42015 = torch.constant.int 512
    %44381 = torch.prim.ListConstruct %int4_42014, %43741, %int512_42015 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44382 = torch.aten.view %44370, %44381 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %44382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42016 = torch.constant.int 4
    %int512_42017 = torch.constant.int 512
    %44383 = torch.prim.ListConstruct %int4_42016, %43756, %int512_42017 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44384 = torch.aten.view %44371, %44383 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %44384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42018 = torch.constant.int 4
    %int512_42019 = torch.constant.int 512
    %44385 = torch.prim.ListConstruct %int4_42018, %43771, %int512_42019 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44386 = torch.aten.view %44372, %44385 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %44386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42020 = torch.constant.int 4
    %int512_42021 = torch.constant.int 512
    %44387 = torch.prim.ListConstruct %int4_42020, %43786, %int512_42021 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44388 = torch.aten.view %44373, %44387 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %44388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42022 = torch.constant.int 4
    %int512_42023 = torch.constant.int 512
    %44389 = torch.prim.ListConstruct %int4_42022, %43801, %int512_42023 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44390 = torch.aten.view %44374, %44389 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %44390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
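    // Transpose the eight [4096, 512] output-projection shards (%1624..%1631) to
    // [512, 4096]; each device then multiplies its 512-wide slice by its own shard, so the
    // per-device results below are partial sums of the full projection.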
    %int1_42024 = torch.constant.int 1
    %int0_42025 = torch.constant.int 0
    %44391 = torch.prim.ListConstruct %int1_42024, %int0_42025 : (!torch.int, !torch.int) -> !torch.list<int>
    %44392 = torch.aten.permute %1624, %44391 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_42026 = torch.constant.int 1
    %int0_42027 = torch.constant.int 0
    %44393 = torch.prim.ListConstruct %int1_42026, %int0_42027 : (!torch.int, !torch.int) -> !torch.list<int>
    %44394 = torch.aten.permute %1625, %44393 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_42028 = torch.constant.int 1
    %int0_42029 = torch.constant.int 0
    %44395 = torch.prim.ListConstruct %int1_42028, %int0_42029 : (!torch.int, !torch.int) -> !torch.list<int>
    %44396 = torch.aten.permute %1626, %44395 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_42030 = torch.constant.int 1
    %int0_42031 = torch.constant.int 0
    %44397 = torch.prim.ListConstruct %int1_42030, %int0_42031 : (!torch.int, !torch.int) -> !torch.list<int>
    %44398 = torch.aten.permute %1627, %44397 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_42032 = torch.constant.int 1
    %int0_42033 = torch.constant.int 0
    %44399 = torch.prim.ListConstruct %int1_42032, %int0_42033 : (!torch.int, !torch.int) -> !torch.list<int>
    %44400 = torch.aten.permute %1628, %44399 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_42034 = torch.constant.int 1
    %int0_42035 = torch.constant.int 0
    %44401 = torch.prim.ListConstruct %int1_42034, %int0_42035 : (!torch.int, !torch.int) -> !torch.list<int>
    %44402 = torch.aten.permute %1629, %44401 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_42036 = torch.constant.int 1
    %int0_42037 = torch.constant.int 0
    %44403 = torch.prim.ListConstruct %int1_42036, %int0_42037 : (!torch.int, !torch.int) -> !torch.list<int>
    %44404 = torch.aten.permute %1630, %44403 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_42038 = torch.constant.int 1
    %int0_42039 = torch.constant.int 0
    %44405 = torch.prim.ListConstruct %int1_42038, %int0_42039 : (!torch.int, !torch.int) -> !torch.list<int>
    %44406 = torch.aten.permute %1631, %44405 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
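    // Per-device output projection: collapse [4, ?, 512] to [4*?, 512], matmul with the
    // [512, 4096] shard, and restore the [4, ?, 4096] shape.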
    %int4_42040 = torch.constant.int 4
    %44407 = torch.aten.mul.int %int4_42040, %43696 : !torch.int, !torch.int -> !torch.int
    %int512_42041 = torch.constant.int 512
    %44408 = torch.prim.ListConstruct %44407, %int512_42041 : (!torch.int, !torch.int) -> !torch.list<int>
    %44409 = torch.aten.view %44376, %44408 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %44409, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %44410 = torch.aten.mm %44409, %44392 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44410, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42042 = torch.constant.int 4
    %int4096_42043 = torch.constant.int 4096
    %44411 = torch.prim.ListConstruct %int4_42042, %43696, %int4096_42043 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44412 = torch.aten.view %44410, %44411 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_42044 = torch.constant.int 4
    %44413 = torch.aten.mul.int %int4_42044, %43711 : !torch.int, !torch.int -> !torch.int
    %int512_42045 = torch.constant.int 512
    %44414 = torch.prim.ListConstruct %44413, %int512_42045 : (!torch.int, !torch.int) -> !torch.list<int>
    %44415 = torch.aten.view %44378, %44414 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %44415, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %44416 = torch.aten.mm %44415, %44394 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44416, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42046 = torch.constant.int 4
    %int4096_42047 = torch.constant.int 4096
    %44417 = torch.prim.ListConstruct %int4_42046, %43711, %int4096_42047 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44418 = torch.aten.view %44416, %44417 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_42048 = torch.constant.int 4
    %44419 = torch.aten.mul.int %int4_42048, %43726 : !torch.int, !torch.int -> !torch.int
    %int512_42049 = torch.constant.int 512
    %44420 = torch.prim.ListConstruct %44419, %int512_42049 : (!torch.int, !torch.int) -> !torch.list<int>
    %44421 = torch.aten.view %44380, %44420 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %44421, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %44422 = torch.aten.mm %44421, %44396 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44422, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42050 = torch.constant.int 4
    %int4096_42051 = torch.constant.int 4096
    %44423 = torch.prim.ListConstruct %int4_42050, %43726, %int4096_42051 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44424 = torch.aten.view %44422, %44423 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_42052 = torch.constant.int 4
    %44425 = torch.aten.mul.int %int4_42052, %43741 : !torch.int, !torch.int -> !torch.int
    %int512_42053 = torch.constant.int 512
    %44426 = torch.prim.ListConstruct %44425, %int512_42053 : (!torch.int, !torch.int) -> !torch.list<int>
    %44427 = torch.aten.view %44382, %44426 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %44427, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %44428 = torch.aten.mm %44427, %44398 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44428, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42054 = torch.constant.int 4
    %int4096_42055 = torch.constant.int 4096
    %44429 = torch.prim.ListConstruct %int4_42054, %43741, %int4096_42055 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44430 = torch.aten.view %44428, %44429 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_42056 = torch.constant.int 4
    %44431 = torch.aten.mul.int %int4_42056, %43756 : !torch.int, !torch.int -> !torch.int
    %int512_42057 = torch.constant.int 512
    %44432 = torch.prim.ListConstruct %44431, %int512_42057 : (!torch.int, !torch.int) -> !torch.list<int>
    %44433 = torch.aten.view %44384, %44432 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %44433, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %44434 = torch.aten.mm %44433, %44400 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44434, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42058 = torch.constant.int 4
    %int4096_42059 = torch.constant.int 4096
    %44435 = torch.prim.ListConstruct %int4_42058, %43756, %int4096_42059 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44436 = torch.aten.view %44434, %44435 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_42060 = torch.constant.int 4
    %44437 = torch.aten.mul.int %int4_42060, %43771 : !torch.int, !torch.int -> !torch.int
    %int512_42061 = torch.constant.int 512
    %44438 = torch.prim.ListConstruct %44437, %int512_42061 : (!torch.int, !torch.int) -> !torch.list<int>
    %44439 = torch.aten.view %44386, %44438 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %44439, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %44440 = torch.aten.mm %44439, %44402 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44440, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42062 = torch.constant.int 4
    %int4096_42063 = torch.constant.int 4096
    %44441 = torch.prim.ListConstruct %int4_42062, %43771, %int4096_42063 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44442 = torch.aten.view %44440, %44441 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_42064 = torch.constant.int 4
    %44443 = torch.aten.mul.int %int4_42064, %43786 : !torch.int, !torch.int -> !torch.int
    %int512_42065 = torch.constant.int 512
    %44444 = torch.prim.ListConstruct %44443, %int512_42065 : (!torch.int, !torch.int) -> !torch.list<int>
    %44445 = torch.aten.view %44388, %44444 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %44445, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %44446 = torch.aten.mm %44445, %44404 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44446, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42066 = torch.constant.int 4
    %int4096_42067 = torch.constant.int 4096
    %44447 = torch.prim.ListConstruct %int4_42066, %43786, %int4096_42067 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44448 = torch.aten.view %44446, %44447 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_42068 = torch.constant.int 4
    %44449 = torch.aten.mul.int %int4_42068, %43801 : !torch.int, !torch.int -> !torch.int
    %int512_42069 = torch.constant.int 512
    %44450 = torch.prim.ListConstruct %44449, %int512_42069 : (!torch.int, !torch.int) -> !torch.list<int>
    %44451 = torch.aten.view %44390, %44450 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %44451, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %44452 = torch.aten.mm %44451, %44406 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44452, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42070 = torch.constant.int 4
    %int4096_42071 = torch.constant.int 4096
    %44453 = torch.prim.ListConstruct %int4_42070, %43801, %int4096_42071 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44454 = torch.aten.view %44452, %44453 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
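    // Reduction for @__device_0: transfer the seven remote partial projections to this
    // device and accumulate them with a chain of adds. The same gather-and-sum is repeated
    // for each device below, which effectively materializes an all-reduce as explicit
    // point-to-point transfers.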
    %44455 = torch_c.to_builtin_tensor %44418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42072 = arith.constant 1 : index
    %dim_42073 = tensor.dim %44455, %c1_42072 : tensor<4x?x4096xf16>
    %44456 = flow.tensor.transfer %44455 : tensor<4x?x4096xf16>{%dim_42073} to #hal.device.promise<@__device_0>
    %44457 = torch_c.from_builtin_tensor %44456 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44458 = torch_c.to_builtin_tensor %44424 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42074 = arith.constant 1 : index
    %dim_42075 = tensor.dim %44458, %c1_42074 : tensor<4x?x4096xf16>
    %44459 = flow.tensor.transfer %44458 : tensor<4x?x4096xf16>{%dim_42075} to #hal.device.promise<@__device_0>
    %44460 = torch_c.from_builtin_tensor %44459 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44461 = torch_c.to_builtin_tensor %44430 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42076 = arith.constant 1 : index
    %dim_42077 = tensor.dim %44461, %c1_42076 : tensor<4x?x4096xf16>
    %44462 = flow.tensor.transfer %44461 : tensor<4x?x4096xf16>{%dim_42077} to #hal.device.promise<@__device_0>
    %44463 = torch_c.from_builtin_tensor %44462 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44464 = torch_c.to_builtin_tensor %44436 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42078 = arith.constant 1 : index
    %dim_42079 = tensor.dim %44464, %c1_42078 : tensor<4x?x4096xf16>
    %44465 = flow.tensor.transfer %44464 : tensor<4x?x4096xf16>{%dim_42079} to #hal.device.promise<@__device_0>
    %44466 = torch_c.from_builtin_tensor %44465 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44467 = torch_c.to_builtin_tensor %44442 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42080 = arith.constant 1 : index
    %dim_42081 = tensor.dim %44467, %c1_42080 : tensor<4x?x4096xf16>
    %44468 = flow.tensor.transfer %44467 : tensor<4x?x4096xf16>{%dim_42081} to #hal.device.promise<@__device_0>
    %44469 = torch_c.from_builtin_tensor %44468 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44470 = torch_c.to_builtin_tensor %44448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42082 = arith.constant 1 : index
    %dim_42083 = tensor.dim %44470, %c1_42082 : tensor<4x?x4096xf16>
    %44471 = flow.tensor.transfer %44470 : tensor<4x?x4096xf16>{%dim_42083} to #hal.device.promise<@__device_0>
    %44472 = torch_c.from_builtin_tensor %44471 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44473 = torch_c.to_builtin_tensor %44454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42084 = arith.constant 1 : index
    %dim_42085 = tensor.dim %44473, %c1_42084 : tensor<4x?x4096xf16>
    %44474 = flow.tensor.transfer %44473 : tensor<4x?x4096xf16>{%dim_42085} to #hal.device.promise<@__device_0>
    %44475 = torch_c.from_builtin_tensor %44474 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42086 = torch.constant.int 1
    %44476 = torch.aten.add.Tensor %44412, %44457, %int1_42086 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42087 = torch.constant.int 1
    %44477 = torch.aten.add.Tensor %44476, %44460, %int1_42087 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42088 = torch.constant.int 1
    %44478 = torch.aten.add.Tensor %44477, %44463, %int1_42088 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42089 = torch.constant.int 1
    %44479 = torch.aten.add.Tensor %44478, %44466, %int1_42089 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42090 = torch.constant.int 1
    %44480 = torch.aten.add.Tensor %44479, %44469, %int1_42090 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42091 = torch.constant.int 1
    %44481 = torch.aten.add.Tensor %44480, %44472, %int1_42091 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42092 = torch.constant.int 1
    %44482 = torch.aten.add.Tensor %44481, %44475, %int1_42092 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
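    // Same gather-and-sum, this time producing @__device_1's copy of the reduced tensor.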
    %44483 = torch_c.to_builtin_tensor %44412 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42093 = arith.constant 1 : index
    %dim_42094 = tensor.dim %44483, %c1_42093 : tensor<4x?x4096xf16>
    %44484 = flow.tensor.transfer %44483 : tensor<4x?x4096xf16>{%dim_42094} to #hal.device.promise<@__device_1>
    %44485 = torch_c.from_builtin_tensor %44484 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44486 = torch_c.to_builtin_tensor %44424 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42095 = arith.constant 1 : index
    %dim_42096 = tensor.dim %44486, %c1_42095 : tensor<4x?x4096xf16>
    %44487 = flow.tensor.transfer %44486 : tensor<4x?x4096xf16>{%dim_42096} to #hal.device.promise<@__device_1>
    %44488 = torch_c.from_builtin_tensor %44487 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44489 = torch_c.to_builtin_tensor %44430 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42097 = arith.constant 1 : index
    %dim_42098 = tensor.dim %44489, %c1_42097 : tensor<4x?x4096xf16>
    %44490 = flow.tensor.transfer %44489 : tensor<4x?x4096xf16>{%dim_42098} to #hal.device.promise<@__device_1>
    %44491 = torch_c.from_builtin_tensor %44490 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44492 = torch_c.to_builtin_tensor %44436 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42099 = arith.constant 1 : index
    %dim_42100 = tensor.dim %44492, %c1_42099 : tensor<4x?x4096xf16>
    %44493 = flow.tensor.transfer %44492 : tensor<4x?x4096xf16>{%dim_42100} to #hal.device.promise<@__device_1>
    %44494 = torch_c.from_builtin_tensor %44493 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44495 = torch_c.to_builtin_tensor %44442 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42101 = arith.constant 1 : index
    %dim_42102 = tensor.dim %44495, %c1_42101 : tensor<4x?x4096xf16>
    %44496 = flow.tensor.transfer %44495 : tensor<4x?x4096xf16>{%dim_42102} to #hal.device.promise<@__device_1>
    %44497 = torch_c.from_builtin_tensor %44496 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44498 = torch_c.to_builtin_tensor %44448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42103 = arith.constant 1 : index
    %dim_42104 = tensor.dim %44498, %c1_42103 : tensor<4x?x4096xf16>
    %44499 = flow.tensor.transfer %44498 : tensor<4x?x4096xf16>{%dim_42104} to #hal.device.promise<@__device_1>
    %44500 = torch_c.from_builtin_tensor %44499 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44501 = torch_c.to_builtin_tensor %44454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42105 = arith.constant 1 : index
    %dim_42106 = tensor.dim %44501, %c1_42105 : tensor<4x?x4096xf16>
    %44502 = flow.tensor.transfer %44501 : tensor<4x?x4096xf16>{%dim_42106} to #hal.device.promise<@__device_1>
    %44503 = torch_c.from_builtin_tensor %44502 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42107 = torch.constant.int 1
    %44504 = torch.aten.add.Tensor %44485, %44418, %int1_42107 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42108 = torch.constant.int 1
    %44505 = torch.aten.add.Tensor %44504, %44488, %int1_42108 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42109 = torch.constant.int 1
    %44506 = torch.aten.add.Tensor %44505, %44491, %int1_42109 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42110 = torch.constant.int 1
    %44507 = torch.aten.add.Tensor %44506, %44494, %int1_42110 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42111 = torch.constant.int 1
    %44508 = torch.aten.add.Tensor %44507, %44497, %int1_42111 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42112 = torch.constant.int 1
    %44509 = torch.aten.add.Tensor %44508, %44500, %int1_42112 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42113 = torch.constant.int 1
    %44510 = torch.aten.add.Tensor %44509, %44503, %int1_42113 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
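    // %44510 is the complete sum, resident on @__device_1. The identical
    // gather-then-reduce pattern repeats below for @__device_2 through
    // @__device_7, producing %44538, %44566, %44594, %44622, %44650, and
    // %44678, so every device ends up holding a replica of the same result.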
    %44511 = torch_c.to_builtin_tensor %44412 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42114 = arith.constant 1 : index
    %dim_42115 = tensor.dim %44511, %c1_42114 : tensor<4x?x4096xf16>
    %44512 = flow.tensor.transfer %44511 : tensor<4x?x4096xf16>{%dim_42115} to #hal.device.promise<@__device_2>
    %44513 = torch_c.from_builtin_tensor %44512 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44514 = torch_c.to_builtin_tensor %44418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42116 = arith.constant 1 : index
    %dim_42117 = tensor.dim %44514, %c1_42116 : tensor<4x?x4096xf16>
    %44515 = flow.tensor.transfer %44514 : tensor<4x?x4096xf16>{%dim_42117} to #hal.device.promise<@__device_2>
    %44516 = torch_c.from_builtin_tensor %44515 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44517 = torch_c.to_builtin_tensor %44430 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42118 = arith.constant 1 : index
    %dim_42119 = tensor.dim %44517, %c1_42118 : tensor<4x?x4096xf16>
    %44518 = flow.tensor.transfer %44517 : tensor<4x?x4096xf16>{%dim_42119} to #hal.device.promise<@__device_2>
    %44519 = torch_c.from_builtin_tensor %44518 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44520 = torch_c.to_builtin_tensor %44436 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42120 = arith.constant 1 : index
    %dim_42121 = tensor.dim %44520, %c1_42120 : tensor<4x?x4096xf16>
    %44521 = flow.tensor.transfer %44520 : tensor<4x?x4096xf16>{%dim_42121} to #hal.device.promise<@__device_2>
    %44522 = torch_c.from_builtin_tensor %44521 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44523 = torch_c.to_builtin_tensor %44442 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42122 = arith.constant 1 : index
    %dim_42123 = tensor.dim %44523, %c1_42122 : tensor<4x?x4096xf16>
    %44524 = flow.tensor.transfer %44523 : tensor<4x?x4096xf16>{%dim_42123} to #hal.device.promise<@__device_2>
    %44525 = torch_c.from_builtin_tensor %44524 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44526 = torch_c.to_builtin_tensor %44448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42124 = arith.constant 1 : index
    %dim_42125 = tensor.dim %44526, %c1_42124 : tensor<4x?x4096xf16>
    %44527 = flow.tensor.transfer %44526 : tensor<4x?x4096xf16>{%dim_42125} to #hal.device.promise<@__device_2>
    %44528 = torch_c.from_builtin_tensor %44527 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44529 = torch_c.to_builtin_tensor %44454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42126 = arith.constant 1 : index
    %dim_42127 = tensor.dim %44529, %c1_42126 : tensor<4x?x4096xf16>
    %44530 = flow.tensor.transfer %44529 : tensor<4x?x4096xf16>{%dim_42127} to #hal.device.promise<@__device_2>
    %44531 = torch_c.from_builtin_tensor %44530 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42128 = torch.constant.int 1
    %44532 = torch.aten.add.Tensor %44513, %44516, %int1_42128 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42129 = torch.constant.int 1
    %44533 = torch.aten.add.Tensor %44532, %44424, %int1_42129 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42130 = torch.constant.int 1
    %44534 = torch.aten.add.Tensor %44533, %44519, %int1_42130 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42131 = torch.constant.int 1
    %44535 = torch.aten.add.Tensor %44534, %44522, %int1_42131 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42132 = torch.constant.int 1
    %44536 = torch.aten.add.Tensor %44535, %44525, %int1_42132 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42133 = torch.constant.int 1
    %44537 = torch.aten.add.Tensor %44536, %44528, %int1_42133 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42134 = torch.constant.int 1
    %44538 = torch.aten.add.Tensor %44537, %44531, %int1_42134 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44539 = torch_c.to_builtin_tensor %44412 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42135 = arith.constant 1 : index
    %dim_42136 = tensor.dim %44539, %c1_42135 : tensor<4x?x4096xf16>
    %44540 = flow.tensor.transfer %44539 : tensor<4x?x4096xf16>{%dim_42136} to #hal.device.promise<@__device_3>
    %44541 = torch_c.from_builtin_tensor %44540 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44542 = torch_c.to_builtin_tensor %44418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42137 = arith.constant 1 : index
    %dim_42138 = tensor.dim %44542, %c1_42137 : tensor<4x?x4096xf16>
    %44543 = flow.tensor.transfer %44542 : tensor<4x?x4096xf16>{%dim_42138} to #hal.device.promise<@__device_3>
    %44544 = torch_c.from_builtin_tensor %44543 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44545 = torch_c.to_builtin_tensor %44424 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42139 = arith.constant 1 : index
    %dim_42140 = tensor.dim %44545, %c1_42139 : tensor<4x?x4096xf16>
    %44546 = flow.tensor.transfer %44545 : tensor<4x?x4096xf16>{%dim_42140} to #hal.device.promise<@__device_3>
    %44547 = torch_c.from_builtin_tensor %44546 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44548 = torch_c.to_builtin_tensor %44436 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42141 = arith.constant 1 : index
    %dim_42142 = tensor.dim %44548, %c1_42141 : tensor<4x?x4096xf16>
    %44549 = flow.tensor.transfer %44548 : tensor<4x?x4096xf16>{%dim_42142} to #hal.device.promise<@__device_3>
    %44550 = torch_c.from_builtin_tensor %44549 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44551 = torch_c.to_builtin_tensor %44442 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42143 = arith.constant 1 : index
    %dim_42144 = tensor.dim %44551, %c1_42143 : tensor<4x?x4096xf16>
    %44552 = flow.tensor.transfer %44551 : tensor<4x?x4096xf16>{%dim_42144} to #hal.device.promise<@__device_3>
    %44553 = torch_c.from_builtin_tensor %44552 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44554 = torch_c.to_builtin_tensor %44448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42145 = arith.constant 1 : index
    %dim_42146 = tensor.dim %44554, %c1_42145 : tensor<4x?x4096xf16>
    %44555 = flow.tensor.transfer %44554 : tensor<4x?x4096xf16>{%dim_42146} to #hal.device.promise<@__device_3>
    %44556 = torch_c.from_builtin_tensor %44555 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44557 = torch_c.to_builtin_tensor %44454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42147 = arith.constant 1 : index
    %dim_42148 = tensor.dim %44557, %c1_42147 : tensor<4x?x4096xf16>
    %44558 = flow.tensor.transfer %44557 : tensor<4x?x4096xf16>{%dim_42148} to #hal.device.promise<@__device_3>
    %44559 = torch_c.from_builtin_tensor %44558 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42149 = torch.constant.int 1
    %44560 = torch.aten.add.Tensor %44541, %44544, %int1_42149 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42150 = torch.constant.int 1
    %44561 = torch.aten.add.Tensor %44560, %44547, %int1_42150 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42151 = torch.constant.int 1
    %44562 = torch.aten.add.Tensor %44561, %44430, %int1_42151 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42152 = torch.constant.int 1
    %44563 = torch.aten.add.Tensor %44562, %44550, %int1_42152 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42153 = torch.constant.int 1
    %44564 = torch.aten.add.Tensor %44563, %44553, %int1_42153 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42154 = torch.constant.int 1
    %44565 = torch.aten.add.Tensor %44564, %44556, %int1_42154 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42155 = torch.constant.int 1
    %44566 = torch.aten.add.Tensor %44565, %44559, %int1_42155 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44567 = torch_c.to_builtin_tensor %44412 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42156 = arith.constant 1 : index
    %dim_42157 = tensor.dim %44567, %c1_42156 : tensor<4x?x4096xf16>
    %44568 = flow.tensor.transfer %44567 : tensor<4x?x4096xf16>{%dim_42157} to #hal.device.promise<@__device_4>
    %44569 = torch_c.from_builtin_tensor %44568 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44570 = torch_c.to_builtin_tensor %44418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42158 = arith.constant 1 : index
    %dim_42159 = tensor.dim %44570, %c1_42158 : tensor<4x?x4096xf16>
    %44571 = flow.tensor.transfer %44570 : tensor<4x?x4096xf16>{%dim_42159} to #hal.device.promise<@__device_4>
    %44572 = torch_c.from_builtin_tensor %44571 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44573 = torch_c.to_builtin_tensor %44424 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42160 = arith.constant 1 : index
    %dim_42161 = tensor.dim %44573, %c1_42160 : tensor<4x?x4096xf16>
    %44574 = flow.tensor.transfer %44573 : tensor<4x?x4096xf16>{%dim_42161} to #hal.device.promise<@__device_4>
    %44575 = torch_c.from_builtin_tensor %44574 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44576 = torch_c.to_builtin_tensor %44430 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42162 = arith.constant 1 : index
    %dim_42163 = tensor.dim %44576, %c1_42162 : tensor<4x?x4096xf16>
    %44577 = flow.tensor.transfer %44576 : tensor<4x?x4096xf16>{%dim_42163} to #hal.device.promise<@__device_4>
    %44578 = torch_c.from_builtin_tensor %44577 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44579 = torch_c.to_builtin_tensor %44442 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42164 = arith.constant 1 : index
    %dim_42165 = tensor.dim %44579, %c1_42164 : tensor<4x?x4096xf16>
    %44580 = flow.tensor.transfer %44579 : tensor<4x?x4096xf16>{%dim_42165} to #hal.device.promise<@__device_4>
    %44581 = torch_c.from_builtin_tensor %44580 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44582 = torch_c.to_builtin_tensor %44448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42166 = arith.constant 1 : index
    %dim_42167 = tensor.dim %44582, %c1_42166 : tensor<4x?x4096xf16>
    %44583 = flow.tensor.transfer %44582 : tensor<4x?x4096xf16>{%dim_42167} to #hal.device.promise<@__device_4>
    %44584 = torch_c.from_builtin_tensor %44583 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44585 = torch_c.to_builtin_tensor %44454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42168 = arith.constant 1 : index
    %dim_42169 = tensor.dim %44585, %c1_42168 : tensor<4x?x4096xf16>
    %44586 = flow.tensor.transfer %44585 : tensor<4x?x4096xf16>{%dim_42169} to #hal.device.promise<@__device_4>
    %44587 = torch_c.from_builtin_tensor %44586 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42170 = torch.constant.int 1
    %44588 = torch.aten.add.Tensor %44569, %44572, %int1_42170 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42171 = torch.constant.int 1
    %44589 = torch.aten.add.Tensor %44588, %44575, %int1_42171 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42172 = torch.constant.int 1
    %44590 = torch.aten.add.Tensor %44589, %44578, %int1_42172 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42173 = torch.constant.int 1
    %44591 = torch.aten.add.Tensor %44590, %44436, %int1_42173 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42174 = torch.constant.int 1
    %44592 = torch.aten.add.Tensor %44591, %44581, %int1_42174 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42175 = torch.constant.int 1
    %44593 = torch.aten.add.Tensor %44592, %44584, %int1_42175 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42176 = torch.constant.int 1
    %44594 = torch.aten.add.Tensor %44593, %44587, %int1_42176 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44595 = torch_c.to_builtin_tensor %44412 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42177 = arith.constant 1 : index
    %dim_42178 = tensor.dim %44595, %c1_42177 : tensor<4x?x4096xf16>
    %44596 = flow.tensor.transfer %44595 : tensor<4x?x4096xf16>{%dim_42178} to #hal.device.promise<@__device_5>
    %44597 = torch_c.from_builtin_tensor %44596 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44598 = torch_c.to_builtin_tensor %44418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42179 = arith.constant 1 : index
    %dim_42180 = tensor.dim %44598, %c1_42179 : tensor<4x?x4096xf16>
    %44599 = flow.tensor.transfer %44598 : tensor<4x?x4096xf16>{%dim_42180} to #hal.device.promise<@__device_5>
    %44600 = torch_c.from_builtin_tensor %44599 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44601 = torch_c.to_builtin_tensor %44424 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42181 = arith.constant 1 : index
    %dim_42182 = tensor.dim %44601, %c1_42181 : tensor<4x?x4096xf16>
    %44602 = flow.tensor.transfer %44601 : tensor<4x?x4096xf16>{%dim_42182} to #hal.device.promise<@__device_5>
    %44603 = torch_c.from_builtin_tensor %44602 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44604 = torch_c.to_builtin_tensor %44430 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42183 = arith.constant 1 : index
    %dim_42184 = tensor.dim %44604, %c1_42183 : tensor<4x?x4096xf16>
    %44605 = flow.tensor.transfer %44604 : tensor<4x?x4096xf16>{%dim_42184} to #hal.device.promise<@__device_5>
    %44606 = torch_c.from_builtin_tensor %44605 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44607 = torch_c.to_builtin_tensor %44436 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42185 = arith.constant 1 : index
    %dim_42186 = tensor.dim %44607, %c1_42185 : tensor<4x?x4096xf16>
    %44608 = flow.tensor.transfer %44607 : tensor<4x?x4096xf16>{%dim_42186} to #hal.device.promise<@__device_5>
    %44609 = torch_c.from_builtin_tensor %44608 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44610 = torch_c.to_builtin_tensor %44448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42187 = arith.constant 1 : index
    %dim_42188 = tensor.dim %44610, %c1_42187 : tensor<4x?x4096xf16>
    %44611 = flow.tensor.transfer %44610 : tensor<4x?x4096xf16>{%dim_42188} to #hal.device.promise<@__device_5>
    %44612 = torch_c.from_builtin_tensor %44611 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44613 = torch_c.to_builtin_tensor %44454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42189 = arith.constant 1 : index
    %dim_42190 = tensor.dim %44613, %c1_42189 : tensor<4x?x4096xf16>
    %44614 = flow.tensor.transfer %44613 : tensor<4x?x4096xf16>{%dim_42190} to #hal.device.promise<@__device_5>
    %44615 = torch_c.from_builtin_tensor %44614 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42191 = torch.constant.int 1
    %44616 = torch.aten.add.Tensor %44597, %44600, %int1_42191 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42192 = torch.constant.int 1
    %44617 = torch.aten.add.Tensor %44616, %44603, %int1_42192 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42193 = torch.constant.int 1
    %44618 = torch.aten.add.Tensor %44617, %44606, %int1_42193 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42194 = torch.constant.int 1
    %44619 = torch.aten.add.Tensor %44618, %44609, %int1_42194 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42195 = torch.constant.int 1
    %44620 = torch.aten.add.Tensor %44619, %44442, %int1_42195 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42196 = torch.constant.int 1
    %44621 = torch.aten.add.Tensor %44620, %44612, %int1_42196 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42197 = torch.constant.int 1
    %44622 = torch.aten.add.Tensor %44621, %44615, %int1_42197 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44623 = torch_c.to_builtin_tensor %44412 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42198 = arith.constant 1 : index
    %dim_42199 = tensor.dim %44623, %c1_42198 : tensor<4x?x4096xf16>
    %44624 = flow.tensor.transfer %44623 : tensor<4x?x4096xf16>{%dim_42199} to #hal.device.promise<@__device_6>
    %44625 = torch_c.from_builtin_tensor %44624 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44626 = torch_c.to_builtin_tensor %44418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42200 = arith.constant 1 : index
    %dim_42201 = tensor.dim %44626, %c1_42200 : tensor<4x?x4096xf16>
    %44627 = flow.tensor.transfer %44626 : tensor<4x?x4096xf16>{%dim_42201} to #hal.device.promise<@__device_6>
    %44628 = torch_c.from_builtin_tensor %44627 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44629 = torch_c.to_builtin_tensor %44424 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42202 = arith.constant 1 : index
    %dim_42203 = tensor.dim %44629, %c1_42202 : tensor<4x?x4096xf16>
    %44630 = flow.tensor.transfer %44629 : tensor<4x?x4096xf16>{%dim_42203} to #hal.device.promise<@__device_6>
    %44631 = torch_c.from_builtin_tensor %44630 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44632 = torch_c.to_builtin_tensor %44430 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42204 = arith.constant 1 : index
    %dim_42205 = tensor.dim %44632, %c1_42204 : tensor<4x?x4096xf16>
    %44633 = flow.tensor.transfer %44632 : tensor<4x?x4096xf16>{%dim_42205} to #hal.device.promise<@__device_6>
    %44634 = torch_c.from_builtin_tensor %44633 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44635 = torch_c.to_builtin_tensor %44436 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42206 = arith.constant 1 : index
    %dim_42207 = tensor.dim %44635, %c1_42206 : tensor<4x?x4096xf16>
    %44636 = flow.tensor.transfer %44635 : tensor<4x?x4096xf16>{%dim_42207} to #hal.device.promise<@__device_6>
    %44637 = torch_c.from_builtin_tensor %44636 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44638 = torch_c.to_builtin_tensor %44442 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42208 = arith.constant 1 : index
    %dim_42209 = tensor.dim %44638, %c1_42208 : tensor<4x?x4096xf16>
    %44639 = flow.tensor.transfer %44638 : tensor<4x?x4096xf16>{%dim_42209} to #hal.device.promise<@__device_6>
    %44640 = torch_c.from_builtin_tensor %44639 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44641 = torch_c.to_builtin_tensor %44454 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42210 = arith.constant 1 : index
    %dim_42211 = tensor.dim %44641, %c1_42210 : tensor<4x?x4096xf16>
    %44642 = flow.tensor.transfer %44641 : tensor<4x?x4096xf16>{%dim_42211} to #hal.device.promise<@__device_6>
    %44643 = torch_c.from_builtin_tensor %44642 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42212 = torch.constant.int 1
    %44644 = torch.aten.add.Tensor %44625, %44628, %int1_42212 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42213 = torch.constant.int 1
    %44645 = torch.aten.add.Tensor %44644, %44631, %int1_42213 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42214 = torch.constant.int 1
    %44646 = torch.aten.add.Tensor %44645, %44634, %int1_42214 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42215 = torch.constant.int 1
    %44647 = torch.aten.add.Tensor %44646, %44637, %int1_42215 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42216 = torch.constant.int 1
    %44648 = torch.aten.add.Tensor %44647, %44640, %int1_42216 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42217 = torch.constant.int 1
    %44649 = torch.aten.add.Tensor %44648, %44448, %int1_42217 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42218 = torch.constant.int 1
    %44650 = torch.aten.add.Tensor %44649, %44643, %int1_42218 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44651 = torch_c.to_builtin_tensor %44412 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42219 = arith.constant 1 : index
    %dim_42220 = tensor.dim %44651, %c1_42219 : tensor<4x?x4096xf16>
    %44652 = flow.tensor.transfer %44651 : tensor<4x?x4096xf16>{%dim_42220} to #hal.device.promise<@__device_7>
    %44653 = torch_c.from_builtin_tensor %44652 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44654 = torch_c.to_builtin_tensor %44418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42221 = arith.constant 1 : index
    %dim_42222 = tensor.dim %44654, %c1_42221 : tensor<4x?x4096xf16>
    %44655 = flow.tensor.transfer %44654 : tensor<4x?x4096xf16>{%dim_42222} to #hal.device.promise<@__device_7>
    %44656 = torch_c.from_builtin_tensor %44655 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44657 = torch_c.to_builtin_tensor %44424 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42223 = arith.constant 1 : index
    %dim_42224 = tensor.dim %44657, %c1_42223 : tensor<4x?x4096xf16>
    %44658 = flow.tensor.transfer %44657 : tensor<4x?x4096xf16>{%dim_42224} to #hal.device.promise<@__device_7>
    %44659 = torch_c.from_builtin_tensor %44658 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44660 = torch_c.to_builtin_tensor %44430 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42225 = arith.constant 1 : index
    %dim_42226 = tensor.dim %44660, %c1_42225 : tensor<4x?x4096xf16>
    %44661 = flow.tensor.transfer %44660 : tensor<4x?x4096xf16>{%dim_42226} to #hal.device.promise<@__device_7>
    %44662 = torch_c.from_builtin_tensor %44661 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44663 = torch_c.to_builtin_tensor %44436 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42227 = arith.constant 1 : index
    %dim_42228 = tensor.dim %44663, %c1_42227 : tensor<4x?x4096xf16>
    %44664 = flow.tensor.transfer %44663 : tensor<4x?x4096xf16>{%dim_42228} to #hal.device.promise<@__device_7>
    %44665 = torch_c.from_builtin_tensor %44664 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44666 = torch_c.to_builtin_tensor %44442 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42229 = arith.constant 1 : index
    %dim_42230 = tensor.dim %44666, %c1_42229 : tensor<4x?x4096xf16>
    %44667 = flow.tensor.transfer %44666 : tensor<4x?x4096xf16>{%dim_42230} to #hal.device.promise<@__device_7>
    %44668 = torch_c.from_builtin_tensor %44667 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44669 = torch_c.to_builtin_tensor %44448 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42231 = arith.constant 1 : index
    %dim_42232 = tensor.dim %44669, %c1_42231 : tensor<4x?x4096xf16>
    %44670 = flow.tensor.transfer %44669 : tensor<4x?x4096xf16>{%dim_42232} to #hal.device.promise<@__device_7>
    %44671 = torch_c.from_builtin_tensor %44670 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42233 = torch.constant.int 1
    %44672 = torch.aten.add.Tensor %44653, %44656, %int1_42233 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42234 = torch.constant.int 1
    %44673 = torch.aten.add.Tensor %44672, %44659, %int1_42234 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42235 = torch.constant.int 1
    %44674 = torch.aten.add.Tensor %44673, %44662, %int1_42235 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42236 = torch.constant.int 1
    %44675 = torch.aten.add.Tensor %44674, %44665, %int1_42236 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42237 = torch.constant.int 1
    %44676 = torch.aten.add.Tensor %44675, %44668, %int1_42237 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42238 = torch.constant.int 1
    %44677 = torch.aten.add.Tensor %44676, %44671, %int1_42238 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42239 = torch.constant.int 1
    %44678 = torch.aten.add.Tensor %44677, %44454, %int1_42239 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
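    // Per-device adds of earlier values %43338..%43345 (defined before this
    // point) to the eight all-reduced outputs (%44482, %44510, ..., %44678);
    // presumably the transformer residual connection, replicated one copy per
    // device.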
    %int1_42240 = torch.constant.int 1
    %44679 = torch.aten.add.Tensor %43338, %44482, %int1_42240 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42241 = torch.constant.int 1
    %44680 = torch.aten.add.Tensor %43339, %44510, %int1_42241 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42242 = torch.constant.int 1
    %44681 = torch.aten.add.Tensor %43340, %44538, %int1_42242 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42243 = torch.constant.int 1
    %44682 = torch.aten.add.Tensor %43341, %44566, %int1_42243 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42244 = torch.constant.int 1
    %44683 = torch.aten.add.Tensor %43342, %44594, %int1_42244 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42245 = torch.constant.int 1
    %44684 = torch.aten.add.Tensor %43343, %44622, %int1_42245 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42246 = torch.constant.int 1
    %44685 = torch.aten.add.Tensor %43344, %44650, %int1_42246 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42247 = torch.constant.int 1
    %44686 = torch.aten.add.Tensor %43345, %44678, %int1_42247 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
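    // The remainder of this block matches an RMSNorm, replicated across the
    // eight devices. Per copy, computed in f32:
    //   y = w * x * rsqrt(mean(x^2, dim=-1, keepdim=true) + eps)
    // where eps = 9.9999997473787516e-6 (1e-5 rounded to f32) and w is the
    // per-device norm weight (%1632..%1639). First the f16 activations are
    // upcast to f32 (torch dtype code 6).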
    %int6_42248 = torch.constant.int 6
    %44687 = torch.prims.convert_element_type %44679, %int6_42248 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42249 = torch.constant.int 6
    %44688 = torch.prims.convert_element_type %44680, %int6_42249 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42250 = torch.constant.int 6
    %44689 = torch.prims.convert_element_type %44681, %int6_42250 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42251 = torch.constant.int 6
    %44690 = torch.prims.convert_element_type %44682, %int6_42251 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42252 = torch.constant.int 6
    %44691 = torch.prims.convert_element_type %44683, %int6_42252 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42253 = torch.constant.int 6
    %44692 = torch.prims.convert_element_type %44684, %int6_42253 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42254 = torch.constant.int 6
    %44693 = torch.prims.convert_element_type %44685, %int6_42254 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42255 = torch.constant.int 6
    %44694 = torch.prims.convert_element_type %44686, %int6_42255 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
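    // Square each upcast copy elementwise (x^2).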
    %int2_42256 = torch.constant.int 2
    %44695 = torch.aten.pow.Tensor_Scalar %44687, %int2_42256 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42257 = torch.constant.int 2
    %44696 = torch.aten.pow.Tensor_Scalar %44688, %int2_42257 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42258 = torch.constant.int 2
    %44697 = torch.aten.pow.Tensor_Scalar %44689, %int2_42258 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42259 = torch.constant.int 2
    %44698 = torch.aten.pow.Tensor_Scalar %44690, %int2_42259 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42260 = torch.constant.int 2
    %44699 = torch.aten.pow.Tensor_Scalar %44691, %int2_42260 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42261 = torch.constant.int 2
    %44700 = torch.aten.pow.Tensor_Scalar %44692, %int2_42261 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42262 = torch.constant.int 2
    %44701 = torch.aten.pow.Tensor_Scalar %44693, %int2_42262 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42263 = torch.constant.int 2
    %44702 = torch.aten.pow.Tensor_Scalar %44694, %int2_42263 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
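    // Mean of x^2 over the hidden dimension (dim -1, keepdim=true), reducing
    // each copy to a [4, ?, 1] tensor.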
    %int-1_42264 = torch.constant.int -1
    %44703 = torch.prim.ListConstruct %int-1_42264 : (!torch.int) -> !torch.list<int>
    %true_42265 = torch.constant.bool true
    %none_42266 = torch.constant.none
    %44704 = torch.aten.mean.dim %44695, %44703, %true_42265, %none_42266 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42267 = torch.constant.int -1
    %44705 = torch.prim.ListConstruct %int-1_42267 : (!torch.int) -> !torch.list<int>
    %true_42268 = torch.constant.bool true
    %none_42269 = torch.constant.none
    %44706 = torch.aten.mean.dim %44696, %44705, %true_42268, %none_42269 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42270 = torch.constant.int -1
    %44707 = torch.prim.ListConstruct %int-1_42270 : (!torch.int) -> !torch.list<int>
    %true_42271 = torch.constant.bool true
    %none_42272 = torch.constant.none
    %44708 = torch.aten.mean.dim %44697, %44707, %true_42271, %none_42272 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42273 = torch.constant.int -1
    %44709 = torch.prim.ListConstruct %int-1_42273 : (!torch.int) -> !torch.list<int>
    %true_42274 = torch.constant.bool true
    %none_42275 = torch.constant.none
    %44710 = torch.aten.mean.dim %44698, %44709, %true_42274, %none_42275 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42276 = torch.constant.int -1
    %44711 = torch.prim.ListConstruct %int-1_42276 : (!torch.int) -> !torch.list<int>
    %true_42277 = torch.constant.bool true
    %none_42278 = torch.constant.none
    %44712 = torch.aten.mean.dim %44699, %44711, %true_42277, %none_42278 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42279 = torch.constant.int -1
    %44713 = torch.prim.ListConstruct %int-1_42279 : (!torch.int) -> !torch.list<int>
    %true_42280 = torch.constant.bool true
    %none_42281 = torch.constant.none
    %44714 = torch.aten.mean.dim %44700, %44713, %true_42280, %none_42281 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42282 = torch.constant.int -1
    %44715 = torch.prim.ListConstruct %int-1_42282 : (!torch.int) -> !torch.list<int>
    %true_42283 = torch.constant.bool true
    %none_42284 = torch.constant.none
    %44716 = torch.aten.mean.dim %44701, %44715, %true_42283, %none_42284 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42285 = torch.constant.int -1
    %44717 = torch.prim.ListConstruct %int-1_42285 : (!torch.int) -> !torch.list<int>
    %true_42286 = torch.constant.bool true
    %none_42287 = torch.constant.none
    %44718 = torch.aten.mean.dim %44702, %44717, %true_42286, %none_42287 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
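    // Bias each mean by eps before the reciprocal square root.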
    %float9.999990e-06_42288 = torch.constant.float 9.9999997473787516E-6
    %int1_42289 = torch.constant.int 1
    %44719 = torch.aten.add.Scalar %44704, %float9.999990e-06_42288, %int1_42289 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42290 = torch.constant.float 9.9999997473787516E-6
    %int1_42291 = torch.constant.int 1
    %44720 = torch.aten.add.Scalar %44706, %float9.999990e-06_42290, %int1_42291 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42292 = torch.constant.float 9.9999997473787516E-6
    %int1_42293 = torch.constant.int 1
    %44721 = torch.aten.add.Scalar %44708, %float9.999990e-06_42292, %int1_42293 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42294 = torch.constant.float 9.9999997473787516E-6
    %int1_42295 = torch.constant.int 1
    %44722 = torch.aten.add.Scalar %44710, %float9.999990e-06_42294, %int1_42295 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42296 = torch.constant.float 9.9999997473787516E-6
    %int1_42297 = torch.constant.int 1
    %44723 = torch.aten.add.Scalar %44712, %float9.999990e-06_42296, %int1_42297 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42298 = torch.constant.float 9.9999997473787516E-6
    %int1_42299 = torch.constant.int 1
    %44724 = torch.aten.add.Scalar %44714, %float9.999990e-06_42298, %int1_42299 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42300 = torch.constant.float 9.9999997473787516E-6
    %int1_42301 = torch.constant.int 1
    %44725 = torch.aten.add.Scalar %44716, %float9.999990e-06_42300, %int1_42301 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42302 = torch.constant.float 9.9999997473787516E-6
    %int1_42303 = torch.constant.int 1
    %44726 = torch.aten.add.Scalar %44718, %float9.999990e-06_42302, %int1_42303 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
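    // rsqrt(mean(x^2) + eps): the per-position normalization factor.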
    %44727 = torch.aten.rsqrt %44719 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %44728 = torch.aten.rsqrt %44720 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %44729 = torch.aten.rsqrt %44721 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %44730 = torch.aten.rsqrt %44722 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %44731 = torch.aten.rsqrt %44723 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %44732 = torch.aten.rsqrt %44724 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %44733 = torch.aten.rsqrt %44725 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %44734 = torch.aten.rsqrt %44726 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %44734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
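    // Normalize: broadcast-multiply each activation tensor by its rsqrt
    // factor.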
    %44735 = torch.aten.mul.Tensor %44687, %44727 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44736 = torch.aten.mul.Tensor %44688, %44728 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44737 = torch.aten.mul.Tensor %44689, %44729 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44738 = torch.aten.mul.Tensor %44690, %44730 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44739 = torch.aten.mul.Tensor %44691, %44731 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44740 = torch.aten.mul.Tensor %44692, %44732 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44741 = torch.aten.mul.Tensor %44693, %44733 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44742 = torch.aten.mul.Tensor %44694, %44734 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
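    // Apply the learned 4096-wide RMSNorm scale (%1632-%1639, one replica
    // per device); judging by the gate/up/down matmuls that follow, these
    // are presumably the block's ffn_norm weights.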
    %44743 = torch.aten.mul.Tensor %1632, %44735 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44744 = torch.aten.mul.Tensor %1633, %44736 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44745 = torch.aten.mul.Tensor %1634, %44737 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44746 = torch.aten.mul.Tensor %1635, %44738 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44747 = torch.aten.mul.Tensor %1636, %44739 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44748 = torch.aten.mul.Tensor %1637, %44740 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44749 = torch.aten.mul.Tensor %1638, %44741 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %44750 = torch.aten.mul.Tensor %1639, %44742 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %44750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
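    // Cast the normalized activations back to f16 (torch dtype code 5)
    // for the half-precision matmuls.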
    %int5_42304 = torch.constant.int 5
    %44751 = torch.prims.convert_element_type %44743, %int5_42304 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42305 = torch.constant.int 5
    %44752 = torch.prims.convert_element_type %44744, %int5_42305 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42306 = torch.constant.int 5
    %44753 = torch.prims.convert_element_type %44745, %int5_42306 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42307 = torch.constant.int 5
    %44754 = torch.prims.convert_element_type %44746, %int5_42307 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42308 = torch.constant.int 5
    %44755 = torch.prims.convert_element_type %44747, %int5_42308 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42309 = torch.constant.int 5
    %44756 = torch.prims.convert_element_type %44748, %int5_42309 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42310 = torch.constant.int 5
    %44757 = torch.prims.convert_element_type %44749, %int5_42310 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42311 = torch.constant.int 5
    %44758 = torch.prims.convert_element_type %44750, %int5_42311 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
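    // FFN gate projection: transpose each [1792,4096] weight shard
    // (%1640-%1647) to [4096,1792]. 8 x 1792 = 14336, consistent with an
    // 8-way column-parallel split of the MLP intermediate dimension.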
    %int1_42312 = torch.constant.int 1
    %int0_42313 = torch.constant.int 0
    %44759 = torch.prim.ListConstruct %int1_42312, %int0_42313 : (!torch.int, !torch.int) -> !torch.list<int>
    %44760 = torch.aten.permute %1640, %44759 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42314 = torch.constant.int 1
    %int0_42315 = torch.constant.int 0
    %44761 = torch.prim.ListConstruct %int1_42314, %int0_42315 : (!torch.int, !torch.int) -> !torch.list<int>
    %44762 = torch.aten.permute %1641, %44761 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42316 = torch.constant.int 1
    %int0_42317 = torch.constant.int 0
    %44763 = torch.prim.ListConstruct %int1_42316, %int0_42317 : (!torch.int, !torch.int) -> !torch.list<int>
    %44764 = torch.aten.permute %1642, %44763 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42318 = torch.constant.int 1
    %int0_42319 = torch.constant.int 0
    %44765 = torch.prim.ListConstruct %int1_42318, %int0_42319 : (!torch.int, !torch.int) -> !torch.list<int>
    %44766 = torch.aten.permute %1643, %44765 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42320 = torch.constant.int 1
    %int0_42321 = torch.constant.int 0
    %44767 = torch.prim.ListConstruct %int1_42320, %int0_42321 : (!torch.int, !torch.int) -> !torch.list<int>
    %44768 = torch.aten.permute %1644, %44767 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42322 = torch.constant.int 1
    %int0_42323 = torch.constant.int 0
    %44769 = torch.prim.ListConstruct %int1_42322, %int0_42323 : (!torch.int, !torch.int) -> !torch.list<int>
    %44770 = torch.aten.permute %1645, %44769 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42324 = torch.constant.int 1
    %int0_42325 = torch.constant.int 0
    %44771 = torch.prim.ListConstruct %int1_42324, %int0_42325 : (!torch.int, !torch.int) -> !torch.list<int>
    %44772 = torch.aten.permute %1646, %44771 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42326 = torch.constant.int 1
    %int0_42327 = torch.constant.int 0
    %44773 = torch.prim.ListConstruct %int1_42326, %int0_42327 : (!torch.int, !torch.int) -> !torch.list<int>
    %44774 = torch.aten.permute %1647, %44773 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
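    // Per shard: flatten [4,?,4096] to [4*?,4096], matmul against the
    // transposed gate weight, and reshape the result to [4,?,1792].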
    %int4_42328 = torch.constant.int 4
    %44775 = torch.aten.mul.int %int4_42328, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42329 = torch.constant.int 4096
    %44776 = torch.prim.ListConstruct %44775, %int4096_42329 : (!torch.int, !torch.int) -> !torch.list<int>
    %44777 = torch.aten.view %44751, %44776 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44777, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44778 = torch.aten.mm %44777, %44760 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44778, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42330 = torch.constant.int 4
    %int1792_42331 = torch.constant.int 1792
    %44779 = torch.prim.ListConstruct %int4_42330, %2482, %int1792_42331 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44780 = torch.aten.view %44778, %44779 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42332 = torch.constant.int 4
    %44781 = torch.aten.mul.int %int4_42332, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42333 = torch.constant.int 4096
    %44782 = torch.prim.ListConstruct %44781, %int4096_42333 : (!torch.int, !torch.int) -> !torch.list<int>
    %44783 = torch.aten.view %44752, %44782 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44783, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44784 = torch.aten.mm %44783, %44762 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44784, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42334 = torch.constant.int 4
    %int1792_42335 = torch.constant.int 1792
    %44785 = torch.prim.ListConstruct %int4_42334, %2482, %int1792_42335 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44786 = torch.aten.view %44784, %44785 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42336 = torch.constant.int 4
    %44787 = torch.aten.mul.int %int4_42336, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42337 = torch.constant.int 4096
    %44788 = torch.prim.ListConstruct %44787, %int4096_42337 : (!torch.int, !torch.int) -> !torch.list<int>
    %44789 = torch.aten.view %44753, %44788 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44789, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44790 = torch.aten.mm %44789, %44764 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44790, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42338 = torch.constant.int 4
    %int1792_42339 = torch.constant.int 1792
    %44791 = torch.prim.ListConstruct %int4_42338, %2482, %int1792_42339 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44792 = torch.aten.view %44790, %44791 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42340 = torch.constant.int 4
    %44793 = torch.aten.mul.int %int4_42340, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42341 = torch.constant.int 4096
    %44794 = torch.prim.ListConstruct %44793, %int4096_42341 : (!torch.int, !torch.int) -> !torch.list<int>
    %44795 = torch.aten.view %44754, %44794 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44795, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44796 = torch.aten.mm %44795, %44766 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44796, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42342 = torch.constant.int 4
    %int1792_42343 = torch.constant.int 1792
    %44797 = torch.prim.ListConstruct %int4_42342, %2482, %int1792_42343 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44798 = torch.aten.view %44796, %44797 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42344 = torch.constant.int 4
    %44799 = torch.aten.mul.int %int4_42344, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42345 = torch.constant.int 4096
    %44800 = torch.prim.ListConstruct %44799, %int4096_42345 : (!torch.int, !torch.int) -> !torch.list<int>
    %44801 = torch.aten.view %44755, %44800 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44801, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44802 = torch.aten.mm %44801, %44768 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44802, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42346 = torch.constant.int 4
    %int1792_42347 = torch.constant.int 1792
    %44803 = torch.prim.ListConstruct %int4_42346, %2482, %int1792_42347 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44804 = torch.aten.view %44802, %44803 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42348 = torch.constant.int 4
    %44805 = torch.aten.mul.int %int4_42348, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42349 = torch.constant.int 4096
    %44806 = torch.prim.ListConstruct %44805, %int4096_42349 : (!torch.int, !torch.int) -> !torch.list<int>
    %44807 = torch.aten.view %44756, %44806 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44807, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44808 = torch.aten.mm %44807, %44770 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44808, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42350 = torch.constant.int 4
    %int1792_42351 = torch.constant.int 1792
    %44809 = torch.prim.ListConstruct %int4_42350, %2482, %int1792_42351 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44810 = torch.aten.view %44808, %44809 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42352 = torch.constant.int 4
    %44811 = torch.aten.mul.int %int4_42352, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42353 = torch.constant.int 4096
    %44812 = torch.prim.ListConstruct %44811, %int4096_42353 : (!torch.int, !torch.int) -> !torch.list<int>
    %44813 = torch.aten.view %44757, %44812 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44813, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44814 = torch.aten.mm %44813, %44772 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44814, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42354 = torch.constant.int 4
    %int1792_42355 = torch.constant.int 1792
    %44815 = torch.prim.ListConstruct %int4_42354, %2482, %int1792_42355 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44816 = torch.aten.view %44814, %44815 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42356 = torch.constant.int 4
    %44817 = torch.aten.mul.int %int4_42356, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42357 = torch.constant.int 4096
    %44818 = torch.prim.ListConstruct %44817, %int4096_42357 : (!torch.int, !torch.int) -> !torch.list<int>
    %44819 = torch.aten.view %44758, %44818 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44819, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44820 = torch.aten.mm %44819, %44774 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44820, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42358 = torch.constant.int 4
    %int1792_42359 = torch.constant.int 1792
    %44821 = torch.prim.ListConstruct %int4_42358, %2482, %int1792_42359 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44822 = torch.aten.view %44820, %44821 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
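    // SiLU activation on each gate-projection shard.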
    %44823 = torch.aten.silu %44780 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44824 = torch.aten.silu %44786 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44825 = torch.aten.silu %44792 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44826 = torch.aten.silu %44798 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44827 = torch.aten.silu %44804 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44828 = torch.aten.silu %44810 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44829 = torch.aten.silu %44816 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44830 = torch.aten.silu %44822 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
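    // Up projection: the same transpose and flatten/matmul/reshape
    // sequence, now with the second set of weight shards (%1648-%1655).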
    %int1_42360 = torch.constant.int 1
    %int0_42361 = torch.constant.int 0
    %44831 = torch.prim.ListConstruct %int1_42360, %int0_42361 : (!torch.int, !torch.int) -> !torch.list<int>
    %44832 = torch.aten.permute %1648, %44831 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42362 = torch.constant.int 1
    %int0_42363 = torch.constant.int 0
    %44833 = torch.prim.ListConstruct %int1_42362, %int0_42363 : (!torch.int, !torch.int) -> !torch.list<int>
    %44834 = torch.aten.permute %1649, %44833 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42364 = torch.constant.int 1
    %int0_42365 = torch.constant.int 0
    %44835 = torch.prim.ListConstruct %int1_42364, %int0_42365 : (!torch.int, !torch.int) -> !torch.list<int>
    %44836 = torch.aten.permute %1650, %44835 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42366 = torch.constant.int 1
    %int0_42367 = torch.constant.int 0
    %44837 = torch.prim.ListConstruct %int1_42366, %int0_42367 : (!torch.int, !torch.int) -> !torch.list<int>
    %44838 = torch.aten.permute %1651, %44837 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42368 = torch.constant.int 1
    %int0_42369 = torch.constant.int 0
    %44839 = torch.prim.ListConstruct %int1_42368, %int0_42369 : (!torch.int, !torch.int) -> !torch.list<int>
    %44840 = torch.aten.permute %1652, %44839 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42370 = torch.constant.int 1
    %int0_42371 = torch.constant.int 0
    %44841 = torch.prim.ListConstruct %int1_42370, %int0_42371 : (!torch.int, !torch.int) -> !torch.list<int>
    %44842 = torch.aten.permute %1653, %44841 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42372 = torch.constant.int 1
    %int0_42373 = torch.constant.int 0
    %44843 = torch.prim.ListConstruct %int1_42372, %int0_42373 : (!torch.int, !torch.int) -> !torch.list<int>
    %44844 = torch.aten.permute %1654, %44843 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_42374 = torch.constant.int 1
    %int0_42375 = torch.constant.int 0
    %44845 = torch.prim.ListConstruct %int1_42374, %int0_42375 : (!torch.int, !torch.int) -> !torch.list<int>
    %44846 = torch.aten.permute %1655, %44845 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int4_42376 = torch.constant.int 4
    %44847 = torch.aten.mul.int %int4_42376, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42377 = torch.constant.int 4096
    %44848 = torch.prim.ListConstruct %44847, %int4096_42377 : (!torch.int, !torch.int) -> !torch.list<int>
    %44849 = torch.aten.view %44751, %44848 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44849, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44850 = torch.aten.mm %44849, %44832 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44850, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42378 = torch.constant.int 4
    %int1792_42379 = torch.constant.int 1792
    %44851 = torch.prim.ListConstruct %int4_42378, %2482, %int1792_42379 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44852 = torch.aten.view %44850, %44851 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42380 = torch.constant.int 4
    %44853 = torch.aten.mul.int %int4_42380, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42381 = torch.constant.int 4096
    %44854 = torch.prim.ListConstruct %44853, %int4096_42381 : (!torch.int, !torch.int) -> !torch.list<int>
    %44855 = torch.aten.view %44752, %44854 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44855, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44856 = torch.aten.mm %44855, %44834 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44856, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42382 = torch.constant.int 4
    %int1792_42383 = torch.constant.int 1792
    %44857 = torch.prim.ListConstruct %int4_42382, %2482, %int1792_42383 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44858 = torch.aten.view %44856, %44857 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42384 = torch.constant.int 4
    %44859 = torch.aten.mul.int %int4_42384, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42385 = torch.constant.int 4096
    %44860 = torch.prim.ListConstruct %44859, %int4096_42385 : (!torch.int, !torch.int) -> !torch.list<int>
    %44861 = torch.aten.view %44753, %44860 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44861, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44862 = torch.aten.mm %44861, %44836 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44862, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42386 = torch.constant.int 4
    %int1792_42387 = torch.constant.int 1792
    %44863 = torch.prim.ListConstruct %int4_42386, %2482, %int1792_42387 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44864 = torch.aten.view %44862, %44863 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42388 = torch.constant.int 4
    %44865 = torch.aten.mul.int %int4_42388, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42389 = torch.constant.int 4096
    %44866 = torch.prim.ListConstruct %44865, %int4096_42389 : (!torch.int, !torch.int) -> !torch.list<int>
    %44867 = torch.aten.view %44754, %44866 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44867, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44868 = torch.aten.mm %44867, %44838 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44868, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42390 = torch.constant.int 4
    %int1792_42391 = torch.constant.int 1792
    %44869 = torch.prim.ListConstruct %int4_42390, %2482, %int1792_42391 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44870 = torch.aten.view %44868, %44869 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42392 = torch.constant.int 4
    %44871 = torch.aten.mul.int %int4_42392, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42393 = torch.constant.int 4096
    %44872 = torch.prim.ListConstruct %44871, %int4096_42393 : (!torch.int, !torch.int) -> !torch.list<int>
    %44873 = torch.aten.view %44755, %44872 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44873, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44874 = torch.aten.mm %44873, %44840 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44874, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42394 = torch.constant.int 4
    %int1792_42395 = torch.constant.int 1792
    %44875 = torch.prim.ListConstruct %int4_42394, %2482, %int1792_42395 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44876 = torch.aten.view %44874, %44875 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42396 = torch.constant.int 4
    %44877 = torch.aten.mul.int %int4_42396, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42397 = torch.constant.int 4096
    %44878 = torch.prim.ListConstruct %44877, %int4096_42397 : (!torch.int, !torch.int) -> !torch.list<int>
    %44879 = torch.aten.view %44756, %44878 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44879, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44880 = torch.aten.mm %44879, %44842 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44880, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42398 = torch.constant.int 4
    %int1792_42399 = torch.constant.int 1792
    %44881 = torch.prim.ListConstruct %int4_42398, %2482, %int1792_42399 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44882 = torch.aten.view %44880, %44881 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42400 = torch.constant.int 4
    %44883 = torch.aten.mul.int %int4_42400, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42401 = torch.constant.int 4096
    %44884 = torch.prim.ListConstruct %44883, %int4096_42401 : (!torch.int, !torch.int) -> !torch.list<int>
    %44885 = torch.aten.view %44757, %44884 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44885, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44886 = torch.aten.mm %44885, %44844 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44886, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42402 = torch.constant.int 4
    %int1792_42403 = torch.constant.int 1792
    %44887 = torch.prim.ListConstruct %int4_42402, %2482, %int1792_42403 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44888 = torch.aten.view %44886, %44887 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_42404 = torch.constant.int 4
    %44889 = torch.aten.mul.int %int4_42404, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42405 = torch.constant.int 4096
    %44890 = torch.prim.ListConstruct %44889, %int4096_42405 : (!torch.int, !torch.int) -> !torch.list<int>
    %44891 = torch.aten.view %44758, %44890 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44891, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %44892 = torch.aten.mm %44891, %44846 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44892, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_42406 = torch.constant.int 4
    %int1792_42407 = torch.constant.int 1792
    %44893 = torch.prim.ListConstruct %int4_42406, %2482, %int1792_42407 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44894 = torch.aten.view %44892, %44893 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
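    // Elementwise gating: silu(gate) * up per shard, the SwiGLU pattern.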
    %44895 = torch.aten.mul.Tensor %44823, %44852 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44896 = torch.aten.mul.Tensor %44824, %44858 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44897 = torch.aten.mul.Tensor %44825, %44864 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44898 = torch.aten.mul.Tensor %44826, %44870 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44899 = torch.aten.mul.Tensor %44827, %44876 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44900 = torch.aten.mul.Tensor %44828, %44882 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44901 = torch.aten.mul.Tensor %44829, %44888 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %44902 = torch.aten.mul.Tensor %44830, %44894 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %44902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
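    // Down projection: transpose the [4096,1792] weight shards
    // (%1656-%1663) to [1792,4096].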
    %int1_42408 = torch.constant.int 1
    %int0_42409 = torch.constant.int 0
    %44903 = torch.prim.ListConstruct %int1_42408, %int0_42409 : (!torch.int, !torch.int) -> !torch.list<int>
    %44904 = torch.aten.permute %1656, %44903 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_42410 = torch.constant.int 1
    %int0_42411 = torch.constant.int 0
    %44905 = torch.prim.ListConstruct %int1_42410, %int0_42411 : (!torch.int, !torch.int) -> !torch.list<int>
    %44906 = torch.aten.permute %1657, %44905 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_42412 = torch.constant.int 1
    %int0_42413 = torch.constant.int 0
    %44907 = torch.prim.ListConstruct %int1_42412, %int0_42413 : (!torch.int, !torch.int) -> !torch.list<int>
    %44908 = torch.aten.permute %1658, %44907 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_42414 = torch.constant.int 1
    %int0_42415 = torch.constant.int 0
    %44909 = torch.prim.ListConstruct %int1_42414, %int0_42415 : (!torch.int, !torch.int) -> !torch.list<int>
    %44910 = torch.aten.permute %1659, %44909 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_42416 = torch.constant.int 1
    %int0_42417 = torch.constant.int 0
    %44911 = torch.prim.ListConstruct %int1_42416, %int0_42417 : (!torch.int, !torch.int) -> !torch.list<int>
    %44912 = torch.aten.permute %1660, %44911 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_42418 = torch.constant.int 1
    %int0_42419 = torch.constant.int 0
    %44913 = torch.prim.ListConstruct %int1_42418, %int0_42419 : (!torch.int, !torch.int) -> !torch.list<int>
    %44914 = torch.aten.permute %1661, %44913 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_42420 = torch.constant.int 1
    %int0_42421 = torch.constant.int 0
    %44915 = torch.prim.ListConstruct %int1_42420, %int0_42421 : (!torch.int, !torch.int) -> !torch.list<int>
    %44916 = torch.aten.permute %1662, %44915 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_42422 = torch.constant.int 1
    %int0_42423 = torch.constant.int 0
    %44917 = torch.prim.ListConstruct %int1_42422, %int0_42423 : (!torch.int, !torch.int) -> !torch.list<int>
    %44918 = torch.aten.permute %1663, %44917 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
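    // Per shard: flatten to [?,1792], matmul against [1792,4096], reshape
    // to [4,?,4096]. Because the intermediate dimension is split across
    // devices, each result is only a partial sum of the full down
    // projection.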
    %int1_42424 = torch.constant.int 1
    %44919 = torch.aten.size.int %44780, %int1_42424 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_42425 = torch.constant.int 4
    %44920 = torch.aten.mul.int %int4_42425, %44919 : !torch.int, !torch.int -> !torch.int
    %int1792_42426 = torch.constant.int 1792
    %44921 = torch.prim.ListConstruct %44920, %int1792_42426 : (!torch.int, !torch.int) -> !torch.list<int>
    %44922 = torch.aten.view %44895, %44921 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44922, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %44923 = torch.aten.mm %44922, %44904 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44923, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42427 = torch.constant.int 4
    %int4096_42428 = torch.constant.int 4096
    %44924 = torch.prim.ListConstruct %int4_42427, %44919, %int4096_42428 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44925 = torch.aten.view %44923, %44924 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42429 = torch.constant.int 1
    %44926 = torch.aten.size.int %44786, %int1_42429 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_42430 = torch.constant.int 4
    %44927 = torch.aten.mul.int %int4_42430, %44926 : !torch.int, !torch.int -> !torch.int
    %int1792_42431 = torch.constant.int 1792
    %44928 = torch.prim.ListConstruct %44927, %int1792_42431 : (!torch.int, !torch.int) -> !torch.list<int>
    %44929 = torch.aten.view %44896, %44928 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44929, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %44930 = torch.aten.mm %44929, %44906 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44930, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42432 = torch.constant.int 4
    %int4096_42433 = torch.constant.int 4096
    %44931 = torch.prim.ListConstruct %int4_42432, %44926, %int4096_42433 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44932 = torch.aten.view %44930, %44931 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42434 = torch.constant.int 1
    %44933 = torch.aten.size.int %44792, %int1_42434 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_42435 = torch.constant.int 4
    %44934 = torch.aten.mul.int %int4_42435, %44933 : !torch.int, !torch.int -> !torch.int
    %int1792_42436 = torch.constant.int 1792
    %44935 = torch.prim.ListConstruct %44934, %int1792_42436 : (!torch.int, !torch.int) -> !torch.list<int>
    %44936 = torch.aten.view %44897, %44935 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44936, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %44937 = torch.aten.mm %44936, %44908 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44937, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42437 = torch.constant.int 4
    %int4096_42438 = torch.constant.int 4096
    %44938 = torch.prim.ListConstruct %int4_42437, %44933, %int4096_42438 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44939 = torch.aten.view %44937, %44938 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42439 = torch.constant.int 1
    %44940 = torch.aten.size.int %44798, %int1_42439 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_42440 = torch.constant.int 4
    %44941 = torch.aten.mul.int %int4_42440, %44940 : !torch.int, !torch.int -> !torch.int
    %int1792_42441 = torch.constant.int 1792
    %44942 = torch.prim.ListConstruct %44941, %int1792_42441 : (!torch.int, !torch.int) -> !torch.list<int>
    %44943 = torch.aten.view %44898, %44942 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44943, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %44944 = torch.aten.mm %44943, %44910 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44944, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42442 = torch.constant.int 4
    %int4096_42443 = torch.constant.int 4096
    %44945 = torch.prim.ListConstruct %int4_42442, %44940, %int4096_42443 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44946 = torch.aten.view %44944, %44945 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42444 = torch.constant.int 1
    %44947 = torch.aten.size.int %44804, %int1_42444 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_42445 = torch.constant.int 4
    %44948 = torch.aten.mul.int %int4_42445, %44947 : !torch.int, !torch.int -> !torch.int
    %int1792_42446 = torch.constant.int 1792
    %44949 = torch.prim.ListConstruct %44948, %int1792_42446 : (!torch.int, !torch.int) -> !torch.list<int>
    %44950 = torch.aten.view %44899, %44949 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44950, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %44951 = torch.aten.mm %44950, %44912 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44951, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42447 = torch.constant.int 4
    %int4096_42448 = torch.constant.int 4096
    %44952 = torch.prim.ListConstruct %int4_42447, %44947, %int4096_42448 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44953 = torch.aten.view %44951, %44952 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42449 = torch.constant.int 1
    %44954 = torch.aten.size.int %44810, %int1_42449 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_42450 = torch.constant.int 4
    %44955 = torch.aten.mul.int %int4_42450, %44954 : !torch.int, !torch.int -> !torch.int
    %int1792_42451 = torch.constant.int 1792
    %44956 = torch.prim.ListConstruct %44955, %int1792_42451 : (!torch.int, !torch.int) -> !torch.list<int>
    %44957 = torch.aten.view %44900, %44956 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44957, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %44958 = torch.aten.mm %44957, %44914 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44958, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42452 = torch.constant.int 4
    %int4096_42453 = torch.constant.int 4096
    %44959 = torch.prim.ListConstruct %int4_42452, %44954, %int4096_42453 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44960 = torch.aten.view %44958, %44959 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42454 = torch.constant.int 1
    %44961 = torch.aten.size.int %44816, %int1_42454 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_42455 = torch.constant.int 4
    %44962 = torch.aten.mul.int %int4_42455, %44961 : !torch.int, !torch.int -> !torch.int
    %int1792_42456 = torch.constant.int 1792
    %44963 = torch.prim.ListConstruct %44962, %int1792_42456 : (!torch.int, !torch.int) -> !torch.list<int>
    %44964 = torch.aten.view %44901, %44963 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44964, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %44965 = torch.aten.mm %44964, %44916 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44965, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42457 = torch.constant.int 4
    %int4096_42458 = torch.constant.int 4096
    %44966 = torch.prim.ListConstruct %int4_42457, %44961, %int4096_42458 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44967 = torch.aten.view %44965, %44966 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42459 = torch.constant.int 1
    %44968 = torch.aten.size.int %44822, %int1_42459 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_42460 = torch.constant.int 4
    %44969 = torch.aten.mul.int %int4_42460, %44968 : !torch.int, !torch.int -> !torch.int
    %int1792_42461 = torch.constant.int 1792
    %44970 = torch.prim.ListConstruct %44969, %int1792_42461 : (!torch.int, !torch.int) -> !torch.list<int>
    %44971 = torch.aten.view %44902, %44970 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %44971, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %44972 = torch.aten.mm %44971, %44918 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %44972, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_42462 = torch.constant.int 4
    %int4096_42463 = torch.constant.int 4096
    %44973 = torch.prim.ListConstruct %int4_42462, %44968, %int4096_42463 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %44974 = torch.aten.view %44972, %44973 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
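    // Unrolled all-reduce, step for @__device_0: transfer the seven
    // remote partial sums onto device 0, carrying the dynamic sequence
    // dim through each flow.tensor.transfer.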
    %44975 = torch_c.to_builtin_tensor %44932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42464 = arith.constant 1 : index
    %dim_42465 = tensor.dim %44975, %c1_42464 : tensor<4x?x4096xf16>
    %44976 = flow.tensor.transfer %44975 : tensor<4x?x4096xf16>{%dim_42465} to #hal.device.promise<@__device_0>
    %44977 = torch_c.from_builtin_tensor %44976 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44978 = torch_c.to_builtin_tensor %44939 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42466 = arith.constant 1 : index
    %dim_42467 = tensor.dim %44978, %c1_42466 : tensor<4x?x4096xf16>
    %44979 = flow.tensor.transfer %44978 : tensor<4x?x4096xf16>{%dim_42467} to #hal.device.promise<@__device_0>
    %44980 = torch_c.from_builtin_tensor %44979 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44981 = torch_c.to_builtin_tensor %44946 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42468 = arith.constant 1 : index
    %dim_42469 = tensor.dim %44981, %c1_42468 : tensor<4x?x4096xf16>
    %44982 = flow.tensor.transfer %44981 : tensor<4x?x4096xf16>{%dim_42469} to #hal.device.promise<@__device_0>
    %44983 = torch_c.from_builtin_tensor %44982 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44984 = torch_c.to_builtin_tensor %44953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42470 = arith.constant 1 : index
    %dim_42471 = tensor.dim %44984, %c1_42470 : tensor<4x?x4096xf16>
    %44985 = flow.tensor.transfer %44984 : tensor<4x?x4096xf16>{%dim_42471} to #hal.device.promise<@__device_0>
    %44986 = torch_c.from_builtin_tensor %44985 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44987 = torch_c.to_builtin_tensor %44960 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42472 = arith.constant 1 : index
    %dim_42473 = tensor.dim %44987, %c1_42472 : tensor<4x?x4096xf16>
    %44988 = flow.tensor.transfer %44987 : tensor<4x?x4096xf16>{%dim_42473} to #hal.device.promise<@__device_0>
    %44989 = torch_c.from_builtin_tensor %44988 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44990 = torch_c.to_builtin_tensor %44967 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42474 = arith.constant 1 : index
    %dim_42475 = tensor.dim %44990, %c1_42474 : tensor<4x?x4096xf16>
    %44991 = flow.tensor.transfer %44990 : tensor<4x?x4096xf16>{%dim_42475} to #hal.device.promise<@__device_0>
    %44992 = torch_c.from_builtin_tensor %44991 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %44993 = torch_c.to_builtin_tensor %44974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42476 = arith.constant 1 : index
    %dim_42477 = tensor.dim %44993, %c1_42476 : tensor<4x?x4096xf16>
    %44994 = flow.tensor.transfer %44993 : tensor<4x?x4096xf16>{%dim_42477} to #hal.device.promise<@__device_0>
    %44995 = torch_c.from_builtin_tensor %44994 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
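    // ...then fold them into the local partial %44925 with a chain of
    // adds, yielding %45002, the complete FFN output on device 0.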
    %int1_42478 = torch.constant.int 1
    %44996 = torch.aten.add.Tensor %44925, %44977, %int1_42478 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42479 = torch.constant.int 1
    %44997 = torch.aten.add.Tensor %44996, %44980, %int1_42479 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42480 = torch.constant.int 1
    %44998 = torch.aten.add.Tensor %44997, %44983, %int1_42480 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42481 = torch.constant.int 1
    %44999 = torch.aten.add.Tensor %44998, %44986, %int1_42481 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %44999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42482 = torch.constant.int 1
    %45000 = torch.aten.add.Tensor %44999, %44989, %int1_42482 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42483 = torch.constant.int 1
    %45001 = torch.aten.add.Tensor %45000, %44992, %int1_42483 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42484 = torch.constant.int 1
    %45002 = torch.aten.add.Tensor %45001, %44995, %int1_42484 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
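    // Same gather-and-reduce for @__device_1, producing %45030.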
    %45003 = torch_c.to_builtin_tensor %44925 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42485 = arith.constant 1 : index
    %dim_42486 = tensor.dim %45003, %c1_42485 : tensor<4x?x4096xf16>
    %45004 = flow.tensor.transfer %45003 : tensor<4x?x4096xf16>{%dim_42486} to #hal.device.promise<@__device_1>
    %45005 = torch_c.from_builtin_tensor %45004 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45006 = torch_c.to_builtin_tensor %44939 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42487 = arith.constant 1 : index
    %dim_42488 = tensor.dim %45006, %c1_42487 : tensor<4x?x4096xf16>
    %45007 = flow.tensor.transfer %45006 : tensor<4x?x4096xf16>{%dim_42488} to #hal.device.promise<@__device_1>
    %45008 = torch_c.from_builtin_tensor %45007 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45009 = torch_c.to_builtin_tensor %44946 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42489 = arith.constant 1 : index
    %dim_42490 = tensor.dim %45009, %c1_42489 : tensor<4x?x4096xf16>
    %45010 = flow.tensor.transfer %45009 : tensor<4x?x4096xf16>{%dim_42490} to #hal.device.promise<@__device_1>
    %45011 = torch_c.from_builtin_tensor %45010 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45012 = torch_c.to_builtin_tensor %44953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42491 = arith.constant 1 : index
    %dim_42492 = tensor.dim %45012, %c1_42491 : tensor<4x?x4096xf16>
    %45013 = flow.tensor.transfer %45012 : tensor<4x?x4096xf16>{%dim_42492} to #hal.device.promise<@__device_1>
    %45014 = torch_c.from_builtin_tensor %45013 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45015 = torch_c.to_builtin_tensor %44960 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42493 = arith.constant 1 : index
    %dim_42494 = tensor.dim %45015, %c1_42493 : tensor<4x?x4096xf16>
    %45016 = flow.tensor.transfer %45015 : tensor<4x?x4096xf16>{%dim_42494} to #hal.device.promise<@__device_1>
    %45017 = torch_c.from_builtin_tensor %45016 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45018 = torch_c.to_builtin_tensor %44967 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42495 = arith.constant 1 : index
    %dim_42496 = tensor.dim %45018, %c1_42495 : tensor<4x?x4096xf16>
    %45019 = flow.tensor.transfer %45018 : tensor<4x?x4096xf16>{%dim_42496} to #hal.device.promise<@__device_1>
    %45020 = torch_c.from_builtin_tensor %45019 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45021 = torch_c.to_builtin_tensor %44974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42497 = arith.constant 1 : index
    %dim_42498 = tensor.dim %45021, %c1_42497 : tensor<4x?x4096xf16>
    %45022 = flow.tensor.transfer %45021 : tensor<4x?x4096xf16>{%dim_42498} to #hal.device.promise<@__device_1>
    %45023 = torch_c.from_builtin_tensor %45022 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
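    // Accumulate the eight partials on @__device_1; %45030 is the reduced result.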
    %int1_42499 = torch.constant.int 1
    %45024 = torch.aten.add.Tensor %45005, %44932, %int1_42499 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42500 = torch.constant.int 1
    %45025 = torch.aten.add.Tensor %45024, %45008, %int1_42500 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42501 = torch.constant.int 1
    %45026 = torch.aten.add.Tensor %45025, %45011, %int1_42501 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42502 = torch.constant.int 1
    %45027 = torch.aten.add.Tensor %45026, %45014, %int1_42502 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42503 = torch.constant.int 1
    %45028 = torch.aten.add.Tensor %45027, %45017, %int1_42503 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42504 = torch.constant.int 1
    %45029 = torch.aten.add.Tensor %45028, %45020, %int1_42504 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42505 = torch.constant.int 1
    %45030 = torch.aten.add.Tensor %45029, %45023, %int1_42505 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
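    // Same gather-and-accumulate round for @__device_2 (reduced result: %45058).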
    %45031 = torch_c.to_builtin_tensor %44925 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42506 = arith.constant 1 : index
    %dim_42507 = tensor.dim %45031, %c1_42506 : tensor<4x?x4096xf16>
    %45032 = flow.tensor.transfer %45031 : tensor<4x?x4096xf16>{%dim_42507} to #hal.device.promise<@__device_2>
    %45033 = torch_c.from_builtin_tensor %45032 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45034 = torch_c.to_builtin_tensor %44932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42508 = arith.constant 1 : index
    %dim_42509 = tensor.dim %45034, %c1_42508 : tensor<4x?x4096xf16>
    %45035 = flow.tensor.transfer %45034 : tensor<4x?x4096xf16>{%dim_42509} to #hal.device.promise<@__device_2>
    %45036 = torch_c.from_builtin_tensor %45035 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45037 = torch_c.to_builtin_tensor %44946 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42510 = arith.constant 1 : index
    %dim_42511 = tensor.dim %45037, %c1_42510 : tensor<4x?x4096xf16>
    %45038 = flow.tensor.transfer %45037 : tensor<4x?x4096xf16>{%dim_42511} to #hal.device.promise<@__device_2>
    %45039 = torch_c.from_builtin_tensor %45038 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45040 = torch_c.to_builtin_tensor %44953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42512 = arith.constant 1 : index
    %dim_42513 = tensor.dim %45040, %c1_42512 : tensor<4x?x4096xf16>
    %45041 = flow.tensor.transfer %45040 : tensor<4x?x4096xf16>{%dim_42513} to #hal.device.promise<@__device_2>
    %45042 = torch_c.from_builtin_tensor %45041 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45043 = torch_c.to_builtin_tensor %44960 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42514 = arith.constant 1 : index
    %dim_42515 = tensor.dim %45043, %c1_42514 : tensor<4x?x4096xf16>
    %45044 = flow.tensor.transfer %45043 : tensor<4x?x4096xf16>{%dim_42515} to #hal.device.promise<@__device_2>
    %45045 = torch_c.from_builtin_tensor %45044 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45046 = torch_c.to_builtin_tensor %44967 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42516 = arith.constant 1 : index
    %dim_42517 = tensor.dim %45046, %c1_42516 : tensor<4x?x4096xf16>
    %45047 = flow.tensor.transfer %45046 : tensor<4x?x4096xf16>{%dim_42517} to #hal.device.promise<@__device_2>
    %45048 = torch_c.from_builtin_tensor %45047 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45049 = torch_c.to_builtin_tensor %44974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42518 = arith.constant 1 : index
    %dim_42519 = tensor.dim %45049, %c1_42518 : tensor<4x?x4096xf16>
    %45050 = flow.tensor.transfer %45049 : tensor<4x?x4096xf16>{%dim_42519} to #hal.device.promise<@__device_2>
    %45051 = torch_c.from_builtin_tensor %45050 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42520 = torch.constant.int 1
    %45052 = torch.aten.add.Tensor %45033, %45036, %int1_42520 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42521 = torch.constant.int 1
    %45053 = torch.aten.add.Tensor %45052, %44939, %int1_42521 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42522 = torch.constant.int 1
    %45054 = torch.aten.add.Tensor %45053, %45039, %int1_42522 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42523 = torch.constant.int 1
    %45055 = torch.aten.add.Tensor %45054, %45042, %int1_42523 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42524 = torch.constant.int 1
    %45056 = torch.aten.add.Tensor %45055, %45045, %int1_42524 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42525 = torch.constant.int 1
    %45057 = torch.aten.add.Tensor %45056, %45048, %int1_42525 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42526 = torch.constant.int 1
    %45058 = torch.aten.add.Tensor %45057, %45051, %int1_42526 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
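    // Round for @__device_3 (reduced result: %45086).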
    %45059 = torch_c.to_builtin_tensor %44925 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42527 = arith.constant 1 : index
    %dim_42528 = tensor.dim %45059, %c1_42527 : tensor<4x?x4096xf16>
    %45060 = flow.tensor.transfer %45059 : tensor<4x?x4096xf16>{%dim_42528} to #hal.device.promise<@__device_3>
    %45061 = torch_c.from_builtin_tensor %45060 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45062 = torch_c.to_builtin_tensor %44932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42529 = arith.constant 1 : index
    %dim_42530 = tensor.dim %45062, %c1_42529 : tensor<4x?x4096xf16>
    %45063 = flow.tensor.transfer %45062 : tensor<4x?x4096xf16>{%dim_42530} to #hal.device.promise<@__device_3>
    %45064 = torch_c.from_builtin_tensor %45063 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45065 = torch_c.to_builtin_tensor %44939 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42531 = arith.constant 1 : index
    %dim_42532 = tensor.dim %45065, %c1_42531 : tensor<4x?x4096xf16>
    %45066 = flow.tensor.transfer %45065 : tensor<4x?x4096xf16>{%dim_42532} to #hal.device.promise<@__device_3>
    %45067 = torch_c.from_builtin_tensor %45066 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45068 = torch_c.to_builtin_tensor %44953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42533 = arith.constant 1 : index
    %dim_42534 = tensor.dim %45068, %c1_42533 : tensor<4x?x4096xf16>
    %45069 = flow.tensor.transfer %45068 : tensor<4x?x4096xf16>{%dim_42534} to #hal.device.promise<@__device_3>
    %45070 = torch_c.from_builtin_tensor %45069 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45071 = torch_c.to_builtin_tensor %44960 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42535 = arith.constant 1 : index
    %dim_42536 = tensor.dim %45071, %c1_42535 : tensor<4x?x4096xf16>
    %45072 = flow.tensor.transfer %45071 : tensor<4x?x4096xf16>{%dim_42536} to #hal.device.promise<@__device_3>
    %45073 = torch_c.from_builtin_tensor %45072 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45074 = torch_c.to_builtin_tensor %44967 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42537 = arith.constant 1 : index
    %dim_42538 = tensor.dim %45074, %c1_42537 : tensor<4x?x4096xf16>
    %45075 = flow.tensor.transfer %45074 : tensor<4x?x4096xf16>{%dim_42538} to #hal.device.promise<@__device_3>
    %45076 = torch_c.from_builtin_tensor %45075 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45077 = torch_c.to_builtin_tensor %44974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42539 = arith.constant 1 : index
    %dim_42540 = tensor.dim %45077, %c1_42539 : tensor<4x?x4096xf16>
    %45078 = flow.tensor.transfer %45077 : tensor<4x?x4096xf16>{%dim_42540} to #hal.device.promise<@__device_3>
    %45079 = torch_c.from_builtin_tensor %45078 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42541 = torch.constant.int 1
    %45080 = torch.aten.add.Tensor %45061, %45064, %int1_42541 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42542 = torch.constant.int 1
    %45081 = torch.aten.add.Tensor %45080, %45067, %int1_42542 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42543 = torch.constant.int 1
    %45082 = torch.aten.add.Tensor %45081, %44946, %int1_42543 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42544 = torch.constant.int 1
    %45083 = torch.aten.add.Tensor %45082, %45070, %int1_42544 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42545 = torch.constant.int 1
    %45084 = torch.aten.add.Tensor %45083, %45073, %int1_42545 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42546 = torch.constant.int 1
    %45085 = torch.aten.add.Tensor %45084, %45076, %int1_42546 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42547 = torch.constant.int 1
    %45086 = torch.aten.add.Tensor %45085, %45079, %int1_42547 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
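    // Round for @__device_4 (reduced result: %45114).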
    %45087 = torch_c.to_builtin_tensor %44925 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42548 = arith.constant 1 : index
    %dim_42549 = tensor.dim %45087, %c1_42548 : tensor<4x?x4096xf16>
    %45088 = flow.tensor.transfer %45087 : tensor<4x?x4096xf16>{%dim_42549} to #hal.device.promise<@__device_4>
    %45089 = torch_c.from_builtin_tensor %45088 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45090 = torch_c.to_builtin_tensor %44932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42550 = arith.constant 1 : index
    %dim_42551 = tensor.dim %45090, %c1_42550 : tensor<4x?x4096xf16>
    %45091 = flow.tensor.transfer %45090 : tensor<4x?x4096xf16>{%dim_42551} to #hal.device.promise<@__device_4>
    %45092 = torch_c.from_builtin_tensor %45091 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45093 = torch_c.to_builtin_tensor %44939 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42552 = arith.constant 1 : index
    %dim_42553 = tensor.dim %45093, %c1_42552 : tensor<4x?x4096xf16>
    %45094 = flow.tensor.transfer %45093 : tensor<4x?x4096xf16>{%dim_42553} to #hal.device.promise<@__device_4>
    %45095 = torch_c.from_builtin_tensor %45094 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45096 = torch_c.to_builtin_tensor %44946 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42554 = arith.constant 1 : index
    %dim_42555 = tensor.dim %45096, %c1_42554 : tensor<4x?x4096xf16>
    %45097 = flow.tensor.transfer %45096 : tensor<4x?x4096xf16>{%dim_42555} to #hal.device.promise<@__device_4>
    %45098 = torch_c.from_builtin_tensor %45097 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45099 = torch_c.to_builtin_tensor %44960 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42556 = arith.constant 1 : index
    %dim_42557 = tensor.dim %45099, %c1_42556 : tensor<4x?x4096xf16>
    %45100 = flow.tensor.transfer %45099 : tensor<4x?x4096xf16>{%dim_42557} to #hal.device.promise<@__device_4>
    %45101 = torch_c.from_builtin_tensor %45100 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45102 = torch_c.to_builtin_tensor %44967 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42558 = arith.constant 1 : index
    %dim_42559 = tensor.dim %45102, %c1_42558 : tensor<4x?x4096xf16>
    %45103 = flow.tensor.transfer %45102 : tensor<4x?x4096xf16>{%dim_42559} to #hal.device.promise<@__device_4>
    %45104 = torch_c.from_builtin_tensor %45103 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45105 = torch_c.to_builtin_tensor %44974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42560 = arith.constant 1 : index
    %dim_42561 = tensor.dim %45105, %c1_42560 : tensor<4x?x4096xf16>
    %45106 = flow.tensor.transfer %45105 : tensor<4x?x4096xf16>{%dim_42561} to #hal.device.promise<@__device_4>
    %45107 = torch_c.from_builtin_tensor %45106 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42562 = torch.constant.int 1
    %45108 = torch.aten.add.Tensor %45089, %45092, %int1_42562 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42563 = torch.constant.int 1
    %45109 = torch.aten.add.Tensor %45108, %45095, %int1_42563 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42564 = torch.constant.int 1
    %45110 = torch.aten.add.Tensor %45109, %45098, %int1_42564 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42565 = torch.constant.int 1
    %45111 = torch.aten.add.Tensor %45110, %44953, %int1_42565 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42566 = torch.constant.int 1
    %45112 = torch.aten.add.Tensor %45111, %45101, %int1_42566 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42567 = torch.constant.int 1
    %45113 = torch.aten.add.Tensor %45112, %45104, %int1_42567 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42568 = torch.constant.int 1
    %45114 = torch.aten.add.Tensor %45113, %45107, %int1_42568 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
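    // Round for @__device_5 (reduced result: %45142).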
    %45115 = torch_c.to_builtin_tensor %44925 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42569 = arith.constant 1 : index
    %dim_42570 = tensor.dim %45115, %c1_42569 : tensor<4x?x4096xf16>
    %45116 = flow.tensor.transfer %45115 : tensor<4x?x4096xf16>{%dim_42570} to #hal.device.promise<@__device_5>
    %45117 = torch_c.from_builtin_tensor %45116 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45118 = torch_c.to_builtin_tensor %44932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42571 = arith.constant 1 : index
    %dim_42572 = tensor.dim %45118, %c1_42571 : tensor<4x?x4096xf16>
    %45119 = flow.tensor.transfer %45118 : tensor<4x?x4096xf16>{%dim_42572} to #hal.device.promise<@__device_5>
    %45120 = torch_c.from_builtin_tensor %45119 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45121 = torch_c.to_builtin_tensor %44939 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42573 = arith.constant 1 : index
    %dim_42574 = tensor.dim %45121, %c1_42573 : tensor<4x?x4096xf16>
    %45122 = flow.tensor.transfer %45121 : tensor<4x?x4096xf16>{%dim_42574} to #hal.device.promise<@__device_5>
    %45123 = torch_c.from_builtin_tensor %45122 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45124 = torch_c.to_builtin_tensor %44946 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42575 = arith.constant 1 : index
    %dim_42576 = tensor.dim %45124, %c1_42575 : tensor<4x?x4096xf16>
    %45125 = flow.tensor.transfer %45124 : tensor<4x?x4096xf16>{%dim_42576} to #hal.device.promise<@__device_5>
    %45126 = torch_c.from_builtin_tensor %45125 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45127 = torch_c.to_builtin_tensor %44953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42577 = arith.constant 1 : index
    %dim_42578 = tensor.dim %45127, %c1_42577 : tensor<4x?x4096xf16>
    %45128 = flow.tensor.transfer %45127 : tensor<4x?x4096xf16>{%dim_42578} to #hal.device.promise<@__device_5>
    %45129 = torch_c.from_builtin_tensor %45128 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45130 = torch_c.to_builtin_tensor %44967 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42579 = arith.constant 1 : index
    %dim_42580 = tensor.dim %45130, %c1_42579 : tensor<4x?x4096xf16>
    %45131 = flow.tensor.transfer %45130 : tensor<4x?x4096xf16>{%dim_42580} to #hal.device.promise<@__device_5>
    %45132 = torch_c.from_builtin_tensor %45131 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45133 = torch_c.to_builtin_tensor %44974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42581 = arith.constant 1 : index
    %dim_42582 = tensor.dim %45133, %c1_42581 : tensor<4x?x4096xf16>
    %45134 = flow.tensor.transfer %45133 : tensor<4x?x4096xf16>{%dim_42582} to #hal.device.promise<@__device_5>
    %45135 = torch_c.from_builtin_tensor %45134 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42583 = torch.constant.int 1
    %45136 = torch.aten.add.Tensor %45117, %45120, %int1_42583 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42584 = torch.constant.int 1
    %45137 = torch.aten.add.Tensor %45136, %45123, %int1_42584 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42585 = torch.constant.int 1
    %45138 = torch.aten.add.Tensor %45137, %45126, %int1_42585 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42586 = torch.constant.int 1
    %45139 = torch.aten.add.Tensor %45138, %45129, %int1_42586 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42587 = torch.constant.int 1
    %45140 = torch.aten.add.Tensor %45139, %44960, %int1_42587 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42588 = torch.constant.int 1
    %45141 = torch.aten.add.Tensor %45140, %45132, %int1_42588 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42589 = torch.constant.int 1
    %45142 = torch.aten.add.Tensor %45141, %45135, %int1_42589 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
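    // Round for @__device_6 (reduced result: %45170).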
    %45143 = torch_c.to_builtin_tensor %44925 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42590 = arith.constant 1 : index
    %dim_42591 = tensor.dim %45143, %c1_42590 : tensor<4x?x4096xf16>
    %45144 = flow.tensor.transfer %45143 : tensor<4x?x4096xf16>{%dim_42591} to #hal.device.promise<@__device_6>
    %45145 = torch_c.from_builtin_tensor %45144 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45146 = torch_c.to_builtin_tensor %44932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42592 = arith.constant 1 : index
    %dim_42593 = tensor.dim %45146, %c1_42592 : tensor<4x?x4096xf16>
    %45147 = flow.tensor.transfer %45146 : tensor<4x?x4096xf16>{%dim_42593} to #hal.device.promise<@__device_6>
    %45148 = torch_c.from_builtin_tensor %45147 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45149 = torch_c.to_builtin_tensor %44939 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42594 = arith.constant 1 : index
    %dim_42595 = tensor.dim %45149, %c1_42594 : tensor<4x?x4096xf16>
    %45150 = flow.tensor.transfer %45149 : tensor<4x?x4096xf16>{%dim_42595} to #hal.device.promise<@__device_6>
    %45151 = torch_c.from_builtin_tensor %45150 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45152 = torch_c.to_builtin_tensor %44946 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42596 = arith.constant 1 : index
    %dim_42597 = tensor.dim %45152, %c1_42596 : tensor<4x?x4096xf16>
    %45153 = flow.tensor.transfer %45152 : tensor<4x?x4096xf16>{%dim_42597} to #hal.device.promise<@__device_6>
    %45154 = torch_c.from_builtin_tensor %45153 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45155 = torch_c.to_builtin_tensor %44953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42598 = arith.constant 1 : index
    %dim_42599 = tensor.dim %45155, %c1_42598 : tensor<4x?x4096xf16>
    %45156 = flow.tensor.transfer %45155 : tensor<4x?x4096xf16>{%dim_42599} to #hal.device.promise<@__device_6>
    %45157 = torch_c.from_builtin_tensor %45156 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45158 = torch_c.to_builtin_tensor %44960 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42600 = arith.constant 1 : index
    %dim_42601 = tensor.dim %45158, %c1_42600 : tensor<4x?x4096xf16>
    %45159 = flow.tensor.transfer %45158 : tensor<4x?x4096xf16>{%dim_42601} to #hal.device.promise<@__device_6>
    %45160 = torch_c.from_builtin_tensor %45159 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45161 = torch_c.to_builtin_tensor %44974 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42602 = arith.constant 1 : index
    %dim_42603 = tensor.dim %45161, %c1_42602 : tensor<4x?x4096xf16>
    %45162 = flow.tensor.transfer %45161 : tensor<4x?x4096xf16>{%dim_42603} to #hal.device.promise<@__device_6>
    %45163 = torch_c.from_builtin_tensor %45162 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42604 = torch.constant.int 1
    %45164 = torch.aten.add.Tensor %45145, %45148, %int1_42604 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42605 = torch.constant.int 1
    %45165 = torch.aten.add.Tensor %45164, %45151, %int1_42605 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42606 = torch.constant.int 1
    %45166 = torch.aten.add.Tensor %45165, %45154, %int1_42606 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42607 = torch.constant.int 1
    %45167 = torch.aten.add.Tensor %45166, %45157, %int1_42607 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42608 = torch.constant.int 1
    %45168 = torch.aten.add.Tensor %45167, %45160, %int1_42608 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42609 = torch.constant.int 1
    %45169 = torch.aten.add.Tensor %45168, %44967, %int1_42609 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42610 = torch.constant.int 1
    %45170 = torch.aten.add.Tensor %45169, %45163, %int1_42610 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
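    // Round for @__device_7 (reduced result: %45198).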
    %45171 = torch_c.to_builtin_tensor %44925 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42611 = arith.constant 1 : index
    %dim_42612 = tensor.dim %45171, %c1_42611 : tensor<4x?x4096xf16>
    %45172 = flow.tensor.transfer %45171 : tensor<4x?x4096xf16>{%dim_42612} to #hal.device.promise<@__device_7>
    %45173 = torch_c.from_builtin_tensor %45172 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45174 = torch_c.to_builtin_tensor %44932 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42613 = arith.constant 1 : index
    %dim_42614 = tensor.dim %45174, %c1_42613 : tensor<4x?x4096xf16>
    %45175 = flow.tensor.transfer %45174 : tensor<4x?x4096xf16>{%dim_42614} to #hal.device.promise<@__device_7>
    %45176 = torch_c.from_builtin_tensor %45175 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45177 = torch_c.to_builtin_tensor %44939 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42615 = arith.constant 1 : index
    %dim_42616 = tensor.dim %45177, %c1_42615 : tensor<4x?x4096xf16>
    %45178 = flow.tensor.transfer %45177 : tensor<4x?x4096xf16>{%dim_42616} to #hal.device.promise<@__device_7>
    %45179 = torch_c.from_builtin_tensor %45178 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45180 = torch_c.to_builtin_tensor %44946 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42617 = arith.constant 1 : index
    %dim_42618 = tensor.dim %45180, %c1_42617 : tensor<4x?x4096xf16>
    %45181 = flow.tensor.transfer %45180 : tensor<4x?x4096xf16>{%dim_42618} to #hal.device.promise<@__device_7>
    %45182 = torch_c.from_builtin_tensor %45181 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45183 = torch_c.to_builtin_tensor %44953 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42619 = arith.constant 1 : index
    %dim_42620 = tensor.dim %45183, %c1_42619 : tensor<4x?x4096xf16>
    %45184 = flow.tensor.transfer %45183 : tensor<4x?x4096xf16>{%dim_42620} to #hal.device.promise<@__device_7>
    %45185 = torch_c.from_builtin_tensor %45184 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45186 = torch_c.to_builtin_tensor %44960 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42621 = arith.constant 1 : index
    %dim_42622 = tensor.dim %45186, %c1_42621 : tensor<4x?x4096xf16>
    %45187 = flow.tensor.transfer %45186 : tensor<4x?x4096xf16>{%dim_42622} to #hal.device.promise<@__device_7>
    %45188 = torch_c.from_builtin_tensor %45187 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %45189 = torch_c.to_builtin_tensor %44967 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_42623 = arith.constant 1 : index
    %dim_42624 = tensor.dim %45189, %c1_42623 : tensor<4x?x4096xf16>
    %45190 = flow.tensor.transfer %45189 : tensor<4x?x4096xf16>{%dim_42624} to #hal.device.promise<@__device_7>
    %45191 = torch_c.from_builtin_tensor %45190 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42625 = torch.constant.int 1
    %45192 = torch.aten.add.Tensor %45173, %45176, %int1_42625 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42626 = torch.constant.int 1
    %45193 = torch.aten.add.Tensor %45192, %45179, %int1_42626 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42627 = torch.constant.int 1
    %45194 = torch.aten.add.Tensor %45193, %45182, %int1_42627 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42628 = torch.constant.int 1
    %45195 = torch.aten.add.Tensor %45194, %45185, %int1_42628 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42629 = torch.constant.int 1
    %45196 = torch.aten.add.Tensor %45195, %45188, %int1_42629 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42630 = torch.constant.int 1
    %45197 = torch.aten.add.Tensor %45196, %45191, %int1_42630 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42631 = torch.constant.int 1
    %45198 = torch.aten.add.Tensor %45197, %44974, %int1_42631 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
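    // Residual connection: on each device, add the all-reduced output to that
    // device's copy of the residual stream (%44679-%44686), giving %45199-%45206.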
    %int1_42632 = torch.constant.int 1
    %45199 = torch.aten.add.Tensor %44679, %45002, %int1_42632 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42633 = torch.constant.int 1
    %45200 = torch.aten.add.Tensor %44680, %45030, %int1_42633 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42634 = torch.constant.int 1
    %45201 = torch.aten.add.Tensor %44681, %45058, %int1_42634 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42635 = torch.constant.int 1
    %45202 = torch.aten.add.Tensor %44682, %45086, %int1_42635 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42636 = torch.constant.int 1
    %45203 = torch.aten.add.Tensor %44683, %45114, %int1_42636 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42637 = torch.constant.int 1
    %45204 = torch.aten.add.Tensor %44684, %45142, %int1_42637 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42638 = torch.constant.int 1
    %45205 = torch.aten.add.Tensor %44685, %45170, %int1_42638 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_42639 = torch.constant.int 1
    %45206 = torch.aten.add.Tensor %44686, %45198, %int1_42639 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
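    // What follows matches an RMSNorm over the hidden dimension, replicated once
    // per device: y = w * x * rsqrt(mean(x^2) + eps), computed in f32.
    // First step: upcast the eight f16 residuals to f32.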
    %int6_42640 = torch.constant.int 6
    %45207 = torch.prims.convert_element_type %45199, %int6_42640 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42641 = torch.constant.int 6
    %45208 = torch.prims.convert_element_type %45200, %int6_42641 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42642 = torch.constant.int 6
    %45209 = torch.prims.convert_element_type %45201, %int6_42642 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42643 = torch.constant.int 6
    %45210 = torch.prims.convert_element_type %45202, %int6_42643 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42644 = torch.constant.int 6
    %45211 = torch.prims.convert_element_type %45203, %int6_42644 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45211, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42645 = torch.constant.int 6
    %45212 = torch.prims.convert_element_type %45204, %int6_42645 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42646 = torch.constant.int 6
    %45213 = torch.prims.convert_element_type %45205, %int6_42646 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_42647 = torch.constant.int 6
    %45214 = torch.prims.convert_element_type %45206, %int6_42647 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
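    // Elementwise square: x^2.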
    %int2_42648 = torch.constant.int 2
    %45215 = torch.aten.pow.Tensor_Scalar %45207, %int2_42648 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42649 = torch.constant.int 2
    %45216 = torch.aten.pow.Tensor_Scalar %45208, %int2_42649 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42650 = torch.constant.int 2
    %45217 = torch.aten.pow.Tensor_Scalar %45209, %int2_42650 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42651 = torch.constant.int 2
    %45218 = torch.aten.pow.Tensor_Scalar %45210, %int2_42651 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42652 = torch.constant.int 2
    %45219 = torch.aten.pow.Tensor_Scalar %45211, %int2_42652 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42653 = torch.constant.int 2
    %45220 = torch.aten.pow.Tensor_Scalar %45212, %int2_42653 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42654 = torch.constant.int 2
    %45221 = torch.aten.pow.Tensor_Scalar %45213, %int2_42654 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_42655 = torch.constant.int 2
    %45222 = torch.aten.pow.Tensor_Scalar %45214, %int2_42655 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
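    // Mean of x^2 over the last (hidden, size-4096) dimension, keepdim = true.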
    %int-1_42656 = torch.constant.int -1
    %45223 = torch.prim.ListConstruct %int-1_42656 : (!torch.int) -> !torch.list<int>
    %true_42657 = torch.constant.bool true
    %none_42658 = torch.constant.none
    %45224 = torch.aten.mean.dim %45215, %45223, %true_42657, %none_42658 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42659 = torch.constant.int -1
    %45225 = torch.prim.ListConstruct %int-1_42659 : (!torch.int) -> !torch.list<int>
    %true_42660 = torch.constant.bool true
    %none_42661 = torch.constant.none
    %45226 = torch.aten.mean.dim %45216, %45225, %true_42660, %none_42661 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42662 = torch.constant.int -1
    %45227 = torch.prim.ListConstruct %int-1_42662 : (!torch.int) -> !torch.list<int>
    %true_42663 = torch.constant.bool true
    %none_42664 = torch.constant.none
    %45228 = torch.aten.mean.dim %45217, %45227, %true_42663, %none_42664 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42665 = torch.constant.int -1
    %45229 = torch.prim.ListConstruct %int-1_42665 : (!torch.int) -> !torch.list<int>
    %true_42666 = torch.constant.bool true
    %none_42667 = torch.constant.none
    %45230 = torch.aten.mean.dim %45218, %45229, %true_42666, %none_42667 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42668 = torch.constant.int -1
    %45231 = torch.prim.ListConstruct %int-1_42668 : (!torch.int) -> !torch.list<int>
    %true_42669 = torch.constant.bool true
    %none_42670 = torch.constant.none
    %45232 = torch.aten.mean.dim %45219, %45231, %true_42669, %none_42670 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42671 = torch.constant.int -1
    %45233 = torch.prim.ListConstruct %int-1_42671 : (!torch.int) -> !torch.list<int>
    %true_42672 = torch.constant.bool true
    %none_42673 = torch.constant.none
    %45234 = torch.aten.mean.dim %45220, %45233, %true_42672, %none_42673 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42674 = torch.constant.int -1
    %45235 = torch.prim.ListConstruct %int-1_42674 : (!torch.int) -> !torch.list<int>
    %true_42675 = torch.constant.bool true
    %none_42676 = torch.constant.none
    %45236 = torch.aten.mean.dim %45221, %45235, %true_42675, %none_42676 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_42677 = torch.constant.int -1
    %45237 = torch.prim.ListConstruct %int-1_42677 : (!torch.int) -> !torch.list<int>
    %true_42678 = torch.constant.bool true
    %none_42679 = torch.constant.none
    %45238 = torch.aten.mean.dim %45222, %45237, %true_42678, %none_42679 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
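    // Add the epsilon (1e-5, printed here as its nearest-f32 value).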
    %float9.999990e-06_42680 = torch.constant.float 9.9999997473787516E-6
    %int1_42681 = torch.constant.int 1
    %45239 = torch.aten.add.Scalar %45224, %float9.999990e-06_42680, %int1_42681 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42682 = torch.constant.float 9.9999997473787516E-6
    %int1_42683 = torch.constant.int 1
    %45240 = torch.aten.add.Scalar %45226, %float9.999990e-06_42682, %int1_42683 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42684 = torch.constant.float 9.9999997473787516E-6
    %int1_42685 = torch.constant.int 1
    %45241 = torch.aten.add.Scalar %45228, %float9.999990e-06_42684, %int1_42685 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42686 = torch.constant.float 9.9999997473787516E-6
    %int1_42687 = torch.constant.int 1
    %45242 = torch.aten.add.Scalar %45230, %float9.999990e-06_42686, %int1_42687 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42688 = torch.constant.float 9.9999997473787516E-6
    %int1_42689 = torch.constant.int 1
    %45243 = torch.aten.add.Scalar %45232, %float9.999990e-06_42688, %int1_42689 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42690 = torch.constant.float 9.9999997473787516E-6
    %int1_42691 = torch.constant.int 1
    %45244 = torch.aten.add.Scalar %45234, %float9.999990e-06_42690, %int1_42691 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42692 = torch.constant.float 9.9999997473787516E-6
    %int1_42693 = torch.constant.int 1
    %45245 = torch.aten.add.Scalar %45236, %float9.999990e-06_42692, %int1_42693 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_42694 = torch.constant.float 9.9999997473787516E-6
    %int1_42695 = torch.constant.int 1
    %45246 = torch.aten.add.Scalar %45238, %float9.999990e-06_42694, %int1_42695 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
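    // rsqrt of the (mean-square + eps) estimate.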
    %45247 = torch.aten.rsqrt %45239 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %45248 = torch.aten.rsqrt %45240 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %45249 = torch.aten.rsqrt %45241 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %45250 = torch.aten.rsqrt %45242 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %45251 = torch.aten.rsqrt %45243 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %45252 = torch.aten.rsqrt %45244 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %45253 = torch.aten.rsqrt %45245 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %45254 = torch.aten.rsqrt %45246 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %45254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
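    // Normalize: x * rsqrt(mean(x^2) + eps).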
    %45255 = torch.aten.mul.Tensor %45207, %45247 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45256 = torch.aten.mul.Tensor %45208, %45248 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45257 = torch.aten.mul.Tensor %45209, %45249 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45258 = torch.aten.mul.Tensor %45210, %45250 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45259 = torch.aten.mul.Tensor %45211, %45251 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45260 = torch.aten.mul.Tensor %45212, %45252 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45261 = torch.aten.mul.Tensor %45213, %45253 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45262 = torch.aten.mul.Tensor %45214, %45254 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
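    // Scale by the learned norm weight: %1664..%1671 are 4096-element f32 vectors,
    // one per device, consistent with a per-layer RMSNorm gamma replicated across
    // all eight devices rather than sharded.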
    %45263 = torch.aten.mul.Tensor %1664, %45255 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45263, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45264 = torch.aten.mul.Tensor %1665, %45256 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45265 = torch.aten.mul.Tensor %1666, %45257 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45266 = torch.aten.mul.Tensor %1667, %45258 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45267 = torch.aten.mul.Tensor %1668, %45259 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45268 = torch.aten.mul.Tensor %1669, %45260 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45269 = torch.aten.mul.Tensor %1670, %45261 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %45270 = torch.aten.mul.Tensor %1671, %45262 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %45270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
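    // Cast the normalized activations back to f16 (torch dtype code 5) for the
    // half-precision projection matmuls that follow.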
    %int5_42696 = torch.constant.int 5
    %45271 = torch.prims.convert_element_type %45263, %int5_42696 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42697 = torch.constant.int 5
    %45272 = torch.prims.convert_element_type %45264, %int5_42697 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42698 = torch.constant.int 5
    %45273 = torch.prims.convert_element_type %45265, %int5_42698 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42699 = torch.constant.int 5
    %45274 = torch.prims.convert_element_type %45266, %int5_42699 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42700 = torch.constant.int 5
    %45275 = torch.prims.convert_element_type %45267, %int5_42700 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42701 = torch.constant.int 5
    %45276 = torch.prims.convert_element_type %45268, %int5_42701 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42702 = torch.constant.int 5
    %45277 = torch.prims.convert_element_type %45269, %int5_42702 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_42703 = torch.constant.int 5
    %45278 = torch.prims.convert_element_type %45270, %int5_42703 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %45278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
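    // Q projection, tensor-parallel over 8 devices: each [512,4096] weight shard is
    // transposed to [4096,512], the activations are flattened from [4, seq, 4096] to
    // [4*seq, 4096] for a 2-D mm, and the result is reshaped back to [4, seq, 512]
    // (512 columns per shard; the later view to [4, seq, 4, 128] indicates 4 query
    // heads of head_dim 128 per device).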
    %int1_42704 = torch.constant.int 1
    %int0_42705 = torch.constant.int 0
    %45279 = torch.prim.ListConstruct %int1_42704, %int0_42705 : (!torch.int, !torch.int) -> !torch.list<int>
    %45280 = torch.aten.permute %1672, %45279 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_42706 = torch.constant.int 1
    %int0_42707 = torch.constant.int 0
    %45281 = torch.prim.ListConstruct %int1_42706, %int0_42707 : (!torch.int, !torch.int) -> !torch.list<int>
    %45282 = torch.aten.permute %1673, %45281 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_42708 = torch.constant.int 1
    %int0_42709 = torch.constant.int 0
    %45283 = torch.prim.ListConstruct %int1_42708, %int0_42709 : (!torch.int, !torch.int) -> !torch.list<int>
    %45284 = torch.aten.permute %1674, %45283 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_42710 = torch.constant.int 1
    %int0_42711 = torch.constant.int 0
    %45285 = torch.prim.ListConstruct %int1_42710, %int0_42711 : (!torch.int, !torch.int) -> !torch.list<int>
    %45286 = torch.aten.permute %1675, %45285 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_42712 = torch.constant.int 1
    %int0_42713 = torch.constant.int 0
    %45287 = torch.prim.ListConstruct %int1_42712, %int0_42713 : (!torch.int, !torch.int) -> !torch.list<int>
    %45288 = torch.aten.permute %1676, %45287 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_42714 = torch.constant.int 1
    %int0_42715 = torch.constant.int 0
    %45289 = torch.prim.ListConstruct %int1_42714, %int0_42715 : (!torch.int, !torch.int) -> !torch.list<int>
    %45290 = torch.aten.permute %1677, %45289 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_42716 = torch.constant.int 1
    %int0_42717 = torch.constant.int 0
    %45291 = torch.prim.ListConstruct %int1_42716, %int0_42717 : (!torch.int, !torch.int) -> !torch.list<int>
    %45292 = torch.aten.permute %1678, %45291 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_42718 = torch.constant.int 1
    %int0_42719 = torch.constant.int 0
    %45293 = torch.prim.ListConstruct %int1_42718, %int0_42719 : (!torch.int, !torch.int) -> !torch.list<int>
    %45294 = torch.aten.permute %1679, %45293 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int4_42720 = torch.constant.int 4
    %45295 = torch.aten.mul.int %int4_42720, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42721 = torch.constant.int 4096
    %45296 = torch.prim.ListConstruct %45295, %int4096_42721 : (!torch.int, !torch.int) -> !torch.list<int>
    %45297 = torch.aten.view %45271, %45296 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45297, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45298 = torch.aten.mm %45297, %45280 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %45298, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_42722 = torch.constant.int 4
    %int512_42723 = torch.constant.int 512
    %45299 = torch.prim.ListConstruct %int4_42722, %2482, %int512_42723 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45300 = torch.aten.view %45298, %45299 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %45300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42724 = torch.constant.int 4
    %45301 = torch.aten.mul.int %int4_42724, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42725 = torch.constant.int 4096
    %45302 = torch.prim.ListConstruct %45301, %int4096_42725 : (!torch.int, !torch.int) -> !torch.list<int>
    %45303 = torch.aten.view %45272, %45302 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45303, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45304 = torch.aten.mm %45303, %45282 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %45304, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_42726 = torch.constant.int 4
    %int512_42727 = torch.constant.int 512
    %45305 = torch.prim.ListConstruct %int4_42726, %2482, %int512_42727 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45306 = torch.aten.view %45304, %45305 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %45306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42728 = torch.constant.int 4
    %45307 = torch.aten.mul.int %int4_42728, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42729 = torch.constant.int 4096
    %45308 = torch.prim.ListConstruct %45307, %int4096_42729 : (!torch.int, !torch.int) -> !torch.list<int>
    %45309 = torch.aten.view %45273, %45308 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45309, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45310 = torch.aten.mm %45309, %45284 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %45310, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_42730 = torch.constant.int 4
    %int512_42731 = torch.constant.int 512
    %45311 = torch.prim.ListConstruct %int4_42730, %2482, %int512_42731 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45312 = torch.aten.view %45310, %45311 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %45312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42732 = torch.constant.int 4
    %45313 = torch.aten.mul.int %int4_42732, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42733 = torch.constant.int 4096
    %45314 = torch.prim.ListConstruct %45313, %int4096_42733 : (!torch.int, !torch.int) -> !torch.list<int>
    %45315 = torch.aten.view %45274, %45314 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45315, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45316 = torch.aten.mm %45315, %45286 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %45316, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_42734 = torch.constant.int 4
    %int512_42735 = torch.constant.int 512
    %45317 = torch.prim.ListConstruct %int4_42734, %2482, %int512_42735 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45318 = torch.aten.view %45316, %45317 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %45318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42736 = torch.constant.int 4
    %45319 = torch.aten.mul.int %int4_42736, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42737 = torch.constant.int 4096
    %45320 = torch.prim.ListConstruct %45319, %int4096_42737 : (!torch.int, !torch.int) -> !torch.list<int>
    %45321 = torch.aten.view %45275, %45320 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45321, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45322 = torch.aten.mm %45321, %45288 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %45322, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_42738 = torch.constant.int 4
    %int512_42739 = torch.constant.int 512
    %45323 = torch.prim.ListConstruct %int4_42738, %2482, %int512_42739 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45324 = torch.aten.view %45322, %45323 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %45324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42740 = torch.constant.int 4
    %45325 = torch.aten.mul.int %int4_42740, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42741 = torch.constant.int 4096
    %45326 = torch.prim.ListConstruct %45325, %int4096_42741 : (!torch.int, !torch.int) -> !torch.list<int>
    %45327 = torch.aten.view %45276, %45326 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45327, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45328 = torch.aten.mm %45327, %45290 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %45328, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_42742 = torch.constant.int 4
    %int512_42743 = torch.constant.int 512
    %45329 = torch.prim.ListConstruct %int4_42742, %2482, %int512_42743 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45330 = torch.aten.view %45328, %45329 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %45330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42744 = torch.constant.int 4
    %45331 = torch.aten.mul.int %int4_42744, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42745 = torch.constant.int 4096
    %45332 = torch.prim.ListConstruct %45331, %int4096_42745 : (!torch.int, !torch.int) -> !torch.list<int>
    %45333 = torch.aten.view %45277, %45332 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45333, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45334 = torch.aten.mm %45333, %45292 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %45334, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_42746 = torch.constant.int 4
    %int512_42747 = torch.constant.int 512
    %45335 = torch.prim.ListConstruct %int4_42746, %2482, %int512_42747 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45336 = torch.aten.view %45334, %45335 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %45336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_42748 = torch.constant.int 4
    %45337 = torch.aten.mul.int %int4_42748, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42749 = torch.constant.int 4096
    %45338 = torch.prim.ListConstruct %45337, %int4096_42749 : (!torch.int, !torch.int) -> !torch.list<int>
    %45339 = torch.aten.view %45278, %45338 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45339, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45340 = torch.aten.mm %45339, %45294 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %45340, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_42750 = torch.constant.int 4
    %int512_42751 = torch.constant.int 512
    %45341 = torch.prim.ListConstruct %int4_42750, %2482, %int512_42751 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45342 = torch.aten.view %45340, %45341 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %45342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
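    // K projection: the same flatten/mm/reshape pattern with [128,4096] weight
    // shards, giving [4, seq, 128] per device -- a single 128-wide KV head per
    // shard, consistent with grouped-query attention (1 KV head per 4 Q heads).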
    %int1_42752 = torch.constant.int 1
    %int0_42753 = torch.constant.int 0
    %45343 = torch.prim.ListConstruct %int1_42752, %int0_42753 : (!torch.int, !torch.int) -> !torch.list<int>
    %45344 = torch.aten.permute %1680, %45343 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42754 = torch.constant.int 1
    %int0_42755 = torch.constant.int 0
    %45345 = torch.prim.ListConstruct %int1_42754, %int0_42755 : (!torch.int, !torch.int) -> !torch.list<int>
    %45346 = torch.aten.permute %1681, %45345 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42756 = torch.constant.int 1
    %int0_42757 = torch.constant.int 0
    %45347 = torch.prim.ListConstruct %int1_42756, %int0_42757 : (!torch.int, !torch.int) -> !torch.list<int>
    %45348 = torch.aten.permute %1682, %45347 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42758 = torch.constant.int 1
    %int0_42759 = torch.constant.int 0
    %45349 = torch.prim.ListConstruct %int1_42758, %int0_42759 : (!torch.int, !torch.int) -> !torch.list<int>
    %45350 = torch.aten.permute %1683, %45349 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42760 = torch.constant.int 1
    %int0_42761 = torch.constant.int 0
    %45351 = torch.prim.ListConstruct %int1_42760, %int0_42761 : (!torch.int, !torch.int) -> !torch.list<int>
    %45352 = torch.aten.permute %1684, %45351 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42762 = torch.constant.int 1
    %int0_42763 = torch.constant.int 0
    %45353 = torch.prim.ListConstruct %int1_42762, %int0_42763 : (!torch.int, !torch.int) -> !torch.list<int>
    %45354 = torch.aten.permute %1685, %45353 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42764 = torch.constant.int 1
    %int0_42765 = torch.constant.int 0
    %45355 = torch.prim.ListConstruct %int1_42764, %int0_42765 : (!torch.int, !torch.int) -> !torch.list<int>
    %45356 = torch.aten.permute %1686, %45355 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42766 = torch.constant.int 1
    %int0_42767 = torch.constant.int 0
    %45357 = torch.prim.ListConstruct %int1_42766, %int0_42767 : (!torch.int, !torch.int) -> !torch.list<int>
    %45358 = torch.aten.permute %1687, %45357 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_42768 = torch.constant.int 4
    %45359 = torch.aten.mul.int %int4_42768, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42769 = torch.constant.int 4096
    %45360 = torch.prim.ListConstruct %45359, %int4096_42769 : (!torch.int, !torch.int) -> !torch.list<int>
    %45361 = torch.aten.view %45271, %45360 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45361, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45362 = torch.aten.mm %45361, %45344 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45362, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42770 = torch.constant.int 4
    %int128_42771 = torch.constant.int 128
    %45363 = torch.prim.ListConstruct %int4_42770, %2482, %int128_42771 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45364 = torch.aten.view %45362, %45363 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42772 = torch.constant.int 4
    %45365 = torch.aten.mul.int %int4_42772, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42773 = torch.constant.int 4096
    %45366 = torch.prim.ListConstruct %45365, %int4096_42773 : (!torch.int, !torch.int) -> !torch.list<int>
    %45367 = torch.aten.view %45272, %45366 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45367, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45368 = torch.aten.mm %45367, %45346 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45368, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42774 = torch.constant.int 4
    %int128_42775 = torch.constant.int 128
    %45369 = torch.prim.ListConstruct %int4_42774, %2482, %int128_42775 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45370 = torch.aten.view %45368, %45369 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42776 = torch.constant.int 4
    %45371 = torch.aten.mul.int %int4_42776, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42777 = torch.constant.int 4096
    %45372 = torch.prim.ListConstruct %45371, %int4096_42777 : (!torch.int, !torch.int) -> !torch.list<int>
    %45373 = torch.aten.view %45273, %45372 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45373, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45374 = torch.aten.mm %45373, %45348 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45374, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42778 = torch.constant.int 4
    %int128_42779 = torch.constant.int 128
    %45375 = torch.prim.ListConstruct %int4_42778, %2482, %int128_42779 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45376 = torch.aten.view %45374, %45375 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42780 = torch.constant.int 4
    %45377 = torch.aten.mul.int %int4_42780, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42781 = torch.constant.int 4096
    %45378 = torch.prim.ListConstruct %45377, %int4096_42781 : (!torch.int, !torch.int) -> !torch.list<int>
    %45379 = torch.aten.view %45274, %45378 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45379, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45380 = torch.aten.mm %45379, %45350 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45380, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42782 = torch.constant.int 4
    %int128_42783 = torch.constant.int 128
    %45381 = torch.prim.ListConstruct %int4_42782, %2482, %int128_42783 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45382 = torch.aten.view %45380, %45381 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42784 = torch.constant.int 4
    %45383 = torch.aten.mul.int %int4_42784, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42785 = torch.constant.int 4096
    %45384 = torch.prim.ListConstruct %45383, %int4096_42785 : (!torch.int, !torch.int) -> !torch.list<int>
    %45385 = torch.aten.view %45275, %45384 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45385, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45386 = torch.aten.mm %45385, %45352 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45386, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42786 = torch.constant.int 4
    %int128_42787 = torch.constant.int 128
    %45387 = torch.prim.ListConstruct %int4_42786, %2482, %int128_42787 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45388 = torch.aten.view %45386, %45387 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42788 = torch.constant.int 4
    %45389 = torch.aten.mul.int %int4_42788, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42789 = torch.constant.int 4096
    %45390 = torch.prim.ListConstruct %45389, %int4096_42789 : (!torch.int, !torch.int) -> !torch.list<int>
    %45391 = torch.aten.view %45276, %45390 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45391, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45392 = torch.aten.mm %45391, %45354 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45392, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42790 = torch.constant.int 4
    %int128_42791 = torch.constant.int 128
    %45393 = torch.prim.ListConstruct %int4_42790, %2482, %int128_42791 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45394 = torch.aten.view %45392, %45393 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42792 = torch.constant.int 4
    %45395 = torch.aten.mul.int %int4_42792, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42793 = torch.constant.int 4096
    %45396 = torch.prim.ListConstruct %45395, %int4096_42793 : (!torch.int, !torch.int) -> !torch.list<int>
    %45397 = torch.aten.view %45277, %45396 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45397, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45398 = torch.aten.mm %45397, %45356 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45398, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42794 = torch.constant.int 4
    %int128_42795 = torch.constant.int 128
    %45399 = torch.prim.ListConstruct %int4_42794, %2482, %int128_42795 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45400 = torch.aten.view %45398, %45399 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42796 = torch.constant.int 4
    %45401 = torch.aten.mul.int %int4_42796, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42797 = torch.constant.int 4096
    %45402 = torch.prim.ListConstruct %45401, %int4096_42797 : (!torch.int, !torch.int) -> !torch.list<int>
    %45403 = torch.aten.view %45278, %45402 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45403, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45404 = torch.aten.mm %45403, %45358 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45404, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42798 = torch.constant.int 4
    %int128_42799 = torch.constant.int 128
    %45405 = torch.prim.ListConstruct %int4_42798, %2482, %int128_42799 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45406 = torch.aten.view %45404, %45405 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
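    // V projection: identical in shape to the K projection ([128,4096] shards
    // producing [4, seq, 128] per device).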
    %int1_42800 = torch.constant.int 1
    %int0_42801 = torch.constant.int 0
    %45407 = torch.prim.ListConstruct %int1_42800, %int0_42801 : (!torch.int, !torch.int) -> !torch.list<int>
    %45408 = torch.aten.permute %1688, %45407 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42802 = torch.constant.int 1
    %int0_42803 = torch.constant.int 0
    %45409 = torch.prim.ListConstruct %int1_42802, %int0_42803 : (!torch.int, !torch.int) -> !torch.list<int>
    %45410 = torch.aten.permute %1689, %45409 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42804 = torch.constant.int 1
    %int0_42805 = torch.constant.int 0
    %45411 = torch.prim.ListConstruct %int1_42804, %int0_42805 : (!torch.int, !torch.int) -> !torch.list<int>
    %45412 = torch.aten.permute %1690, %45411 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42806 = torch.constant.int 1
    %int0_42807 = torch.constant.int 0
    %45413 = torch.prim.ListConstruct %int1_42806, %int0_42807 : (!torch.int, !torch.int) -> !torch.list<int>
    %45414 = torch.aten.permute %1691, %45413 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42808 = torch.constant.int 1
    %int0_42809 = torch.constant.int 0
    %45415 = torch.prim.ListConstruct %int1_42808, %int0_42809 : (!torch.int, !torch.int) -> !torch.list<int>
    %45416 = torch.aten.permute %1692, %45415 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42810 = torch.constant.int 1
    %int0_42811 = torch.constant.int 0
    %45417 = torch.prim.ListConstruct %int1_42810, %int0_42811 : (!torch.int, !torch.int) -> !torch.list<int>
    %45418 = torch.aten.permute %1693, %45417 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42812 = torch.constant.int 1
    %int0_42813 = torch.constant.int 0
    %45419 = torch.prim.ListConstruct %int1_42812, %int0_42813 : (!torch.int, !torch.int) -> !torch.list<int>
    %45420 = torch.aten.permute %1694, %45419 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_42814 = torch.constant.int 1
    %int0_42815 = torch.constant.int 0
    %45421 = torch.prim.ListConstruct %int1_42814, %int0_42815 : (!torch.int, !torch.int) -> !torch.list<int>
    %45422 = torch.aten.permute %1695, %45421 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_42816 = torch.constant.int 4
    %45423 = torch.aten.mul.int %int4_42816, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42817 = torch.constant.int 4096
    %45424 = torch.prim.ListConstruct %45423, %int4096_42817 : (!torch.int, !torch.int) -> !torch.list<int>
    %45425 = torch.aten.view %45271, %45424 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45425, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45426 = torch.aten.mm %45425, %45408 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45426, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42818 = torch.constant.int 4
    %int128_42819 = torch.constant.int 128
    %45427 = torch.prim.ListConstruct %int4_42818, %2482, %int128_42819 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45428 = torch.aten.view %45426, %45427 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42820 = torch.constant.int 4
    %45429 = torch.aten.mul.int %int4_42820, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42821 = torch.constant.int 4096
    %45430 = torch.prim.ListConstruct %45429, %int4096_42821 : (!torch.int, !torch.int) -> !torch.list<int>
    %45431 = torch.aten.view %45272, %45430 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45431, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45432 = torch.aten.mm %45431, %45410 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45432, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42822 = torch.constant.int 4
    %int128_42823 = torch.constant.int 128
    %45433 = torch.prim.ListConstruct %int4_42822, %2482, %int128_42823 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45434 = torch.aten.view %45432, %45433 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42824 = torch.constant.int 4
    %45435 = torch.aten.mul.int %int4_42824, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42825 = torch.constant.int 4096
    %45436 = torch.prim.ListConstruct %45435, %int4096_42825 : (!torch.int, !torch.int) -> !torch.list<int>
    %45437 = torch.aten.view %45273, %45436 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45437, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45438 = torch.aten.mm %45437, %45412 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45438, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42826 = torch.constant.int 4
    %int128_42827 = torch.constant.int 128
    %45439 = torch.prim.ListConstruct %int4_42826, %2482, %int128_42827 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45440 = torch.aten.view %45438, %45439 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42828 = torch.constant.int 4
    %45441 = torch.aten.mul.int %int4_42828, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42829 = torch.constant.int 4096
    %45442 = torch.prim.ListConstruct %45441, %int4096_42829 : (!torch.int, !torch.int) -> !torch.list<int>
    %45443 = torch.aten.view %45274, %45442 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45443, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45444 = torch.aten.mm %45443, %45414 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45444, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42830 = torch.constant.int 4
    %int128_42831 = torch.constant.int 128
    %45445 = torch.prim.ListConstruct %int4_42830, %2482, %int128_42831 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45446 = torch.aten.view %45444, %45445 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42832 = torch.constant.int 4
    %45447 = torch.aten.mul.int %int4_42832, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42833 = torch.constant.int 4096
    %45448 = torch.prim.ListConstruct %45447, %int4096_42833 : (!torch.int, !torch.int) -> !torch.list<int>
    %45449 = torch.aten.view %45275, %45448 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45449, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45450 = torch.aten.mm %45449, %45416 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45450, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42834 = torch.constant.int 4
    %int128_42835 = torch.constant.int 128
    %45451 = torch.prim.ListConstruct %int4_42834, %2482, %int128_42835 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45452 = torch.aten.view %45450, %45451 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42836 = torch.constant.int 4
    %45453 = torch.aten.mul.int %int4_42836, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42837 = torch.constant.int 4096
    %45454 = torch.prim.ListConstruct %45453, %int4096_42837 : (!torch.int, !torch.int) -> !torch.list<int>
    %45455 = torch.aten.view %45276, %45454 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45455, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45456 = torch.aten.mm %45455, %45418 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45456, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42838 = torch.constant.int 4
    %int128_42839 = torch.constant.int 128
    %45457 = torch.prim.ListConstruct %int4_42838, %2482, %int128_42839 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45458 = torch.aten.view %45456, %45457 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42840 = torch.constant.int 4
    %45459 = torch.aten.mul.int %int4_42840, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42841 = torch.constant.int 4096
    %45460 = torch.prim.ListConstruct %45459, %int4096_42841 : (!torch.int, !torch.int) -> !torch.list<int>
    %45461 = torch.aten.view %45277, %45460 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45461, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45462 = torch.aten.mm %45461, %45420 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45462, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42842 = torch.constant.int 4
    %int128_42843 = torch.constant.int 128
    %45463 = torch.prim.ListConstruct %int4_42842, %2482, %int128_42843 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45464 = torch.aten.view %45462, %45463 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_42844 = torch.constant.int 4
    %45465 = torch.aten.mul.int %int4_42844, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_42845 = torch.constant.int 4096
    %45466 = torch.prim.ListConstruct %45465, %int4096_42845 : (!torch.int, !torch.int) -> !torch.list<int>
    %45467 = torch.aten.view %45278, %45466 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %45467, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %45468 = torch.aten.mm %45467, %45422 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %45468, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_42846 = torch.constant.int 4
    %int128_42847 = torch.constant.int 128
    %45469 = torch.prim.ListConstruct %int4_42846, %2482, %int128_42847 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45470 = torch.aten.view %45468, %45469 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %45470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
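    // Unflatten the head dimension: Q becomes [4, seq, 4, 128] (4 heads per device)
    // and, further below, K and V each become [4, seq, 1, 128] (one KV head per
    // device).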
    %int4_42848 = torch.constant.int 4
    %int4_42849 = torch.constant.int 4
    %int128_42850 = torch.constant.int 128
    %45471 = torch.prim.ListConstruct %int4_42848, %2482, %int4_42849, %int128_42850 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45472 = torch.aten.view %45300, %45471 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_42851 = torch.constant.int 4
    %int4_42852 = torch.constant.int 4
    %int128_42853 = torch.constant.int 128
    %45473 = torch.prim.ListConstruct %int4_42851, %2482, %int4_42852, %int128_42853 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45474 = torch.aten.view %45306, %45473 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_42854 = torch.constant.int 4
    %int4_42855 = torch.constant.int 4
    %int128_42856 = torch.constant.int 128
    %45475 = torch.prim.ListConstruct %int4_42854, %2482, %int4_42855, %int128_42856 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45476 = torch.aten.view %45312, %45475 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_42857 = torch.constant.int 4
    %int4_42858 = torch.constant.int 4
    %int128_42859 = torch.constant.int 128
    %45477 = torch.prim.ListConstruct %int4_42857, %2482, %int4_42858, %int128_42859 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45478 = torch.aten.view %45318, %45477 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_42860 = torch.constant.int 4
    %int4_42861 = torch.constant.int 4
    %int128_42862 = torch.constant.int 128
    %45479 = torch.prim.ListConstruct %int4_42860, %2482, %int4_42861, %int128_42862 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45480 = torch.aten.view %45324, %45479 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_42863 = torch.constant.int 4
    %int4_42864 = torch.constant.int 4
    %int128_42865 = torch.constant.int 128
    %45481 = torch.prim.ListConstruct %int4_42863, %2482, %int4_42864, %int128_42865 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45482 = torch.aten.view %45330, %45481 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_42866 = torch.constant.int 4
    %int4_42867 = torch.constant.int 4
    %int128_42868 = torch.constant.int 128
    %45483 = torch.prim.ListConstruct %int4_42866, %2482, %int4_42867, %int128_42868 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45484 = torch.aten.view %45336, %45483 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_42869 = torch.constant.int 4
    %int4_42870 = torch.constant.int 4
    %int128_42871 = torch.constant.int 128
    %45485 = torch.prim.ListConstruct %int4_42869, %2482, %int4_42870, %int128_42871 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45486 = torch.aten.view %45342, %45485 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_42872 = torch.constant.int 4
    %int1_42873 = torch.constant.int 1
    %int128_42874 = torch.constant.int 128
    %45487 = torch.prim.ListConstruct %int4_42872, %2482, %int1_42873, %int128_42874 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45488 = torch.aten.view %45364, %45487 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42875 = torch.constant.int 4
    %int1_42876 = torch.constant.int 1
    %int128_42877 = torch.constant.int 128
    %45489 = torch.prim.ListConstruct %int4_42875, %2482, %int1_42876, %int128_42877 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45490 = torch.aten.view %45370, %45489 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42878 = torch.constant.int 4
    %int1_42879 = torch.constant.int 1
    %int128_42880 = torch.constant.int 128
    %45491 = torch.prim.ListConstruct %int4_42878, %2482, %int1_42879, %int128_42880 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45492 = torch.aten.view %45376, %45491 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42881 = torch.constant.int 4
    %int1_42882 = torch.constant.int 1
    %int128_42883 = torch.constant.int 128
    %45493 = torch.prim.ListConstruct %int4_42881, %2482, %int1_42882, %int128_42883 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45494 = torch.aten.view %45382, %45493 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42884 = torch.constant.int 4
    %int1_42885 = torch.constant.int 1
    %int128_42886 = torch.constant.int 128
    %45495 = torch.prim.ListConstruct %int4_42884, %2482, %int1_42885, %int128_42886 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45496 = torch.aten.view %45388, %45495 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42887 = torch.constant.int 4
    %int1_42888 = torch.constant.int 1
    %int128_42889 = torch.constant.int 128
    %45497 = torch.prim.ListConstruct %int4_42887, %2482, %int1_42888, %int128_42889 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45498 = torch.aten.view %45394, %45497 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42890 = torch.constant.int 4
    %int1_42891 = torch.constant.int 1
    %int128_42892 = torch.constant.int 128
    %45499 = torch.prim.ListConstruct %int4_42890, %2482, %int1_42891, %int128_42892 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45500 = torch.aten.view %45400, %45499 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42893 = torch.constant.int 4
    %int1_42894 = torch.constant.int 1
    %int128_42895 = torch.constant.int 128
    %45501 = torch.prim.ListConstruct %int4_42893, %2482, %int1_42894, %int128_42895 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45502 = torch.aten.view %45406, %45501 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42896 = torch.constant.int 4
    %int1_42897 = torch.constant.int 1
    %int128_42898 = torch.constant.int 128
    %45503 = torch.prim.ListConstruct %int4_42896, %2482, %int1_42897, %int128_42898 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45504 = torch.aten.view %45428, %45503 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42899 = torch.constant.int 4
    %int1_42900 = torch.constant.int 1
    %int128_42901 = torch.constant.int 128
    %45505 = torch.prim.ListConstruct %int4_42899, %2482, %int1_42900, %int128_42901 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45506 = torch.aten.view %45434, %45505 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42902 = torch.constant.int 4
    %int1_42903 = torch.constant.int 1
    %int128_42904 = torch.constant.int 128
    %45507 = torch.prim.ListConstruct %int4_42902, %2482, %int1_42903, %int128_42904 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45508 = torch.aten.view %45440, %45507 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42905 = torch.constant.int 4
    %int1_42906 = torch.constant.int 1
    %int128_42907 = torch.constant.int 128
    %45509 = torch.prim.ListConstruct %int4_42905, %2482, %int1_42906, %int128_42907 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45510 = torch.aten.view %45446, %45509 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42908 = torch.constant.int 4
    %int1_42909 = torch.constant.int 1
    %int128_42910 = torch.constant.int 128
    %45511 = torch.prim.ListConstruct %int4_42908, %2482, %int1_42909, %int128_42910 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45512 = torch.aten.view %45452, %45511 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42911 = torch.constant.int 4
    %int1_42912 = torch.constant.int 1
    %int128_42913 = torch.constant.int 128
    %45513 = torch.prim.ListConstruct %int4_42911, %2482, %int1_42912, %int128_42913 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45514 = torch.aten.view %45458, %45513 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42914 = torch.constant.int 4
    %int1_42915 = torch.constant.int 1
    %int128_42916 = torch.constant.int 128
    %45515 = torch.prim.ListConstruct %int4_42914, %2482, %int1_42915, %int128_42916 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45516 = torch.aten.view %45464, %45515 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_42917 = torch.constant.int 4
    %int1_42918 = torch.constant.int 1
    %int128_42919 = torch.constant.int 128
    %45517 = torch.prim.ListConstruct %int4_42917, %2482, %int1_42918, %int128_42919 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45518 = torch.aten.view %45470, %45517 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
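    // Rotary position embedding (RoPE) frequency table, built on the host ("cpu")
    // from the constants below (theta = 5.0e+05, head_dim = 128, max positions =
    // 131072). In effect:
    //   inv_freq[k] = 1 / theta^(2k/128)   for k in [0, 64)
    //   angle[p,k]  = p * inv_freq[k]      for p in [0, 131072)
    //   table       = cos(angle) + i*sin(angle) : [131072,64] complex<f32>
    // (the mul.Scalar by 1.000000e+00 is a unit frequency-scaling factor).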
    %int131072_42920 = torch.constant.int 131072
    %none_42921 = torch.constant.none
    %none_42922 = torch.constant.none
    %cpu_42923 = torch.constant.device "cpu"
    %false_42924 = torch.constant.bool false
    %45519 = torch.aten.arange %int131072_42920, %none_42921, %none_42922, %cpu_42923, %false_42924 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_42925 = torch.constant.int 0
    %int128_42926 = torch.constant.int 128
    %int2_42927 = torch.constant.int 2
    %none_42928 = torch.constant.none
    %none_42929 = torch.constant.none
    %cpu_42930 = torch.constant.device "cpu"
    %false_42931 = torch.constant.bool false
    %45520 = torch.aten.arange.start_step %int0_42925, %int128_42926, %int2_42927, %none_42928, %none_42929, %cpu_42930, %false_42931 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_42932 = torch.constant.int 0
    %int0_42933 = torch.constant.int 0
    %int64_42934 = torch.constant.int 64
    %int1_42935 = torch.constant.int 1
    %45521 = torch.aten.slice.Tensor %45520, %int0_42932, %int0_42933, %int64_42934, %int1_42935 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_42936 = torch.constant.int 6
    %45522 = torch.prims.convert_element_type %45521, %int6_42936 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_42937 = torch.constant.int 128
    %45523 = torch.aten.div.Scalar %45522, %int128_42937 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_42938 = torch.constant.float 5.000000e+05
    %45524 = torch.aten.pow.Scalar %float5.000000e05_42938, %45523 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %45525 = torch.aten.reciprocal %45524 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_42939 = torch.constant.float 1.000000e+00
    %45526 = torch.aten.mul.Scalar %45525, %float1.000000e00_42939 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_42940 = torch.constant.int 131072
    %int1_42941 = torch.constant.int 1
    %45527 = torch.prim.ListConstruct %int131072_42940, %int1_42941 : (!torch.int, !torch.int) -> !torch.list<int>
    %45528 = torch.aten.view %45519, %45527 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %45529 = torch.aten.mul.Tensor %45528, %45526 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %45530 = torch.aten.cos %45529 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %45531 = torch.aten.sin %45529 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %45532 = torch.aten.complex %45530, %45531 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
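    // The complex table %45532 is computed once on the host ("cpu") and then
    // replicated below to all eight devices (@__device_0 .. @__device_7)
    // via flow.tensor.transfer, yielding one per-device copy each.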
    %45533 = torch_c.to_builtin_tensor %45532 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45534 = flow.tensor.transfer %45533 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %45535 = torch_c.from_builtin_tensor %45534 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45536 = torch_c.to_builtin_tensor %45532 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45537 = flow.tensor.transfer %45536 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %45538 = torch_c.from_builtin_tensor %45537 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45539 = torch_c.to_builtin_tensor %45532 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45540 = flow.tensor.transfer %45539 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %45541 = torch_c.from_builtin_tensor %45540 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45542 = torch_c.to_builtin_tensor %45532 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45543 = flow.tensor.transfer %45542 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %45544 = torch_c.from_builtin_tensor %45543 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45545 = torch_c.to_builtin_tensor %45532 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45546 = flow.tensor.transfer %45545 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %45547 = torch_c.from_builtin_tensor %45546 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45548 = torch_c.to_builtin_tensor %45532 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45549 = flow.tensor.transfer %45548 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %45550 = torch_c.from_builtin_tensor %45549 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45551 = torch_c.to_builtin_tensor %45532 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45552 = flow.tensor.transfer %45551 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %45553 = torch_c.from_builtin_tensor %45552 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45554 = torch_c.to_builtin_tensor %45532 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45555 = flow.tensor.transfer %45554 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %45556 = torch_c.from_builtin_tensor %45555 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
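    // --- Apply RoPE to the sharded [4, ?, 4, 128] activations ---
    // For each device: take the dynamic sequence length from that shard,
    // slice the replicated table to [?, 64], unsqueeze it to broadcast as
    // [1, ?, 1, 64], bitcast the f16 activations [4, ?, 4, 128] to
    // complex<f16> pairs [4, ?, 4, 64], multiply by the table, then bitcast
    // back to [4, ?, 4, 128] f32 and narrow to f16.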
    %int1_42942 = torch.constant.int 1
    %45557 = torch.aten.size.int %45300, %int1_42942 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_42943 = torch.constant.int 0
    %45558 = torch.aten.add.int %int0_42943, %45557 : !torch.int, !torch.int -> !torch.int
    %int0_42944 = torch.constant.int 0
    %int0_42945 = torch.constant.int 0
    %int1_42946 = torch.constant.int 1
    %45559 = torch.aten.slice.Tensor %45535, %int0_42944, %int0_42945, %45558, %int1_42946 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45559, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_42947 = torch.constant.int 1
    %int0_42948 = torch.constant.int 0
    %int9223372036854775807_42949 = torch.constant.int 9223372036854775807
    %int1_42950 = torch.constant.int 1
    %45560 = torch.aten.slice.Tensor %45559, %int1_42947, %int0_42948, %int9223372036854775807_42949, %int1_42950 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45560, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_42951 = torch.constant.int 0
    %45561 = torch.aten.unsqueeze %45560, %int0_42951 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45561, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_42952 = torch.constant.int 2
    %45562 = torch.aten.unsqueeze %45561, %int2_42952 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45562, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_42953 = torch.constant.int 3
    %int0_42954 = torch.constant.int 0
    %int9223372036854775807_42955 = torch.constant.int 9223372036854775807
    %int1_42956 = torch.constant.int 1
    %45563 = torch.aten.slice.Tensor %45562, %int3_42953, %int0_42954, %int9223372036854775807_42955, %int1_42956 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45563, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45564 = torch_c.to_builtin_tensor %45472 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_42957 = arith.constant 1 : index
    %dim_42958 = tensor.dim %45564, %c1_42957 : tensor<4x?x4x128xf16>
    %45565 = flow.tensor.bitcast %45564 : tensor<4x?x4x128xf16>{%dim_42958} -> tensor<4x?x4x64xcomplex<f16>>{%dim_42958}
    %45566 = torch_c.from_builtin_tensor %45565 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %45566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %45567 = torch.aten.mul.Tensor %45566, %45563 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %45567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %45568 = torch_c.to_builtin_tensor %45567 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_42959 = arith.constant 1 : index
    %dim_42960 = tensor.dim %45568, %c1_42959 : tensor<4x?x4x64xcomplex<f32>>
    %45569 = flow.tensor.bitcast %45568 : tensor<4x?x4x64xcomplex<f32>>{%dim_42960} -> tensor<4x?x4x128xf32>{%dim_42960}
    %45570 = torch_c.from_builtin_tensor %45569 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %45570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_42961 = torch.constant.int 5
    %45571 = torch.prims.convert_element_type %45570, %int5_42961 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
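    // The same slice / unsqueeze / bitcast / complex-multiply sequence is
    // repeated verbatim for the remaining shards on @__device_1 through
    // @__device_7 (results %45586, %45601, %45616, %45631, %45646, %45661,
    // %45676).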
    %int1_42962 = torch.constant.int 1
    %45572 = torch.aten.size.int %45306, %int1_42962 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_42963 = torch.constant.int 0
    %45573 = torch.aten.add.int %int0_42963, %45572 : !torch.int, !torch.int -> !torch.int
    %int0_42964 = torch.constant.int 0
    %int0_42965 = torch.constant.int 0
    %int1_42966 = torch.constant.int 1
    %45574 = torch.aten.slice.Tensor %45538, %int0_42964, %int0_42965, %45573, %int1_42966 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45574, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_42967 = torch.constant.int 1
    %int0_42968 = torch.constant.int 0
    %int9223372036854775807_42969 = torch.constant.int 9223372036854775807
    %int1_42970 = torch.constant.int 1
    %45575 = torch.aten.slice.Tensor %45574, %int1_42967, %int0_42968, %int9223372036854775807_42969, %int1_42970 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45575, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_42971 = torch.constant.int 0
    %45576 = torch.aten.unsqueeze %45575, %int0_42971 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45576, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_42972 = torch.constant.int 2
    %45577 = torch.aten.unsqueeze %45576, %int2_42972 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45577, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_42973 = torch.constant.int 3
    %int0_42974 = torch.constant.int 0
    %int9223372036854775807_42975 = torch.constant.int 9223372036854775807
    %int1_42976 = torch.constant.int 1
    %45578 = torch.aten.slice.Tensor %45577, %int3_42973, %int0_42974, %int9223372036854775807_42975, %int1_42976 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45578, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45579 = torch_c.to_builtin_tensor %45474 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_42977 = arith.constant 1 : index
    %dim_42978 = tensor.dim %45579, %c1_42977 : tensor<4x?x4x128xf16>
    %45580 = flow.tensor.bitcast %45579 : tensor<4x?x4x128xf16>{%dim_42978} -> tensor<4x?x4x64xcomplex<f16>>{%dim_42978}
    %45581 = torch_c.from_builtin_tensor %45580 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %45581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %45582 = torch.aten.mul.Tensor %45581, %45578 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %45582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %45583 = torch_c.to_builtin_tensor %45582 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_42979 = arith.constant 1 : index
    %dim_42980 = tensor.dim %45583, %c1_42979 : tensor<4x?x4x64xcomplex<f32>>
    %45584 = flow.tensor.bitcast %45583 : tensor<4x?x4x64xcomplex<f32>>{%dim_42980} -> tensor<4x?x4x128xf32>{%dim_42980}
    %45585 = torch_c.from_builtin_tensor %45584 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %45585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_42981 = torch.constant.int 5
    %45586 = torch.prims.convert_element_type %45585, %int5_42981 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_42982 = torch.constant.int 1
    %45587 = torch.aten.size.int %45312, %int1_42982 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_42983 = torch.constant.int 0
    %45588 = torch.aten.add.int %int0_42983, %45587 : !torch.int, !torch.int -> !torch.int
    %int0_42984 = torch.constant.int 0
    %int0_42985 = torch.constant.int 0
    %int1_42986 = torch.constant.int 1
    %45589 = torch.aten.slice.Tensor %45541, %int0_42984, %int0_42985, %45588, %int1_42986 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45589, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_42987 = torch.constant.int 1
    %int0_42988 = torch.constant.int 0
    %int9223372036854775807_42989 = torch.constant.int 9223372036854775807
    %int1_42990 = torch.constant.int 1
    %45590 = torch.aten.slice.Tensor %45589, %int1_42987, %int0_42988, %int9223372036854775807_42989, %int1_42990 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45590, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_42991 = torch.constant.int 0
    %45591 = torch.aten.unsqueeze %45590, %int0_42991 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45591, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_42992 = torch.constant.int 2
    %45592 = torch.aten.unsqueeze %45591, %int2_42992 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45592, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_42993 = torch.constant.int 3
    %int0_42994 = torch.constant.int 0
    %int9223372036854775807_42995 = torch.constant.int 9223372036854775807
    %int1_42996 = torch.constant.int 1
    %45593 = torch.aten.slice.Tensor %45592, %int3_42993, %int0_42994, %int9223372036854775807_42995, %int1_42996 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45593, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45594 = torch_c.to_builtin_tensor %45476 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_42997 = arith.constant 1 : index
    %dim_42998 = tensor.dim %45594, %c1_42997 : tensor<4x?x4x128xf16>
    %45595 = flow.tensor.bitcast %45594 : tensor<4x?x4x128xf16>{%dim_42998} -> tensor<4x?x4x64xcomplex<f16>>{%dim_42998}
    %45596 = torch_c.from_builtin_tensor %45595 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %45596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %45597 = torch.aten.mul.Tensor %45596, %45593 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %45597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %45598 = torch_c.to_builtin_tensor %45597 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_42999 = arith.constant 1 : index
    %dim_43000 = tensor.dim %45598, %c1_42999 : tensor<4x?x4x64xcomplex<f32>>
    %45599 = flow.tensor.bitcast %45598 : tensor<4x?x4x64xcomplex<f32>>{%dim_43000} -> tensor<4x?x4x128xf32>{%dim_43000}
    %45600 = torch_c.from_builtin_tensor %45599 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %45600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_43001 = torch.constant.int 5
    %45601 = torch.prims.convert_element_type %45600, %int5_43001 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_43002 = torch.constant.int 1
    %45602 = torch.aten.size.int %45318, %int1_43002 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_43003 = torch.constant.int 0
    %45603 = torch.aten.add.int %int0_43003, %45602 : !torch.int, !torch.int -> !torch.int
    %int0_43004 = torch.constant.int 0
    %int0_43005 = torch.constant.int 0
    %int1_43006 = torch.constant.int 1
    %45604 = torch.aten.slice.Tensor %45544, %int0_43004, %int0_43005, %45603, %int1_43006 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45604, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43007 = torch.constant.int 1
    %int0_43008 = torch.constant.int 0
    %int9223372036854775807_43009 = torch.constant.int 9223372036854775807
    %int1_43010 = torch.constant.int 1
    %45605 = torch.aten.slice.Tensor %45604, %int1_43007, %int0_43008, %int9223372036854775807_43009, %int1_43010 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45605, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43011 = torch.constant.int 0
    %45606 = torch.aten.unsqueeze %45605, %int0_43011 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45606, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43012 = torch.constant.int 2
    %45607 = torch.aten.unsqueeze %45606, %int2_43012 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45607, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43013 = torch.constant.int 3
    %int0_43014 = torch.constant.int 0
    %int9223372036854775807_43015 = torch.constant.int 9223372036854775807
    %int1_43016 = torch.constant.int 1
    %45608 = torch.aten.slice.Tensor %45607, %int3_43013, %int0_43014, %int9223372036854775807_43015, %int1_43016 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45608, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45609 = torch_c.to_builtin_tensor %45478 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_43017 = arith.constant 1 : index
    %dim_43018 = tensor.dim %45609, %c1_43017 : tensor<4x?x4x128xf16>
    %45610 = flow.tensor.bitcast %45609 : tensor<4x?x4x128xf16>{%dim_43018} -> tensor<4x?x4x64xcomplex<f16>>{%dim_43018}
    %45611 = torch_c.from_builtin_tensor %45610 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %45611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %45612 = torch.aten.mul.Tensor %45611, %45608 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %45612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %45613 = torch_c.to_builtin_tensor %45612 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_43019 = arith.constant 1 : index
    %dim_43020 = tensor.dim %45613, %c1_43019 : tensor<4x?x4x64xcomplex<f32>>
    %45614 = flow.tensor.bitcast %45613 : tensor<4x?x4x64xcomplex<f32>>{%dim_43020} -> tensor<4x?x4x128xf32>{%dim_43020}
    %45615 = torch_c.from_builtin_tensor %45614 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %45615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_43021 = torch.constant.int 5
    %45616 = torch.prims.convert_element_type %45615, %int5_43021 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_43022 = torch.constant.int 1
    %45617 = torch.aten.size.int %45324, %int1_43022 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_43023 = torch.constant.int 0
    %45618 = torch.aten.add.int %int0_43023, %45617 : !torch.int, !torch.int -> !torch.int
    %int0_43024 = torch.constant.int 0
    %int0_43025 = torch.constant.int 0
    %int1_43026 = torch.constant.int 1
    %45619 = torch.aten.slice.Tensor %45547, %int0_43024, %int0_43025, %45618, %int1_43026 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45619, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43027 = torch.constant.int 1
    %int0_43028 = torch.constant.int 0
    %int9223372036854775807_43029 = torch.constant.int 9223372036854775807
    %int1_43030 = torch.constant.int 1
    %45620 = torch.aten.slice.Tensor %45619, %int1_43027, %int0_43028, %int9223372036854775807_43029, %int1_43030 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45620, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43031 = torch.constant.int 0
    %45621 = torch.aten.unsqueeze %45620, %int0_43031 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45621, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43032 = torch.constant.int 2
    %45622 = torch.aten.unsqueeze %45621, %int2_43032 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45622, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43033 = torch.constant.int 3
    %int0_43034 = torch.constant.int 0
    %int9223372036854775807_43035 = torch.constant.int 9223372036854775807
    %int1_43036 = torch.constant.int 1
    %45623 = torch.aten.slice.Tensor %45622, %int3_43033, %int0_43034, %int9223372036854775807_43035, %int1_43036 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45623, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45624 = torch_c.to_builtin_tensor %45480 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_43037 = arith.constant 1 : index
    %dim_43038 = tensor.dim %45624, %c1_43037 : tensor<4x?x4x128xf16>
    %45625 = flow.tensor.bitcast %45624 : tensor<4x?x4x128xf16>{%dim_43038} -> tensor<4x?x4x64xcomplex<f16>>{%dim_43038}
    %45626 = torch_c.from_builtin_tensor %45625 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %45626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %45627 = torch.aten.mul.Tensor %45626, %45623 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %45627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %45628 = torch_c.to_builtin_tensor %45627 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_43039 = arith.constant 1 : index
    %dim_43040 = tensor.dim %45628, %c1_43039 : tensor<4x?x4x64xcomplex<f32>>
    %45629 = flow.tensor.bitcast %45628 : tensor<4x?x4x64xcomplex<f32>>{%dim_43040} -> tensor<4x?x4x128xf32>{%dim_43040}
    %45630 = torch_c.from_builtin_tensor %45629 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %45630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_43041 = torch.constant.int 5
    %45631 = torch.prims.convert_element_type %45630, %int5_43041 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_43042 = torch.constant.int 1
    %45632 = torch.aten.size.int %45330, %int1_43042 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_43043 = torch.constant.int 0
    %45633 = torch.aten.add.int %int0_43043, %45632 : !torch.int, !torch.int -> !torch.int
    %int0_43044 = torch.constant.int 0
    %int0_43045 = torch.constant.int 0
    %int1_43046 = torch.constant.int 1
    %45634 = torch.aten.slice.Tensor %45550, %int0_43044, %int0_43045, %45633, %int1_43046 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45634, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43047 = torch.constant.int 1
    %int0_43048 = torch.constant.int 0
    %int9223372036854775807_43049 = torch.constant.int 9223372036854775807
    %int1_43050 = torch.constant.int 1
    %45635 = torch.aten.slice.Tensor %45634, %int1_43047, %int0_43048, %int9223372036854775807_43049, %int1_43050 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45635, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43051 = torch.constant.int 0
    %45636 = torch.aten.unsqueeze %45635, %int0_43051 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45636, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43052 = torch.constant.int 2
    %45637 = torch.aten.unsqueeze %45636, %int2_43052 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45637, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43053 = torch.constant.int 3
    %int0_43054 = torch.constant.int 0
    %int9223372036854775807_43055 = torch.constant.int 9223372036854775807
    %int1_43056 = torch.constant.int 1
    %45638 = torch.aten.slice.Tensor %45637, %int3_43053, %int0_43054, %int9223372036854775807_43055, %int1_43056 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45638, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45639 = torch_c.to_builtin_tensor %45482 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_43057 = arith.constant 1 : index
    %dim_43058 = tensor.dim %45639, %c1_43057 : tensor<4x?x4x128xf16>
    %45640 = flow.tensor.bitcast %45639 : tensor<4x?x4x128xf16>{%dim_43058} -> tensor<4x?x4x64xcomplex<f16>>{%dim_43058}
    %45641 = torch_c.from_builtin_tensor %45640 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %45641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %45642 = torch.aten.mul.Tensor %45641, %45638 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %45642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %45643 = torch_c.to_builtin_tensor %45642 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_43059 = arith.constant 1 : index
    %dim_43060 = tensor.dim %45643, %c1_43059 : tensor<4x?x4x64xcomplex<f32>>
    %45644 = flow.tensor.bitcast %45643 : tensor<4x?x4x64xcomplex<f32>>{%dim_43060} -> tensor<4x?x4x128xf32>{%dim_43060}
    %45645 = torch_c.from_builtin_tensor %45644 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %45645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_43061 = torch.constant.int 5
    %45646 = torch.prims.convert_element_type %45645, %int5_43061 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_43062 = torch.constant.int 1
    %45647 = torch.aten.size.int %45336, %int1_43062 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_43063 = torch.constant.int 0
    %45648 = torch.aten.add.int %int0_43063, %45647 : !torch.int, !torch.int -> !torch.int
    %int0_43064 = torch.constant.int 0
    %int0_43065 = torch.constant.int 0
    %int1_43066 = torch.constant.int 1
    %45649 = torch.aten.slice.Tensor %45553, %int0_43064, %int0_43065, %45648, %int1_43066 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45649, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43067 = torch.constant.int 1
    %int0_43068 = torch.constant.int 0
    %int9223372036854775807_43069 = torch.constant.int 9223372036854775807
    %int1_43070 = torch.constant.int 1
    %45650 = torch.aten.slice.Tensor %45649, %int1_43067, %int0_43068, %int9223372036854775807_43069, %int1_43070 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45650, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43071 = torch.constant.int 0
    %45651 = torch.aten.unsqueeze %45650, %int0_43071 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45651, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43072 = torch.constant.int 2
    %45652 = torch.aten.unsqueeze %45651, %int2_43072 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45652, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43073 = torch.constant.int 3
    %int0_43074 = torch.constant.int 0
    %int9223372036854775807_43075 = torch.constant.int 9223372036854775807
    %int1_43076 = torch.constant.int 1
    %45653 = torch.aten.slice.Tensor %45652, %int3_43073, %int0_43074, %int9223372036854775807_43075, %int1_43076 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45653, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45654 = torch_c.to_builtin_tensor %45484 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_43077 = arith.constant 1 : index
    %dim_43078 = tensor.dim %45654, %c1_43077 : tensor<4x?x4x128xf16>
    %45655 = flow.tensor.bitcast %45654 : tensor<4x?x4x128xf16>{%dim_43078} -> tensor<4x?x4x64xcomplex<f16>>{%dim_43078}
    %45656 = torch_c.from_builtin_tensor %45655 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %45656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %45657 = torch.aten.mul.Tensor %45656, %45653 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %45657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %45658 = torch_c.to_builtin_tensor %45657 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_43079 = arith.constant 1 : index
    %dim_43080 = tensor.dim %45658, %c1_43079 : tensor<4x?x4x64xcomplex<f32>>
    %45659 = flow.tensor.bitcast %45658 : tensor<4x?x4x64xcomplex<f32>>{%dim_43080} -> tensor<4x?x4x128xf32>{%dim_43080}
    %45660 = torch_c.from_builtin_tensor %45659 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %45660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_43081 = torch.constant.int 5
    %45661 = torch.prims.convert_element_type %45660, %int5_43081 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_43082 = torch.constant.int 1
    %45662 = torch.aten.size.int %45342, %int1_43082 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_43083 = torch.constant.int 0
    %45663 = torch.aten.add.int %int0_43083, %45662 : !torch.int, !torch.int -> !torch.int
    %int0_43084 = torch.constant.int 0
    %int0_43085 = torch.constant.int 0
    %int1_43086 = torch.constant.int 1
    %45664 = torch.aten.slice.Tensor %45556, %int0_43084, %int0_43085, %45663, %int1_43086 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45664, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43087 = torch.constant.int 1
    %int0_43088 = torch.constant.int 0
    %int9223372036854775807_43089 = torch.constant.int 9223372036854775807
    %int1_43090 = torch.constant.int 1
    %45665 = torch.aten.slice.Tensor %45664, %int1_43087, %int0_43088, %int9223372036854775807_43089, %int1_43090 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45665, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43091 = torch.constant.int 0
    %45666 = torch.aten.unsqueeze %45665, %int0_43091 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45666, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43092 = torch.constant.int 2
    %45667 = torch.aten.unsqueeze %45666, %int2_43092 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45667, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43093 = torch.constant.int 3
    %int0_43094 = torch.constant.int 0
    %int9223372036854775807_43095 = torch.constant.int 9223372036854775807
    %int1_43096 = torch.constant.int 1
    %45668 = torch.aten.slice.Tensor %45667, %int3_43093, %int0_43094, %int9223372036854775807_43095, %int1_43096 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45668, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45669 = torch_c.to_builtin_tensor %45486 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_43097 = arith.constant 1 : index
    %dim_43098 = tensor.dim %45669, %c1_43097 : tensor<4x?x4x128xf16>
    %45670 = flow.tensor.bitcast %45669 : tensor<4x?x4x128xf16>{%dim_43098} -> tensor<4x?x4x64xcomplex<f16>>{%dim_43098}
    %45671 = torch_c.from_builtin_tensor %45670 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %45671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %45672 = torch.aten.mul.Tensor %45671, %45668 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %45672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %45673 = torch_c.to_builtin_tensor %45672 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_43099 = arith.constant 1 : index
    %dim_43100 = tensor.dim %45673, %c1_43099 : tensor<4x?x4x64xcomplex<f32>>
    %45674 = flow.tensor.bitcast %45673 : tensor<4x?x4x64xcomplex<f32>>{%dim_43100} -> tensor<4x?x4x128xf32>{%dim_43100}
    %45675 = torch_c.from_builtin_tensor %45674 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %45675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_43101 = torch.constant.int 5
    %45676 = torch.prims.convert_element_type %45675, %int5_43101 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %45676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
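    // --- Second RoPE table ---
    // An identical [131072, 64] complex<f32> cos/sin table is rebuilt from
    // scratch (same base 5.0e+05) and re-replicated to all eight devices;
    // this copy appears to feed the single-head [4, ?, 1, 128] activations
    // below rather than being reused from the first table.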
    %int131072_43102 = torch.constant.int 131072
    %none_43103 = torch.constant.none
    %none_43104 = torch.constant.none
    %cpu_43105 = torch.constant.device "cpu"
    %false_43106 = torch.constant.bool false
    %45677 = torch.aten.arange %int131072_43102, %none_43103, %none_43104, %cpu_43105, %false_43106 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_43107 = torch.constant.int 0
    %int128_43108 = torch.constant.int 128
    %int2_43109 = torch.constant.int 2
    %none_43110 = torch.constant.none
    %none_43111 = torch.constant.none
    %cpu_43112 = torch.constant.device "cpu"
    %false_43113 = torch.constant.bool false
    %45678 = torch.aten.arange.start_step %int0_43107, %int128_43108, %int2_43109, %none_43110, %none_43111, %cpu_43112, %false_43113 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_43114 = torch.constant.int 0
    %int0_43115 = torch.constant.int 0
    %int64_43116 = torch.constant.int 64
    %int1_43117 = torch.constant.int 1
    %45679 = torch.aten.slice.Tensor %45678, %int0_43114, %int0_43115, %int64_43116, %int1_43117 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_43118 = torch.constant.int 6
    %45680 = torch.prims.convert_element_type %45679, %int6_43118 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_43119 = torch.constant.int 128
    %45681 = torch.aten.div.Scalar %45680, %int128_43119 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_43120 = torch.constant.float 5.000000e+05
    %45682 = torch.aten.pow.Scalar %float5.000000e05_43120, %45681 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %45683 = torch.aten.reciprocal %45682 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_43121 = torch.constant.float 1.000000e+00
    %45684 = torch.aten.mul.Scalar %45683, %float1.000000e00_43121 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_43122 = torch.constant.int 131072
    %int1_43123 = torch.constant.int 1
    %45685 = torch.prim.ListConstruct %int131072_43122, %int1_43123 : (!torch.int, !torch.int) -> !torch.list<int>
    %45686 = torch.aten.view %45677, %45685 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %45687 = torch.aten.mul.Tensor %45686, %45684 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %45688 = torch.aten.cos %45687 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %45689 = torch.aten.sin %45687 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %45690 = torch.aten.complex %45688, %45689 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
    %45691 = torch_c.to_builtin_tensor %45690 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45692 = flow.tensor.transfer %45691 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %45693 = torch_c.from_builtin_tensor %45692 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45694 = torch_c.to_builtin_tensor %45690 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45695 = flow.tensor.transfer %45694 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %45696 = torch_c.from_builtin_tensor %45695 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45697 = torch_c.to_builtin_tensor %45690 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45698 = flow.tensor.transfer %45697 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %45699 = torch_c.from_builtin_tensor %45698 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45700 = torch_c.to_builtin_tensor %45690 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45701 = flow.tensor.transfer %45700 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %45702 = torch_c.from_builtin_tensor %45701 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45703 = torch_c.to_builtin_tensor %45690 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45704 = flow.tensor.transfer %45703 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %45705 = torch_c.from_builtin_tensor %45704 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45706 = torch_c.to_builtin_tensor %45690 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45707 = flow.tensor.transfer %45706 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %45708 = torch_c.from_builtin_tensor %45707 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45709 = torch_c.to_builtin_tensor %45690 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45710 = flow.tensor.transfer %45709 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %45711 = torch_c.from_builtin_tensor %45710 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %45712 = torch_c.to_builtin_tensor %45690 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %45713 = flow.tensor.transfer %45712 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %45714 = torch_c.from_builtin_tensor %45713 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
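    // --- Apply RoPE to the sharded [4, ?, 1, 128] activations ---
    // Same pattern as above, but on one head per shard: slice the table to
    // the shard's sequence length, broadcast as [1, ?, 1, 64],
    // complex-multiply against the bitcast [4, ?, 1, 64] complex<f16>
    // pairs, and cast the result back to f16. Repeated for each of the
    // eight devices.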
    %int1_43124 = torch.constant.int 1
    %45715 = torch.aten.size.int %45364, %int1_43124 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_43125 = torch.constant.int 0
    %45716 = torch.aten.add.int %int0_43125, %45715 : !torch.int, !torch.int -> !torch.int
    %int0_43126 = torch.constant.int 0
    %int0_43127 = torch.constant.int 0
    %int1_43128 = torch.constant.int 1
    %45717 = torch.aten.slice.Tensor %45693, %int0_43126, %int0_43127, %45716, %int1_43128 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45717, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43129 = torch.constant.int 1
    %int0_43130 = torch.constant.int 0
    %int9223372036854775807_43131 = torch.constant.int 9223372036854775807
    %int1_43132 = torch.constant.int 1
    %45718 = torch.aten.slice.Tensor %45717, %int1_43129, %int0_43130, %int9223372036854775807_43131, %int1_43132 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45718, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43133 = torch.constant.int 0
    %45719 = torch.aten.unsqueeze %45718, %int0_43133 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45719, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43134 = torch.constant.int 2
    %45720 = torch.aten.unsqueeze %45719, %int2_43134 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45720, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43135 = torch.constant.int 3
    %int0_43136 = torch.constant.int 0
    %int9223372036854775807_43137 = torch.constant.int 9223372036854775807
    %int1_43138 = torch.constant.int 1
    %45721 = torch.aten.slice.Tensor %45720, %int3_43135, %int0_43136, %int9223372036854775807_43137, %int1_43138 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45721, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45722 = torch_c.to_builtin_tensor %45488 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_43139 = arith.constant 1 : index
    %dim_43140 = tensor.dim %45722, %c1_43139 : tensor<4x?x1x128xf16>
    %45723 = flow.tensor.bitcast %45722 : tensor<4x?x1x128xf16>{%dim_43140} -> tensor<4x?x1x64xcomplex<f16>>{%dim_43140}
    %45724 = torch_c.from_builtin_tensor %45723 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %45724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %45725 = torch.aten.mul.Tensor %45724, %45721 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %45726 = torch_c.to_builtin_tensor %45725 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_43141 = arith.constant 1 : index
    %dim_43142 = tensor.dim %45726, %c1_43141 : tensor<4x?x1x64xcomplex<f32>>
    %45727 = flow.tensor.bitcast %45726 : tensor<4x?x1x64xcomplex<f32>>{%dim_43142} -> tensor<4x?x1x128xf32>{%dim_43142}
    %45728 = torch_c.from_builtin_tensor %45727 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %45728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_43143 = torch.constant.int 5
    %45729 = torch.prims.convert_element_type %45728, %int5_43143 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_43144 = torch.constant.int 1
    %45730 = torch.aten.size.int %45370, %int1_43144 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_43145 = torch.constant.int 0
    %45731 = torch.aten.add.int %int0_43145, %45730 : !torch.int, !torch.int -> !torch.int
    %int0_43146 = torch.constant.int 0
    %int0_43147 = torch.constant.int 0
    %int1_43148 = torch.constant.int 1
    %45732 = torch.aten.slice.Tensor %45696, %int0_43146, %int0_43147, %45731, %int1_43148 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45732, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43149 = torch.constant.int 1
    %int0_43150 = torch.constant.int 0
    %int9223372036854775807_43151 = torch.constant.int 9223372036854775807
    %int1_43152 = torch.constant.int 1
    %45733 = torch.aten.slice.Tensor %45732, %int1_43149, %int0_43150, %int9223372036854775807_43151, %int1_43152 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45733, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43153 = torch.constant.int 0
    %45734 = torch.aten.unsqueeze %45733, %int0_43153 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45734, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43154 = torch.constant.int 2
    %45735 = torch.aten.unsqueeze %45734, %int2_43154 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45735, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43155 = torch.constant.int 3
    %int0_43156 = torch.constant.int 0
    %int9223372036854775807_43157 = torch.constant.int 9223372036854775807
    %int1_43158 = torch.constant.int 1
    %45736 = torch.aten.slice.Tensor %45735, %int3_43155, %int0_43156, %int9223372036854775807_43157, %int1_43158 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45736, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45737 = torch_c.to_builtin_tensor %45490 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_43159 = arith.constant 1 : index
    %dim_43160 = tensor.dim %45737, %c1_43159 : tensor<4x?x1x128xf16>
    %45738 = flow.tensor.bitcast %45737 : tensor<4x?x1x128xf16>{%dim_43160} -> tensor<4x?x1x64xcomplex<f16>>{%dim_43160}
    %45739 = torch_c.from_builtin_tensor %45738 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %45739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %45740 = torch.aten.mul.Tensor %45739, %45736 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %45741 = torch_c.to_builtin_tensor %45740 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_43161 = arith.constant 1 : index
    %dim_43162 = tensor.dim %45741, %c1_43161 : tensor<4x?x1x64xcomplex<f32>>
    %45742 = flow.tensor.bitcast %45741 : tensor<4x?x1x64xcomplex<f32>>{%dim_43162} -> tensor<4x?x1x128xf32>{%dim_43162}
    %45743 = torch_c.from_builtin_tensor %45742 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %45743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_43163 = torch.constant.int 5
    %45744 = torch.prims.convert_element_type %45743, %int5_43163 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_43164 = torch.constant.int 1
    %45745 = torch.aten.size.int %45376, %int1_43164 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_43165 = torch.constant.int 0
    %45746 = torch.aten.add.int %int0_43165, %45745 : !torch.int, !torch.int -> !torch.int
    %int0_43166 = torch.constant.int 0
    %int0_43167 = torch.constant.int 0
    %int1_43168 = torch.constant.int 1
    %45747 = torch.aten.slice.Tensor %45699, %int0_43166, %int0_43167, %45746, %int1_43168 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45747, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43169 = torch.constant.int 1
    %int0_43170 = torch.constant.int 0
    %int9223372036854775807_43171 = torch.constant.int 9223372036854775807
    %int1_43172 = torch.constant.int 1
    %45748 = torch.aten.slice.Tensor %45747, %int1_43169, %int0_43170, %int9223372036854775807_43171, %int1_43172 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45748, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43173 = torch.constant.int 0
    %45749 = torch.aten.unsqueeze %45748, %int0_43173 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45749, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43174 = torch.constant.int 2
    %45750 = torch.aten.unsqueeze %45749, %int2_43174 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45750, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43175 = torch.constant.int 3
    %int0_43176 = torch.constant.int 0
    %int9223372036854775807_43177 = torch.constant.int 9223372036854775807
    %int1_43178 = torch.constant.int 1
    %45751 = torch.aten.slice.Tensor %45750, %int3_43175, %int0_43176, %int9223372036854775807_43177, %int1_43178 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45751, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45752 = torch_c.to_builtin_tensor %45492 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_43179 = arith.constant 1 : index
    %dim_43180 = tensor.dim %45752, %c1_43179 : tensor<4x?x1x128xf16>
    %45753 = flow.tensor.bitcast %45752 : tensor<4x?x1x128xf16>{%dim_43180} -> tensor<4x?x1x64xcomplex<f16>>{%dim_43180}
    %45754 = torch_c.from_builtin_tensor %45753 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %45754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %45755 = torch.aten.mul.Tensor %45754, %45751 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %45756 = torch_c.to_builtin_tensor %45755 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_43181 = arith.constant 1 : index
    %dim_43182 = tensor.dim %45756, %c1_43181 : tensor<4x?x1x64xcomplex<f32>>
    %45757 = flow.tensor.bitcast %45756 : tensor<4x?x1x64xcomplex<f32>>{%dim_43182} -> tensor<4x?x1x128xf32>{%dim_43182}
    %45758 = torch_c.from_builtin_tensor %45757 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %45758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_43183 = torch.constant.int 5
    %45759 = torch.prims.convert_element_type %45758, %int5_43183 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_43184 = torch.constant.int 1
    %45760 = torch.aten.size.int %45382, %int1_43184 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_43185 = torch.constant.int 0
    %45761 = torch.aten.add.int %int0_43185, %45760 : !torch.int, !torch.int -> !torch.int
    %int0_43186 = torch.constant.int 0
    %int0_43187 = torch.constant.int 0
    %int1_43188 = torch.constant.int 1
    %45762 = torch.aten.slice.Tensor %45702, %int0_43186, %int0_43187, %45761, %int1_43188 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45762, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43189 = torch.constant.int 1
    %int0_43190 = torch.constant.int 0
    %int9223372036854775807_43191 = torch.constant.int 9223372036854775807
    %int1_43192 = torch.constant.int 1
    %45763 = torch.aten.slice.Tensor %45762, %int1_43189, %int0_43190, %int9223372036854775807_43191, %int1_43192 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45763, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43193 = torch.constant.int 0
    %45764 = torch.aten.unsqueeze %45763, %int0_43193 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45764, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43194 = torch.constant.int 2
    %45765 = torch.aten.unsqueeze %45764, %int2_43194 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45765, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43195 = torch.constant.int 3
    %int0_43196 = torch.constant.int 0
    %int9223372036854775807_43197 = torch.constant.int 9223372036854775807
    %int1_43198 = torch.constant.int 1
    %45766 = torch.aten.slice.Tensor %45765, %int3_43195, %int0_43196, %int9223372036854775807_43197, %int1_43198 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45766, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45767 = torch_c.to_builtin_tensor %45494 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_43199 = arith.constant 1 : index
    %dim_43200 = tensor.dim %45767, %c1_43199 : tensor<4x?x1x128xf16>
    %45768 = flow.tensor.bitcast %45767 : tensor<4x?x1x128xf16>{%dim_43200} -> tensor<4x?x1x64xcomplex<f16>>{%dim_43200}
    %45769 = torch_c.from_builtin_tensor %45768 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %45769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %45770 = torch.aten.mul.Tensor %45769, %45766 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %45771 = torch_c.to_builtin_tensor %45770 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_43201 = arith.constant 1 : index
    %dim_43202 = tensor.dim %45771, %c1_43201 : tensor<4x?x1x64xcomplex<f32>>
    %45772 = flow.tensor.bitcast %45771 : tensor<4x?x1x64xcomplex<f32>>{%dim_43202} -> tensor<4x?x1x128xf32>{%dim_43202}
    %45773 = torch_c.from_builtin_tensor %45772 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %45773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_43203 = torch.constant.int 5
    %45774 = torch.prims.convert_element_type %45773, %int5_43203 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_43204 = torch.constant.int 1
    %45775 = torch.aten.size.int %45388, %int1_43204 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_43205 = torch.constant.int 0
    %45776 = torch.aten.add.int %int0_43205, %45775 : !torch.int, !torch.int -> !torch.int
    %int0_43206 = torch.constant.int 0
    %int0_43207 = torch.constant.int 0
    %int1_43208 = torch.constant.int 1
    %45777 = torch.aten.slice.Tensor %45705, %int0_43206, %int0_43207, %45776, %int1_43208 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45777, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43209 = torch.constant.int 1
    %int0_43210 = torch.constant.int 0
    %int9223372036854775807_43211 = torch.constant.int 9223372036854775807
    %int1_43212 = torch.constant.int 1
    %45778 = torch.aten.slice.Tensor %45777, %int1_43209, %int0_43210, %int9223372036854775807_43211, %int1_43212 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45778, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43213 = torch.constant.int 0
    %45779 = torch.aten.unsqueeze %45778, %int0_43213 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45779, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43214 = torch.constant.int 2
    %45780 = torch.aten.unsqueeze %45779, %int2_43214 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45780, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43215 = torch.constant.int 3
    %int0_43216 = torch.constant.int 0
    %int9223372036854775807_43217 = torch.constant.int 9223372036854775807
    %int1_43218 = torch.constant.int 1
    %45781 = torch.aten.slice.Tensor %45780, %int3_43215, %int0_43216, %int9223372036854775807_43217, %int1_43218 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45781, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45782 = torch_c.to_builtin_tensor %45496 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_43219 = arith.constant 1 : index
    %dim_43220 = tensor.dim %45782, %c1_43219 : tensor<4x?x1x128xf16>
    %45783 = flow.tensor.bitcast %45782 : tensor<4x?x1x128xf16>{%dim_43220} -> tensor<4x?x1x64xcomplex<f16>>{%dim_43220}
    %45784 = torch_c.from_builtin_tensor %45783 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %45784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %45785 = torch.aten.mul.Tensor %45784, %45781 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %45786 = torch_c.to_builtin_tensor %45785 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_43221 = arith.constant 1 : index
    %dim_43222 = tensor.dim %45786, %c1_43221 : tensor<4x?x1x64xcomplex<f32>>
    %45787 = flow.tensor.bitcast %45786 : tensor<4x?x1x64xcomplex<f32>>{%dim_43222} -> tensor<4x?x1x128xf32>{%dim_43222}
    %45788 = torch_c.from_builtin_tensor %45787 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %45788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_43223 = torch.constant.int 5
    %45789 = torch.prims.convert_element_type %45788, %int5_43223 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_43224 = torch.constant.int 1
    %45790 = torch.aten.size.int %45394, %int1_43224 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_43225 = torch.constant.int 0
    %45791 = torch.aten.add.int %int0_43225, %45790 : !torch.int, !torch.int -> !torch.int
    %int0_43226 = torch.constant.int 0
    %int0_43227 = torch.constant.int 0
    %int1_43228 = torch.constant.int 1
    %45792 = torch.aten.slice.Tensor %45708, %int0_43226, %int0_43227, %45791, %int1_43228 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45792, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43229 = torch.constant.int 1
    %int0_43230 = torch.constant.int 0
    %int9223372036854775807_43231 = torch.constant.int 9223372036854775807
    %int1_43232 = torch.constant.int 1
    %45793 = torch.aten.slice.Tensor %45792, %int1_43229, %int0_43230, %int9223372036854775807_43231, %int1_43232 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45793, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43233 = torch.constant.int 0
    %45794 = torch.aten.unsqueeze %45793, %int0_43233 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45794, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43234 = torch.constant.int 2
    %45795 = torch.aten.unsqueeze %45794, %int2_43234 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45795, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43235 = torch.constant.int 3
    %int0_43236 = torch.constant.int 0
    %int9223372036854775807_43237 = torch.constant.int 9223372036854775807
    %int1_43238 = torch.constant.int 1
    %45796 = torch.aten.slice.Tensor %45795, %int3_43235, %int0_43236, %int9223372036854775807_43237, %int1_43238 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45796, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45797 = torch_c.to_builtin_tensor %45498 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_43239 = arith.constant 1 : index
    %dim_43240 = tensor.dim %45797, %c1_43239 : tensor<4x?x1x128xf16>
    %45798 = flow.tensor.bitcast %45797 : tensor<4x?x1x128xf16>{%dim_43240} -> tensor<4x?x1x64xcomplex<f16>>{%dim_43240}
    %45799 = torch_c.from_builtin_tensor %45798 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %45799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %45800 = torch.aten.mul.Tensor %45799, %45796 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %45801 = torch_c.to_builtin_tensor %45800 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_43241 = arith.constant 1 : index
    %dim_43242 = tensor.dim %45801, %c1_43241 : tensor<4x?x1x64xcomplex<f32>>
    %45802 = flow.tensor.bitcast %45801 : tensor<4x?x1x64xcomplex<f32>>{%dim_43242} -> tensor<4x?x1x128xf32>{%dim_43242}
    %45803 = torch_c.from_builtin_tensor %45802 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %45803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_43243 = torch.constant.int 5
    %45804 = torch.prims.convert_element_type %45803, %int5_43243 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_43244 = torch.constant.int 1
    %45805 = torch.aten.size.int %45400, %int1_43244 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_43245 = torch.constant.int 0
    %45806 = torch.aten.add.int %int0_43245, %45805 : !torch.int, !torch.int -> !torch.int
    %int0_43246 = torch.constant.int 0
    %int0_43247 = torch.constant.int 0
    %int1_43248 = torch.constant.int 1
    %45807 = torch.aten.slice.Tensor %45711, %int0_43246, %int0_43247, %45806, %int1_43248 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45807, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43249 = torch.constant.int 1
    %int0_43250 = torch.constant.int 0
    %int9223372036854775807_43251 = torch.constant.int 9223372036854775807
    %int1_43252 = torch.constant.int 1
    %45808 = torch.aten.slice.Tensor %45807, %int1_43249, %int0_43250, %int9223372036854775807_43251, %int1_43252 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45808, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43253 = torch.constant.int 0
    %45809 = torch.aten.unsqueeze %45808, %int0_43253 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45809, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43254 = torch.constant.int 2
    %45810 = torch.aten.unsqueeze %45809, %int2_43254 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45810, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43255 = torch.constant.int 3
    %int0_43256 = torch.constant.int 0
    %int9223372036854775807_43257 = torch.constant.int 9223372036854775807
    %int1_43258 = torch.constant.int 1
    %45811 = torch.aten.slice.Tensor %45810, %int3_43255, %int0_43256, %int9223372036854775807_43257, %int1_43258 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45811, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45812 = torch_c.to_builtin_tensor %45500 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_43259 = arith.constant 1 : index
    %dim_43260 = tensor.dim %45812, %c1_43259 : tensor<4x?x1x128xf16>
    %45813 = flow.tensor.bitcast %45812 : tensor<4x?x1x128xf16>{%dim_43260} -> tensor<4x?x1x64xcomplex<f16>>{%dim_43260}
    %45814 = torch_c.from_builtin_tensor %45813 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %45814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %45815 = torch.aten.mul.Tensor %45814, %45811 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %45816 = torch_c.to_builtin_tensor %45815 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_43261 = arith.constant 1 : index
    %dim_43262 = tensor.dim %45816, %c1_43261 : tensor<4x?x1x64xcomplex<f32>>
    %45817 = flow.tensor.bitcast %45816 : tensor<4x?x1x64xcomplex<f32>>{%dim_43262} -> tensor<4x?x1x128xf32>{%dim_43262}
    %45818 = torch_c.from_builtin_tensor %45817 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %45818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_43263 = torch.constant.int 5
    %45819 = torch.prims.convert_element_type %45818, %int5_43263 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_43264 = torch.constant.int 1
    %45820 = torch.aten.size.int %45406, %int1_43264 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_43265 = torch.constant.int 0
    %45821 = torch.aten.add.int %int0_43265, %45820 : !torch.int, !torch.int -> !torch.int
    %int0_43266 = torch.constant.int 0
    %int0_43267 = torch.constant.int 0
    %int1_43268 = torch.constant.int 1
    %45822 = torch.aten.slice.Tensor %45714, %int0_43266, %int0_43267, %45821, %int1_43268 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45822, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_43269 = torch.constant.int 1
    %int0_43270 = torch.constant.int 0
    %int9223372036854775807_43271 = torch.constant.int 9223372036854775807
    %int1_43272 = torch.constant.int 1
    %45823 = torch.aten.slice.Tensor %45822, %int1_43269, %int0_43270, %int9223372036854775807_43271, %int1_43272 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %45823, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_43273 = torch.constant.int 0
    %45824 = torch.aten.unsqueeze %45823, %int0_43273 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %45824, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_43274 = torch.constant.int 2
    %45825 = torch.aten.unsqueeze %45824, %int2_43274 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45825, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_43275 = torch.constant.int 3
    %int0_43276 = torch.constant.int 0
    %int9223372036854775807_43277 = torch.constant.int 9223372036854775807
    %int1_43278 = torch.constant.int 1
    %45826 = torch.aten.slice.Tensor %45825, %int3_43275, %int0_43276, %int9223372036854775807_43277, %int1_43278 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45826, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %45827 = torch_c.to_builtin_tensor %45502 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_43279 = arith.constant 1 : index
    %dim_43280 = tensor.dim %45827, %c1_43279 : tensor<4x?x1x128xf16>
    %45828 = flow.tensor.bitcast %45827 : tensor<4x?x1x128xf16>{%dim_43280} -> tensor<4x?x1x64xcomplex<f16>>{%dim_43280}
    %45829 = torch_c.from_builtin_tensor %45828 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %45829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %45830 = torch.aten.mul.Tensor %45829, %45826 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %45830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %45831 = torch_c.to_builtin_tensor %45830 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_43281 = arith.constant 1 : index
    %dim_43282 = tensor.dim %45831, %c1_43281 : tensor<4x?x1x64xcomplex<f32>>
    %45832 = flow.tensor.bitcast %45831 : tensor<4x?x1x64xcomplex<f32>>{%dim_43282} -> tensor<4x?x1x128xf32>{%dim_43282}
    %45833 = torch_c.from_builtin_tensor %45832 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %45833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_43283 = torch.constant.int 5
    %45834 = torch.prims.convert_element_type %45833, %int5_43283 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %45834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
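    // (inferred) Build paged KV-cache slot indices: the per-shard page-id tensors
    // (%2364 ... %2385) are scaled by a stride of 64 and offset by 46 — plausibly
    // 2*n_layers sub-blocks per page, with 46 = 2*layer addressing this layer's K sub-block.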
    %int64_43284 = torch.constant.int 64
    %45835 = torch.aten.mul.Scalar %2364, %int64_43284 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45835, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_43285 = torch.constant.int 64
    %45836 = torch.aten.mul.Scalar %2367, %int64_43285 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45836, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_43286 = torch.constant.int 64
    %45837 = torch.aten.mul.Scalar %2370, %int64_43286 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45837, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_43287 = torch.constant.int 64
    %45838 = torch.aten.mul.Scalar %2373, %int64_43287 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45838, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_43288 = torch.constant.int 64
    %45839 = torch.aten.mul.Scalar %2376, %int64_43288 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45839, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_43289 = torch.constant.int 64
    %45840 = torch.aten.mul.Scalar %2379, %int64_43289 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45840, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_43290 = torch.constant.int 64
    %45841 = torch.aten.mul.Scalar %2382, %int64_43290 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45841, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_43291 = torch.constant.int 64
    %45842 = torch.aten.mul.Scalar %2385, %int64_43291 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45842, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int46 = torch.constant.int 46
    %int1_43292 = torch.constant.int 1
    %45843 = torch.aten.add.Scalar %45835, %int46, %int1_43292 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45843, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int46_43293 = torch.constant.int 46
    %int1_43294 = torch.constant.int 1
    %45844 = torch.aten.add.Scalar %45836, %int46_43293, %int1_43294 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45844, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int46_43295 = torch.constant.int 46
    %int1_43296 = torch.constant.int 1
    %45845 = torch.aten.add.Scalar %45837, %int46_43295, %int1_43296 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45845, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int46_43297 = torch.constant.int 46
    %int1_43298 = torch.constant.int 1
    %45846 = torch.aten.add.Scalar %45838, %int46_43297, %int1_43298 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45846, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int46_43299 = torch.constant.int 46
    %int1_43300 = torch.constant.int 1
    %45847 = torch.aten.add.Scalar %45839, %int46_43299, %int1_43300 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45847, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int46_43301 = torch.constant.int 46
    %int1_43302 = torch.constant.int 1
    %45848 = torch.aten.add.Scalar %45840, %int46_43301, %int1_43302 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45848, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int46_43303 = torch.constant.int 46
    %int1_43304 = torch.constant.int 1
    %45849 = torch.aten.add.Scalar %45841, %int46_43303, %int1_43304 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45849, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int46_43305 = torch.constant.int 46
    %int1_43306 = torch.constant.int 1
    %45850 = torch.aten.add.Scalar %45842, %int46_43305, %int1_43306 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45850, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
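    // (inferred) Regroup each shard's rotated keys from [4, seq, 1, 128] into 16-token pages,
    // [4, pages, 16, 1, 128], then flatten the batch and page dims to [4*pages, 16, 1, 128] so
    // each row lines up with one flattened slot index.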
    %int4_43307 = torch.constant.int 4
    %int16_43308 = torch.constant.int 16
    %int1_43309 = torch.constant.int 1
    %int128_43310 = torch.constant.int 128
    %45851 = torch.prim.ListConstruct %int4_43307, %3095, %int16_43308, %int1_43309, %int128_43310 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45852 = torch.aten.view %45729, %45851 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45852, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43311 = torch.constant.int 4
    %int16_43312 = torch.constant.int 16
    %int1_43313 = torch.constant.int 1
    %int128_43314 = torch.constant.int 128
    %45853 = torch.prim.ListConstruct %int4_43311, %3095, %int16_43312, %int1_43313, %int128_43314 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45854 = torch.aten.view %45744, %45853 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45854, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43315 = torch.constant.int 4
    %int16_43316 = torch.constant.int 16
    %int1_43317 = torch.constant.int 1
    %int128_43318 = torch.constant.int 128
    %45855 = torch.prim.ListConstruct %int4_43315, %3095, %int16_43316, %int1_43317, %int128_43318 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45856 = torch.aten.view %45759, %45855 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45856, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43319 = torch.constant.int 4
    %int16_43320 = torch.constant.int 16
    %int1_43321 = torch.constant.int 1
    %int128_43322 = torch.constant.int 128
    %45857 = torch.prim.ListConstruct %int4_43319, %3095, %int16_43320, %int1_43321, %int128_43322 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45858 = torch.aten.view %45774, %45857 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45858, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43323 = torch.constant.int 4
    %int16_43324 = torch.constant.int 16
    %int1_43325 = torch.constant.int 1
    %int128_43326 = torch.constant.int 128
    %45859 = torch.prim.ListConstruct %int4_43323, %3095, %int16_43324, %int1_43325, %int128_43326 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45860 = torch.aten.view %45789, %45859 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45860, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43327 = torch.constant.int 4
    %int16_43328 = torch.constant.int 16
    %int1_43329 = torch.constant.int 1
    %int128_43330 = torch.constant.int 128
    %45861 = torch.prim.ListConstruct %int4_43327, %3095, %int16_43328, %int1_43329, %int128_43330 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45862 = torch.aten.view %45804, %45861 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45862, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43331 = torch.constant.int 4
    %int16_43332 = torch.constant.int 16
    %int1_43333 = torch.constant.int 1
    %int128_43334 = torch.constant.int 128
    %45863 = torch.prim.ListConstruct %int4_43331, %3095, %int16_43332, %int1_43333, %int128_43334 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45864 = torch.aten.view %45819, %45863 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45864, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43335 = torch.constant.int 4
    %int16_43336 = torch.constant.int 16
    %int1_43337 = torch.constant.int 1
    %int128_43338 = torch.constant.int 128
    %45865 = torch.prim.ListConstruct %int4_43335, %3095, %int16_43336, %int1_43337, %int128_43338 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45866 = torch.aten.view %45834, %45865 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45866, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43339 = torch.constant.int 4
    %45867 = torch.aten.mul.int %int4_43339, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43340 = torch.constant.int 16
    %int1_43341 = torch.constant.int 1
    %int128_43342 = torch.constant.int 128
    %45868 = torch.prim.ListConstruct %45867, %int16_43340, %int1_43341, %int128_43342 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45869 = torch.aten.view %45852, %45868 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45869, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43343 = torch.constant.int 4
    %45870 = torch.aten.mul.int %int4_43343, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43344 = torch.constant.int 16
    %int1_43345 = torch.constant.int 1
    %int128_43346 = torch.constant.int 128
    %45871 = torch.prim.ListConstruct %45870, %int16_43344, %int1_43345, %int128_43346 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45872 = torch.aten.view %45854, %45871 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45872, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43347 = torch.constant.int 4
    %45873 = torch.aten.mul.int %int4_43347, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43348 = torch.constant.int 16
    %int1_43349 = torch.constant.int 1
    %int128_43350 = torch.constant.int 128
    %45874 = torch.prim.ListConstruct %45873, %int16_43348, %int1_43349, %int128_43350 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45875 = torch.aten.view %45856, %45874 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45875, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43351 = torch.constant.int 4
    %45876 = torch.aten.mul.int %int4_43351, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43352 = torch.constant.int 16
    %int1_43353 = torch.constant.int 1
    %int128_43354 = torch.constant.int 128
    %45877 = torch.prim.ListConstruct %45876, %int16_43352, %int1_43353, %int128_43354 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45878 = torch.aten.view %45858, %45877 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45878, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43355 = torch.constant.int 4
    %45879 = torch.aten.mul.int %int4_43355, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43356 = torch.constant.int 16
    %int1_43357 = torch.constant.int 1
    %int128_43358 = torch.constant.int 128
    %45880 = torch.prim.ListConstruct %45879, %int16_43356, %int1_43357, %int128_43358 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45881 = torch.aten.view %45860, %45880 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45881, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43359 = torch.constant.int 4
    %45882 = torch.aten.mul.int %int4_43359, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43360 = torch.constant.int 16
    %int1_43361 = torch.constant.int 1
    %int128_43362 = torch.constant.int 128
    %45883 = torch.prim.ListConstruct %45882, %int16_43360, %int1_43361, %int128_43362 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45884 = torch.aten.view %45862, %45883 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45884, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43363 = torch.constant.int 4
    %45885 = torch.aten.mul.int %int4_43363, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43364 = torch.constant.int 16
    %int1_43365 = torch.constant.int 1
    %int128_43366 = torch.constant.int 128
    %45886 = torch.prim.ListConstruct %45885, %int16_43364, %int1_43365, %int128_43366 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45887 = torch.aten.view %45864, %45886 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45887, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43367 = torch.constant.int 4
    %45888 = torch.aten.mul.int %int4_43367, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43368 = torch.constant.int 16
    %int1_43369 = torch.constant.int 1
    %int128_43370 = torch.constant.int 128
    %45889 = torch.prim.ListConstruct %45888, %int16_43368, %int1_43369, %int128_43370 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45890 = torch.aten.view %45866, %45889 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45890, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
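    // (inferred) Flatten the [4, pages] K slot-index tensors to rank-1 [4*pages], matching the
    // flattened key pages produced above.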
    %int4_43371 = torch.constant.int 4
    %45891 = torch.aten.mul.int %int4_43371, %3095 : !torch.int, !torch.int -> !torch.int
    %45892 = torch.prim.ListConstruct %45891 : (!torch.int) -> !torch.list<int>
    %45893 = torch.aten.view %45843, %45892 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45893, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43372 = torch.constant.int 4
    %45894 = torch.aten.mul.int %int4_43372, %3095 : !torch.int, !torch.int -> !torch.int
    %45895 = torch.prim.ListConstruct %45894 : (!torch.int) -> !torch.list<int>
    %45896 = torch.aten.view %45844, %45895 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45896, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43373 = torch.constant.int 4
    %45897 = torch.aten.mul.int %int4_43373, %3095 : !torch.int, !torch.int -> !torch.int
    %45898 = torch.prim.ListConstruct %45897 : (!torch.int) -> !torch.list<int>
    %45899 = torch.aten.view %45845, %45898 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45899, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43374 = torch.constant.int 4
    %45900 = torch.aten.mul.int %int4_43374, %3095 : !torch.int, !torch.int -> !torch.int
    %45901 = torch.prim.ListConstruct %45900 : (!torch.int) -> !torch.list<int>
    %45902 = torch.aten.view %45846, %45901 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45902, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43375 = torch.constant.int 4
    %45903 = torch.aten.mul.int %int4_43375, %3095 : !torch.int, !torch.int -> !torch.int
    %45904 = torch.prim.ListConstruct %45903 : (!torch.int) -> !torch.list<int>
    %45905 = torch.aten.view %45847, %45904 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45905, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43376 = torch.constant.int 4
    %45906 = torch.aten.mul.int %int4_43376, %3095 : !torch.int, !torch.int -> !torch.int
    %45907 = torch.prim.ListConstruct %45906 : (!torch.int) -> !torch.list<int>
    %45908 = torch.aten.view %45848, %45907 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45908, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43377 = torch.constant.int 4
    %45909 = torch.aten.mul.int %int4_43377, %3095 : !torch.int, !torch.int -> !torch.int
    %45910 = torch.prim.ListConstruct %45909 : (!torch.int) -> !torch.list<int>
    %45911 = torch.aten.view %45849, %45910 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45911, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43378 = torch.constant.int 4
    %45912 = torch.aten.mul.int %int4_43378, %3095 : !torch.int, !torch.int -> !torch.int
    %45913 = torch.prim.ListConstruct %45912 : (!torch.int) -> !torch.list<int>
    %45914 = torch.aten.view %45850, %45913 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45914, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
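    // (inferred) Same 16-token paging and flattening, now applied to the value tensors
    // (%45504 ... %45518); the values bypass the RoPE rotation.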
    %int4_43379 = torch.constant.int 4
    %int16_43380 = torch.constant.int 16
    %int1_43381 = torch.constant.int 1
    %int128_43382 = torch.constant.int 128
    %45915 = torch.prim.ListConstruct %int4_43379, %3095, %int16_43380, %int1_43381, %int128_43382 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45916 = torch.aten.view %45504, %45915 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45916, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43383 = torch.constant.int 4
    %int16_43384 = torch.constant.int 16
    %int1_43385 = torch.constant.int 1
    %int128_43386 = torch.constant.int 128
    %45917 = torch.prim.ListConstruct %int4_43383, %3095, %int16_43384, %int1_43385, %int128_43386 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45918 = torch.aten.view %45506, %45917 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45918, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43387 = torch.constant.int 4
    %int16_43388 = torch.constant.int 16
    %int1_43389 = torch.constant.int 1
    %int128_43390 = torch.constant.int 128
    %45919 = torch.prim.ListConstruct %int4_43387, %3095, %int16_43388, %int1_43389, %int128_43390 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45920 = torch.aten.view %45508, %45919 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45920, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43391 = torch.constant.int 4
    %int16_43392 = torch.constant.int 16
    %int1_43393 = torch.constant.int 1
    %int128_43394 = torch.constant.int 128
    %45921 = torch.prim.ListConstruct %int4_43391, %3095, %int16_43392, %int1_43393, %int128_43394 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45922 = torch.aten.view %45510, %45921 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45922, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43395 = torch.constant.int 4
    %int16_43396 = torch.constant.int 16
    %int1_43397 = torch.constant.int 1
    %int128_43398 = torch.constant.int 128
    %45923 = torch.prim.ListConstruct %int4_43395, %3095, %int16_43396, %int1_43397, %int128_43398 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45924 = torch.aten.view %45512, %45923 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45924, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43399 = torch.constant.int 4
    %int16_43400 = torch.constant.int 16
    %int1_43401 = torch.constant.int 1
    %int128_43402 = torch.constant.int 128
    %45925 = torch.prim.ListConstruct %int4_43399, %3095, %int16_43400, %int1_43401, %int128_43402 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45926 = torch.aten.view %45514, %45925 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45926, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43403 = torch.constant.int 4
    %int16_43404 = torch.constant.int 16
    %int1_43405 = torch.constant.int 1
    %int128_43406 = torch.constant.int 128
    %45927 = torch.prim.ListConstruct %int4_43403, %3095, %int16_43404, %int1_43405, %int128_43406 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45928 = torch.aten.view %45516, %45927 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45928, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43407 = torch.constant.int 4
    %int16_43408 = torch.constant.int 16
    %int1_43409 = torch.constant.int 1
    %int128_43410 = torch.constant.int 128
    %45929 = torch.prim.ListConstruct %int4_43407, %3095, %int16_43408, %int1_43409, %int128_43410 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45930 = torch.aten.view %45518, %45929 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %45930, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_43411 = torch.constant.int 4
    %45931 = torch.aten.mul.int %int4_43411, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43412 = torch.constant.int 16
    %int1_43413 = torch.constant.int 1
    %int128_43414 = torch.constant.int 128
    %45932 = torch.prim.ListConstruct %45931, %int16_43412, %int1_43413, %int128_43414 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45933 = torch.aten.view %45916, %45932 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45933, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43415 = torch.constant.int 4
    %45934 = torch.aten.mul.int %int4_43415, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43416 = torch.constant.int 16
    %int1_43417 = torch.constant.int 1
    %int128_43418 = torch.constant.int 128
    %45935 = torch.prim.ListConstruct %45934, %int16_43416, %int1_43417, %int128_43418 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45936 = torch.aten.view %45918, %45935 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45936, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43419 = torch.constant.int 4
    %45937 = torch.aten.mul.int %int4_43419, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43420 = torch.constant.int 16
    %int1_43421 = torch.constant.int 1
    %int128_43422 = torch.constant.int 128
    %45938 = torch.prim.ListConstruct %45937, %int16_43420, %int1_43421, %int128_43422 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45939 = torch.aten.view %45920, %45938 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45939, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43423 = torch.constant.int 4
    %45940 = torch.aten.mul.int %int4_43423, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43424 = torch.constant.int 16
    %int1_43425 = torch.constant.int 1
    %int128_43426 = torch.constant.int 128
    %45941 = torch.prim.ListConstruct %45940, %int16_43424, %int1_43425, %int128_43426 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45942 = torch.aten.view %45922, %45941 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45942, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43427 = torch.constant.int 4
    %45943 = torch.aten.mul.int %int4_43427, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43428 = torch.constant.int 16
    %int1_43429 = torch.constant.int 1
    %int128_43430 = torch.constant.int 128
    %45944 = torch.prim.ListConstruct %45943, %int16_43428, %int1_43429, %int128_43430 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45945 = torch.aten.view %45924, %45944 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45945, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43431 = torch.constant.int 4
    %45946 = torch.aten.mul.int %int4_43431, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43432 = torch.constant.int 16
    %int1_43433 = torch.constant.int 1
    %int128_43434 = torch.constant.int 128
    %45947 = torch.prim.ListConstruct %45946, %int16_43432, %int1_43433, %int128_43434 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45948 = torch.aten.view %45926, %45947 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45948, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43435 = torch.constant.int 4
    %45949 = torch.aten.mul.int %int4_43435, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43436 = torch.constant.int 16
    %int1_43437 = torch.constant.int 1
    %int128_43438 = torch.constant.int 128
    %45950 = torch.prim.ListConstruct %45949, %int16_43436, %int1_43437, %int128_43438 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45951 = torch.aten.view %45928, %45950 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45951, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_43439 = torch.constant.int 4
    %45952 = torch.aten.mul.int %int4_43439, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_43440 = torch.constant.int 16
    %int1_43441 = torch.constant.int 1
    %int128_43442 = torch.constant.int 128
    %45953 = torch.prim.ListConstruct %45952, %int16_43440, %int1_43441, %int128_43442 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %45954 = torch.aten.view %45930, %45953 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %45954, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
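    // (inferred) V slot indices are the K indices plus 1 (sub-block 47 following 46), then
    // flattened to rank-1 exactly like the K indices.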
    %int1_43443 = torch.constant.int 1
    %int1_43444 = torch.constant.int 1
    %45955 = torch.aten.add.Scalar %45843, %int1_43443, %int1_43444 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45955, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_43445 = torch.constant.int 1
    %int1_43446 = torch.constant.int 1
    %45956 = torch.aten.add.Scalar %45844, %int1_43445, %int1_43446 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45956, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_43447 = torch.constant.int 1
    %int1_43448 = torch.constant.int 1
    %45957 = torch.aten.add.Scalar %45845, %int1_43447, %int1_43448 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45957, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_43449 = torch.constant.int 1
    %int1_43450 = torch.constant.int 1
    %45958 = torch.aten.add.Scalar %45846, %int1_43449, %int1_43450 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45958, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_43451 = torch.constant.int 1
    %int1_43452 = torch.constant.int 1
    %45959 = torch.aten.add.Scalar %45847, %int1_43451, %int1_43452 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45959, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_43453 = torch.constant.int 1
    %int1_43454 = torch.constant.int 1
    %45960 = torch.aten.add.Scalar %45848, %int1_43453, %int1_43454 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45960, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_43455 = torch.constant.int 1
    %int1_43456 = torch.constant.int 1
    %45961 = torch.aten.add.Scalar %45849, %int1_43455, %int1_43456 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45961, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_43457 = torch.constant.int 1
    %int1_43458 = torch.constant.int 1
    %45962 = torch.aten.add.Scalar %45850, %int1_43457, %int1_43458 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %45962, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_43459 = torch.constant.int 4
    %45963 = torch.aten.mul.int %int4_43459, %3095 : !torch.int, !torch.int -> !torch.int
    %45964 = torch.prim.ListConstruct %45963 : (!torch.int) -> !torch.list<int>
    %45965 = torch.aten.view %45955, %45964 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45965, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43460 = torch.constant.int 4
    %45966 = torch.aten.mul.int %int4_43460, %3095 : !torch.int, !torch.int -> !torch.int
    %45967 = torch.prim.ListConstruct %45966 : (!torch.int) -> !torch.list<int>
    %45968 = torch.aten.view %45956, %45967 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45968, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43461 = torch.constant.int 4
    %45969 = torch.aten.mul.int %int4_43461, %3095 : !torch.int, !torch.int -> !torch.int
    %45970 = torch.prim.ListConstruct %45969 : (!torch.int) -> !torch.list<int>
    %45971 = torch.aten.view %45957, %45970 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45971, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43462 = torch.constant.int 4
    %45972 = torch.aten.mul.int %int4_43462, %3095 : !torch.int, !torch.int -> !torch.int
    %45973 = torch.prim.ListConstruct %45972 : (!torch.int) -> !torch.list<int>
    %45974 = torch.aten.view %45958, %45973 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45974, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43463 = torch.constant.int 4
    %45975 = torch.aten.mul.int %int4_43463, %3095 : !torch.int, !torch.int -> !torch.int
    %45976 = torch.prim.ListConstruct %45975 : (!torch.int) -> !torch.list<int>
    %45977 = torch.aten.view %45959, %45976 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45977, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43464 = torch.constant.int 4
    %45978 = torch.aten.mul.int %int4_43464, %3095 : !torch.int, !torch.int -> !torch.int
    %45979 = torch.prim.ListConstruct %45978 : (!torch.int) -> !torch.list<int>
    %45980 = torch.aten.view %45960, %45979 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45980, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43465 = torch.constant.int 4
    %45981 = torch.aten.mul.int %int4_43465, %3095 : !torch.int, !torch.int -> !torch.int
    %45982 = torch.prim.ListConstruct %45981 : (!torch.int) -> !torch.list<int>
    %45983 = torch.aten.view %45961, %45982 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45983, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_43466 = torch.constant.int 4
    %45984 = torch.aten.mul.int %int4_43466, %3095 : !torch.int, !torch.int -> !torch.int
    %45985 = torch.prim.ListConstruct %45984 : (!torch.int) -> !torch.list<int>
    %45986 = torch.aten.view %45962, %45985 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45986, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
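    // NOTE (inferred annotation): each shard's previously flattened indices are
    // concatenated with the +1-offset indices above into a single [s0 * 8] index
    // tensor, possibly addressing adjacent K/V rows of the flattened cache.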
    %45987 = torch.prim.ListConstruct %45893, %45965 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_43467 = torch.constant.int 0
    %45988 = torch.aten.cat %45987, %int0_43467 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45988, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %45989 = torch.prim.ListConstruct %45896, %45968 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_43468 = torch.constant.int 0
    %45990 = torch.aten.cat %45989, %int0_43468 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45990, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %45991 = torch.prim.ListConstruct %45899, %45971 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_43469 = torch.constant.int 0
    %45992 = torch.aten.cat %45991, %int0_43469 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45992, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %45993 = torch.prim.ListConstruct %45902, %45974 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_43470 = torch.constant.int 0
    %45994 = torch.aten.cat %45993, %int0_43470 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45994, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %45995 = torch.prim.ListConstruct %45905, %45977 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_43471 = torch.constant.int 0
    %45996 = torch.aten.cat %45995, %int0_43471 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45996, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %45997 = torch.prim.ListConstruct %45908, %45980 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_43472 = torch.constant.int 0
    %45998 = torch.aten.cat %45997, %int0_43472 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %45998, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %45999 = torch.prim.ListConstruct %45911, %45983 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_43473 = torch.constant.int 0
    %46000 = torch.aten.cat %45999, %int0_43473 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %46000, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %46001 = torch.prim.ListConstruct %45914, %45986 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_43474 = torch.constant.int 0
    %46002 = torch.aten.cat %46001, %int0_43474 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %46002, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
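    // NOTE (inferred annotation): the matching [?,16,1,128] f16 value slices are
    // concatenated in the same order, giving (s0 * 8, 16, 1, 128) update payloads
    // that line up element-for-element with the combined indices.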
    %46003 = torch.prim.ListConstruct %45869, %45933 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_43475 = torch.constant.int 0
    %46004 = torch.aten.cat %46003, %int0_43475 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46004, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46005 = torch.prim.ListConstruct %45872, %45936 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_43476 = torch.constant.int 0
    %46006 = torch.aten.cat %46005, %int0_43476 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46006, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46007 = torch.prim.ListConstruct %45875, %45939 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_43477 = torch.constant.int 0
    %46008 = torch.aten.cat %46007, %int0_43477 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46008, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46009 = torch.prim.ListConstruct %45878, %45942 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_43478 = torch.constant.int 0
    %46010 = torch.aten.cat %46009, %int0_43478 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46010, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46011 = torch.prim.ListConstruct %45881, %45945 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_43479 = torch.constant.int 0
    %46012 = torch.aten.cat %46011, %int0_43479 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46012, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46013 = torch.prim.ListConstruct %45884, %45948 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_43480 = torch.constant.int 0
    %46014 = torch.aten.cat %46013, %int0_43480 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46014, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46015 = torch.prim.ListConstruct %45887, %45951 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_43481 = torch.constant.int 0
    %46016 = torch.aten.cat %46015, %int0_43481 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46016, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46017 = torch.prim.ListConstruct %45890, %45954 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_43482 = torch.constant.int 0
    %46018 = torch.aten.cat %46017, %int0_43482 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46018, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
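    // NOTE (inferred annotation): the following pattern repeats once per device
    // shard: view the paged cache [?,131072] as [?,32,2,16,1,128]
    // (32 * 2 * 16 * 1 * 128 = 131072), flatten the leading dims to
    // [s0 * 64, 16, 1, 128], scatter the combined values at the combined indices
    // with torch.aten.index_put (accumulate = false), then view back to [?,131072].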
    %int32_43483 = torch.constant.int 32
    %int2_43484 = torch.constant.int 2
    %int16_43485 = torch.constant.int 16
    %int1_43486 = torch.constant.int 1
    %int128_43487 = torch.constant.int 128
    %46019 = torch.prim.ListConstruct %3023, %int32_43483, %int2_43484, %int16_43485, %int1_43486, %int128_43487 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46020 = torch.aten.view %44169, %46019 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46020, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_43488 = torch.constant.int 32
    %46021 = torch.aten.mul.int %3023, %int32_43488 : !torch.int, !torch.int -> !torch.int
    %int2_43489 = torch.constant.int 2
    %46022 = torch.aten.mul.int %46021, %int2_43489 : !torch.int, !torch.int -> !torch.int
    %int16_43490 = torch.constant.int 16
    %int1_43491 = torch.constant.int 1
    %int128_43492 = torch.constant.int 128
    %46023 = torch.prim.ListConstruct %46022, %int16_43490, %int1_43491, %int128_43492 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46024 = torch.aten.view %46020, %46023 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46024, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46025 = torch.prim.ListConstruct %45988 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_43493 = torch.constant.bool false
    %46026 = torch.aten.index_put %46024, %46025, %46004, %false_43493 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46026, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_43494 = torch.constant.int 32
    %int2_43495 = torch.constant.int 2
    %int16_43496 = torch.constant.int 16
    %int1_43497 = torch.constant.int 1
    %int128_43498 = torch.constant.int 128
    %46027 = torch.prim.ListConstruct %3023, %int32_43494, %int2_43495, %int16_43496, %int1_43497, %int128_43498 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46028 = torch.aten.view %46026, %46027 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46028, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_43499 = torch.constant.int 131072
    %46029 = torch.prim.ListConstruct %3023, %int131072_43499 : (!torch.int, !torch.int) -> !torch.list<int>
    %46030 = torch.aten.view %46028, %46029 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %46030, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_43500 = torch.constant.int 32
    %int2_43501 = torch.constant.int 2
    %int16_43502 = torch.constant.int 16
    %int1_43503 = torch.constant.int 1
    %int128_43504 = torch.constant.int 128
    %46031 = torch.prim.ListConstruct %3026, %int32_43500, %int2_43501, %int16_43502, %int1_43503, %int128_43504 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46032 = torch.aten.view %44181, %46031 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46032, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_43505 = torch.constant.int 32
    %46033 = torch.aten.mul.int %3026, %int32_43505 : !torch.int, !torch.int -> !torch.int
    %int2_43506 = torch.constant.int 2
    %46034 = torch.aten.mul.int %46033, %int2_43506 : !torch.int, !torch.int -> !torch.int
    %int16_43507 = torch.constant.int 16
    %int1_43508 = torch.constant.int 1
    %int128_43509 = torch.constant.int 128
    %46035 = torch.prim.ListConstruct %46034, %int16_43507, %int1_43508, %int128_43509 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46036 = torch.aten.view %46032, %46035 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46036, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46037 = torch.prim.ListConstruct %45990 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_43510 = torch.constant.bool false
    %46038 = torch.aten.index_put %46036, %46037, %46006, %false_43510 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46038, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_43511 = torch.constant.int 32
    %int2_43512 = torch.constant.int 2
    %int16_43513 = torch.constant.int 16
    %int1_43514 = torch.constant.int 1
    %int128_43515 = torch.constant.int 128
    %46039 = torch.prim.ListConstruct %3026, %int32_43511, %int2_43512, %int16_43513, %int1_43514, %int128_43515 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46040 = torch.aten.view %46038, %46039 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46040, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_43516 = torch.constant.int 131072
    %46041 = torch.prim.ListConstruct %3026, %int131072_43516 : (!torch.int, !torch.int) -> !torch.list<int>
    %46042 = torch.aten.view %46040, %46041 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %46042, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_43517 = torch.constant.int 32
    %int2_43518 = torch.constant.int 2
    %int16_43519 = torch.constant.int 16
    %int1_43520 = torch.constant.int 1
    %int128_43521 = torch.constant.int 128
    %46043 = torch.prim.ListConstruct %3029, %int32_43517, %int2_43518, %int16_43519, %int1_43520, %int128_43521 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46044 = torch.aten.view %44193, %46043 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46044, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_43522 = torch.constant.int 32
    %46045 = torch.aten.mul.int %3029, %int32_43522 : !torch.int, !torch.int -> !torch.int
    %int2_43523 = torch.constant.int 2
    %46046 = torch.aten.mul.int %46045, %int2_43523 : !torch.int, !torch.int -> !torch.int
    %int16_43524 = torch.constant.int 16
    %int1_43525 = torch.constant.int 1
    %int128_43526 = torch.constant.int 128
    %46047 = torch.prim.ListConstruct %46046, %int16_43524, %int1_43525, %int128_43526 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46048 = torch.aten.view %46044, %46047 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46048, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46049 = torch.prim.ListConstruct %45992 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_43527 = torch.constant.bool false
    %46050 = torch.aten.index_put %46048, %46049, %46008, %false_43527 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46050, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_43528 = torch.constant.int 32
    %int2_43529 = torch.constant.int 2
    %int16_43530 = torch.constant.int 16
    %int1_43531 = torch.constant.int 1
    %int128_43532 = torch.constant.int 128
    %46051 = torch.prim.ListConstruct %3029, %int32_43528, %int2_43529, %int16_43530, %int1_43531, %int128_43532 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46052 = torch.aten.view %46050, %46051 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46052, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_43533 = torch.constant.int 131072
    %46053 = torch.prim.ListConstruct %3029, %int131072_43533 : (!torch.int, !torch.int) -> !torch.list<int>
    %46054 = torch.aten.view %46052, %46053 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %46054, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_43534 = torch.constant.int 32
    %int2_43535 = torch.constant.int 2
    %int16_43536 = torch.constant.int 16
    %int1_43537 = torch.constant.int 1
    %int128_43538 = torch.constant.int 128
    %46055 = torch.prim.ListConstruct %3032, %int32_43534, %int2_43535, %int16_43536, %int1_43537, %int128_43538 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46056 = torch.aten.view %44205, %46055 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46056, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_43539 = torch.constant.int 32
    %46057 = torch.aten.mul.int %3032, %int32_43539 : !torch.int, !torch.int -> !torch.int
    %int2_43540 = torch.constant.int 2
    %46058 = torch.aten.mul.int %46057, %int2_43540 : !torch.int, !torch.int -> !torch.int
    %int16_43541 = torch.constant.int 16
    %int1_43542 = torch.constant.int 1
    %int128_43543 = torch.constant.int 128
    %46059 = torch.prim.ListConstruct %46058, %int16_43541, %int1_43542, %int128_43543 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46060 = torch.aten.view %46056, %46059 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46060, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46061 = torch.prim.ListConstruct %45994 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_43544 = torch.constant.bool false
    %46062 = torch.aten.index_put %46060, %46061, %46010, %false_43544 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46062, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_43545 = torch.constant.int 32
    %int2_43546 = torch.constant.int 2
    %int16_43547 = torch.constant.int 16
    %int1_43548 = torch.constant.int 1
    %int128_43549 = torch.constant.int 128
    %46063 = torch.prim.ListConstruct %3032, %int32_43545, %int2_43546, %int16_43547, %int1_43548, %int128_43549 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46064 = torch.aten.view %46062, %46063 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46064, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_43550 = torch.constant.int 131072
    %46065 = torch.prim.ListConstruct %3032, %int131072_43550 : (!torch.int, !torch.int) -> !torch.list<int>
    %46066 = torch.aten.view %46064, %46065 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %46066, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_43551 = torch.constant.int 32
    %int2_43552 = torch.constant.int 2
    %int16_43553 = torch.constant.int 16
    %int1_43554 = torch.constant.int 1
    %int128_43555 = torch.constant.int 128
    %46067 = torch.prim.ListConstruct %3035, %int32_43551, %int2_43552, %int16_43553, %int1_43554, %int128_43555 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46068 = torch.aten.view %44217, %46067 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46068, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_43556 = torch.constant.int 32
    %46069 = torch.aten.mul.int %3035, %int32_43556 : !torch.int, !torch.int -> !torch.int
    %int2_43557 = torch.constant.int 2
    %46070 = torch.aten.mul.int %46069, %int2_43557 : !torch.int, !torch.int -> !torch.int
    %int16_43558 = torch.constant.int 16
    %int1_43559 = torch.constant.int 1
    %int128_43560 = torch.constant.int 128
    %46071 = torch.prim.ListConstruct %46070, %int16_43558, %int1_43559, %int128_43560 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46072 = torch.aten.view %46068, %46071 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46072, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46073 = torch.prim.ListConstruct %45996 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_43561 = torch.constant.bool false
    %46074 = torch.aten.index_put %46072, %46073, %46012, %false_43561 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46074, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_43562 = torch.constant.int 32
    %int2_43563 = torch.constant.int 2
    %int16_43564 = torch.constant.int 16
    %int1_43565 = torch.constant.int 1
    %int128_43566 = torch.constant.int 128
    %46075 = torch.prim.ListConstruct %3035, %int32_43562, %int2_43563, %int16_43564, %int1_43565, %int128_43566 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46076 = torch.aten.view %46074, %46075 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46076, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_43567 = torch.constant.int 131072
    %46077 = torch.prim.ListConstruct %3035, %int131072_43567 : (!torch.int, !torch.int) -> !torch.list<int>
    %46078 = torch.aten.view %46076, %46077 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %46078, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_43568 = torch.constant.int 32
    %int2_43569 = torch.constant.int 2
    %int16_43570 = torch.constant.int 16
    %int1_43571 = torch.constant.int 1
    %int128_43572 = torch.constant.int 128
    %46079 = torch.prim.ListConstruct %3038, %int32_43568, %int2_43569, %int16_43570, %int1_43571, %int128_43572 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46080 = torch.aten.view %44229, %46079 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46080, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_43573 = torch.constant.int 32
    %46081 = torch.aten.mul.int %3038, %int32_43573 : !torch.int, !torch.int -> !torch.int
    %int2_43574 = torch.constant.int 2
    %46082 = torch.aten.mul.int %46081, %int2_43574 : !torch.int, !torch.int -> !torch.int
    %int16_43575 = torch.constant.int 16
    %int1_43576 = torch.constant.int 1
    %int128_43577 = torch.constant.int 128
    %46083 = torch.prim.ListConstruct %46082, %int16_43575, %int1_43576, %int128_43577 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46084 = torch.aten.view %46080, %46083 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46084, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46085 = torch.prim.ListConstruct %45998 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_43578 = torch.constant.bool false
    %46086 = torch.aten.index_put %46084, %46085, %46014, %false_43578 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46086, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_43579 = torch.constant.int 32
    %int2_43580 = torch.constant.int 2
    %int16_43581 = torch.constant.int 16
    %int1_43582 = torch.constant.int 1
    %int128_43583 = torch.constant.int 128
    %46087 = torch.prim.ListConstruct %3038, %int32_43579, %int2_43580, %int16_43581, %int1_43582, %int128_43583 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46088 = torch.aten.view %46086, %46087 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46088, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_43584 = torch.constant.int 131072
    %46089 = torch.prim.ListConstruct %3038, %int131072_43584 : (!torch.int, !torch.int) -> !torch.list<int>
    %46090 = torch.aten.view %46088, %46089 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %46090, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_43585 = torch.constant.int 32
    %int2_43586 = torch.constant.int 2
    %int16_43587 = torch.constant.int 16
    %int1_43588 = torch.constant.int 1
    %int128_43589 = torch.constant.int 128
    %46091 = torch.prim.ListConstruct %3041, %int32_43585, %int2_43586, %int16_43587, %int1_43588, %int128_43589 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46092 = torch.aten.view %44241, %46091 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46092, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_43590 = torch.constant.int 32
    %46093 = torch.aten.mul.int %3041, %int32_43590 : !torch.int, !torch.int -> !torch.int
    %int2_43591 = torch.constant.int 2
    %46094 = torch.aten.mul.int %46093, %int2_43591 : !torch.int, !torch.int -> !torch.int
    %int16_43592 = torch.constant.int 16
    %int1_43593 = torch.constant.int 1
    %int128_43594 = torch.constant.int 128
    %46095 = torch.prim.ListConstruct %46094, %int16_43592, %int1_43593, %int128_43594 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46096 = torch.aten.view %46092, %46095 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46096, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46097 = torch.prim.ListConstruct %46000 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_43595 = torch.constant.bool false
    %46098 = torch.aten.index_put %46096, %46097, %46016, %false_43595 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46098, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_43596 = torch.constant.int 32
    %int2_43597 = torch.constant.int 2
    %int16_43598 = torch.constant.int 16
    %int1_43599 = torch.constant.int 1
    %int128_43600 = torch.constant.int 128
    %46099 = torch.prim.ListConstruct %3041, %int32_43596, %int2_43597, %int16_43598, %int1_43599, %int128_43600 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46100 = torch.aten.view %46098, %46099 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46100, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_43601 = torch.constant.int 131072
    %46101 = torch.prim.ListConstruct %3041, %int131072_43601 : (!torch.int, !torch.int) -> !torch.list<int>
    %46102 = torch.aten.view %46100, %46101 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %46102, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_43602 = torch.constant.int 32
    %int2_43603 = torch.constant.int 2
    %int16_43604 = torch.constant.int 16
    %int1_43605 = torch.constant.int 1
    %int128_43606 = torch.constant.int 128
    %46103 = torch.prim.ListConstruct %3044, %int32_43602, %int2_43603, %int16_43604, %int1_43605, %int128_43606 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46104 = torch.aten.view %44253, %46103 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46104, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_43607 = torch.constant.int 32
    %46105 = torch.aten.mul.int %3044, %int32_43607 : !torch.int, !torch.int -> !torch.int
    %int2_43608 = torch.constant.int 2
    %46106 = torch.aten.mul.int %46105, %int2_43608 : !torch.int, !torch.int -> !torch.int
    %int16_43609 = torch.constant.int 16
    %int1_43610 = torch.constant.int 1
    %int128_43611 = torch.constant.int 128
    %46107 = torch.prim.ListConstruct %46106, %int16_43609, %int1_43610, %int128_43611 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46108 = torch.aten.view %46104, %46107 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46108, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %46109 = torch.prim.ListConstruct %46002 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_43612 = torch.constant.bool false
    %46110 = torch.aten.index_put %46108, %46109, %46018, %false_43612 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %46110, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_43613 = torch.constant.int 32
    %int2_43614 = torch.constant.int 2
    %int16_43615 = torch.constant.int 16
    %int1_43616 = torch.constant.int 1
    %int128_43617 = torch.constant.int 128
    %46111 = torch.prim.ListConstruct %3044, %int32_43613, %int2_43614, %int16_43615, %int1_43616, %int128_43617 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46112 = torch.aten.view %46110, %46111 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %46112, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_43618 = torch.constant.int 131072
    %46113 = torch.prim.ListConstruct %3044, %int131072_43618 : (!torch.int, !torch.int) -> !torch.list<int>
    %46114 = torch.aten.view %46112, %46113 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %46114, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
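    // NOTE (inferred annotation): head replication in the style of grouped-query
    // attention: each [4,?,1,128] tensor is unsqueezed to [4,?,1,1,128], expanded
    // (broadcast view) to [4,?,1,4,128], and viewed as [4,?,4,128], so one K/V
    // head serves 4 query heads per shard.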
    %int-2_43619 = torch.constant.int -2
    %46115 = torch.aten.unsqueeze %45729, %int-2_43619 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43620 = torch.constant.int -2
    %46116 = torch.aten.unsqueeze %45744, %int-2_43620 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43621 = torch.constant.int -2
    %46117 = torch.aten.unsqueeze %45759, %int-2_43621 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43622 = torch.constant.int -2
    %46118 = torch.aten.unsqueeze %45774, %int-2_43622 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43623 = torch.constant.int -2
    %46119 = torch.aten.unsqueeze %45789, %int-2_43623 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43624 = torch.constant.int -2
    %46120 = torch.aten.unsqueeze %45804, %int-2_43624 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43625 = torch.constant.int -2
    %46121 = torch.aten.unsqueeze %45819, %int-2_43625 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43626 = torch.constant.int -2
    %46122 = torch.aten.unsqueeze %45834, %int-2_43626 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int4_43627 = torch.constant.int 4
    %int1_43628 = torch.constant.int 1
    %int4_43629 = torch.constant.int 4
    %int128_43630 = torch.constant.int 128
    %46123 = torch.prim.ListConstruct %int4_43627, %45715, %int1_43628, %int4_43629, %int128_43630 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43631 = torch.constant.bool false
    %46124 = torch.aten.expand %46115, %46123, %false_43631 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46124, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43632 = torch.constant.int 4
    %int1_43633 = torch.constant.int 1
    %int4_43634 = torch.constant.int 4
    %int128_43635 = torch.constant.int 128
    %46125 = torch.prim.ListConstruct %int4_43632, %45715, %int1_43633, %int4_43634, %int128_43635 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43636 = torch.constant.bool false
    %46126 = torch.aten.expand %46116, %46125, %false_43636 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43637 = torch.constant.int 4
    %int1_43638 = torch.constant.int 1
    %int4_43639 = torch.constant.int 4
    %int128_43640 = torch.constant.int 128
    %46127 = torch.prim.ListConstruct %int4_43637, %45715, %int1_43638, %int4_43639, %int128_43640 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43641 = torch.constant.bool false
    %46128 = torch.aten.expand %46117, %46127, %false_43641 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43642 = torch.constant.int 4
    %int1_43643 = torch.constant.int 1
    %int4_43644 = torch.constant.int 4
    %int128_43645 = torch.constant.int 128
    %46129 = torch.prim.ListConstruct %int4_43642, %45715, %int1_43643, %int4_43644, %int128_43645 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43646 = torch.constant.bool false
    %46130 = torch.aten.expand %46118, %46129, %false_43646 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43647 = torch.constant.int 4
    %int1_43648 = torch.constant.int 1
    %int4_43649 = torch.constant.int 4
    %int128_43650 = torch.constant.int 128
    %46131 = torch.prim.ListConstruct %int4_43647, %45715, %int1_43648, %int4_43649, %int128_43650 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43651 = torch.constant.bool false
    %46132 = torch.aten.expand %46119, %46131, %false_43651 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43652 = torch.constant.int 4
    %int1_43653 = torch.constant.int 1
    %int4_43654 = torch.constant.int 4
    %int128_43655 = torch.constant.int 128
    %46133 = torch.prim.ListConstruct %int4_43652, %45715, %int1_43653, %int4_43654, %int128_43655 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43656 = torch.constant.bool false
    %46134 = torch.aten.expand %46120, %46133, %false_43656 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43657 = torch.constant.int 4
    %int1_43658 = torch.constant.int 1
    %int4_43659 = torch.constant.int 4
    %int128_43660 = torch.constant.int 128
    %46135 = torch.prim.ListConstruct %int4_43657, %45715, %int1_43658, %int4_43659, %int128_43660 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43661 = torch.constant.bool false
    %46136 = torch.aten.expand %46121, %46135, %false_43661 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43662 = torch.constant.int 4
    %int1_43663 = torch.constant.int 1
    %int4_43664 = torch.constant.int 4
    %int128_43665 = torch.constant.int 128
    %46137 = torch.prim.ListConstruct %int4_43662, %45715, %int1_43663, %int4_43664, %int128_43665 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43666 = torch.constant.bool false
    %46138 = torch.aten.expand %46122, %46137, %false_43666 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43667 = torch.constant.int 4
    %int4_43668 = torch.constant.int 4
    %int128_43669 = torch.constant.int 128
    %46139 = torch.prim.ListConstruct %int4_43667, %45715, %int4_43668, %int128_43669 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46140 = torch.aten.view %46124, %46139 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43670 = torch.constant.int 4
    %int4_43671 = torch.constant.int 4
    %int128_43672 = torch.constant.int 128
    %46141 = torch.prim.ListConstruct %int4_43670, %45715, %int4_43671, %int128_43672 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46142 = torch.aten.view %46126, %46141 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43673 = torch.constant.int 4
    %int4_43674 = torch.constant.int 4
    %int128_43675 = torch.constant.int 128
    %46143 = torch.prim.ListConstruct %int4_43673, %45715, %int4_43674, %int128_43675 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46144 = torch.aten.view %46128, %46143 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43676 = torch.constant.int 4
    %int4_43677 = torch.constant.int 4
    %int128_43678 = torch.constant.int 128
    %46145 = torch.prim.ListConstruct %int4_43676, %45715, %int4_43677, %int128_43678 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46146 = torch.aten.view %46130, %46145 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43679 = torch.constant.int 4
    %int4_43680 = torch.constant.int 4
    %int128_43681 = torch.constant.int 128
    %46147 = torch.prim.ListConstruct %int4_43679, %45715, %int4_43680, %int128_43681 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46148 = torch.aten.view %46132, %46147 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43682 = torch.constant.int 4
    %int4_43683 = torch.constant.int 4
    %int128_43684 = torch.constant.int 128
    %46149 = torch.prim.ListConstruct %int4_43682, %45715, %int4_43683, %int128_43684 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46150 = torch.aten.view %46134, %46149 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43685 = torch.constant.int 4
    %int4_43686 = torch.constant.int 4
    %int128_43687 = torch.constant.int 128
    %46151 = torch.prim.ListConstruct %int4_43685, %45715, %int4_43686, %int128_43687 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46152 = torch.aten.view %46136, %46151 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43688 = torch.constant.int 4
    %int4_43689 = torch.constant.int 4
    %int128_43690 = torch.constant.int 128
    %46153 = torch.prim.ListConstruct %int4_43688, %45715, %int4_43689, %int128_43690 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46154 = torch.aten.view %46138, %46153 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
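    // NOTE (inferred annotation): the same unsqueeze/expand/view replication is
    // applied to a second group of [4,?,1,128] tensors, with the dynamic sequence
    // extent re-read via torch.aten.size.int on %45428.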
    %int-2_43691 = torch.constant.int -2
    %46155 = torch.aten.unsqueeze %45504, %int-2_43691 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43692 = torch.constant.int -2
    %46156 = torch.aten.unsqueeze %45506, %int-2_43692 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43693 = torch.constant.int -2
    %46157 = torch.aten.unsqueeze %45508, %int-2_43693 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43694 = torch.constant.int -2
    %46158 = torch.aten.unsqueeze %45510, %int-2_43694 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43695 = torch.constant.int -2
    %46159 = torch.aten.unsqueeze %45512, %int-2_43695 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43696 = torch.constant.int -2
    %46160 = torch.aten.unsqueeze %45514, %int-2_43696 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43697 = torch.constant.int -2
    %46161 = torch.aten.unsqueeze %45516, %int-2_43697 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_43698 = torch.constant.int -2
    %46162 = torch.aten.unsqueeze %45518, %int-2_43698 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %46162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_43699 = torch.constant.int 1
    %46163 = torch.aten.size.int %45428, %int1_43699 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_43700 = torch.constant.int 4
    %int1_43701 = torch.constant.int 1
    %int4_43702 = torch.constant.int 4
    %int128_43703 = torch.constant.int 128
    %46164 = torch.prim.ListConstruct %int4_43700, %46163, %int1_43701, %int4_43702, %int128_43703 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43704 = torch.constant.bool false
    %46165 = torch.aten.expand %46155, %46164, %false_43704 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43705 = torch.constant.int 4
    %int1_43706 = torch.constant.int 1
    %int4_43707 = torch.constant.int 4
    %int128_43708 = torch.constant.int 128
    %46166 = torch.prim.ListConstruct %int4_43705, %46163, %int1_43706, %int4_43707, %int128_43708 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43709 = torch.constant.bool false
    %46167 = torch.aten.expand %46156, %46166, %false_43709 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43710 = torch.constant.int 4
    %int1_43711 = torch.constant.int 1
    %int4_43712 = torch.constant.int 4
    %int128_43713 = torch.constant.int 128
    %46168 = torch.prim.ListConstruct %int4_43710, %46163, %int1_43711, %int4_43712, %int128_43713 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43714 = torch.constant.bool false
    %46169 = torch.aten.expand %46157, %46168, %false_43714 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43715 = torch.constant.int 4
    %int1_43716 = torch.constant.int 1
    %int4_43717 = torch.constant.int 4
    %int128_43718 = torch.constant.int 128
    %46170 = torch.prim.ListConstruct %int4_43715, %46163, %int1_43716, %int4_43717, %int128_43718 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43719 = torch.constant.bool false
    %46171 = torch.aten.expand %46158, %46170, %false_43719 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43720 = torch.constant.int 4
    %int1_43721 = torch.constant.int 1
    %int4_43722 = torch.constant.int 4
    %int128_43723 = torch.constant.int 128
    %46172 = torch.prim.ListConstruct %int4_43720, %46163, %int1_43721, %int4_43722, %int128_43723 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43724 = torch.constant.bool false
    %46173 = torch.aten.expand %46159, %46172, %false_43724 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43725 = torch.constant.int 4
    %int1_43726 = torch.constant.int 1
    %int4_43727 = torch.constant.int 4
    %int128_43728 = torch.constant.int 128
    %46174 = torch.prim.ListConstruct %int4_43725, %46163, %int1_43726, %int4_43727, %int128_43728 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43729 = torch.constant.bool false
    %46175 = torch.aten.expand %46160, %46174, %false_43729 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43730 = torch.constant.int 4
    %int1_43731 = torch.constant.int 1
    %int4_43732 = torch.constant.int 4
    %int128_43733 = torch.constant.int 128
    %46176 = torch.prim.ListConstruct %int4_43730, %46163, %int1_43731, %int4_43732, %int128_43733 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43734 = torch.constant.bool false
    %46177 = torch.aten.expand %46161, %46176, %false_43734 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43735 = torch.constant.int 4
    %int1_43736 = torch.constant.int 1
    %int4_43737 = torch.constant.int 4
    %int128_43738 = torch.constant.int 128
    %46178 = torch.prim.ListConstruct %int4_43735, %46163, %int1_43736, %int4_43737, %int128_43738 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_43739 = torch.constant.bool false
    %46179 = torch.aten.expand %46162, %46178, %false_43739 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %46179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_43740 = torch.constant.int 4
    %int4_43741 = torch.constant.int 4
    %int128_43742 = torch.constant.int 128
    %46180 = torch.prim.ListConstruct %int4_43740, %46163, %int4_43741, %int128_43742 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46181 = torch.aten.view %46165, %46180 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43743 = torch.constant.int 4
    %int4_43744 = torch.constant.int 4
    %int128_43745 = torch.constant.int 128
    %46182 = torch.prim.ListConstruct %int4_43743, %46163, %int4_43744, %int128_43745 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46183 = torch.aten.view %46167, %46182 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43746 = torch.constant.int 4
    %int4_43747 = torch.constant.int 4
    %int128_43748 = torch.constant.int 128
    %46184 = torch.prim.ListConstruct %int4_43746, %46163, %int4_43747, %int128_43748 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46185 = torch.aten.view %46169, %46184 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43749 = torch.constant.int 4
    %int4_43750 = torch.constant.int 4
    %int128_43751 = torch.constant.int 128
    %46186 = torch.prim.ListConstruct %int4_43749, %46163, %int4_43750, %int128_43751 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46187 = torch.aten.view %46171, %46186 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43752 = torch.constant.int 4
    %int4_43753 = torch.constant.int 4
    %int128_43754 = torch.constant.int 128
    %46188 = torch.prim.ListConstruct %int4_43752, %46163, %int4_43753, %int128_43754 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46189 = torch.aten.view %46173, %46188 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43755 = torch.constant.int 4
    %int4_43756 = torch.constant.int 4
    %int128_43757 = torch.constant.int 128
    %46190 = torch.prim.ListConstruct %int4_43755, %46163, %int4_43756, %int128_43757 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46191 = torch.aten.view %46175, %46190 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43758 = torch.constant.int 4
    %int4_43759 = torch.constant.int 4
    %int128_43760 = torch.constant.int 128
    %46192 = torch.prim.ListConstruct %int4_43758, %46163, %int4_43759, %int128_43760 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46193 = torch.aten.view %46177, %46192 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_43761 = torch.constant.int 4
    %int4_43762 = torch.constant.int 4
    %int128_43763 = torch.constant.int 128
    %46194 = torch.prim.ListConstruct %int4_43761, %46163, %int4_43762, %int128_43763 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46195 = torch.aten.view %46179, %46194 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
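    // NOTE (inferred annotation): the transposes below swap dims 1 and 2, turning
    // each [4,?,4,128] tensor into [4,4,?,128] (batch, heads, seq, head_dim), the
    // customary layout for the attention computation.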
    %int1_43764 = torch.constant.int 1
    %int2_43765 = torch.constant.int 2
    %46196 = torch.aten.transpose.int %45571, %int1_43764, %int2_43765 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46196, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43766 = torch.constant.int 1
    %int2_43767 = torch.constant.int 2
    %46197 = torch.aten.transpose.int %45586, %int1_43766, %int2_43767 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46197, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43768 = torch.constant.int 1
    %int2_43769 = torch.constant.int 2
    %46198 = torch.aten.transpose.int %45601, %int1_43768, %int2_43769 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46198, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43770 = torch.constant.int 1
    %int2_43771 = torch.constant.int 2
    %46199 = torch.aten.transpose.int %45616, %int1_43770, %int2_43771 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46199, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43772 = torch.constant.int 1
    %int2_43773 = torch.constant.int 2
    %46200 = torch.aten.transpose.int %45631, %int1_43772, %int2_43773 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46200, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43774 = torch.constant.int 1
    %int2_43775 = torch.constant.int 2
    %46201 = torch.aten.transpose.int %45646, %int1_43774, %int2_43775 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46201, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43776 = torch.constant.int 1
    %int2_43777 = torch.constant.int 2
    %46202 = torch.aten.transpose.int %45661, %int1_43776, %int2_43777 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46202, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43778 = torch.constant.int 1
    %int2_43779 = torch.constant.int 2
    %46203 = torch.aten.transpose.int %45676, %int1_43778, %int2_43779 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46203, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43780 = torch.constant.int 1
    %int2_43781 = torch.constant.int 2
    %46204 = torch.aten.transpose.int %46140, %int1_43780, %int2_43781 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46204, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43782 = torch.constant.int 1
    %int2_43783 = torch.constant.int 2
    %46205 = torch.aten.transpose.int %46142, %int1_43782, %int2_43783 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46205, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43784 = torch.constant.int 1
    %int2_43785 = torch.constant.int 2
    %46206 = torch.aten.transpose.int %46144, %int1_43784, %int2_43785 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46206, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43786 = torch.constant.int 1
    %int2_43787 = torch.constant.int 2
    %46207 = torch.aten.transpose.int %46146, %int1_43786, %int2_43787 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46207, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43788 = torch.constant.int 1
    %int2_43789 = torch.constant.int 2
    %46208 = torch.aten.transpose.int %46148, %int1_43788, %int2_43789 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46208, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43790 = torch.constant.int 1
    %int2_43791 = torch.constant.int 2
    %46209 = torch.aten.transpose.int %46150, %int1_43790, %int2_43791 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46209, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43792 = torch.constant.int 1
    %int2_43793 = torch.constant.int 2
    %46210 = torch.aten.transpose.int %46152, %int1_43792, %int2_43793 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46210, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43794 = torch.constant.int 1
    %int2_43795 = torch.constant.int 2
    %46211 = torch.aten.transpose.int %46154, %int1_43794, %int2_43795 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46211, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43796 = torch.constant.int 1
    %int2_43797 = torch.constant.int 2
    %46212 = torch.aten.transpose.int %46181, %int1_43796, %int2_43797 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46212, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43798 = torch.constant.int 1
    %int2_43799 = torch.constant.int 2
    %46213 = torch.aten.transpose.int %46183, %int1_43798, %int2_43799 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46213, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43800 = torch.constant.int 1
    %int2_43801 = torch.constant.int 2
    %46214 = torch.aten.transpose.int %46185, %int1_43800, %int2_43801 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46214, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43802 = torch.constant.int 1
    %int2_43803 = torch.constant.int 2
    %46215 = torch.aten.transpose.int %46187, %int1_43802, %int2_43803 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46215, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43804 = torch.constant.int 1
    %int2_43805 = torch.constant.int 2
    %46216 = torch.aten.transpose.int %46189, %int1_43804, %int2_43805 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46216, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43806 = torch.constant.int 1
    %int2_43807 = torch.constant.int 2
    %46217 = torch.aten.transpose.int %46191, %int1_43806, %int2_43807 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46217, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43808 = torch.constant.int 1
    %int2_43809 = torch.constant.int 2
    %46218 = torch.aten.transpose.int %46193, %int1_43808, %int2_43809 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46218, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_43810 = torch.constant.int 1
    %int2_43811 = torch.constant.int 2
    %46219 = torch.aten.transpose.int %46195, %int1_43810, %int2_43811 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %46219, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
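    // Per-shard scaled dot-product flash attention: dropout_p = 0.0, is_causal = true, no explicit mask or scale.
    // Only result #0 (the [4, 4, seq, 128] attention output) is consumed here; result #1 is the f32 logsumexp.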
    %float0.000000e00_43812 = torch.constant.float 0.000000e+00
    %true_43813 = torch.constant.bool true
    %none_43814 = torch.constant.none
    %none_43815 = torch.constant.none
    %46220:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%46196, %46204, %46212, %float0.000000e00_43812, %true_43813, %none_43814, %none_43815) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %46220#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_43816 = torch.constant.float 0.000000e+00
    %true_43817 = torch.constant.bool true
    %none_43818 = torch.constant.none
    %none_43819 = torch.constant.none
    %46221:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%46197, %46205, %46213, %float0.000000e00_43816, %true_43817, %none_43818, %none_43819) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %46221#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_43820 = torch.constant.float 0.000000e+00
    %true_43821 = torch.constant.bool true
    %none_43822 = torch.constant.none
    %none_43823 = torch.constant.none
    %46222:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%46198, %46206, %46214, %float0.000000e00_43820, %true_43821, %none_43822, %none_43823) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %46222#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_43824 = torch.constant.float 0.000000e+00
    %true_43825 = torch.constant.bool true
    %none_43826 = torch.constant.none
    %none_43827 = torch.constant.none
    %46223:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%46199, %46207, %46215, %float0.000000e00_43824, %true_43825, %none_43826, %none_43827) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %46223#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_43828 = torch.constant.float 0.000000e+00
    %true_43829 = torch.constant.bool true
    %none_43830 = torch.constant.none
    %none_43831 = torch.constant.none
    %46224:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%46200, %46208, %46216, %float0.000000e00_43828, %true_43829, %none_43830, %none_43831) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %46224#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_43832 = torch.constant.float 0.000000e+00
    %true_43833 = torch.constant.bool true
    %none_43834 = torch.constant.none
    %none_43835 = torch.constant.none
    %46225:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%46201, %46209, %46217, %float0.000000e00_43832, %true_43833, %none_43834, %none_43835) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %46225#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_43836 = torch.constant.float 0.000000e+00
    %true_43837 = torch.constant.bool true
    %none_43838 = torch.constant.none
    %none_43839 = torch.constant.none
    %46226:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%46202, %46210, %46218, %float0.000000e00_43836, %true_43837, %none_43838, %none_43839) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %46226#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_43840 = torch.constant.float 0.000000e+00
    %true_43841 = torch.constant.bool true
    %none_43842 = torch.constant.none
    %none_43843 = torch.constant.none
    %46227:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%46203, %46211, %46219, %float0.000000e00_43840, %true_43841, %none_43842, %none_43843) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %46227#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
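    // Transpose each attention output back to [4, seq, 4, 128] ([batch, seq, heads, head_dim]).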
    %int1_43844 = torch.constant.int 1
    %int2_43845 = torch.constant.int 2
    %46228 = torch.aten.transpose.int %46220#0, %int1_43844, %int2_43845 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_43846 = torch.constant.int 1
    %int2_43847 = torch.constant.int 2
    %46229 = torch.aten.transpose.int %46221#0, %int1_43846, %int2_43847 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_43848 = torch.constant.int 1
    %int2_43849 = torch.constant.int 2
    %46230 = torch.aten.transpose.int %46222#0, %int1_43848, %int2_43849 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_43850 = torch.constant.int 1
    %int2_43851 = torch.constant.int 2
    %46231 = torch.aten.transpose.int %46223#0, %int1_43850, %int2_43851 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_43852 = torch.constant.int 1
    %int2_43853 = torch.constant.int 2
    %46232 = torch.aten.transpose.int %46224#0, %int1_43852, %int2_43853 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_43854 = torch.constant.int 1
    %int2_43855 = torch.constant.int 2
    %46233 = torch.aten.transpose.int %46225#0, %int1_43854, %int2_43855 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_43856 = torch.constant.int 1
    %int2_43857 = torch.constant.int 2
    %46234 = torch.aten.transpose.int %46226#0, %int1_43856, %int2_43857 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_43858 = torch.constant.int 1
    %int2_43859 = torch.constant.int 2
    %46235 = torch.aten.transpose.int %46227#0, %int1_43858, %int2_43859 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %46235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
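    // Collapse the 4 heads x 128 head_dim into a single 512-wide feature axis per shard.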
    %int4_43860 = torch.constant.int 4
    %int512_43861 = torch.constant.int 512
    %46236 = torch.prim.ListConstruct %int4_43860, %45557, %int512_43861 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46237 = torch.aten.view %46228, %46236 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %46237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_43862 = torch.constant.int 4
    %int512_43863 = torch.constant.int 512
    %46238 = torch.prim.ListConstruct %int4_43862, %45572, %int512_43863 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46239 = torch.aten.view %46229, %46238 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %46239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_43864 = torch.constant.int 4
    %int512_43865 = torch.constant.int 512
    %46240 = torch.prim.ListConstruct %int4_43864, %45587, %int512_43865 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46241 = torch.aten.view %46230, %46240 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %46241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_43866 = torch.constant.int 4
    %int512_43867 = torch.constant.int 512
    %46242 = torch.prim.ListConstruct %int4_43866, %45602, %int512_43867 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46243 = torch.aten.view %46231, %46242 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %46243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_43868 = torch.constant.int 4
    %int512_43869 = torch.constant.int 512
    %46244 = torch.prim.ListConstruct %int4_43868, %45617, %int512_43869 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46245 = torch.aten.view %46232, %46244 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %46245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_43870 = torch.constant.int 4
    %int512_43871 = torch.constant.int 512
    %46246 = torch.prim.ListConstruct %int4_43870, %45632, %int512_43871 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46247 = torch.aten.view %46233, %46246 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %46247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_43872 = torch.constant.int 4
    %int512_43873 = torch.constant.int 512
    %46248 = torch.prim.ListConstruct %int4_43872, %45647, %int512_43873 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46249 = torch.aten.view %46234, %46248 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %46249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_43874 = torch.constant.int 4
    %int512_43875 = torch.constant.int 512
    %46250 = torch.prim.ListConstruct %int4_43874, %45662, %int512_43875 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46251 = torch.aten.view %46235, %46250 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %46251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
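    // Transpose each shard's output-projection weight from [4096, 512] to [512, 4096] for the matmuls below.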
    %int1_43876 = torch.constant.int 1
    %int0_43877 = torch.constant.int 0
    %46252 = torch.prim.ListConstruct %int1_43876, %int0_43877 : (!torch.int, !torch.int) -> !torch.list<int>
    %46253 = torch.aten.permute %1696, %46252 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_43878 = torch.constant.int 1
    %int0_43879 = torch.constant.int 0
    %46254 = torch.prim.ListConstruct %int1_43878, %int0_43879 : (!torch.int, !torch.int) -> !torch.list<int>
    %46255 = torch.aten.permute %1697, %46254 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_43880 = torch.constant.int 1
    %int0_43881 = torch.constant.int 0
    %46256 = torch.prim.ListConstruct %int1_43880, %int0_43881 : (!torch.int, !torch.int) -> !torch.list<int>
    %46257 = torch.aten.permute %1698, %46256 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_43882 = torch.constant.int 1
    %int0_43883 = torch.constant.int 0
    %46258 = torch.prim.ListConstruct %int1_43882, %int0_43883 : (!torch.int, !torch.int) -> !torch.list<int>
    %46259 = torch.aten.permute %1699, %46258 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_43884 = torch.constant.int 1
    %int0_43885 = torch.constant.int 0
    %46260 = torch.prim.ListConstruct %int1_43884, %int0_43885 : (!torch.int, !torch.int) -> !torch.list<int>
    %46261 = torch.aten.permute %1700, %46260 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_43886 = torch.constant.int 1
    %int0_43887 = torch.constant.int 0
    %46262 = torch.prim.ListConstruct %int1_43886, %int0_43887 : (!torch.int, !torch.int) -> !torch.list<int>
    %46263 = torch.aten.permute %1701, %46262 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_43888 = torch.constant.int 1
    %int0_43889 = torch.constant.int 0
    %46264 = torch.prim.ListConstruct %int1_43888, %int0_43889 : (!torch.int, !torch.int) -> !torch.list<int>
    %46265 = torch.aten.permute %1702, %46264 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_43890 = torch.constant.int 1
    %int0_43891 = torch.constant.int 0
    %46266 = torch.prim.ListConstruct %int1_43890, %int0_43891 : (!torch.int, !torch.int) -> !torch.list<int>
    %46267 = torch.aten.permute %1703, %46266 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
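    // Per-shard output projection: flatten to [4 * seq, 512], matmul with the permuted weight,
    // then view the result back to [4, seq, 4096].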
    %int4_43892 = torch.constant.int 4
    %46268 = torch.aten.mul.int %int4_43892, %45557 : !torch.int, !torch.int -> !torch.int
    %int512_43893 = torch.constant.int 512
    %46269 = torch.prim.ListConstruct %46268, %int512_43893 : (!torch.int, !torch.int) -> !torch.list<int>
    %46270 = torch.aten.view %46237, %46269 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %46270, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %46271 = torch.aten.mm %46270, %46253 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46271, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_43894 = torch.constant.int 4
    %int4096_43895 = torch.constant.int 4096
    %46272 = torch.prim.ListConstruct %int4_43894, %45557, %int4096_43895 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46273 = torch.aten.view %46271, %46272 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_43896 = torch.constant.int 4
    %46274 = torch.aten.mul.int %int4_43896, %45572 : !torch.int, !torch.int -> !torch.int
    %int512_43897 = torch.constant.int 512
    %46275 = torch.prim.ListConstruct %46274, %int512_43897 : (!torch.int, !torch.int) -> !torch.list<int>
    %46276 = torch.aten.view %46239, %46275 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %46276, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %46277 = torch.aten.mm %46276, %46255 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46277, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_43898 = torch.constant.int 4
    %int4096_43899 = torch.constant.int 4096
    %46278 = torch.prim.ListConstruct %int4_43898, %45572, %int4096_43899 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46279 = torch.aten.view %46277, %46278 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_43900 = torch.constant.int 4
    %46280 = torch.aten.mul.int %int4_43900, %45587 : !torch.int, !torch.int -> !torch.int
    %int512_43901 = torch.constant.int 512
    %46281 = torch.prim.ListConstruct %46280, %int512_43901 : (!torch.int, !torch.int) -> !torch.list<int>
    %46282 = torch.aten.view %46241, %46281 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %46282, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %46283 = torch.aten.mm %46282, %46257 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46283, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_43902 = torch.constant.int 4
    %int4096_43903 = torch.constant.int 4096
    %46284 = torch.prim.ListConstruct %int4_43902, %45587, %int4096_43903 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46285 = torch.aten.view %46283, %46284 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_43904 = torch.constant.int 4
    %46286 = torch.aten.mul.int %int4_43904, %45602 : !torch.int, !torch.int -> !torch.int
    %int512_43905 = torch.constant.int 512
    %46287 = torch.prim.ListConstruct %46286, %int512_43905 : (!torch.int, !torch.int) -> !torch.list<int>
    %46288 = torch.aten.view %46243, %46287 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %46288, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %46289 = torch.aten.mm %46288, %46259 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46289, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_43906 = torch.constant.int 4
    %int4096_43907 = torch.constant.int 4096
    %46290 = torch.prim.ListConstruct %int4_43906, %45602, %int4096_43907 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46291 = torch.aten.view %46289, %46290 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_43908 = torch.constant.int 4
    %46292 = torch.aten.mul.int %int4_43908, %45617 : !torch.int, !torch.int -> !torch.int
    %int512_43909 = torch.constant.int 512
    %46293 = torch.prim.ListConstruct %46292, %int512_43909 : (!torch.int, !torch.int) -> !torch.list<int>
    %46294 = torch.aten.view %46245, %46293 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %46294, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %46295 = torch.aten.mm %46294, %46261 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46295, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_43910 = torch.constant.int 4
    %int4096_43911 = torch.constant.int 4096
    %46296 = torch.prim.ListConstruct %int4_43910, %45617, %int4096_43911 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46297 = torch.aten.view %46295, %46296 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_43912 = torch.constant.int 4
    %46298 = torch.aten.mul.int %int4_43912, %45632 : !torch.int, !torch.int -> !torch.int
    %int512_43913 = torch.constant.int 512
    %46299 = torch.prim.ListConstruct %46298, %int512_43913 : (!torch.int, !torch.int) -> !torch.list<int>
    %46300 = torch.aten.view %46247, %46299 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %46300, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %46301 = torch.aten.mm %46300, %46263 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46301, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_43914 = torch.constant.int 4
    %int4096_43915 = torch.constant.int 4096
    %46302 = torch.prim.ListConstruct %int4_43914, %45632, %int4096_43915 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46303 = torch.aten.view %46301, %46302 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_43916 = torch.constant.int 4
    %46304 = torch.aten.mul.int %int4_43916, %45647 : !torch.int, !torch.int -> !torch.int
    %int512_43917 = torch.constant.int 512
    %46305 = torch.prim.ListConstruct %46304, %int512_43917 : (!torch.int, !torch.int) -> !torch.list<int>
    %46306 = torch.aten.view %46249, %46305 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %46306, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %46307 = torch.aten.mm %46306, %46265 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46307, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_43918 = torch.constant.int 4
    %int4096_43919 = torch.constant.int 4096
    %46308 = torch.prim.ListConstruct %int4_43918, %45647, %int4096_43919 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46309 = torch.aten.view %46307, %46308 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_43920 = torch.constant.int 4
    %46310 = torch.aten.mul.int %int4_43920, %45662 : !torch.int, !torch.int -> !torch.int
    %int512_43921 = torch.constant.int 512
    %46311 = torch.prim.ListConstruct %46310, %int512_43921 : (!torch.int, !torch.int) -> !torch.list<int>
    %46312 = torch.aten.view %46251, %46311 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %46312, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %46313 = torch.aten.mm %46312, %46267 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46313, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_43922 = torch.constant.int 4
    %int4096_43923 = torch.constant.int 4096
    %46314 = torch.prim.ListConstruct %int4_43922, %45662, %int4096_43923 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46315 = torch.aten.view %46313, %46314 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
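    // Epilogue of the sharded attention block: each device pulls the other seven partial projections
    // via flow.tensor.transfer and sums them locally -- effectively a sum all-reduce unrolled per device.
    // First the gather-and-sum onto @__device_0: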
    %46316 = torch_c.to_builtin_tensor %46279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43924 = arith.constant 1 : index
    %dim_43925 = tensor.dim %46316, %c1_43924 : tensor<4x?x4096xf16>
    %46317 = flow.tensor.transfer %46316 : tensor<4x?x4096xf16>{%dim_43925} to #hal.device.promise<@__device_0>
    %46318 = torch_c.from_builtin_tensor %46317 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46319 = torch_c.to_builtin_tensor %46285 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43926 = arith.constant 1 : index
    %dim_43927 = tensor.dim %46319, %c1_43926 : tensor<4x?x4096xf16>
    %46320 = flow.tensor.transfer %46319 : tensor<4x?x4096xf16>{%dim_43927} to #hal.device.promise<@__device_0>
    %46321 = torch_c.from_builtin_tensor %46320 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46322 = torch_c.to_builtin_tensor %46291 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43928 = arith.constant 1 : index
    %dim_43929 = tensor.dim %46322, %c1_43928 : tensor<4x?x4096xf16>
    %46323 = flow.tensor.transfer %46322 : tensor<4x?x4096xf16>{%dim_43929} to #hal.device.promise<@__device_0>
    %46324 = torch_c.from_builtin_tensor %46323 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46325 = torch_c.to_builtin_tensor %46297 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43930 = arith.constant 1 : index
    %dim_43931 = tensor.dim %46325, %c1_43930 : tensor<4x?x4096xf16>
    %46326 = flow.tensor.transfer %46325 : tensor<4x?x4096xf16>{%dim_43931} to #hal.device.promise<@__device_0>
    %46327 = torch_c.from_builtin_tensor %46326 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46328 = torch_c.to_builtin_tensor %46303 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43932 = arith.constant 1 : index
    %dim_43933 = tensor.dim %46328, %c1_43932 : tensor<4x?x4096xf16>
    %46329 = flow.tensor.transfer %46328 : tensor<4x?x4096xf16>{%dim_43933} to #hal.device.promise<@__device_0>
    %46330 = torch_c.from_builtin_tensor %46329 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46331 = torch_c.to_builtin_tensor %46309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43934 = arith.constant 1 : index
    %dim_43935 = tensor.dim %46331, %c1_43934 : tensor<4x?x4096xf16>
    %46332 = flow.tensor.transfer %46331 : tensor<4x?x4096xf16>{%dim_43935} to #hal.device.promise<@__device_0>
    %46333 = torch_c.from_builtin_tensor %46332 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46334 = torch_c.to_builtin_tensor %46315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43936 = arith.constant 1 : index
    %dim_43937 = tensor.dim %46334, %c1_43936 : tensor<4x?x4096xf16>
    %46335 = flow.tensor.transfer %46334 : tensor<4x?x4096xf16>{%dim_43937} to #hal.device.promise<@__device_0>
    %46336 = torch_c.from_builtin_tensor %46335 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43938 = torch.constant.int 1
    %46337 = torch.aten.add.Tensor %46273, %46318, %int1_43938 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43939 = torch.constant.int 1
    %46338 = torch.aten.add.Tensor %46337, %46321, %int1_43939 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43940 = torch.constant.int 1
    %46339 = torch.aten.add.Tensor %46338, %46324, %int1_43940 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43941 = torch.constant.int 1
    %46340 = torch.aten.add.Tensor %46339, %46327, %int1_43941 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43942 = torch.constant.int 1
    %46341 = torch.aten.add.Tensor %46340, %46330, %int1_43942 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43943 = torch.constant.int 1
    %46342 = torch.aten.add.Tensor %46341, %46333, %int1_43943 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43944 = torch.constant.int 1
    %46343 = torch.aten.add.Tensor %46342, %46336, %int1_43944 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
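    // Same gather-and-sum of the seven remote partials onto @__device_1.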
    %46344 = torch_c.to_builtin_tensor %46273 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43945 = arith.constant 1 : index
    %dim_43946 = tensor.dim %46344, %c1_43945 : tensor<4x?x4096xf16>
    %46345 = flow.tensor.transfer %46344 : tensor<4x?x4096xf16>{%dim_43946} to #hal.device.promise<@__device_1>
    %46346 = torch_c.from_builtin_tensor %46345 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46347 = torch_c.to_builtin_tensor %46285 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43947 = arith.constant 1 : index
    %dim_43948 = tensor.dim %46347, %c1_43947 : tensor<4x?x4096xf16>
    %46348 = flow.tensor.transfer %46347 : tensor<4x?x4096xf16>{%dim_43948} to #hal.device.promise<@__device_1>
    %46349 = torch_c.from_builtin_tensor %46348 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46350 = torch_c.to_builtin_tensor %46291 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43949 = arith.constant 1 : index
    %dim_43950 = tensor.dim %46350, %c1_43949 : tensor<4x?x4096xf16>
    %46351 = flow.tensor.transfer %46350 : tensor<4x?x4096xf16>{%dim_43950} to #hal.device.promise<@__device_1>
    %46352 = torch_c.from_builtin_tensor %46351 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46353 = torch_c.to_builtin_tensor %46297 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43951 = arith.constant 1 : index
    %dim_43952 = tensor.dim %46353, %c1_43951 : tensor<4x?x4096xf16>
    %46354 = flow.tensor.transfer %46353 : tensor<4x?x4096xf16>{%dim_43952} to #hal.device.promise<@__device_1>
    %46355 = torch_c.from_builtin_tensor %46354 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46356 = torch_c.to_builtin_tensor %46303 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43953 = arith.constant 1 : index
    %dim_43954 = tensor.dim %46356, %c1_43953 : tensor<4x?x4096xf16>
    %46357 = flow.tensor.transfer %46356 : tensor<4x?x4096xf16>{%dim_43954} to #hal.device.promise<@__device_1>
    %46358 = torch_c.from_builtin_tensor %46357 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46359 = torch_c.to_builtin_tensor %46309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43955 = arith.constant 1 : index
    %dim_43956 = tensor.dim %46359, %c1_43955 : tensor<4x?x4096xf16>
    %46360 = flow.tensor.transfer %46359 : tensor<4x?x4096xf16>{%dim_43956} to #hal.device.promise<@__device_1>
    %46361 = torch_c.from_builtin_tensor %46360 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46362 = torch_c.to_builtin_tensor %46315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43957 = arith.constant 1 : index
    %dim_43958 = tensor.dim %46362, %c1_43957 : tensor<4x?x4096xf16>
    %46363 = flow.tensor.transfer %46362 : tensor<4x?x4096xf16>{%dim_43958} to #hal.device.promise<@__device_1>
    %46364 = torch_c.from_builtin_tensor %46363 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43959 = torch.constant.int 1
    %46365 = torch.aten.add.Tensor %46346, %46279, %int1_43959 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43960 = torch.constant.int 1
    %46366 = torch.aten.add.Tensor %46365, %46349, %int1_43960 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43961 = torch.constant.int 1
    %46367 = torch.aten.add.Tensor %46366, %46352, %int1_43961 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43962 = torch.constant.int 1
    %46368 = torch.aten.add.Tensor %46367, %46355, %int1_43962 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43963 = torch.constant.int 1
    %46369 = torch.aten.add.Tensor %46368, %46358, %int1_43963 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43964 = torch.constant.int 1
    %46370 = torch.aten.add.Tensor %46369, %46361, %int1_43964 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43965 = torch.constant.int 1
    %46371 = torch.aten.add.Tensor %46370, %46364, %int1_43965 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
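    // Gather-and-sum onto @__device_2.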
    %46372 = torch_c.to_builtin_tensor %46273 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43966 = arith.constant 1 : index
    %dim_43967 = tensor.dim %46372, %c1_43966 : tensor<4x?x4096xf16>
    %46373 = flow.tensor.transfer %46372 : tensor<4x?x4096xf16>{%dim_43967} to #hal.device.promise<@__device_2>
    %46374 = torch_c.from_builtin_tensor %46373 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46375 = torch_c.to_builtin_tensor %46279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43968 = arith.constant 1 : index
    %dim_43969 = tensor.dim %46375, %c1_43968 : tensor<4x?x4096xf16>
    %46376 = flow.tensor.transfer %46375 : tensor<4x?x4096xf16>{%dim_43969} to #hal.device.promise<@__device_2>
    %46377 = torch_c.from_builtin_tensor %46376 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46378 = torch_c.to_builtin_tensor %46291 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43970 = arith.constant 1 : index
    %dim_43971 = tensor.dim %46378, %c1_43970 : tensor<4x?x4096xf16>
    %46379 = flow.tensor.transfer %46378 : tensor<4x?x4096xf16>{%dim_43971} to #hal.device.promise<@__device_2>
    %46380 = torch_c.from_builtin_tensor %46379 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46381 = torch_c.to_builtin_tensor %46297 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43972 = arith.constant 1 : index
    %dim_43973 = tensor.dim %46381, %c1_43972 : tensor<4x?x4096xf16>
    %46382 = flow.tensor.transfer %46381 : tensor<4x?x4096xf16>{%dim_43973} to #hal.device.promise<@__device_2>
    %46383 = torch_c.from_builtin_tensor %46382 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46384 = torch_c.to_builtin_tensor %46303 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43974 = arith.constant 1 : index
    %dim_43975 = tensor.dim %46384, %c1_43974 : tensor<4x?x4096xf16>
    %46385 = flow.tensor.transfer %46384 : tensor<4x?x4096xf16>{%dim_43975} to #hal.device.promise<@__device_2>
    %46386 = torch_c.from_builtin_tensor %46385 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46387 = torch_c.to_builtin_tensor %46309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43976 = arith.constant 1 : index
    %dim_43977 = tensor.dim %46387, %c1_43976 : tensor<4x?x4096xf16>
    %46388 = flow.tensor.transfer %46387 : tensor<4x?x4096xf16>{%dim_43977} to #hal.device.promise<@__device_2>
    %46389 = torch_c.from_builtin_tensor %46388 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46390 = torch_c.to_builtin_tensor %46315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43978 = arith.constant 1 : index
    %dim_43979 = tensor.dim %46390, %c1_43978 : tensor<4x?x4096xf16>
    %46391 = flow.tensor.transfer %46390 : tensor<4x?x4096xf16>{%dim_43979} to #hal.device.promise<@__device_2>
    %46392 = torch_c.from_builtin_tensor %46391 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43980 = torch.constant.int 1
    %46393 = torch.aten.add.Tensor %46374, %46377, %int1_43980 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43981 = torch.constant.int 1
    %46394 = torch.aten.add.Tensor %46393, %46285, %int1_43981 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43982 = torch.constant.int 1
    %46395 = torch.aten.add.Tensor %46394, %46380, %int1_43982 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43983 = torch.constant.int 1
    %46396 = torch.aten.add.Tensor %46395, %46383, %int1_43983 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43984 = torch.constant.int 1
    %46397 = torch.aten.add.Tensor %46396, %46386, %int1_43984 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43985 = torch.constant.int 1
    %46398 = torch.aten.add.Tensor %46397, %46389, %int1_43985 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_43986 = torch.constant.int 1
    %46399 = torch.aten.add.Tensor %46398, %46392, %int1_43986 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
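    // Gather-and-sum onto @__device_3.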
    %46400 = torch_c.to_builtin_tensor %46273 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43987 = arith.constant 1 : index
    %dim_43988 = tensor.dim %46400, %c1_43987 : tensor<4x?x4096xf16>
    %46401 = flow.tensor.transfer %46400 : tensor<4x?x4096xf16>{%dim_43988} to #hal.device.promise<@__device_3>
    %46402 = torch_c.from_builtin_tensor %46401 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46403 = torch_c.to_builtin_tensor %46279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43989 = arith.constant 1 : index
    %dim_43990 = tensor.dim %46403, %c1_43989 : tensor<4x?x4096xf16>
    %46404 = flow.tensor.transfer %46403 : tensor<4x?x4096xf16>{%dim_43990} to #hal.device.promise<@__device_3>
    %46405 = torch_c.from_builtin_tensor %46404 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46406 = torch_c.to_builtin_tensor %46285 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43991 = arith.constant 1 : index
    %dim_43992 = tensor.dim %46406, %c1_43991 : tensor<4x?x4096xf16>
    %46407 = flow.tensor.transfer %46406 : tensor<4x?x4096xf16>{%dim_43992} to #hal.device.promise<@__device_3>
    %46408 = torch_c.from_builtin_tensor %46407 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46409 = torch_c.to_builtin_tensor %46297 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43993 = arith.constant 1 : index
    %dim_43994 = tensor.dim %46409, %c1_43993 : tensor<4x?x4096xf16>
    %46410 = flow.tensor.transfer %46409 : tensor<4x?x4096xf16>{%dim_43994} to #hal.device.promise<@__device_3>
    %46411 = torch_c.from_builtin_tensor %46410 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46412 = torch_c.to_builtin_tensor %46303 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43995 = arith.constant 1 : index
    %dim_43996 = tensor.dim %46412, %c1_43995 : tensor<4x?x4096xf16>
    %46413 = flow.tensor.transfer %46412 : tensor<4x?x4096xf16>{%dim_43996} to #hal.device.promise<@__device_3>
    %46414 = torch_c.from_builtin_tensor %46413 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46415 = torch_c.to_builtin_tensor %46309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43997 = arith.constant 1 : index
    %dim_43998 = tensor.dim %46415, %c1_43997 : tensor<4x?x4096xf16>
    %46416 = flow.tensor.transfer %46415 : tensor<4x?x4096xf16>{%dim_43998} to #hal.device.promise<@__device_3>
    %46417 = torch_c.from_builtin_tensor %46416 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46418 = torch_c.to_builtin_tensor %46315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_43999 = arith.constant 1 : index
    %dim_44000 = tensor.dim %46418, %c1_43999 : tensor<4x?x4096xf16>
    %46419 = flow.tensor.transfer %46418 : tensor<4x?x4096xf16>{%dim_44000} to #hal.device.promise<@__device_3>
    %46420 = torch_c.from_builtin_tensor %46419 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44001 = torch.constant.int 1
    %46421 = torch.aten.add.Tensor %46402, %46405, %int1_44001 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44002 = torch.constant.int 1
    %46422 = torch.aten.add.Tensor %46421, %46408, %int1_44002 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44003 = torch.constant.int 1
    %46423 = torch.aten.add.Tensor %46422, %46291, %int1_44003 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44004 = torch.constant.int 1
    %46424 = torch.aten.add.Tensor %46423, %46411, %int1_44004 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44005 = torch.constant.int 1
    %46425 = torch.aten.add.Tensor %46424, %46414, %int1_44005 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44006 = torch.constant.int 1
    %46426 = torch.aten.add.Tensor %46425, %46417, %int1_44006 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44007 = torch.constant.int 1
    %46427 = torch.aten.add.Tensor %46426, %46420, %int1_44007 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
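    // Same exchange for @__device_4: copy the eight per-device partial results
    // over and reduce them locally. Repeated once per device, these
    // transfer-then-add blocks implement what is effectively an all-reduce of
    // the sharded layer output.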
    %46428 = torch_c.to_builtin_tensor %46273 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44008 = arith.constant 1 : index
    %dim_44009 = tensor.dim %46428, %c1_44008 : tensor<4x?x4096xf16>
    %46429 = flow.tensor.transfer %46428 : tensor<4x?x4096xf16>{%dim_44009} to #hal.device.promise<@__device_4>
    %46430 = torch_c.from_builtin_tensor %46429 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46431 = torch_c.to_builtin_tensor %46279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44010 = arith.constant 1 : index
    %dim_44011 = tensor.dim %46431, %c1_44010 : tensor<4x?x4096xf16>
    %46432 = flow.tensor.transfer %46431 : tensor<4x?x4096xf16>{%dim_44011} to #hal.device.promise<@__device_4>
    %46433 = torch_c.from_builtin_tensor %46432 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46434 = torch_c.to_builtin_tensor %46285 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44012 = arith.constant 1 : index
    %dim_44013 = tensor.dim %46434, %c1_44012 : tensor<4x?x4096xf16>
    %46435 = flow.tensor.transfer %46434 : tensor<4x?x4096xf16>{%dim_44013} to #hal.device.promise<@__device_4>
    %46436 = torch_c.from_builtin_tensor %46435 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46437 = torch_c.to_builtin_tensor %46291 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44014 = arith.constant 1 : index
    %dim_44015 = tensor.dim %46437, %c1_44014 : tensor<4x?x4096xf16>
    %46438 = flow.tensor.transfer %46437 : tensor<4x?x4096xf16>{%dim_44015} to #hal.device.promise<@__device_4>
    %46439 = torch_c.from_builtin_tensor %46438 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46440 = torch_c.to_builtin_tensor %46303 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44016 = arith.constant 1 : index
    %dim_44017 = tensor.dim %46440, %c1_44016 : tensor<4x?x4096xf16>
    %46441 = flow.tensor.transfer %46440 : tensor<4x?x4096xf16>{%dim_44017} to #hal.device.promise<@__device_4>
    %46442 = torch_c.from_builtin_tensor %46441 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46443 = torch_c.to_builtin_tensor %46309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44018 = arith.constant 1 : index
    %dim_44019 = tensor.dim %46443, %c1_44018 : tensor<4x?x4096xf16>
    %46444 = flow.tensor.transfer %46443 : tensor<4x?x4096xf16>{%dim_44019} to #hal.device.promise<@__device_4>
    %46445 = torch_c.from_builtin_tensor %46444 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46446 = torch_c.to_builtin_tensor %46315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44020 = arith.constant 1 : index
    %dim_44021 = tensor.dim %46446, %c1_44020 : tensor<4x?x4096xf16>
    %46447 = flow.tensor.transfer %46446 : tensor<4x?x4096xf16>{%dim_44021} to #hal.device.promise<@__device_4>
    %46448 = torch_c.from_builtin_tensor %46447 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44022 = torch.constant.int 1
    %46449 = torch.aten.add.Tensor %46430, %46433, %int1_44022 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44023 = torch.constant.int 1
    %46450 = torch.aten.add.Tensor %46449, %46436, %int1_44023 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44024 = torch.constant.int 1
    %46451 = torch.aten.add.Tensor %46450, %46439, %int1_44024 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44025 = torch.constant.int 1
    %46452 = torch.aten.add.Tensor %46451, %46297, %int1_44025 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44026 = torch.constant.int 1
    %46453 = torch.aten.add.Tensor %46452, %46442, %int1_44026 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44027 = torch.constant.int 1
    %46454 = torch.aten.add.Tensor %46453, %46445, %int1_44027 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44028 = torch.constant.int 1
    %46455 = torch.aten.add.Tensor %46454, %46448, %int1_44028 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
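    // All-reduce replica for @__device_5: transfer the eight partials, then sum.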
    %46456 = torch_c.to_builtin_tensor %46273 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44029 = arith.constant 1 : index
    %dim_44030 = tensor.dim %46456, %c1_44029 : tensor<4x?x4096xf16>
    %46457 = flow.tensor.transfer %46456 : tensor<4x?x4096xf16>{%dim_44030} to #hal.device.promise<@__device_5>
    %46458 = torch_c.from_builtin_tensor %46457 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46459 = torch_c.to_builtin_tensor %46279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44031 = arith.constant 1 : index
    %dim_44032 = tensor.dim %46459, %c1_44031 : tensor<4x?x4096xf16>
    %46460 = flow.tensor.transfer %46459 : tensor<4x?x4096xf16>{%dim_44032} to #hal.device.promise<@__device_5>
    %46461 = torch_c.from_builtin_tensor %46460 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46462 = torch_c.to_builtin_tensor %46285 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44033 = arith.constant 1 : index
    %dim_44034 = tensor.dim %46462, %c1_44033 : tensor<4x?x4096xf16>
    %46463 = flow.tensor.transfer %46462 : tensor<4x?x4096xf16>{%dim_44034} to #hal.device.promise<@__device_5>
    %46464 = torch_c.from_builtin_tensor %46463 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46465 = torch_c.to_builtin_tensor %46291 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44035 = arith.constant 1 : index
    %dim_44036 = tensor.dim %46465, %c1_44035 : tensor<4x?x4096xf16>
    %46466 = flow.tensor.transfer %46465 : tensor<4x?x4096xf16>{%dim_44036} to #hal.device.promise<@__device_5>
    %46467 = torch_c.from_builtin_tensor %46466 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46468 = torch_c.to_builtin_tensor %46297 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44037 = arith.constant 1 : index
    %dim_44038 = tensor.dim %46468, %c1_44037 : tensor<4x?x4096xf16>
    %46469 = flow.tensor.transfer %46468 : tensor<4x?x4096xf16>{%dim_44038} to #hal.device.promise<@__device_5>
    %46470 = torch_c.from_builtin_tensor %46469 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46471 = torch_c.to_builtin_tensor %46309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44039 = arith.constant 1 : index
    %dim_44040 = tensor.dim %46471, %c1_44039 : tensor<4x?x4096xf16>
    %46472 = flow.tensor.transfer %46471 : tensor<4x?x4096xf16>{%dim_44040} to #hal.device.promise<@__device_5>
    %46473 = torch_c.from_builtin_tensor %46472 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46474 = torch_c.to_builtin_tensor %46315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44041 = arith.constant 1 : index
    %dim_44042 = tensor.dim %46474, %c1_44041 : tensor<4x?x4096xf16>
    %46475 = flow.tensor.transfer %46474 : tensor<4x?x4096xf16>{%dim_44042} to #hal.device.promise<@__device_5>
    %46476 = torch_c.from_builtin_tensor %46475 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44043 = torch.constant.int 1
    %46477 = torch.aten.add.Tensor %46458, %46461, %int1_44043 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44044 = torch.constant.int 1
    %46478 = torch.aten.add.Tensor %46477, %46464, %int1_44044 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44045 = torch.constant.int 1
    %46479 = torch.aten.add.Tensor %46478, %46467, %int1_44045 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44046 = torch.constant.int 1
    %46480 = torch.aten.add.Tensor %46479, %46470, %int1_44046 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44047 = torch.constant.int 1
    %46481 = torch.aten.add.Tensor %46480, %46303, %int1_44047 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44048 = torch.constant.int 1
    %46482 = torch.aten.add.Tensor %46481, %46473, %int1_44048 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44049 = torch.constant.int 1
    %46483 = torch.aten.add.Tensor %46482, %46476, %int1_44049 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
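    // All-reduce replica for @__device_6.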
    %46484 = torch_c.to_builtin_tensor %46273 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44050 = arith.constant 1 : index
    %dim_44051 = tensor.dim %46484, %c1_44050 : tensor<4x?x4096xf16>
    %46485 = flow.tensor.transfer %46484 : tensor<4x?x4096xf16>{%dim_44051} to #hal.device.promise<@__device_6>
    %46486 = torch_c.from_builtin_tensor %46485 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46487 = torch_c.to_builtin_tensor %46279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44052 = arith.constant 1 : index
    %dim_44053 = tensor.dim %46487, %c1_44052 : tensor<4x?x4096xf16>
    %46488 = flow.tensor.transfer %46487 : tensor<4x?x4096xf16>{%dim_44053} to #hal.device.promise<@__device_6>
    %46489 = torch_c.from_builtin_tensor %46488 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46490 = torch_c.to_builtin_tensor %46285 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44054 = arith.constant 1 : index
    %dim_44055 = tensor.dim %46490, %c1_44054 : tensor<4x?x4096xf16>
    %46491 = flow.tensor.transfer %46490 : tensor<4x?x4096xf16>{%dim_44055} to #hal.device.promise<@__device_6>
    %46492 = torch_c.from_builtin_tensor %46491 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46493 = torch_c.to_builtin_tensor %46291 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44056 = arith.constant 1 : index
    %dim_44057 = tensor.dim %46493, %c1_44056 : tensor<4x?x4096xf16>
    %46494 = flow.tensor.transfer %46493 : tensor<4x?x4096xf16>{%dim_44057} to #hal.device.promise<@__device_6>
    %46495 = torch_c.from_builtin_tensor %46494 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46496 = torch_c.to_builtin_tensor %46297 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44058 = arith.constant 1 : index
    %dim_44059 = tensor.dim %46496, %c1_44058 : tensor<4x?x4096xf16>
    %46497 = flow.tensor.transfer %46496 : tensor<4x?x4096xf16>{%dim_44059} to #hal.device.promise<@__device_6>
    %46498 = torch_c.from_builtin_tensor %46497 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46499 = torch_c.to_builtin_tensor %46303 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44060 = arith.constant 1 : index
    %dim_44061 = tensor.dim %46499, %c1_44060 : tensor<4x?x4096xf16>
    %46500 = flow.tensor.transfer %46499 : tensor<4x?x4096xf16>{%dim_44061} to #hal.device.promise<@__device_6>
    %46501 = torch_c.from_builtin_tensor %46500 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46502 = torch_c.to_builtin_tensor %46315 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44062 = arith.constant 1 : index
    %dim_44063 = tensor.dim %46502, %c1_44062 : tensor<4x?x4096xf16>
    %46503 = flow.tensor.transfer %46502 : tensor<4x?x4096xf16>{%dim_44063} to #hal.device.promise<@__device_6>
    %46504 = torch_c.from_builtin_tensor %46503 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44064 = torch.constant.int 1
    %46505 = torch.aten.add.Tensor %46486, %46489, %int1_44064 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44065 = torch.constant.int 1
    %46506 = torch.aten.add.Tensor %46505, %46492, %int1_44065 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44066 = torch.constant.int 1
    %46507 = torch.aten.add.Tensor %46506, %46495, %int1_44066 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44067 = torch.constant.int 1
    %46508 = torch.aten.add.Tensor %46507, %46498, %int1_44067 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44068 = torch.constant.int 1
    %46509 = torch.aten.add.Tensor %46508, %46501, %int1_44068 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44069 = torch.constant.int 1
    %46510 = torch.aten.add.Tensor %46509, %46309, %int1_44069 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44070 = torch.constant.int 1
    %46511 = torch.aten.add.Tensor %46510, %46504, %int1_44070 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
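    // All-reduce replica for @__device_7.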
    %46512 = torch_c.to_builtin_tensor %46273 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44071 = arith.constant 1 : index
    %dim_44072 = tensor.dim %46512, %c1_44071 : tensor<4x?x4096xf16>
    %46513 = flow.tensor.transfer %46512 : tensor<4x?x4096xf16>{%dim_44072} to #hal.device.promise<@__device_7>
    %46514 = torch_c.from_builtin_tensor %46513 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46515 = torch_c.to_builtin_tensor %46279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44073 = arith.constant 1 : index
    %dim_44074 = tensor.dim %46515, %c1_44073 : tensor<4x?x4096xf16>
    %46516 = flow.tensor.transfer %46515 : tensor<4x?x4096xf16>{%dim_44074} to #hal.device.promise<@__device_7>
    %46517 = torch_c.from_builtin_tensor %46516 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46518 = torch_c.to_builtin_tensor %46285 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44075 = arith.constant 1 : index
    %dim_44076 = tensor.dim %46518, %c1_44075 : tensor<4x?x4096xf16>
    %46519 = flow.tensor.transfer %46518 : tensor<4x?x4096xf16>{%dim_44076} to #hal.device.promise<@__device_7>
    %46520 = torch_c.from_builtin_tensor %46519 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46521 = torch_c.to_builtin_tensor %46291 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44077 = arith.constant 1 : index
    %dim_44078 = tensor.dim %46521, %c1_44077 : tensor<4x?x4096xf16>
    %46522 = flow.tensor.transfer %46521 : tensor<4x?x4096xf16>{%dim_44078} to #hal.device.promise<@__device_7>
    %46523 = torch_c.from_builtin_tensor %46522 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46524 = torch_c.to_builtin_tensor %46297 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44079 = arith.constant 1 : index
    %dim_44080 = tensor.dim %46524, %c1_44079 : tensor<4x?x4096xf16>
    %46525 = flow.tensor.transfer %46524 : tensor<4x?x4096xf16>{%dim_44080} to #hal.device.promise<@__device_7>
    %46526 = torch_c.from_builtin_tensor %46525 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46527 = torch_c.to_builtin_tensor %46303 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44081 = arith.constant 1 : index
    %dim_44082 = tensor.dim %46527, %c1_44081 : tensor<4x?x4096xf16>
    %46528 = flow.tensor.transfer %46527 : tensor<4x?x4096xf16>{%dim_44082} to #hal.device.promise<@__device_7>
    %46529 = torch_c.from_builtin_tensor %46528 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46530 = torch_c.to_builtin_tensor %46309 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44083 = arith.constant 1 : index
    %dim_44084 = tensor.dim %46530, %c1_44083 : tensor<4x?x4096xf16>
    %46531 = flow.tensor.transfer %46530 : tensor<4x?x4096xf16>{%dim_44084} to #hal.device.promise<@__device_7>
    %46532 = torch_c.from_builtin_tensor %46531 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44085 = torch.constant.int 1
    %46533 = torch.aten.add.Tensor %46514, %46517, %int1_44085 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44086 = torch.constant.int 1
    %46534 = torch.aten.add.Tensor %46533, %46520, %int1_44086 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44087 = torch.constant.int 1
    %46535 = torch.aten.add.Tensor %46534, %46523, %int1_44087 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44088 = torch.constant.int 1
    %46536 = torch.aten.add.Tensor %46535, %46526, %int1_44088 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44089 = torch.constant.int 1
    %46537 = torch.aten.add.Tensor %46536, %46529, %int1_44089 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44090 = torch.constant.int 1
    %46538 = torch.aten.add.Tensor %46537, %46532, %int1_44090 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44091 = torch.constant.int 1
    %46539 = torch.aten.add.Tensor %46538, %46315, %int1_44091 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
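    // Residual connection, one add per device: fold each device's reduced
    // output (%46343 ... %46539) back into its copy of the layer input
    // (%45199 ... %45206).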
    %int1_44092 = torch.constant.int 1
    %46540 = torch.aten.add.Tensor %45199, %46343, %int1_44092 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44093 = torch.constant.int 1
    %46541 = torch.aten.add.Tensor %45200, %46371, %int1_44093 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44094 = torch.constant.int 1
    %46542 = torch.aten.add.Tensor %45201, %46399, %int1_44094 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44095 = torch.constant.int 1
    %46543 = torch.aten.add.Tensor %45202, %46427, %int1_44095 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44096 = torch.constant.int 1
    %46544 = torch.aten.add.Tensor %45203, %46455, %int1_44096 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44097 = torch.constant.int 1
    %46545 = torch.aten.add.Tensor %45204, %46483, %int1_44097 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44098 = torch.constant.int 1
    %46546 = torch.aten.add.Tensor %45205, %46511, %int1_44098 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44099 = torch.constant.int 1
    %46547 = torch.aten.add.Tensor %45206, %46539, %int1_44099 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
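    // RMSNorm over the residual stream, computed independently on each device.
    // First upcast f16 -> f32 (dtype code 6) for a numerically stable reduction.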
    %int6_44100 = torch.constant.int 6
    %46548 = torch.prims.convert_element_type %46540, %int6_44100 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44101 = torch.constant.int 6
    %46549 = torch.prims.convert_element_type %46541, %int6_44101 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44102 = torch.constant.int 6
    %46550 = torch.prims.convert_element_type %46542, %int6_44102 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44103 = torch.constant.int 6
    %46551 = torch.prims.convert_element_type %46543, %int6_44103 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44104 = torch.constant.int 6
    %46552 = torch.prims.convert_element_type %46544, %int6_44104 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44105 = torch.constant.int 6
    %46553 = torch.prims.convert_element_type %46545, %int6_44105 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44106 = torch.constant.int 6
    %46554 = torch.prims.convert_element_type %46546, %int6_44106 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44107 = torch.constant.int 6
    %46555 = torch.prims.convert_element_type %46547, %int6_44107 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
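    // Square each element: x^2.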
    %int2_44108 = torch.constant.int 2
    %46556 = torch.aten.pow.Tensor_Scalar %46548, %int2_44108 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44109 = torch.constant.int 2
    %46557 = torch.aten.pow.Tensor_Scalar %46549, %int2_44109 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44110 = torch.constant.int 2
    %46558 = torch.aten.pow.Tensor_Scalar %46550, %int2_44110 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44111 = torch.constant.int 2
    %46559 = torch.aten.pow.Tensor_Scalar %46551, %int2_44111 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44112 = torch.constant.int 2
    %46560 = torch.aten.pow.Tensor_Scalar %46552, %int2_44112 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44113 = torch.constant.int 2
    %46561 = torch.aten.pow.Tensor_Scalar %46553, %int2_44113 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44114 = torch.constant.int 2
    %46562 = torch.aten.pow.Tensor_Scalar %46554, %int2_44114 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44115 = torch.constant.int 2
    %46563 = torch.aten.pow.Tensor_Scalar %46555, %int2_44115 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
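    // Mean of the squares over the hidden dimension (dim -1, keepdim = true).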
    %int-1_44116 = torch.constant.int -1
    %46564 = torch.prim.ListConstruct %int-1_44116 : (!torch.int) -> !torch.list<int>
    %true_44117 = torch.constant.bool true
    %none_44118 = torch.constant.none
    %46565 = torch.aten.mean.dim %46556, %46564, %true_44117, %none_44118 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44119 = torch.constant.int -1
    %46566 = torch.prim.ListConstruct %int-1_44119 : (!torch.int) -> !torch.list<int>
    %true_44120 = torch.constant.bool true
    %none_44121 = torch.constant.none
    %46567 = torch.aten.mean.dim %46557, %46566, %true_44120, %none_44121 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44122 = torch.constant.int -1
    %46568 = torch.prim.ListConstruct %int-1_44122 : (!torch.int) -> !torch.list<int>
    %true_44123 = torch.constant.bool true
    %none_44124 = torch.constant.none
    %46569 = torch.aten.mean.dim %46558, %46568, %true_44123, %none_44124 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44125 = torch.constant.int -1
    %46570 = torch.prim.ListConstruct %int-1_44125 : (!torch.int) -> !torch.list<int>
    %true_44126 = torch.constant.bool true
    %none_44127 = torch.constant.none
    %46571 = torch.aten.mean.dim %46559, %46570, %true_44126, %none_44127 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44128 = torch.constant.int -1
    %46572 = torch.prim.ListConstruct %int-1_44128 : (!torch.int) -> !torch.list<int>
    %true_44129 = torch.constant.bool true
    %none_44130 = torch.constant.none
    %46573 = torch.aten.mean.dim %46560, %46572, %true_44129, %none_44130 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44131 = torch.constant.int -1
    %46574 = torch.prim.ListConstruct %int-1_44131 : (!torch.int) -> !torch.list<int>
    %true_44132 = torch.constant.bool true
    %none_44133 = torch.constant.none
    %46575 = torch.aten.mean.dim %46561, %46574, %true_44132, %none_44133 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44134 = torch.constant.int -1
    %46576 = torch.prim.ListConstruct %int-1_44134 : (!torch.int) -> !torch.list<int>
    %true_44135 = torch.constant.bool true
    %none_44136 = torch.constant.none
    %46577 = torch.aten.mean.dim %46562, %46576, %true_44135, %none_44136 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44137 = torch.constant.int -1
    %46578 = torch.prim.ListConstruct %int-1_44137 : (!torch.int) -> !torch.list<int>
    %true_44138 = torch.constant.bool true
    %none_44139 = torch.constant.none
    %46579 = torch.aten.mean.dim %46563, %46578, %true_44138, %none_44139 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
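    // Add the variance epsilon (~1e-5).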
    %float9.999990e-06_44140 = torch.constant.float 9.9999997473787516E-6
    %int1_44141 = torch.constant.int 1
    %46580 = torch.aten.add.Scalar %46565, %float9.999990e-06_44140, %int1_44141 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44142 = torch.constant.float 9.9999997473787516E-6
    %int1_44143 = torch.constant.int 1
    %46581 = torch.aten.add.Scalar %46567, %float9.999990e-06_44142, %int1_44143 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44144 = torch.constant.float 9.9999997473787516E-6
    %int1_44145 = torch.constant.int 1
    %46582 = torch.aten.add.Scalar %46569, %float9.999990e-06_44144, %int1_44145 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44146 = torch.constant.float 9.9999997473787516E-6
    %int1_44147 = torch.constant.int 1
    %46583 = torch.aten.add.Scalar %46571, %float9.999990e-06_44146, %int1_44147 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44148 = torch.constant.float 9.9999997473787516E-6
    %int1_44149 = torch.constant.int 1
    %46584 = torch.aten.add.Scalar %46573, %float9.999990e-06_44148, %int1_44149 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44150 = torch.constant.float 9.9999997473787516E-6
    %int1_44151 = torch.constant.int 1
    %46585 = torch.aten.add.Scalar %46575, %float9.999990e-06_44150, %int1_44151 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44152 = torch.constant.float 9.9999997473787516E-6
    %int1_44153 = torch.constant.int 1
    %46586 = torch.aten.add.Scalar %46577, %float9.999990e-06_44152, %int1_44153 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44154 = torch.constant.float 9.9999997473787516E-6
    %int1_44155 = torch.constant.int 1
    %46587 = torch.aten.add.Scalar %46579, %float9.999990e-06_44154, %int1_44155 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
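    // rsqrt(mean(x^2) + eps).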
    %46588 = torch.aten.rsqrt %46580 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %46589 = torch.aten.rsqrt %46581 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %46590 = torch.aten.rsqrt %46582 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %46591 = torch.aten.rsqrt %46583 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %46592 = torch.aten.rsqrt %46584 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %46593 = torch.aten.rsqrt %46585 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %46594 = torch.aten.rsqrt %46586 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %46595 = torch.aten.rsqrt %46587 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %46595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
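    // Normalize: x * rsqrt(mean(x^2) + eps).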
    %46596 = torch.aten.mul.Tensor %46548, %46588 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46597 = torch.aten.mul.Tensor %46549, %46589 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46598 = torch.aten.mul.Tensor %46550, %46590 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46599 = torch.aten.mul.Tensor %46551, %46591 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46600 = torch.aten.mul.Tensor %46552, %46592 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46601 = torch.aten.mul.Tensor %46553, %46593 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46602 = torch.aten.mul.Tensor %46554, %46594 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46603 = torch.aten.mul.Tensor %46555, %46595 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
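    // Scale by the learned norm weight (%1704 ... %1711; presumably the
    // per-device replicas of this block's ffn_norm.weight).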
    %46604 = torch.aten.mul.Tensor %1704, %46596 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46605 = torch.aten.mul.Tensor %1705, %46597 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46606 = torch.aten.mul.Tensor %1706, %46598 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46607 = torch.aten.mul.Tensor %1707, %46599 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46608 = torch.aten.mul.Tensor %1708, %46600 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46609 = torch.aten.mul.Tensor %1709, %46601 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46610 = torch.aten.mul.Tensor %1710, %46602 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %46611 = torch.aten.mul.Tensor %1711, %46603 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %46611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
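    // Downcast the normalized activations back to f16 (dtype code 5).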
    %int5_44156 = torch.constant.int 5
    %46612 = torch.prims.convert_element_type %46604, %int5_44156 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44157 = torch.constant.int 5
    %46613 = torch.prims.convert_element_type %46605, %int5_44157 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44158 = torch.constant.int 5
    %46614 = torch.prims.convert_element_type %46606, %int5_44158 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44159 = torch.constant.int 5
    %46615 = torch.prims.convert_element_type %46607, %int5_44159 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44160 = torch.constant.int 5
    %46616 = torch.prims.convert_element_type %46608, %int5_44160 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44161 = torch.constant.int 5
    %46617 = torch.prims.convert_element_type %46609, %int5_44161 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44162 = torch.constant.int 5
    %46618 = torch.prims.convert_element_type %46610, %int5_44162 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44163 = torch.constant.int 5
    %46619 = torch.prims.convert_element_type %46611, %int5_44163 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
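    // FFN projection, tensor-parallel: transpose each device's [1792, 4096]
    // weight shard to [4096, 1792] for the matmul (1792 is consistent with a
    // 14336-wide FFN split eight ways).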
    %int1_44164 = torch.constant.int 1
    %int0_44165 = torch.constant.int 0
    %46620 = torch.prim.ListConstruct %int1_44164, %int0_44165 : (!torch.int, !torch.int) -> !torch.list<int>
    %46621 = torch.aten.permute %1712, %46620 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44166 = torch.constant.int 1
    %int0_44167 = torch.constant.int 0
    %46622 = torch.prim.ListConstruct %int1_44166, %int0_44167 : (!torch.int, !torch.int) -> !torch.list<int>
    %46623 = torch.aten.permute %1713, %46622 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44168 = torch.constant.int 1
    %int0_44169 = torch.constant.int 0
    %46624 = torch.prim.ListConstruct %int1_44168, %int0_44169 : (!torch.int, !torch.int) -> !torch.list<int>
    %46625 = torch.aten.permute %1714, %46624 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44170 = torch.constant.int 1
    %int0_44171 = torch.constant.int 0
    %46626 = torch.prim.ListConstruct %int1_44170, %int0_44171 : (!torch.int, !torch.int) -> !torch.list<int>
    %46627 = torch.aten.permute %1715, %46626 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44172 = torch.constant.int 1
    %int0_44173 = torch.constant.int 0
    %46628 = torch.prim.ListConstruct %int1_44172, %int0_44173 : (!torch.int, !torch.int) -> !torch.list<int>
    %46629 = torch.aten.permute %1716, %46628 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44174 = torch.constant.int 1
    %int0_44175 = torch.constant.int 0
    %46630 = torch.prim.ListConstruct %int1_44174, %int0_44175 : (!torch.int, !torch.int) -> !torch.list<int>
    %46631 = torch.aten.permute %1717, %46630 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44176 = torch.constant.int 1
    %int0_44177 = torch.constant.int 0
    %46632 = torch.prim.ListConstruct %int1_44176, %int0_44177 : (!torch.int, !torch.int) -> !torch.list<int>
    %46633 = torch.aten.permute %1718, %46632 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44178 = torch.constant.int 1
    %int0_44179 = torch.constant.int 0
    %46634 = torch.prim.ListConstruct %int1_44178, %int0_44179 : (!torch.int, !torch.int) -> !torch.list<int>
    %46635 = torch.aten.permute %1719, %46634 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
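    // Per shard: flatten [4, ?, 4096] to [4*?, 4096], matmul against the
    // transposed weight, then restore the [4, ?, 1792] shape.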
    %int4_44180 = torch.constant.int 4
    %46636 = torch.aten.mul.int %int4_44180, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44181 = torch.constant.int 4096
    %46637 = torch.prim.ListConstruct %46636, %int4096_44181 : (!torch.int, !torch.int) -> !torch.list<int>
    %46638 = torch.aten.view %46612, %46637 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46638, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46639 = torch.aten.mm %46638, %46621 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46639, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44182 = torch.constant.int 4
    %int1792_44183 = torch.constant.int 1792
    %46640 = torch.prim.ListConstruct %int4_44182, %2482, %int1792_44183 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46641 = torch.aten.view %46639, %46640 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44184 = torch.constant.int 4
    %46642 = torch.aten.mul.int %int4_44184, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44185 = torch.constant.int 4096
    %46643 = torch.prim.ListConstruct %46642, %int4096_44185 : (!torch.int, !torch.int) -> !torch.list<int>
    %46644 = torch.aten.view %46613, %46643 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46644, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46645 = torch.aten.mm %46644, %46623 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46645, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44186 = torch.constant.int 4
    %int1792_44187 = torch.constant.int 1792
    %46646 = torch.prim.ListConstruct %int4_44186, %2482, %int1792_44187 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46647 = torch.aten.view %46645, %46646 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44188 = torch.constant.int 4
    %46648 = torch.aten.mul.int %int4_44188, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44189 = torch.constant.int 4096
    %46649 = torch.prim.ListConstruct %46648, %int4096_44189 : (!torch.int, !torch.int) -> !torch.list<int>
    %46650 = torch.aten.view %46614, %46649 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46650, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46651 = torch.aten.mm %46650, %46625 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46651, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44190 = torch.constant.int 4
    %int1792_44191 = torch.constant.int 1792
    %46652 = torch.prim.ListConstruct %int4_44190, %2482, %int1792_44191 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46653 = torch.aten.view %46651, %46652 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44192 = torch.constant.int 4
    %46654 = torch.aten.mul.int %int4_44192, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44193 = torch.constant.int 4096
    %46655 = torch.prim.ListConstruct %46654, %int4096_44193 : (!torch.int, !torch.int) -> !torch.list<int>
    %46656 = torch.aten.view %46615, %46655 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46656, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46657 = torch.aten.mm %46656, %46627 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46657, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44194 = torch.constant.int 4
    %int1792_44195 = torch.constant.int 1792
    %46658 = torch.prim.ListConstruct %int4_44194, %2482, %int1792_44195 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46659 = torch.aten.view %46657, %46658 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44196 = torch.constant.int 4
    %46660 = torch.aten.mul.int %int4_44196, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44197 = torch.constant.int 4096
    %46661 = torch.prim.ListConstruct %46660, %int4096_44197 : (!torch.int, !torch.int) -> !torch.list<int>
    %46662 = torch.aten.view %46616, %46661 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46662, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46663 = torch.aten.mm %46662, %46629 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46663, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44198 = torch.constant.int 4
    %int1792_44199 = torch.constant.int 1792
    %46664 = torch.prim.ListConstruct %int4_44198, %2482, %int1792_44199 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46665 = torch.aten.view %46663, %46664 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44200 = torch.constant.int 4
    %46666 = torch.aten.mul.int %int4_44200, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44201 = torch.constant.int 4096
    %46667 = torch.prim.ListConstruct %46666, %int4096_44201 : (!torch.int, !torch.int) -> !torch.list<int>
    %46668 = torch.aten.view %46617, %46667 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46668, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46669 = torch.aten.mm %46668, %46631 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46669, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44202 = torch.constant.int 4
    %int1792_44203 = torch.constant.int 1792
    %46670 = torch.prim.ListConstruct %int4_44202, %2482, %int1792_44203 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46671 = torch.aten.view %46669, %46670 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44204 = torch.constant.int 4
    %46672 = torch.aten.mul.int %int4_44204, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44205 = torch.constant.int 4096
    %46673 = torch.prim.ListConstruct %46672, %int4096_44205 : (!torch.int, !torch.int) -> !torch.list<int>
    %46674 = torch.aten.view %46618, %46673 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46674, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46675 = torch.aten.mm %46674, %46633 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46675, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44206 = torch.constant.int 4
    %int1792_44207 = torch.constant.int 1792
    %46676 = torch.prim.ListConstruct %int4_44206, %2482, %int1792_44207 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46677 = torch.aten.view %46675, %46676 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44208 = torch.constant.int 4
    %46678 = torch.aten.mul.int %int4_44208, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44209 = torch.constant.int 4096
    %46679 = torch.prim.ListConstruct %46678, %int4096_44209 : (!torch.int, !torch.int) -> !torch.list<int>
    %46680 = torch.aten.view %46619, %46679 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46680, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46681 = torch.aten.mm %46680, %46635 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46681, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44210 = torch.constant.int 4
    %int1792_44211 = torch.constant.int 1792
    %46682 = torch.prim.ListConstruct %int4_44210, %2482, %int1792_44211 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46683 = torch.aten.view %46681, %46682 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
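    // SiLU on each of the eight per-shard gate outputs, consistent with a SwiGLU feed-forward.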
    %46684 = torch.aten.silu %46641 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46685 = torch.aten.silu %46647 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46686 = torch.aten.silu %46653 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46687 = torch.aten.silu %46659 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46688 = torch.aten.silu %46665 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46689 = torch.aten.silu %46671 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46690 = torch.aten.silu %46677 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46691 = torch.aten.silu %46683 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
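    // Transpose the eight per-shard up-projection weights %1720..%1727 from [1792, 4096] to [4096, 1792].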
    %int1_44212 = torch.constant.int 1
    %int0_44213 = torch.constant.int 0
    %46692 = torch.prim.ListConstruct %int1_44212, %int0_44213 : (!torch.int, !torch.int) -> !torch.list<int>
    %46693 = torch.aten.permute %1720, %46692 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44214 = torch.constant.int 1
    %int0_44215 = torch.constant.int 0
    %46694 = torch.prim.ListConstruct %int1_44214, %int0_44215 : (!torch.int, !torch.int) -> !torch.list<int>
    %46695 = torch.aten.permute %1721, %46694 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44216 = torch.constant.int 1
    %int0_44217 = torch.constant.int 0
    %46696 = torch.prim.ListConstruct %int1_44216, %int0_44217 : (!torch.int, !torch.int) -> !torch.list<int>
    %46697 = torch.aten.permute %1722, %46696 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44218 = torch.constant.int 1
    %int0_44219 = torch.constant.int 0
    %46698 = torch.prim.ListConstruct %int1_44218, %int0_44219 : (!torch.int, !torch.int) -> !torch.list<int>
    %46699 = torch.aten.permute %1723, %46698 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44220 = torch.constant.int 1
    %int0_44221 = torch.constant.int 0
    %46700 = torch.prim.ListConstruct %int1_44220, %int0_44221 : (!torch.int, !torch.int) -> !torch.list<int>
    %46701 = torch.aten.permute %1724, %46700 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44222 = torch.constant.int 1
    %int0_44223 = torch.constant.int 0
    %46702 = torch.prim.ListConstruct %int1_44222, %int0_44223 : (!torch.int, !torch.int) -> !torch.list<int>
    %46703 = torch.aten.permute %1725, %46702 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44224 = torch.constant.int 1
    %int0_44225 = torch.constant.int 0
    %46704 = torch.prim.ListConstruct %int1_44224, %int0_44225 : (!torch.int, !torch.int) -> !torch.list<int>
    %46705 = torch.aten.permute %1726, %46704 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_44226 = torch.constant.int 1
    %int0_44227 = torch.constant.int 0
    %46706 = torch.prim.ListConstruct %int1_44226, %int0_44227 : (!torch.int, !torch.int) -> !torch.list<int>
    %46707 = torch.aten.permute %1727, %46706 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
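    // Up projection per shard: flatten the [4, ?, 4096] input, matmul against the transposed weight,
    // then reshape back to [4, ?, 1792].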
    %int4_44228 = torch.constant.int 4
    %46708 = torch.aten.mul.int %int4_44228, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44229 = torch.constant.int 4096
    %46709 = torch.prim.ListConstruct %46708, %int4096_44229 : (!torch.int, !torch.int) -> !torch.list<int>
    %46710 = torch.aten.view %46612, %46709 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46710, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46711 = torch.aten.mm %46710, %46693 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46711, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44230 = torch.constant.int 4
    %int1792_44231 = torch.constant.int 1792
    %46712 = torch.prim.ListConstruct %int4_44230, %2482, %int1792_44231 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46713 = torch.aten.view %46711, %46712 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44232 = torch.constant.int 4
    %46714 = torch.aten.mul.int %int4_44232, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44233 = torch.constant.int 4096
    %46715 = torch.prim.ListConstruct %46714, %int4096_44233 : (!torch.int, !torch.int) -> !torch.list<int>
    %46716 = torch.aten.view %46613, %46715 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46716, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46717 = torch.aten.mm %46716, %46695 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46717, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44234 = torch.constant.int 4
    %int1792_44235 = torch.constant.int 1792
    %46718 = torch.prim.ListConstruct %int4_44234, %2482, %int1792_44235 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46719 = torch.aten.view %46717, %46718 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44236 = torch.constant.int 4
    %46720 = torch.aten.mul.int %int4_44236, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44237 = torch.constant.int 4096
    %46721 = torch.prim.ListConstruct %46720, %int4096_44237 : (!torch.int, !torch.int) -> !torch.list<int>
    %46722 = torch.aten.view %46614, %46721 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46722, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46723 = torch.aten.mm %46722, %46697 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46723, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44238 = torch.constant.int 4
    %int1792_44239 = torch.constant.int 1792
    %46724 = torch.prim.ListConstruct %int4_44238, %2482, %int1792_44239 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46725 = torch.aten.view %46723, %46724 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44240 = torch.constant.int 4
    %46726 = torch.aten.mul.int %int4_44240, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44241 = torch.constant.int 4096
    %46727 = torch.prim.ListConstruct %46726, %int4096_44241 : (!torch.int, !torch.int) -> !torch.list<int>
    %46728 = torch.aten.view %46615, %46727 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46728, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46729 = torch.aten.mm %46728, %46699 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46729, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44242 = torch.constant.int 4
    %int1792_44243 = torch.constant.int 1792
    %46730 = torch.prim.ListConstruct %int4_44242, %2482, %int1792_44243 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46731 = torch.aten.view %46729, %46730 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44244 = torch.constant.int 4
    %46732 = torch.aten.mul.int %int4_44244, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44245 = torch.constant.int 4096
    %46733 = torch.prim.ListConstruct %46732, %int4096_44245 : (!torch.int, !torch.int) -> !torch.list<int>
    %46734 = torch.aten.view %46616, %46733 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46734, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46735 = torch.aten.mm %46734, %46701 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46735, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44246 = torch.constant.int 4
    %int1792_44247 = torch.constant.int 1792
    %46736 = torch.prim.ListConstruct %int4_44246, %2482, %int1792_44247 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46737 = torch.aten.view %46735, %46736 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44248 = torch.constant.int 4
    %46738 = torch.aten.mul.int %int4_44248, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44249 = torch.constant.int 4096
    %46739 = torch.prim.ListConstruct %46738, %int4096_44249 : (!torch.int, !torch.int) -> !torch.list<int>
    %46740 = torch.aten.view %46617, %46739 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46740, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46741 = torch.aten.mm %46740, %46703 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46741, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44250 = torch.constant.int 4
    %int1792_44251 = torch.constant.int 1792
    %46742 = torch.prim.ListConstruct %int4_44250, %2482, %int1792_44251 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46743 = torch.aten.view %46741, %46742 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44252 = torch.constant.int 4
    %46744 = torch.aten.mul.int %int4_44252, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44253 = torch.constant.int 4096
    %46745 = torch.prim.ListConstruct %46744, %int4096_44253 : (!torch.int, !torch.int) -> !torch.list<int>
    %46746 = torch.aten.view %46618, %46745 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46746, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46747 = torch.aten.mm %46746, %46705 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46747, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44254 = torch.constant.int 4
    %int1792_44255 = torch.constant.int 1792
    %46748 = torch.prim.ListConstruct %int4_44254, %2482, %int1792_44255 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46749 = torch.aten.view %46747, %46748 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_44256 = torch.constant.int 4
    %46750 = torch.aten.mul.int %int4_44256, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44257 = torch.constant.int 4096
    %46751 = torch.prim.ListConstruct %46750, %int4096_44257 : (!torch.int, !torch.int) -> !torch.list<int>
    %46752 = torch.aten.view %46619, %46751 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46752, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %46753 = torch.aten.mm %46752, %46707 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46753, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_44258 = torch.constant.int 4
    %int1792_44259 = torch.constant.int 1792
    %46754 = torch.prim.ListConstruct %int4_44258, %2482, %int1792_44259 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46755 = torch.aten.view %46753, %46754 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
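    // Elementwise silu(gate) * up per shard, completing the SwiGLU gating.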
    %46756 = torch.aten.mul.Tensor %46684, %46713 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46757 = torch.aten.mul.Tensor %46685, %46719 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46758 = torch.aten.mul.Tensor %46686, %46725 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46759 = torch.aten.mul.Tensor %46687, %46731 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46760 = torch.aten.mul.Tensor %46688, %46737 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46761 = torch.aten.mul.Tensor %46689, %46743 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46762 = torch.aten.mul.Tensor %46690, %46749 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %46763 = torch.aten.mul.Tensor %46691, %46755 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %46763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
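    // Transpose the eight per-shard down-projection weights %1728..%1735 from [4096, 1792] to [1792, 4096].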
    %int1_44260 = torch.constant.int 1
    %int0_44261 = torch.constant.int 0
    %46764 = torch.prim.ListConstruct %int1_44260, %int0_44261 : (!torch.int, !torch.int) -> !torch.list<int>
    %46765 = torch.aten.permute %1728, %46764 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_44262 = torch.constant.int 1
    %int0_44263 = torch.constant.int 0
    %46766 = torch.prim.ListConstruct %int1_44262, %int0_44263 : (!torch.int, !torch.int) -> !torch.list<int>
    %46767 = torch.aten.permute %1729, %46766 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_44264 = torch.constant.int 1
    %int0_44265 = torch.constant.int 0
    %46768 = torch.prim.ListConstruct %int1_44264, %int0_44265 : (!torch.int, !torch.int) -> !torch.list<int>
    %46769 = torch.aten.permute %1730, %46768 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_44266 = torch.constant.int 1
    %int0_44267 = torch.constant.int 0
    %46770 = torch.prim.ListConstruct %int1_44266, %int0_44267 : (!torch.int, !torch.int) -> !torch.list<int>
    %46771 = torch.aten.permute %1731, %46770 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_44268 = torch.constant.int 1
    %int0_44269 = torch.constant.int 0
    %46772 = torch.prim.ListConstruct %int1_44268, %int0_44269 : (!torch.int, !torch.int) -> !torch.list<int>
    %46773 = torch.aten.permute %1732, %46772 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_44270 = torch.constant.int 1
    %int0_44271 = torch.constant.int 0
    %46774 = torch.prim.ListConstruct %int1_44270, %int0_44271 : (!torch.int, !torch.int) -> !torch.list<int>
    %46775 = torch.aten.permute %1733, %46774 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_44272 = torch.constant.int 1
    %int0_44273 = torch.constant.int 0
    %46776 = torch.prim.ListConstruct %int1_44272, %int0_44273 : (!torch.int, !torch.int) -> !torch.list<int>
    %46777 = torch.aten.permute %1734, %46776 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_44274 = torch.constant.int 1
    %int0_44275 = torch.constant.int 0
    %46778 = torch.prim.ListConstruct %int1_44274, %int0_44275 : (!torch.int, !torch.int) -> !torch.list<int>
    %46779 = torch.aten.permute %1735, %46778 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
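    // Down projection per shard: [4, ?, 1792] -> [?, 1792] -> mm -> [?, 4096] -> [4, ?, 4096].
    // Each device now holds a partial sum of the FFN output.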
    %int1_44276 = torch.constant.int 1
    %46780 = torch.aten.size.int %46641, %int1_44276 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_44277 = torch.constant.int 4
    %46781 = torch.aten.mul.int %int4_44277, %46780 : !torch.int, !torch.int -> !torch.int
    %int1792_44278 = torch.constant.int 1792
    %46782 = torch.prim.ListConstruct %46781, %int1792_44278 : (!torch.int, !torch.int) -> !torch.list<int>
    %46783 = torch.aten.view %46756, %46782 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46783, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %46784 = torch.aten.mm %46783, %46765 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46784, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_44279 = torch.constant.int 4
    %int4096_44280 = torch.constant.int 4096
    %46785 = torch.prim.ListConstruct %int4_44279, %46780, %int4096_44280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46786 = torch.aten.view %46784, %46785 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44281 = torch.constant.int 1
    %46787 = torch.aten.size.int %46647, %int1_44281 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_44282 = torch.constant.int 4
    %46788 = torch.aten.mul.int %int4_44282, %46787 : !torch.int, !torch.int -> !torch.int
    %int1792_44283 = torch.constant.int 1792
    %46789 = torch.prim.ListConstruct %46788, %int1792_44283 : (!torch.int, !torch.int) -> !torch.list<int>
    %46790 = torch.aten.view %46757, %46789 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46790, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %46791 = torch.aten.mm %46790, %46767 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46791, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_44284 = torch.constant.int 4
    %int4096_44285 = torch.constant.int 4096
    %46792 = torch.prim.ListConstruct %int4_44284, %46787, %int4096_44285 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46793 = torch.aten.view %46791, %46792 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44286 = torch.constant.int 1
    %46794 = torch.aten.size.int %46653, %int1_44286 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_44287 = torch.constant.int 4
    %46795 = torch.aten.mul.int %int4_44287, %46794 : !torch.int, !torch.int -> !torch.int
    %int1792_44288 = torch.constant.int 1792
    %46796 = torch.prim.ListConstruct %46795, %int1792_44288 : (!torch.int, !torch.int) -> !torch.list<int>
    %46797 = torch.aten.view %46758, %46796 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46797, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %46798 = torch.aten.mm %46797, %46769 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46798, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_44289 = torch.constant.int 4
    %int4096_44290 = torch.constant.int 4096
    %46799 = torch.prim.ListConstruct %int4_44289, %46794, %int4096_44290 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46800 = torch.aten.view %46798, %46799 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44291 = torch.constant.int 1
    %46801 = torch.aten.size.int %46659, %int1_44291 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_44292 = torch.constant.int 4
    %46802 = torch.aten.mul.int %int4_44292, %46801 : !torch.int, !torch.int -> !torch.int
    %int1792_44293 = torch.constant.int 1792
    %46803 = torch.prim.ListConstruct %46802, %int1792_44293 : (!torch.int, !torch.int) -> !torch.list<int>
    %46804 = torch.aten.view %46759, %46803 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46804, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %46805 = torch.aten.mm %46804, %46771 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46805, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_44294 = torch.constant.int 4
    %int4096_44295 = torch.constant.int 4096
    %46806 = torch.prim.ListConstruct %int4_44294, %46801, %int4096_44295 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46807 = torch.aten.view %46805, %46806 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44296 = torch.constant.int 1
    %46808 = torch.aten.size.int %46665, %int1_44296 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_44297 = torch.constant.int 4
    %46809 = torch.aten.mul.int %int4_44297, %46808 : !torch.int, !torch.int -> !torch.int
    %int1792_44298 = torch.constant.int 1792
    %46810 = torch.prim.ListConstruct %46809, %int1792_44298 : (!torch.int, !torch.int) -> !torch.list<int>
    %46811 = torch.aten.view %46760, %46810 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46811, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %46812 = torch.aten.mm %46811, %46773 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46812, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_44299 = torch.constant.int 4
    %int4096_44300 = torch.constant.int 4096
    %46813 = torch.prim.ListConstruct %int4_44299, %46808, %int4096_44300 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46814 = torch.aten.view %46812, %46813 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44301 = torch.constant.int 1
    %46815 = torch.aten.size.int %46671, %int1_44301 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_44302 = torch.constant.int 4
    %46816 = torch.aten.mul.int %int4_44302, %46815 : !torch.int, !torch.int -> !torch.int
    %int1792_44303 = torch.constant.int 1792
    %46817 = torch.prim.ListConstruct %46816, %int1792_44303 : (!torch.int, !torch.int) -> !torch.list<int>
    %46818 = torch.aten.view %46761, %46817 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46818, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %46819 = torch.aten.mm %46818, %46775 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46819, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_44304 = torch.constant.int 4
    %int4096_44305 = torch.constant.int 4096
    %46820 = torch.prim.ListConstruct %int4_44304, %46815, %int4096_44305 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46821 = torch.aten.view %46819, %46820 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44306 = torch.constant.int 1
    %46822 = torch.aten.size.int %46677, %int1_44306 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_44307 = torch.constant.int 4
    %46823 = torch.aten.mul.int %int4_44307, %46822 : !torch.int, !torch.int -> !torch.int
    %int1792_44308 = torch.constant.int 1792
    %46824 = torch.prim.ListConstruct %46823, %int1792_44308 : (!torch.int, !torch.int) -> !torch.list<int>
    %46825 = torch.aten.view %46762, %46824 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46825, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %46826 = torch.aten.mm %46825, %46777 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46826, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_44309 = torch.constant.int 4
    %int4096_44310 = torch.constant.int 4096
    %46827 = torch.prim.ListConstruct %int4_44309, %46822, %int4096_44310 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46828 = torch.aten.view %46826, %46827 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44311 = torch.constant.int 1
    %46829 = torch.aten.size.int %46683, %int1_44311 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_44312 = torch.constant.int 4
    %46830 = torch.aten.mul.int %int4_44312, %46829 : !torch.int, !torch.int -> !torch.int
    %int1792_44313 = torch.constant.int 1792
    %46831 = torch.prim.ListConstruct %46830, %int1792_44313 : (!torch.int, !torch.int) -> !torch.list<int>
    %46832 = torch.aten.view %46763, %46831 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %46832, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %46833 = torch.aten.mm %46832, %46779 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %46833, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_44314 = torch.constant.int 4
    %int4096_44315 = torch.constant.int 4096
    %46834 = torch.prim.ListConstruct %int4_44314, %46829, %int4096_44315 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %46835 = torch.aten.view %46833, %46834 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
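    // What follows is effectively an unrolled all-reduce: for each device in turn, the seven
    // partials produced elsewhere are transferred in and summed with the local one.
    // First, transfers to @__device_0.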
    %46836 = torch_c.to_builtin_tensor %46793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44316 = arith.constant 1 : index
    %dim_44317 = tensor.dim %46836, %c1_44316 : tensor<4x?x4096xf16>
    %46837 = flow.tensor.transfer %46836 : tensor<4x?x4096xf16>{%dim_44317} to #hal.device.promise<@__device_0>
    %46838 = torch_c.from_builtin_tensor %46837 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46839 = torch_c.to_builtin_tensor %46800 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44318 = arith.constant 1 : index
    %dim_44319 = tensor.dim %46839, %c1_44318 : tensor<4x?x4096xf16>
    %46840 = flow.tensor.transfer %46839 : tensor<4x?x4096xf16>{%dim_44319} to #hal.device.promise<@__device_0>
    %46841 = torch_c.from_builtin_tensor %46840 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46842 = torch_c.to_builtin_tensor %46807 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44320 = arith.constant 1 : index
    %dim_44321 = tensor.dim %46842, %c1_44320 : tensor<4x?x4096xf16>
    %46843 = flow.tensor.transfer %46842 : tensor<4x?x4096xf16>{%dim_44321} to #hal.device.promise<@__device_0>
    %46844 = torch_c.from_builtin_tensor %46843 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46845 = torch_c.to_builtin_tensor %46814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44322 = arith.constant 1 : index
    %dim_44323 = tensor.dim %46845, %c1_44322 : tensor<4x?x4096xf16>
    %46846 = flow.tensor.transfer %46845 : tensor<4x?x4096xf16>{%dim_44323} to #hal.device.promise<@__device_0>
    %46847 = torch_c.from_builtin_tensor %46846 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46848 = torch_c.to_builtin_tensor %46821 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44324 = arith.constant 1 : index
    %dim_44325 = tensor.dim %46848, %c1_44324 : tensor<4x?x4096xf16>
    %46849 = flow.tensor.transfer %46848 : tensor<4x?x4096xf16>{%dim_44325} to #hal.device.promise<@__device_0>
    %46850 = torch_c.from_builtin_tensor %46849 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46851 = torch_c.to_builtin_tensor %46828 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44326 = arith.constant 1 : index
    %dim_44327 = tensor.dim %46851, %c1_44326 : tensor<4x?x4096xf16>
    %46852 = flow.tensor.transfer %46851 : tensor<4x?x4096xf16>{%dim_44327} to #hal.device.promise<@__device_0>
    %46853 = torch_c.from_builtin_tensor %46852 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46854 = torch_c.to_builtin_tensor %46835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44328 = arith.constant 1 : index
    %dim_44329 = tensor.dim %46854, %c1_44328 : tensor<4x?x4096xf16>
    %46855 = flow.tensor.transfer %46854 : tensor<4x?x4096xf16>{%dim_44329} to #hal.device.promise<@__device_0>
    %46856 = torch_c.from_builtin_tensor %46855 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
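    // Sum the eight partials on @__device_0 (%46786 is the local one).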
    %int1_44330 = torch.constant.int 1
    %46857 = torch.aten.add.Tensor %46786, %46838, %int1_44330 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44331 = torch.constant.int 1
    %46858 = torch.aten.add.Tensor %46857, %46841, %int1_44331 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44332 = torch.constant.int 1
    %46859 = torch.aten.add.Tensor %46858, %46844, %int1_44332 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44333 = torch.constant.int 1
    %46860 = torch.aten.add.Tensor %46859, %46847, %int1_44333 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44334 = torch.constant.int 1
    %46861 = torch.aten.add.Tensor %46860, %46850, %int1_44334 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44335 = torch.constant.int 1
    %46862 = torch.aten.add.Tensor %46861, %46853, %int1_44335 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44336 = torch.constant.int 1
    %46863 = torch.aten.add.Tensor %46862, %46856, %int1_44336 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
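    // Repeat for @__device_1; its local partial %46793 is added directly without a transfer.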
    %46864 = torch_c.to_builtin_tensor %46786 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44337 = arith.constant 1 : index
    %dim_44338 = tensor.dim %46864, %c1_44337 : tensor<4x?x4096xf16>
    %46865 = flow.tensor.transfer %46864 : tensor<4x?x4096xf16>{%dim_44338} to #hal.device.promise<@__device_1>
    %46866 = torch_c.from_builtin_tensor %46865 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46867 = torch_c.to_builtin_tensor %46800 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44339 = arith.constant 1 : index
    %dim_44340 = tensor.dim %46867, %c1_44339 : tensor<4x?x4096xf16>
    %46868 = flow.tensor.transfer %46867 : tensor<4x?x4096xf16>{%dim_44340} to #hal.device.promise<@__device_1>
    %46869 = torch_c.from_builtin_tensor %46868 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46870 = torch_c.to_builtin_tensor %46807 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44341 = arith.constant 1 : index
    %dim_44342 = tensor.dim %46870, %c1_44341 : tensor<4x?x4096xf16>
    %46871 = flow.tensor.transfer %46870 : tensor<4x?x4096xf16>{%dim_44342} to #hal.device.promise<@__device_1>
    %46872 = torch_c.from_builtin_tensor %46871 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46873 = torch_c.to_builtin_tensor %46814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44343 = arith.constant 1 : index
    %dim_44344 = tensor.dim %46873, %c1_44343 : tensor<4x?x4096xf16>
    %46874 = flow.tensor.transfer %46873 : tensor<4x?x4096xf16>{%dim_44344} to #hal.device.promise<@__device_1>
    %46875 = torch_c.from_builtin_tensor %46874 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46876 = torch_c.to_builtin_tensor %46821 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44345 = arith.constant 1 : index
    %dim_44346 = tensor.dim %46876, %c1_44345 : tensor<4x?x4096xf16>
    %46877 = flow.tensor.transfer %46876 : tensor<4x?x4096xf16>{%dim_44346} to #hal.device.promise<@__device_1>
    %46878 = torch_c.from_builtin_tensor %46877 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46879 = torch_c.to_builtin_tensor %46828 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44347 = arith.constant 1 : index
    %dim_44348 = tensor.dim %46879, %c1_44347 : tensor<4x?x4096xf16>
    %46880 = flow.tensor.transfer %46879 : tensor<4x?x4096xf16>{%dim_44348} to #hal.device.promise<@__device_1>
    %46881 = torch_c.from_builtin_tensor %46880 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46882 = torch_c.to_builtin_tensor %46835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44349 = arith.constant 1 : index
    %dim_44350 = tensor.dim %46882, %c1_44349 : tensor<4x?x4096xf16>
    %46883 = flow.tensor.transfer %46882 : tensor<4x?x4096xf16>{%dim_44350} to #hal.device.promise<@__device_1>
    %46884 = torch_c.from_builtin_tensor %46883 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
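    // Sum on @__device_1.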
    %int1_44351 = torch.constant.int 1
    %46885 = torch.aten.add.Tensor %46866, %46793, %int1_44351 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44352 = torch.constant.int 1
    %46886 = torch.aten.add.Tensor %46885, %46869, %int1_44352 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44353 = torch.constant.int 1
    %46887 = torch.aten.add.Tensor %46886, %46872, %int1_44353 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44354 = torch.constant.int 1
    %46888 = torch.aten.add.Tensor %46887, %46875, %int1_44354 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44355 = torch.constant.int 1
    %46889 = torch.aten.add.Tensor %46888, %46878, %int1_44355 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44356 = torch.constant.int 1
    %46890 = torch.aten.add.Tensor %46889, %46881, %int1_44356 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44357 = torch.constant.int 1
    %46891 = torch.aten.add.Tensor %46890, %46884, %int1_44357 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
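    // Transfers to @__device_2 (local partial %46800 stays in place).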
    %46892 = torch_c.to_builtin_tensor %46786 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44358 = arith.constant 1 : index
    %dim_44359 = tensor.dim %46892, %c1_44358 : tensor<4x?x4096xf16>
    %46893 = flow.tensor.transfer %46892 : tensor<4x?x4096xf16>{%dim_44359} to #hal.device.promise<@__device_2>
    %46894 = torch_c.from_builtin_tensor %46893 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46895 = torch_c.to_builtin_tensor %46793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44360 = arith.constant 1 : index
    %dim_44361 = tensor.dim %46895, %c1_44360 : tensor<4x?x4096xf16>
    %46896 = flow.tensor.transfer %46895 : tensor<4x?x4096xf16>{%dim_44361} to #hal.device.promise<@__device_2>
    %46897 = torch_c.from_builtin_tensor %46896 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46898 = torch_c.to_builtin_tensor %46807 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44362 = arith.constant 1 : index
    %dim_44363 = tensor.dim %46898, %c1_44362 : tensor<4x?x4096xf16>
    %46899 = flow.tensor.transfer %46898 : tensor<4x?x4096xf16>{%dim_44363} to #hal.device.promise<@__device_2>
    %46900 = torch_c.from_builtin_tensor %46899 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46901 = torch_c.to_builtin_tensor %46814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44364 = arith.constant 1 : index
    %dim_44365 = tensor.dim %46901, %c1_44364 : tensor<4x?x4096xf16>
    %46902 = flow.tensor.transfer %46901 : tensor<4x?x4096xf16>{%dim_44365} to #hal.device.promise<@__device_2>
    %46903 = torch_c.from_builtin_tensor %46902 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46904 = torch_c.to_builtin_tensor %46821 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44366 = arith.constant 1 : index
    %dim_44367 = tensor.dim %46904, %c1_44366 : tensor<4x?x4096xf16>
    %46905 = flow.tensor.transfer %46904 : tensor<4x?x4096xf16>{%dim_44367} to #hal.device.promise<@__device_2>
    %46906 = torch_c.from_builtin_tensor %46905 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46907 = torch_c.to_builtin_tensor %46828 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44368 = arith.constant 1 : index
    %dim_44369 = tensor.dim %46907, %c1_44368 : tensor<4x?x4096xf16>
    %46908 = flow.tensor.transfer %46907 : tensor<4x?x4096xf16>{%dim_44369} to #hal.device.promise<@__device_2>
    %46909 = torch_c.from_builtin_tensor %46908 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46910 = torch_c.to_builtin_tensor %46835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44370 = arith.constant 1 : index
    %dim_44371 = tensor.dim %46910, %c1_44370 : tensor<4x?x4096xf16>
    %46911 = flow.tensor.transfer %46910 : tensor<4x?x4096xf16>{%dim_44371} to #hal.device.promise<@__device_2>
    %46912 = torch_c.from_builtin_tensor %46911 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
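    // Sum on @__device_2.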
    %int1_44372 = torch.constant.int 1
    %46913 = torch.aten.add.Tensor %46894, %46897, %int1_44372 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44373 = torch.constant.int 1
    %46914 = torch.aten.add.Tensor %46913, %46800, %int1_44373 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44374 = torch.constant.int 1
    %46915 = torch.aten.add.Tensor %46914, %46900, %int1_44374 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44375 = torch.constant.int 1
    %46916 = torch.aten.add.Tensor %46915, %46903, %int1_44375 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44376 = torch.constant.int 1
    %46917 = torch.aten.add.Tensor %46916, %46906, %int1_44376 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44377 = torch.constant.int 1
    %46918 = torch.aten.add.Tensor %46917, %46909, %int1_44377 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44378 = torch.constant.int 1
    %46919 = torch.aten.add.Tensor %46918, %46912, %int1_44378 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
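    // Transfers to @__device_3 (local partial %46807 stays in place).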
    %46920 = torch_c.to_builtin_tensor %46786 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44379 = arith.constant 1 : index
    %dim_44380 = tensor.dim %46920, %c1_44379 : tensor<4x?x4096xf16>
    %46921 = flow.tensor.transfer %46920 : tensor<4x?x4096xf16>{%dim_44380} to #hal.device.promise<@__device_3>
    %46922 = torch_c.from_builtin_tensor %46921 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46923 = torch_c.to_builtin_tensor %46793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44381 = arith.constant 1 : index
    %dim_44382 = tensor.dim %46923, %c1_44381 : tensor<4x?x4096xf16>
    %46924 = flow.tensor.transfer %46923 : tensor<4x?x4096xf16>{%dim_44382} to #hal.device.promise<@__device_3>
    %46925 = torch_c.from_builtin_tensor %46924 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46926 = torch_c.to_builtin_tensor %46800 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44383 = arith.constant 1 : index
    %dim_44384 = tensor.dim %46926, %c1_44383 : tensor<4x?x4096xf16>
    %46927 = flow.tensor.transfer %46926 : tensor<4x?x4096xf16>{%dim_44384} to #hal.device.promise<@__device_3>
    %46928 = torch_c.from_builtin_tensor %46927 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46929 = torch_c.to_builtin_tensor %46814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44385 = arith.constant 1 : index
    %dim_44386 = tensor.dim %46929, %c1_44385 : tensor<4x?x4096xf16>
    %46930 = flow.tensor.transfer %46929 : tensor<4x?x4096xf16>{%dim_44386} to #hal.device.promise<@__device_3>
    %46931 = torch_c.from_builtin_tensor %46930 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46932 = torch_c.to_builtin_tensor %46821 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44387 = arith.constant 1 : index
    %dim_44388 = tensor.dim %46932, %c1_44387 : tensor<4x?x4096xf16>
    %46933 = flow.tensor.transfer %46932 : tensor<4x?x4096xf16>{%dim_44388} to #hal.device.promise<@__device_3>
    %46934 = torch_c.from_builtin_tensor %46933 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46935 = torch_c.to_builtin_tensor %46828 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44389 = arith.constant 1 : index
    %dim_44390 = tensor.dim %46935, %c1_44389 : tensor<4x?x4096xf16>
    %46936 = flow.tensor.transfer %46935 : tensor<4x?x4096xf16>{%dim_44390} to #hal.device.promise<@__device_3>
    %46937 = torch_c.from_builtin_tensor %46936 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46938 = torch_c.to_builtin_tensor %46835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44391 = arith.constant 1 : index
    %dim_44392 = tensor.dim %46938, %c1_44391 : tensor<4x?x4096xf16>
    %46939 = flow.tensor.transfer %46938 : tensor<4x?x4096xf16>{%dim_44392} to #hal.device.promise<@__device_3>
    %46940 = torch_c.from_builtin_tensor %46939 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44393 = torch.constant.int 1
    %46941 = torch.aten.add.Tensor %46922, %46925, %int1_44393 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44394 = torch.constant.int 1
    %46942 = torch.aten.add.Tensor %46941, %46928, %int1_44394 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44395 = torch.constant.int 1
    %46943 = torch.aten.add.Tensor %46942, %46807, %int1_44395 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44396 = torch.constant.int 1
    %46944 = torch.aten.add.Tensor %46943, %46931, %int1_44396 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44397 = torch.constant.int 1
    %46945 = torch.aten.add.Tensor %46944, %46934, %int1_44397 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44398 = torch.constant.int 1
    %46946 = torch.aten.add.Tensor %46945, %46937, %int1_44398 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44399 = torch.constant.int 1
    %46947 = torch.aten.add.Tensor %46946, %46940, %int1_44399 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
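    // End of the gather-and-accumulate (all-reduce style) onto @__device_3: seven
    // remote partials are transferred in and summed together with the one operand
    // added without a transfer (%46807, presumably already resident on that
    // device), yielding %46947. The same pattern repeats next for @__device_4.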
    %46948 = torch_c.to_builtin_tensor %46786 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44400 = arith.constant 1 : index
    %dim_44401 = tensor.dim %46948, %c1_44400 : tensor<4x?x4096xf16>
    %46949 = flow.tensor.transfer %46948 : tensor<4x?x4096xf16>{%dim_44401} to #hal.device.promise<@__device_4>
    %46950 = torch_c.from_builtin_tensor %46949 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46951 = torch_c.to_builtin_tensor %46793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44402 = arith.constant 1 : index
    %dim_44403 = tensor.dim %46951, %c1_44402 : tensor<4x?x4096xf16>
    %46952 = flow.tensor.transfer %46951 : tensor<4x?x4096xf16>{%dim_44403} to #hal.device.promise<@__device_4>
    %46953 = torch_c.from_builtin_tensor %46952 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46954 = torch_c.to_builtin_tensor %46800 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44404 = arith.constant 1 : index
    %dim_44405 = tensor.dim %46954, %c1_44404 : tensor<4x?x4096xf16>
    %46955 = flow.tensor.transfer %46954 : tensor<4x?x4096xf16>{%dim_44405} to #hal.device.promise<@__device_4>
    %46956 = torch_c.from_builtin_tensor %46955 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46957 = torch_c.to_builtin_tensor %46807 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44406 = arith.constant 1 : index
    %dim_44407 = tensor.dim %46957, %c1_44406 : tensor<4x?x4096xf16>
    %46958 = flow.tensor.transfer %46957 : tensor<4x?x4096xf16>{%dim_44407} to #hal.device.promise<@__device_4>
    %46959 = torch_c.from_builtin_tensor %46958 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46960 = torch_c.to_builtin_tensor %46821 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44408 = arith.constant 1 : index
    %dim_44409 = tensor.dim %46960, %c1_44408 : tensor<4x?x4096xf16>
    %46961 = flow.tensor.transfer %46960 : tensor<4x?x4096xf16>{%dim_44409} to #hal.device.promise<@__device_4>
    %46962 = torch_c.from_builtin_tensor %46961 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46963 = torch_c.to_builtin_tensor %46828 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44410 = arith.constant 1 : index
    %dim_44411 = tensor.dim %46963, %c1_44410 : tensor<4x?x4096xf16>
    %46964 = flow.tensor.transfer %46963 : tensor<4x?x4096xf16>{%dim_44411} to #hal.device.promise<@__device_4>
    %46965 = torch_c.from_builtin_tensor %46964 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46966 = torch_c.to_builtin_tensor %46835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44412 = arith.constant 1 : index
    %dim_44413 = tensor.dim %46966, %c1_44412 : tensor<4x?x4096xf16>
    %46967 = flow.tensor.transfer %46966 : tensor<4x?x4096xf16>{%dim_44413} to #hal.device.promise<@__device_4>
    %46968 = torch_c.from_builtin_tensor %46967 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44414 = torch.constant.int 1
    %46969 = torch.aten.add.Tensor %46950, %46953, %int1_44414 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44415 = torch.constant.int 1
    %46970 = torch.aten.add.Tensor %46969, %46956, %int1_44415 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44416 = torch.constant.int 1
    %46971 = torch.aten.add.Tensor %46970, %46959, %int1_44416 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44417 = torch.constant.int 1
    %46972 = torch.aten.add.Tensor %46971, %46814, %int1_44417 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44418 = torch.constant.int 1
    %46973 = torch.aten.add.Tensor %46972, %46962, %int1_44418 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44419 = torch.constant.int 1
    %46974 = torch.aten.add.Tensor %46973, %46965, %int1_44419 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44420 = torch.constant.int 1
    %46975 = torch.aten.add.Tensor %46974, %46968, %int1_44420 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
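    // Reduction replicated on @__device_4 (result %46975; untransferred operand %46814).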
    %46976 = torch_c.to_builtin_tensor %46786 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44421 = arith.constant 1 : index
    %dim_44422 = tensor.dim %46976, %c1_44421 : tensor<4x?x4096xf16>
    %46977 = flow.tensor.transfer %46976 : tensor<4x?x4096xf16>{%dim_44422} to #hal.device.promise<@__device_5>
    %46978 = torch_c.from_builtin_tensor %46977 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46979 = torch_c.to_builtin_tensor %46793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44423 = arith.constant 1 : index
    %dim_44424 = tensor.dim %46979, %c1_44423 : tensor<4x?x4096xf16>
    %46980 = flow.tensor.transfer %46979 : tensor<4x?x4096xf16>{%dim_44424} to #hal.device.promise<@__device_5>
    %46981 = torch_c.from_builtin_tensor %46980 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46982 = torch_c.to_builtin_tensor %46800 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44425 = arith.constant 1 : index
    %dim_44426 = tensor.dim %46982, %c1_44425 : tensor<4x?x4096xf16>
    %46983 = flow.tensor.transfer %46982 : tensor<4x?x4096xf16>{%dim_44426} to #hal.device.promise<@__device_5>
    %46984 = torch_c.from_builtin_tensor %46983 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46985 = torch_c.to_builtin_tensor %46807 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44427 = arith.constant 1 : index
    %dim_44428 = tensor.dim %46985, %c1_44427 : tensor<4x?x4096xf16>
    %46986 = flow.tensor.transfer %46985 : tensor<4x?x4096xf16>{%dim_44428} to #hal.device.promise<@__device_5>
    %46987 = torch_c.from_builtin_tensor %46986 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46988 = torch_c.to_builtin_tensor %46814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44429 = arith.constant 1 : index
    %dim_44430 = tensor.dim %46988, %c1_44429 : tensor<4x?x4096xf16>
    %46989 = flow.tensor.transfer %46988 : tensor<4x?x4096xf16>{%dim_44430} to #hal.device.promise<@__device_5>
    %46990 = torch_c.from_builtin_tensor %46989 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46991 = torch_c.to_builtin_tensor %46828 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44431 = arith.constant 1 : index
    %dim_44432 = tensor.dim %46991, %c1_44431 : tensor<4x?x4096xf16>
    %46992 = flow.tensor.transfer %46991 : tensor<4x?x4096xf16>{%dim_44432} to #hal.device.promise<@__device_5>
    %46993 = torch_c.from_builtin_tensor %46992 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %46994 = torch_c.to_builtin_tensor %46835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44433 = arith.constant 1 : index
    %dim_44434 = tensor.dim %46994, %c1_44433 : tensor<4x?x4096xf16>
    %46995 = flow.tensor.transfer %46994 : tensor<4x?x4096xf16>{%dim_44434} to #hal.device.promise<@__device_5>
    %46996 = torch_c.from_builtin_tensor %46995 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44435 = torch.constant.int 1
    %46997 = torch.aten.add.Tensor %46978, %46981, %int1_44435 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44436 = torch.constant.int 1
    %46998 = torch.aten.add.Tensor %46997, %46984, %int1_44436 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44437 = torch.constant.int 1
    %46999 = torch.aten.add.Tensor %46998, %46987, %int1_44437 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %46999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44438 = torch.constant.int 1
    %47000 = torch.aten.add.Tensor %46999, %46990, %int1_44438 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44439 = torch.constant.int 1
    %47001 = torch.aten.add.Tensor %47000, %46821, %int1_44439 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44440 = torch.constant.int 1
    %47002 = torch.aten.add.Tensor %47001, %46993, %int1_44440 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44441 = torch.constant.int 1
    %47003 = torch.aten.add.Tensor %47002, %46996, %int1_44441 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
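    // Reduction replicated on @__device_5 (result %47003; untransferred operand %46821).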
    %47004 = torch_c.to_builtin_tensor %46786 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44442 = arith.constant 1 : index
    %dim_44443 = tensor.dim %47004, %c1_44442 : tensor<4x?x4096xf16>
    %47005 = flow.tensor.transfer %47004 : tensor<4x?x4096xf16>{%dim_44443} to #hal.device.promise<@__device_6>
    %47006 = torch_c.from_builtin_tensor %47005 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %47007 = torch_c.to_builtin_tensor %46793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44444 = arith.constant 1 : index
    %dim_44445 = tensor.dim %47007, %c1_44444 : tensor<4x?x4096xf16>
    %47008 = flow.tensor.transfer %47007 : tensor<4x?x4096xf16>{%dim_44445} to #hal.device.promise<@__device_6>
    %47009 = torch_c.from_builtin_tensor %47008 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %47010 = torch_c.to_builtin_tensor %46800 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44446 = arith.constant 1 : index
    %dim_44447 = tensor.dim %47010, %c1_44446 : tensor<4x?x4096xf16>
    %47011 = flow.tensor.transfer %47010 : tensor<4x?x4096xf16>{%dim_44447} to #hal.device.promise<@__device_6>
    %47012 = torch_c.from_builtin_tensor %47011 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %47013 = torch_c.to_builtin_tensor %46807 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44448 = arith.constant 1 : index
    %dim_44449 = tensor.dim %47013, %c1_44448 : tensor<4x?x4096xf16>
    %47014 = flow.tensor.transfer %47013 : tensor<4x?x4096xf16>{%dim_44449} to #hal.device.promise<@__device_6>
    %47015 = torch_c.from_builtin_tensor %47014 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %47016 = torch_c.to_builtin_tensor %46814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44450 = arith.constant 1 : index
    %dim_44451 = tensor.dim %47016, %c1_44450 : tensor<4x?x4096xf16>
    %47017 = flow.tensor.transfer %47016 : tensor<4x?x4096xf16>{%dim_44451} to #hal.device.promise<@__device_6>
    %47018 = torch_c.from_builtin_tensor %47017 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %47019 = torch_c.to_builtin_tensor %46821 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44452 = arith.constant 1 : index
    %dim_44453 = tensor.dim %47019, %c1_44452 : tensor<4x?x4096xf16>
    %47020 = flow.tensor.transfer %47019 : tensor<4x?x4096xf16>{%dim_44453} to #hal.device.promise<@__device_6>
    %47021 = torch_c.from_builtin_tensor %47020 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %47022 = torch_c.to_builtin_tensor %46835 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44454 = arith.constant 1 : index
    %dim_44455 = tensor.dim %47022, %c1_44454 : tensor<4x?x4096xf16>
    %47023 = flow.tensor.transfer %47022 : tensor<4x?x4096xf16>{%dim_44455} to #hal.device.promise<@__device_6>
    %47024 = torch_c.from_builtin_tensor %47023 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44456 = torch.constant.int 1
    %47025 = torch.aten.add.Tensor %47006, %47009, %int1_44456 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44457 = torch.constant.int 1
    %47026 = torch.aten.add.Tensor %47025, %47012, %int1_44457 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44458 = torch.constant.int 1
    %47027 = torch.aten.add.Tensor %47026, %47015, %int1_44458 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44459 = torch.constant.int 1
    %47028 = torch.aten.add.Tensor %47027, %47018, %int1_44459 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44460 = torch.constant.int 1
    %47029 = torch.aten.add.Tensor %47028, %47021, %int1_44460 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44461 = torch.constant.int 1
    %47030 = torch.aten.add.Tensor %47029, %46828, %int1_44461 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44462 = torch.constant.int 1
    %47031 = torch.aten.add.Tensor %47030, %47024, %int1_44462 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
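    // Reduction replicated on @__device_6 (result %47031; untransferred operand %46828).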
    %47032 = torch_c.to_builtin_tensor %46786 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44463 = arith.constant 1 : index
    %dim_44464 = tensor.dim %47032, %c1_44463 : tensor<4x?x4096xf16>
    %47033 = flow.tensor.transfer %47032 : tensor<4x?x4096xf16>{%dim_44464} to #hal.device.promise<@__device_7>
    %47034 = torch_c.from_builtin_tensor %47033 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %47035 = torch_c.to_builtin_tensor %46793 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44465 = arith.constant 1 : index
    %dim_44466 = tensor.dim %47035, %c1_44465 : tensor<4x?x4096xf16>
    %47036 = flow.tensor.transfer %47035 : tensor<4x?x4096xf16>{%dim_44466} to #hal.device.promise<@__device_7>
    %47037 = torch_c.from_builtin_tensor %47036 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %47038 = torch_c.to_builtin_tensor %46800 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44467 = arith.constant 1 : index
    %dim_44468 = tensor.dim %47038, %c1_44467 : tensor<4x?x4096xf16>
    %47039 = flow.tensor.transfer %47038 : tensor<4x?x4096xf16>{%dim_44468} to #hal.device.promise<@__device_7>
    %47040 = torch_c.from_builtin_tensor %47039 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %47041 = torch_c.to_builtin_tensor %46807 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44469 = arith.constant 1 : index
    %dim_44470 = tensor.dim %47041, %c1_44469 : tensor<4x?x4096xf16>
    %47042 = flow.tensor.transfer %47041 : tensor<4x?x4096xf16>{%dim_44470} to #hal.device.promise<@__device_7>
    %47043 = torch_c.from_builtin_tensor %47042 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %47044 = torch_c.to_builtin_tensor %46814 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44471 = arith.constant 1 : index
    %dim_44472 = tensor.dim %47044, %c1_44471 : tensor<4x?x4096xf16>
    %47045 = flow.tensor.transfer %47044 : tensor<4x?x4096xf16>{%dim_44472} to #hal.device.promise<@__device_7>
    %47046 = torch_c.from_builtin_tensor %47045 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %47047 = torch_c.to_builtin_tensor %46821 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44473 = arith.constant 1 : index
    %dim_44474 = tensor.dim %47047, %c1_44473 : tensor<4x?x4096xf16>
    %47048 = flow.tensor.transfer %47047 : tensor<4x?x4096xf16>{%dim_44474} to #hal.device.promise<@__device_7>
    %47049 = torch_c.from_builtin_tensor %47048 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %47050 = torch_c.to_builtin_tensor %46828 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_44475 = arith.constant 1 : index
    %dim_44476 = tensor.dim %47050, %c1_44475 : tensor<4x?x4096xf16>
    %47051 = flow.tensor.transfer %47050 : tensor<4x?x4096xf16>{%dim_44476} to #hal.device.promise<@__device_7>
    %47052 = torch_c.from_builtin_tensor %47051 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44477 = torch.constant.int 1
    %47053 = torch.aten.add.Tensor %47034, %47037, %int1_44477 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44478 = torch.constant.int 1
    %47054 = torch.aten.add.Tensor %47053, %47040, %int1_44478 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44479 = torch.constant.int 1
    %47055 = torch.aten.add.Tensor %47054, %47043, %int1_44479 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44480 = torch.constant.int 1
    %47056 = torch.aten.add.Tensor %47055, %47046, %int1_44480 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44481 = torch.constant.int 1
    %47057 = torch.aten.add.Tensor %47056, %47049, %int1_44481 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44482 = torch.constant.int 1
    %47058 = torch.aten.add.Tensor %47057, %47052, %int1_44482 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44483 = torch.constant.int 1
    %47059 = torch.aten.add.Tensor %47058, %46835, %int1_44483 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
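    // Reduction replicated on @__device_7 (result %47059; untransferred operand %46835).
    // Each device now holds the same full sum; next it is added into every
    // device's residual stream.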
    %int1_44484 = torch.constant.int 1
    %47060 = torch.aten.add.Tensor %46540, %46863, %int1_44484 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44485 = torch.constant.int 1
    %47061 = torch.aten.add.Tensor %46541, %46891, %int1_44485 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44486 = torch.constant.int 1
    %47062 = torch.aten.add.Tensor %46542, %46919, %int1_44486 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44487 = torch.constant.int 1
    %47063 = torch.aten.add.Tensor %46543, %46947, %int1_44487 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44488 = torch.constant.int 1
    %47064 = torch.aten.add.Tensor %46544, %46975, %int1_44488 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44489 = torch.constant.int 1
    %47065 = torch.aten.add.Tensor %46545, %47003, %int1_44489 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44490 = torch.constant.int 1
    %47066 = torch.aten.add.Tensor %46546, %47031, %int1_44490 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_44491 = torch.constant.int 1
    %47067 = torch.aten.add.Tensor %46547, %47059, %int1_44491 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
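    // Per-device residual add: %47060-%47067 = prior activations (%46540-%46547)
    // plus the reduced outputs (%46863/%46891/%46919 produced by the same pattern
    // earlier, then %46947/%46975/%47003/%47031/%47059). What follows is an
    // RMSNorm-style epilogue computed independently on all eight devices: upcast
    // to f32, square, mean over the hidden dim, add eps, rsqrt, normalize, apply
    // the learned gain, and downcast back to f16.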
    %int6_44492 = torch.constant.int 6
    %47068 = torch.prims.convert_element_type %47060, %int6_44492 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44493 = torch.constant.int 6
    %47069 = torch.prims.convert_element_type %47061, %int6_44493 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44494 = torch.constant.int 6
    %47070 = torch.prims.convert_element_type %47062, %int6_44494 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44495 = torch.constant.int 6
    %47071 = torch.prims.convert_element_type %47063, %int6_44495 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44496 = torch.constant.int 6
    %47072 = torch.prims.convert_element_type %47064, %int6_44496 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44497 = torch.constant.int 6
    %47073 = torch.prims.convert_element_type %47065, %int6_44497 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44498 = torch.constant.int 6
    %47074 = torch.prims.convert_element_type %47066, %int6_44498 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_44499 = torch.constant.int 6
    %47075 = torch.prims.convert_element_type %47067, %int6_44499 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
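    // Upcast complete (torch scalar type 6 = f32). Next: square each element;
    // the mean of these squares is the RMS statistic.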
    %int2_44500 = torch.constant.int 2
    %47076 = torch.aten.pow.Tensor_Scalar %47068, %int2_44500 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44501 = torch.constant.int 2
    %47077 = torch.aten.pow.Tensor_Scalar %47069, %int2_44501 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44502 = torch.constant.int 2
    %47078 = torch.aten.pow.Tensor_Scalar %47070, %int2_44502 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44503 = torch.constant.int 2
    %47079 = torch.aten.pow.Tensor_Scalar %47071, %int2_44503 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44504 = torch.constant.int 2
    %47080 = torch.aten.pow.Tensor_Scalar %47072, %int2_44504 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44505 = torch.constant.int 2
    %47081 = torch.aten.pow.Tensor_Scalar %47073, %int2_44505 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44506 = torch.constant.int 2
    %47082 = torch.aten.pow.Tensor_Scalar %47074, %int2_44506 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_44507 = torch.constant.int 2
    %47083 = torch.aten.pow.Tensor_Scalar %47075, %int2_44507 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
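    // Per-token mean over the hidden dim (dims = [-1], keepdim = true) -> [4,?,1].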
    %int-1_44508 = torch.constant.int -1
    %47084 = torch.prim.ListConstruct %int-1_44508 : (!torch.int) -> !torch.list<int>
    %true_44509 = torch.constant.bool true
    %none_44510 = torch.constant.none
    %47085 = torch.aten.mean.dim %47076, %47084, %true_44509, %none_44510 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44511 = torch.constant.int -1
    %47086 = torch.prim.ListConstruct %int-1_44511 : (!torch.int) -> !torch.list<int>
    %true_44512 = torch.constant.bool true
    %none_44513 = torch.constant.none
    %47087 = torch.aten.mean.dim %47077, %47086, %true_44512, %none_44513 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44514 = torch.constant.int -1
    %47088 = torch.prim.ListConstruct %int-1_44514 : (!torch.int) -> !torch.list<int>
    %true_44515 = torch.constant.bool true
    %none_44516 = torch.constant.none
    %47089 = torch.aten.mean.dim %47078, %47088, %true_44515, %none_44516 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44517 = torch.constant.int -1
    %47090 = torch.prim.ListConstruct %int-1_44517 : (!torch.int) -> !torch.list<int>
    %true_44518 = torch.constant.bool true
    %none_44519 = torch.constant.none
    %47091 = torch.aten.mean.dim %47079, %47090, %true_44518, %none_44519 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44520 = torch.constant.int -1
    %47092 = torch.prim.ListConstruct %int-1_44520 : (!torch.int) -> !torch.list<int>
    %true_44521 = torch.constant.bool true
    %none_44522 = torch.constant.none
    %47093 = torch.aten.mean.dim %47080, %47092, %true_44521, %none_44522 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44523 = torch.constant.int -1
    %47094 = torch.prim.ListConstruct %int-1_44523 : (!torch.int) -> !torch.list<int>
    %true_44524 = torch.constant.bool true
    %none_44525 = torch.constant.none
    %47095 = torch.aten.mean.dim %47081, %47094, %true_44524, %none_44525 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44526 = torch.constant.int -1
    %47096 = torch.prim.ListConstruct %int-1_44526 : (!torch.int) -> !torch.list<int>
    %true_44527 = torch.constant.bool true
    %none_44528 = torch.constant.none
    %47097 = torch.aten.mean.dim %47082, %47096, %true_44527, %none_44528 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_44529 = torch.constant.int -1
    %47098 = torch.prim.ListConstruct %int-1_44529 : (!torch.int) -> !torch.list<int>
    %true_44530 = torch.constant.bool true
    %none_44531 = torch.constant.none
    %47099 = torch.aten.mean.dim %47083, %47098, %true_44530, %none_44531 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
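    // Stabilize with eps = 9.9999997473787516e-6 (~1e-5) before taking the rsqrt.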
    %float9.999990e-06_44532 = torch.constant.float 9.9999997473787516E-6
    %int1_44533 = torch.constant.int 1
    %47100 = torch.aten.add.Scalar %47085, %float9.999990e-06_44532, %int1_44533 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44534 = torch.constant.float 9.9999997473787516E-6
    %int1_44535 = torch.constant.int 1
    %47101 = torch.aten.add.Scalar %47087, %float9.999990e-06_44534, %int1_44535 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44536 = torch.constant.float 9.9999997473787516E-6
    %int1_44537 = torch.constant.int 1
    %47102 = torch.aten.add.Scalar %47089, %float9.999990e-06_44536, %int1_44537 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44538 = torch.constant.float 9.9999997473787516E-6
    %int1_44539 = torch.constant.int 1
    %47103 = torch.aten.add.Scalar %47091, %float9.999990e-06_44538, %int1_44539 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44540 = torch.constant.float 9.9999997473787516E-6
    %int1_44541 = torch.constant.int 1
    %47104 = torch.aten.add.Scalar %47093, %float9.999990e-06_44540, %int1_44541 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44542 = torch.constant.float 9.9999997473787516E-6
    %int1_44543 = torch.constant.int 1
    %47105 = torch.aten.add.Scalar %47095, %float9.999990e-06_44542, %int1_44543 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44544 = torch.constant.float 9.9999997473787516E-6
    %int1_44545 = torch.constant.int 1
    %47106 = torch.aten.add.Scalar %47097, %float9.999990e-06_44544, %int1_44545 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_44546 = torch.constant.float 9.9999997473787516E-6
    %int1_44547 = torch.constant.int 1
    %47107 = torch.aten.add.Scalar %47099, %float9.999990e-06_44546, %int1_44547 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
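    // rsqrt(mean(x^2) + eps): one [4,?,1] scale tensor per device.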
    %47108 = torch.aten.rsqrt %47100 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %47109 = torch.aten.rsqrt %47101 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %47110 = torch.aten.rsqrt %47102 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %47111 = torch.aten.rsqrt %47103 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %47112 = torch.aten.rsqrt %47104 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %47113 = torch.aten.rsqrt %47105 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %47114 = torch.aten.rsqrt %47106 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %47115 = torch.aten.rsqrt %47107 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %47115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
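    // Normalize: broadcast-multiply each f32 activation by its rsqrt scale.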
    %47116 = torch.aten.mul.Tensor %47068, %47108 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47117 = torch.aten.mul.Tensor %47069, %47109 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47118 = torch.aten.mul.Tensor %47070, %47110 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47119 = torch.aten.mul.Tensor %47071, %47111 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47120 = torch.aten.mul.Tensor %47072, %47112 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47121 = torch.aten.mul.Tensor %47073, %47113 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47122 = torch.aten.mul.Tensor %47074, %47114 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47123 = torch.aten.mul.Tensor %47075, %47115 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
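    // Apply the per-device norm gain (%1736-%1743, [4096] f32 weights).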
    %47124 = torch.aten.mul.Tensor %1736, %47116 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47124, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47125 = torch.aten.mul.Tensor %1737, %47117 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47126 = torch.aten.mul.Tensor %1738, %47118 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47127 = torch.aten.mul.Tensor %1739, %47119 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47128 = torch.aten.mul.Tensor %1740, %47120 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47129 = torch.aten.mul.Tensor %1741, %47121 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47130 = torch.aten.mul.Tensor %1742, %47122 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %47131 = torch.aten.mul.Tensor %1743, %47123 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %47131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
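    // Downcast to f16 (torch scalar type 5) for the matmuls below.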
    %int5_44548 = torch.constant.int 5
    %47132 = torch.prims.convert_element_type %47124, %int5_44548 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44549 = torch.constant.int 5
    %47133 = torch.prims.convert_element_type %47125, %int5_44549 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44550 = torch.constant.int 5
    %47134 = torch.prims.convert_element_type %47126, %int5_44550 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44551 = torch.constant.int 5
    %47135 = torch.prims.convert_element_type %47127, %int5_44551 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44552 = torch.constant.int 5
    %47136 = torch.prims.convert_element_type %47128, %int5_44552 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44553 = torch.constant.int 5
    %47137 = torch.prims.convert_element_type %47129, %int5_44553 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44554 = torch.constant.int 5
    %47138 = torch.prims.convert_element_type %47130, %int5_44554 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_44555 = torch.constant.int 5
    %47139 = torch.prims.convert_element_type %47131, %int5_44555 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %47139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
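    // Transpose eight [512,4096] f16 weight shards (%1744-%1751) to [4096,512].
    // 512 = 4096/8 rows per shard suggests a projection column-sharded across the
    // eight devices (an inference from the shapes, not stated in the IR).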
    %int1_44556 = torch.constant.int 1
    %int0_44557 = torch.constant.int 0
    %47140 = torch.prim.ListConstruct %int1_44556, %int0_44557 : (!torch.int, !torch.int) -> !torch.list<int>
    %47141 = torch.aten.permute %1744, %47140 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_44558 = torch.constant.int 1
    %int0_44559 = torch.constant.int 0
    %47142 = torch.prim.ListConstruct %int1_44558, %int0_44559 : (!torch.int, !torch.int) -> !torch.list<int>
    %47143 = torch.aten.permute %1745, %47142 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_44560 = torch.constant.int 1
    %int0_44561 = torch.constant.int 0
    %47144 = torch.prim.ListConstruct %int1_44560, %int0_44561 : (!torch.int, !torch.int) -> !torch.list<int>
    %47145 = torch.aten.permute %1746, %47144 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_44562 = torch.constant.int 1
    %int0_44563 = torch.constant.int 0
    %47146 = torch.prim.ListConstruct %int1_44562, %int0_44563 : (!torch.int, !torch.int) -> !torch.list<int>
    %47147 = torch.aten.permute %1747, %47146 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_44564 = torch.constant.int 1
    %int0_44565 = torch.constant.int 0
    %47148 = torch.prim.ListConstruct %int1_44564, %int0_44565 : (!torch.int, !torch.int) -> !torch.list<int>
    %47149 = torch.aten.permute %1748, %47148 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_44566 = torch.constant.int 1
    %int0_44567 = torch.constant.int 0
    %47150 = torch.prim.ListConstruct %int1_44566, %int0_44567 : (!torch.int, !torch.int) -> !torch.list<int>
    %47151 = torch.aten.permute %1749, %47150 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_44568 = torch.constant.int 1
    %int0_44569 = torch.constant.int 0
    %47152 = torch.prim.ListConstruct %int1_44568, %int0_44569 : (!torch.int, !torch.int) -> !torch.list<int>
    %47153 = torch.aten.permute %1750, %47152 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_44570 = torch.constant.int 1
    %int0_44571 = torch.constant.int 0
    %47154 = torch.prim.ListConstruct %int1_44570, %int0_44571 : (!torch.int, !torch.int) -> !torch.list<int>
    %47155 = torch.aten.permute %1751, %47154 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
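    // Per-device projection: collapse [4,?,4096] to [4*?,4096] (s0 * 64 flattened
    // tokens), matmul with the [4096,512] shard, and reshape back to [4,?,512].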
    %int4_44572 = torch.constant.int 4
    %47156 = torch.aten.mul.int %int4_44572, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44573 = torch.constant.int 4096
    %47157 = torch.prim.ListConstruct %47156, %int4096_44573 : (!torch.int, !torch.int) -> !torch.list<int>
    %47158 = torch.aten.view %47132, %47157 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47158, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47159 = torch.aten.mm %47158, %47141 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %47159, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_44574 = torch.constant.int 4
    %int512_44575 = torch.constant.int 512
    %47160 = torch.prim.ListConstruct %int4_44574, %2482, %int512_44575 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47161 = torch.aten.view %47159, %47160 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %47161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_44576 = torch.constant.int 4
    %47162 = torch.aten.mul.int %int4_44576, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44577 = torch.constant.int 4096
    %47163 = torch.prim.ListConstruct %47162, %int4096_44577 : (!torch.int, !torch.int) -> !torch.list<int>
    %47164 = torch.aten.view %47133, %47163 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47164, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47165 = torch.aten.mm %47164, %47143 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %47165, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_44578 = torch.constant.int 4
    %int512_44579 = torch.constant.int 512
    %47166 = torch.prim.ListConstruct %int4_44578, %2482, %int512_44579 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47167 = torch.aten.view %47165, %47166 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %47167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_44580 = torch.constant.int 4
    %47168 = torch.aten.mul.int %int4_44580, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44581 = torch.constant.int 4096
    %47169 = torch.prim.ListConstruct %47168, %int4096_44581 : (!torch.int, !torch.int) -> !torch.list<int>
    %47170 = torch.aten.view %47134, %47169 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47170, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47171 = torch.aten.mm %47170, %47145 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %47171, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_44582 = torch.constant.int 4
    %int512_44583 = torch.constant.int 512
    %47172 = torch.prim.ListConstruct %int4_44582, %2482, %int512_44583 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47173 = torch.aten.view %47171, %47172 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %47173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_44584 = torch.constant.int 4
    %47174 = torch.aten.mul.int %int4_44584, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44585 = torch.constant.int 4096
    %47175 = torch.prim.ListConstruct %47174, %int4096_44585 : (!torch.int, !torch.int) -> !torch.list<int>
    %47176 = torch.aten.view %47135, %47175 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47176, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47177 = torch.aten.mm %47176, %47147 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %47177, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_44586 = torch.constant.int 4
    %int512_44587 = torch.constant.int 512
    %47178 = torch.prim.ListConstruct %int4_44586, %2482, %int512_44587 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47179 = torch.aten.view %47177, %47178 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %47179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_44588 = torch.constant.int 4
    %47180 = torch.aten.mul.int %int4_44588, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44589 = torch.constant.int 4096
    %47181 = torch.prim.ListConstruct %47180, %int4096_44589 : (!torch.int, !torch.int) -> !torch.list<int>
    %47182 = torch.aten.view %47136, %47181 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47182, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47183 = torch.aten.mm %47182, %47149 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %47183, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_44590 = torch.constant.int 4
    %int512_44591 = torch.constant.int 512
    %47184 = torch.prim.ListConstruct %int4_44590, %2482, %int512_44591 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47185 = torch.aten.view %47183, %47184 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %47185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_44592 = torch.constant.int 4
    %47186 = torch.aten.mul.int %int4_44592, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44593 = torch.constant.int 4096
    %47187 = torch.prim.ListConstruct %47186, %int4096_44593 : (!torch.int, !torch.int) -> !torch.list<int>
    %47188 = torch.aten.view %47137, %47187 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47188, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47189 = torch.aten.mm %47188, %47151 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %47189, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_44594 = torch.constant.int 4
    %int512_44595 = torch.constant.int 512
    %47190 = torch.prim.ListConstruct %int4_44594, %2482, %int512_44595 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47191 = torch.aten.view %47189, %47190 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %47191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_44596 = torch.constant.int 4
    %47192 = torch.aten.mul.int %int4_44596, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44597 = torch.constant.int 4096
    %47193 = torch.prim.ListConstruct %47192, %int4096_44597 : (!torch.int, !torch.int) -> !torch.list<int>
    %47194 = torch.aten.view %47138, %47193 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47194, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47195 = torch.aten.mm %47194, %47153 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %47195, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_44598 = torch.constant.int 4
    %int512_44599 = torch.constant.int 512
    %47196 = torch.prim.ListConstruct %int4_44598, %2482, %int512_44599 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47197 = torch.aten.view %47195, %47196 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %47197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_44600 = torch.constant.int 4
    %47198 = torch.aten.mul.int %int4_44600, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44601 = torch.constant.int 4096
    %47199 = torch.prim.ListConstruct %47198, %int4096_44601 : (!torch.int, !torch.int) -> !torch.list<int>
    %47200 = torch.aten.view %47139, %47199 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47200, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47201 = torch.aten.mm %47200, %47155 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %47201, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_44602 = torch.constant.int 4
    %int512_44603 = torch.constant.int 512
    %47202 = torch.prim.ListConstruct %int4_44602, %2482, %int512_44603 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47203 = torch.aten.view %47201, %47202 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %47203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
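    // Transpose the eight [128,4096] weight shards (%1752-%1759, likely attn_k:
    // one 128-dim KV head per device) into [4096,128] right-hand operands for mm.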
    %int1_44604 = torch.constant.int 1
    %int0_44605 = torch.constant.int 0
    %47204 = torch.prim.ListConstruct %int1_44604, %int0_44605 : (!torch.int, !torch.int) -> !torch.list<int>
    %47205 = torch.aten.permute %1752, %47204 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44606 = torch.constant.int 1
    %int0_44607 = torch.constant.int 0
    %47206 = torch.prim.ListConstruct %int1_44606, %int0_44607 : (!torch.int, !torch.int) -> !torch.list<int>
    %47207 = torch.aten.permute %1753, %47206 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44608 = torch.constant.int 1
    %int0_44609 = torch.constant.int 0
    %47208 = torch.prim.ListConstruct %int1_44608, %int0_44609 : (!torch.int, !torch.int) -> !torch.list<int>
    %47209 = torch.aten.permute %1754, %47208 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44610 = torch.constant.int 1
    %int0_44611 = torch.constant.int 0
    %47210 = torch.prim.ListConstruct %int1_44610, %int0_44611 : (!torch.int, !torch.int) -> !torch.list<int>
    %47211 = torch.aten.permute %1755, %47210 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44612 = torch.constant.int 1
    %int0_44613 = torch.constant.int 0
    %47212 = torch.prim.ListConstruct %int1_44612, %int0_44613 : (!torch.int, !torch.int) -> !torch.list<int>
    %47213 = torch.aten.permute %1756, %47212 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44614 = torch.constant.int 1
    %int0_44615 = torch.constant.int 0
    %47214 = torch.prim.ListConstruct %int1_44614, %int0_44615 : (!torch.int, !torch.int) -> !torch.list<int>
    %47215 = torch.aten.permute %1757, %47214 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44616 = torch.constant.int 1
    %int0_44617 = torch.constant.int 0
    %47216 = torch.prim.ListConstruct %int1_44616, %int0_44617 : (!torch.int, !torch.int) -> !torch.list<int>
    %47217 = torch.aten.permute %1758, %47216 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44618 = torch.constant.int 1
    %int0_44619 = torch.constant.int 0
    %47218 = torch.prim.ListConstruct %int1_44618, %int0_44619 : (!torch.int, !torch.int) -> !torch.list<int>
    %47219 = torch.aten.permute %1759, %47218 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
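    // Project each activation shard with its transposed weight: [?,4096] x [4096,128]
    // -> [?,128], then view back to [4,?,128] per device.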
    %int4_44620 = torch.constant.int 4
    %47220 = torch.aten.mul.int %int4_44620, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44621 = torch.constant.int 4096
    %47221 = torch.prim.ListConstruct %47220, %int4096_44621 : (!torch.int, !torch.int) -> !torch.list<int>
    %47222 = torch.aten.view %47132, %47221 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47222, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47223 = torch.aten.mm %47222, %47205 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47223, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44622 = torch.constant.int 4
    %int128_44623 = torch.constant.int 128
    %47224 = torch.prim.ListConstruct %int4_44622, %2482, %int128_44623 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47225 = torch.aten.view %47223, %47224 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44624 = torch.constant.int 4
    %47226 = torch.aten.mul.int %int4_44624, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44625 = torch.constant.int 4096
    %47227 = torch.prim.ListConstruct %47226, %int4096_44625 : (!torch.int, !torch.int) -> !torch.list<int>
    %47228 = torch.aten.view %47133, %47227 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47228, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47229 = torch.aten.mm %47228, %47207 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47229, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44626 = torch.constant.int 4
    %int128_44627 = torch.constant.int 128
    %47230 = torch.prim.ListConstruct %int4_44626, %2482, %int128_44627 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47231 = torch.aten.view %47229, %47230 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44628 = torch.constant.int 4
    %47232 = torch.aten.mul.int %int4_44628, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44629 = torch.constant.int 4096
    %47233 = torch.prim.ListConstruct %47232, %int4096_44629 : (!torch.int, !torch.int) -> !torch.list<int>
    %47234 = torch.aten.view %47134, %47233 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47234, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47235 = torch.aten.mm %47234, %47209 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47235, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44630 = torch.constant.int 4
    %int128_44631 = torch.constant.int 128
    %47236 = torch.prim.ListConstruct %int4_44630, %2482, %int128_44631 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47237 = torch.aten.view %47235, %47236 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44632 = torch.constant.int 4
    %47238 = torch.aten.mul.int %int4_44632, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44633 = torch.constant.int 4096
    %47239 = torch.prim.ListConstruct %47238, %int4096_44633 : (!torch.int, !torch.int) -> !torch.list<int>
    %47240 = torch.aten.view %47135, %47239 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47240, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47241 = torch.aten.mm %47240, %47211 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47241, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44634 = torch.constant.int 4
    %int128_44635 = torch.constant.int 128
    %47242 = torch.prim.ListConstruct %int4_44634, %2482, %int128_44635 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47243 = torch.aten.view %47241, %47242 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44636 = torch.constant.int 4
    %47244 = torch.aten.mul.int %int4_44636, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44637 = torch.constant.int 4096
    %47245 = torch.prim.ListConstruct %47244, %int4096_44637 : (!torch.int, !torch.int) -> !torch.list<int>
    %47246 = torch.aten.view %47136, %47245 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47246, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47247 = torch.aten.mm %47246, %47213 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47247, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44638 = torch.constant.int 4
    %int128_44639 = torch.constant.int 128
    %47248 = torch.prim.ListConstruct %int4_44638, %2482, %int128_44639 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47249 = torch.aten.view %47247, %47248 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44640 = torch.constant.int 4
    %47250 = torch.aten.mul.int %int4_44640, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44641 = torch.constant.int 4096
    %47251 = torch.prim.ListConstruct %47250, %int4096_44641 : (!torch.int, !torch.int) -> !torch.list<int>
    %47252 = torch.aten.view %47137, %47251 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47252, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47253 = torch.aten.mm %47252, %47215 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47253, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44642 = torch.constant.int 4
    %int128_44643 = torch.constant.int 128
    %47254 = torch.prim.ListConstruct %int4_44642, %2482, %int128_44643 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47255 = torch.aten.view %47253, %47254 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44644 = torch.constant.int 4
    %47256 = torch.aten.mul.int %int4_44644, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44645 = torch.constant.int 4096
    %47257 = torch.prim.ListConstruct %47256, %int4096_44645 : (!torch.int, !torch.int) -> !torch.list<int>
    %47258 = torch.aten.view %47138, %47257 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47258, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47259 = torch.aten.mm %47258, %47217 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47259, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44646 = torch.constant.int 4
    %int128_44647 = torch.constant.int 128
    %47260 = torch.prim.ListConstruct %int4_44646, %2482, %int128_44647 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47261 = torch.aten.view %47259, %47260 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44648 = torch.constant.int 4
    %47262 = torch.aten.mul.int %int4_44648, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44649 = torch.constant.int 4096
    %47263 = torch.prim.ListConstruct %47262, %int4096_44649 : (!torch.int, !torch.int) -> !torch.list<int>
    %47264 = torch.aten.view %47139, %47263 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47264, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47265 = torch.aten.mm %47264, %47219 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47265, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44650 = torch.constant.int 4
    %int128_44651 = torch.constant.int 128
    %47266 = torch.prim.ListConstruct %int4_44650, %2482, %int128_44651 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47267 = torch.aten.view %47265, %47266 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
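    // Second set of [128,4096] shards (%1760-%1767, presumably attn_v) transposed the
    // same way for the value projections.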
    %int1_44652 = torch.constant.int 1
    %int0_44653 = torch.constant.int 0
    %47268 = torch.prim.ListConstruct %int1_44652, %int0_44653 : (!torch.int, !torch.int) -> !torch.list<int>
    %47269 = torch.aten.permute %1760, %47268 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44654 = torch.constant.int 1
    %int0_44655 = torch.constant.int 0
    %47270 = torch.prim.ListConstruct %int1_44654, %int0_44655 : (!torch.int, !torch.int) -> !torch.list<int>
    %47271 = torch.aten.permute %1761, %47270 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44656 = torch.constant.int 1
    %int0_44657 = torch.constant.int 0
    %47272 = torch.prim.ListConstruct %int1_44656, %int0_44657 : (!torch.int, !torch.int) -> !torch.list<int>
    %47273 = torch.aten.permute %1762, %47272 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44658 = torch.constant.int 1
    %int0_44659 = torch.constant.int 0
    %47274 = torch.prim.ListConstruct %int1_44658, %int0_44659 : (!torch.int, !torch.int) -> !torch.list<int>
    %47275 = torch.aten.permute %1763, %47274 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44660 = torch.constant.int 1
    %int0_44661 = torch.constant.int 0
    %47276 = torch.prim.ListConstruct %int1_44660, %int0_44661 : (!torch.int, !torch.int) -> !torch.list<int>
    %47277 = torch.aten.permute %1764, %47276 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44662 = torch.constant.int 1
    %int0_44663 = torch.constant.int 0
    %47278 = torch.prim.ListConstruct %int1_44662, %int0_44663 : (!torch.int, !torch.int) -> !torch.list<int>
    %47279 = torch.aten.permute %1765, %47278 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44664 = torch.constant.int 1
    %int0_44665 = torch.constant.int 0
    %47280 = torch.prim.ListConstruct %int1_44664, %int0_44665 : (!torch.int, !torch.int) -> !torch.list<int>
    %47281 = torch.aten.permute %1766, %47280 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_44666 = torch.constant.int 1
    %int0_44667 = torch.constant.int 0
    %47282 = torch.prim.ListConstruct %int1_44666, %int0_44667 : (!torch.int, !torch.int) -> !torch.list<int>
    %47283 = torch.aten.permute %1767, %47282 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
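    // V projection per device: the identical flatten/mm/view pattern, again producing
    // a [4,?,128] result on each shard.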
    %int4_44668 = torch.constant.int 4
    %47284 = torch.aten.mul.int %int4_44668, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44669 = torch.constant.int 4096
    %47285 = torch.prim.ListConstruct %47284, %int4096_44669 : (!torch.int, !torch.int) -> !torch.list<int>
    %47286 = torch.aten.view %47132, %47285 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47286, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47287 = torch.aten.mm %47286, %47269 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47287, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44670 = torch.constant.int 4
    %int128_44671 = torch.constant.int 128
    %47288 = torch.prim.ListConstruct %int4_44670, %2482, %int128_44671 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47289 = torch.aten.view %47287, %47288 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44672 = torch.constant.int 4
    %47290 = torch.aten.mul.int %int4_44672, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44673 = torch.constant.int 4096
    %47291 = torch.prim.ListConstruct %47290, %int4096_44673 : (!torch.int, !torch.int) -> !torch.list<int>
    %47292 = torch.aten.view %47133, %47291 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47292, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47293 = torch.aten.mm %47292, %47271 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47293, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44674 = torch.constant.int 4
    %int128_44675 = torch.constant.int 128
    %47294 = torch.prim.ListConstruct %int4_44674, %2482, %int128_44675 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47295 = torch.aten.view %47293, %47294 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44676 = torch.constant.int 4
    %47296 = torch.aten.mul.int %int4_44676, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44677 = torch.constant.int 4096
    %47297 = torch.prim.ListConstruct %47296, %int4096_44677 : (!torch.int, !torch.int) -> !torch.list<int>
    %47298 = torch.aten.view %47134, %47297 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47298, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47299 = torch.aten.mm %47298, %47273 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47299, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44678 = torch.constant.int 4
    %int128_44679 = torch.constant.int 128
    %47300 = torch.prim.ListConstruct %int4_44678, %2482, %int128_44679 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47301 = torch.aten.view %47299, %47300 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44680 = torch.constant.int 4
    %47302 = torch.aten.mul.int %int4_44680, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44681 = torch.constant.int 4096
    %47303 = torch.prim.ListConstruct %47302, %int4096_44681 : (!torch.int, !torch.int) -> !torch.list<int>
    %47304 = torch.aten.view %47135, %47303 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47304, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47305 = torch.aten.mm %47304, %47275 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47305, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44682 = torch.constant.int 4
    %int128_44683 = torch.constant.int 128
    %47306 = torch.prim.ListConstruct %int4_44682, %2482, %int128_44683 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47307 = torch.aten.view %47305, %47306 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44684 = torch.constant.int 4
    %47308 = torch.aten.mul.int %int4_44684, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44685 = torch.constant.int 4096
    %47309 = torch.prim.ListConstruct %47308, %int4096_44685 : (!torch.int, !torch.int) -> !torch.list<int>
    %47310 = torch.aten.view %47136, %47309 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47310, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47311 = torch.aten.mm %47310, %47277 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47311, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44686 = torch.constant.int 4
    %int128_44687 = torch.constant.int 128
    %47312 = torch.prim.ListConstruct %int4_44686, %2482, %int128_44687 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47313 = torch.aten.view %47311, %47312 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44688 = torch.constant.int 4
    %47314 = torch.aten.mul.int %int4_44688, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44689 = torch.constant.int 4096
    %47315 = torch.prim.ListConstruct %47314, %int4096_44689 : (!torch.int, !torch.int) -> !torch.list<int>
    %47316 = torch.aten.view %47137, %47315 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47316, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47317 = torch.aten.mm %47316, %47279 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47317, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44690 = torch.constant.int 4
    %int128_44691 = torch.constant.int 128
    %47318 = torch.prim.ListConstruct %int4_44690, %2482, %int128_44691 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47319 = torch.aten.view %47317, %47318 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44692 = torch.constant.int 4
    %47320 = torch.aten.mul.int %int4_44692, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44693 = torch.constant.int 4096
    %47321 = torch.prim.ListConstruct %47320, %int4096_44693 : (!torch.int, !torch.int) -> !torch.list<int>
    %47322 = torch.aten.view %47138, %47321 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47322, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47323 = torch.aten.mm %47322, %47281 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47323, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44694 = torch.constant.int 4
    %int128_44695 = torch.constant.int 128
    %47324 = torch.prim.ListConstruct %int4_44694, %2482, %int128_44695 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47325 = torch.aten.view %47323, %47324 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_44696 = torch.constant.int 4
    %47326 = torch.aten.mul.int %int4_44696, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_44697 = torch.constant.int 4096
    %47327 = torch.prim.ListConstruct %47326, %int4096_44697 : (!torch.int, !torch.int) -> !torch.list<int>
    %47328 = torch.aten.view %47139, %47327 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %47328, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %47329 = torch.aten.mm %47328, %47283 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %47329, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_44698 = torch.constant.int 4
    %int128_44699 = torch.constant.int 128
    %47330 = torch.prim.ListConstruct %int4_44698, %2482, %int128_44699 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47331 = torch.aten.view %47329, %47330 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %47331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
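    // Reshape the Q projections from [4,?,512] to head-major [4,?,4,128]:
    // four 128-dim query heads per device.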
    %int4_44700 = torch.constant.int 4
    %int4_44701 = torch.constant.int 4
    %int128_44702 = torch.constant.int 128
    %47332 = torch.prim.ListConstruct %int4_44700, %2482, %int4_44701, %int128_44702 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47333 = torch.aten.view %47161, %47332 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_44703 = torch.constant.int 4
    %int4_44704 = torch.constant.int 4
    %int128_44705 = torch.constant.int 128
    %47334 = torch.prim.ListConstruct %int4_44703, %2482, %int4_44704, %int128_44705 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47335 = torch.aten.view %47167, %47334 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_44706 = torch.constant.int 4
    %int4_44707 = torch.constant.int 4
    %int128_44708 = torch.constant.int 128
    %47336 = torch.prim.ListConstruct %int4_44706, %2482, %int4_44707, %int128_44708 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47337 = torch.aten.view %47173, %47336 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_44709 = torch.constant.int 4
    %int4_44710 = torch.constant.int 4
    %int128_44711 = torch.constant.int 128
    %47338 = torch.prim.ListConstruct %int4_44709, %2482, %int4_44710, %int128_44711 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47339 = torch.aten.view %47179, %47338 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_44712 = torch.constant.int 4
    %int4_44713 = torch.constant.int 4
    %int128_44714 = torch.constant.int 128
    %47340 = torch.prim.ListConstruct %int4_44712, %2482, %int4_44713, %int128_44714 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47341 = torch.aten.view %47185, %47340 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_44715 = torch.constant.int 4
    %int4_44716 = torch.constant.int 4
    %int128_44717 = torch.constant.int 128
    %47342 = torch.prim.ListConstruct %int4_44715, %2482, %int4_44716, %int128_44717 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47343 = torch.aten.view %47191, %47342 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_44718 = torch.constant.int 4
    %int4_44719 = torch.constant.int 4
    %int128_44720 = torch.constant.int 128
    %47344 = torch.prim.ListConstruct %int4_44718, %2482, %int4_44719, %int128_44720 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47345 = torch.aten.view %47197, %47344 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_44721 = torch.constant.int 4
    %int4_44722 = torch.constant.int 4
    %int128_44723 = torch.constant.int 128
    %47346 = torch.prim.ListConstruct %int4_44721, %2482, %int4_44722, %int128_44723 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47347 = torch.aten.view %47203, %47346 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
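    // Reshape the K and V projections from [4,?,128] to [4,?,1,128]: a single KV head
    // per device, shared by the four query heads (grouped-query attention).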
    %int4_44724 = torch.constant.int 4
    %int1_44725 = torch.constant.int 1
    %int128_44726 = torch.constant.int 128
    %47348 = torch.prim.ListConstruct %int4_44724, %2482, %int1_44725, %int128_44726 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47349 = torch.aten.view %47225, %47348 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44727 = torch.constant.int 4
    %int1_44728 = torch.constant.int 1
    %int128_44729 = torch.constant.int 128
    %47350 = torch.prim.ListConstruct %int4_44727, %2482, %int1_44728, %int128_44729 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47351 = torch.aten.view %47231, %47350 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44730 = torch.constant.int 4
    %int1_44731 = torch.constant.int 1
    %int128_44732 = torch.constant.int 128
    %47352 = torch.prim.ListConstruct %int4_44730, %2482, %int1_44731, %int128_44732 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47353 = torch.aten.view %47237, %47352 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44733 = torch.constant.int 4
    %int1_44734 = torch.constant.int 1
    %int128_44735 = torch.constant.int 128
    %47354 = torch.prim.ListConstruct %int4_44733, %2482, %int1_44734, %int128_44735 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47355 = torch.aten.view %47243, %47354 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44736 = torch.constant.int 4
    %int1_44737 = torch.constant.int 1
    %int128_44738 = torch.constant.int 128
    %47356 = torch.prim.ListConstruct %int4_44736, %2482, %int1_44737, %int128_44738 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47357 = torch.aten.view %47249, %47356 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44739 = torch.constant.int 4
    %int1_44740 = torch.constant.int 1
    %int128_44741 = torch.constant.int 128
    %47358 = torch.prim.ListConstruct %int4_44739, %2482, %int1_44740, %int128_44741 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47359 = torch.aten.view %47255, %47358 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44742 = torch.constant.int 4
    %int1_44743 = torch.constant.int 1
    %int128_44744 = torch.constant.int 128
    %47360 = torch.prim.ListConstruct %int4_44742, %2482, %int1_44743, %int128_44744 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47361 = torch.aten.view %47261, %47360 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44745 = torch.constant.int 4
    %int1_44746 = torch.constant.int 1
    %int128_44747 = torch.constant.int 128
    %47362 = torch.prim.ListConstruct %int4_44745, %2482, %int1_44746, %int128_44747 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47363 = torch.aten.view %47267, %47362 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44748 = torch.constant.int 4
    %int1_44749 = torch.constant.int 1
    %int128_44750 = torch.constant.int 128
    %47364 = torch.prim.ListConstruct %int4_44748, %2482, %int1_44749, %int128_44750 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47365 = torch.aten.view %47289, %47364 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44751 = torch.constant.int 4
    %int1_44752 = torch.constant.int 1
    %int128_44753 = torch.constant.int 128
    %47366 = torch.prim.ListConstruct %int4_44751, %2482, %int1_44752, %int128_44753 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47367 = torch.aten.view %47295, %47366 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44754 = torch.constant.int 4
    %int1_44755 = torch.constant.int 1
    %int128_44756 = torch.constant.int 128
    %47368 = torch.prim.ListConstruct %int4_44754, %2482, %int1_44755, %int128_44756 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47369 = torch.aten.view %47301, %47368 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44757 = torch.constant.int 4
    %int1_44758 = torch.constant.int 1
    %int128_44759 = torch.constant.int 128
    %47370 = torch.prim.ListConstruct %int4_44757, %2482, %int1_44758, %int128_44759 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47371 = torch.aten.view %47307, %47370 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44760 = torch.constant.int 4
    %int1_44761 = torch.constant.int 1
    %int128_44762 = torch.constant.int 128
    %47372 = torch.prim.ListConstruct %int4_44760, %2482, %int1_44761, %int128_44762 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47373 = torch.aten.view %47313, %47372 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44763 = torch.constant.int 4
    %int1_44764 = torch.constant.int 1
    %int128_44765 = torch.constant.int 128
    %47374 = torch.prim.ListConstruct %int4_44763, %2482, %int1_44764, %int128_44765 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47375 = torch.aten.view %47319, %47374 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44766 = torch.constant.int 4
    %int1_44767 = torch.constant.int 1
    %int128_44768 = torch.constant.int 128
    %47376 = torch.prim.ListConstruct %int4_44766, %2482, %int1_44767, %int128_44768 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47377 = torch.aten.view %47325, %47376 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_44769 = torch.constant.int 4
    %int1_44770 = torch.constant.int 1
    %int128_44771 = torch.constant.int 128
    %47378 = torch.prim.ListConstruct %int4_44769, %2482, %int1_44770, %int128_44771 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47379 = torch.aten.view %47331, %47378 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
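    // Build the rotary position embedding (RoPE) table on the host: positions
    // arange(131072); inverse frequencies 500000^(-(2i)/128) for i = 0..63 (the step-2
    // arange sliced to 64 entries); their outer product feeds cos/sin, which are
    // combined into a [131072,64] complex<f32> table.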
    %int131072_44772 = torch.constant.int 131072
    %none_44773 = torch.constant.none
    %none_44774 = torch.constant.none
    %cpu_44775 = torch.constant.device "cpu"
    %false_44776 = torch.constant.bool false
    %47380 = torch.aten.arange %int131072_44772, %none_44773, %none_44774, %cpu_44775, %false_44776 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_44777 = torch.constant.int 0
    %int128_44778 = torch.constant.int 128
    %int2_44779 = torch.constant.int 2
    %none_44780 = torch.constant.none
    %none_44781 = torch.constant.none
    %cpu_44782 = torch.constant.device "cpu"
    %false_44783 = torch.constant.bool false
    %47381 = torch.aten.arange.start_step %int0_44777, %int128_44778, %int2_44779, %none_44780, %none_44781, %cpu_44782, %false_44783 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_44784 = torch.constant.int 0
    %int0_44785 = torch.constant.int 0
    %int64_44786 = torch.constant.int 64
    %int1_44787 = torch.constant.int 1
    %47382 = torch.aten.slice.Tensor %47381, %int0_44784, %int0_44785, %int64_44786, %int1_44787 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_44788 = torch.constant.int 6
    %47383 = torch.prims.convert_element_type %47382, %int6_44788 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_44789 = torch.constant.int 128
    %47384 = torch.aten.div.Scalar %47383, %int128_44789 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_44790 = torch.constant.float 5.000000e+05
    %47385 = torch.aten.pow.Scalar %float5.000000e05_44790, %47384 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %47386 = torch.aten.reciprocal %47385 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_44791 = torch.constant.float 1.000000e+00
    %47387 = torch.aten.mul.Scalar %47386, %float1.000000e00_44791 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_44792 = torch.constant.int 131072
    %int1_44793 = torch.constant.int 1
    %47388 = torch.prim.ListConstruct %int131072_44792, %int1_44793 : (!torch.int, !torch.int) -> !torch.list<int>
    %47389 = torch.aten.view %47380, %47388 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %47390 = torch.aten.mul.Tensor %47389, %47387 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %47391 = torch.aten.cos %47390 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %47392 = torch.aten.sin %47390 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %47393 = torch.aten.complex %47391, %47392 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
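    // Broadcast the complex RoPE table to all eight devices: each copy round-trips
    // through a builtin tensor so flow.tensor.transfer can move it to the target
    // device promise.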
    %47394 = torch_c.to_builtin_tensor %47393 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47395 = flow.tensor.transfer %47394 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %47396 = torch_c.from_builtin_tensor %47395 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47397 = torch_c.to_builtin_tensor %47393 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47398 = flow.tensor.transfer %47397 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %47399 = torch_c.from_builtin_tensor %47398 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47400 = torch_c.to_builtin_tensor %47393 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47401 = flow.tensor.transfer %47400 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %47402 = torch_c.from_builtin_tensor %47401 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47403 = torch_c.to_builtin_tensor %47393 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47404 = flow.tensor.transfer %47403 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %47405 = torch_c.from_builtin_tensor %47404 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47406 = torch_c.to_builtin_tensor %47393 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47407 = flow.tensor.transfer %47406 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %47408 = torch_c.from_builtin_tensor %47407 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47409 = torch_c.to_builtin_tensor %47393 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47410 = flow.tensor.transfer %47409 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %47411 = torch_c.from_builtin_tensor %47410 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47412 = torch_c.to_builtin_tensor %47393 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47413 = flow.tensor.transfer %47412 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %47414 = torch_c.from_builtin_tensor %47413 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47415 = torch_c.to_builtin_tensor %47393 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47416 = flow.tensor.transfer %47415 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %47417 = torch_c.from_builtin_tensor %47416 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
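    // Apply RoPE to each device's Q shard: slice the table to the current sequence
    // length (dim 1 of the shard), expand it to [1,?,1,64], bitcast the f16 head dim
    // into 64 complex<f16> pairs, complex-multiply with the table, bitcast back to
    // 128 f32 lanes, and truncate to f16. The same sequence repeats below for each
    // of the eight shards.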
    %int1_44794 = torch.constant.int 1
    %47418 = torch.aten.size.int %47161, %int1_44794 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_44795 = torch.constant.int 0
    %47419 = torch.aten.add.int %int0_44795, %47418 : !torch.int, !torch.int -> !torch.int
    %int0_44796 = torch.constant.int 0
    %int0_44797 = torch.constant.int 0
    %int1_44798 = torch.constant.int 1
    %47420 = torch.aten.slice.Tensor %47396, %int0_44796, %int0_44797, %47419, %int1_44798 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47420, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_44799 = torch.constant.int 1
    %int0_44800 = torch.constant.int 0
    %int9223372036854775807_44801 = torch.constant.int 9223372036854775807
    %int1_44802 = torch.constant.int 1
    %47421 = torch.aten.slice.Tensor %47420, %int1_44799, %int0_44800, %int9223372036854775807_44801, %int1_44802 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47421, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_44803 = torch.constant.int 0
    %47422 = torch.aten.unsqueeze %47421, %int0_44803 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47422, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_44804 = torch.constant.int 2
    %47423 = torch.aten.unsqueeze %47422, %int2_44804 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47423, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_44805 = torch.constant.int 3
    %int0_44806 = torch.constant.int 0
    %int9223372036854775807_44807 = torch.constant.int 9223372036854775807
    %int1_44808 = torch.constant.int 1
    %47424 = torch.aten.slice.Tensor %47423, %int3_44805, %int0_44806, %int9223372036854775807_44807, %int1_44808 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47424, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47425 = torch_c.to_builtin_tensor %47333 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_44809 = arith.constant 1 : index
    %dim_44810 = tensor.dim %47425, %c1_44809 : tensor<4x?x4x128xf16>
    %47426 = flow.tensor.bitcast %47425 : tensor<4x?x4x128xf16>{%dim_44810} -> tensor<4x?x4x64xcomplex<f16>>{%dim_44810}
    %47427 = torch_c.from_builtin_tensor %47426 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %47427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %47428 = torch.aten.mul.Tensor %47427, %47424 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %47428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %47429 = torch_c.to_builtin_tensor %47428 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_44811 = arith.constant 1 : index
    %dim_44812 = tensor.dim %47429, %c1_44811 : tensor<4x?x4x64xcomplex<f32>>
    %47430 = flow.tensor.bitcast %47429 : tensor<4x?x4x64xcomplex<f32>>{%dim_44812} -> tensor<4x?x4x128xf32>{%dim_44812}
    %47431 = torch_c.from_builtin_tensor %47430 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %47431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_44813 = torch.constant.int 5
    %47432 = torch.prims.convert_element_type %47431, %int5_44813 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_44814 = torch.constant.int 1
    %47433 = torch.aten.size.int %47167, %int1_44814 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_44815 = torch.constant.int 0
    %47434 = torch.aten.add.int %int0_44815, %47433 : !torch.int, !torch.int -> !torch.int
    %int0_44816 = torch.constant.int 0
    %int0_44817 = torch.constant.int 0
    %int1_44818 = torch.constant.int 1
    %47435 = torch.aten.slice.Tensor %47399, %int0_44816, %int0_44817, %47434, %int1_44818 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47435, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_44819 = torch.constant.int 1
    %int0_44820 = torch.constant.int 0
    %int9223372036854775807_44821 = torch.constant.int 9223372036854775807
    %int1_44822 = torch.constant.int 1
    %47436 = torch.aten.slice.Tensor %47435, %int1_44819, %int0_44820, %int9223372036854775807_44821, %int1_44822 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47436, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_44823 = torch.constant.int 0
    %47437 = torch.aten.unsqueeze %47436, %int0_44823 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47437, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_44824 = torch.constant.int 2
    %47438 = torch.aten.unsqueeze %47437, %int2_44824 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47438, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_44825 = torch.constant.int 3
    %int0_44826 = torch.constant.int 0
    %int9223372036854775807_44827 = torch.constant.int 9223372036854775807
    %int1_44828 = torch.constant.int 1
    %47439 = torch.aten.slice.Tensor %47438, %int3_44825, %int0_44826, %int9223372036854775807_44827, %int1_44828 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47439, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47440 = torch_c.to_builtin_tensor %47335 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_44829 = arith.constant 1 : index
    %dim_44830 = tensor.dim %47440, %c1_44829 : tensor<4x?x4x128xf16>
    %47441 = flow.tensor.bitcast %47440 : tensor<4x?x4x128xf16>{%dim_44830} -> tensor<4x?x4x64xcomplex<f16>>{%dim_44830}
    %47442 = torch_c.from_builtin_tensor %47441 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %47442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %47443 = torch.aten.mul.Tensor %47442, %47439 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %47443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %47444 = torch_c.to_builtin_tensor %47443 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_44831 = arith.constant 1 : index
    %dim_44832 = tensor.dim %47444, %c1_44831 : tensor<4x?x4x64xcomplex<f32>>
    %47445 = flow.tensor.bitcast %47444 : tensor<4x?x4x64xcomplex<f32>>{%dim_44832} -> tensor<4x?x4x128xf32>{%dim_44832}
    %47446 = torch_c.from_builtin_tensor %47445 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %47446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_44833 = torch.constant.int 5
    %47447 = torch.prims.convert_element_type %47446, %int5_44833 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_44834 = torch.constant.int 1
    %47448 = torch.aten.size.int %47173, %int1_44834 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_44835 = torch.constant.int 0
    %47449 = torch.aten.add.int %int0_44835, %47448 : !torch.int, !torch.int -> !torch.int
    %int0_44836 = torch.constant.int 0
    %int0_44837 = torch.constant.int 0
    %int1_44838 = torch.constant.int 1
    %47450 = torch.aten.slice.Tensor %47402, %int0_44836, %int0_44837, %47449, %int1_44838 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47450, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_44839 = torch.constant.int 1
    %int0_44840 = torch.constant.int 0
    %int9223372036854775807_44841 = torch.constant.int 9223372036854775807
    %int1_44842 = torch.constant.int 1
    %47451 = torch.aten.slice.Tensor %47450, %int1_44839, %int0_44840, %int9223372036854775807_44841, %int1_44842 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47451, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_44843 = torch.constant.int 0
    %47452 = torch.aten.unsqueeze %47451, %int0_44843 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47452, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_44844 = torch.constant.int 2
    %47453 = torch.aten.unsqueeze %47452, %int2_44844 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47453, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_44845 = torch.constant.int 3
    %int0_44846 = torch.constant.int 0
    %int9223372036854775807_44847 = torch.constant.int 9223372036854775807
    %int1_44848 = torch.constant.int 1
    %47454 = torch.aten.slice.Tensor %47453, %int3_44845, %int0_44846, %int9223372036854775807_44847, %int1_44848 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47454, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47455 = torch_c.to_builtin_tensor %47337 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_44849 = arith.constant 1 : index
    %dim_44850 = tensor.dim %47455, %c1_44849 : tensor<4x?x4x128xf16>
    %47456 = flow.tensor.bitcast %47455 : tensor<4x?x4x128xf16>{%dim_44850} -> tensor<4x?x4x64xcomplex<f16>>{%dim_44850}
    %47457 = torch_c.from_builtin_tensor %47456 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %47457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %47458 = torch.aten.mul.Tensor %47457, %47454 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %47458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %47459 = torch_c.to_builtin_tensor %47458 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_44851 = arith.constant 1 : index
    %dim_44852 = tensor.dim %47459, %c1_44851 : tensor<4x?x4x64xcomplex<f32>>
    %47460 = flow.tensor.bitcast %47459 : tensor<4x?x4x64xcomplex<f32>>{%dim_44852} -> tensor<4x?x4x128xf32>{%dim_44852}
    %47461 = torch_c.from_builtin_tensor %47460 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %47461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_44853 = torch.constant.int 5
    %47462 = torch.prims.convert_element_type %47461, %int5_44853 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_44854 = torch.constant.int 1
    %47463 = torch.aten.size.int %47179, %int1_44854 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_44855 = torch.constant.int 0
    %47464 = torch.aten.add.int %int0_44855, %47463 : !torch.int, !torch.int -> !torch.int
    %int0_44856 = torch.constant.int 0
    %int0_44857 = torch.constant.int 0
    %int1_44858 = torch.constant.int 1
    %47465 = torch.aten.slice.Tensor %47405, %int0_44856, %int0_44857, %47464, %int1_44858 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47465, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_44859 = torch.constant.int 1
    %int0_44860 = torch.constant.int 0
    %int9223372036854775807_44861 = torch.constant.int 9223372036854775807
    %int1_44862 = torch.constant.int 1
    %47466 = torch.aten.slice.Tensor %47465, %int1_44859, %int0_44860, %int9223372036854775807_44861, %int1_44862 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47466, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_44863 = torch.constant.int 0
    %47467 = torch.aten.unsqueeze %47466, %int0_44863 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47467, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_44864 = torch.constant.int 2
    %47468 = torch.aten.unsqueeze %47467, %int2_44864 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47468, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_44865 = torch.constant.int 3
    %int0_44866 = torch.constant.int 0
    %int9223372036854775807_44867 = torch.constant.int 9223372036854775807
    %int1_44868 = torch.constant.int 1
    %47469 = torch.aten.slice.Tensor %47468, %int3_44865, %int0_44866, %int9223372036854775807_44867, %int1_44868 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47469, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47470 = torch_c.to_builtin_tensor %47339 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_44869 = arith.constant 1 : index
    %dim_44870 = tensor.dim %47470, %c1_44869 : tensor<4x?x4x128xf16>
    %47471 = flow.tensor.bitcast %47470 : tensor<4x?x4x128xf16>{%dim_44870} -> tensor<4x?x4x64xcomplex<f16>>{%dim_44870}
    %47472 = torch_c.from_builtin_tensor %47471 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %47472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %47473 = torch.aten.mul.Tensor %47472, %47469 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %47473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %47474 = torch_c.to_builtin_tensor %47473 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_44871 = arith.constant 1 : index
    %dim_44872 = tensor.dim %47474, %c1_44871 : tensor<4x?x4x64xcomplex<f32>>
    %47475 = flow.tensor.bitcast %47474 : tensor<4x?x4x64xcomplex<f32>>{%dim_44872} -> tensor<4x?x4x128xf32>{%dim_44872}
    %47476 = torch_c.from_builtin_tensor %47475 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %47476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_44873 = torch.constant.int 5
    %47477 = torch.prims.convert_element_type %47476, %int5_44873 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_44874 = torch.constant.int 1
    %47478 = torch.aten.size.int %47185, %int1_44874 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_44875 = torch.constant.int 0
    %47479 = torch.aten.add.int %int0_44875, %47478 : !torch.int, !torch.int -> !torch.int
    %int0_44876 = torch.constant.int 0
    %int0_44877 = torch.constant.int 0
    %int1_44878 = torch.constant.int 1
    %47480 = torch.aten.slice.Tensor %47408, %int0_44876, %int0_44877, %47479, %int1_44878 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47480, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_44879 = torch.constant.int 1
    %int0_44880 = torch.constant.int 0
    %int9223372036854775807_44881 = torch.constant.int 9223372036854775807
    %int1_44882 = torch.constant.int 1
    %47481 = torch.aten.slice.Tensor %47480, %int1_44879, %int0_44880, %int9223372036854775807_44881, %int1_44882 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47481, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_44883 = torch.constant.int 0
    %47482 = torch.aten.unsqueeze %47481, %int0_44883 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47482, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_44884 = torch.constant.int 2
    %47483 = torch.aten.unsqueeze %47482, %int2_44884 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47483, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_44885 = torch.constant.int 3
    %int0_44886 = torch.constant.int 0
    %int9223372036854775807_44887 = torch.constant.int 9223372036854775807
    %int1_44888 = torch.constant.int 1
    %47484 = torch.aten.slice.Tensor %47483, %int3_44885, %int0_44886, %int9223372036854775807_44887, %int1_44888 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47484, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47485 = torch_c.to_builtin_tensor %47341 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_44889 = arith.constant 1 : index
    %dim_44890 = tensor.dim %47485, %c1_44889 : tensor<4x?x4x128xf16>
    %47486 = flow.tensor.bitcast %47485 : tensor<4x?x4x128xf16>{%dim_44890} -> tensor<4x?x4x64xcomplex<f16>>{%dim_44890}
    %47487 = torch_c.from_builtin_tensor %47486 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %47487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %47488 = torch.aten.mul.Tensor %47487, %47484 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %47488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %47489 = torch_c.to_builtin_tensor %47488 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_44891 = arith.constant 1 : index
    %dim_44892 = tensor.dim %47489, %c1_44891 : tensor<4x?x4x64xcomplex<f32>>
    %47490 = flow.tensor.bitcast %47489 : tensor<4x?x4x64xcomplex<f32>>{%dim_44892} -> tensor<4x?x4x128xf32>{%dim_44892}
    %47491 = torch_c.from_builtin_tensor %47490 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %47491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_44893 = torch.constant.int 5
    %47492 = torch.prims.convert_element_type %47491, %int5_44893 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_44894 = torch.constant.int 1
    %47493 = torch.aten.size.int %47191, %int1_44894 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_44895 = torch.constant.int 0
    %47494 = torch.aten.add.int %int0_44895, %47493 : !torch.int, !torch.int -> !torch.int
    %int0_44896 = torch.constant.int 0
    %int0_44897 = torch.constant.int 0
    %int1_44898 = torch.constant.int 1
    %47495 = torch.aten.slice.Tensor %47411, %int0_44896, %int0_44897, %47494, %int1_44898 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47495, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_44899 = torch.constant.int 1
    %int0_44900 = torch.constant.int 0
    %int9223372036854775807_44901 = torch.constant.int 9223372036854775807
    %int1_44902 = torch.constant.int 1
    %47496 = torch.aten.slice.Tensor %47495, %int1_44899, %int0_44900, %int9223372036854775807_44901, %int1_44902 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47496, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_44903 = torch.constant.int 0
    %47497 = torch.aten.unsqueeze %47496, %int0_44903 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47497, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_44904 = torch.constant.int 2
    %47498 = torch.aten.unsqueeze %47497, %int2_44904 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47498, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_44905 = torch.constant.int 3
    %int0_44906 = torch.constant.int 0
    %int9223372036854775807_44907 = torch.constant.int 9223372036854775807
    %int1_44908 = torch.constant.int 1
    %47499 = torch.aten.slice.Tensor %47498, %int3_44905, %int0_44906, %int9223372036854775807_44907, %int1_44908 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47499, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47500 = torch_c.to_builtin_tensor %47343 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_44909 = arith.constant 1 : index
    %dim_44910 = tensor.dim %47500, %c1_44909 : tensor<4x?x4x128xf16>
    %47501 = flow.tensor.bitcast %47500 : tensor<4x?x4x128xf16>{%dim_44910} -> tensor<4x?x4x64xcomplex<f16>>{%dim_44910}
    %47502 = torch_c.from_builtin_tensor %47501 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %47502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %47503 = torch.aten.mul.Tensor %47502, %47499 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %47503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %47504 = torch_c.to_builtin_tensor %47503 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_44911 = arith.constant 1 : index
    %dim_44912 = tensor.dim %47504, %c1_44911 : tensor<4x?x4x64xcomplex<f32>>
    %47505 = flow.tensor.bitcast %47504 : tensor<4x?x4x64xcomplex<f32>>{%dim_44912} -> tensor<4x?x4x128xf32>{%dim_44912}
    %47506 = torch_c.from_builtin_tensor %47505 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %47506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_44913 = torch.constant.int 5
    %47507 = torch.prims.convert_element_type %47506, %int5_44913 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_44914 = torch.constant.int 1
    %47508 = torch.aten.size.int %47197, %int1_44914 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_44915 = torch.constant.int 0
    %47509 = torch.aten.add.int %int0_44915, %47508 : !torch.int, !torch.int -> !torch.int
    %int0_44916 = torch.constant.int 0
    %int0_44917 = torch.constant.int 0
    %int1_44918 = torch.constant.int 1
    %47510 = torch.aten.slice.Tensor %47414, %int0_44916, %int0_44917, %47509, %int1_44918 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47510, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_44919 = torch.constant.int 1
    %int0_44920 = torch.constant.int 0
    %int9223372036854775807_44921 = torch.constant.int 9223372036854775807
    %int1_44922 = torch.constant.int 1
    %47511 = torch.aten.slice.Tensor %47510, %int1_44919, %int0_44920, %int9223372036854775807_44921, %int1_44922 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47511, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_44923 = torch.constant.int 0
    %47512 = torch.aten.unsqueeze %47511, %int0_44923 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47512, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_44924 = torch.constant.int 2
    %47513 = torch.aten.unsqueeze %47512, %int2_44924 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47513, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_44925 = torch.constant.int 3
    %int0_44926 = torch.constant.int 0
    %int9223372036854775807_44927 = torch.constant.int 9223372036854775807
    %int1_44928 = torch.constant.int 1
    %47514 = torch.aten.slice.Tensor %47513, %int3_44925, %int0_44926, %int9223372036854775807_44927, %int1_44928 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47514, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47515 = torch_c.to_builtin_tensor %47345 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_44929 = arith.constant 1 : index
    %dim_44930 = tensor.dim %47515, %c1_44929 : tensor<4x?x4x128xf16>
    %47516 = flow.tensor.bitcast %47515 : tensor<4x?x4x128xf16>{%dim_44930} -> tensor<4x?x4x64xcomplex<f16>>{%dim_44930}
    %47517 = torch_c.from_builtin_tensor %47516 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %47517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %47518 = torch.aten.mul.Tensor %47517, %47514 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %47518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %47519 = torch_c.to_builtin_tensor %47518 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_44931 = arith.constant 1 : index
    %dim_44932 = tensor.dim %47519, %c1_44931 : tensor<4x?x4x64xcomplex<f32>>
    %47520 = flow.tensor.bitcast %47519 : tensor<4x?x4x64xcomplex<f32>>{%dim_44932} -> tensor<4x?x4x128xf32>{%dim_44932}
    %47521 = torch_c.from_builtin_tensor %47520 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %47521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_44933 = torch.constant.int 5
    %47522 = torch.prims.convert_element_type %47521, %int5_44933 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_44934 = torch.constant.int 1
    %47523 = torch.aten.size.int %47203, %int1_44934 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_44935 = torch.constant.int 0
    %47524 = torch.aten.add.int %int0_44935, %47523 : !torch.int, !torch.int -> !torch.int
    %int0_44936 = torch.constant.int 0
    %int0_44937 = torch.constant.int 0
    %int1_44938 = torch.constant.int 1
    %47525 = torch.aten.slice.Tensor %47417, %int0_44936, %int0_44937, %47524, %int1_44938 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47525, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_44939 = torch.constant.int 1
    %int0_44940 = torch.constant.int 0
    %int9223372036854775807_44941 = torch.constant.int 9223372036854775807
    %int1_44942 = torch.constant.int 1
    %47526 = torch.aten.slice.Tensor %47525, %int1_44939, %int0_44940, %int9223372036854775807_44941, %int1_44942 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47526, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_44943 = torch.constant.int 0
    %47527 = torch.aten.unsqueeze %47526, %int0_44943 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47527, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_44944 = torch.constant.int 2
    %47528 = torch.aten.unsqueeze %47527, %int2_44944 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47528, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_44945 = torch.constant.int 3
    %int0_44946 = torch.constant.int 0
    %int9223372036854775807_44947 = torch.constant.int 9223372036854775807
    %int1_44948 = torch.constant.int 1
    %47529 = torch.aten.slice.Tensor %47528, %int3_44945, %int0_44946, %int9223372036854775807_44947, %int1_44948 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47529, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47530 = torch_c.to_builtin_tensor %47347 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_44949 = arith.constant 1 : index
    %dim_44950 = tensor.dim %47530, %c1_44949 : tensor<4x?x4x128xf16>
    %47531 = flow.tensor.bitcast %47530 : tensor<4x?x4x128xf16>{%dim_44950} -> tensor<4x?x4x64xcomplex<f16>>{%dim_44950}
    %47532 = torch_c.from_builtin_tensor %47531 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %47532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %47533 = torch.aten.mul.Tensor %47532, %47529 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %47533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %47534 = torch_c.to_builtin_tensor %47533 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_44951 = arith.constant 1 : index
    %dim_44952 = tensor.dim %47534, %c1_44951 : tensor<4x?x4x64xcomplex<f32>>
    %47535 = flow.tensor.bitcast %47534 : tensor<4x?x4x64xcomplex<f32>>{%dim_44952} -> tensor<4x?x4x128xf32>{%dim_44952}
    %47536 = torch_c.from_builtin_tensor %47535 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %47536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_44953 = torch.constant.int 5
    %47537 = torch.prims.convert_element_type %47536, %int5_44953 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %47537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
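    // Rebuild the rotation table on the host ("cpu" device constants below): a [131072]
    // position vector and 64 per-channel frequencies. A PyTorch-style sketch of what the
    // ops compute, inferred from this IR rather than from any source model code:
    //   positions = torch.arange(131072)                                   # %47538
    //   inv_freq  = 1.0 / (5.0e5 ** (torch.arange(0, 128, 2)[:64] / 128))  # %47539..%47545
    //   angles    = positions.view(131072, 1) * inv_freq                   # %47548
    //   table     = torch.complex(angles.cos(), angles.sin())              # %47551
    // The theta base 5.0e5 and head dimension 128 match the constants materialized below.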
    %int131072_44954 = torch.constant.int 131072
    %none_44955 = torch.constant.none
    %none_44956 = torch.constant.none
    %cpu_44957 = torch.constant.device "cpu"
    %false_44958 = torch.constant.bool false
    %47538 = torch.aten.arange %int131072_44954, %none_44955, %none_44956, %cpu_44957, %false_44958 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_44959 = torch.constant.int 0
    %int128_44960 = torch.constant.int 128
    %int2_44961 = torch.constant.int 2
    %none_44962 = torch.constant.none
    %none_44963 = torch.constant.none
    %cpu_44964 = torch.constant.device "cpu"
    %false_44965 = torch.constant.bool false
    %47539 = torch.aten.arange.start_step %int0_44959, %int128_44960, %int2_44961, %none_44962, %none_44963, %cpu_44964, %false_44965 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_44966 = torch.constant.int 0
    %int0_44967 = torch.constant.int 0
    %int64_44968 = torch.constant.int 64
    %int1_44969 = torch.constant.int 1
    %47540 = torch.aten.slice.Tensor %47539, %int0_44966, %int0_44967, %int64_44968, %int1_44969 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_44970 = torch.constant.int 6
    %47541 = torch.prims.convert_element_type %47540, %int6_44970 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_44971 = torch.constant.int 128
    %47542 = torch.aten.div.Scalar %47541, %int128_44971 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_44972 = torch.constant.float 5.000000e+05
    %47543 = torch.aten.pow.Scalar %float5.000000e05_44972, %47542 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %47544 = torch.aten.reciprocal %47543 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_44973 = torch.constant.float 1.000000e+00
    %47545 = torch.aten.mul.Scalar %47544, %float1.000000e00_44973 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_44974 = torch.constant.int 131072
    %int1_44975 = torch.constant.int 1
    %47546 = torch.prim.ListConstruct %int131072_44974, %int1_44975 : (!torch.int, !torch.int) -> !torch.list<int>
    %47547 = torch.aten.view %47538, %47546 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %47548 = torch.aten.mul.Tensor %47547, %47545 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %47549 = torch.aten.cos %47548 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %47550 = torch.aten.sin %47548 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %47551 = torch.aten.complex %47549, %47550 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
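    // Replicate the freshly computed table to all eight devices: each
    // to_builtin_tensor / flow.tensor.transfer / from_builtin_tensor triple below copies
    // %47551 to one of @__device_0 ... @__device_7, yielding %47554, %47557, ..., %47575.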
    %47552 = torch_c.to_builtin_tensor %47551 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47553 = flow.tensor.transfer %47552 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %47554 = torch_c.from_builtin_tensor %47553 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47555 = torch_c.to_builtin_tensor %47551 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47556 = flow.tensor.transfer %47555 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %47557 = torch_c.from_builtin_tensor %47556 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47558 = torch_c.to_builtin_tensor %47551 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47559 = flow.tensor.transfer %47558 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %47560 = torch_c.from_builtin_tensor %47559 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47561 = torch_c.to_builtin_tensor %47551 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47562 = flow.tensor.transfer %47561 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %47563 = torch_c.from_builtin_tensor %47562 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47564 = torch_c.to_builtin_tensor %47551 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47565 = flow.tensor.transfer %47564 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %47566 = torch_c.from_builtin_tensor %47565 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47567 = torch_c.to_builtin_tensor %47551 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47568 = flow.tensor.transfer %47567 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %47569 = torch_c.from_builtin_tensor %47568 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47570 = torch_c.to_builtin_tensor %47551 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47571 = flow.tensor.transfer %47570 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %47572 = torch_c.from_builtin_tensor %47571 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %47573 = torch_c.to_builtin_tensor %47551 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %47574 = flow.tensor.transfer %47573 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %47575 = torch_c.from_builtin_tensor %47574 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
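    // The same slice / broadcast / complex-multiply pattern now runs on [4,?,1,128]
    // tensors (%47349, %47351, ...), one single-head slice per device, consistent with
    // key projections under grouped-query attention (again inferred from shapes, not
    // stated in the IR). The sequence length is taken from the per-device [4,?,128]
    // activations (%47225, %47231, ...), and each device uses its local table copy.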
    %int1_44976 = torch.constant.int 1
    %47576 = torch.aten.size.int %47225, %int1_44976 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_44977 = torch.constant.int 0
    %47577 = torch.aten.add.int %int0_44977, %47576 : !torch.int, !torch.int -> !torch.int
    %int0_44978 = torch.constant.int 0
    %int0_44979 = torch.constant.int 0
    %int1_44980 = torch.constant.int 1
    %47578 = torch.aten.slice.Tensor %47554, %int0_44978, %int0_44979, %47577, %int1_44980 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47578, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_44981 = torch.constant.int 1
    %int0_44982 = torch.constant.int 0
    %int9223372036854775807_44983 = torch.constant.int 9223372036854775807
    %int1_44984 = torch.constant.int 1
    %47579 = torch.aten.slice.Tensor %47578, %int1_44981, %int0_44982, %int9223372036854775807_44983, %int1_44984 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47579, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_44985 = torch.constant.int 0
    %47580 = torch.aten.unsqueeze %47579, %int0_44985 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47580, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_44986 = torch.constant.int 2
    %47581 = torch.aten.unsqueeze %47580, %int2_44986 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47581, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_44987 = torch.constant.int 3
    %int0_44988 = torch.constant.int 0
    %int9223372036854775807_44989 = torch.constant.int 9223372036854775807
    %int1_44990 = torch.constant.int 1
    %47582 = torch.aten.slice.Tensor %47581, %int3_44987, %int0_44988, %int9223372036854775807_44989, %int1_44990 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47582, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47583 = torch_c.to_builtin_tensor %47349 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_44991 = arith.constant 1 : index
    %dim_44992 = tensor.dim %47583, %c1_44991 : tensor<4x?x1x128xf16>
    %47584 = flow.tensor.bitcast %47583 : tensor<4x?x1x128xf16>{%dim_44992} -> tensor<4x?x1x64xcomplex<f16>>{%dim_44992}
    %47585 = torch_c.from_builtin_tensor %47584 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %47585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %47586 = torch.aten.mul.Tensor %47585, %47582 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %47587 = torch_c.to_builtin_tensor %47586 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_44993 = arith.constant 1 : index
    %dim_44994 = tensor.dim %47587, %c1_44993 : tensor<4x?x1x64xcomplex<f32>>
    %47588 = flow.tensor.bitcast %47587 : tensor<4x?x1x64xcomplex<f32>>{%dim_44994} -> tensor<4x?x1x128xf32>{%dim_44994}
    %47589 = torch_c.from_builtin_tensor %47588 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %47589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_44995 = torch.constant.int 5
    %47590 = torch.prims.convert_element_type %47589, %int5_44995 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
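    // Devices 1 through 7 repeat the identical rotation below, differing only in the
    // SSA values for the per-device table, length source, and input tensor.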
    %int1_44996 = torch.constant.int 1
    %47591 = torch.aten.size.int %47231, %int1_44996 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_44997 = torch.constant.int 0
    %47592 = torch.aten.add.int %int0_44997, %47591 : !torch.int, !torch.int -> !torch.int
    %int0_44998 = torch.constant.int 0
    %int0_44999 = torch.constant.int 0
    %int1_45000 = torch.constant.int 1
    %47593 = torch.aten.slice.Tensor %47557, %int0_44998, %int0_44999, %47592, %int1_45000 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47593, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_45001 = torch.constant.int 1
    %int0_45002 = torch.constant.int 0
    %int9223372036854775807_45003 = torch.constant.int 9223372036854775807
    %int1_45004 = torch.constant.int 1
    %47594 = torch.aten.slice.Tensor %47593, %int1_45001, %int0_45002, %int9223372036854775807_45003, %int1_45004 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47594, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_45005 = torch.constant.int 0
    %47595 = torch.aten.unsqueeze %47594, %int0_45005 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47595, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_45006 = torch.constant.int 2
    %47596 = torch.aten.unsqueeze %47595, %int2_45006 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47596, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_45007 = torch.constant.int 3
    %int0_45008 = torch.constant.int 0
    %int9223372036854775807_45009 = torch.constant.int 9223372036854775807
    %int1_45010 = torch.constant.int 1
    %47597 = torch.aten.slice.Tensor %47596, %int3_45007, %int0_45008, %int9223372036854775807_45009, %int1_45010 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47597, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47598 = torch_c.to_builtin_tensor %47351 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_45011 = arith.constant 1 : index
    %dim_45012 = tensor.dim %47598, %c1_45011 : tensor<4x?x1x128xf16>
    %47599 = flow.tensor.bitcast %47598 : tensor<4x?x1x128xf16>{%dim_45012} -> tensor<4x?x1x64xcomplex<f16>>{%dim_45012}
    %47600 = torch_c.from_builtin_tensor %47599 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %47600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %47601 = torch.aten.mul.Tensor %47600, %47597 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %47602 = torch_c.to_builtin_tensor %47601 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_45013 = arith.constant 1 : index
    %dim_45014 = tensor.dim %47602, %c1_45013 : tensor<4x?x1x64xcomplex<f32>>
    %47603 = flow.tensor.bitcast %47602 : tensor<4x?x1x64xcomplex<f32>>{%dim_45014} -> tensor<4x?x1x128xf32>{%dim_45014}
    %47604 = torch_c.from_builtin_tensor %47603 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %47604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_45015 = torch.constant.int 5
    %47605 = torch.prims.convert_element_type %47604, %int5_45015 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_45016 = torch.constant.int 1
    %47606 = torch.aten.size.int %47237, %int1_45016 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_45017 = torch.constant.int 0
    %47607 = torch.aten.add.int %int0_45017, %47606 : !torch.int, !torch.int -> !torch.int
    %int0_45018 = torch.constant.int 0
    %int0_45019 = torch.constant.int 0
    %int1_45020 = torch.constant.int 1
    %47608 = torch.aten.slice.Tensor %47560, %int0_45018, %int0_45019, %47607, %int1_45020 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47608, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_45021 = torch.constant.int 1
    %int0_45022 = torch.constant.int 0
    %int9223372036854775807_45023 = torch.constant.int 9223372036854775807
    %int1_45024 = torch.constant.int 1
    %47609 = torch.aten.slice.Tensor %47608, %int1_45021, %int0_45022, %int9223372036854775807_45023, %int1_45024 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47609, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_45025 = torch.constant.int 0
    %47610 = torch.aten.unsqueeze %47609, %int0_45025 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47610, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_45026 = torch.constant.int 2
    %47611 = torch.aten.unsqueeze %47610, %int2_45026 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47611, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_45027 = torch.constant.int 3
    %int0_45028 = torch.constant.int 0
    %int9223372036854775807_45029 = torch.constant.int 9223372036854775807
    %int1_45030 = torch.constant.int 1
    %47612 = torch.aten.slice.Tensor %47611, %int3_45027, %int0_45028, %int9223372036854775807_45029, %int1_45030 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47612, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47613 = torch_c.to_builtin_tensor %47353 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_45031 = arith.constant 1 : index
    %dim_45032 = tensor.dim %47613, %c1_45031 : tensor<4x?x1x128xf16>
    %47614 = flow.tensor.bitcast %47613 : tensor<4x?x1x128xf16>{%dim_45032} -> tensor<4x?x1x64xcomplex<f16>>{%dim_45032}
    %47615 = torch_c.from_builtin_tensor %47614 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %47615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %47616 = torch.aten.mul.Tensor %47615, %47612 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %47617 = torch_c.to_builtin_tensor %47616 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_45033 = arith.constant 1 : index
    %dim_45034 = tensor.dim %47617, %c1_45033 : tensor<4x?x1x64xcomplex<f32>>
    %47618 = flow.tensor.bitcast %47617 : tensor<4x?x1x64xcomplex<f32>>{%dim_45034} -> tensor<4x?x1x128xf32>{%dim_45034}
    %47619 = torch_c.from_builtin_tensor %47618 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %47619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_45035 = torch.constant.int 5
    %47620 = torch.prims.convert_element_type %47619, %int5_45035 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_45036 = torch.constant.int 1
    %47621 = torch.aten.size.int %47243, %int1_45036 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_45037 = torch.constant.int 0
    %47622 = torch.aten.add.int %int0_45037, %47621 : !torch.int, !torch.int -> !torch.int
    %int0_45038 = torch.constant.int 0
    %int0_45039 = torch.constant.int 0
    %int1_45040 = torch.constant.int 1
    %47623 = torch.aten.slice.Tensor %47563, %int0_45038, %int0_45039, %47622, %int1_45040 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47623, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_45041 = torch.constant.int 1
    %int0_45042 = torch.constant.int 0
    %int9223372036854775807_45043 = torch.constant.int 9223372036854775807
    %int1_45044 = torch.constant.int 1
    %47624 = torch.aten.slice.Tensor %47623, %int1_45041, %int0_45042, %int9223372036854775807_45043, %int1_45044 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47624, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_45045 = torch.constant.int 0
    %47625 = torch.aten.unsqueeze %47624, %int0_45045 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47625, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_45046 = torch.constant.int 2
    %47626 = torch.aten.unsqueeze %47625, %int2_45046 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47626, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_45047 = torch.constant.int 3
    %int0_45048 = torch.constant.int 0
    %int9223372036854775807_45049 = torch.constant.int 9223372036854775807
    %int1_45050 = torch.constant.int 1
    %47627 = torch.aten.slice.Tensor %47626, %int3_45047, %int0_45048, %int9223372036854775807_45049, %int1_45050 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47627, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47628 = torch_c.to_builtin_tensor %47355 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_45051 = arith.constant 1 : index
    %dim_45052 = tensor.dim %47628, %c1_45051 : tensor<4x?x1x128xf16>
    %47629 = flow.tensor.bitcast %47628 : tensor<4x?x1x128xf16>{%dim_45052} -> tensor<4x?x1x64xcomplex<f16>>{%dim_45052}
    %47630 = torch_c.from_builtin_tensor %47629 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %47630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %47631 = torch.aten.mul.Tensor %47630, %47627 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %47632 = torch_c.to_builtin_tensor %47631 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_45053 = arith.constant 1 : index
    %dim_45054 = tensor.dim %47632, %c1_45053 : tensor<4x?x1x64xcomplex<f32>>
    %47633 = flow.tensor.bitcast %47632 : tensor<4x?x1x64xcomplex<f32>>{%dim_45054} -> tensor<4x?x1x128xf32>{%dim_45054}
    %47634 = torch_c.from_builtin_tensor %47633 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %47634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_45055 = torch.constant.int 5
    %47635 = torch.prims.convert_element_type %47634, %int5_45055 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_45056 = torch.constant.int 1
    %47636 = torch.aten.size.int %47249, %int1_45056 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_45057 = torch.constant.int 0
    %47637 = torch.aten.add.int %int0_45057, %47636 : !torch.int, !torch.int -> !torch.int
    %int0_45058 = torch.constant.int 0
    %int0_45059 = torch.constant.int 0
    %int1_45060 = torch.constant.int 1
    %47638 = torch.aten.slice.Tensor %47566, %int0_45058, %int0_45059, %47637, %int1_45060 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47638, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_45061 = torch.constant.int 1
    %int0_45062 = torch.constant.int 0
    %int9223372036854775807_45063 = torch.constant.int 9223372036854775807
    %int1_45064 = torch.constant.int 1
    %47639 = torch.aten.slice.Tensor %47638, %int1_45061, %int0_45062, %int9223372036854775807_45063, %int1_45064 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47639, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_45065 = torch.constant.int 0
    %47640 = torch.aten.unsqueeze %47639, %int0_45065 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47640, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_45066 = torch.constant.int 2
    %47641 = torch.aten.unsqueeze %47640, %int2_45066 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47641, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_45067 = torch.constant.int 3
    %int0_45068 = torch.constant.int 0
    %int9223372036854775807_45069 = torch.constant.int 9223372036854775807
    %int1_45070 = torch.constant.int 1
    %47642 = torch.aten.slice.Tensor %47641, %int3_45067, %int0_45068, %int9223372036854775807_45069, %int1_45070 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47642, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47643 = torch_c.to_builtin_tensor %47357 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_45071 = arith.constant 1 : index
    %dim_45072 = tensor.dim %47643, %c1_45071 : tensor<4x?x1x128xf16>
    %47644 = flow.tensor.bitcast %47643 : tensor<4x?x1x128xf16>{%dim_45072} -> tensor<4x?x1x64xcomplex<f16>>{%dim_45072}
    %47645 = torch_c.from_builtin_tensor %47644 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %47645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %47646 = torch.aten.mul.Tensor %47645, %47642 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %47647 = torch_c.to_builtin_tensor %47646 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_45073 = arith.constant 1 : index
    %dim_45074 = tensor.dim %47647, %c1_45073 : tensor<4x?x1x64xcomplex<f32>>
    %47648 = flow.tensor.bitcast %47647 : tensor<4x?x1x64xcomplex<f32>>{%dim_45074} -> tensor<4x?x1x128xf32>{%dim_45074}
    %47649 = torch_c.from_builtin_tensor %47648 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %47649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_45075 = torch.constant.int 5
    %47650 = torch.prims.convert_element_type %47649, %int5_45075 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_45076 = torch.constant.int 1
    %47651 = torch.aten.size.int %47255, %int1_45076 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_45077 = torch.constant.int 0
    %47652 = torch.aten.add.int %int0_45077, %47651 : !torch.int, !torch.int -> !torch.int
    %int0_45078 = torch.constant.int 0
    %int0_45079 = torch.constant.int 0
    %int1_45080 = torch.constant.int 1
    %47653 = torch.aten.slice.Tensor %47569, %int0_45078, %int0_45079, %47652, %int1_45080 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47653, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_45081 = torch.constant.int 1
    %int0_45082 = torch.constant.int 0
    %int9223372036854775807_45083 = torch.constant.int 9223372036854775807
    %int1_45084 = torch.constant.int 1
    %47654 = torch.aten.slice.Tensor %47653, %int1_45081, %int0_45082, %int9223372036854775807_45083, %int1_45084 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47654, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_45085 = torch.constant.int 0
    %47655 = torch.aten.unsqueeze %47654, %int0_45085 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47655, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_45086 = torch.constant.int 2
    %47656 = torch.aten.unsqueeze %47655, %int2_45086 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47656, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_45087 = torch.constant.int 3
    %int0_45088 = torch.constant.int 0
    %int9223372036854775807_45089 = torch.constant.int 9223372036854775807
    %int1_45090 = torch.constant.int 1
    %47657 = torch.aten.slice.Tensor %47656, %int3_45087, %int0_45088, %int9223372036854775807_45089, %int1_45090 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47657, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47658 = torch_c.to_builtin_tensor %47359 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_45091 = arith.constant 1 : index
    %dim_45092 = tensor.dim %47658, %c1_45091 : tensor<4x?x1x128xf16>
    %47659 = flow.tensor.bitcast %47658 : tensor<4x?x1x128xf16>{%dim_45092} -> tensor<4x?x1x64xcomplex<f16>>{%dim_45092}
    %47660 = torch_c.from_builtin_tensor %47659 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %47660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %47661 = torch.aten.mul.Tensor %47660, %47657 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %47662 = torch_c.to_builtin_tensor %47661 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_45093 = arith.constant 1 : index
    %dim_45094 = tensor.dim %47662, %c1_45093 : tensor<4x?x1x64xcomplex<f32>>
    %47663 = flow.tensor.bitcast %47662 : tensor<4x?x1x64xcomplex<f32>>{%dim_45094} -> tensor<4x?x1x128xf32>{%dim_45094}
    %47664 = torch_c.from_builtin_tensor %47663 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %47664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_45095 = torch.constant.int 5
    %47665 = torch.prims.convert_element_type %47664, %int5_45095 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_45096 = torch.constant.int 1
    %47666 = torch.aten.size.int %47261, %int1_45096 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_45097 = torch.constant.int 0
    %47667 = torch.aten.add.int %int0_45097, %47666 : !torch.int, !torch.int -> !torch.int
    %int0_45098 = torch.constant.int 0
    %int0_45099 = torch.constant.int 0
    %int1_45100 = torch.constant.int 1
    %47668 = torch.aten.slice.Tensor %47572, %int0_45098, %int0_45099, %47667, %int1_45100 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47668, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_45101 = torch.constant.int 1
    %int0_45102 = torch.constant.int 0
    %int9223372036854775807_45103 = torch.constant.int 9223372036854775807
    %int1_45104 = torch.constant.int 1
    %47669 = torch.aten.slice.Tensor %47668, %int1_45101, %int0_45102, %int9223372036854775807_45103, %int1_45104 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47669, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_45105 = torch.constant.int 0
    %47670 = torch.aten.unsqueeze %47669, %int0_45105 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47670, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_45106 = torch.constant.int 2
    %47671 = torch.aten.unsqueeze %47670, %int2_45106 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47671, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_45107 = torch.constant.int 3
    %int0_45108 = torch.constant.int 0
    %int9223372036854775807_45109 = torch.constant.int 9223372036854775807
    %int1_45110 = torch.constant.int 1
    %47672 = torch.aten.slice.Tensor %47671, %int3_45107, %int0_45108, %int9223372036854775807_45109, %int1_45110 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47672, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47673 = torch_c.to_builtin_tensor %47361 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_45111 = arith.constant 1 : index
    %dim_45112 = tensor.dim %47673, %c1_45111 : tensor<4x?x1x128xf16>
    %47674 = flow.tensor.bitcast %47673 : tensor<4x?x1x128xf16>{%dim_45112} -> tensor<4x?x1x64xcomplex<f16>>{%dim_45112}
    %47675 = torch_c.from_builtin_tensor %47674 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %47675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %47676 = torch.aten.mul.Tensor %47675, %47672 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %47677 = torch_c.to_builtin_tensor %47676 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_45113 = arith.constant 1 : index
    %dim_45114 = tensor.dim %47677, %c1_45113 : tensor<4x?x1x64xcomplex<f32>>
    %47678 = flow.tensor.bitcast %47677 : tensor<4x?x1x64xcomplex<f32>>{%dim_45114} -> tensor<4x?x1x128xf32>{%dim_45114}
    %47679 = torch_c.from_builtin_tensor %47678 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %47679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_45115 = torch.constant.int 5
    %47680 = torch.prims.convert_element_type %47679, %int5_45115 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_45116 = torch.constant.int 1
    %47681 = torch.aten.size.int %47267, %int1_45116 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_45117 = torch.constant.int 0
    %47682 = torch.aten.add.int %int0_45117, %47681 : !torch.int, !torch.int -> !torch.int
    %int0_45118 = torch.constant.int 0
    %int0_45119 = torch.constant.int 0
    %int1_45120 = torch.constant.int 1
    %47683 = torch.aten.slice.Tensor %47575, %int0_45118, %int0_45119, %47682, %int1_45120 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47683, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_45121 = torch.constant.int 1
    %int0_45122 = torch.constant.int 0
    %int9223372036854775807_45123 = torch.constant.int 9223372036854775807
    %int1_45124 = torch.constant.int 1
    %47684 = torch.aten.slice.Tensor %47683, %int1_45121, %int0_45122, %int9223372036854775807_45123, %int1_45124 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %47684, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_45125 = torch.constant.int 0
    %47685 = torch.aten.unsqueeze %47684, %int0_45125 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %47685, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_45126 = torch.constant.int 2
    %47686 = torch.aten.unsqueeze %47685, %int2_45126 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47686, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_45127 = torch.constant.int 3
    %int0_45128 = torch.constant.int 0
    %int9223372036854775807_45129 = torch.constant.int 9223372036854775807
    %int1_45130 = torch.constant.int 1
    %47687 = torch.aten.slice.Tensor %47686, %int3_45127, %int0_45128, %int9223372036854775807_45129, %int1_45130 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47687, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %47688 = torch_c.to_builtin_tensor %47363 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_45131 = arith.constant 1 : index
    %dim_45132 = tensor.dim %47688, %c1_45131 : tensor<4x?x1x128xf16>
    %47689 = flow.tensor.bitcast %47688 : tensor<4x?x1x128xf16>{%dim_45132} -> tensor<4x?x1x64xcomplex<f16>>{%dim_45132}
    %47690 = torch_c.from_builtin_tensor %47689 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %47690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %47691 = torch.aten.mul.Tensor %47690, %47687 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %47691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %47692 = torch_c.to_builtin_tensor %47691 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_45133 = arith.constant 1 : index
    %dim_45134 = tensor.dim %47692, %c1_45133 : tensor<4x?x1x64xcomplex<f32>>
    %47693 = flow.tensor.bitcast %47692 : tensor<4x?x1x64xcomplex<f32>>{%dim_45134} -> tensor<4x?x1x128xf32>{%dim_45134}
    %47694 = torch_c.from_builtin_tensor %47693 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %47694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_45135 = torch.constant.int 5
    %47695 = torch.prims.convert_element_type %47694, %int5_45135 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %47695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
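    // Annotation: compute flat KV-cache slot indices per device. Each page table
    // (%2364 ... %2385, one per shard) is scaled by the page stride of 64 rows and
    // offset by 48; the offset presumably selects this transformer block's key row
    // within each 64-row page (cf. the [?,32,2,16,1,128] cache view further below).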
    %int64_45136 = torch.constant.int 64
    %47696 = torch.aten.mul.Scalar %2364, %int64_45136 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47696, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_45137 = torch.constant.int 64
    %47697 = torch.aten.mul.Scalar %2367, %int64_45137 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47697, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_45138 = torch.constant.int 64
    %47698 = torch.aten.mul.Scalar %2370, %int64_45138 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47698, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_45139 = torch.constant.int 64
    %47699 = torch.aten.mul.Scalar %2373, %int64_45139 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47699, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_45140 = torch.constant.int 64
    %47700 = torch.aten.mul.Scalar %2376, %int64_45140 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47700, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_45141 = torch.constant.int 64
    %47701 = torch.aten.mul.Scalar %2379, %int64_45141 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47701, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_45142 = torch.constant.int 64
    %47702 = torch.aten.mul.Scalar %2382, %int64_45142 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47702, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_45143 = torch.constant.int 64
    %47703 = torch.aten.mul.Scalar %2385, %int64_45143 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47703, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int48 = torch.constant.int 48
    %int1_45144 = torch.constant.int 1
    %47704 = torch.aten.add.Scalar %47696, %int48, %int1_45144 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47704, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int48_45145 = torch.constant.int 48
    %int1_45146 = torch.constant.int 1
    %47705 = torch.aten.add.Scalar %47697, %int48_45145, %int1_45146 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47705, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int48_45147 = torch.constant.int 48
    %int1_45148 = torch.constant.int 1
    %47706 = torch.aten.add.Scalar %47698, %int48_45147, %int1_45148 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47706, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int48_45149 = torch.constant.int 48
    %int1_45150 = torch.constant.int 1
    %47707 = torch.aten.add.Scalar %47699, %int48_45149, %int1_45150 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47707, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int48_45151 = torch.constant.int 48
    %int1_45152 = torch.constant.int 1
    %47708 = torch.aten.add.Scalar %47700, %int48_45151, %int1_45152 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47708, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int48_45153 = torch.constant.int 48
    %int1_45154 = torch.constant.int 1
    %47709 = torch.aten.add.Scalar %47701, %int48_45153, %int1_45154 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47709, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int48_45155 = torch.constant.int 48
    %int1_45156 = torch.constant.int 1
    %47710 = torch.aten.add.Scalar %47702, %int48_45155, %int1_45156 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47710, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int48_45157 = torch.constant.int 48
    %int1_45158 = torch.constant.int 1
    %47711 = torch.aten.add.Scalar %47703, %int48_45157, %int1_45158 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47711, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
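    // Annotation: regroup the sequence dimension into cache pages. Each
    // [4, s0*16, 1, 128] tensor is viewed as [4, s0, 16, 1, 128], i.e. s0 pages of
    // 16 positions, matching the paged-cache row shape [16, 1, 128].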
    %int4_45159 = torch.constant.int 4
    %int16_45160 = torch.constant.int 16
    %int1_45161 = torch.constant.int 1
    %int128_45162 = torch.constant.int 128
    %47712 = torch.prim.ListConstruct %int4_45159, %3095, %int16_45160, %int1_45161, %int128_45162 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47713 = torch.aten.view %47590, %47712 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47713, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45163 = torch.constant.int 4
    %int16_45164 = torch.constant.int 16
    %int1_45165 = torch.constant.int 1
    %int128_45166 = torch.constant.int 128
    %47714 = torch.prim.ListConstruct %int4_45163, %3095, %int16_45164, %int1_45165, %int128_45166 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47715 = torch.aten.view %47605, %47714 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47715, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45167 = torch.constant.int 4
    %int16_45168 = torch.constant.int 16
    %int1_45169 = torch.constant.int 1
    %int128_45170 = torch.constant.int 128
    %47716 = torch.prim.ListConstruct %int4_45167, %3095, %int16_45168, %int1_45169, %int128_45170 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47717 = torch.aten.view %47620, %47716 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47717, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45171 = torch.constant.int 4
    %int16_45172 = torch.constant.int 16
    %int1_45173 = torch.constant.int 1
    %int128_45174 = torch.constant.int 128
    %47718 = torch.prim.ListConstruct %int4_45171, %3095, %int16_45172, %int1_45173, %int128_45174 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47719 = torch.aten.view %47635, %47718 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47719, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45175 = torch.constant.int 4
    %int16_45176 = torch.constant.int 16
    %int1_45177 = torch.constant.int 1
    %int128_45178 = torch.constant.int 128
    %47720 = torch.prim.ListConstruct %int4_45175, %3095, %int16_45176, %int1_45177, %int128_45178 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47721 = torch.aten.view %47650, %47720 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47721, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45179 = torch.constant.int 4
    %int16_45180 = torch.constant.int 16
    %int1_45181 = torch.constant.int 1
    %int128_45182 = torch.constant.int 128
    %47722 = torch.prim.ListConstruct %int4_45179, %3095, %int16_45180, %int1_45181, %int128_45182 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47723 = torch.aten.view %47665, %47722 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47723, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45183 = torch.constant.int 4
    %int16_45184 = torch.constant.int 16
    %int1_45185 = torch.constant.int 1
    %int128_45186 = torch.constant.int 128
    %47724 = torch.prim.ListConstruct %int4_45183, %3095, %int16_45184, %int1_45185, %int128_45186 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47725 = torch.aten.view %47680, %47724 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47725, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45187 = torch.constant.int 4
    %int16_45188 = torch.constant.int 16
    %int1_45189 = torch.constant.int 1
    %int128_45190 = torch.constant.int 128
    %47726 = torch.prim.ListConstruct %int4_45187, %3095, %int16_45188, %int1_45189, %int128_45190 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47727 = torch.aten.view %47695, %47726 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47727, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
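    // Annotation: collapse the batch and page dims, [4, s0, 16, 1, 128] to
    // [s0*4, 16, 1, 128], so each row corresponds to one (batch, page) pair.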
    %int4_45191 = torch.constant.int 4
    %47728 = torch.aten.mul.int %int4_45191, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45192 = torch.constant.int 16
    %int1_45193 = torch.constant.int 1
    %int128_45194 = torch.constant.int 128
    %47729 = torch.prim.ListConstruct %47728, %int16_45192, %int1_45193, %int128_45194 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47730 = torch.aten.view %47713, %47729 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47730, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45195 = torch.constant.int 4
    %47731 = torch.aten.mul.int %int4_45195, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45196 = torch.constant.int 16
    %int1_45197 = torch.constant.int 1
    %int128_45198 = torch.constant.int 128
    %47732 = torch.prim.ListConstruct %47731, %int16_45196, %int1_45197, %int128_45198 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47733 = torch.aten.view %47715, %47732 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47733, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45199 = torch.constant.int 4
    %47734 = torch.aten.mul.int %int4_45199, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45200 = torch.constant.int 16
    %int1_45201 = torch.constant.int 1
    %int128_45202 = torch.constant.int 128
    %47735 = torch.prim.ListConstruct %47734, %int16_45200, %int1_45201, %int128_45202 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47736 = torch.aten.view %47717, %47735 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47736, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45203 = torch.constant.int 4
    %47737 = torch.aten.mul.int %int4_45203, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45204 = torch.constant.int 16
    %int1_45205 = torch.constant.int 1
    %int128_45206 = torch.constant.int 128
    %47738 = torch.prim.ListConstruct %47737, %int16_45204, %int1_45205, %int128_45206 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47739 = torch.aten.view %47719, %47738 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47739, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45207 = torch.constant.int 4
    %47740 = torch.aten.mul.int %int4_45207, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45208 = torch.constant.int 16
    %int1_45209 = torch.constant.int 1
    %int128_45210 = torch.constant.int 128
    %47741 = torch.prim.ListConstruct %47740, %int16_45208, %int1_45209, %int128_45210 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47742 = torch.aten.view %47721, %47741 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47742, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45211 = torch.constant.int 4
    %47743 = torch.aten.mul.int %int4_45211, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45212 = torch.constant.int 16
    %int1_45213 = torch.constant.int 1
    %int128_45214 = torch.constant.int 128
    %47744 = torch.prim.ListConstruct %47743, %int16_45212, %int1_45213, %int128_45214 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47745 = torch.aten.view %47723, %47744 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47745, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45215 = torch.constant.int 4
    %47746 = torch.aten.mul.int %int4_45215, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45216 = torch.constant.int 16
    %int1_45217 = torch.constant.int 1
    %int128_45218 = torch.constant.int 128
    %47747 = torch.prim.ListConstruct %47746, %int16_45216, %int1_45217, %int128_45218 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47748 = torch.aten.view %47725, %47747 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47748, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45219 = torch.constant.int 4
    %47749 = torch.aten.mul.int %int4_45219, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45220 = torch.constant.int 16
    %int1_45221 = torch.constant.int 1
    %int128_45222 = torch.constant.int 128
    %47750 = torch.prim.ListConstruct %47749, %int16_45220, %int1_45221, %int128_45222 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47751 = torch.aten.view %47727, %47750 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47751, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
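    // Annotation: flatten the matching [4, s0] slot-index tensors to [s0*4] so
    // each index lines up with one payload row above.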
    %int4_45223 = torch.constant.int 4
    %47752 = torch.aten.mul.int %int4_45223, %3095 : !torch.int, !torch.int -> !torch.int
    %47753 = torch.prim.ListConstruct %47752 : (!torch.int) -> !torch.list<int>
    %47754 = torch.aten.view %47704, %47753 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47754, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45224 = torch.constant.int 4
    %47755 = torch.aten.mul.int %int4_45224, %3095 : !torch.int, !torch.int -> !torch.int
    %47756 = torch.prim.ListConstruct %47755 : (!torch.int) -> !torch.list<int>
    %47757 = torch.aten.view %47705, %47756 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47757, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45225 = torch.constant.int 4
    %47758 = torch.aten.mul.int %int4_45225, %3095 : !torch.int, !torch.int -> !torch.int
    %47759 = torch.prim.ListConstruct %47758 : (!torch.int) -> !torch.list<int>
    %47760 = torch.aten.view %47706, %47759 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47760, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45226 = torch.constant.int 4
    %47761 = torch.aten.mul.int %int4_45226, %3095 : !torch.int, !torch.int -> !torch.int
    %47762 = torch.prim.ListConstruct %47761 : (!torch.int) -> !torch.list<int>
    %47763 = torch.aten.view %47707, %47762 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47763, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45227 = torch.constant.int 4
    %47764 = torch.aten.mul.int %int4_45227, %3095 : !torch.int, !torch.int -> !torch.int
    %47765 = torch.prim.ListConstruct %47764 : (!torch.int) -> !torch.list<int>
    %47766 = torch.aten.view %47708, %47765 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47766, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45228 = torch.constant.int 4
    %47767 = torch.aten.mul.int %int4_45228, %3095 : !torch.int, !torch.int -> !torch.int
    %47768 = torch.prim.ListConstruct %47767 : (!torch.int) -> !torch.list<int>
    %47769 = torch.aten.view %47709, %47768 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47769, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45229 = torch.constant.int 4
    %47770 = torch.aten.mul.int %int4_45229, %3095 : !torch.int, !torch.int -> !torch.int
    %47771 = torch.prim.ListConstruct %47770 : (!torch.int) -> !torch.list<int>
    %47772 = torch.aten.view %47710, %47771 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47772, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45230 = torch.constant.int 4
    %47773 = torch.aten.mul.int %int4_45230, %3095 : !torch.int, !torch.int -> !torch.int
    %47774 = torch.prim.ListConstruct %47773 : (!torch.int) -> !torch.list<int>
    %47775 = torch.aten.view %47711, %47774 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47775, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
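    // Annotation: the second operand set below (%47365 ... %47379) gets the same
    // page regrouping and flattening; these appear to be the value states, while
    // the tensors above are the rotated keys.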
    %int4_45231 = torch.constant.int 4
    %int16_45232 = torch.constant.int 16
    %int1_45233 = torch.constant.int 1
    %int128_45234 = torch.constant.int 128
    %47776 = torch.prim.ListConstruct %int4_45231, %3095, %int16_45232, %int1_45233, %int128_45234 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47777 = torch.aten.view %47365, %47776 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47777, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45235 = torch.constant.int 4
    %int16_45236 = torch.constant.int 16
    %int1_45237 = torch.constant.int 1
    %int128_45238 = torch.constant.int 128
    %47778 = torch.prim.ListConstruct %int4_45235, %3095, %int16_45236, %int1_45237, %int128_45238 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47779 = torch.aten.view %47367, %47778 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47779, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45239 = torch.constant.int 4
    %int16_45240 = torch.constant.int 16
    %int1_45241 = torch.constant.int 1
    %int128_45242 = torch.constant.int 128
    %47780 = torch.prim.ListConstruct %int4_45239, %3095, %int16_45240, %int1_45241, %int128_45242 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47781 = torch.aten.view %47369, %47780 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47781, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45243 = torch.constant.int 4
    %int16_45244 = torch.constant.int 16
    %int1_45245 = torch.constant.int 1
    %int128_45246 = torch.constant.int 128
    %47782 = torch.prim.ListConstruct %int4_45243, %3095, %int16_45244, %int1_45245, %int128_45246 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47783 = torch.aten.view %47371, %47782 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47783, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45247 = torch.constant.int 4
    %int16_45248 = torch.constant.int 16
    %int1_45249 = torch.constant.int 1
    %int128_45250 = torch.constant.int 128
    %47784 = torch.prim.ListConstruct %int4_45247, %3095, %int16_45248, %int1_45249, %int128_45250 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47785 = torch.aten.view %47373, %47784 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47785, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45251 = torch.constant.int 4
    %int16_45252 = torch.constant.int 16
    %int1_45253 = torch.constant.int 1
    %int128_45254 = torch.constant.int 128
    %47786 = torch.prim.ListConstruct %int4_45251, %3095, %int16_45252, %int1_45253, %int128_45254 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47787 = torch.aten.view %47375, %47786 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47787, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45255 = torch.constant.int 4
    %int16_45256 = torch.constant.int 16
    %int1_45257 = torch.constant.int 1
    %int128_45258 = torch.constant.int 128
    %47788 = torch.prim.ListConstruct %int4_45255, %3095, %int16_45256, %int1_45257, %int128_45258 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47789 = torch.aten.view %47377, %47788 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47789, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45259 = torch.constant.int 4
    %int16_45260 = torch.constant.int 16
    %int1_45261 = torch.constant.int 1
    %int128_45262 = torch.constant.int 128
    %47790 = torch.prim.ListConstruct %int4_45259, %3095, %int16_45260, %int1_45261, %int128_45262 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47791 = torch.aten.view %47379, %47790 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %47791, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_45263 = torch.constant.int 4
    %47792 = torch.aten.mul.int %int4_45263, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45264 = torch.constant.int 16
    %int1_45265 = torch.constant.int 1
    %int128_45266 = torch.constant.int 128
    %47793 = torch.prim.ListConstruct %47792, %int16_45264, %int1_45265, %int128_45266 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47794 = torch.aten.view %47777, %47793 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47794, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45267 = torch.constant.int 4
    %47795 = torch.aten.mul.int %int4_45267, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45268 = torch.constant.int 16
    %int1_45269 = torch.constant.int 1
    %int128_45270 = torch.constant.int 128
    %47796 = torch.prim.ListConstruct %47795, %int16_45268, %int1_45269, %int128_45270 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47797 = torch.aten.view %47779, %47796 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47797, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45271 = torch.constant.int 4
    %47798 = torch.aten.mul.int %int4_45271, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45272 = torch.constant.int 16
    %int1_45273 = torch.constant.int 1
    %int128_45274 = torch.constant.int 128
    %47799 = torch.prim.ListConstruct %47798, %int16_45272, %int1_45273, %int128_45274 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47800 = torch.aten.view %47781, %47799 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47800, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45275 = torch.constant.int 4
    %47801 = torch.aten.mul.int %int4_45275, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45276 = torch.constant.int 16
    %int1_45277 = torch.constant.int 1
    %int128_45278 = torch.constant.int 128
    %47802 = torch.prim.ListConstruct %47801, %int16_45276, %int1_45277, %int128_45278 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47803 = torch.aten.view %47783, %47802 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47803, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45279 = torch.constant.int 4
    %47804 = torch.aten.mul.int %int4_45279, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45280 = torch.constant.int 16
    %int1_45281 = torch.constant.int 1
    %int128_45282 = torch.constant.int 128
    %47805 = torch.prim.ListConstruct %47804, %int16_45280, %int1_45281, %int128_45282 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47806 = torch.aten.view %47785, %47805 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47806, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45283 = torch.constant.int 4
    %47807 = torch.aten.mul.int %int4_45283, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45284 = torch.constant.int 16
    %int1_45285 = torch.constant.int 1
    %int128_45286 = torch.constant.int 128
    %47808 = torch.prim.ListConstruct %47807, %int16_45284, %int1_45285, %int128_45286 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47809 = torch.aten.view %47787, %47808 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47809, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45287 = torch.constant.int 4
    %47810 = torch.aten.mul.int %int4_45287, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45288 = torch.constant.int 16
    %int1_45289 = torch.constant.int 1
    %int128_45290 = torch.constant.int 128
    %47811 = torch.prim.ListConstruct %47810, %int16_45288, %int1_45289, %int128_45290 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47812 = torch.aten.view %47789, %47811 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47812, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_45291 = torch.constant.int 4
    %47813 = torch.aten.mul.int %int4_45291, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_45292 = torch.constant.int 16
    %int1_45293 = torch.constant.int 1
    %int128_45294 = torch.constant.int 128
    %47814 = torch.prim.ListConstruct %47813, %int16_45292, %int1_45293, %int128_45294 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47815 = torch.aten.view %47791, %47814 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47815, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
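    // Annotation: value rows sit one slot after the key rows within a page, so the
    // key slot indices computed earlier are shifted by +1 here.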
    %int1_45295 = torch.constant.int 1
    %int1_45296 = torch.constant.int 1
    %47816 = torch.aten.add.Scalar %47704, %int1_45295, %int1_45296 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47816, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_45297 = torch.constant.int 1
    %int1_45298 = torch.constant.int 1
    %47817 = torch.aten.add.Scalar %47705, %int1_45297, %int1_45298 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47817, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_45299 = torch.constant.int 1
    %int1_45300 = torch.constant.int 1
    %47818 = torch.aten.add.Scalar %47706, %int1_45299, %int1_45300 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47818, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_45301 = torch.constant.int 1
    %int1_45302 = torch.constant.int 1
    %47819 = torch.aten.add.Scalar %47707, %int1_45301, %int1_45302 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47819, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_45303 = torch.constant.int 1
    %int1_45304 = torch.constant.int 1
    %47820 = torch.aten.add.Scalar %47708, %int1_45303, %int1_45304 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47820, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_45305 = torch.constant.int 1
    %int1_45306 = torch.constant.int 1
    %47821 = torch.aten.add.Scalar %47709, %int1_45305, %int1_45306 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47821, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_45307 = torch.constant.int 1
    %int1_45308 = torch.constant.int 1
    %47822 = torch.aten.add.Scalar %47710, %int1_45307, %int1_45308 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47822, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_45309 = torch.constant.int 1
    %int1_45310 = torch.constant.int 1
    %47823 = torch.aten.add.Scalar %47711, %int1_45309, %int1_45310 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %47823, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
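    // Annotation: flatten the shifted value-slot indices to [s0*4], mirroring the
    // key-slot flattening above.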
    %int4_45311 = torch.constant.int 4
    %47824 = torch.aten.mul.int %int4_45311, %3095 : !torch.int, !torch.int -> !torch.int
    %47825 = torch.prim.ListConstruct %47824 : (!torch.int) -> !torch.list<int>
    %47826 = torch.aten.view %47816, %47825 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47826, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45312 = torch.constant.int 4
    %47827 = torch.aten.mul.int %int4_45312, %3095 : !torch.int, !torch.int -> !torch.int
    %47828 = torch.prim.ListConstruct %47827 : (!torch.int) -> !torch.list<int>
    %47829 = torch.aten.view %47817, %47828 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47829, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45313 = torch.constant.int 4
    %47830 = torch.aten.mul.int %int4_45313, %3095 : !torch.int, !torch.int -> !torch.int
    %47831 = torch.prim.ListConstruct %47830 : (!torch.int) -> !torch.list<int>
    %47832 = torch.aten.view %47818, %47831 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47832, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45314 = torch.constant.int 4
    %47833 = torch.aten.mul.int %int4_45314, %3095 : !torch.int, !torch.int -> !torch.int
    %47834 = torch.prim.ListConstruct %47833 : (!torch.int) -> !torch.list<int>
    %47835 = torch.aten.view %47819, %47834 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47835, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45315 = torch.constant.int 4
    %47836 = torch.aten.mul.int %int4_45315, %3095 : !torch.int, !torch.int -> !torch.int
    %47837 = torch.prim.ListConstruct %47836 : (!torch.int) -> !torch.list<int>
    %47838 = torch.aten.view %47820, %47837 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47838, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45316 = torch.constant.int 4
    %47839 = torch.aten.mul.int %int4_45316, %3095 : !torch.int, !torch.int -> !torch.int
    %47840 = torch.prim.ListConstruct %47839 : (!torch.int) -> !torch.list<int>
    %47841 = torch.aten.view %47821, %47840 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47841, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45317 = torch.constant.int 4
    %47842 = torch.aten.mul.int %int4_45317, %3095 : !torch.int, !torch.int -> !torch.int
    %47843 = torch.prim.ListConstruct %47842 : (!torch.int) -> !torch.list<int>
    %47844 = torch.aten.view %47822, %47843 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47844, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_45318 = torch.constant.int 4
    %47845 = torch.aten.mul.int %int4_45318, %3095 : !torch.int, !torch.int -> !torch.int
    %47846 = torch.prim.ListConstruct %47845 : (!torch.int) -> !torch.list<int>
    %47847 = torch.aten.view %47823, %47846 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47847, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
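    // Annotation: concatenate key and value slot indices into one [s0*8] list per
    // device, keys first.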
    %47848 = torch.prim.ListConstruct %47754, %47826 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_45319 = torch.constant.int 0
    %47849 = torch.aten.cat %47848, %int0_45319 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47849, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %47850 = torch.prim.ListConstruct %47757, %47829 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_45320 = torch.constant.int 0
    %47851 = torch.aten.cat %47850, %int0_45320 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47851, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %47852 = torch.prim.ListConstruct %47760, %47832 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_45321 = torch.constant.int 0
    %47853 = torch.aten.cat %47852, %int0_45321 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47853, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %47854 = torch.prim.ListConstruct %47763, %47835 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_45322 = torch.constant.int 0
    %47855 = torch.aten.cat %47854, %int0_45322 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47855, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %47856 = torch.prim.ListConstruct %47766, %47838 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_45323 = torch.constant.int 0
    %47857 = torch.aten.cat %47856, %int0_45323 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47857, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %47858 = torch.prim.ListConstruct %47769, %47841 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_45324 = torch.constant.int 0
    %47859 = torch.aten.cat %47858, %int0_45324 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47859, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %47860 = torch.prim.ListConstruct %47772, %47844 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_45325 = torch.constant.int 0
    %47861 = torch.aten.cat %47860, %int0_45325 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47861, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %47862 = torch.prim.ListConstruct %47775, %47847 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_45326 = torch.constant.int 0
    %47863 = torch.aten.cat %47862, %int0_45326 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %47863, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
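    // Annotation: concatenate the key and value payload rows in the same order,
    // giving [s0*8, 16, 1, 128] per device to match the index list.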
    %47864 = torch.prim.ListConstruct %47730, %47794 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_45327 = torch.constant.int 0
    %47865 = torch.aten.cat %47864, %int0_45327 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47865, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47866 = torch.prim.ListConstruct %47733, %47797 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_45328 = torch.constant.int 0
    %47867 = torch.aten.cat %47866, %int0_45328 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47867, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47868 = torch.prim.ListConstruct %47736, %47800 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_45329 = torch.constant.int 0
    %47869 = torch.aten.cat %47868, %int0_45329 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47869, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47870 = torch.prim.ListConstruct %47739, %47803 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_45330 = torch.constant.int 0
    %47871 = torch.aten.cat %47870, %int0_45330 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47871, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47872 = torch.prim.ListConstruct %47742, %47806 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_45331 = torch.constant.int 0
    %47873 = torch.aten.cat %47872, %int0_45331 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47873, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47874 = torch.prim.ListConstruct %47745, %47809 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_45332 = torch.constant.int 0
    %47875 = torch.aten.cat %47874, %int0_45332 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47875, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47876 = torch.prim.ListConstruct %47748, %47812 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_45333 = torch.constant.int 0
    %47877 = torch.aten.cat %47876, %int0_45333 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47877, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47878 = torch.prim.ListConstruct %47751, %47815 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_45334 = torch.constant.int 0
    %47879 = torch.aten.cat %47878, %int0_45334 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47879, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
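    // Annotation: scatter into each device's paged KV cache. The flat [?, 131072]
    // cache is viewed as [pages, 32, 2, 16, 1, 128] (64 rows of [16, 1, 128] per
    // page), flattened to [pages*64, 16, 1, 128], updated via
    // torch.aten.index_put (accumulate = false) at the slot indices computed
    // above, then viewed back to the flat layout. The same view / index_put / view
    // sequence repeats per device shard (%46030, %46042, %46054, ...).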
    %int32_45335 = torch.constant.int 32
    %int2_45336 = torch.constant.int 2
    %int16_45337 = torch.constant.int 16
    %int1_45338 = torch.constant.int 1
    %int128_45339 = torch.constant.int 128
    %47880 = torch.prim.ListConstruct %3023, %int32_45335, %int2_45336, %int16_45337, %int1_45338, %int128_45339 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47881 = torch.aten.view %46030, %47880 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47881, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_45340 = torch.constant.int 32
    %47882 = torch.aten.mul.int %3023, %int32_45340 : !torch.int, !torch.int -> !torch.int
    %int2_45341 = torch.constant.int 2
    %47883 = torch.aten.mul.int %47882, %int2_45341 : !torch.int, !torch.int -> !torch.int
    %int16_45342 = torch.constant.int 16
    %int1_45343 = torch.constant.int 1
    %int128_45344 = torch.constant.int 128
    %47884 = torch.prim.ListConstruct %47883, %int16_45342, %int1_45343, %int128_45344 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47885 = torch.aten.view %47881, %47884 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47885, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47886 = torch.prim.ListConstruct %47849 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_45345 = torch.constant.bool false
    %47887 = torch.aten.index_put %47885, %47886, %47865, %false_45345 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47887, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_45346 = torch.constant.int 32
    %int2_45347 = torch.constant.int 2
    %int16_45348 = torch.constant.int 16
    %int1_45349 = torch.constant.int 1
    %int128_45350 = torch.constant.int 128
    %47888 = torch.prim.ListConstruct %3023, %int32_45346, %int2_45347, %int16_45348, %int1_45349, %int128_45350 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47889 = torch.aten.view %47887, %47888 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47889, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_45351 = torch.constant.int 131072
    %47890 = torch.prim.ListConstruct %3023, %int131072_45351 : (!torch.int, !torch.int) -> !torch.list<int>
    %47891 = torch.aten.view %47889, %47890 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %47891, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_45352 = torch.constant.int 32
    %int2_45353 = torch.constant.int 2
    %int16_45354 = torch.constant.int 16
    %int1_45355 = torch.constant.int 1
    %int128_45356 = torch.constant.int 128
    %47892 = torch.prim.ListConstruct %3026, %int32_45352, %int2_45353, %int16_45354, %int1_45355, %int128_45356 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47893 = torch.aten.view %46042, %47892 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47893, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_45357 = torch.constant.int 32
    %47894 = torch.aten.mul.int %3026, %int32_45357 : !torch.int, !torch.int -> !torch.int
    %int2_45358 = torch.constant.int 2
    %47895 = torch.aten.mul.int %47894, %int2_45358 : !torch.int, !torch.int -> !torch.int
    %int16_45359 = torch.constant.int 16
    %int1_45360 = torch.constant.int 1
    %int128_45361 = torch.constant.int 128
    %47896 = torch.prim.ListConstruct %47895, %int16_45359, %int1_45360, %int128_45361 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47897 = torch.aten.view %47893, %47896 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47897, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47898 = torch.prim.ListConstruct %47851 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_45362 = torch.constant.bool false
    %47899 = torch.aten.index_put %47897, %47898, %47867, %false_45362 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47899, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_45363 = torch.constant.int 32
    %int2_45364 = torch.constant.int 2
    %int16_45365 = torch.constant.int 16
    %int1_45366 = torch.constant.int 1
    %int128_45367 = torch.constant.int 128
    %47900 = torch.prim.ListConstruct %3026, %int32_45363, %int2_45364, %int16_45365, %int1_45366, %int128_45367 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47901 = torch.aten.view %47899, %47900 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47901, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_45368 = torch.constant.int 131072
    %47902 = torch.prim.ListConstruct %3026, %int131072_45368 : (!torch.int, !torch.int) -> !torch.list<int>
    %47903 = torch.aten.view %47901, %47902 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %47903, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_45369 = torch.constant.int 32
    %int2_45370 = torch.constant.int 2
    %int16_45371 = torch.constant.int 16
    %int1_45372 = torch.constant.int 1
    %int128_45373 = torch.constant.int 128
    %47904 = torch.prim.ListConstruct %3029, %int32_45369, %int2_45370, %int16_45371, %int1_45372, %int128_45373 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47905 = torch.aten.view %46054, %47904 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47905, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_45374 = torch.constant.int 32
    %47906 = torch.aten.mul.int %3029, %int32_45374 : !torch.int, !torch.int -> !torch.int
    %int2_45375 = torch.constant.int 2
    %47907 = torch.aten.mul.int %47906, %int2_45375 : !torch.int, !torch.int -> !torch.int
    %int16_45376 = torch.constant.int 16
    %int1_45377 = torch.constant.int 1
    %int128_45378 = torch.constant.int 128
    %47908 = torch.prim.ListConstruct %47907, %int16_45376, %int1_45377, %int128_45378 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47909 = torch.aten.view %47905, %47908 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47909, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47910 = torch.prim.ListConstruct %47853 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_45379 = torch.constant.bool false
    %47911 = torch.aten.index_put %47909, %47910, %47869, %false_45379 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47911, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_45380 = torch.constant.int 32
    %int2_45381 = torch.constant.int 2
    %int16_45382 = torch.constant.int 16
    %int1_45383 = torch.constant.int 1
    %int128_45384 = torch.constant.int 128
    %47912 = torch.prim.ListConstruct %3029, %int32_45380, %int2_45381, %int16_45382, %int1_45383, %int128_45384 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47913 = torch.aten.view %47911, %47912 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47913, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_45385 = torch.constant.int 131072
    %47914 = torch.prim.ListConstruct %3029, %int131072_45385 : (!torch.int, !torch.int) -> !torch.list<int>
    %47915 = torch.aten.view %47913, %47914 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %47915, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_45386 = torch.constant.int 32
    %int2_45387 = torch.constant.int 2
    %int16_45388 = torch.constant.int 16
    %int1_45389 = torch.constant.int 1
    %int128_45390 = torch.constant.int 128
    %47916 = torch.prim.ListConstruct %3032, %int32_45386, %int2_45387, %int16_45388, %int1_45389, %int128_45390 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47917 = torch.aten.view %46066, %47916 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47917, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_45391 = torch.constant.int 32
    %47918 = torch.aten.mul.int %3032, %int32_45391 : !torch.int, !torch.int -> !torch.int
    %int2_45392 = torch.constant.int 2
    %47919 = torch.aten.mul.int %47918, %int2_45392 : !torch.int, !torch.int -> !torch.int
    %int16_45393 = torch.constant.int 16
    %int1_45394 = torch.constant.int 1
    %int128_45395 = torch.constant.int 128
    %47920 = torch.prim.ListConstruct %47919, %int16_45393, %int1_45394, %int128_45395 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47921 = torch.aten.view %47917, %47920 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47921, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47922 = torch.prim.ListConstruct %47855 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_45396 = torch.constant.bool false
    %47923 = torch.aten.index_put %47921, %47922, %47871, %false_45396 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47923, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_45397 = torch.constant.int 32
    %int2_45398 = torch.constant.int 2
    %int16_45399 = torch.constant.int 16
    %int1_45400 = torch.constant.int 1
    %int128_45401 = torch.constant.int 128
    %47924 = torch.prim.ListConstruct %3032, %int32_45397, %int2_45398, %int16_45399, %int1_45400, %int128_45401 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47925 = torch.aten.view %47923, %47924 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47925, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_45402 = torch.constant.int 131072
    %47926 = torch.prim.ListConstruct %3032, %int131072_45402 : (!torch.int, !torch.int) -> !torch.list<int>
    %47927 = torch.aten.view %47925, %47926 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %47927, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_45403 = torch.constant.int 32
    %int2_45404 = torch.constant.int 2
    %int16_45405 = torch.constant.int 16
    %int1_45406 = torch.constant.int 1
    %int128_45407 = torch.constant.int 128
    %47928 = torch.prim.ListConstruct %3035, %int32_45403, %int2_45404, %int16_45405, %int1_45406, %int128_45407 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47929 = torch.aten.view %46078, %47928 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47929, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_45408 = torch.constant.int 32
    %47930 = torch.aten.mul.int %3035, %int32_45408 : !torch.int, !torch.int -> !torch.int
    %int2_45409 = torch.constant.int 2
    %47931 = torch.aten.mul.int %47930, %int2_45409 : !torch.int, !torch.int -> !torch.int
    %int16_45410 = torch.constant.int 16
    %int1_45411 = torch.constant.int 1
    %int128_45412 = torch.constant.int 128
    %47932 = torch.prim.ListConstruct %47931, %int16_45410, %int1_45411, %int128_45412 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47933 = torch.aten.view %47929, %47932 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47933, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47934 = torch.prim.ListConstruct %47857 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_45413 = torch.constant.bool false
    %47935 = torch.aten.index_put %47933, %47934, %47873, %false_45413 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47935, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_45414 = torch.constant.int 32
    %int2_45415 = torch.constant.int 2
    %int16_45416 = torch.constant.int 16
    %int1_45417 = torch.constant.int 1
    %int128_45418 = torch.constant.int 128
    %47936 = torch.prim.ListConstruct %3035, %int32_45414, %int2_45415, %int16_45416, %int1_45417, %int128_45418 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47937 = torch.aten.view %47935, %47936 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47937, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_45419 = torch.constant.int 131072
    %47938 = torch.prim.ListConstruct %3035, %int131072_45419 : (!torch.int, !torch.int) -> !torch.list<int>
    %47939 = torch.aten.view %47937, %47938 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %47939, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_45420 = torch.constant.int 32
    %int2_45421 = torch.constant.int 2
    %int16_45422 = torch.constant.int 16
    %int1_45423 = torch.constant.int 1
    %int128_45424 = torch.constant.int 128
    %47940 = torch.prim.ListConstruct %3038, %int32_45420, %int2_45421, %int16_45422, %int1_45423, %int128_45424 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47941 = torch.aten.view %46090, %47940 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47941, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_45425 = torch.constant.int 32
    %47942 = torch.aten.mul.int %3038, %int32_45425 : !torch.int, !torch.int -> !torch.int
    %int2_45426 = torch.constant.int 2
    %47943 = torch.aten.mul.int %47942, %int2_45426 : !torch.int, !torch.int -> !torch.int
    %int16_45427 = torch.constant.int 16
    %int1_45428 = torch.constant.int 1
    %int128_45429 = torch.constant.int 128
    %47944 = torch.prim.ListConstruct %47943, %int16_45427, %int1_45428, %int128_45429 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47945 = torch.aten.view %47941, %47944 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47945, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47946 = torch.prim.ListConstruct %47859 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_45430 = torch.constant.bool false
    %47947 = torch.aten.index_put %47945, %47946, %47875, %false_45430 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47947, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_45431 = torch.constant.int 32
    %int2_45432 = torch.constant.int 2
    %int16_45433 = torch.constant.int 16
    %int1_45434 = torch.constant.int 1
    %int128_45435 = torch.constant.int 128
    %47948 = torch.prim.ListConstruct %3038, %int32_45431, %int2_45432, %int16_45433, %int1_45434, %int128_45435 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47949 = torch.aten.view %47947, %47948 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47949, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_45436 = torch.constant.int 131072
    %47950 = torch.prim.ListConstruct %3038, %int131072_45436 : (!torch.int, !torch.int) -> !torch.list<int>
    %47951 = torch.aten.view %47949, %47950 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %47951, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_45437 = torch.constant.int 32
    %int2_45438 = torch.constant.int 2
    %int16_45439 = torch.constant.int 16
    %int1_45440 = torch.constant.int 1
    %int128_45441 = torch.constant.int 128
    %47952 = torch.prim.ListConstruct %3041, %int32_45437, %int2_45438, %int16_45439, %int1_45440, %int128_45441 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47953 = torch.aten.view %46102, %47952 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47953, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_45442 = torch.constant.int 32
    %47954 = torch.aten.mul.int %3041, %int32_45442 : !torch.int, !torch.int -> !torch.int
    %int2_45443 = torch.constant.int 2
    %47955 = torch.aten.mul.int %47954, %int2_45443 : !torch.int, !torch.int -> !torch.int
    %int16_45444 = torch.constant.int 16
    %int1_45445 = torch.constant.int 1
    %int128_45446 = torch.constant.int 128
    %47956 = torch.prim.ListConstruct %47955, %int16_45444, %int1_45445, %int128_45446 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47957 = torch.aten.view %47953, %47956 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47957, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47958 = torch.prim.ListConstruct %47861 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_45447 = torch.constant.bool false
    %47959 = torch.aten.index_put %47957, %47958, %47877, %false_45447 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47959, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_45448 = torch.constant.int 32
    %int2_45449 = torch.constant.int 2
    %int16_45450 = torch.constant.int 16
    %int1_45451 = torch.constant.int 1
    %int128_45452 = torch.constant.int 128
    %47960 = torch.prim.ListConstruct %3041, %int32_45448, %int2_45449, %int16_45450, %int1_45451, %int128_45452 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47961 = torch.aten.view %47959, %47960 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47961, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_45453 = torch.constant.int 131072
    %47962 = torch.prim.ListConstruct %3041, %int131072_45453 : (!torch.int, !torch.int) -> !torch.list<int>
    %47963 = torch.aten.view %47961, %47962 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %47963, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_45454 = torch.constant.int 32
    %int2_45455 = torch.constant.int 2
    %int16_45456 = torch.constant.int 16
    %int1_45457 = torch.constant.int 1
    %int128_45458 = torch.constant.int 128
    %47964 = torch.prim.ListConstruct %3044, %int32_45454, %int2_45455, %int16_45456, %int1_45457, %int128_45458 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47965 = torch.aten.view %46114, %47964 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47965, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_45459 = torch.constant.int 32
    %47966 = torch.aten.mul.int %3044, %int32_45459 : !torch.int, !torch.int -> !torch.int
    %int2_45460 = torch.constant.int 2
    %47967 = torch.aten.mul.int %47966, %int2_45460 : !torch.int, !torch.int -> !torch.int
    %int16_45461 = torch.constant.int 16
    %int1_45462 = torch.constant.int 1
    %int128_45463 = torch.constant.int 128
    %47968 = torch.prim.ListConstruct %47967, %int16_45461, %int1_45462, %int128_45463 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47969 = torch.aten.view %47965, %47968 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47969, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %47970 = torch.prim.ListConstruct %47863 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_45464 = torch.constant.bool false
    %47971 = torch.aten.index_put %47969, %47970, %47879, %false_45464 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %47971, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_45465 = torch.constant.int 32
    %int2_45466 = torch.constant.int 2
    %int16_45467 = torch.constant.int 16
    %int1_45468 = torch.constant.int 1
    %int128_45469 = torch.constant.int 128
    %47972 = torch.prim.ListConstruct %3044, %int32_45465, %int2_45466, %int16_45467, %int1_45468, %int128_45469 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %47973 = torch.aten.view %47971, %47972 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %47973, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_45470 = torch.constant.int 131072
    %47974 = torch.prim.ListConstruct %3044, %int131072_45470 : (!torch.int, !torch.int) -> !torch.list<int>
    %47975 = torch.aten.view %47973, %47974 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %47975, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
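    // All eight shard caches are now updated. The tensors unsqueezed below
    // (%47590 .. %47695, each [4, ?, 1, 128] with one KV head per shard) feed the key
    // operand of the attention calls further down; the unsqueeze at dim -2 to
    // [4, ?, 1, 1, 128] prepares the single KV head for broadcast across the query
    // heads (grouped-query attention).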
    %int-2_45471 = torch.constant.int -2
    %47976 = torch.aten.unsqueeze %47590, %int-2_45471 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %47976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45472 = torch.constant.int -2
    %47977 = torch.aten.unsqueeze %47605, %int-2_45472 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %47977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45473 = torch.constant.int -2
    %47978 = torch.aten.unsqueeze %47620, %int-2_45473 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %47978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45474 = torch.constant.int -2
    %47979 = torch.aten.unsqueeze %47635, %int-2_45474 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %47979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45475 = torch.constant.int -2
    %47980 = torch.aten.unsqueeze %47650, %int-2_45475 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %47980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45476 = torch.constant.int -2
    %47981 = torch.aten.unsqueeze %47665, %int-2_45476 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %47981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45477 = torch.constant.int -2
    %47982 = torch.aten.unsqueeze %47680, %int-2_45477 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %47982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45478 = torch.constant.int -2
    %47983 = torch.aten.unsqueeze %47695, %int-2_45478 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %47983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
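    // Broadcast each key from 1 to 4 query heads per KV head: aten.expand of
    // [4, ?, 1, 1, 128] to [4, ?, 1, 4, 128] (a stride-0 view; no data is copied).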
    %int4_45479 = torch.constant.int 4
    %int1_45480 = torch.constant.int 1
    %int4_45481 = torch.constant.int 4
    %int128_45482 = torch.constant.int 128
    %47984 = torch.prim.ListConstruct %int4_45479, %47576, %int1_45480, %int4_45481, %int128_45482 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45483 = torch.constant.bool false
    %47985 = torch.aten.expand %47976, %47984, %false_45483 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %47985, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45484 = torch.constant.int 4
    %int1_45485 = torch.constant.int 1
    %int4_45486 = torch.constant.int 4
    %int128_45487 = torch.constant.int 128
    %47986 = torch.prim.ListConstruct %int4_45484, %47576, %int1_45485, %int4_45486, %int128_45487 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45488 = torch.constant.bool false
    %47987 = torch.aten.expand %47977, %47986, %false_45488 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %47987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45489 = torch.constant.int 4
    %int1_45490 = torch.constant.int 1
    %int4_45491 = torch.constant.int 4
    %int128_45492 = torch.constant.int 128
    %47988 = torch.prim.ListConstruct %int4_45489, %47576, %int1_45490, %int4_45491, %int128_45492 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45493 = torch.constant.bool false
    %47989 = torch.aten.expand %47978, %47988, %false_45493 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %47989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45494 = torch.constant.int 4
    %int1_45495 = torch.constant.int 1
    %int4_45496 = torch.constant.int 4
    %int128_45497 = torch.constant.int 128
    %47990 = torch.prim.ListConstruct %int4_45494, %47576, %int1_45495, %int4_45496, %int128_45497 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45498 = torch.constant.bool false
    %47991 = torch.aten.expand %47979, %47990, %false_45498 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %47991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45499 = torch.constant.int 4
    %int1_45500 = torch.constant.int 1
    %int4_45501 = torch.constant.int 4
    %int128_45502 = torch.constant.int 128
    %47992 = torch.prim.ListConstruct %int4_45499, %47576, %int1_45500, %int4_45501, %int128_45502 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45503 = torch.constant.bool false
    %47993 = torch.aten.expand %47980, %47992, %false_45503 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %47993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45504 = torch.constant.int 4
    %int1_45505 = torch.constant.int 1
    %int4_45506 = torch.constant.int 4
    %int128_45507 = torch.constant.int 128
    %47994 = torch.prim.ListConstruct %int4_45504, %47576, %int1_45505, %int4_45506, %int128_45507 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45508 = torch.constant.bool false
    %47995 = torch.aten.expand %47981, %47994, %false_45508 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %47995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45509 = torch.constant.int 4
    %int1_45510 = torch.constant.int 1
    %int4_45511 = torch.constant.int 4
    %int128_45512 = torch.constant.int 128
    %47996 = torch.prim.ListConstruct %int4_45509, %47576, %int1_45510, %int4_45511, %int128_45512 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45513 = torch.constant.bool false
    %47997 = torch.aten.expand %47982, %47996, %false_45513 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %47997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45514 = torch.constant.int 4
    %int1_45515 = torch.constant.int 1
    %int4_45516 = torch.constant.int 4
    %int128_45517 = torch.constant.int 128
    %47998 = torch.prim.ListConstruct %int4_45514, %47576, %int1_45515, %int4_45516, %int128_45517 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45518 = torch.constant.bool false
    %47999 = torch.aten.expand %47983, %47998, %false_45518 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %47999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
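    // Collapse the (kv_head, group) pair into one head dimension:
    // [4, ?, 1, 4, 128] -> [4, ?, 4, 128].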
    %int4_45519 = torch.constant.int 4
    %int4_45520 = torch.constant.int 4
    %int128_45521 = torch.constant.int 128
    %48000 = torch.prim.ListConstruct %int4_45519, %47576, %int4_45520, %int128_45521 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48001 = torch.aten.view %47985, %48000 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45522 = torch.constant.int 4
    %int4_45523 = torch.constant.int 4
    %int128_45524 = torch.constant.int 128
    %48002 = torch.prim.ListConstruct %int4_45522, %47576, %int4_45523, %int128_45524 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48003 = torch.aten.view %47987, %48002 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45525 = torch.constant.int 4
    %int4_45526 = torch.constant.int 4
    %int128_45527 = torch.constant.int 128
    %48004 = torch.prim.ListConstruct %int4_45525, %47576, %int4_45526, %int128_45527 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48005 = torch.aten.view %47989, %48004 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45528 = torch.constant.int 4
    %int4_45529 = torch.constant.int 4
    %int128_45530 = torch.constant.int 128
    %48006 = torch.prim.ListConstruct %int4_45528, %47576, %int4_45529, %int128_45530 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48007 = torch.aten.view %47991, %48006 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45531 = torch.constant.int 4
    %int4_45532 = torch.constant.int 4
    %int128_45533 = torch.constant.int 128
    %48008 = torch.prim.ListConstruct %int4_45531, %47576, %int4_45532, %int128_45533 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48009 = torch.aten.view %47993, %48008 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45534 = torch.constant.int 4
    %int4_45535 = torch.constant.int 4
    %int128_45536 = torch.constant.int 128
    %48010 = torch.prim.ListConstruct %int4_45534, %47576, %int4_45535, %int128_45536 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48011 = torch.aten.view %47995, %48010 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45537 = torch.constant.int 4
    %int4_45538 = torch.constant.int 4
    %int128_45539 = torch.constant.int 128
    %48012 = torch.prim.ListConstruct %int4_45537, %47576, %int4_45538, %int128_45539 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48013 = torch.aten.view %47997, %48012 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45540 = torch.constant.int 4
    %int4_45541 = torch.constant.int 4
    %int128_45542 = torch.constant.int 128
    %48014 = torch.prim.ListConstruct %int4_45540, %47576, %int4_45541, %int128_45542 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48015 = torch.aten.view %47999, %48014 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
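    // Same grouped-query broadcast for the value path: %47365 .. %47379 (each
    // [4, ?, 1, 128]) are unsqueezed at dim -2 ahead of the expand below.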
    %int-2_45543 = torch.constant.int -2
    %48016 = torch.aten.unsqueeze %47365, %int-2_45543 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %48016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45544 = torch.constant.int -2
    %48017 = torch.aten.unsqueeze %47367, %int-2_45544 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %48017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45545 = torch.constant.int -2
    %48018 = torch.aten.unsqueeze %47369, %int-2_45545 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %48018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45546 = torch.constant.int -2
    %48019 = torch.aten.unsqueeze %47371, %int-2_45546 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %48019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45547 = torch.constant.int -2
    %48020 = torch.aten.unsqueeze %47373, %int-2_45547 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %48020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45548 = torch.constant.int -2
    %48021 = torch.aten.unsqueeze %47375, %int-2_45548 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %48021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45549 = torch.constant.int -2
    %48022 = torch.aten.unsqueeze %47377, %int-2_45549 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %48022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_45550 = torch.constant.int -2
    %48023 = torch.aten.unsqueeze %47379, %int-2_45550 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %48023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
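    // The dynamic sequence length is re-read from dim 1 of %47289 ([4, ?, 128]) and the
    // values are expanded to 4 query heads per KV head, mirroring the key path above.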
    %int1_45551 = torch.constant.int 1
    %48024 = torch.aten.size.int %47289, %int1_45551 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_45552 = torch.constant.int 4
    %int1_45553 = torch.constant.int 1
    %int4_45554 = torch.constant.int 4
    %int128_45555 = torch.constant.int 128
    %48025 = torch.prim.ListConstruct %int4_45552, %48024, %int1_45553, %int4_45554, %int128_45555 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45556 = torch.constant.bool false
    %48026 = torch.aten.expand %48016, %48025, %false_45556 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %48026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45557 = torch.constant.int 4
    %int1_45558 = torch.constant.int 1
    %int4_45559 = torch.constant.int 4
    %int128_45560 = torch.constant.int 128
    %48027 = torch.prim.ListConstruct %int4_45557, %48024, %int1_45558, %int4_45559, %int128_45560 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45561 = torch.constant.bool false
    %48028 = torch.aten.expand %48017, %48027, %false_45561 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %48028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45562 = torch.constant.int 4
    %int1_45563 = torch.constant.int 1
    %int4_45564 = torch.constant.int 4
    %int128_45565 = torch.constant.int 128
    %48029 = torch.prim.ListConstruct %int4_45562, %48024, %int1_45563, %int4_45564, %int128_45565 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45566 = torch.constant.bool false
    %48030 = torch.aten.expand %48018, %48029, %false_45566 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %48030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45567 = torch.constant.int 4
    %int1_45568 = torch.constant.int 1
    %int4_45569 = torch.constant.int 4
    %int128_45570 = torch.constant.int 128
    %48031 = torch.prim.ListConstruct %int4_45567, %48024, %int1_45568, %int4_45569, %int128_45570 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45571 = torch.constant.bool false
    %48032 = torch.aten.expand %48019, %48031, %false_45571 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %48032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45572 = torch.constant.int 4
    %int1_45573 = torch.constant.int 1
    %int4_45574 = torch.constant.int 4
    %int128_45575 = torch.constant.int 128
    %48033 = torch.prim.ListConstruct %int4_45572, %48024, %int1_45573, %int4_45574, %int128_45575 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45576 = torch.constant.bool false
    %48034 = torch.aten.expand %48020, %48033, %false_45576 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %48034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45577 = torch.constant.int 4
    %int1_45578 = torch.constant.int 1
    %int4_45579 = torch.constant.int 4
    %int128_45580 = torch.constant.int 128
    %48035 = torch.prim.ListConstruct %int4_45577, %48024, %int1_45578, %int4_45579, %int128_45580 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45581 = torch.constant.bool false
    %48036 = torch.aten.expand %48021, %48035, %false_45581 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %48036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45582 = torch.constant.int 4
    %int1_45583 = torch.constant.int 1
    %int4_45584 = torch.constant.int 4
    %int128_45585 = torch.constant.int 128
    %48037 = torch.prim.ListConstruct %int4_45582, %48024, %int1_45583, %int4_45584, %int128_45585 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45586 = torch.constant.bool false
    %48038 = torch.aten.expand %48022, %48037, %false_45586 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %48038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_45587 = torch.constant.int 4
    %int1_45588 = torch.constant.int 1
    %int4_45589 = torch.constant.int 4
    %int128_45590 = torch.constant.int 128
    %48039 = torch.prim.ListConstruct %int4_45587, %48024, %int1_45588, %int4_45589, %int128_45590 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_45591 = torch.constant.bool false
    %48040 = torch.aten.expand %48023, %48039, %false_45591 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %48040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
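    // Collapse the value group dimension as well: [4, ?, 1, 4, 128] -> [4, ?, 4, 128].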
    %int4_45592 = torch.constant.int 4
    %int4_45593 = torch.constant.int 4
    %int128_45594 = torch.constant.int 128
    %48041 = torch.prim.ListConstruct %int4_45592, %48024, %int4_45593, %int128_45594 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48042 = torch.aten.view %48026, %48041 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45595 = torch.constant.int 4
    %int4_45596 = torch.constant.int 4
    %int128_45597 = torch.constant.int 128
    %48043 = torch.prim.ListConstruct %int4_45595, %48024, %int4_45596, %int128_45597 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48044 = torch.aten.view %48028, %48043 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45598 = torch.constant.int 4
    %int4_45599 = torch.constant.int 4
    %int128_45600 = torch.constant.int 128
    %48045 = torch.prim.ListConstruct %int4_45598, %48024, %int4_45599, %int128_45600 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48046 = torch.aten.view %48030, %48045 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45601 = torch.constant.int 4
    %int4_45602 = torch.constant.int 4
    %int128_45603 = torch.constant.int 128
    %48047 = torch.prim.ListConstruct %int4_45601, %48024, %int4_45602, %int128_45603 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48048 = torch.aten.view %48032, %48047 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45604 = torch.constant.int 4
    %int4_45605 = torch.constant.int 4
    %int128_45606 = torch.constant.int 128
    %48049 = torch.prim.ListConstruct %int4_45604, %48024, %int4_45605, %int128_45606 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48050 = torch.aten.view %48034, %48049 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45607 = torch.constant.int 4
    %int4_45608 = torch.constant.int 4
    %int128_45609 = torch.constant.int 128
    %48051 = torch.prim.ListConstruct %int4_45607, %48024, %int4_45608, %int128_45609 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48052 = torch.aten.view %48036, %48051 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45610 = torch.constant.int 4
    %int4_45611 = torch.constant.int 4
    %int128_45612 = torch.constant.int 128
    %48053 = torch.prim.ListConstruct %int4_45610, %48024, %int4_45611, %int128_45612 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48054 = torch.aten.view %48038, %48053 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_45613 = torch.constant.int 4
    %int4_45614 = torch.constant.int 4
    %int128_45615 = torch.constant.int 128
    %48055 = torch.prim.ListConstruct %int4_45613, %48024, %int4_45614, %int128_45615 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48056 = torch.aten.view %48040, %48055 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
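    // Transpose Q, K, and V (queries %47432 .. %47537 from earlier, keys %48001 ..
    // %48015, values %48042 .. %48056) from [batch, seq, heads, head_dim] to
    // [batch, heads, seq, head_dim], the layout the fused attention op below expects.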
    %int1_45616 = torch.constant.int 1
    %int2_45617 = torch.constant.int 2
    %48057 = torch.aten.transpose.int %47432, %int1_45616, %int2_45617 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48057, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45618 = torch.constant.int 1
    %int2_45619 = torch.constant.int 2
    %48058 = torch.aten.transpose.int %47447, %int1_45618, %int2_45619 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48058, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45620 = torch.constant.int 1
    %int2_45621 = torch.constant.int 2
    %48059 = torch.aten.transpose.int %47462, %int1_45620, %int2_45621 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48059, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45622 = torch.constant.int 1
    %int2_45623 = torch.constant.int 2
    %48060 = torch.aten.transpose.int %47477, %int1_45622, %int2_45623 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48060, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45624 = torch.constant.int 1
    %int2_45625 = torch.constant.int 2
    %48061 = torch.aten.transpose.int %47492, %int1_45624, %int2_45625 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48061, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45626 = torch.constant.int 1
    %int2_45627 = torch.constant.int 2
    %48062 = torch.aten.transpose.int %47507, %int1_45626, %int2_45627 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48062, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45628 = torch.constant.int 1
    %int2_45629 = torch.constant.int 2
    %48063 = torch.aten.transpose.int %47522, %int1_45628, %int2_45629 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48063, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45630 = torch.constant.int 1
    %int2_45631 = torch.constant.int 2
    %48064 = torch.aten.transpose.int %47537, %int1_45630, %int2_45631 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48064, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45632 = torch.constant.int 1
    %int2_45633 = torch.constant.int 2
    %48065 = torch.aten.transpose.int %48001, %int1_45632, %int2_45633 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48065, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45634 = torch.constant.int 1
    %int2_45635 = torch.constant.int 2
    %48066 = torch.aten.transpose.int %48003, %int1_45634, %int2_45635 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48066, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45636 = torch.constant.int 1
    %int2_45637 = torch.constant.int 2
    %48067 = torch.aten.transpose.int %48005, %int1_45636, %int2_45637 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48067, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45638 = torch.constant.int 1
    %int2_45639 = torch.constant.int 2
    %48068 = torch.aten.transpose.int %48007, %int1_45638, %int2_45639 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48068, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45640 = torch.constant.int 1
    %int2_45641 = torch.constant.int 2
    %48069 = torch.aten.transpose.int %48009, %int1_45640, %int2_45641 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48069, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45642 = torch.constant.int 1
    %int2_45643 = torch.constant.int 2
    %48070 = torch.aten.transpose.int %48011, %int1_45642, %int2_45643 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48070, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45644 = torch.constant.int 1
    %int2_45645 = torch.constant.int 2
    %48071 = torch.aten.transpose.int %48013, %int1_45644, %int2_45645 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48071, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45646 = torch.constant.int 1
    %int2_45647 = torch.constant.int 2
    %48072 = torch.aten.transpose.int %48015, %int1_45646, %int2_45647 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48072, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45648 = torch.constant.int 1
    %int2_45649 = torch.constant.int 2
    %48073 = torch.aten.transpose.int %48042, %int1_45648, %int2_45649 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48073, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45650 = torch.constant.int 1
    %int2_45651 = torch.constant.int 2
    %48074 = torch.aten.transpose.int %48044, %int1_45650, %int2_45651 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48074, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45652 = torch.constant.int 1
    %int2_45653 = torch.constant.int 2
    %48075 = torch.aten.transpose.int %48046, %int1_45652, %int2_45653 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48075, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45654 = torch.constant.int 1
    %int2_45655 = torch.constant.int 2
    %48076 = torch.aten.transpose.int %48048, %int1_45654, %int2_45655 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48076, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45656 = torch.constant.int 1
    %int2_45657 = torch.constant.int 2
    %48077 = torch.aten.transpose.int %48050, %int1_45656, %int2_45657 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48077, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45658 = torch.constant.int 1
    %int2_45659 = torch.constant.int 2
    %48078 = torch.aten.transpose.int %48052, %int1_45658, %int2_45659 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48078, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45660 = torch.constant.int 1
    %int2_45661 = torch.constant.int 2
    %48079 = torch.aten.transpose.int %48054, %int1_45660, %int2_45661 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48079, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_45662 = torch.constant.int 1
    %int2_45663 = torch.constant.int 2
    %48080 = torch.aten.transpose.int %48056, %int1_45662, %int2_45663 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %48080, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
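    // One fused attention call per shard. The operands follow the
    // _scaled_dot_product_flash_attention_for_cpu signature: (query, key, value,
    // dropout_p = 0.0, is_causal = true, attn_mask = none, scale = none). The second
    // result is the logsumexp tensor; it is not consumed in this region.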
    %float0.000000e00_45664 = torch.constant.float 0.000000e+00
    %true_45665 = torch.constant.bool true
    %none_45666 = torch.constant.none
    %none_45667 = torch.constant.none
    %48081:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%48057, %48065, %48073, %float0.000000e00_45664, %true_45665, %none_45666, %none_45667) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %48081#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_45668 = torch.constant.float 0.000000e+00
    %true_45669 = torch.constant.bool true
    %none_45670 = torch.constant.none
    %none_45671 = torch.constant.none
    %48082:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%48058, %48066, %48074, %float0.000000e00_45668, %true_45669, %none_45670, %none_45671) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %48082#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_45672 = torch.constant.float 0.000000e+00
    %true_45673 = torch.constant.bool true
    %none_45674 = torch.constant.none
    %none_45675 = torch.constant.none
    %48083:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%48059, %48067, %48075, %float0.000000e00_45672, %true_45673, %none_45674, %none_45675) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %48083#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_45676 = torch.constant.float 0.000000e+00
    %true_45677 = torch.constant.bool true
    %none_45678 = torch.constant.none
    %none_45679 = torch.constant.none
    %48084:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%48060, %48068, %48076, %float0.000000e00_45676, %true_45677, %none_45678, %none_45679) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %48084#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_45680 = torch.constant.float 0.000000e+00
    %true_45681 = torch.constant.bool true
    %none_45682 = torch.constant.none
    %none_45683 = torch.constant.none
    %48085:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%48061, %48069, %48077, %float0.000000e00_45680, %true_45681, %none_45682, %none_45683) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %48085#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_45684 = torch.constant.float 0.000000e+00
    %true_45685 = torch.constant.bool true
    %none_45686 = torch.constant.none
    %none_45687 = torch.constant.none
    %48086:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%48062, %48070, %48078, %float0.000000e00_45684, %true_45685, %none_45686, %none_45687) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %48086#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_45688 = torch.constant.float 0.000000e+00
    %true_45689 = torch.constant.bool true
    %none_45690 = torch.constant.none
    %none_45691 = torch.constant.none
    %48087:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%48063, %48071, %48079, %float0.000000e00_45688, %true_45689, %none_45690, %none_45691) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %48087#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_45692 = torch.constant.float 0.000000e+00
    %true_45693 = torch.constant.bool true
    %none_45694 = torch.constant.none
    %none_45695 = torch.constant.none
    %48088:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%48064, %48072, %48080, %float0.000000e00_45692, %true_45693, %none_45694, %none_45695) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %48088#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
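    // The flash-attention calls above (one per device shard, %48081#0 through
    // %48088#0) each run causal SDPA on that shard's heads: Q/K/V are
    // [batch=4, heads=4, seq, head_dim=128] in f16, with dropout_p = 0.0,
    // is_causal = true, and no explicit mask or scale override. The second
    // result of each call is the f32 logsumexp, which this section does not
    // use. Next, each attention output is transposed from [4, 4, seq, 128]
    // to [4, seq, 4, 128] so the head dim can be folded into the feature dim.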
    %int1_45696 = torch.constant.int 1
    %int2_45697 = torch.constant.int 2
    %48089 = torch.aten.transpose.int %48081#0, %int1_45696, %int2_45697 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_45698 = torch.constant.int 1
    %int2_45699 = torch.constant.int 2
    %48090 = torch.aten.transpose.int %48082#0, %int1_45698, %int2_45699 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_45700 = torch.constant.int 1
    %int2_45701 = torch.constant.int 2
    %48091 = torch.aten.transpose.int %48083#0, %int1_45700, %int2_45701 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_45702 = torch.constant.int 1
    %int2_45703 = torch.constant.int 2
    %48092 = torch.aten.transpose.int %48084#0, %int1_45702, %int2_45703 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_45704 = torch.constant.int 1
    %int2_45705 = torch.constant.int 2
    %48093 = torch.aten.transpose.int %48085#0, %int1_45704, %int2_45705 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_45706 = torch.constant.int 1
    %int2_45707 = torch.constant.int 2
    %48094 = torch.aten.transpose.int %48086#0, %int1_45706, %int2_45707 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_45708 = torch.constant.int 1
    %int2_45709 = torch.constant.int 2
    %48095 = torch.aten.transpose.int %48087#0, %int1_45708, %int2_45709 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_45710 = torch.constant.int 1
    %int2_45711 = torch.constant.int 2
    %48096 = torch.aten.transpose.int %48088#0, %int1_45710, %int2_45711 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %48096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
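    // With heads now adjacent to head_dim, each [4, seq, 4, 128] tensor is
    // viewed as [4, seq, 512] (4 heads x 128 = 512 features per shard); the
    // dynamic seq extents (%47418, %47433, ...) come from the earlier
    // per-device shape computations.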
    %int4_45712 = torch.constant.int 4
    %int512_45713 = torch.constant.int 512
    %48097 = torch.prim.ListConstruct %int4_45712, %47418, %int512_45713 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48098 = torch.aten.view %48089, %48097 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %48098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_45714 = torch.constant.int 4
    %int512_45715 = torch.constant.int 512
    %48099 = torch.prim.ListConstruct %int4_45714, %47433, %int512_45715 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48100 = torch.aten.view %48090, %48099 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %48100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_45716 = torch.constant.int 4
    %int512_45717 = torch.constant.int 512
    %48101 = torch.prim.ListConstruct %int4_45716, %47448, %int512_45717 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48102 = torch.aten.view %48091, %48101 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %48102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_45718 = torch.constant.int 4
    %int512_45719 = torch.constant.int 512
    %48103 = torch.prim.ListConstruct %int4_45718, %47463, %int512_45719 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48104 = torch.aten.view %48092, %48103 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %48104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_45720 = torch.constant.int 4
    %int512_45721 = torch.constant.int 512
    %48105 = torch.prim.ListConstruct %int4_45720, %47478, %int512_45721 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48106 = torch.aten.view %48093, %48105 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %48106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_45722 = torch.constant.int 4
    %int512_45723 = torch.constant.int 512
    %48107 = torch.prim.ListConstruct %int4_45722, %47493, %int512_45723 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48108 = torch.aten.view %48094, %48107 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %48108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_45724 = torch.constant.int 4
    %int512_45725 = torch.constant.int 512
    %48109 = torch.prim.ListConstruct %int4_45724, %47508, %int512_45725 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48110 = torch.aten.view %48095, %48109 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %48110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_45726 = torch.constant.int 4
    %int512_45727 = torch.constant.int 512
    %48111 = torch.prim.ListConstruct %int4_45726, %47523, %int512_45727 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48112 = torch.aten.view %48096, %48111 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %48112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
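    // %1768 through %1775 are what are presumably the eight per-device
    // attention output-projection weight shards ([4096, 512] f16 each);
    // permuting dims (1, 0) transposes them to [512, 4096] so the sharded
    // activations can be multiplied against them directly.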
    %int1_45728 = torch.constant.int 1
    %int0_45729 = torch.constant.int 0
    %48113 = torch.prim.ListConstruct %int1_45728, %int0_45729 : (!torch.int, !torch.int) -> !torch.list<int>
    %48114 = torch.aten.permute %1768, %48113 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_45730 = torch.constant.int 1
    %int0_45731 = torch.constant.int 0
    %48115 = torch.prim.ListConstruct %int1_45730, %int0_45731 : (!torch.int, !torch.int) -> !torch.list<int>
    %48116 = torch.aten.permute %1769, %48115 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_45732 = torch.constant.int 1
    %int0_45733 = torch.constant.int 0
    %48117 = torch.prim.ListConstruct %int1_45732, %int0_45733 : (!torch.int, !torch.int) -> !torch.list<int>
    %48118 = torch.aten.permute %1770, %48117 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_45734 = torch.constant.int 1
    %int0_45735 = torch.constant.int 0
    %48119 = torch.prim.ListConstruct %int1_45734, %int0_45735 : (!torch.int, !torch.int) -> !torch.list<int>
    %48120 = torch.aten.permute %1771, %48119 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_45736 = torch.constant.int 1
    %int0_45737 = torch.constant.int 0
    %48121 = torch.prim.ListConstruct %int1_45736, %int0_45737 : (!torch.int, !torch.int) -> !torch.list<int>
    %48122 = torch.aten.permute %1772, %48121 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_45738 = torch.constant.int 1
    %int0_45739 = torch.constant.int 0
    %48123 = torch.prim.ListConstruct %int1_45738, %int0_45739 : (!torch.int, !torch.int) -> !torch.list<int>
    %48124 = torch.aten.permute %1773, %48123 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_45740 = torch.constant.int 1
    %int0_45741 = torch.constant.int 0
    %48125 = torch.prim.ListConstruct %int1_45740, %int0_45741 : (!torch.int, !torch.int) -> !torch.list<int>
    %48126 = torch.aten.permute %1774, %48125 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_45742 = torch.constant.int 1
    %int0_45743 = torch.constant.int 0
    %48127 = torch.prim.ListConstruct %int1_45742, %int0_45743 : (!torch.int, !torch.int) -> !torch.list<int>
    %48128 = torch.aten.permute %1775, %48127 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
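    // Per-shard output projection: each [4, seq, 512] activation is flattened
    // to [4*seq, 512] (the dynamic row count is computed as 4 * seq),
    // multiplied by its transposed [512, 4096] weight shard, and viewed back
    // as [4, seq, 4096]. Each result (%48134, %48140, ..., %48176) is a
    // partial sum of the full projection that still has to be reduced across
    // the eight devices.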
    %int4_45744 = torch.constant.int 4
    %48129 = torch.aten.mul.int %int4_45744, %47418 : !torch.int, !torch.int -> !torch.int
    %int512_45745 = torch.constant.int 512
    %48130 = torch.prim.ListConstruct %48129, %int512_45745 : (!torch.int, !torch.int) -> !torch.list<int>
    %48131 = torch.aten.view %48098, %48130 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %48131, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %48132 = torch.aten.mm %48131, %48114 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48132, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_45746 = torch.constant.int 4
    %int4096_45747 = torch.constant.int 4096
    %48133 = torch.prim.ListConstruct %int4_45746, %47418, %int4096_45747 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48134 = torch.aten.view %48132, %48133 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_45748 = torch.constant.int 4
    %48135 = torch.aten.mul.int %int4_45748, %47433 : !torch.int, !torch.int -> !torch.int
    %int512_45749 = torch.constant.int 512
    %48136 = torch.prim.ListConstruct %48135, %int512_45749 : (!torch.int, !torch.int) -> !torch.list<int>
    %48137 = torch.aten.view %48100, %48136 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %48137, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %48138 = torch.aten.mm %48137, %48116 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48138, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_45750 = torch.constant.int 4
    %int4096_45751 = torch.constant.int 4096
    %48139 = torch.prim.ListConstruct %int4_45750, %47433, %int4096_45751 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48140 = torch.aten.view %48138, %48139 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_45752 = torch.constant.int 4
    %48141 = torch.aten.mul.int %int4_45752, %47448 : !torch.int, !torch.int -> !torch.int
    %int512_45753 = torch.constant.int 512
    %48142 = torch.prim.ListConstruct %48141, %int512_45753 : (!torch.int, !torch.int) -> !torch.list<int>
    %48143 = torch.aten.view %48102, %48142 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %48143, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %48144 = torch.aten.mm %48143, %48118 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48144, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_45754 = torch.constant.int 4
    %int4096_45755 = torch.constant.int 4096
    %48145 = torch.prim.ListConstruct %int4_45754, %47448, %int4096_45755 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48146 = torch.aten.view %48144, %48145 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_45756 = torch.constant.int 4
    %48147 = torch.aten.mul.int %int4_45756, %47463 : !torch.int, !torch.int -> !torch.int
    %int512_45757 = torch.constant.int 512
    %48148 = torch.prim.ListConstruct %48147, %int512_45757 : (!torch.int, !torch.int) -> !torch.list<int>
    %48149 = torch.aten.view %48104, %48148 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %48149, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %48150 = torch.aten.mm %48149, %48120 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48150, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_45758 = torch.constant.int 4
    %int4096_45759 = torch.constant.int 4096
    %48151 = torch.prim.ListConstruct %int4_45758, %47463, %int4096_45759 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48152 = torch.aten.view %48150, %48151 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_45760 = torch.constant.int 4
    %48153 = torch.aten.mul.int %int4_45760, %47478 : !torch.int, !torch.int -> !torch.int
    %int512_45761 = torch.constant.int 512
    %48154 = torch.prim.ListConstruct %48153, %int512_45761 : (!torch.int, !torch.int) -> !torch.list<int>
    %48155 = torch.aten.view %48106, %48154 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %48155, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %48156 = torch.aten.mm %48155, %48122 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48156, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_45762 = torch.constant.int 4
    %int4096_45763 = torch.constant.int 4096
    %48157 = torch.prim.ListConstruct %int4_45762, %47478, %int4096_45763 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48158 = torch.aten.view %48156, %48157 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_45764 = torch.constant.int 4
    %48159 = torch.aten.mul.int %int4_45764, %47493 : !torch.int, !torch.int -> !torch.int
    %int512_45765 = torch.constant.int 512
    %48160 = torch.prim.ListConstruct %48159, %int512_45765 : (!torch.int, !torch.int) -> !torch.list<int>
    %48161 = torch.aten.view %48108, %48160 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %48161, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %48162 = torch.aten.mm %48161, %48124 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48162, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_45766 = torch.constant.int 4
    %int4096_45767 = torch.constant.int 4096
    %48163 = torch.prim.ListConstruct %int4_45766, %47493, %int4096_45767 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48164 = torch.aten.view %48162, %48163 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_45768 = torch.constant.int 4
    %48165 = torch.aten.mul.int %int4_45768, %47508 : !torch.int, !torch.int -> !torch.int
    %int512_45769 = torch.constant.int 512
    %48166 = torch.prim.ListConstruct %48165, %int512_45769 : (!torch.int, !torch.int) -> !torch.list<int>
    %48167 = torch.aten.view %48110, %48166 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %48167, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %48168 = torch.aten.mm %48167, %48126 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48168, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_45770 = torch.constant.int 4
    %int4096_45771 = torch.constant.int 4096
    %48169 = torch.prim.ListConstruct %int4_45770, %47508, %int4096_45771 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48170 = torch.aten.view %48168, %48169 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_45772 = torch.constant.int 4
    %48171 = torch.aten.mul.int %int4_45772, %47523 : !torch.int, !torch.int -> !torch.int
    %int512_45773 = torch.constant.int 512
    %48172 = torch.prim.ListConstruct %48171, %int512_45773 : (!torch.int, !torch.int) -> !torch.list<int>
    %48173 = torch.aten.view %48112, %48172 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %48173, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %48174 = torch.aten.mm %48173, %48128 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48174, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_45774 = torch.constant.int 4
    %int4096_45775 = torch.constant.int 4096
    %48175 = torch.prim.ListConstruct %int4_45774, %47523, %int4096_45775 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48176 = torch.aten.view %48174, %48175 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
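    // What follows is effectively an eight-way all-reduce, unrolled per
    // device: for each device, the seven partial results produced on the
    // other devices are copied in with flow.tensor.transfer and accumulated
    // with the local partial through a chain of adds, leaving the fully
    // reduced [4, seq, 4096] tensor replicated on every device. First, the
    // reduction on @__device_0 (local partial %48134).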
    %48177 = torch_c.to_builtin_tensor %48140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45776 = arith.constant 1 : index
    %dim_45777 = tensor.dim %48177, %c1_45776 : tensor<4x?x4096xf16>
    %48178 = flow.tensor.transfer %48177 : tensor<4x?x4096xf16>{%dim_45777} to #hal.device.promise<@__device_0>
    %48179 = torch_c.from_builtin_tensor %48178 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48180 = torch_c.to_builtin_tensor %48146 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45778 = arith.constant 1 : index
    %dim_45779 = tensor.dim %48180, %c1_45778 : tensor<4x?x4096xf16>
    %48181 = flow.tensor.transfer %48180 : tensor<4x?x4096xf16>{%dim_45779} to #hal.device.promise<@__device_0>
    %48182 = torch_c.from_builtin_tensor %48181 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48183 = torch_c.to_builtin_tensor %48152 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45780 = arith.constant 1 : index
    %dim_45781 = tensor.dim %48183, %c1_45780 : tensor<4x?x4096xf16>
    %48184 = flow.tensor.transfer %48183 : tensor<4x?x4096xf16>{%dim_45781} to #hal.device.promise<@__device_0>
    %48185 = torch_c.from_builtin_tensor %48184 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48186 = torch_c.to_builtin_tensor %48158 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45782 = arith.constant 1 : index
    %dim_45783 = tensor.dim %48186, %c1_45782 : tensor<4x?x4096xf16>
    %48187 = flow.tensor.transfer %48186 : tensor<4x?x4096xf16>{%dim_45783} to #hal.device.promise<@__device_0>
    %48188 = torch_c.from_builtin_tensor %48187 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48189 = torch_c.to_builtin_tensor %48164 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45784 = arith.constant 1 : index
    %dim_45785 = tensor.dim %48189, %c1_45784 : tensor<4x?x4096xf16>
    %48190 = flow.tensor.transfer %48189 : tensor<4x?x4096xf16>{%dim_45785} to #hal.device.promise<@__device_0>
    %48191 = torch_c.from_builtin_tensor %48190 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48192 = torch_c.to_builtin_tensor %48170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45786 = arith.constant 1 : index
    %dim_45787 = tensor.dim %48192, %c1_45786 : tensor<4x?x4096xf16>
    %48193 = flow.tensor.transfer %48192 : tensor<4x?x4096xf16>{%dim_45787} to #hal.device.promise<@__device_0>
    %48194 = torch_c.from_builtin_tensor %48193 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48195 = torch_c.to_builtin_tensor %48176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45788 = arith.constant 1 : index
    %dim_45789 = tensor.dim %48195, %c1_45788 : tensor<4x?x4096xf16>
    %48196 = flow.tensor.transfer %48195 : tensor<4x?x4096xf16>{%dim_45789} to #hal.device.promise<@__device_0>
    %48197 = torch_c.from_builtin_tensor %48196 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45790 = torch.constant.int 1
    %48198 = torch.aten.add.Tensor %48134, %48179, %int1_45790 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45791 = torch.constant.int 1
    %48199 = torch.aten.add.Tensor %48198, %48182, %int1_45791 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45792 = torch.constant.int 1
    %48200 = torch.aten.add.Tensor %48199, %48185, %int1_45792 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45793 = torch.constant.int 1
    %48201 = torch.aten.add.Tensor %48200, %48188, %int1_45793 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45794 = torch.constant.int 1
    %48202 = torch.aten.add.Tensor %48201, %48191, %int1_45794 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45795 = torch.constant.int 1
    %48203 = torch.aten.add.Tensor %48202, %48194, %int1_45795 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45796 = torch.constant.int 1
    %48204 = torch.aten.add.Tensor %48203, %48197, %int1_45796 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
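    // Reduction on @__device_1: the same seven transfers and seven adds, with
    // the locally produced partial %48140 folded in at its position in the
    // chain.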
    %48205 = torch_c.to_builtin_tensor %48134 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45797 = arith.constant 1 : index
    %dim_45798 = tensor.dim %48205, %c1_45797 : tensor<4x?x4096xf16>
    %48206 = flow.tensor.transfer %48205 : tensor<4x?x4096xf16>{%dim_45798} to #hal.device.promise<@__device_1>
    %48207 = torch_c.from_builtin_tensor %48206 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48208 = torch_c.to_builtin_tensor %48146 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45799 = arith.constant 1 : index
    %dim_45800 = tensor.dim %48208, %c1_45799 : tensor<4x?x4096xf16>
    %48209 = flow.tensor.transfer %48208 : tensor<4x?x4096xf16>{%dim_45800} to #hal.device.promise<@__device_1>
    %48210 = torch_c.from_builtin_tensor %48209 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48211 = torch_c.to_builtin_tensor %48152 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45801 = arith.constant 1 : index
    %dim_45802 = tensor.dim %48211, %c1_45801 : tensor<4x?x4096xf16>
    %48212 = flow.tensor.transfer %48211 : tensor<4x?x4096xf16>{%dim_45802} to #hal.device.promise<@__device_1>
    %48213 = torch_c.from_builtin_tensor %48212 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48214 = torch_c.to_builtin_tensor %48158 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45803 = arith.constant 1 : index
    %dim_45804 = tensor.dim %48214, %c1_45803 : tensor<4x?x4096xf16>
    %48215 = flow.tensor.transfer %48214 : tensor<4x?x4096xf16>{%dim_45804} to #hal.device.promise<@__device_1>
    %48216 = torch_c.from_builtin_tensor %48215 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48217 = torch_c.to_builtin_tensor %48164 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45805 = arith.constant 1 : index
    %dim_45806 = tensor.dim %48217, %c1_45805 : tensor<4x?x4096xf16>
    %48218 = flow.tensor.transfer %48217 : tensor<4x?x4096xf16>{%dim_45806} to #hal.device.promise<@__device_1>
    %48219 = torch_c.from_builtin_tensor %48218 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48220 = torch_c.to_builtin_tensor %48170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45807 = arith.constant 1 : index
    %dim_45808 = tensor.dim %48220, %c1_45807 : tensor<4x?x4096xf16>
    %48221 = flow.tensor.transfer %48220 : tensor<4x?x4096xf16>{%dim_45808} to #hal.device.promise<@__device_1>
    %48222 = torch_c.from_builtin_tensor %48221 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48223 = torch_c.to_builtin_tensor %48176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45809 = arith.constant 1 : index
    %dim_45810 = tensor.dim %48223, %c1_45809 : tensor<4x?x4096xf16>
    %48224 = flow.tensor.transfer %48223 : tensor<4x?x4096xf16>{%dim_45810} to #hal.device.promise<@__device_1>
    %48225 = torch_c.from_builtin_tensor %48224 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45811 = torch.constant.int 1
    %48226 = torch.aten.add.Tensor %48207, %48140, %int1_45811 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45812 = torch.constant.int 1
    %48227 = torch.aten.add.Tensor %48226, %48210, %int1_45812 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45813 = torch.constant.int 1
    %48228 = torch.aten.add.Tensor %48227, %48213, %int1_45813 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45814 = torch.constant.int 1
    %48229 = torch.aten.add.Tensor %48228, %48216, %int1_45814 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45815 = torch.constant.int 1
    %48230 = torch.aten.add.Tensor %48229, %48219, %int1_45815 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45816 = torch.constant.int 1
    %48231 = torch.aten.add.Tensor %48230, %48222, %int1_45816 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45817 = torch.constant.int 1
    %48232 = torch.aten.add.Tensor %48231, %48225, %int1_45817 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
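    // Reduction on @__device_2 (local partial %48146).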
    %48233 = torch_c.to_builtin_tensor %48134 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45818 = arith.constant 1 : index
    %dim_45819 = tensor.dim %48233, %c1_45818 : tensor<4x?x4096xf16>
    %48234 = flow.tensor.transfer %48233 : tensor<4x?x4096xf16>{%dim_45819} to #hal.device.promise<@__device_2>
    %48235 = torch_c.from_builtin_tensor %48234 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48236 = torch_c.to_builtin_tensor %48140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45820 = arith.constant 1 : index
    %dim_45821 = tensor.dim %48236, %c1_45820 : tensor<4x?x4096xf16>
    %48237 = flow.tensor.transfer %48236 : tensor<4x?x4096xf16>{%dim_45821} to #hal.device.promise<@__device_2>
    %48238 = torch_c.from_builtin_tensor %48237 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48239 = torch_c.to_builtin_tensor %48152 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45822 = arith.constant 1 : index
    %dim_45823 = tensor.dim %48239, %c1_45822 : tensor<4x?x4096xf16>
    %48240 = flow.tensor.transfer %48239 : tensor<4x?x4096xf16>{%dim_45823} to #hal.device.promise<@__device_2>
    %48241 = torch_c.from_builtin_tensor %48240 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48242 = torch_c.to_builtin_tensor %48158 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45824 = arith.constant 1 : index
    %dim_45825 = tensor.dim %48242, %c1_45824 : tensor<4x?x4096xf16>
    %48243 = flow.tensor.transfer %48242 : tensor<4x?x4096xf16>{%dim_45825} to #hal.device.promise<@__device_2>
    %48244 = torch_c.from_builtin_tensor %48243 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48245 = torch_c.to_builtin_tensor %48164 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45826 = arith.constant 1 : index
    %dim_45827 = tensor.dim %48245, %c1_45826 : tensor<4x?x4096xf16>
    %48246 = flow.tensor.transfer %48245 : tensor<4x?x4096xf16>{%dim_45827} to #hal.device.promise<@__device_2>
    %48247 = torch_c.from_builtin_tensor %48246 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48248 = torch_c.to_builtin_tensor %48170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45828 = arith.constant 1 : index
    %dim_45829 = tensor.dim %48248, %c1_45828 : tensor<4x?x4096xf16>
    %48249 = flow.tensor.transfer %48248 : tensor<4x?x4096xf16>{%dim_45829} to #hal.device.promise<@__device_2>
    %48250 = torch_c.from_builtin_tensor %48249 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48251 = torch_c.to_builtin_tensor %48176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45830 = arith.constant 1 : index
    %dim_45831 = tensor.dim %48251, %c1_45830 : tensor<4x?x4096xf16>
    %48252 = flow.tensor.transfer %48251 : tensor<4x?x4096xf16>{%dim_45831} to #hal.device.promise<@__device_2>
    %48253 = torch_c.from_builtin_tensor %48252 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45832 = torch.constant.int 1
    %48254 = torch.aten.add.Tensor %48235, %48238, %int1_45832 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45833 = torch.constant.int 1
    %48255 = torch.aten.add.Tensor %48254, %48146, %int1_45833 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45834 = torch.constant.int 1
    %48256 = torch.aten.add.Tensor %48255, %48241, %int1_45834 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45835 = torch.constant.int 1
    %48257 = torch.aten.add.Tensor %48256, %48244, %int1_45835 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45836 = torch.constant.int 1
    %48258 = torch.aten.add.Tensor %48257, %48247, %int1_45836 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45837 = torch.constant.int 1
    %48259 = torch.aten.add.Tensor %48258, %48250, %int1_45837 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45838 = torch.constant.int 1
    %48260 = torch.aten.add.Tensor %48259, %48253, %int1_45838 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
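    // Reduction on @__device_3 (local partial %48152).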
    %48261 = torch_c.to_builtin_tensor %48134 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45839 = arith.constant 1 : index
    %dim_45840 = tensor.dim %48261, %c1_45839 : tensor<4x?x4096xf16>
    %48262 = flow.tensor.transfer %48261 : tensor<4x?x4096xf16>{%dim_45840} to #hal.device.promise<@__device_3>
    %48263 = torch_c.from_builtin_tensor %48262 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48263, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48264 = torch_c.to_builtin_tensor %48140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45841 = arith.constant 1 : index
    %dim_45842 = tensor.dim %48264, %c1_45841 : tensor<4x?x4096xf16>
    %48265 = flow.tensor.transfer %48264 : tensor<4x?x4096xf16>{%dim_45842} to #hal.device.promise<@__device_3>
    %48266 = torch_c.from_builtin_tensor %48265 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48267 = torch_c.to_builtin_tensor %48146 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45843 = arith.constant 1 : index
    %dim_45844 = tensor.dim %48267, %c1_45843 : tensor<4x?x4096xf16>
    %48268 = flow.tensor.transfer %48267 : tensor<4x?x4096xf16>{%dim_45844} to #hal.device.promise<@__device_3>
    %48269 = torch_c.from_builtin_tensor %48268 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48270 = torch_c.to_builtin_tensor %48158 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45845 = arith.constant 1 : index
    %dim_45846 = tensor.dim %48270, %c1_45845 : tensor<4x?x4096xf16>
    %48271 = flow.tensor.transfer %48270 : tensor<4x?x4096xf16>{%dim_45846} to #hal.device.promise<@__device_3>
    %48272 = torch_c.from_builtin_tensor %48271 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48273 = torch_c.to_builtin_tensor %48164 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45847 = arith.constant 1 : index
    %dim_45848 = tensor.dim %48273, %c1_45847 : tensor<4x?x4096xf16>
    %48274 = flow.tensor.transfer %48273 : tensor<4x?x4096xf16>{%dim_45848} to #hal.device.promise<@__device_3>
    %48275 = torch_c.from_builtin_tensor %48274 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48276 = torch_c.to_builtin_tensor %48170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45849 = arith.constant 1 : index
    %dim_45850 = tensor.dim %48276, %c1_45849 : tensor<4x?x4096xf16>
    %48277 = flow.tensor.transfer %48276 : tensor<4x?x4096xf16>{%dim_45850} to #hal.device.promise<@__device_3>
    %48278 = torch_c.from_builtin_tensor %48277 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48279 = torch_c.to_builtin_tensor %48176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45851 = arith.constant 1 : index
    %dim_45852 = tensor.dim %48279, %c1_45851 : tensor<4x?x4096xf16>
    %48280 = flow.tensor.transfer %48279 : tensor<4x?x4096xf16>{%dim_45852} to #hal.device.promise<@__device_3>
    %48281 = torch_c.from_builtin_tensor %48280 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45853 = torch.constant.int 1
    %48282 = torch.aten.add.Tensor %48263, %48266, %int1_45853 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45854 = torch.constant.int 1
    %48283 = torch.aten.add.Tensor %48282, %48269, %int1_45854 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45855 = torch.constant.int 1
    %48284 = torch.aten.add.Tensor %48283, %48152, %int1_45855 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45856 = torch.constant.int 1
    %48285 = torch.aten.add.Tensor %48284, %48272, %int1_45856 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45857 = torch.constant.int 1
    %48286 = torch.aten.add.Tensor %48285, %48275, %int1_45857 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45858 = torch.constant.int 1
    %48287 = torch.aten.add.Tensor %48286, %48278, %int1_45858 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45859 = torch.constant.int 1
    %48288 = torch.aten.add.Tensor %48287, %48281, %int1_45859 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
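    // Reduction on @__device_4 (local partial %48158).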
    %48289 = torch_c.to_builtin_tensor %48134 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45860 = arith.constant 1 : index
    %dim_45861 = tensor.dim %48289, %c1_45860 : tensor<4x?x4096xf16>
    %48290 = flow.tensor.transfer %48289 : tensor<4x?x4096xf16>{%dim_45861} to #hal.device.promise<@__device_4>
    %48291 = torch_c.from_builtin_tensor %48290 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48292 = torch_c.to_builtin_tensor %48140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45862 = arith.constant 1 : index
    %dim_45863 = tensor.dim %48292, %c1_45862 : tensor<4x?x4096xf16>
    %48293 = flow.tensor.transfer %48292 : tensor<4x?x4096xf16>{%dim_45863} to #hal.device.promise<@__device_4>
    %48294 = torch_c.from_builtin_tensor %48293 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48295 = torch_c.to_builtin_tensor %48146 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45864 = arith.constant 1 : index
    %dim_45865 = tensor.dim %48295, %c1_45864 : tensor<4x?x4096xf16>
    %48296 = flow.tensor.transfer %48295 : tensor<4x?x4096xf16>{%dim_45865} to #hal.device.promise<@__device_4>
    %48297 = torch_c.from_builtin_tensor %48296 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48298 = torch_c.to_builtin_tensor %48152 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45866 = arith.constant 1 : index
    %dim_45867 = tensor.dim %48298, %c1_45866 : tensor<4x?x4096xf16>
    %48299 = flow.tensor.transfer %48298 : tensor<4x?x4096xf16>{%dim_45867} to #hal.device.promise<@__device_4>
    %48300 = torch_c.from_builtin_tensor %48299 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48301 = torch_c.to_builtin_tensor %48164 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45868 = arith.constant 1 : index
    %dim_45869 = tensor.dim %48301, %c1_45868 : tensor<4x?x4096xf16>
    %48302 = flow.tensor.transfer %48301 : tensor<4x?x4096xf16>{%dim_45869} to #hal.device.promise<@__device_4>
    %48303 = torch_c.from_builtin_tensor %48302 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48304 = torch_c.to_builtin_tensor %48170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45870 = arith.constant 1 : index
    %dim_45871 = tensor.dim %48304, %c1_45870 : tensor<4x?x4096xf16>
    %48305 = flow.tensor.transfer %48304 : tensor<4x?x4096xf16>{%dim_45871} to #hal.device.promise<@__device_4>
    %48306 = torch_c.from_builtin_tensor %48305 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48307 = torch_c.to_builtin_tensor %48176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45872 = arith.constant 1 : index
    %dim_45873 = tensor.dim %48307, %c1_45872 : tensor<4x?x4096xf16>
    %48308 = flow.tensor.transfer %48307 : tensor<4x?x4096xf16>{%dim_45873} to #hal.device.promise<@__device_4>
    %48309 = torch_c.from_builtin_tensor %48308 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45874 = torch.constant.int 1
    %48310 = torch.aten.add.Tensor %48291, %48294, %int1_45874 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45875 = torch.constant.int 1
    %48311 = torch.aten.add.Tensor %48310, %48297, %int1_45875 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45876 = torch.constant.int 1
    %48312 = torch.aten.add.Tensor %48311, %48300, %int1_45876 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45877 = torch.constant.int 1
    %48313 = torch.aten.add.Tensor %48312, %48158, %int1_45877 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45878 = torch.constant.int 1
    %48314 = torch.aten.add.Tensor %48313, %48303, %int1_45878 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45879 = torch.constant.int 1
    %48315 = torch.aten.add.Tensor %48314, %48306, %int1_45879 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45880 = torch.constant.int 1
    %48316 = torch.aten.add.Tensor %48315, %48309, %int1_45880 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
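    // Reduction on @__device_5 (local partial %48164).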
    %48317 = torch_c.to_builtin_tensor %48134 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45881 = arith.constant 1 : index
    %dim_45882 = tensor.dim %48317, %c1_45881 : tensor<4x?x4096xf16>
    %48318 = flow.tensor.transfer %48317 : tensor<4x?x4096xf16>{%dim_45882} to #hal.device.promise<@__device_5>
    %48319 = torch_c.from_builtin_tensor %48318 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48320 = torch_c.to_builtin_tensor %48140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45883 = arith.constant 1 : index
    %dim_45884 = tensor.dim %48320, %c1_45883 : tensor<4x?x4096xf16>
    %48321 = flow.tensor.transfer %48320 : tensor<4x?x4096xf16>{%dim_45884} to #hal.device.promise<@__device_5>
    %48322 = torch_c.from_builtin_tensor %48321 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48323 = torch_c.to_builtin_tensor %48146 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45885 = arith.constant 1 : index
    %dim_45886 = tensor.dim %48323, %c1_45885 : tensor<4x?x4096xf16>
    %48324 = flow.tensor.transfer %48323 : tensor<4x?x4096xf16>{%dim_45886} to #hal.device.promise<@__device_5>
    %48325 = torch_c.from_builtin_tensor %48324 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48326 = torch_c.to_builtin_tensor %48152 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45887 = arith.constant 1 : index
    %dim_45888 = tensor.dim %48326, %c1_45887 : tensor<4x?x4096xf16>
    %48327 = flow.tensor.transfer %48326 : tensor<4x?x4096xf16>{%dim_45888} to #hal.device.promise<@__device_5>
    %48328 = torch_c.from_builtin_tensor %48327 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48329 = torch_c.to_builtin_tensor %48158 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45889 = arith.constant 1 : index
    %dim_45890 = tensor.dim %48329, %c1_45889 : tensor<4x?x4096xf16>
    %48330 = flow.tensor.transfer %48329 : tensor<4x?x4096xf16>{%dim_45890} to #hal.device.promise<@__device_5>
    %48331 = torch_c.from_builtin_tensor %48330 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48332 = torch_c.to_builtin_tensor %48170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45891 = arith.constant 1 : index
    %dim_45892 = tensor.dim %48332, %c1_45891 : tensor<4x?x4096xf16>
    %48333 = flow.tensor.transfer %48332 : tensor<4x?x4096xf16>{%dim_45892} to #hal.device.promise<@__device_5>
    %48334 = torch_c.from_builtin_tensor %48333 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48335 = torch_c.to_builtin_tensor %48176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45893 = arith.constant 1 : index
    %dim_45894 = tensor.dim %48335, %c1_45893 : tensor<4x?x4096xf16>
    %48336 = flow.tensor.transfer %48335 : tensor<4x?x4096xf16>{%dim_45894} to #hal.device.promise<@__device_5>
    %48337 = torch_c.from_builtin_tensor %48336 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45895 = torch.constant.int 1
    %48338 = torch.aten.add.Tensor %48319, %48322, %int1_45895 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45896 = torch.constant.int 1
    %48339 = torch.aten.add.Tensor %48338, %48325, %int1_45896 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45897 = torch.constant.int 1
    %48340 = torch.aten.add.Tensor %48339, %48328, %int1_45897 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45898 = torch.constant.int 1
    %48341 = torch.aten.add.Tensor %48340, %48331, %int1_45898 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45899 = torch.constant.int 1
    %48342 = torch.aten.add.Tensor %48341, %48164, %int1_45899 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45900 = torch.constant.int 1
    %48343 = torch.aten.add.Tensor %48342, %48334, %int1_45900 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45901 = torch.constant.int 1
    %48344 = torch.aten.add.Tensor %48343, %48337, %int1_45901 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
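    // Gather for @__device_6: copy every other device's partial result over with
    // flow.tensor.transfer before reducing there.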
    %48345 = torch_c.to_builtin_tensor %48134 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45902 = arith.constant 1 : index
    %dim_45903 = tensor.dim %48345, %c1_45902 : tensor<4x?x4096xf16>
    %48346 = flow.tensor.transfer %48345 : tensor<4x?x4096xf16>{%dim_45903} to #hal.device.promise<@__device_6>
    %48347 = torch_c.from_builtin_tensor %48346 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48348 = torch_c.to_builtin_tensor %48140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45904 = arith.constant 1 : index
    %dim_45905 = tensor.dim %48348, %c1_45904 : tensor<4x?x4096xf16>
    %48349 = flow.tensor.transfer %48348 : tensor<4x?x4096xf16>{%dim_45905} to #hal.device.promise<@__device_6>
    %48350 = torch_c.from_builtin_tensor %48349 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48350, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48351 = torch_c.to_builtin_tensor %48146 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45906 = arith.constant 1 : index
    %dim_45907 = tensor.dim %48351, %c1_45906 : tensor<4x?x4096xf16>
    %48352 = flow.tensor.transfer %48351 : tensor<4x?x4096xf16>{%dim_45907} to #hal.device.promise<@__device_6>
    %48353 = torch_c.from_builtin_tensor %48352 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48354 = torch_c.to_builtin_tensor %48152 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45908 = arith.constant 1 : index
    %dim_45909 = tensor.dim %48354, %c1_45908 : tensor<4x?x4096xf16>
    %48355 = flow.tensor.transfer %48354 : tensor<4x?x4096xf16>{%dim_45909} to #hal.device.promise<@__device_6>
    %48356 = torch_c.from_builtin_tensor %48355 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48357 = torch_c.to_builtin_tensor %48158 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45910 = arith.constant 1 : index
    %dim_45911 = tensor.dim %48357, %c1_45910 : tensor<4x?x4096xf16>
    %48358 = flow.tensor.transfer %48357 : tensor<4x?x4096xf16>{%dim_45911} to #hal.device.promise<@__device_6>
    %48359 = torch_c.from_builtin_tensor %48358 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48360 = torch_c.to_builtin_tensor %48164 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45912 = arith.constant 1 : index
    %dim_45913 = tensor.dim %48360, %c1_45912 : tensor<4x?x4096xf16>
    %48361 = flow.tensor.transfer %48360 : tensor<4x?x4096xf16>{%dim_45913} to #hal.device.promise<@__device_6>
    %48362 = torch_c.from_builtin_tensor %48361 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48363 = torch_c.to_builtin_tensor %48176 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45914 = arith.constant 1 : index
    %dim_45915 = tensor.dim %48363, %c1_45914 : tensor<4x?x4096xf16>
    %48364 = flow.tensor.transfer %48363 : tensor<4x?x4096xf16>{%dim_45915} to #hal.device.promise<@__device_6>
    %48365 = torch_c.from_builtin_tensor %48364 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
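    // Reduce on @__device_6: the same eight-way add chain; %48170 joins directly,
    // apparently already local to this device.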
    %int1_45916 = torch.constant.int 1
    %48366 = torch.aten.add.Tensor %48347, %48350, %int1_45916 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45917 = torch.constant.int 1
    %48367 = torch.aten.add.Tensor %48366, %48353, %int1_45917 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45918 = torch.constant.int 1
    %48368 = torch.aten.add.Tensor %48367, %48356, %int1_45918 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45919 = torch.constant.int 1
    %48369 = torch.aten.add.Tensor %48368, %48359, %int1_45919 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45920 = torch.constant.int 1
    %48370 = torch.aten.add.Tensor %48369, %48362, %int1_45920 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45921 = torch.constant.int 1
    %48371 = torch.aten.add.Tensor %48370, %48170, %int1_45921 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45922 = torch.constant.int 1
    %48372 = torch.aten.add.Tensor %48371, %48365, %int1_45922 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
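    // Gather for @__device_7: same transfer pattern, targeting the last device.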
    %48373 = torch_c.to_builtin_tensor %48134 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45923 = arith.constant 1 : index
    %dim_45924 = tensor.dim %48373, %c1_45923 : tensor<4x?x4096xf16>
    %48374 = flow.tensor.transfer %48373 : tensor<4x?x4096xf16>{%dim_45924} to #hal.device.promise<@__device_7>
    %48375 = torch_c.from_builtin_tensor %48374 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48376 = torch_c.to_builtin_tensor %48140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45925 = arith.constant 1 : index
    %dim_45926 = tensor.dim %48376, %c1_45925 : tensor<4x?x4096xf16>
    %48377 = flow.tensor.transfer %48376 : tensor<4x?x4096xf16>{%dim_45926} to #hal.device.promise<@__device_7>
    %48378 = torch_c.from_builtin_tensor %48377 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48379 = torch_c.to_builtin_tensor %48146 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45927 = arith.constant 1 : index
    %dim_45928 = tensor.dim %48379, %c1_45927 : tensor<4x?x4096xf16>
    %48380 = flow.tensor.transfer %48379 : tensor<4x?x4096xf16>{%dim_45928} to #hal.device.promise<@__device_7>
    %48381 = torch_c.from_builtin_tensor %48380 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48382 = torch_c.to_builtin_tensor %48152 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45929 = arith.constant 1 : index
    %dim_45930 = tensor.dim %48382, %c1_45929 : tensor<4x?x4096xf16>
    %48383 = flow.tensor.transfer %48382 : tensor<4x?x4096xf16>{%dim_45930} to #hal.device.promise<@__device_7>
    %48384 = torch_c.from_builtin_tensor %48383 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48385 = torch_c.to_builtin_tensor %48158 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45931 = arith.constant 1 : index
    %dim_45932 = tensor.dim %48385, %c1_45931 : tensor<4x?x4096xf16>
    %48386 = flow.tensor.transfer %48385 : tensor<4x?x4096xf16>{%dim_45932} to #hal.device.promise<@__device_7>
    %48387 = torch_c.from_builtin_tensor %48386 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48388 = torch_c.to_builtin_tensor %48164 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45933 = arith.constant 1 : index
    %dim_45934 = tensor.dim %48388, %c1_45933 : tensor<4x?x4096xf16>
    %48389 = flow.tensor.transfer %48388 : tensor<4x?x4096xf16>{%dim_45934} to #hal.device.promise<@__device_7>
    %48390 = torch_c.from_builtin_tensor %48389 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48391 = torch_c.to_builtin_tensor %48170 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_45935 = arith.constant 1 : index
    %dim_45936 = tensor.dim %48391, %c1_45935 : tensor<4x?x4096xf16>
    %48392 = flow.tensor.transfer %48391 : tensor<4x?x4096xf16>{%dim_45936} to #hal.device.promise<@__device_7>
    %48393 = torch_c.from_builtin_tensor %48392 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
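    // Reduce on @__device_7: final add chain (%48176 needs no transfer here).
    // Taken together, these per-device gather+reduce blocks look like an unrolled
    // all-reduce over the 8 tensor-parallel shards.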
    %int1_45937 = torch.constant.int 1
    %48394 = torch.aten.add.Tensor %48375, %48378, %int1_45937 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45938 = torch.constant.int 1
    %48395 = torch.aten.add.Tensor %48394, %48381, %int1_45938 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45939 = torch.constant.int 1
    %48396 = torch.aten.add.Tensor %48395, %48384, %int1_45939 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45940 = torch.constant.int 1
    %48397 = torch.aten.add.Tensor %48396, %48387, %int1_45940 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45941 = torch.constant.int 1
    %48398 = torch.aten.add.Tensor %48397, %48390, %int1_45941 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45942 = torch.constant.int 1
    %48399 = torch.aten.add.Tensor %48398, %48393, %int1_45942 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45943 = torch.constant.int 1
    %48400 = torch.aten.add.Tensor %48399, %48176, %int1_45943 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
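    // Residual add per device: %47060..%47067 are presumably the incoming hidden
    // states, combined with each device's reduced output (%48204 ... %48400).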
    %int1_45944 = torch.constant.int 1
    %48401 = torch.aten.add.Tensor %47060, %48204, %int1_45944 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45945 = torch.constant.int 1
    %48402 = torch.aten.add.Tensor %47061, %48232, %int1_45945 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45946 = torch.constant.int 1
    %48403 = torch.aten.add.Tensor %47062, %48260, %int1_45946 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45947 = torch.constant.int 1
    %48404 = torch.aten.add.Tensor %47063, %48288, %int1_45947 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45948 = torch.constant.int 1
    %48405 = torch.aten.add.Tensor %47064, %48316, %int1_45948 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45949 = torch.constant.int 1
    %48406 = torch.aten.add.Tensor %47065, %48344, %int1_45949 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45950 = torch.constant.int 1
    %48407 = torch.aten.add.Tensor %47066, %48372, %int1_45950 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_45951 = torch.constant.int 1
    %48408 = torch.aten.add.Tensor %47067, %48400, %int1_45951 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
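    // RMSNorm, step 1: upcast f16 -> f32 (torch dtype code 6 = float32) so the
    // mean/rsqrt below run at full precision.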
    %int6_45952 = torch.constant.int 6
    %48409 = torch.prims.convert_element_type %48401, %int6_45952 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_45953 = torch.constant.int 6
    %48410 = torch.prims.convert_element_type %48402, %int6_45953 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_45954 = torch.constant.int 6
    %48411 = torch.prims.convert_element_type %48403, %int6_45954 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_45955 = torch.constant.int 6
    %48412 = torch.prims.convert_element_type %48404, %int6_45955 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_45956 = torch.constant.int 6
    %48413 = torch.prims.convert_element_type %48405, %int6_45956 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_45957 = torch.constant.int 6
    %48414 = torch.prims.convert_element_type %48406, %int6_45957 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_45958 = torch.constant.int 6
    %48415 = torch.prims.convert_element_type %48407, %int6_45958 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_45959 = torch.constant.int 6
    %48416 = torch.prims.convert_element_type %48408, %int6_45959 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
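    // Step 2: square elementwise (x^2) ahead of the mean.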
    %int2_45960 = torch.constant.int 2
    %48417 = torch.aten.pow.Tensor_Scalar %48409, %int2_45960 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_45961 = torch.constant.int 2
    %48418 = torch.aten.pow.Tensor_Scalar %48410, %int2_45961 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_45962 = torch.constant.int 2
    %48419 = torch.aten.pow.Tensor_Scalar %48411, %int2_45962 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_45963 = torch.constant.int 2
    %48420 = torch.aten.pow.Tensor_Scalar %48412, %int2_45963 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_45964 = torch.constant.int 2
    %48421 = torch.aten.pow.Tensor_Scalar %48413, %int2_45964 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_45965 = torch.constant.int 2
    %48422 = torch.aten.pow.Tensor_Scalar %48414, %int2_45965 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_45966 = torch.constant.int 2
    %48423 = torch.aten.pow.Tensor_Scalar %48415, %int2_45966 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_45967 = torch.constant.int 2
    %48424 = torch.aten.pow.Tensor_Scalar %48416, %int2_45967 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
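    // Step 3: mean of the squares over the hidden dim (-1, keepdim=true), giving [4,?,1].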
    %int-1_45968 = torch.constant.int -1
    %48425 = torch.prim.ListConstruct %int-1_45968 : (!torch.int) -> !torch.list<int>
    %true_45969 = torch.constant.bool true
    %none_45970 = torch.constant.none
    %48426 = torch.aten.mean.dim %48417, %48425, %true_45969, %none_45970 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_45971 = torch.constant.int -1
    %48427 = torch.prim.ListConstruct %int-1_45971 : (!torch.int) -> !torch.list<int>
    %true_45972 = torch.constant.bool true
    %none_45973 = torch.constant.none
    %48428 = torch.aten.mean.dim %48418, %48427, %true_45972, %none_45973 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_45974 = torch.constant.int -1
    %48429 = torch.prim.ListConstruct %int-1_45974 : (!torch.int) -> !torch.list<int>
    %true_45975 = torch.constant.bool true
    %none_45976 = torch.constant.none
    %48430 = torch.aten.mean.dim %48419, %48429, %true_45975, %none_45976 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_45977 = torch.constant.int -1
    %48431 = torch.prim.ListConstruct %int-1_45977 : (!torch.int) -> !torch.list<int>
    %true_45978 = torch.constant.bool true
    %none_45979 = torch.constant.none
    %48432 = torch.aten.mean.dim %48420, %48431, %true_45978, %none_45979 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_45980 = torch.constant.int -1
    %48433 = torch.prim.ListConstruct %int-1_45980 : (!torch.int) -> !torch.list<int>
    %true_45981 = torch.constant.bool true
    %none_45982 = torch.constant.none
    %48434 = torch.aten.mean.dim %48421, %48433, %true_45981, %none_45982 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_45983 = torch.constant.int -1
    %48435 = torch.prim.ListConstruct %int-1_45983 : (!torch.int) -> !torch.list<int>
    %true_45984 = torch.constant.bool true
    %none_45985 = torch.constant.none
    %48436 = torch.aten.mean.dim %48422, %48435, %true_45984, %none_45985 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_45986 = torch.constant.int -1
    %48437 = torch.prim.ListConstruct %int-1_45986 : (!torch.int) -> !torch.list<int>
    %true_45987 = torch.constant.bool true
    %none_45988 = torch.constant.none
    %48438 = torch.aten.mean.dim %48423, %48437, %true_45987, %none_45988 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_45989 = torch.constant.int -1
    %48439 = torch.prim.ListConstruct %int-1_45989 : (!torch.int) -> !torch.list<int>
    %true_45990 = torch.constant.bool true
    %none_45991 = torch.constant.none
    %48440 = torch.aten.mean.dim %48424, %48439, %true_45990, %none_45991 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
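    // Step 4: add the epsilon (9.9999997473787516E-6 is just 1e-5 rounded to f32).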
    %float9.999990e-06_45992 = torch.constant.float 9.9999997473787516E-6
    %int1_45993 = torch.constant.int 1
    %48441 = torch.aten.add.Scalar %48426, %float9.999990e-06_45992, %int1_45993 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_45994 = torch.constant.float 9.9999997473787516E-6
    %int1_45995 = torch.constant.int 1
    %48442 = torch.aten.add.Scalar %48428, %float9.999990e-06_45994, %int1_45995 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_45996 = torch.constant.float 9.9999997473787516E-6
    %int1_45997 = torch.constant.int 1
    %48443 = torch.aten.add.Scalar %48430, %float9.999990e-06_45996, %int1_45997 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_45998 = torch.constant.float 9.9999997473787516E-6
    %int1_45999 = torch.constant.int 1
    %48444 = torch.aten.add.Scalar %48432, %float9.999990e-06_45998, %int1_45999 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_46000 = torch.constant.float 9.9999997473787516E-6
    %int1_46001 = torch.constant.int 1
    %48445 = torch.aten.add.Scalar %48434, %float9.999990e-06_46000, %int1_46001 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_46002 = torch.constant.float 9.9999997473787516E-6
    %int1_46003 = torch.constant.int 1
    %48446 = torch.aten.add.Scalar %48436, %float9.999990e-06_46002, %int1_46003 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_46004 = torch.constant.float 9.9999997473787516E-6
    %int1_46005 = torch.constant.int 1
    %48447 = torch.aten.add.Scalar %48438, %float9.999990e-06_46004, %int1_46005 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_46006 = torch.constant.float 9.9999997473787516E-6
    %int1_46007 = torch.constant.int 1
    %48448 = torch.aten.add.Scalar %48440, %float9.999990e-06_46006, %int1_46007 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
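    // Step 5: rsqrt(mean(x^2) + eps), the reciprocal RMS.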
    %48449 = torch.aten.rsqrt %48441 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48450 = torch.aten.rsqrt %48442 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48451 = torch.aten.rsqrt %48443 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48452 = torch.aten.rsqrt %48444 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48453 = torch.aten.rsqrt %48445 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48454 = torch.aten.rsqrt %48446 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48455 = torch.aten.rsqrt %48447 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48456 = torch.aten.rsqrt %48448 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
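    // Step 6: normalize x by broadcasting the [4,?,1] factor over the hidden dim.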
    %48457 = torch.aten.mul.Tensor %48409, %48449 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48458 = torch.aten.mul.Tensor %48410, %48450 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48459 = torch.aten.mul.Tensor %48411, %48451 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48460 = torch.aten.mul.Tensor %48412, %48452 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48461 = torch.aten.mul.Tensor %48413, %48453 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48462 = torch.aten.mul.Tensor %48414, %48454 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48463 = torch.aten.mul.Tensor %48415, %48455 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48464 = torch.aten.mul.Tensor %48416, %48456 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
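    // Step 7: scale by the learned [4096] f32 norm weight (%1776..%1783, one
    // replica per device), completing RMSNorm: w * x / rms(x).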
    %48465 = torch.aten.mul.Tensor %1776, %48457 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48466 = torch.aten.mul.Tensor %1777, %48458 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48467 = torch.aten.mul.Tensor %1778, %48459 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48468 = torch.aten.mul.Tensor %1779, %48460 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48469 = torch.aten.mul.Tensor %1780, %48461 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48470 = torch.aten.mul.Tensor %1781, %48462 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48471 = torch.aten.mul.Tensor %1782, %48463 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48472 = torch.aten.mul.Tensor %1783, %48464 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
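    // Step 8: downcast back to f16 (torch dtype code 5 = float16) for the matmuls.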
    %int5_46008 = torch.constant.int 5
    %48473 = torch.prims.convert_element_type %48465, %int5_46008 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46009 = torch.constant.int 5
    %48474 = torch.prims.convert_element_type %48466, %int5_46009 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46010 = torch.constant.int 5
    %48475 = torch.prims.convert_element_type %48467, %int5_46010 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46011 = torch.constant.int 5
    %48476 = torch.prims.convert_element_type %48468, %int5_46011 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46012 = torch.constant.int 5
    %48477 = torch.prims.convert_element_type %48469, %int5_46012 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46013 = torch.constant.int 5
    %48478 = torch.prims.convert_element_type %48470, %int5_46013 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46014 = torch.constant.int 5
    %48479 = torch.prims.convert_element_type %48471, %int5_46014 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46015 = torch.constant.int 5
    %48480 = torch.prims.convert_element_type %48472, %int5_46015 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
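    // Transpose the [1792,4096] FFN weights %1784..%1791 to [4096,1792] for
    // torch.aten.mm. 8 x 1792 = 14336, so these are presumably the FFN
    // intermediate dimension column-sharded across the devices (gate projection,
    // judging by the SiLU that follows; an inference, not confirmed by the dump).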
    %int1_46016 = torch.constant.int 1
    %int0_46017 = torch.constant.int 0
    %48481 = torch.prim.ListConstruct %int1_46016, %int0_46017 : (!torch.int, !torch.int) -> !torch.list<int>
    %48482 = torch.aten.permute %1784, %48481 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46018 = torch.constant.int 1
    %int0_46019 = torch.constant.int 0
    %48483 = torch.prim.ListConstruct %int1_46018, %int0_46019 : (!torch.int, !torch.int) -> !torch.list<int>
    %48484 = torch.aten.permute %1785, %48483 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46020 = torch.constant.int 1
    %int0_46021 = torch.constant.int 0
    %48485 = torch.prim.ListConstruct %int1_46020, %int0_46021 : (!torch.int, !torch.int) -> !torch.list<int>
    %48486 = torch.aten.permute %1786, %48485 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46022 = torch.constant.int 1
    %int0_46023 = torch.constant.int 0
    %48487 = torch.prim.ListConstruct %int1_46022, %int0_46023 : (!torch.int, !torch.int) -> !torch.list<int>
    %48488 = torch.aten.permute %1787, %48487 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46024 = torch.constant.int 1
    %int0_46025 = torch.constant.int 0
    %48489 = torch.prim.ListConstruct %int1_46024, %int0_46025 : (!torch.int, !torch.int) -> !torch.list<int>
    %48490 = torch.aten.permute %1788, %48489 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46026 = torch.constant.int 1
    %int0_46027 = torch.constant.int 0
    %48491 = torch.prim.ListConstruct %int1_46026, %int0_46027 : (!torch.int, !torch.int) -> !torch.list<int>
    %48492 = torch.aten.permute %1789, %48491 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46028 = torch.constant.int 1
    %int0_46029 = torch.constant.int 0
    %48493 = torch.prim.ListConstruct %int1_46028, %int0_46029 : (!torch.int, !torch.int) -> !torch.list<int>
    %48494 = torch.aten.permute %1790, %48493 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46030 = torch.constant.int 1
    %int0_46031 = torch.constant.int 0
    %48495 = torch.prim.ListConstruct %int1_46030, %int0_46031 : (!torch.int, !torch.int) -> !torch.list<int>
    %48496 = torch.aten.permute %1791, %48495 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
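    // Projection per device: flatten [4,?,4096] -> [4*?,4096], mm with the
    // [4096,1792] shard, then view back to [4,?,1792].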
    %int4_46032 = torch.constant.int 4
    %48497 = torch.aten.mul.int %int4_46032, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46033 = torch.constant.int 4096
    %48498 = torch.prim.ListConstruct %48497, %int4096_46033 : (!torch.int, !torch.int) -> !torch.list<int>
    %48499 = torch.aten.view %48473, %48498 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48499, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48500 = torch.aten.mm %48499, %48482 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48500, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46034 = torch.constant.int 4
    %int1792_46035 = torch.constant.int 1792
    %48501 = torch.prim.ListConstruct %int4_46034, %2482, %int1792_46035 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48502 = torch.aten.view %48500, %48501 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46036 = torch.constant.int 4
    %48503 = torch.aten.mul.int %int4_46036, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46037 = torch.constant.int 4096
    %48504 = torch.prim.ListConstruct %48503, %int4096_46037 : (!torch.int, !torch.int) -> !torch.list<int>
    %48505 = torch.aten.view %48474, %48504 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48505, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48506 = torch.aten.mm %48505, %48484 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48506, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46038 = torch.constant.int 4
    %int1792_46039 = torch.constant.int 1792
    %48507 = torch.prim.ListConstruct %int4_46038, %2482, %int1792_46039 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48508 = torch.aten.view %48506, %48507 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46040 = torch.constant.int 4
    %48509 = torch.aten.mul.int %int4_46040, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46041 = torch.constant.int 4096
    %48510 = torch.prim.ListConstruct %48509, %int4096_46041 : (!torch.int, !torch.int) -> !torch.list<int>
    %48511 = torch.aten.view %48475, %48510 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48511, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48512 = torch.aten.mm %48511, %48486 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48512, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46042 = torch.constant.int 4
    %int1792_46043 = torch.constant.int 1792
    %48513 = torch.prim.ListConstruct %int4_46042, %2482, %int1792_46043 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48514 = torch.aten.view %48512, %48513 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46044 = torch.constant.int 4
    %48515 = torch.aten.mul.int %int4_46044, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46045 = torch.constant.int 4096
    %48516 = torch.prim.ListConstruct %48515, %int4096_46045 : (!torch.int, !torch.int) -> !torch.list<int>
    %48517 = torch.aten.view %48476, %48516 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48517, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48518 = torch.aten.mm %48517, %48488 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48518, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46046 = torch.constant.int 4
    %int1792_46047 = torch.constant.int 1792
    %48519 = torch.prim.ListConstruct %int4_46046, %2482, %int1792_46047 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48520 = torch.aten.view %48518, %48519 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46048 = torch.constant.int 4
    %48521 = torch.aten.mul.int %int4_46048, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46049 = torch.constant.int 4096
    %48522 = torch.prim.ListConstruct %48521, %int4096_46049 : (!torch.int, !torch.int) -> !torch.list<int>
    %48523 = torch.aten.view %48477, %48522 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48523, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48524 = torch.aten.mm %48523, %48490 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48524, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46050 = torch.constant.int 4
    %int1792_46051 = torch.constant.int 1792
    %48525 = torch.prim.ListConstruct %int4_46050, %2482, %int1792_46051 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48526 = torch.aten.view %48524, %48525 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46052 = torch.constant.int 4
    %48527 = torch.aten.mul.int %int4_46052, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46053 = torch.constant.int 4096
    %48528 = torch.prim.ListConstruct %48527, %int4096_46053 : (!torch.int, !torch.int) -> !torch.list<int>
    %48529 = torch.aten.view %48478, %48528 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48529, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48530 = torch.aten.mm %48529, %48492 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48530, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46054 = torch.constant.int 4
    %int1792_46055 = torch.constant.int 1792
    %48531 = torch.prim.ListConstruct %int4_46054, %2482, %int1792_46055 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48532 = torch.aten.view %48530, %48531 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46056 = torch.constant.int 4
    %48533 = torch.aten.mul.int %int4_46056, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46057 = torch.constant.int 4096
    %48534 = torch.prim.ListConstruct %48533, %int4096_46057 : (!torch.int, !torch.int) -> !torch.list<int>
    %48535 = torch.aten.view %48479, %48534 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48535, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48536 = torch.aten.mm %48535, %48494 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48536, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46058 = torch.constant.int 4
    %int1792_46059 = torch.constant.int 1792
    %48537 = torch.prim.ListConstruct %int4_46058, %2482, %int1792_46059 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48538 = torch.aten.view %48536, %48537 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46060 = torch.constant.int 4
    %48539 = torch.aten.mul.int %int4_46060, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46061 = torch.constant.int 4096
    %48540 = torch.prim.ListConstruct %48539, %int4096_46061 : (!torch.int, !torch.int) -> !torch.list<int>
    %48541 = torch.aten.view %48480, %48540 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48541, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48542 = torch.aten.mm %48541, %48496 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48542, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46062 = torch.constant.int 4
    %int1792_46063 = torch.constant.int 1792
    %48543 = torch.prim.ListConstruct %int4_46062, %2482, %int1792_46063 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48544 = torch.aten.view %48542, %48543 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
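    // SiLU on each projected shard: silu(x) = x * sigmoid(x).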
    %48545 = torch.aten.silu %48502 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48546 = torch.aten.silu %48508 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48547 = torch.aten.silu %48514 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48548 = torch.aten.silu %48520 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48549 = torch.aten.silu %48526 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48550 = torch.aten.silu %48532 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48551 = torch.aten.silu %48538 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48552 = torch.aten.silu %48544 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
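    // Transpose the second weight set %1792..%1799 ([1792,4096] -> [4096,1792]);
    // presumably the up-projection shards that pair elementwise with the SiLU
    // outputs above.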
    %int1_46064 = torch.constant.int 1
    %int0_46065 = torch.constant.int 0
    %48553 = torch.prim.ListConstruct %int1_46064, %int0_46065 : (!torch.int, !torch.int) -> !torch.list<int>
    %48554 = torch.aten.permute %1792, %48553 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46066 = torch.constant.int 1
    %int0_46067 = torch.constant.int 0
    %48555 = torch.prim.ListConstruct %int1_46066, %int0_46067 : (!torch.int, !torch.int) -> !torch.list<int>
    %48556 = torch.aten.permute %1793, %48555 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46068 = torch.constant.int 1
    %int0_46069 = torch.constant.int 0
    %48557 = torch.prim.ListConstruct %int1_46068, %int0_46069 : (!torch.int, !torch.int) -> !torch.list<int>
    %48558 = torch.aten.permute %1794, %48557 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46070 = torch.constant.int 1
    %int0_46071 = torch.constant.int 0
    %48559 = torch.prim.ListConstruct %int1_46070, %int0_46071 : (!torch.int, !torch.int) -> !torch.list<int>
    %48560 = torch.aten.permute %1795, %48559 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46072 = torch.constant.int 1
    %int0_46073 = torch.constant.int 0
    %48561 = torch.prim.ListConstruct %int1_46072, %int0_46073 : (!torch.int, !torch.int) -> !torch.list<int>
    %48562 = torch.aten.permute %1796, %48561 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46074 = torch.constant.int 1
    %int0_46075 = torch.constant.int 0
    %48563 = torch.prim.ListConstruct %int1_46074, %int0_46075 : (!torch.int, !torch.int) -> !torch.list<int>
    %48564 = torch.aten.permute %1797, %48563 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46076 = torch.constant.int 1
    %int0_46077 = torch.constant.int 0
    %48565 = torch.prim.ListConstruct %int1_46076, %int0_46077 : (!torch.int, !torch.int) -> !torch.list<int>
    %48566 = torch.aten.permute %1798, %48565 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_46078 = torch.constant.int 1
    %int0_46079 = torch.constant.int 0
    %48567 = torch.prim.ListConstruct %int1_46078, %int0_46079 : (!torch.int, !torch.int) -> !torch.list<int>
    %48568 = torch.aten.permute %1799, %48567 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
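    // Same flatten/mm/reshape pattern applied to the normalized activations for
    // the second projection (the remaining devices continue past this excerpt).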
    %int4_46080 = torch.constant.int 4
    %48569 = torch.aten.mul.int %int4_46080, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46081 = torch.constant.int 4096
    %48570 = torch.prim.ListConstruct %48569, %int4096_46081 : (!torch.int, !torch.int) -> !torch.list<int>
    %48571 = torch.aten.view %48473, %48570 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48571, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48572 = torch.aten.mm %48571, %48554 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48572, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46082 = torch.constant.int 4
    %int1792_46083 = torch.constant.int 1792
    %48573 = torch.prim.ListConstruct %int4_46082, %2482, %int1792_46083 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48574 = torch.aten.view %48572, %48573 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46084 = torch.constant.int 4
    %48575 = torch.aten.mul.int %int4_46084, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46085 = torch.constant.int 4096
    %48576 = torch.prim.ListConstruct %48575, %int4096_46085 : (!torch.int, !torch.int) -> !torch.list<int>
    %48577 = torch.aten.view %48474, %48576 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48577, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48578 = torch.aten.mm %48577, %48556 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48578, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46086 = torch.constant.int 4
    %int1792_46087 = torch.constant.int 1792
    %48579 = torch.prim.ListConstruct %int4_46086, %2482, %int1792_46087 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48580 = torch.aten.view %48578, %48579 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46088 = torch.constant.int 4
    %48581 = torch.aten.mul.int %int4_46088, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46089 = torch.constant.int 4096
    %48582 = torch.prim.ListConstruct %48581, %int4096_46089 : (!torch.int, !torch.int) -> !torch.list<int>
    %48583 = torch.aten.view %48475, %48582 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48583, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48584 = torch.aten.mm %48583, %48558 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48584, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46090 = torch.constant.int 4
    %int1792_46091 = torch.constant.int 1792
    %48585 = torch.prim.ListConstruct %int4_46090, %2482, %int1792_46091 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48586 = torch.aten.view %48584, %48585 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46092 = torch.constant.int 4
    %48587 = torch.aten.mul.int %int4_46092, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46093 = torch.constant.int 4096
    %48588 = torch.prim.ListConstruct %48587, %int4096_46093 : (!torch.int, !torch.int) -> !torch.list<int>
    %48589 = torch.aten.view %48476, %48588 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48589, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48590 = torch.aten.mm %48589, %48560 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48590, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46094 = torch.constant.int 4
    %int1792_46095 = torch.constant.int 1792
    %48591 = torch.prim.ListConstruct %int4_46094, %2482, %int1792_46095 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48592 = torch.aten.view %48590, %48591 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46096 = torch.constant.int 4
    %48593 = torch.aten.mul.int %int4_46096, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46097 = torch.constant.int 4096
    %48594 = torch.prim.ListConstruct %48593, %int4096_46097 : (!torch.int, !torch.int) -> !torch.list<int>
    %48595 = torch.aten.view %48477, %48594 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48595, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48596 = torch.aten.mm %48595, %48562 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48596, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46098 = torch.constant.int 4
    %int1792_46099 = torch.constant.int 1792
    %48597 = torch.prim.ListConstruct %int4_46098, %2482, %int1792_46099 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48598 = torch.aten.view %48596, %48597 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46100 = torch.constant.int 4
    %48599 = torch.aten.mul.int %int4_46100, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46101 = torch.constant.int 4096
    %48600 = torch.prim.ListConstruct %48599, %int4096_46101 : (!torch.int, !torch.int) -> !torch.list<int>
    %48601 = torch.aten.view %48478, %48600 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48601, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48602 = torch.aten.mm %48601, %48564 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48602, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46102 = torch.constant.int 4
    %int1792_46103 = torch.constant.int 1792
    %48603 = torch.prim.ListConstruct %int4_46102, %2482, %int1792_46103 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48604 = torch.aten.view %48602, %48603 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46104 = torch.constant.int 4
    %48605 = torch.aten.mul.int %int4_46104, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46105 = torch.constant.int 4096
    %48606 = torch.prim.ListConstruct %48605, %int4096_46105 : (!torch.int, !torch.int) -> !torch.list<int>
    %48607 = torch.aten.view %48479, %48606 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48607, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48608 = torch.aten.mm %48607, %48566 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48608, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46106 = torch.constant.int 4
    %int1792_46107 = torch.constant.int 1792
    %48609 = torch.prim.ListConstruct %int4_46106, %2482, %int1792_46107 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48610 = torch.aten.view %48608, %48609 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_46108 = torch.constant.int 4
    %48611 = torch.aten.mul.int %int4_46108, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46109 = torch.constant.int 4096
    %48612 = torch.prim.ListConstruct %48611, %int4096_46109 : (!torch.int, !torch.int) -> !torch.list<int>
    %48613 = torch.aten.view %48480, %48612 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48613, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %48614 = torch.aten.mm %48613, %48568 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48614, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_46110 = torch.constant.int 4
    %int1792_46111 = torch.constant.int 1792
    %48615 = torch.prim.ListConstruct %int4_46110, %2482, %int1792_46111 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48616 = torch.aten.view %48614, %48615 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
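    // Elementwise combine per shard: multiply the (presumably SiLU-activated)
    // gate projections %48545..%48552 with the up projections computed above,
    // i.e. a SwiGLU-style gating, one product per device shard.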
    %48617 = torch.aten.mul.Tensor %48545, %48574 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48618 = torch.aten.mul.Tensor %48546, %48580 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48619 = torch.aten.mul.Tensor %48547, %48586 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48620 = torch.aten.mul.Tensor %48548, %48592 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48621 = torch.aten.mul.Tensor %48549, %48598 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48622 = torch.aten.mul.Tensor %48550, %48604 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48623 = torch.aten.mul.Tensor %48551, %48610 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %48624 = torch.aten.mul.Tensor %48552, %48616 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %48624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
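    // Transpose each shard's down-projection weight (%1800..%1807) from
    // [4096,1792] to [1792,4096] so the following torch.aten.mm ops can
    // consume it directly.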
    %int1_46112 = torch.constant.int 1
    %int0_46113 = torch.constant.int 0
    %48625 = torch.prim.ListConstruct %int1_46112, %int0_46113 : (!torch.int, !torch.int) -> !torch.list<int>
    %48626 = torch.aten.permute %1800, %48625 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_46114 = torch.constant.int 1
    %int0_46115 = torch.constant.int 0
    %48627 = torch.prim.ListConstruct %int1_46114, %int0_46115 : (!torch.int, !torch.int) -> !torch.list<int>
    %48628 = torch.aten.permute %1801, %48627 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_46116 = torch.constant.int 1
    %int0_46117 = torch.constant.int 0
    %48629 = torch.prim.ListConstruct %int1_46116, %int0_46117 : (!torch.int, !torch.int) -> !torch.list<int>
    %48630 = torch.aten.permute %1802, %48629 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_46118 = torch.constant.int 1
    %int0_46119 = torch.constant.int 0
    %48631 = torch.prim.ListConstruct %int1_46118, %int0_46119 : (!torch.int, !torch.int) -> !torch.list<int>
    %48632 = torch.aten.permute %1803, %48631 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_46120 = torch.constant.int 1
    %int0_46121 = torch.constant.int 0
    %48633 = torch.prim.ListConstruct %int1_46120, %int0_46121 : (!torch.int, !torch.int) -> !torch.list<int>
    %48634 = torch.aten.permute %1804, %48633 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_46122 = torch.constant.int 1
    %int0_46123 = torch.constant.int 0
    %48635 = torch.prim.ListConstruct %int1_46122, %int0_46123 : (!torch.int, !torch.int) -> !torch.list<int>
    %48636 = torch.aten.permute %1805, %48635 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_46124 = torch.constant.int 1
    %int0_46125 = torch.constant.int 0
    %48637 = torch.prim.ListConstruct %int1_46124, %int0_46125 : (!torch.int, !torch.int) -> !torch.list<int>
    %48638 = torch.aten.permute %1806, %48637 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_46126 = torch.constant.int 1
    %int0_46127 = torch.constant.int 0
    %48639 = torch.prim.ListConstruct %int1_46126, %int0_46127 : (!torch.int, !torch.int) -> !torch.list<int>
    %48640 = torch.aten.permute %1807, %48639 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
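    // Down projection per shard: flatten the gated activations to [?,1792],
    // mm against the transposed weight -> [?,4096], then restore [4,?,4096].
    // After this, each device holds one partial sum of the layer output.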
    %int1_46128 = torch.constant.int 1
    %48641 = torch.aten.size.int %48502, %int1_46128 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_46129 = torch.constant.int 4
    %48642 = torch.aten.mul.int %int4_46129, %48641 : !torch.int, !torch.int -> !torch.int
    %int1792_46130 = torch.constant.int 1792
    %48643 = torch.prim.ListConstruct %48642, %int1792_46130 : (!torch.int, !torch.int) -> !torch.list<int>
    %48644 = torch.aten.view %48617, %48643 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48644, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %48645 = torch.aten.mm %48644, %48626 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48645, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_46131 = torch.constant.int 4
    %int4096_46132 = torch.constant.int 4096
    %48646 = torch.prim.ListConstruct %int4_46131, %48641, %int4096_46132 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48647 = torch.aten.view %48645, %48646 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46133 = torch.constant.int 1
    %48648 = torch.aten.size.int %48508, %int1_46133 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_46134 = torch.constant.int 4
    %48649 = torch.aten.mul.int %int4_46134, %48648 : !torch.int, !torch.int -> !torch.int
    %int1792_46135 = torch.constant.int 1792
    %48650 = torch.prim.ListConstruct %48649, %int1792_46135 : (!torch.int, !torch.int) -> !torch.list<int>
    %48651 = torch.aten.view %48618, %48650 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48651, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %48652 = torch.aten.mm %48651, %48628 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48652, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_46136 = torch.constant.int 4
    %int4096_46137 = torch.constant.int 4096
    %48653 = torch.prim.ListConstruct %int4_46136, %48648, %int4096_46137 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48654 = torch.aten.view %48652, %48653 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46138 = torch.constant.int 1
    %48655 = torch.aten.size.int %48514, %int1_46138 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_46139 = torch.constant.int 4
    %48656 = torch.aten.mul.int %int4_46139, %48655 : !torch.int, !torch.int -> !torch.int
    %int1792_46140 = torch.constant.int 1792
    %48657 = torch.prim.ListConstruct %48656, %int1792_46140 : (!torch.int, !torch.int) -> !torch.list<int>
    %48658 = torch.aten.view %48619, %48657 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48658, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %48659 = torch.aten.mm %48658, %48630 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48659, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_46141 = torch.constant.int 4
    %int4096_46142 = torch.constant.int 4096
    %48660 = torch.prim.ListConstruct %int4_46141, %48655, %int4096_46142 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48661 = torch.aten.view %48659, %48660 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46143 = torch.constant.int 1
    %48662 = torch.aten.size.int %48520, %int1_46143 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_46144 = torch.constant.int 4
    %48663 = torch.aten.mul.int %int4_46144, %48662 : !torch.int, !torch.int -> !torch.int
    %int1792_46145 = torch.constant.int 1792
    %48664 = torch.prim.ListConstruct %48663, %int1792_46145 : (!torch.int, !torch.int) -> !torch.list<int>
    %48665 = torch.aten.view %48620, %48664 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48665, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %48666 = torch.aten.mm %48665, %48632 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48666, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_46146 = torch.constant.int 4
    %int4096_46147 = torch.constant.int 4096
    %48667 = torch.prim.ListConstruct %int4_46146, %48662, %int4096_46147 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48668 = torch.aten.view %48666, %48667 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46148 = torch.constant.int 1
    %48669 = torch.aten.size.int %48526, %int1_46148 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_46149 = torch.constant.int 4
    %48670 = torch.aten.mul.int %int4_46149, %48669 : !torch.int, !torch.int -> !torch.int
    %int1792_46150 = torch.constant.int 1792
    %48671 = torch.prim.ListConstruct %48670, %int1792_46150 : (!torch.int, !torch.int) -> !torch.list<int>
    %48672 = torch.aten.view %48621, %48671 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48672, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %48673 = torch.aten.mm %48672, %48634 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48673, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_46151 = torch.constant.int 4
    %int4096_46152 = torch.constant.int 4096
    %48674 = torch.prim.ListConstruct %int4_46151, %48669, %int4096_46152 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48675 = torch.aten.view %48673, %48674 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46153 = torch.constant.int 1
    %48676 = torch.aten.size.int %48532, %int1_46153 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_46154 = torch.constant.int 4
    %48677 = torch.aten.mul.int %int4_46154, %48676 : !torch.int, !torch.int -> !torch.int
    %int1792_46155 = torch.constant.int 1792
    %48678 = torch.prim.ListConstruct %48677, %int1792_46155 : (!torch.int, !torch.int) -> !torch.list<int>
    %48679 = torch.aten.view %48622, %48678 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48679, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %48680 = torch.aten.mm %48679, %48636 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48680, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_46156 = torch.constant.int 4
    %int4096_46157 = torch.constant.int 4096
    %48681 = torch.prim.ListConstruct %int4_46156, %48676, %int4096_46157 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48682 = torch.aten.view %48680, %48681 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46158 = torch.constant.int 1
    %48683 = torch.aten.size.int %48538, %int1_46158 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_46159 = torch.constant.int 4
    %48684 = torch.aten.mul.int %int4_46159, %48683 : !torch.int, !torch.int -> !torch.int
    %int1792_46160 = torch.constant.int 1792
    %48685 = torch.prim.ListConstruct %48684, %int1792_46160 : (!torch.int, !torch.int) -> !torch.list<int>
    %48686 = torch.aten.view %48623, %48685 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48686, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %48687 = torch.aten.mm %48686, %48638 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48687, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_46161 = torch.constant.int 4
    %int4096_46162 = torch.constant.int 4096
    %48688 = torch.prim.ListConstruct %int4_46161, %48683, %int4096_46162 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48689 = torch.aten.view %48687, %48688 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46163 = torch.constant.int 1
    %48690 = torch.aten.size.int %48544, %int1_46163 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_46164 = torch.constant.int 4
    %48691 = torch.aten.mul.int %int4_46164, %48690 : !torch.int, !torch.int -> !torch.int
    %int1792_46165 = torch.constant.int 1792
    %48692 = torch.prim.ListConstruct %48691, %int1792_46165 : (!torch.int, !torch.int) -> !torch.list<int>
    %48693 = torch.aten.view %48624, %48692 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %48693, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %48694 = torch.aten.mm %48693, %48640 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %48694, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_46166 = torch.constant.int 4
    %int4096_46167 = torch.constant.int 4096
    %48695 = torch.prim.ListConstruct %int4_46166, %48690, %int4096_46167 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %48696 = torch.aten.view %48694, %48695 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
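    // Unrolled all-reduce over the eight partial outputs, replicated once per
    // device. First @__device_0: transfer the seven remote partials
    // (%48654..%48696) onto device 0; the local partial %48647 needs no transfer.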
    %48697 = torch_c.to_builtin_tensor %48654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46168 = arith.constant 1 : index
    %dim_46169 = tensor.dim %48697, %c1_46168 : tensor<4x?x4096xf16>
    %48698 = flow.tensor.transfer %48697 : tensor<4x?x4096xf16>{%dim_46169} to #hal.device.promise<@__device_0>
    %48699 = torch_c.from_builtin_tensor %48698 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48700 = torch_c.to_builtin_tensor %48661 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46170 = arith.constant 1 : index
    %dim_46171 = tensor.dim %48700, %c1_46170 : tensor<4x?x4096xf16>
    %48701 = flow.tensor.transfer %48700 : tensor<4x?x4096xf16>{%dim_46171} to #hal.device.promise<@__device_0>
    %48702 = torch_c.from_builtin_tensor %48701 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48703 = torch_c.to_builtin_tensor %48668 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46172 = arith.constant 1 : index
    %dim_46173 = tensor.dim %48703, %c1_46172 : tensor<4x?x4096xf16>
    %48704 = flow.tensor.transfer %48703 : tensor<4x?x4096xf16>{%dim_46173} to #hal.device.promise<@__device_0>
    %48705 = torch_c.from_builtin_tensor %48704 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48706 = torch_c.to_builtin_tensor %48675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46174 = arith.constant 1 : index
    %dim_46175 = tensor.dim %48706, %c1_46174 : tensor<4x?x4096xf16>
    %48707 = flow.tensor.transfer %48706 : tensor<4x?x4096xf16>{%dim_46175} to #hal.device.promise<@__device_0>
    %48708 = torch_c.from_builtin_tensor %48707 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48709 = torch_c.to_builtin_tensor %48682 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46176 = arith.constant 1 : index
    %dim_46177 = tensor.dim %48709, %c1_46176 : tensor<4x?x4096xf16>
    %48710 = flow.tensor.transfer %48709 : tensor<4x?x4096xf16>{%dim_46177} to #hal.device.promise<@__device_0>
    %48711 = torch_c.from_builtin_tensor %48710 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48712 = torch_c.to_builtin_tensor %48689 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46178 = arith.constant 1 : index
    %dim_46179 = tensor.dim %48712, %c1_46178 : tensor<4x?x4096xf16>
    %48713 = flow.tensor.transfer %48712 : tensor<4x?x4096xf16>{%dim_46179} to #hal.device.promise<@__device_0>
    %48714 = torch_c.from_builtin_tensor %48713 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48715 = torch_c.to_builtin_tensor %48696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46180 = arith.constant 1 : index
    %dim_46181 = tensor.dim %48715, %c1_46180 : tensor<4x?x4096xf16>
    %48716 = flow.tensor.transfer %48715 : tensor<4x?x4096xf16>{%dim_46181} to #hal.device.promise<@__device_0>
    %48717 = torch_c.from_builtin_tensor %48716 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
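    // Accumulate the eight partials on @__device_0 into the reduced result.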
    %int1_46182 = torch.constant.int 1
    %48718 = torch.aten.add.Tensor %48647, %48699, %int1_46182 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46183 = torch.constant.int 1
    %48719 = torch.aten.add.Tensor %48718, %48702, %int1_46183 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46184 = torch.constant.int 1
    %48720 = torch.aten.add.Tensor %48719, %48705, %int1_46184 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46185 = torch.constant.int 1
    %48721 = torch.aten.add.Tensor %48720, %48708, %int1_46185 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46186 = torch.constant.int 1
    %48722 = torch.aten.add.Tensor %48721, %48711, %int1_46186 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46187 = torch.constant.int 1
    %48723 = torch.aten.add.Tensor %48722, %48714, %int1_46187 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46188 = torch.constant.int 1
    %48724 = torch.aten.add.Tensor %48723, %48717, %int1_46188 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
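    // Same reduction for @__device_1: bring over the other shards' partials
    // (the local %48654 is added in place) and sum.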
    %48725 = torch_c.to_builtin_tensor %48647 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46189 = arith.constant 1 : index
    %dim_46190 = tensor.dim %48725, %c1_46189 : tensor<4x?x4096xf16>
    %48726 = flow.tensor.transfer %48725 : tensor<4x?x4096xf16>{%dim_46190} to #hal.device.promise<@__device_1>
    %48727 = torch_c.from_builtin_tensor %48726 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48728 = torch_c.to_builtin_tensor %48661 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46191 = arith.constant 1 : index
    %dim_46192 = tensor.dim %48728, %c1_46191 : tensor<4x?x4096xf16>
    %48729 = flow.tensor.transfer %48728 : tensor<4x?x4096xf16>{%dim_46192} to #hal.device.promise<@__device_1>
    %48730 = torch_c.from_builtin_tensor %48729 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48731 = torch_c.to_builtin_tensor %48668 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46193 = arith.constant 1 : index
    %dim_46194 = tensor.dim %48731, %c1_46193 : tensor<4x?x4096xf16>
    %48732 = flow.tensor.transfer %48731 : tensor<4x?x4096xf16>{%dim_46194} to #hal.device.promise<@__device_1>
    %48733 = torch_c.from_builtin_tensor %48732 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48734 = torch_c.to_builtin_tensor %48675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46195 = arith.constant 1 : index
    %dim_46196 = tensor.dim %48734, %c1_46195 : tensor<4x?x4096xf16>
    %48735 = flow.tensor.transfer %48734 : tensor<4x?x4096xf16>{%dim_46196} to #hal.device.promise<@__device_1>
    %48736 = torch_c.from_builtin_tensor %48735 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48737 = torch_c.to_builtin_tensor %48682 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46197 = arith.constant 1 : index
    %dim_46198 = tensor.dim %48737, %c1_46197 : tensor<4x?x4096xf16>
    %48738 = flow.tensor.transfer %48737 : tensor<4x?x4096xf16>{%dim_46198} to #hal.device.promise<@__device_1>
    %48739 = torch_c.from_builtin_tensor %48738 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48740 = torch_c.to_builtin_tensor %48689 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46199 = arith.constant 1 : index
    %dim_46200 = tensor.dim %48740, %c1_46199 : tensor<4x?x4096xf16>
    %48741 = flow.tensor.transfer %48740 : tensor<4x?x4096xf16>{%dim_46200} to #hal.device.promise<@__device_1>
    %48742 = torch_c.from_builtin_tensor %48741 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48743 = torch_c.to_builtin_tensor %48696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46201 = arith.constant 1 : index
    %dim_46202 = tensor.dim %48743, %c1_46201 : tensor<4x?x4096xf16>
    %48744 = flow.tensor.transfer %48743 : tensor<4x?x4096xf16>{%dim_46202} to #hal.device.promise<@__device_1>
    %48745 = torch_c.from_builtin_tensor %48744 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46203 = torch.constant.int 1
    %48746 = torch.aten.add.Tensor %48727, %48654, %int1_46203 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46204 = torch.constant.int 1
    %48747 = torch.aten.add.Tensor %48746, %48730, %int1_46204 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46205 = torch.constant.int 1
    %48748 = torch.aten.add.Tensor %48747, %48733, %int1_46205 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46206 = torch.constant.int 1
    %48749 = torch.aten.add.Tensor %48748, %48736, %int1_46206 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46207 = torch.constant.int 1
    %48750 = torch.aten.add.Tensor %48749, %48739, %int1_46207 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46208 = torch.constant.int 1
    %48751 = torch.aten.add.Tensor %48750, %48742, %int1_46208 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46209 = torch.constant.int 1
    %48752 = torch.aten.add.Tensor %48751, %48745, %int1_46209 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
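    // Reduction replica for @__device_2 (local partial: %48661).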
    %48753 = torch_c.to_builtin_tensor %48647 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46210 = arith.constant 1 : index
    %dim_46211 = tensor.dim %48753, %c1_46210 : tensor<4x?x4096xf16>
    %48754 = flow.tensor.transfer %48753 : tensor<4x?x4096xf16>{%dim_46211} to #hal.device.promise<@__device_2>
    %48755 = torch_c.from_builtin_tensor %48754 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48756 = torch_c.to_builtin_tensor %48654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46212 = arith.constant 1 : index
    %dim_46213 = tensor.dim %48756, %c1_46212 : tensor<4x?x4096xf16>
    %48757 = flow.tensor.transfer %48756 : tensor<4x?x4096xf16>{%dim_46213} to #hal.device.promise<@__device_2>
    %48758 = torch_c.from_builtin_tensor %48757 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48759 = torch_c.to_builtin_tensor %48668 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46214 = arith.constant 1 : index
    %dim_46215 = tensor.dim %48759, %c1_46214 : tensor<4x?x4096xf16>
    %48760 = flow.tensor.transfer %48759 : tensor<4x?x4096xf16>{%dim_46215} to #hal.device.promise<@__device_2>
    %48761 = torch_c.from_builtin_tensor %48760 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48762 = torch_c.to_builtin_tensor %48675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46216 = arith.constant 1 : index
    %dim_46217 = tensor.dim %48762, %c1_46216 : tensor<4x?x4096xf16>
    %48763 = flow.tensor.transfer %48762 : tensor<4x?x4096xf16>{%dim_46217} to #hal.device.promise<@__device_2>
    %48764 = torch_c.from_builtin_tensor %48763 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48765 = torch_c.to_builtin_tensor %48682 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46218 = arith.constant 1 : index
    %dim_46219 = tensor.dim %48765, %c1_46218 : tensor<4x?x4096xf16>
    %48766 = flow.tensor.transfer %48765 : tensor<4x?x4096xf16>{%dim_46219} to #hal.device.promise<@__device_2>
    %48767 = torch_c.from_builtin_tensor %48766 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48768 = torch_c.to_builtin_tensor %48689 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46220 = arith.constant 1 : index
    %dim_46221 = tensor.dim %48768, %c1_46220 : tensor<4x?x4096xf16>
    %48769 = flow.tensor.transfer %48768 : tensor<4x?x4096xf16>{%dim_46221} to #hal.device.promise<@__device_2>
    %48770 = torch_c.from_builtin_tensor %48769 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48771 = torch_c.to_builtin_tensor %48696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46222 = arith.constant 1 : index
    %dim_46223 = tensor.dim %48771, %c1_46222 : tensor<4x?x4096xf16>
    %48772 = flow.tensor.transfer %48771 : tensor<4x?x4096xf16>{%dim_46223} to #hal.device.promise<@__device_2>
    %48773 = torch_c.from_builtin_tensor %48772 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46224 = torch.constant.int 1
    %48774 = torch.aten.add.Tensor %48755, %48758, %int1_46224 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46225 = torch.constant.int 1
    %48775 = torch.aten.add.Tensor %48774, %48661, %int1_46225 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46226 = torch.constant.int 1
    %48776 = torch.aten.add.Tensor %48775, %48761, %int1_46226 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46227 = torch.constant.int 1
    %48777 = torch.aten.add.Tensor %48776, %48764, %int1_46227 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46228 = torch.constant.int 1
    %48778 = torch.aten.add.Tensor %48777, %48767, %int1_46228 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46229 = torch.constant.int 1
    %48779 = torch.aten.add.Tensor %48778, %48770, %int1_46229 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46230 = torch.constant.int 1
    %48780 = torch.aten.add.Tensor %48779, %48773, %int1_46230 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
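    // Reduction replica for @__device_3 (local partial: %48668).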
    %48781 = torch_c.to_builtin_tensor %48647 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46231 = arith.constant 1 : index
    %dim_46232 = tensor.dim %48781, %c1_46231 : tensor<4x?x4096xf16>
    %48782 = flow.tensor.transfer %48781 : tensor<4x?x4096xf16>{%dim_46232} to #hal.device.promise<@__device_3>
    %48783 = torch_c.from_builtin_tensor %48782 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48784 = torch_c.to_builtin_tensor %48654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46233 = arith.constant 1 : index
    %dim_46234 = tensor.dim %48784, %c1_46233 : tensor<4x?x4096xf16>
    %48785 = flow.tensor.transfer %48784 : tensor<4x?x4096xf16>{%dim_46234} to #hal.device.promise<@__device_3>
    %48786 = torch_c.from_builtin_tensor %48785 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48787 = torch_c.to_builtin_tensor %48661 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46235 = arith.constant 1 : index
    %dim_46236 = tensor.dim %48787, %c1_46235 : tensor<4x?x4096xf16>
    %48788 = flow.tensor.transfer %48787 : tensor<4x?x4096xf16>{%dim_46236} to #hal.device.promise<@__device_3>
    %48789 = torch_c.from_builtin_tensor %48788 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48790 = torch_c.to_builtin_tensor %48675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46237 = arith.constant 1 : index
    %dim_46238 = tensor.dim %48790, %c1_46237 : tensor<4x?x4096xf16>
    %48791 = flow.tensor.transfer %48790 : tensor<4x?x4096xf16>{%dim_46238} to #hal.device.promise<@__device_3>
    %48792 = torch_c.from_builtin_tensor %48791 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48793 = torch_c.to_builtin_tensor %48682 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46239 = arith.constant 1 : index
    %dim_46240 = tensor.dim %48793, %c1_46239 : tensor<4x?x4096xf16>
    %48794 = flow.tensor.transfer %48793 : tensor<4x?x4096xf16>{%dim_46240} to #hal.device.promise<@__device_3>
    %48795 = torch_c.from_builtin_tensor %48794 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48796 = torch_c.to_builtin_tensor %48689 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46241 = arith.constant 1 : index
    %dim_46242 = tensor.dim %48796, %c1_46241 : tensor<4x?x4096xf16>
    %48797 = flow.tensor.transfer %48796 : tensor<4x?x4096xf16>{%dim_46242} to #hal.device.promise<@__device_3>
    %48798 = torch_c.from_builtin_tensor %48797 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48799 = torch_c.to_builtin_tensor %48696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46243 = arith.constant 1 : index
    %dim_46244 = tensor.dim %48799, %c1_46243 : tensor<4x?x4096xf16>
    %48800 = flow.tensor.transfer %48799 : tensor<4x?x4096xf16>{%dim_46244} to #hal.device.promise<@__device_3>
    %48801 = torch_c.from_builtin_tensor %48800 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46245 = torch.constant.int 1
    %48802 = torch.aten.add.Tensor %48783, %48786, %int1_46245 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46246 = torch.constant.int 1
    %48803 = torch.aten.add.Tensor %48802, %48789, %int1_46246 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46247 = torch.constant.int 1
    %48804 = torch.aten.add.Tensor %48803, %48668, %int1_46247 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46248 = torch.constant.int 1
    %48805 = torch.aten.add.Tensor %48804, %48792, %int1_46248 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46249 = torch.constant.int 1
    %48806 = torch.aten.add.Tensor %48805, %48795, %int1_46249 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46250 = torch.constant.int 1
    %48807 = torch.aten.add.Tensor %48806, %48798, %int1_46250 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46251 = torch.constant.int 1
    %48808 = torch.aten.add.Tensor %48807, %48801, %int1_46251 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
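    // Reduction replica for @__device_4 (local partial: %48675).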
    %48809 = torch_c.to_builtin_tensor %48647 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46252 = arith.constant 1 : index
    %dim_46253 = tensor.dim %48809, %c1_46252 : tensor<4x?x4096xf16>
    %48810 = flow.tensor.transfer %48809 : tensor<4x?x4096xf16>{%dim_46253} to #hal.device.promise<@__device_4>
    %48811 = torch_c.from_builtin_tensor %48810 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48812 = torch_c.to_builtin_tensor %48654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46254 = arith.constant 1 : index
    %dim_46255 = tensor.dim %48812, %c1_46254 : tensor<4x?x4096xf16>
    %48813 = flow.tensor.transfer %48812 : tensor<4x?x4096xf16>{%dim_46255} to #hal.device.promise<@__device_4>
    %48814 = torch_c.from_builtin_tensor %48813 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48815 = torch_c.to_builtin_tensor %48661 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46256 = arith.constant 1 : index
    %dim_46257 = tensor.dim %48815, %c1_46256 : tensor<4x?x4096xf16>
    %48816 = flow.tensor.transfer %48815 : tensor<4x?x4096xf16>{%dim_46257} to #hal.device.promise<@__device_4>
    %48817 = torch_c.from_builtin_tensor %48816 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48818 = torch_c.to_builtin_tensor %48668 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46258 = arith.constant 1 : index
    %dim_46259 = tensor.dim %48818, %c1_46258 : tensor<4x?x4096xf16>
    %48819 = flow.tensor.transfer %48818 : tensor<4x?x4096xf16>{%dim_46259} to #hal.device.promise<@__device_4>
    %48820 = torch_c.from_builtin_tensor %48819 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48821 = torch_c.to_builtin_tensor %48682 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46260 = arith.constant 1 : index
    %dim_46261 = tensor.dim %48821, %c1_46260 : tensor<4x?x4096xf16>
    %48822 = flow.tensor.transfer %48821 : tensor<4x?x4096xf16>{%dim_46261} to #hal.device.promise<@__device_4>
    %48823 = torch_c.from_builtin_tensor %48822 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48824 = torch_c.to_builtin_tensor %48689 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46262 = arith.constant 1 : index
    %dim_46263 = tensor.dim %48824, %c1_46262 : tensor<4x?x4096xf16>
    %48825 = flow.tensor.transfer %48824 : tensor<4x?x4096xf16>{%dim_46263} to #hal.device.promise<@__device_4>
    %48826 = torch_c.from_builtin_tensor %48825 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48827 = torch_c.to_builtin_tensor %48696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46264 = arith.constant 1 : index
    %dim_46265 = tensor.dim %48827, %c1_46264 : tensor<4x?x4096xf16>
    %48828 = flow.tensor.transfer %48827 : tensor<4x?x4096xf16>{%dim_46265} to #hal.device.promise<@__device_4>
    %48829 = torch_c.from_builtin_tensor %48828 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46266 = torch.constant.int 1
    %48830 = torch.aten.add.Tensor %48811, %48814, %int1_46266 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46267 = torch.constant.int 1
    %48831 = torch.aten.add.Tensor %48830, %48817, %int1_46267 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46268 = torch.constant.int 1
    %48832 = torch.aten.add.Tensor %48831, %48820, %int1_46268 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46269 = torch.constant.int 1
    %48833 = torch.aten.add.Tensor %48832, %48675, %int1_46269 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46270 = torch.constant.int 1
    %48834 = torch.aten.add.Tensor %48833, %48823, %int1_46270 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46271 = torch.constant.int 1
    %48835 = torch.aten.add.Tensor %48834, %48826, %int1_46271 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46272 = torch.constant.int 1
    %48836 = torch.aten.add.Tensor %48835, %48829, %int1_46272 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
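    // Reduction replica for @__device_5 (local partial: %48682).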
    %48837 = torch_c.to_builtin_tensor %48647 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46273 = arith.constant 1 : index
    %dim_46274 = tensor.dim %48837, %c1_46273 : tensor<4x?x4096xf16>
    %48838 = flow.tensor.transfer %48837 : tensor<4x?x4096xf16>{%dim_46274} to #hal.device.promise<@__device_5>
    %48839 = torch_c.from_builtin_tensor %48838 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48840 = torch_c.to_builtin_tensor %48654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46275 = arith.constant 1 : index
    %dim_46276 = tensor.dim %48840, %c1_46275 : tensor<4x?x4096xf16>
    %48841 = flow.tensor.transfer %48840 : tensor<4x?x4096xf16>{%dim_46276} to #hal.device.promise<@__device_5>
    %48842 = torch_c.from_builtin_tensor %48841 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48843 = torch_c.to_builtin_tensor %48661 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46277 = arith.constant 1 : index
    %dim_46278 = tensor.dim %48843, %c1_46277 : tensor<4x?x4096xf16>
    %48844 = flow.tensor.transfer %48843 : tensor<4x?x4096xf16>{%dim_46278} to #hal.device.promise<@__device_5>
    %48845 = torch_c.from_builtin_tensor %48844 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48846 = torch_c.to_builtin_tensor %48668 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46279 = arith.constant 1 : index
    %dim_46280 = tensor.dim %48846, %c1_46279 : tensor<4x?x4096xf16>
    %48847 = flow.tensor.transfer %48846 : tensor<4x?x4096xf16>{%dim_46280} to #hal.device.promise<@__device_5>
    %48848 = torch_c.from_builtin_tensor %48847 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48849 = torch_c.to_builtin_tensor %48675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46281 = arith.constant 1 : index
    %dim_46282 = tensor.dim %48849, %c1_46281 : tensor<4x?x4096xf16>
    %48850 = flow.tensor.transfer %48849 : tensor<4x?x4096xf16>{%dim_46282} to #hal.device.promise<@__device_5>
    %48851 = torch_c.from_builtin_tensor %48850 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48852 = torch_c.to_builtin_tensor %48689 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46283 = arith.constant 1 : index
    %dim_46284 = tensor.dim %48852, %c1_46283 : tensor<4x?x4096xf16>
    %48853 = flow.tensor.transfer %48852 : tensor<4x?x4096xf16>{%dim_46284} to #hal.device.promise<@__device_5>
    %48854 = torch_c.from_builtin_tensor %48853 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48855 = torch_c.to_builtin_tensor %48696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46285 = arith.constant 1 : index
    %dim_46286 = tensor.dim %48855, %c1_46285 : tensor<4x?x4096xf16>
    %48856 = flow.tensor.transfer %48855 : tensor<4x?x4096xf16>{%dim_46286} to #hal.device.promise<@__device_5>
    %48857 = torch_c.from_builtin_tensor %48856 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46287 = torch.constant.int 1
    %48858 = torch.aten.add.Tensor %48839, %48842, %int1_46287 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46288 = torch.constant.int 1
    %48859 = torch.aten.add.Tensor %48858, %48845, %int1_46288 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46289 = torch.constant.int 1
    %48860 = torch.aten.add.Tensor %48859, %48848, %int1_46289 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46290 = torch.constant.int 1
    %48861 = torch.aten.add.Tensor %48860, %48851, %int1_46290 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46291 = torch.constant.int 1
    %48862 = torch.aten.add.Tensor %48861, %48682, %int1_46291 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46292 = torch.constant.int 1
    %48863 = torch.aten.add.Tensor %48862, %48854, %int1_46292 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46293 = torch.constant.int 1
    %48864 = torch.aten.add.Tensor %48863, %48857, %int1_46293 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
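    // Gather for @__device_6: transfer the other devices' partial results over;
    // the locally resident value (%48689) is consumed in place in the reduction below.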
    %48865 = torch_c.to_builtin_tensor %48647 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46294 = arith.constant 1 : index
    %dim_46295 = tensor.dim %48865, %c1_46294 : tensor<4x?x4096xf16>
    %48866 = flow.tensor.transfer %48865 : tensor<4x?x4096xf16>{%dim_46295} to #hal.device.promise<@__device_6>
    %48867 = torch_c.from_builtin_tensor %48866 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48868 = torch_c.to_builtin_tensor %48654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46296 = arith.constant 1 : index
    %dim_46297 = tensor.dim %48868, %c1_46296 : tensor<4x?x4096xf16>
    %48869 = flow.tensor.transfer %48868 : tensor<4x?x4096xf16>{%dim_46297} to #hal.device.promise<@__device_6>
    %48870 = torch_c.from_builtin_tensor %48869 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48871 = torch_c.to_builtin_tensor %48661 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46298 = arith.constant 1 : index
    %dim_46299 = tensor.dim %48871, %c1_46298 : tensor<4x?x4096xf16>
    %48872 = flow.tensor.transfer %48871 : tensor<4x?x4096xf16>{%dim_46299} to #hal.device.promise<@__device_6>
    %48873 = torch_c.from_builtin_tensor %48872 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48873, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48874 = torch_c.to_builtin_tensor %48668 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46300 = arith.constant 1 : index
    %dim_46301 = tensor.dim %48874, %c1_46300 : tensor<4x?x4096xf16>
    %48875 = flow.tensor.transfer %48874 : tensor<4x?x4096xf16>{%dim_46301} to #hal.device.promise<@__device_6>
    %48876 = torch_c.from_builtin_tensor %48875 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48877 = torch_c.to_builtin_tensor %48675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46302 = arith.constant 1 : index
    %dim_46303 = tensor.dim %48877, %c1_46302 : tensor<4x?x4096xf16>
    %48878 = flow.tensor.transfer %48877 : tensor<4x?x4096xf16>{%dim_46303} to #hal.device.promise<@__device_6>
    %48879 = torch_c.from_builtin_tensor %48878 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48880 = torch_c.to_builtin_tensor %48682 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46304 = arith.constant 1 : index
    %dim_46305 = tensor.dim %48880, %c1_46304 : tensor<4x?x4096xf16>
    %48881 = flow.tensor.transfer %48880 : tensor<4x?x4096xf16>{%dim_46305} to #hal.device.promise<@__device_6>
    %48882 = torch_c.from_builtin_tensor %48881 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48883 = torch_c.to_builtin_tensor %48696 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46306 = arith.constant 1 : index
    %dim_46307 = tensor.dim %48883, %c1_46306 : tensor<4x?x4096xf16>
    %48884 = flow.tensor.transfer %48883 : tensor<4x?x4096xf16>{%dim_46307} to #hal.device.promise<@__device_6>
    %48885 = torch_c.from_builtin_tensor %48884 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46308 = torch.constant.int 1
    %48886 = torch.aten.add.Tensor %48867, %48870, %int1_46308 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46309 = torch.constant.int 1
    %48887 = torch.aten.add.Tensor %48886, %48873, %int1_46309 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46310 = torch.constant.int 1
    %48888 = torch.aten.add.Tensor %48887, %48876, %int1_46310 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46311 = torch.constant.int 1
    %48889 = torch.aten.add.Tensor %48888, %48879, %int1_46311 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46312 = torch.constant.int 1
    %48890 = torch.aten.add.Tensor %48889, %48882, %int1_46312 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46313 = torch.constant.int 1
    %48891 = torch.aten.add.Tensor %48890, %48689, %int1_46313 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46314 = torch.constant.int 1
    %48892 = torch.aten.add.Tensor %48891, %48885, %int1_46314 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
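    // Gather for @__device_7: same pattern; the locally resident value here is %48696.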
    %48893 = torch_c.to_builtin_tensor %48647 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46315 = arith.constant 1 : index
    %dim_46316 = tensor.dim %48893, %c1_46315 : tensor<4x?x4096xf16>
    %48894 = flow.tensor.transfer %48893 : tensor<4x?x4096xf16>{%dim_46316} to #hal.device.promise<@__device_7>
    %48895 = torch_c.from_builtin_tensor %48894 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48896 = torch_c.to_builtin_tensor %48654 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46317 = arith.constant 1 : index
    %dim_46318 = tensor.dim %48896, %c1_46317 : tensor<4x?x4096xf16>
    %48897 = flow.tensor.transfer %48896 : tensor<4x?x4096xf16>{%dim_46318} to #hal.device.promise<@__device_7>
    %48898 = torch_c.from_builtin_tensor %48897 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48899 = torch_c.to_builtin_tensor %48661 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46319 = arith.constant 1 : index
    %dim_46320 = tensor.dim %48899, %c1_46319 : tensor<4x?x4096xf16>
    %48900 = flow.tensor.transfer %48899 : tensor<4x?x4096xf16>{%dim_46320} to #hal.device.promise<@__device_7>
    %48901 = torch_c.from_builtin_tensor %48900 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48902 = torch_c.to_builtin_tensor %48668 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46321 = arith.constant 1 : index
    %dim_46322 = tensor.dim %48902, %c1_46321 : tensor<4x?x4096xf16>
    %48903 = flow.tensor.transfer %48902 : tensor<4x?x4096xf16>{%dim_46322} to #hal.device.promise<@__device_7>
    %48904 = torch_c.from_builtin_tensor %48903 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48905 = torch_c.to_builtin_tensor %48675 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46323 = arith.constant 1 : index
    %dim_46324 = tensor.dim %48905, %c1_46323 : tensor<4x?x4096xf16>
    %48906 = flow.tensor.transfer %48905 : tensor<4x?x4096xf16>{%dim_46324} to #hal.device.promise<@__device_7>
    %48907 = torch_c.from_builtin_tensor %48906 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48908 = torch_c.to_builtin_tensor %48682 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46325 = arith.constant 1 : index
    %dim_46326 = tensor.dim %48908, %c1_46325 : tensor<4x?x4096xf16>
    %48909 = flow.tensor.transfer %48908 : tensor<4x?x4096xf16>{%dim_46326} to #hal.device.promise<@__device_7>
    %48910 = torch_c.from_builtin_tensor %48909 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %48911 = torch_c.to_builtin_tensor %48689 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_46327 = arith.constant 1 : index
    %dim_46328 = tensor.dim %48911, %c1_46327 : tensor<4x?x4096xf16>
    %48912 = flow.tensor.transfer %48911 : tensor<4x?x4096xf16>{%dim_46328} to #hal.device.promise<@__device_7>
    %48913 = torch_c.from_builtin_tensor %48912 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46329 = torch.constant.int 1
    %48914 = torch.aten.add.Tensor %48895, %48898, %int1_46329 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46330 = torch.constant.int 1
    %48915 = torch.aten.add.Tensor %48914, %48901, %int1_46330 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46331 = torch.constant.int 1
    %48916 = torch.aten.add.Tensor %48915, %48904, %int1_46331 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46332 = torch.constant.int 1
    %48917 = torch.aten.add.Tensor %48916, %48907, %int1_46332 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46333 = torch.constant.int 1
    %48918 = torch.aten.add.Tensor %48917, %48910, %int1_46333 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46334 = torch.constant.int 1
    %48919 = torch.aten.add.Tensor %48918, %48913, %int1_46334 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46335 = torch.constant.int 1
    %48920 = torch.aten.add.Tensor %48919, %48696, %int1_46335 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
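    // All eight devices now hold the same all-reduced sum (%48724 ... %48920, the
    // first five computed before this excerpt). Add it to each device's replica of
    // the incoming hidden state (%48401 ... %48408), apparently the residual connection.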
    %int1_46336 = torch.constant.int 1
    %48921 = torch.aten.add.Tensor %48401, %48724, %int1_46336 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46337 = torch.constant.int 1
    %48922 = torch.aten.add.Tensor %48402, %48752, %int1_46337 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46338 = torch.constant.int 1
    %48923 = torch.aten.add.Tensor %48403, %48780, %int1_46338 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46339 = torch.constant.int 1
    %48924 = torch.aten.add.Tensor %48404, %48808, %int1_46339 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46340 = torch.constant.int 1
    %48925 = torch.aten.add.Tensor %48405, %48836, %int1_46340 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46341 = torch.constant.int 1
    %48926 = torch.aten.add.Tensor %48406, %48864, %int1_46341 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46342 = torch.constant.int 1
    %48927 = torch.aten.add.Tensor %48407, %48892, %int1_46342 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_46343 = torch.constant.int 1
    %48928 = torch.aten.add.Tensor %48408, %48920, %int1_46343 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
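    // What follows is the RMSNorm pattern, replicated per device. First upcast to
    // f32 (torch dtype code 6) so the mean-of-squares is computed in full precision.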
    %int6_46344 = torch.constant.int 6
    %48929 = torch.prims.convert_element_type %48921, %int6_46344 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_46345 = torch.constant.int 6
    %48930 = torch.prims.convert_element_type %48922, %int6_46345 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_46346 = torch.constant.int 6
    %48931 = torch.prims.convert_element_type %48923, %int6_46346 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_46347 = torch.constant.int 6
    %48932 = torch.prims.convert_element_type %48924, %int6_46347 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_46348 = torch.constant.int 6
    %48933 = torch.prims.convert_element_type %48925, %int6_46348 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_46349 = torch.constant.int 6
    %48934 = torch.prims.convert_element_type %48926, %int6_46349 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_46350 = torch.constant.int 6
    %48935 = torch.prims.convert_element_type %48927, %int6_46350 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_46351 = torch.constant.int 6
    %48936 = torch.prims.convert_element_type %48928, %int6_46351 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
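    // Square each element (x^2), per device.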
    %int2_46352 = torch.constant.int 2
    %48937 = torch.aten.pow.Tensor_Scalar %48929, %int2_46352 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_46353 = torch.constant.int 2
    %48938 = torch.aten.pow.Tensor_Scalar %48930, %int2_46353 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_46354 = torch.constant.int 2
    %48939 = torch.aten.pow.Tensor_Scalar %48931, %int2_46354 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_46355 = torch.constant.int 2
    %48940 = torch.aten.pow.Tensor_Scalar %48932, %int2_46355 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_46356 = torch.constant.int 2
    %48941 = torch.aten.pow.Tensor_Scalar %48933, %int2_46356 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_46357 = torch.constant.int 2
    %48942 = torch.aten.pow.Tensor_Scalar %48934, %int2_46357 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_46358 = torch.constant.int 2
    %48943 = torch.aten.pow.Tensor_Scalar %48935, %int2_46358 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_46359 = torch.constant.int 2
    %48944 = torch.aten.pow.Tensor_Scalar %48936, %int2_46359 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
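    // Mean of the squares over the hidden dimension (dim -1, keepdim = true),
    // yielding shape [4,?,1] per device.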
    %int-1_46360 = torch.constant.int -1
    %48945 = torch.prim.ListConstruct %int-1_46360 : (!torch.int) -> !torch.list<int>
    %true_46361 = torch.constant.bool true
    %none_46362 = torch.constant.none
    %48946 = torch.aten.mean.dim %48937, %48945, %true_46361, %none_46362 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_46363 = torch.constant.int -1
    %48947 = torch.prim.ListConstruct %int-1_46363 : (!torch.int) -> !torch.list<int>
    %true_46364 = torch.constant.bool true
    %none_46365 = torch.constant.none
    %48948 = torch.aten.mean.dim %48938, %48947, %true_46364, %none_46365 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_46366 = torch.constant.int -1
    %48949 = torch.prim.ListConstruct %int-1_46366 : (!torch.int) -> !torch.list<int>
    %true_46367 = torch.constant.bool true
    %none_46368 = torch.constant.none
    %48950 = torch.aten.mean.dim %48939, %48949, %true_46367, %none_46368 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_46369 = torch.constant.int -1
    %48951 = torch.prim.ListConstruct %int-1_46369 : (!torch.int) -> !torch.list<int>
    %true_46370 = torch.constant.bool true
    %none_46371 = torch.constant.none
    %48952 = torch.aten.mean.dim %48940, %48951, %true_46370, %none_46371 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_46372 = torch.constant.int -1
    %48953 = torch.prim.ListConstruct %int-1_46372 : (!torch.int) -> !torch.list<int>
    %true_46373 = torch.constant.bool true
    %none_46374 = torch.constant.none
    %48954 = torch.aten.mean.dim %48941, %48953, %true_46373, %none_46374 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_46375 = torch.constant.int -1
    %48955 = torch.prim.ListConstruct %int-1_46375 : (!torch.int) -> !torch.list<int>
    %true_46376 = torch.constant.bool true
    %none_46377 = torch.constant.none
    %48956 = torch.aten.mean.dim %48942, %48955, %true_46376, %none_46377 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_46378 = torch.constant.int -1
    %48957 = torch.prim.ListConstruct %int-1_46378 : (!torch.int) -> !torch.list<int>
    %true_46379 = torch.constant.bool true
    %none_46380 = torch.constant.none
    %48958 = torch.aten.mean.dim %48943, %48957, %true_46379, %none_46380 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_46381 = torch.constant.int -1
    %48959 = torch.prim.ListConstruct %int-1_46381 : (!torch.int) -> !torch.list<int>
    %true_46382 = torch.constant.bool true
    %none_46383 = torch.constant.none
    %48960 = torch.aten.mean.dim %48944, %48959, %true_46382, %none_46383 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
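    // Add epsilon for numerical stability (~1e-5, stored as its f32 value
    // 9.9999997473787516E-6).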
    %float9.999990e-06_46384 = torch.constant.float 9.9999997473787516E-6
    %int1_46385 = torch.constant.int 1
    %48961 = torch.aten.add.Scalar %48946, %float9.999990e-06_46384, %int1_46385 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_46386 = torch.constant.float 9.9999997473787516E-6
    %int1_46387 = torch.constant.int 1
    %48962 = torch.aten.add.Scalar %48948, %float9.999990e-06_46386, %int1_46387 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_46388 = torch.constant.float 9.9999997473787516E-6
    %int1_46389 = torch.constant.int 1
    %48963 = torch.aten.add.Scalar %48950, %float9.999990e-06_46388, %int1_46389 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_46390 = torch.constant.float 9.9999997473787516E-6
    %int1_46391 = torch.constant.int 1
    %48964 = torch.aten.add.Scalar %48952, %float9.999990e-06_46390, %int1_46391 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_46392 = torch.constant.float 9.9999997473787516E-6
    %int1_46393 = torch.constant.int 1
    %48965 = torch.aten.add.Scalar %48954, %float9.999990e-06_46392, %int1_46393 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_46394 = torch.constant.float 9.9999997473787516E-6
    %int1_46395 = torch.constant.int 1
    %48966 = torch.aten.add.Scalar %48956, %float9.999990e-06_46394, %int1_46395 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_46396 = torch.constant.float 9.9999997473787516E-6
    %int1_46397 = torch.constant.int 1
    %48967 = torch.aten.add.Scalar %48958, %float9.999990e-06_46396, %int1_46397 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_46398 = torch.constant.float 9.9999997473787516E-6
    %int1_46399 = torch.constant.int 1
    %48968 = torch.aten.add.Scalar %48960, %float9.999990e-06_46398, %int1_46399 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
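    // Reciprocal square root of (mean(x^2) + eps).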
    %48969 = torch.aten.rsqrt %48961 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48970 = torch.aten.rsqrt %48962 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48971 = torch.aten.rsqrt %48963 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48972 = torch.aten.rsqrt %48964 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48973 = torch.aten.rsqrt %48965 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48974 = torch.aten.rsqrt %48966 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48975 = torch.aten.rsqrt %48967 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %48976 = torch.aten.rsqrt %48968 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %48976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
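    // Normalize: multiply each f32 activation by its rsqrt factor, broadcast
    // over the hidden dimension.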
    %48977 = torch.aten.mul.Tensor %48929, %48969 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48978 = torch.aten.mul.Tensor %48930, %48970 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48979 = torch.aten.mul.Tensor %48931, %48971 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48980 = torch.aten.mul.Tensor %48932, %48972 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48981 = torch.aten.mul.Tensor %48933, %48973 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48982 = torch.aten.mul.Tensor %48934, %48974 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48983 = torch.aten.mul.Tensor %48935, %48975 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48984 = torch.aten.mul.Tensor %48936, %48976 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
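    // Scale by the learned norm weight (%1808 ... %1815, one [4096] f32 replica
    // per device, matching the attn_norm.weight globals), completing the RMSNorm.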
    %48985 = torch.aten.mul.Tensor %1808, %48977 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48985, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48986 = torch.aten.mul.Tensor %1809, %48978 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48987 = torch.aten.mul.Tensor %1810, %48979 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48988 = torch.aten.mul.Tensor %1811, %48980 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48989 = torch.aten.mul.Tensor %1812, %48981 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48990 = torch.aten.mul.Tensor %1813, %48982 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48991 = torch.aten.mul.Tensor %1814, %48983 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %48992 = torch.aten.mul.Tensor %1815, %48984 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %48992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
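    // Downcast the normalized activations back to f16 (torch dtype code 5).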
    %int5_46400 = torch.constant.int 5
    %48993 = torch.prims.convert_element_type %48985, %int5_46400 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46401 = torch.constant.int 5
    %48994 = torch.prims.convert_element_type %48986, %int5_46401 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46402 = torch.constant.int 5
    %48995 = torch.prims.convert_element_type %48987, %int5_46402 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46403 = torch.constant.int 5
    %48996 = torch.prims.convert_element_type %48988, %int5_46403 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46404 = torch.constant.int 5
    %48997 = torch.prims.convert_element_type %48989, %int5_46404 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46405 = torch.constant.int 5
    %48998 = torch.prims.convert_element_type %48990, %int5_46405 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46406 = torch.constant.int 5
    %48999 = torch.prims.convert_element_type %48991, %int5_46406 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %48999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_46407 = torch.constant.int 5
    %49000 = torch.prims.convert_element_type %48992, %int5_46407 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %49000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
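    // Q projection: transpose each [512,4096] weight shard (%1816 ... %1823,
    // presumably the attn_q.weight.shard.N globals) to [4096,512] for the matmul.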
    %int1_46408 = torch.constant.int 1
    %int0_46409 = torch.constant.int 0
    %49001 = torch.prim.ListConstruct %int1_46408, %int0_46409 : (!torch.int, !torch.int) -> !torch.list<int>
    %49002 = torch.aten.permute %1816, %49001 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_46410 = torch.constant.int 1
    %int0_46411 = torch.constant.int 0
    %49003 = torch.prim.ListConstruct %int1_46410, %int0_46411 : (!torch.int, !torch.int) -> !torch.list<int>
    %49004 = torch.aten.permute %1817, %49003 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_46412 = torch.constant.int 1
    %int0_46413 = torch.constant.int 0
    %49005 = torch.prim.ListConstruct %int1_46412, %int0_46413 : (!torch.int, !torch.int) -> !torch.list<int>
    %49006 = torch.aten.permute %1818, %49005 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_46414 = torch.constant.int 1
    %int0_46415 = torch.constant.int 0
    %49007 = torch.prim.ListConstruct %int1_46414, %int0_46415 : (!torch.int, !torch.int) -> !torch.list<int>
    %49008 = torch.aten.permute %1819, %49007 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_46416 = torch.constant.int 1
    %int0_46417 = torch.constant.int 0
    %49009 = torch.prim.ListConstruct %int1_46416, %int0_46417 : (!torch.int, !torch.int) -> !torch.list<int>
    %49010 = torch.aten.permute %1820, %49009 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_46418 = torch.constant.int 1
    %int0_46419 = torch.constant.int 0
    %49011 = torch.prim.ListConstruct %int1_46418, %int0_46419 : (!torch.int, !torch.int) -> !torch.list<int>
    %49012 = torch.aten.permute %1821, %49011 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_46420 = torch.constant.int 1
    %int0_46421 = torch.constant.int 0
    %49013 = torch.prim.ListConstruct %int1_46420, %int0_46421 : (!torch.int, !torch.int) -> !torch.list<int>
    %49014 = torch.aten.permute %1822, %49013 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_46422 = torch.constant.int 1
    %int0_46423 = torch.constant.int 0
    %49015 = torch.prim.ListConstruct %int1_46422, %int0_46423 : (!torch.int, !torch.int) -> !torch.list<int>
    %49016 = torch.aten.permute %1823, %49015 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
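    // For each device: flatten [4,?,4096] to [4*?,4096], matmul with the
    // transposed Q shard, then view the [?,512] result back as [4,?,512].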
    %int4_46424 = torch.constant.int 4
    %49017 = torch.aten.mul.int %int4_46424, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46425 = torch.constant.int 4096
    %49018 = torch.prim.ListConstruct %49017, %int4096_46425 : (!torch.int, !torch.int) -> !torch.list<int>
    %49019 = torch.aten.view %48993, %49018 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49019, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49020 = torch.aten.mm %49019, %49002 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %49020, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_46426 = torch.constant.int 4
    %int512_46427 = torch.constant.int 512
    %49021 = torch.prim.ListConstruct %int4_46426, %2482, %int512_46427 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49022 = torch.aten.view %49020, %49021 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_46428 = torch.constant.int 4
    %49023 = torch.aten.mul.int %int4_46428, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46429 = torch.constant.int 4096
    %49024 = torch.prim.ListConstruct %49023, %int4096_46429 : (!torch.int, !torch.int) -> !torch.list<int>
    %49025 = torch.aten.view %48994, %49024 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49025, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49026 = torch.aten.mm %49025, %49004 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %49026, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_46430 = torch.constant.int 4
    %int512_46431 = torch.constant.int 512
    %49027 = torch.prim.ListConstruct %int4_46430, %2482, %int512_46431 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49028 = torch.aten.view %49026, %49027 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_46432 = torch.constant.int 4
    %49029 = torch.aten.mul.int %int4_46432, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46433 = torch.constant.int 4096
    %49030 = torch.prim.ListConstruct %49029, %int4096_46433 : (!torch.int, !torch.int) -> !torch.list<int>
    %49031 = torch.aten.view %48995, %49030 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49031, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49032 = torch.aten.mm %49031, %49006 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %49032, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_46434 = torch.constant.int 4
    %int512_46435 = torch.constant.int 512
    %49033 = torch.prim.ListConstruct %int4_46434, %2482, %int512_46435 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49034 = torch.aten.view %49032, %49033 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_46436 = torch.constant.int 4
    %49035 = torch.aten.mul.int %int4_46436, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46437 = torch.constant.int 4096
    %49036 = torch.prim.ListConstruct %49035, %int4096_46437 : (!torch.int, !torch.int) -> !torch.list<int>
    %49037 = torch.aten.view %48996, %49036 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49037, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49038 = torch.aten.mm %49037, %49008 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %49038, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_46438 = torch.constant.int 4
    %int512_46439 = torch.constant.int 512
    %49039 = torch.prim.ListConstruct %int4_46438, %2482, %int512_46439 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49040 = torch.aten.view %49038, %49039 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_46440 = torch.constant.int 4
    %49041 = torch.aten.mul.int %int4_46440, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46441 = torch.constant.int 4096
    %49042 = torch.prim.ListConstruct %49041, %int4096_46441 : (!torch.int, !torch.int) -> !torch.list<int>
    %49043 = torch.aten.view %48997, %49042 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49043, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49044 = torch.aten.mm %49043, %49010 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %49044, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_46442 = torch.constant.int 4
    %int512_46443 = torch.constant.int 512
    %49045 = torch.prim.ListConstruct %int4_46442, %2482, %int512_46443 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49046 = torch.aten.view %49044, %49045 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_46444 = torch.constant.int 4
    %49047 = torch.aten.mul.int %int4_46444, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46445 = torch.constant.int 4096
    %49048 = torch.prim.ListConstruct %49047, %int4096_46445 : (!torch.int, !torch.int) -> !torch.list<int>
    %49049 = torch.aten.view %48998, %49048 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49049, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49050 = torch.aten.mm %49049, %49012 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %49050, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_46446 = torch.constant.int 4
    %int512_46447 = torch.constant.int 512
    %49051 = torch.prim.ListConstruct %int4_46446, %2482, %int512_46447 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49052 = torch.aten.view %49050, %49051 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_46448 = torch.constant.int 4
    %49053 = torch.aten.mul.int %int4_46448, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46449 = torch.constant.int 4096
    %49054 = torch.prim.ListConstruct %49053, %int4096_46449 : (!torch.int, !torch.int) -> !torch.list<int>
    %49055 = torch.aten.view %48999, %49054 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49055, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49056 = torch.aten.mm %49055, %49014 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %49056, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_46450 = torch.constant.int 4
    %int512_46451 = torch.constant.int 512
    %49057 = torch.prim.ListConstruct %int4_46450, %2482, %int512_46451 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49058 = torch.aten.view %49056, %49057 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_46452 = torch.constant.int 4
    %49059 = torch.aten.mul.int %int4_46452, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46453 = torch.constant.int 4096
    %49060 = torch.prim.ListConstruct %49059, %int4096_46453 : (!torch.int, !torch.int) -> !torch.list<int>
    %49061 = torch.aten.view %49000, %49060 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49061, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49062 = torch.aten.mm %49061, %49016 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %49062, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_46454 = torch.constant.int 4
    %int512_46455 = torch.constant.int 512
    %49063 = torch.prim.ListConstruct %int4_46454, %2482, %int512_46455 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49064 = torch.aten.view %49062, %49063 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
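    // Next projection (K, judging by the narrower [128,4096] shards, i.e. one
    // 128-wide head group per device): transpose each weight shard to [4096,128].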
    %int1_46456 = torch.constant.int 1
    %int0_46457 = torch.constant.int 0
    %49065 = torch.prim.ListConstruct %int1_46456, %int0_46457 : (!torch.int, !torch.int) -> !torch.list<int>
    %49066 = torch.aten.permute %1824, %49065 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46458 = torch.constant.int 1
    %int0_46459 = torch.constant.int 0
    %49067 = torch.prim.ListConstruct %int1_46458, %int0_46459 : (!torch.int, !torch.int) -> !torch.list<int>
    %49068 = torch.aten.permute %1825, %49067 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46460 = torch.constant.int 1
    %int0_46461 = torch.constant.int 0
    %49069 = torch.prim.ListConstruct %int1_46460, %int0_46461 : (!torch.int, !torch.int) -> !torch.list<int>
    %49070 = torch.aten.permute %1826, %49069 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46462 = torch.constant.int 1
    %int0_46463 = torch.constant.int 0
    %49071 = torch.prim.ListConstruct %int1_46462, %int0_46463 : (!torch.int, !torch.int) -> !torch.list<int>
    %49072 = torch.aten.permute %1827, %49071 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46464 = torch.constant.int 1
    %int0_46465 = torch.constant.int 0
    %49073 = torch.prim.ListConstruct %int1_46464, %int0_46465 : (!torch.int, !torch.int) -> !torch.list<int>
    %49074 = torch.aten.permute %1828, %49073 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46466 = torch.constant.int 1
    %int0_46467 = torch.constant.int 0
    %49075 = torch.prim.ListConstruct %int1_46466, %int0_46467 : (!torch.int, !torch.int) -> !torch.list<int>
    %49076 = torch.aten.permute %1829, %49075 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46468 = torch.constant.int 1
    %int0_46469 = torch.constant.int 0
    %49077 = torch.prim.ListConstruct %int1_46468, %int0_46469 : (!torch.int, !torch.int) -> !torch.list<int>
    %49078 = torch.aten.permute %1830, %49077 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46470 = torch.constant.int 1
    %int0_46471 = torch.constant.int 0
    %49079 = torch.prim.ListConstruct %int1_46470, %int0_46471 : (!torch.int, !torch.int) -> !torch.list<int>
    %49080 = torch.aten.permute %1831, %49079 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
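    // Same flatten/matmul/view sequence as above, now producing a [4,?,128]
    // projection per device.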
    %int4_46472 = torch.constant.int 4
    %49081 = torch.aten.mul.int %int4_46472, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46473 = torch.constant.int 4096
    %49082 = torch.prim.ListConstruct %49081, %int4096_46473 : (!torch.int, !torch.int) -> !torch.list<int>
    %49083 = torch.aten.view %48993, %49082 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49083, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49084 = torch.aten.mm %49083, %49066 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49084, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46474 = torch.constant.int 4
    %int128_46475 = torch.constant.int 128
    %49085 = torch.prim.ListConstruct %int4_46474, %2482, %int128_46475 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49086 = torch.aten.view %49084, %49085 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46476 = torch.constant.int 4
    %49087 = torch.aten.mul.int %int4_46476, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46477 = torch.constant.int 4096
    %49088 = torch.prim.ListConstruct %49087, %int4096_46477 : (!torch.int, !torch.int) -> !torch.list<int>
    %49089 = torch.aten.view %48994, %49088 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49089, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49090 = torch.aten.mm %49089, %49068 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49090, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46478 = torch.constant.int 4
    %int128_46479 = torch.constant.int 128
    %49091 = torch.prim.ListConstruct %int4_46478, %2482, %int128_46479 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49092 = torch.aten.view %49090, %49091 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46480 = torch.constant.int 4
    %49093 = torch.aten.mul.int %int4_46480, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46481 = torch.constant.int 4096
    %49094 = torch.prim.ListConstruct %49093, %int4096_46481 : (!torch.int, !torch.int) -> !torch.list<int>
    %49095 = torch.aten.view %48995, %49094 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49095, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49096 = torch.aten.mm %49095, %49070 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49096, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46482 = torch.constant.int 4
    %int128_46483 = torch.constant.int 128
    %49097 = torch.prim.ListConstruct %int4_46482, %2482, %int128_46483 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49098 = torch.aten.view %49096, %49097 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46484 = torch.constant.int 4
    %49099 = torch.aten.mul.int %int4_46484, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46485 = torch.constant.int 4096
    %49100 = torch.prim.ListConstruct %49099, %int4096_46485 : (!torch.int, !torch.int) -> !torch.list<int>
    %49101 = torch.aten.view %48996, %49100 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49101, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49102 = torch.aten.mm %49101, %49072 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49102, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46486 = torch.constant.int 4
    %int128_46487 = torch.constant.int 128
    %49103 = torch.prim.ListConstruct %int4_46486, %2482, %int128_46487 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49104 = torch.aten.view %49102, %49103 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46488 = torch.constant.int 4
    %49105 = torch.aten.mul.int %int4_46488, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46489 = torch.constant.int 4096
    %49106 = torch.prim.ListConstruct %49105, %int4096_46489 : (!torch.int, !torch.int) -> !torch.list<int>
    %49107 = torch.aten.view %48997, %49106 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49107, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49108 = torch.aten.mm %49107, %49074 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49108, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46490 = torch.constant.int 4
    %int128_46491 = torch.constant.int 128
    %49109 = torch.prim.ListConstruct %int4_46490, %2482, %int128_46491 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49110 = torch.aten.view %49108, %49109 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46492 = torch.constant.int 4
    %49111 = torch.aten.mul.int %int4_46492, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46493 = torch.constant.int 4096
    %49112 = torch.prim.ListConstruct %49111, %int4096_46493 : (!torch.int, !torch.int) -> !torch.list<int>
    %49113 = torch.aten.view %48998, %49112 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49113, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49114 = torch.aten.mm %49113, %49076 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49114, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46494 = torch.constant.int 4
    %int128_46495 = torch.constant.int 128
    %49115 = torch.prim.ListConstruct %int4_46494, %2482, %int128_46495 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49116 = torch.aten.view %49114, %49115 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46496 = torch.constant.int 4
    %49117 = torch.aten.mul.int %int4_46496, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46497 = torch.constant.int 4096
    %49118 = torch.prim.ListConstruct %49117, %int4096_46497 : (!torch.int, !torch.int) -> !torch.list<int>
    %49119 = torch.aten.view %48999, %49118 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49119, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49120 = torch.aten.mm %49119, %49078 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49120, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46498 = torch.constant.int 4
    %int128_46499 = torch.constant.int 128
    %49121 = torch.prim.ListConstruct %int4_46498, %2482, %int128_46499 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49122 = torch.aten.view %49120, %49121 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46500 = torch.constant.int 4
    %49123 = torch.aten.mul.int %int4_46500, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46501 = torch.constant.int 4096
    %49124 = torch.prim.ListConstruct %49123, %int4096_46501 : (!torch.int, !torch.int) -> !torch.list<int>
    %49125 = torch.aten.view %49000, %49124 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49125, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49126 = torch.aten.mm %49125, %49080 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49126, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46502 = torch.constant.int 4
    %int128_46503 = torch.constant.int 128
    %49127 = torch.prim.ListConstruct %int4_46502, %2482, %int128_46503 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49128 = torch.aten.view %49126, %49127 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
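    // The eight permutes below transpose the per-device [128,4096] weight
    // shards %1832-%1839 (dims (1,0)) into [4096,128] so they can serve as
    // the right-hand side of torch.aten.mm -- presumably the weights of the
    // next projection for the same eight shards.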
    %int1_46504 = torch.constant.int 1
    %int0_46505 = torch.constant.int 0
    %49129 = torch.prim.ListConstruct %int1_46504, %int0_46505 : (!torch.int, !torch.int) -> !torch.list<int>
    %49130 = torch.aten.permute %1832, %49129 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46506 = torch.constant.int 1
    %int0_46507 = torch.constant.int 0
    %49131 = torch.prim.ListConstruct %int1_46506, %int0_46507 : (!torch.int, !torch.int) -> !torch.list<int>
    %49132 = torch.aten.permute %1833, %49131 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46508 = torch.constant.int 1
    %int0_46509 = torch.constant.int 0
    %49133 = torch.prim.ListConstruct %int1_46508, %int0_46509 : (!torch.int, !torch.int) -> !torch.list<int>
    %49134 = torch.aten.permute %1834, %49133 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46510 = torch.constant.int 1
    %int0_46511 = torch.constant.int 0
    %49135 = torch.prim.ListConstruct %int1_46510, %int0_46511 : (!torch.int, !torch.int) -> !torch.list<int>
    %49136 = torch.aten.permute %1835, %49135 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46512 = torch.constant.int 1
    %int0_46513 = torch.constant.int 0
    %49137 = torch.prim.ListConstruct %int1_46512, %int0_46513 : (!torch.int, !torch.int) -> !torch.list<int>
    %49138 = torch.aten.permute %1836, %49137 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46514 = torch.constant.int 1
    %int0_46515 = torch.constant.int 0
    %49139 = torch.prim.ListConstruct %int1_46514, %int0_46515 : (!torch.int, !torch.int) -> !torch.list<int>
    %49140 = torch.aten.permute %1837, %49139 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46516 = torch.constant.int 1
    %int0_46517 = torch.constant.int 0
    %49141 = torch.prim.ListConstruct %int1_46516, %int0_46517 : (!torch.int, !torch.int) -> !torch.list<int>
    %49142 = torch.aten.permute %1838, %49141 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_46518 = torch.constant.int 1
    %int0_46519 = torch.constant.int 0
    %49143 = torch.prim.ListConstruct %int1_46518, %int0_46519 : (!torch.int, !torch.int) -> !torch.list<int>
    %49144 = torch.aten.permute %1839, %49143 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
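    // Same flatten -> mm -> view sequence as above, now applied per shard
    // with the freshly transposed weights %49130-%49144.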
    %int4_46520 = torch.constant.int 4
    %49145 = torch.aten.mul.int %int4_46520, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46521 = torch.constant.int 4096
    %49146 = torch.prim.ListConstruct %49145, %int4096_46521 : (!torch.int, !torch.int) -> !torch.list<int>
    %49147 = torch.aten.view %48993, %49146 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49147, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49148 = torch.aten.mm %49147, %49130 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49148, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46522 = torch.constant.int 4
    %int128_46523 = torch.constant.int 128
    %49149 = torch.prim.ListConstruct %int4_46522, %2482, %int128_46523 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49150 = torch.aten.view %49148, %49149 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46524 = torch.constant.int 4
    %49151 = torch.aten.mul.int %int4_46524, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46525 = torch.constant.int 4096
    %49152 = torch.prim.ListConstruct %49151, %int4096_46525 : (!torch.int, !torch.int) -> !torch.list<int>
    %49153 = torch.aten.view %48994, %49152 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49153, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49154 = torch.aten.mm %49153, %49132 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49154, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46526 = torch.constant.int 4
    %int128_46527 = torch.constant.int 128
    %49155 = torch.prim.ListConstruct %int4_46526, %2482, %int128_46527 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49156 = torch.aten.view %49154, %49155 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46528 = torch.constant.int 4
    %49157 = torch.aten.mul.int %int4_46528, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46529 = torch.constant.int 4096
    %49158 = torch.prim.ListConstruct %49157, %int4096_46529 : (!torch.int, !torch.int) -> !torch.list<int>
    %49159 = torch.aten.view %48995, %49158 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49159, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49160 = torch.aten.mm %49159, %49134 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49160, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46530 = torch.constant.int 4
    %int128_46531 = torch.constant.int 128
    %49161 = torch.prim.ListConstruct %int4_46530, %2482, %int128_46531 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49162 = torch.aten.view %49160, %49161 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46532 = torch.constant.int 4
    %49163 = torch.aten.mul.int %int4_46532, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46533 = torch.constant.int 4096
    %49164 = torch.prim.ListConstruct %49163, %int4096_46533 : (!torch.int, !torch.int) -> !torch.list<int>
    %49165 = torch.aten.view %48996, %49164 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49165, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49166 = torch.aten.mm %49165, %49136 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49166, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46534 = torch.constant.int 4
    %int128_46535 = torch.constant.int 128
    %49167 = torch.prim.ListConstruct %int4_46534, %2482, %int128_46535 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49168 = torch.aten.view %49166, %49167 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46536 = torch.constant.int 4
    %49169 = torch.aten.mul.int %int4_46536, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46537 = torch.constant.int 4096
    %49170 = torch.prim.ListConstruct %49169, %int4096_46537 : (!torch.int, !torch.int) -> !torch.list<int>
    %49171 = torch.aten.view %48997, %49170 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49171, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49172 = torch.aten.mm %49171, %49138 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49172, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46538 = torch.constant.int 4
    %int128_46539 = torch.constant.int 128
    %49173 = torch.prim.ListConstruct %int4_46538, %2482, %int128_46539 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49174 = torch.aten.view %49172, %49173 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46540 = torch.constant.int 4
    %49175 = torch.aten.mul.int %int4_46540, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46541 = torch.constant.int 4096
    %49176 = torch.prim.ListConstruct %49175, %int4096_46541 : (!torch.int, !torch.int) -> !torch.list<int>
    %49177 = torch.aten.view %48998, %49176 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49177, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49178 = torch.aten.mm %49177, %49140 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49178, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46542 = torch.constant.int 4
    %int128_46543 = torch.constant.int 128
    %49179 = torch.prim.ListConstruct %int4_46542, %2482, %int128_46543 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49180 = torch.aten.view %49178, %49179 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46544 = torch.constant.int 4
    %49181 = torch.aten.mul.int %int4_46544, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46545 = torch.constant.int 4096
    %49182 = torch.prim.ListConstruct %49181, %int4096_46545 : (!torch.int, !torch.int) -> !torch.list<int>
    %49183 = torch.aten.view %48999, %49182 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49183, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49184 = torch.aten.mm %49183, %49142 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49184, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46546 = torch.constant.int 4
    %int128_46547 = torch.constant.int 128
    %49185 = torch.prim.ListConstruct %int4_46546, %2482, %int128_46547 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49186 = torch.aten.view %49184, %49185 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_46548 = torch.constant.int 4
    %49187 = torch.aten.mul.int %int4_46548, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_46549 = torch.constant.int 4096
    %49188 = torch.prim.ListConstruct %49187, %int4096_46549 : (!torch.int, !torch.int) -> !torch.list<int>
    %49189 = torch.aten.view %49000, %49188 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49189, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %49190 = torch.aten.mm %49189, %49144 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %49190, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_46550 = torch.constant.int 4
    %int128_46551 = torch.constant.int 128
    %49191 = torch.prim.ListConstruct %int4_46550, %2482, %int128_46551 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49192 = torch.aten.view %49190, %49191 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %49192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
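    // Reshape the per-device [4,?,512] activations into explicit heads:
    // 512 = 4 heads x 128 head_dim, giving [4,?,4,128] per shard.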
    %int4_46552 = torch.constant.int 4
    %int4_46553 = torch.constant.int 4
    %int128_46554 = torch.constant.int 128
    %49193 = torch.prim.ListConstruct %int4_46552, %2482, %int4_46553, %int128_46554 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49194 = torch.aten.view %49022, %49193 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_46555 = torch.constant.int 4
    %int4_46556 = torch.constant.int 4
    %int128_46557 = torch.constant.int 128
    %49195 = torch.prim.ListConstruct %int4_46555, %2482, %int4_46556, %int128_46557 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49196 = torch.aten.view %49028, %49195 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_46558 = torch.constant.int 4
    %int4_46559 = torch.constant.int 4
    %int128_46560 = torch.constant.int 128
    %49197 = torch.prim.ListConstruct %int4_46558, %2482, %int4_46559, %int128_46560 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49198 = torch.aten.view %49034, %49197 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_46561 = torch.constant.int 4
    %int4_46562 = torch.constant.int 4
    %int128_46563 = torch.constant.int 128
    %49199 = torch.prim.ListConstruct %int4_46561, %2482, %int4_46562, %int128_46563 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49200 = torch.aten.view %49040, %49199 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_46564 = torch.constant.int 4
    %int4_46565 = torch.constant.int 4
    %int128_46566 = torch.constant.int 128
    %49201 = torch.prim.ListConstruct %int4_46564, %2482, %int4_46565, %int128_46566 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49202 = torch.aten.view %49046, %49201 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_46567 = torch.constant.int 4
    %int4_46568 = torch.constant.int 4
    %int128_46569 = torch.constant.int 128
    %49203 = torch.prim.ListConstruct %int4_46567, %2482, %int4_46568, %int128_46569 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49204 = torch.aten.view %49052, %49203 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_46570 = torch.constant.int 4
    %int4_46571 = torch.constant.int 4
    %int128_46572 = torch.constant.int 128
    %49205 = torch.prim.ListConstruct %int4_46570, %2482, %int4_46571, %int128_46572 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49206 = torch.aten.view %49058, %49205 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_46573 = torch.constant.int 4
    %int4_46574 = torch.constant.int 4
    %int128_46575 = torch.constant.int 128
    %49207 = torch.prim.ListConstruct %int4_46573, %2482, %int4_46574, %int128_46575 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49208 = torch.aten.view %49064, %49207 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
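    // The [4,?,128] projection outputs computed above get a singleton head
    // dimension, [4,?,1,128] -- one head of width 128 per device, which is
    // consistent with a grouped-query layout of four query heads per KV
    // head on each shard.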
    %int4_46576 = torch.constant.int 4
    %int1_46577 = torch.constant.int 1
    %int128_46578 = torch.constant.int 128
    %49209 = torch.prim.ListConstruct %int4_46576, %2482, %int1_46577, %int128_46578 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49210 = torch.aten.view %49086, %49209 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46579 = torch.constant.int 4
    %int1_46580 = torch.constant.int 1
    %int128_46581 = torch.constant.int 128
    %49211 = torch.prim.ListConstruct %int4_46579, %2482, %int1_46580, %int128_46581 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49212 = torch.aten.view %49092, %49211 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46582 = torch.constant.int 4
    %int1_46583 = torch.constant.int 1
    %int128_46584 = torch.constant.int 128
    %49213 = torch.prim.ListConstruct %int4_46582, %2482, %int1_46583, %int128_46584 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49214 = torch.aten.view %49098, %49213 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46585 = torch.constant.int 4
    %int1_46586 = torch.constant.int 1
    %int128_46587 = torch.constant.int 128
    %49215 = torch.prim.ListConstruct %int4_46585, %2482, %int1_46586, %int128_46587 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49216 = torch.aten.view %49104, %49215 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46588 = torch.constant.int 4
    %int1_46589 = torch.constant.int 1
    %int128_46590 = torch.constant.int 128
    %49217 = torch.prim.ListConstruct %int4_46588, %2482, %int1_46589, %int128_46590 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49218 = torch.aten.view %49110, %49217 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46591 = torch.constant.int 4
    %int1_46592 = torch.constant.int 1
    %int128_46593 = torch.constant.int 128
    %49219 = torch.prim.ListConstruct %int4_46591, %2482, %int1_46592, %int128_46593 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49220 = torch.aten.view %49116, %49219 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46594 = torch.constant.int 4
    %int1_46595 = torch.constant.int 1
    %int128_46596 = torch.constant.int 128
    %49221 = torch.prim.ListConstruct %int4_46594, %2482, %int1_46595, %int128_46596 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49222 = torch.aten.view %49122, %49221 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46597 = torch.constant.int 4
    %int1_46598 = torch.constant.int 1
    %int128_46599 = torch.constant.int 128
    %49223 = torch.prim.ListConstruct %int4_46597, %2482, %int1_46598, %int128_46599 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49224 = torch.aten.view %49128, %49223 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46600 = torch.constant.int 4
    %int1_46601 = torch.constant.int 1
    %int128_46602 = torch.constant.int 128
    %49225 = torch.prim.ListConstruct %int4_46600, %2482, %int1_46601, %int128_46602 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49226 = torch.aten.view %49150, %49225 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46603 = torch.constant.int 4
    %int1_46604 = torch.constant.int 1
    %int128_46605 = torch.constant.int 128
    %49227 = torch.prim.ListConstruct %int4_46603, %2482, %int1_46604, %int128_46605 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49228 = torch.aten.view %49156, %49227 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46606 = torch.constant.int 4
    %int1_46607 = torch.constant.int 1
    %int128_46608 = torch.constant.int 128
    %49229 = torch.prim.ListConstruct %int4_46606, %2482, %int1_46607, %int128_46608 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49230 = torch.aten.view %49162, %49229 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46609 = torch.constant.int 4
    %int1_46610 = torch.constant.int 1
    %int128_46611 = torch.constant.int 128
    %49231 = torch.prim.ListConstruct %int4_46609, %2482, %int1_46610, %int128_46611 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49232 = torch.aten.view %49168, %49231 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46612 = torch.constant.int 4
    %int1_46613 = torch.constant.int 1
    %int128_46614 = torch.constant.int 128
    %49233 = torch.prim.ListConstruct %int4_46612, %2482, %int1_46613, %int128_46614 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49234 = torch.aten.view %49174, %49233 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46615 = torch.constant.int 4
    %int1_46616 = torch.constant.int 1
    %int128_46617 = torch.constant.int 128
    %49235 = torch.prim.ListConstruct %int4_46615, %2482, %int1_46616, %int128_46617 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49236 = torch.aten.view %49180, %49235 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46618 = torch.constant.int 4
    %int1_46619 = torch.constant.int 1
    %int128_46620 = torch.constant.int 128
    %49237 = torch.prim.ListConstruct %int4_46618, %2482, %int1_46619, %int128_46620 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49238 = torch.aten.view %49186, %49237 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_46621 = torch.constant.int 4
    %int1_46622 = torch.constant.int 1
    %int128_46623 = torch.constant.int 128
    %49239 = torch.prim.ListConstruct %int4_46621, %2482, %int1_46622, %int128_46623 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49240 = torch.aten.view %49192, %49239 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
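    // Rotary embedding table, built once on the CPU:
    //   positions  = arange(131072)                      (max context length)
    //   inv_freq_i = 1 / 500000^(2i/128),  i = 0..63
    //   angles     = positions[:, None] * inv_freq       -> [131072, 64]
    //   table      = cos(angles) + j*sin(angles)         (complex<f32>)
    // The constants 131072, 128, step 2, and theta = 5.0e5 all appear
    // literally in the ops below.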
    %int131072_46624 = torch.constant.int 131072
    %none_46625 = torch.constant.none
    %none_46626 = torch.constant.none
    %cpu_46627 = torch.constant.device "cpu"
    %false_46628 = torch.constant.bool false
    %49241 = torch.aten.arange %int131072_46624, %none_46625, %none_46626, %cpu_46627, %false_46628 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_46629 = torch.constant.int 0
    %int128_46630 = torch.constant.int 128
    %int2_46631 = torch.constant.int 2
    %none_46632 = torch.constant.none
    %none_46633 = torch.constant.none
    %cpu_46634 = torch.constant.device "cpu"
    %false_46635 = torch.constant.bool false
    %49242 = torch.aten.arange.start_step %int0_46629, %int128_46630, %int2_46631, %none_46632, %none_46633, %cpu_46634, %false_46635 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_46636 = torch.constant.int 0
    %int0_46637 = torch.constant.int 0
    %int64_46638 = torch.constant.int 64
    %int1_46639 = torch.constant.int 1
    %49243 = torch.aten.slice.Tensor %49242, %int0_46636, %int0_46637, %int64_46638, %int1_46639 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_46640 = torch.constant.int 6
    %49244 = torch.prims.convert_element_type %49243, %int6_46640 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_46641 = torch.constant.int 128
    %49245 = torch.aten.div.Scalar %49244, %int128_46641 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_46642 = torch.constant.float 5.000000e+05
    %49246 = torch.aten.pow.Scalar %float5.000000e05_46642, %49245 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %49247 = torch.aten.reciprocal %49246 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_46643 = torch.constant.float 1.000000e+00
    %49248 = torch.aten.mul.Scalar %49247, %float1.000000e00_46643 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_46644 = torch.constant.int 131072
    %int1_46645 = torch.constant.int 1
    %49249 = torch.prim.ListConstruct %int131072_46644, %int1_46645 : (!torch.int, !torch.int) -> !torch.list<int>
    %49250 = torch.aten.view %49241, %49249 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %49251 = torch.aten.mul.Tensor %49250, %49248 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %49252 = torch.aten.cos %49251 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %49253 = torch.aten.sin %49251 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %49254 = torch.aten.complex %49252, %49253 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
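    // Broadcast the complex rotary table to all eight device promises
    // (@__device_0 .. @__device_7); each transfer round-trips through the
    // builtin tensor type so flow.tensor.transfer can move it.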
    %49255 = torch_c.to_builtin_tensor %49254 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49256 = flow.tensor.transfer %49255 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %49257 = torch_c.from_builtin_tensor %49256 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49258 = torch_c.to_builtin_tensor %49254 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49259 = flow.tensor.transfer %49258 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %49260 = torch_c.from_builtin_tensor %49259 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49261 = torch_c.to_builtin_tensor %49254 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49262 = flow.tensor.transfer %49261 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %49263 = torch_c.from_builtin_tensor %49262 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49264 = torch_c.to_builtin_tensor %49254 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49265 = flow.tensor.transfer %49264 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %49266 = torch_c.from_builtin_tensor %49265 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49267 = torch_c.to_builtin_tensor %49254 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49268 = flow.tensor.transfer %49267 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %49269 = torch_c.from_builtin_tensor %49268 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49270 = torch_c.to_builtin_tensor %49254 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49271 = flow.tensor.transfer %49270 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %49272 = torch_c.from_builtin_tensor %49271 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49273 = torch_c.to_builtin_tensor %49254 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49274 = flow.tensor.transfer %49273 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %49275 = torch_c.from_builtin_tensor %49274 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49276 = torch_c.to_builtin_tensor %49254 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49277 = flow.tensor.transfer %49276 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %49278 = torch_c.from_builtin_tensor %49277 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
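    // Per-device application of the rotary embedding to the [4,?,4,128]
    // query-like tensors: slice the table to the current sequence length,
    // broadcast it to [1,?,1,64], bitcast the f16 activations into
    // complex<f16> pairs, multiply by the table, bitcast back to f32, and
    // truncate to f16. The same block repeats once per device below.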
    %int1_46646 = torch.constant.int 1
    %49279 = torch.aten.size.int %49022, %int1_46646 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_46647 = torch.constant.int 0
    %49280 = torch.aten.add.int %int0_46647, %49279 : !torch.int, !torch.int -> !torch.int
    %int0_46648 = torch.constant.int 0
    %int0_46649 = torch.constant.int 0
    %int1_46650 = torch.constant.int 1
    %49281 = torch.aten.slice.Tensor %49257, %int0_46648, %int0_46649, %49280, %int1_46650 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49281, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46651 = torch.constant.int 1
    %int0_46652 = torch.constant.int 0
    %int9223372036854775807_46653 = torch.constant.int 9223372036854775807
    %int1_46654 = torch.constant.int 1
    %49282 = torch.aten.slice.Tensor %49281, %int1_46651, %int0_46652, %int9223372036854775807_46653, %int1_46654 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49282, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46655 = torch.constant.int 0
    %49283 = torch.aten.unsqueeze %49282, %int0_46655 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49283, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46656 = torch.constant.int 2
    %49284 = torch.aten.unsqueeze %49283, %int2_46656 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49284, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46657 = torch.constant.int 3
    %int0_46658 = torch.constant.int 0
    %int9223372036854775807_46659 = torch.constant.int 9223372036854775807
    %int1_46660 = torch.constant.int 1
    %49285 = torch.aten.slice.Tensor %49284, %int3_46657, %int0_46658, %int9223372036854775807_46659, %int1_46660 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49285, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49286 = torch_c.to_builtin_tensor %49194 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_46661 = arith.constant 1 : index
    %dim_46662 = tensor.dim %49286, %c1_46661 : tensor<4x?x4x128xf16>
    %49287 = flow.tensor.bitcast %49286 : tensor<4x?x4x128xf16>{%dim_46662} -> tensor<4x?x4x64xcomplex<f16>>{%dim_46662}
    %49288 = torch_c.from_builtin_tensor %49287 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %49288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %49289 = torch.aten.mul.Tensor %49288, %49285 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %49289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %49290 = torch_c.to_builtin_tensor %49289 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_46663 = arith.constant 1 : index
    %dim_46664 = tensor.dim %49290, %c1_46663 : tensor<4x?x4x64xcomplex<f32>>
    %49291 = flow.tensor.bitcast %49290 : tensor<4x?x4x64xcomplex<f32>>{%dim_46664} -> tensor<4x?x4x128xf32>{%dim_46664}
    %49292 = torch_c.from_builtin_tensor %49291 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %49292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_46665 = torch.constant.int 5
    %49293 = torch.prims.convert_element_type %49292, %int5_46665 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_46666 = torch.constant.int 1
    %49294 = torch.aten.size.int %49028, %int1_46666 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_46667 = torch.constant.int 0
    %49295 = torch.aten.add.int %int0_46667, %49294 : !torch.int, !torch.int -> !torch.int
    %int0_46668 = torch.constant.int 0
    %int0_46669 = torch.constant.int 0
    %int1_46670 = torch.constant.int 1
    %49296 = torch.aten.slice.Tensor %49260, %int0_46668, %int0_46669, %49295, %int1_46670 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49296, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46671 = torch.constant.int 1
    %int0_46672 = torch.constant.int 0
    %int9223372036854775807_46673 = torch.constant.int 9223372036854775807
    %int1_46674 = torch.constant.int 1
    %49297 = torch.aten.slice.Tensor %49296, %int1_46671, %int0_46672, %int9223372036854775807_46673, %int1_46674 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49297, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46675 = torch.constant.int 0
    %49298 = torch.aten.unsqueeze %49297, %int0_46675 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49298, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46676 = torch.constant.int 2
    %49299 = torch.aten.unsqueeze %49298, %int2_46676 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49299, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46677 = torch.constant.int 3
    %int0_46678 = torch.constant.int 0
    %int9223372036854775807_46679 = torch.constant.int 9223372036854775807
    %int1_46680 = torch.constant.int 1
    %49300 = torch.aten.slice.Tensor %49299, %int3_46677, %int0_46678, %int9223372036854775807_46679, %int1_46680 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49300, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49301 = torch_c.to_builtin_tensor %49196 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_46681 = arith.constant 1 : index
    %dim_46682 = tensor.dim %49301, %c1_46681 : tensor<4x?x4x128xf16>
    %49302 = flow.tensor.bitcast %49301 : tensor<4x?x4x128xf16>{%dim_46682} -> tensor<4x?x4x64xcomplex<f16>>{%dim_46682}
    %49303 = torch_c.from_builtin_tensor %49302 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %49303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %49304 = torch.aten.mul.Tensor %49303, %49300 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %49304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %49305 = torch_c.to_builtin_tensor %49304 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_46683 = arith.constant 1 : index
    %dim_46684 = tensor.dim %49305, %c1_46683 : tensor<4x?x4x64xcomplex<f32>>
    %49306 = flow.tensor.bitcast %49305 : tensor<4x?x4x64xcomplex<f32>>{%dim_46684} -> tensor<4x?x4x128xf32>{%dim_46684}
    %49307 = torch_c.from_builtin_tensor %49306 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %49307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_46685 = torch.constant.int 5
    %49308 = torch.prims.convert_element_type %49307, %int5_46685 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_46686 = torch.constant.int 1
    %49309 = torch.aten.size.int %49034, %int1_46686 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_46687 = torch.constant.int 0
    %49310 = torch.aten.add.int %int0_46687, %49309 : !torch.int, !torch.int -> !torch.int
    %int0_46688 = torch.constant.int 0
    %int0_46689 = torch.constant.int 0
    %int1_46690 = torch.constant.int 1
    %49311 = torch.aten.slice.Tensor %49263, %int0_46688, %int0_46689, %49310, %int1_46690 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49311, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46691 = torch.constant.int 1
    %int0_46692 = torch.constant.int 0
    %int9223372036854775807_46693 = torch.constant.int 9223372036854775807
    %int1_46694 = torch.constant.int 1
    %49312 = torch.aten.slice.Tensor %49311, %int1_46691, %int0_46692, %int9223372036854775807_46693, %int1_46694 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49312, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46695 = torch.constant.int 0
    %49313 = torch.aten.unsqueeze %49312, %int0_46695 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49313, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46696 = torch.constant.int 2
    %49314 = torch.aten.unsqueeze %49313, %int2_46696 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49314, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46697 = torch.constant.int 3
    %int0_46698 = torch.constant.int 0
    %int9223372036854775807_46699 = torch.constant.int 9223372036854775807
    %int1_46700 = torch.constant.int 1
    %49315 = torch.aten.slice.Tensor %49314, %int3_46697, %int0_46698, %int9223372036854775807_46699, %int1_46700 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49315, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49316 = torch_c.to_builtin_tensor %49198 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_46701 = arith.constant 1 : index
    %dim_46702 = tensor.dim %49316, %c1_46701 : tensor<4x?x4x128xf16>
    %49317 = flow.tensor.bitcast %49316 : tensor<4x?x4x128xf16>{%dim_46702} -> tensor<4x?x4x64xcomplex<f16>>{%dim_46702}
    %49318 = torch_c.from_builtin_tensor %49317 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %49318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %49319 = torch.aten.mul.Tensor %49318, %49315 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %49319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %49320 = torch_c.to_builtin_tensor %49319 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_46703 = arith.constant 1 : index
    %dim_46704 = tensor.dim %49320, %c1_46703 : tensor<4x?x4x64xcomplex<f32>>
    %49321 = flow.tensor.bitcast %49320 : tensor<4x?x4x64xcomplex<f32>>{%dim_46704} -> tensor<4x?x4x128xf32>{%dim_46704}
    %49322 = torch_c.from_builtin_tensor %49321 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %49322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_46705 = torch.constant.int 5
    %49323 = torch.prims.convert_element_type %49322, %int5_46705 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_46706 = torch.constant.int 1
    %49324 = torch.aten.size.int %49040, %int1_46706 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_46707 = torch.constant.int 0
    %49325 = torch.aten.add.int %int0_46707, %49324 : !torch.int, !torch.int -> !torch.int
    %int0_46708 = torch.constant.int 0
    %int0_46709 = torch.constant.int 0
    %int1_46710 = torch.constant.int 1
    %49326 = torch.aten.slice.Tensor %49266, %int0_46708, %int0_46709, %49325, %int1_46710 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49326, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46711 = torch.constant.int 1
    %int0_46712 = torch.constant.int 0
    %int9223372036854775807_46713 = torch.constant.int 9223372036854775807
    %int1_46714 = torch.constant.int 1
    %49327 = torch.aten.slice.Tensor %49326, %int1_46711, %int0_46712, %int9223372036854775807_46713, %int1_46714 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49327, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46715 = torch.constant.int 0
    %49328 = torch.aten.unsqueeze %49327, %int0_46715 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49328, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46716 = torch.constant.int 2
    %49329 = torch.aten.unsqueeze %49328, %int2_46716 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49329, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46717 = torch.constant.int 3
    %int0_46718 = torch.constant.int 0
    %int9223372036854775807_46719 = torch.constant.int 9223372036854775807
    %int1_46720 = torch.constant.int 1
    %49330 = torch.aten.slice.Tensor %49329, %int3_46717, %int0_46718, %int9223372036854775807_46719, %int1_46720 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49330, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49331 = torch_c.to_builtin_tensor %49200 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_46721 = arith.constant 1 : index
    %dim_46722 = tensor.dim %49331, %c1_46721 : tensor<4x?x4x128xf16>
    %49332 = flow.tensor.bitcast %49331 : tensor<4x?x4x128xf16>{%dim_46722} -> tensor<4x?x4x64xcomplex<f16>>{%dim_46722}
    %49333 = torch_c.from_builtin_tensor %49332 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %49333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %49334 = torch.aten.mul.Tensor %49333, %49330 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %49334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %49335 = torch_c.to_builtin_tensor %49334 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_46723 = arith.constant 1 : index
    %dim_46724 = tensor.dim %49335, %c1_46723 : tensor<4x?x4x64xcomplex<f32>>
    %49336 = flow.tensor.bitcast %49335 : tensor<4x?x4x64xcomplex<f32>>{%dim_46724} -> tensor<4x?x4x128xf32>{%dim_46724}
    %49337 = torch_c.from_builtin_tensor %49336 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %49337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_46725 = torch.constant.int 5
    %49338 = torch.prims.convert_element_type %49337, %int5_46725 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_46726 = torch.constant.int 1
    %49339 = torch.aten.size.int %49046, %int1_46726 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_46727 = torch.constant.int 0
    %49340 = torch.aten.add.int %int0_46727, %49339 : !torch.int, !torch.int -> !torch.int
    %int0_46728 = torch.constant.int 0
    %int0_46729 = torch.constant.int 0
    %int1_46730 = torch.constant.int 1
    %49341 = torch.aten.slice.Tensor %49269, %int0_46728, %int0_46729, %49340, %int1_46730 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49341, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46731 = torch.constant.int 1
    %int0_46732 = torch.constant.int 0
    %int9223372036854775807_46733 = torch.constant.int 9223372036854775807
    %int1_46734 = torch.constant.int 1
    %49342 = torch.aten.slice.Tensor %49341, %int1_46731, %int0_46732, %int9223372036854775807_46733, %int1_46734 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49342, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46735 = torch.constant.int 0
    %49343 = torch.aten.unsqueeze %49342, %int0_46735 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49343, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46736 = torch.constant.int 2
    %49344 = torch.aten.unsqueeze %49343, %int2_46736 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49344, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46737 = torch.constant.int 3
    %int0_46738 = torch.constant.int 0
    %int9223372036854775807_46739 = torch.constant.int 9223372036854775807
    %int1_46740 = torch.constant.int 1
    %49345 = torch.aten.slice.Tensor %49344, %int3_46737, %int0_46738, %int9223372036854775807_46739, %int1_46740 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49345, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49346 = torch_c.to_builtin_tensor %49202 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_46741 = arith.constant 1 : index
    %dim_46742 = tensor.dim %49346, %c1_46741 : tensor<4x?x4x128xf16>
    %49347 = flow.tensor.bitcast %49346 : tensor<4x?x4x128xf16>{%dim_46742} -> tensor<4x?x4x64xcomplex<f16>>{%dim_46742}
    %49348 = torch_c.from_builtin_tensor %49347 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %49348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %49349 = torch.aten.mul.Tensor %49348, %49345 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %49349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %49350 = torch_c.to_builtin_tensor %49349 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_46743 = arith.constant 1 : index
    %dim_46744 = tensor.dim %49350, %c1_46743 : tensor<4x?x4x64xcomplex<f32>>
    %49351 = flow.tensor.bitcast %49350 : tensor<4x?x4x64xcomplex<f32>>{%dim_46744} -> tensor<4x?x4x128xf32>{%dim_46744}
    %49352 = torch_c.from_builtin_tensor %49351 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %49352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_46745 = torch.constant.int 5
    %49353 = torch.prims.convert_element_type %49352, %int5_46745 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
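    // Annotation (not produced by the exporter): the block above is the rotary-embedding
    // idiom this function repeats once per device shard. A [4,seq,4,128] f16 activation is
    // bitcast to [4,seq,4,64] complex<f16> (flow.tensor.bitcast pairs adjacent f16 lanes),
    // multiplied by a [1,seq,1,64] complex<f32> slice of the position table, bitcast back
    // to [4,seq,4,128] f32, and truncated to f16. The same sequence follows for the
    // remaining shards (%49204, %49206, %49208).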
    %int1_46746 = torch.constant.int 1
    %49354 = torch.aten.size.int %49052, %int1_46746 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_46747 = torch.constant.int 0
    %49355 = torch.aten.add.int %int0_46747, %49354 : !torch.int, !torch.int -> !torch.int
    %int0_46748 = torch.constant.int 0
    %int0_46749 = torch.constant.int 0
    %int1_46750 = torch.constant.int 1
    %49356 = torch.aten.slice.Tensor %49272, %int0_46748, %int0_46749, %49355, %int1_46750 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49356, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46751 = torch.constant.int 1
    %int0_46752 = torch.constant.int 0
    %int9223372036854775807_46753 = torch.constant.int 9223372036854775807
    %int1_46754 = torch.constant.int 1
    %49357 = torch.aten.slice.Tensor %49356, %int1_46751, %int0_46752, %int9223372036854775807_46753, %int1_46754 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49357, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46755 = torch.constant.int 0
    %49358 = torch.aten.unsqueeze %49357, %int0_46755 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49358, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46756 = torch.constant.int 2
    %49359 = torch.aten.unsqueeze %49358, %int2_46756 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49359, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46757 = torch.constant.int 3
    %int0_46758 = torch.constant.int 0
    %int9223372036854775807_46759 = torch.constant.int 9223372036854775807
    %int1_46760 = torch.constant.int 1
    %49360 = torch.aten.slice.Tensor %49359, %int3_46757, %int0_46758, %int9223372036854775807_46759, %int1_46760 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49360, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49361 = torch_c.to_builtin_tensor %49204 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_46761 = arith.constant 1 : index
    %dim_46762 = tensor.dim %49361, %c1_46761 : tensor<4x?x4x128xf16>
    %49362 = flow.tensor.bitcast %49361 : tensor<4x?x4x128xf16>{%dim_46762} -> tensor<4x?x4x64xcomplex<f16>>{%dim_46762}
    %49363 = torch_c.from_builtin_tensor %49362 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %49363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %49364 = torch.aten.mul.Tensor %49363, %49360 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %49364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %49365 = torch_c.to_builtin_tensor %49364 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_46763 = arith.constant 1 : index
    %dim_46764 = tensor.dim %49365, %c1_46763 : tensor<4x?x4x64xcomplex<f32>>
    %49366 = flow.tensor.bitcast %49365 : tensor<4x?x4x64xcomplex<f32>>{%dim_46764} -> tensor<4x?x4x128xf32>{%dim_46764}
    %49367 = torch_c.from_builtin_tensor %49366 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %49367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_46765 = torch.constant.int 5
    %49368 = torch.prims.convert_element_type %49367, %int5_46765 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_46766 = torch.constant.int 1
    %49369 = torch.aten.size.int %49058, %int1_46766 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_46767 = torch.constant.int 0
    %49370 = torch.aten.add.int %int0_46767, %49369 : !torch.int, !torch.int -> !torch.int
    %int0_46768 = torch.constant.int 0
    %int0_46769 = torch.constant.int 0
    %int1_46770 = torch.constant.int 1
    %49371 = torch.aten.slice.Tensor %49275, %int0_46768, %int0_46769, %49370, %int1_46770 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49371, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46771 = torch.constant.int 1
    %int0_46772 = torch.constant.int 0
    %int9223372036854775807_46773 = torch.constant.int 9223372036854775807
    %int1_46774 = torch.constant.int 1
    %49372 = torch.aten.slice.Tensor %49371, %int1_46771, %int0_46772, %int9223372036854775807_46773, %int1_46774 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49372, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46775 = torch.constant.int 0
    %49373 = torch.aten.unsqueeze %49372, %int0_46775 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49373, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46776 = torch.constant.int 2
    %49374 = torch.aten.unsqueeze %49373, %int2_46776 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49374, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46777 = torch.constant.int 3
    %int0_46778 = torch.constant.int 0
    %int9223372036854775807_46779 = torch.constant.int 9223372036854775807
    %int1_46780 = torch.constant.int 1
    %49375 = torch.aten.slice.Tensor %49374, %int3_46777, %int0_46778, %int9223372036854775807_46779, %int1_46780 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49375, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49376 = torch_c.to_builtin_tensor %49206 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_46781 = arith.constant 1 : index
    %dim_46782 = tensor.dim %49376, %c1_46781 : tensor<4x?x4x128xf16>
    %49377 = flow.tensor.bitcast %49376 : tensor<4x?x4x128xf16>{%dim_46782} -> tensor<4x?x4x64xcomplex<f16>>{%dim_46782}
    %49378 = torch_c.from_builtin_tensor %49377 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %49378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %49379 = torch.aten.mul.Tensor %49378, %49375 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %49379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %49380 = torch_c.to_builtin_tensor %49379 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_46783 = arith.constant 1 : index
    %dim_46784 = tensor.dim %49380, %c1_46783 : tensor<4x?x4x64xcomplex<f32>>
    %49381 = flow.tensor.bitcast %49380 : tensor<4x?x4x64xcomplex<f32>>{%dim_46784} -> tensor<4x?x4x128xf32>{%dim_46784}
    %49382 = torch_c.from_builtin_tensor %49381 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %49382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_46785 = torch.constant.int 5
    %49383 = torch.prims.convert_element_type %49382, %int5_46785 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_46786 = torch.constant.int 1
    %49384 = torch.aten.size.int %49064, %int1_46786 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_46787 = torch.constant.int 0
    %49385 = torch.aten.add.int %int0_46787, %49384 : !torch.int, !torch.int -> !torch.int
    %int0_46788 = torch.constant.int 0
    %int0_46789 = torch.constant.int 0
    %int1_46790 = torch.constant.int 1
    %49386 = torch.aten.slice.Tensor %49278, %int0_46788, %int0_46789, %49385, %int1_46790 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49386, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46791 = torch.constant.int 1
    %int0_46792 = torch.constant.int 0
    %int9223372036854775807_46793 = torch.constant.int 9223372036854775807
    %int1_46794 = torch.constant.int 1
    %49387 = torch.aten.slice.Tensor %49386, %int1_46791, %int0_46792, %int9223372036854775807_46793, %int1_46794 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49387, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46795 = torch.constant.int 0
    %49388 = torch.aten.unsqueeze %49387, %int0_46795 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49388, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46796 = torch.constant.int 2
    %49389 = torch.aten.unsqueeze %49388, %int2_46796 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49389, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46797 = torch.constant.int 3
    %int0_46798 = torch.constant.int 0
    %int9223372036854775807_46799 = torch.constant.int 9223372036854775807
    %int1_46800 = torch.constant.int 1
    %49390 = torch.aten.slice.Tensor %49389, %int3_46797, %int0_46798, %int9223372036854775807_46799, %int1_46800 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49390, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49391 = torch_c.to_builtin_tensor %49208 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_46801 = arith.constant 1 : index
    %dim_46802 = tensor.dim %49391, %c1_46801 : tensor<4x?x4x128xf16>
    %49392 = flow.tensor.bitcast %49391 : tensor<4x?x4x128xf16>{%dim_46802} -> tensor<4x?x4x64xcomplex<f16>>{%dim_46802}
    %49393 = torch_c.from_builtin_tensor %49392 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %49393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %49394 = torch.aten.mul.Tensor %49393, %49390 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %49394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %49395 = torch_c.to_builtin_tensor %49394 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_46803 = arith.constant 1 : index
    %dim_46804 = tensor.dim %49395, %c1_46803 : tensor<4x?x4x64xcomplex<f32>>
    %49396 = flow.tensor.bitcast %49395 : tensor<4x?x4x64xcomplex<f32>>{%dim_46804} -> tensor<4x?x4x128xf32>{%dim_46804}
    %49397 = torch_c.from_builtin_tensor %49396 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %49397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_46805 = torch.constant.int 5
    %49398 = torch.prims.convert_element_type %49397, %int5_46805 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
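    // Annotation: the ops below rebuild the RoPE frequency table on the host rather than
    // reusing an earlier copy. The arithmetic matches Llama-style rotary embeddings:
    //   inv_freq[j] = 1 / 5.0e5^(2j/128) for j = 0..63
    //   table[p, j] = cos(p * inv_freq[j]) + i * sin(p * inv_freq[j]), p = 0..131071
    // yielding the [131072,64] complex<f32> tensor consumed by the slices below.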
    %int131072_46806 = torch.constant.int 131072
    %none_46807 = torch.constant.none
    %none_46808 = torch.constant.none
    %cpu_46809 = torch.constant.device "cpu"
    %false_46810 = torch.constant.bool false
    %49399 = torch.aten.arange %int131072_46806, %none_46807, %none_46808, %cpu_46809, %false_46810 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_46811 = torch.constant.int 0
    %int128_46812 = torch.constant.int 128
    %int2_46813 = torch.constant.int 2
    %none_46814 = torch.constant.none
    %none_46815 = torch.constant.none
    %cpu_46816 = torch.constant.device "cpu"
    %false_46817 = torch.constant.bool false
    %49400 = torch.aten.arange.start_step %int0_46811, %int128_46812, %int2_46813, %none_46814, %none_46815, %cpu_46816, %false_46817 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_46818 = torch.constant.int 0
    %int0_46819 = torch.constant.int 0
    %int64_46820 = torch.constant.int 64
    %int1_46821 = torch.constant.int 1
    %49401 = torch.aten.slice.Tensor %49400, %int0_46818, %int0_46819, %int64_46820, %int1_46821 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_46822 = torch.constant.int 6
    %49402 = torch.prims.convert_element_type %49401, %int6_46822 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_46823 = torch.constant.int 128
    %49403 = torch.aten.div.Scalar %49402, %int128_46823 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_46824 = torch.constant.float 5.000000e+05
    %49404 = torch.aten.pow.Scalar %float5.000000e05_46824, %49403 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %49405 = torch.aten.reciprocal %49404 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_46825 = torch.constant.float 1.000000e+00
    %49406 = torch.aten.mul.Scalar %49405, %float1.000000e00_46825 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_46826 = torch.constant.int 131072
    %int1_46827 = torch.constant.int 1
    %49407 = torch.prim.ListConstruct %int131072_46826, %int1_46827 : (!torch.int, !torch.int) -> !torch.list<int>
    %49408 = torch.aten.view %49399, %49407 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %49409 = torch.aten.mul.Tensor %49408, %49406 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %49410 = torch.aten.cos %49409 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %49411 = torch.aten.sin %49409 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %49412 = torch.aten.complex %49410, %49411 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
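    // Annotation: the host-materialized table is then replicated verbatim to all eight
    // devices with flow.tensor.transfer, one copy per #hal.device.promise<@__device_N>,
    // so each shard can slice its own replica locally.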
    %49413 = torch_c.to_builtin_tensor %49412 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49414 = flow.tensor.transfer %49413 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %49415 = torch_c.from_builtin_tensor %49414 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49416 = torch_c.to_builtin_tensor %49412 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49417 = flow.tensor.transfer %49416 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %49418 = torch_c.from_builtin_tensor %49417 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49419 = torch_c.to_builtin_tensor %49412 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49420 = flow.tensor.transfer %49419 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %49421 = torch_c.from_builtin_tensor %49420 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49422 = torch_c.to_builtin_tensor %49412 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49423 = flow.tensor.transfer %49422 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %49424 = torch_c.from_builtin_tensor %49423 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49425 = torch_c.to_builtin_tensor %49412 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49426 = flow.tensor.transfer %49425 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %49427 = torch_c.from_builtin_tensor %49426 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49428 = torch_c.to_builtin_tensor %49412 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49429 = flow.tensor.transfer %49428 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %49430 = torch_c.from_builtin_tensor %49429 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49431 = torch_c.to_builtin_tensor %49412 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49432 = flow.tensor.transfer %49431 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %49433 = torch_c.from_builtin_tensor %49432 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %49434 = torch_c.to_builtin_tensor %49412 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %49435 = flow.tensor.transfer %49434 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %49436 = torch_c.from_builtin_tensor %49435 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
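    // Annotation: the eight blocks below apply the same complex-multiply RoPE to the
    // single-head [4,seq,1,128] slices (%49210 through %49224), presumably the per-device
    // key shards, each using its device-local table replica (%49415 through %49436)
    // sliced to the current sequence length.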
    %int1_46828 = torch.constant.int 1
    %49437 = torch.aten.size.int %49086, %int1_46828 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_46829 = torch.constant.int 0
    %49438 = torch.aten.add.int %int0_46829, %49437 : !torch.int, !torch.int -> !torch.int
    %int0_46830 = torch.constant.int 0
    %int0_46831 = torch.constant.int 0
    %int1_46832 = torch.constant.int 1
    %49439 = torch.aten.slice.Tensor %49415, %int0_46830, %int0_46831, %49438, %int1_46832 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49439, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46833 = torch.constant.int 1
    %int0_46834 = torch.constant.int 0
    %int9223372036854775807_46835 = torch.constant.int 9223372036854775807
    %int1_46836 = torch.constant.int 1
    %49440 = torch.aten.slice.Tensor %49439, %int1_46833, %int0_46834, %int9223372036854775807_46835, %int1_46836 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49440, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46837 = torch.constant.int 0
    %49441 = torch.aten.unsqueeze %49440, %int0_46837 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49441, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46838 = torch.constant.int 2
    %49442 = torch.aten.unsqueeze %49441, %int2_46838 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49442, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46839 = torch.constant.int 3
    %int0_46840 = torch.constant.int 0
    %int9223372036854775807_46841 = torch.constant.int 9223372036854775807
    %int1_46842 = torch.constant.int 1
    %49443 = torch.aten.slice.Tensor %49442, %int3_46839, %int0_46840, %int9223372036854775807_46841, %int1_46842 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49443, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49444 = torch_c.to_builtin_tensor %49210 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_46843 = arith.constant 1 : index
    %dim_46844 = tensor.dim %49444, %c1_46843 : tensor<4x?x1x128xf16>
    %49445 = flow.tensor.bitcast %49444 : tensor<4x?x1x128xf16>{%dim_46844} -> tensor<4x?x1x64xcomplex<f16>>{%dim_46844}
    %49446 = torch_c.from_builtin_tensor %49445 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %49446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %49447 = torch.aten.mul.Tensor %49446, %49443 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %49448 = torch_c.to_builtin_tensor %49447 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_46845 = arith.constant 1 : index
    %dim_46846 = tensor.dim %49448, %c1_46845 : tensor<4x?x1x64xcomplex<f32>>
    %49449 = flow.tensor.bitcast %49448 : tensor<4x?x1x64xcomplex<f32>>{%dim_46846} -> tensor<4x?x1x128xf32>{%dim_46846}
    %49450 = torch_c.from_builtin_tensor %49449 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %49450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_46847 = torch.constant.int 5
    %49451 = torch.prims.convert_element_type %49450, %int5_46847 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_46848 = torch.constant.int 1
    %49452 = torch.aten.size.int %49092, %int1_46848 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_46849 = torch.constant.int 0
    %49453 = torch.aten.add.int %int0_46849, %49452 : !torch.int, !torch.int -> !torch.int
    %int0_46850 = torch.constant.int 0
    %int0_46851 = torch.constant.int 0
    %int1_46852 = torch.constant.int 1
    %49454 = torch.aten.slice.Tensor %49418, %int0_46850, %int0_46851, %49453, %int1_46852 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49454, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46853 = torch.constant.int 1
    %int0_46854 = torch.constant.int 0
    %int9223372036854775807_46855 = torch.constant.int 9223372036854775807
    %int1_46856 = torch.constant.int 1
    %49455 = torch.aten.slice.Tensor %49454, %int1_46853, %int0_46854, %int9223372036854775807_46855, %int1_46856 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49455, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46857 = torch.constant.int 0
    %49456 = torch.aten.unsqueeze %49455, %int0_46857 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49456, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46858 = torch.constant.int 2
    %49457 = torch.aten.unsqueeze %49456, %int2_46858 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49457, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46859 = torch.constant.int 3
    %int0_46860 = torch.constant.int 0
    %int9223372036854775807_46861 = torch.constant.int 9223372036854775807
    %int1_46862 = torch.constant.int 1
    %49458 = torch.aten.slice.Tensor %49457, %int3_46859, %int0_46860, %int9223372036854775807_46861, %int1_46862 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49458, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49459 = torch_c.to_builtin_tensor %49212 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_46863 = arith.constant 1 : index
    %dim_46864 = tensor.dim %49459, %c1_46863 : tensor<4x?x1x128xf16>
    %49460 = flow.tensor.bitcast %49459 : tensor<4x?x1x128xf16>{%dim_46864} -> tensor<4x?x1x64xcomplex<f16>>{%dim_46864}
    %49461 = torch_c.from_builtin_tensor %49460 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %49461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %49462 = torch.aten.mul.Tensor %49461, %49458 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %49463 = torch_c.to_builtin_tensor %49462 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_46865 = arith.constant 1 : index
    %dim_46866 = tensor.dim %49463, %c1_46865 : tensor<4x?x1x64xcomplex<f32>>
    %49464 = flow.tensor.bitcast %49463 : tensor<4x?x1x64xcomplex<f32>>{%dim_46866} -> tensor<4x?x1x128xf32>{%dim_46866}
    %49465 = torch_c.from_builtin_tensor %49464 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %49465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_46867 = torch.constant.int 5
    %49466 = torch.prims.convert_element_type %49465, %int5_46867 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_46868 = torch.constant.int 1
    %49467 = torch.aten.size.int %49098, %int1_46868 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_46869 = torch.constant.int 0
    %49468 = torch.aten.add.int %int0_46869, %49467 : !torch.int, !torch.int -> !torch.int
    %int0_46870 = torch.constant.int 0
    %int0_46871 = torch.constant.int 0
    %int1_46872 = torch.constant.int 1
    %49469 = torch.aten.slice.Tensor %49421, %int0_46870, %int0_46871, %49468, %int1_46872 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49469, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46873 = torch.constant.int 1
    %int0_46874 = torch.constant.int 0
    %int9223372036854775807_46875 = torch.constant.int 9223372036854775807
    %int1_46876 = torch.constant.int 1
    %49470 = torch.aten.slice.Tensor %49469, %int1_46873, %int0_46874, %int9223372036854775807_46875, %int1_46876 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49470, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46877 = torch.constant.int 0
    %49471 = torch.aten.unsqueeze %49470, %int0_46877 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49471, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46878 = torch.constant.int 2
    %49472 = torch.aten.unsqueeze %49471, %int2_46878 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49472, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46879 = torch.constant.int 3
    %int0_46880 = torch.constant.int 0
    %int9223372036854775807_46881 = torch.constant.int 9223372036854775807
    %int1_46882 = torch.constant.int 1
    %49473 = torch.aten.slice.Tensor %49472, %int3_46879, %int0_46880, %int9223372036854775807_46881, %int1_46882 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49473, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49474 = torch_c.to_builtin_tensor %49214 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_46883 = arith.constant 1 : index
    %dim_46884 = tensor.dim %49474, %c1_46883 : tensor<4x?x1x128xf16>
    %49475 = flow.tensor.bitcast %49474 : tensor<4x?x1x128xf16>{%dim_46884} -> tensor<4x?x1x64xcomplex<f16>>{%dim_46884}
    %49476 = torch_c.from_builtin_tensor %49475 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %49476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %49477 = torch.aten.mul.Tensor %49476, %49473 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %49478 = torch_c.to_builtin_tensor %49477 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_46885 = arith.constant 1 : index
    %dim_46886 = tensor.dim %49478, %c1_46885 : tensor<4x?x1x64xcomplex<f32>>
    %49479 = flow.tensor.bitcast %49478 : tensor<4x?x1x64xcomplex<f32>>{%dim_46886} -> tensor<4x?x1x128xf32>{%dim_46886}
    %49480 = torch_c.from_builtin_tensor %49479 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %49480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_46887 = torch.constant.int 5
    %49481 = torch.prims.convert_element_type %49480, %int5_46887 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_46888 = torch.constant.int 1
    %49482 = torch.aten.size.int %49104, %int1_46888 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_46889 = torch.constant.int 0
    %49483 = torch.aten.add.int %int0_46889, %49482 : !torch.int, !torch.int -> !torch.int
    %int0_46890 = torch.constant.int 0
    %int0_46891 = torch.constant.int 0
    %int1_46892 = torch.constant.int 1
    %49484 = torch.aten.slice.Tensor %49424, %int0_46890, %int0_46891, %49483, %int1_46892 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49484, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46893 = torch.constant.int 1
    %int0_46894 = torch.constant.int 0
    %int9223372036854775807_46895 = torch.constant.int 9223372036854775807
    %int1_46896 = torch.constant.int 1
    %49485 = torch.aten.slice.Tensor %49484, %int1_46893, %int0_46894, %int9223372036854775807_46895, %int1_46896 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49485, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46897 = torch.constant.int 0
    %49486 = torch.aten.unsqueeze %49485, %int0_46897 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49486, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46898 = torch.constant.int 2
    %49487 = torch.aten.unsqueeze %49486, %int2_46898 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49487, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46899 = torch.constant.int 3
    %int0_46900 = torch.constant.int 0
    %int9223372036854775807_46901 = torch.constant.int 9223372036854775807
    %int1_46902 = torch.constant.int 1
    %49488 = torch.aten.slice.Tensor %49487, %int3_46899, %int0_46900, %int9223372036854775807_46901, %int1_46902 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49488, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49489 = torch_c.to_builtin_tensor %49216 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_46903 = arith.constant 1 : index
    %dim_46904 = tensor.dim %49489, %c1_46903 : tensor<4x?x1x128xf16>
    %49490 = flow.tensor.bitcast %49489 : tensor<4x?x1x128xf16>{%dim_46904} -> tensor<4x?x1x64xcomplex<f16>>{%dim_46904}
    %49491 = torch_c.from_builtin_tensor %49490 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %49491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %49492 = torch.aten.mul.Tensor %49491, %49488 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %49493 = torch_c.to_builtin_tensor %49492 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_46905 = arith.constant 1 : index
    %dim_46906 = tensor.dim %49493, %c1_46905 : tensor<4x?x1x64xcomplex<f32>>
    %49494 = flow.tensor.bitcast %49493 : tensor<4x?x1x64xcomplex<f32>>{%dim_46906} -> tensor<4x?x1x128xf32>{%dim_46906}
    %49495 = torch_c.from_builtin_tensor %49494 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %49495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_46907 = torch.constant.int 5
    %49496 = torch.prims.convert_element_type %49495, %int5_46907 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_46908 = torch.constant.int 1
    %49497 = torch.aten.size.int %49110, %int1_46908 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_46909 = torch.constant.int 0
    %49498 = torch.aten.add.int %int0_46909, %49497 : !torch.int, !torch.int -> !torch.int
    %int0_46910 = torch.constant.int 0
    %int0_46911 = torch.constant.int 0
    %int1_46912 = torch.constant.int 1
    %49499 = torch.aten.slice.Tensor %49427, %int0_46910, %int0_46911, %49498, %int1_46912 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49499, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46913 = torch.constant.int 1
    %int0_46914 = torch.constant.int 0
    %int9223372036854775807_46915 = torch.constant.int 9223372036854775807
    %int1_46916 = torch.constant.int 1
    %49500 = torch.aten.slice.Tensor %49499, %int1_46913, %int0_46914, %int9223372036854775807_46915, %int1_46916 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49500, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46917 = torch.constant.int 0
    %49501 = torch.aten.unsqueeze %49500, %int0_46917 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49501, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46918 = torch.constant.int 2
    %49502 = torch.aten.unsqueeze %49501, %int2_46918 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49502, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46919 = torch.constant.int 3
    %int0_46920 = torch.constant.int 0
    %int9223372036854775807_46921 = torch.constant.int 9223372036854775807
    %int1_46922 = torch.constant.int 1
    %49503 = torch.aten.slice.Tensor %49502, %int3_46919, %int0_46920, %int9223372036854775807_46921, %int1_46922 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49503, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49504 = torch_c.to_builtin_tensor %49218 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_46923 = arith.constant 1 : index
    %dim_46924 = tensor.dim %49504, %c1_46923 : tensor<4x?x1x128xf16>
    %49505 = flow.tensor.bitcast %49504 : tensor<4x?x1x128xf16>{%dim_46924} -> tensor<4x?x1x64xcomplex<f16>>{%dim_46924}
    %49506 = torch_c.from_builtin_tensor %49505 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %49506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %49507 = torch.aten.mul.Tensor %49506, %49503 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %49508 = torch_c.to_builtin_tensor %49507 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_46925 = arith.constant 1 : index
    %dim_46926 = tensor.dim %49508, %c1_46925 : tensor<4x?x1x64xcomplex<f32>>
    %49509 = flow.tensor.bitcast %49508 : tensor<4x?x1x64xcomplex<f32>>{%dim_46926} -> tensor<4x?x1x128xf32>{%dim_46926}
    %49510 = torch_c.from_builtin_tensor %49509 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %49510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_46927 = torch.constant.int 5
    %49511 = torch.prims.convert_element_type %49510, %int5_46927 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_46928 = torch.constant.int 1
    %49512 = torch.aten.size.int %49116, %int1_46928 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_46929 = torch.constant.int 0
    %49513 = torch.aten.add.int %int0_46929, %49512 : !torch.int, !torch.int -> !torch.int
    %int0_46930 = torch.constant.int 0
    %int0_46931 = torch.constant.int 0
    %int1_46932 = torch.constant.int 1
    %49514 = torch.aten.slice.Tensor %49430, %int0_46930, %int0_46931, %49513, %int1_46932 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49514, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46933 = torch.constant.int 1
    %int0_46934 = torch.constant.int 0
    %int9223372036854775807_46935 = torch.constant.int 9223372036854775807
    %int1_46936 = torch.constant.int 1
    %49515 = torch.aten.slice.Tensor %49514, %int1_46933, %int0_46934, %int9223372036854775807_46935, %int1_46936 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49515, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46937 = torch.constant.int 0
    %49516 = torch.aten.unsqueeze %49515, %int0_46937 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49516, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46938 = torch.constant.int 2
    %49517 = torch.aten.unsqueeze %49516, %int2_46938 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49517, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46939 = torch.constant.int 3
    %int0_46940 = torch.constant.int 0
    %int9223372036854775807_46941 = torch.constant.int 9223372036854775807
    %int1_46942 = torch.constant.int 1
    %49518 = torch.aten.slice.Tensor %49517, %int3_46939, %int0_46940, %int9223372036854775807_46941, %int1_46942 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49518, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49519 = torch_c.to_builtin_tensor %49220 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_46943 = arith.constant 1 : index
    %dim_46944 = tensor.dim %49519, %c1_46943 : tensor<4x?x1x128xf16>
    %49520 = flow.tensor.bitcast %49519 : tensor<4x?x1x128xf16>{%dim_46944} -> tensor<4x?x1x64xcomplex<f16>>{%dim_46944}
    %49521 = torch_c.from_builtin_tensor %49520 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %49521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %49522 = torch.aten.mul.Tensor %49521, %49518 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %49523 = torch_c.to_builtin_tensor %49522 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_46945 = arith.constant 1 : index
    %dim_46946 = tensor.dim %49523, %c1_46945 : tensor<4x?x1x64xcomplex<f32>>
    %49524 = flow.tensor.bitcast %49523 : tensor<4x?x1x64xcomplex<f32>>{%dim_46946} -> tensor<4x?x1x128xf32>{%dim_46946}
    %49525 = torch_c.from_builtin_tensor %49524 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %49525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_46947 = torch.constant.int 5
    %49526 = torch.prims.convert_element_type %49525, %int5_46947 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_46948 = torch.constant.int 1
    %49527 = torch.aten.size.int %49122, %int1_46948 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_46949 = torch.constant.int 0
    %49528 = torch.aten.add.int %int0_46949, %49527 : !torch.int, !torch.int -> !torch.int
    %int0_46950 = torch.constant.int 0
    %int0_46951 = torch.constant.int 0
    %int1_46952 = torch.constant.int 1
    %49529 = torch.aten.slice.Tensor %49433, %int0_46950, %int0_46951, %49528, %int1_46952 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49529, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46953 = torch.constant.int 1
    %int0_46954 = torch.constant.int 0
    %int9223372036854775807_46955 = torch.constant.int 9223372036854775807
    %int1_46956 = torch.constant.int 1
    %49530 = torch.aten.slice.Tensor %49529, %int1_46953, %int0_46954, %int9223372036854775807_46955, %int1_46956 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49530, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46957 = torch.constant.int 0
    %49531 = torch.aten.unsqueeze %49530, %int0_46957 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49531, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46958 = torch.constant.int 2
    %49532 = torch.aten.unsqueeze %49531, %int2_46958 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49532, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46959 = torch.constant.int 3
    %int0_46960 = torch.constant.int 0
    %int9223372036854775807_46961 = torch.constant.int 9223372036854775807
    %int1_46962 = torch.constant.int 1
    %49533 = torch.aten.slice.Tensor %49532, %int3_46959, %int0_46960, %int9223372036854775807_46961, %int1_46962 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49533, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49534 = torch_c.to_builtin_tensor %49222 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_46963 = arith.constant 1 : index
    %dim_46964 = tensor.dim %49534, %c1_46963 : tensor<4x?x1x128xf16>
    %49535 = flow.tensor.bitcast %49534 : tensor<4x?x1x128xf16>{%dim_46964} -> tensor<4x?x1x64xcomplex<f16>>{%dim_46964}
    %49536 = torch_c.from_builtin_tensor %49535 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %49536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %49537 = torch.aten.mul.Tensor %49536, %49533 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %49538 = torch_c.to_builtin_tensor %49537 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_46965 = arith.constant 1 : index
    %dim_46966 = tensor.dim %49538, %c1_46965 : tensor<4x?x1x64xcomplex<f32>>
    %49539 = flow.tensor.bitcast %49538 : tensor<4x?x1x64xcomplex<f32>>{%dim_46966} -> tensor<4x?x1x128xf32>{%dim_46966}
    %49540 = torch_c.from_builtin_tensor %49539 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %49540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_46967 = torch.constant.int 5
    %49541 = torch.prims.convert_element_type %49540, %int5_46967 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_46968 = torch.constant.int 1
    %49542 = torch.aten.size.int %49128, %int1_46968 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_46969 = torch.constant.int 0
    %49543 = torch.aten.add.int %int0_46969, %49542 : !torch.int, !torch.int -> !torch.int
    %int0_46970 = torch.constant.int 0
    %int0_46971 = torch.constant.int 0
    %int1_46972 = torch.constant.int 1
    %49544 = torch.aten.slice.Tensor %49436, %int0_46970, %int0_46971, %49543, %int1_46972 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49544, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_46973 = torch.constant.int 1
    %int0_46974 = torch.constant.int 0
    %int9223372036854775807_46975 = torch.constant.int 9223372036854775807
    %int1_46976 = torch.constant.int 1
    %49545 = torch.aten.slice.Tensor %49544, %int1_46973, %int0_46974, %int9223372036854775807_46975, %int1_46976 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %49545, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_46977 = torch.constant.int 0
    %49546 = torch.aten.unsqueeze %49545, %int0_46977 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %49546, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_46978 = torch.constant.int 2
    %49547 = torch.aten.unsqueeze %49546, %int2_46978 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49547, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_46979 = torch.constant.int 3
    %int0_46980 = torch.constant.int 0
    %int9223372036854775807_46981 = torch.constant.int 9223372036854775807
    %int1_46982 = torch.constant.int 1
    %49548 = torch.aten.slice.Tensor %49547, %int3_46979, %int0_46980, %int9223372036854775807_46981, %int1_46982 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49548, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %49549 = torch_c.to_builtin_tensor %49224 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_46983 = arith.constant 1 : index
    %dim_46984 = tensor.dim %49549, %c1_46983 : tensor<4x?x1x128xf16>
    %49550 = flow.tensor.bitcast %49549 : tensor<4x?x1x128xf16>{%dim_46984} -> tensor<4x?x1x64xcomplex<f16>>{%dim_46984}
    %49551 = torch_c.from_builtin_tensor %49550 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %49551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %49552 = torch.aten.mul.Tensor %49551, %49548 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %49552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %49553 = torch_c.to_builtin_tensor %49552 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_46985 = arith.constant 1 : index
    %dim_46986 = tensor.dim %49553, %c1_46985 : tensor<4x?x1x64xcomplex<f32>>
    %49554 = flow.tensor.bitcast %49553 : tensor<4x?x1x64xcomplex<f32>>{%dim_46986} -> tensor<4x?x1x128xf32>{%dim_46986}
    %49555 = torch_c.from_builtin_tensor %49554 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %49555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_46987 = torch.constant.int 5
    %49556 = torch.prims.convert_element_type %49555, %int5_46987 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %49556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
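    // Annotation: the mul.Scalar ops below scale each device's page-id tensor
    // (%2364 through %2385) by 64. Reading the constants, 64 is consistent with a paged
    // KV cache stride of num_layers * 2 slots per page (32 layers, one K and one V slot
    // each); this is an inference from the IR, not something it states.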
    %int64_46988 = torch.constant.int 64
    %49557 = torch.aten.mul.Scalar %2364, %int64_46988 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49557, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_46989 = torch.constant.int 64
    %49558 = torch.aten.mul.Scalar %2367, %int64_46989 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49558, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_46990 = torch.constant.int 64
    %49559 = torch.aten.mul.Scalar %2370, %int64_46990 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49559, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_46991 = torch.constant.int 64
    %49560 = torch.aten.mul.Scalar %2373, %int64_46991 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49560, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_46992 = torch.constant.int 64
    %49561 = torch.aten.mul.Scalar %2376, %int64_46992 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49561, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_46993 = torch.constant.int 64
    %49562 = torch.aten.mul.Scalar %2379, %int64_46993 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49562, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_46994 = torch.constant.int 64
    %49563 = torch.aten.mul.Scalar %2382, %int64_46994 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49563, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_46995 = torch.constant.int 64
    %49564 = torch.aten.mul.Scalar %2385, %int64_46995 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49564, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
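    // Annotation: adding 50 then selects a fixed slot within each page; under the same
    // reading (slot = 2 * layer_index + kv_select) this would address the key slot of
    // layer 25. Again, inferred from the constants only.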
    %int50 = torch.constant.int 50
    %int1_46996 = torch.constant.int 1
    %49565 = torch.aten.add.Scalar %49557, %int50, %int1_46996 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49565, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int50_46997 = torch.constant.int 50
    %int1_46998 = torch.constant.int 1
    %49566 = torch.aten.add.Scalar %49558, %int50_46997, %int1_46998 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49566, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int50_46999 = torch.constant.int 50
    %int1_47000 = torch.constant.int 1
    %49567 = torch.aten.add.Scalar %49559, %int50_46999, %int1_47000 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49567, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int50_47001 = torch.constant.int 50
    %int1_47002 = torch.constant.int 1
    %49568 = torch.aten.add.Scalar %49560, %int50_47001, %int1_47002 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49568, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int50_47003 = torch.constant.int 50
    %int1_47004 = torch.constant.int 1
    %49569 = torch.aten.add.Scalar %49561, %int50_47003, %int1_47004 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49569, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int50_47005 = torch.constant.int 50
    %int1_47006 = torch.constant.int 1
    %49570 = torch.aten.add.Scalar %49562, %int50_47005, %int1_47006 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49570, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int50_47007 = torch.constant.int 50
    %int1_47008 = torch.constant.int 1
    %49571 = torch.aten.add.Scalar %49563, %int50_47007, %int1_47008 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49571, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int50_47009 = torch.constant.int 50
    %int1_47010 = torch.constant.int 1
    %49572 = torch.aten.add.Scalar %49564, %int50_47009, %int1_47010 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49572, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
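    // Regroup the eight per-device K payloads from (4, s0*16, 1, 128) into
    // (4, s0, 16, 1, 128): the sequence axis is split into pages of 16 tokens.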
    %int4_47011 = torch.constant.int 4
    %int16_47012 = torch.constant.int 16
    %int1_47013 = torch.constant.int 1
    %int128_47014 = torch.constant.int 128
    %49573 = torch.prim.ListConstruct %int4_47011, %3095, %int16_47012, %int1_47013, %int128_47014 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49574 = torch.aten.view %49451, %49573 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49574, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47015 = torch.constant.int 4
    %int16_47016 = torch.constant.int 16
    %int1_47017 = torch.constant.int 1
    %int128_47018 = torch.constant.int 128
    %49575 = torch.prim.ListConstruct %int4_47015, %3095, %int16_47016, %int1_47017, %int128_47018 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49576 = torch.aten.view %49466, %49575 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49576, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47019 = torch.constant.int 4
    %int16_47020 = torch.constant.int 16
    %int1_47021 = torch.constant.int 1
    %int128_47022 = torch.constant.int 128
    %49577 = torch.prim.ListConstruct %int4_47019, %3095, %int16_47020, %int1_47021, %int128_47022 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49578 = torch.aten.view %49481, %49577 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49578, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47023 = torch.constant.int 4
    %int16_47024 = torch.constant.int 16
    %int1_47025 = torch.constant.int 1
    %int128_47026 = torch.constant.int 128
    %49579 = torch.prim.ListConstruct %int4_47023, %3095, %int16_47024, %int1_47025, %int128_47026 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49580 = torch.aten.view %49496, %49579 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49580, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47027 = torch.constant.int 4
    %int16_47028 = torch.constant.int 16
    %int1_47029 = torch.constant.int 1
    %int128_47030 = torch.constant.int 128
    %49581 = torch.prim.ListConstruct %int4_47027, %3095, %int16_47028, %int1_47029, %int128_47030 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49582 = torch.aten.view %49511, %49581 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49582, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47031 = torch.constant.int 4
    %int16_47032 = torch.constant.int 16
    %int1_47033 = torch.constant.int 1
    %int128_47034 = torch.constant.int 128
    %49583 = torch.prim.ListConstruct %int4_47031, %3095, %int16_47032, %int1_47033, %int128_47034 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49584 = torch.aten.view %49526, %49583 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49584, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47035 = torch.constant.int 4
    %int16_47036 = torch.constant.int 16
    %int1_47037 = torch.constant.int 1
    %int128_47038 = torch.constant.int 128
    %49585 = torch.prim.ListConstruct %int4_47035, %3095, %int16_47036, %int1_47037, %int128_47038 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49586 = torch.aten.view %49541, %49585 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49586, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47039 = torch.constant.int 4
    %int16_47040 = torch.constant.int 16
    %int1_47041 = torch.constant.int 1
    %int128_47042 = torch.constant.int 128
    %49587 = torch.prim.ListConstruct %int4_47039, %3095, %int16_47040, %int1_47041, %int128_47042 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49588 = torch.aten.view %49556, %49587 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49588, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
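    // Collapse the batch axis into the page axis:
    // (4, s0, 16, 1, 128) -> (4*s0, 16, 1, 128), one row per page to be written.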
    %int4_47043 = torch.constant.int 4
    %49589 = torch.aten.mul.int %int4_47043, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47044 = torch.constant.int 16
    %int1_47045 = torch.constant.int 1
    %int128_47046 = torch.constant.int 128
    %49590 = torch.prim.ListConstruct %49589, %int16_47044, %int1_47045, %int128_47046 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49591 = torch.aten.view %49574, %49590 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49591, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47047 = torch.constant.int 4
    %49592 = torch.aten.mul.int %int4_47047, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47048 = torch.constant.int 16
    %int1_47049 = torch.constant.int 1
    %int128_47050 = torch.constant.int 128
    %49593 = torch.prim.ListConstruct %49592, %int16_47048, %int1_47049, %int128_47050 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49594 = torch.aten.view %49576, %49593 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49594, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47051 = torch.constant.int 4
    %49595 = torch.aten.mul.int %int4_47051, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47052 = torch.constant.int 16
    %int1_47053 = torch.constant.int 1
    %int128_47054 = torch.constant.int 128
    %49596 = torch.prim.ListConstruct %49595, %int16_47052, %int1_47053, %int128_47054 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49597 = torch.aten.view %49578, %49596 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49597, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47055 = torch.constant.int 4
    %49598 = torch.aten.mul.int %int4_47055, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47056 = torch.constant.int 16
    %int1_47057 = torch.constant.int 1
    %int128_47058 = torch.constant.int 128
    %49599 = torch.prim.ListConstruct %49598, %int16_47056, %int1_47057, %int128_47058 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49600 = torch.aten.view %49580, %49599 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49600, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47059 = torch.constant.int 4
    %49601 = torch.aten.mul.int %int4_47059, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47060 = torch.constant.int 16
    %int1_47061 = torch.constant.int 1
    %int128_47062 = torch.constant.int 128
    %49602 = torch.prim.ListConstruct %49601, %int16_47060, %int1_47061, %int128_47062 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49603 = torch.aten.view %49582, %49602 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49603, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47063 = torch.constant.int 4
    %49604 = torch.aten.mul.int %int4_47063, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47064 = torch.constant.int 16
    %int1_47065 = torch.constant.int 1
    %int128_47066 = torch.constant.int 128
    %49605 = torch.prim.ListConstruct %49604, %int16_47064, %int1_47065, %int128_47066 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49606 = torch.aten.view %49584, %49605 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49606, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47067 = torch.constant.int 4
    %49607 = torch.aten.mul.int %int4_47067, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47068 = torch.constant.int 16
    %int1_47069 = torch.constant.int 1
    %int128_47070 = torch.constant.int 128
    %49608 = torch.prim.ListConstruct %49607, %int16_47068, %int1_47069, %int128_47070 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49609 = torch.aten.view %49586, %49608 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49609, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47071 = torch.constant.int 4
    %49610 = torch.aten.mul.int %int4_47071, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47072 = torch.constant.int 16
    %int1_47073 = torch.constant.int 1
    %int128_47074 = torch.constant.int 128
    %49611 = torch.prim.ListConstruct %49610, %int16_47072, %int1_47073, %int128_47074 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49612 = torch.aten.view %49588, %49611 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49612, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
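    // Flatten the matching (4, s0) slot-index tensors to (4*s0,) so each index
    // pairs with one row of the flattened payloads above.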
    %int4_47075 = torch.constant.int 4
    %49613 = torch.aten.mul.int %int4_47075, %3095 : !torch.int, !torch.int -> !torch.int
    %49614 = torch.prim.ListConstruct %49613 : (!torch.int) -> !torch.list<int>
    %49615 = torch.aten.view %49565, %49614 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49615, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47076 = torch.constant.int 4
    %49616 = torch.aten.mul.int %int4_47076, %3095 : !torch.int, !torch.int -> !torch.int
    %49617 = torch.prim.ListConstruct %49616 : (!torch.int) -> !torch.list<int>
    %49618 = torch.aten.view %49566, %49617 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49618, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47077 = torch.constant.int 4
    %49619 = torch.aten.mul.int %int4_47077, %3095 : !torch.int, !torch.int -> !torch.int
    %49620 = torch.prim.ListConstruct %49619 : (!torch.int) -> !torch.list<int>
    %49621 = torch.aten.view %49567, %49620 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49621, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47078 = torch.constant.int 4
    %49622 = torch.aten.mul.int %int4_47078, %3095 : !torch.int, !torch.int -> !torch.int
    %49623 = torch.prim.ListConstruct %49622 : (!torch.int) -> !torch.list<int>
    %49624 = torch.aten.view %49568, %49623 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49624, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47079 = torch.constant.int 4
    %49625 = torch.aten.mul.int %int4_47079, %3095 : !torch.int, !torch.int -> !torch.int
    %49626 = torch.prim.ListConstruct %49625 : (!torch.int) -> !torch.list<int>
    %49627 = torch.aten.view %49569, %49626 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49627, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47080 = torch.constant.int 4
    %49628 = torch.aten.mul.int %int4_47080, %3095 : !torch.int, !torch.int -> !torch.int
    %49629 = torch.prim.ListConstruct %49628 : (!torch.int) -> !torch.list<int>
    %49630 = torch.aten.view %49570, %49629 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49630, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47081 = torch.constant.int 4
    %49631 = torch.aten.mul.int %int4_47081, %3095 : !torch.int, !torch.int -> !torch.int
    %49632 = torch.prim.ListConstruct %49631 : (!torch.int) -> !torch.list<int>
    %49633 = torch.aten.view %49571, %49632 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49633, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47082 = torch.constant.int 4
    %49634 = torch.aten.mul.int %int4_47082, %3095 : !torch.int, !torch.int -> !torch.int
    %49635 = torch.prim.ListConstruct %49634 : (!torch.int) -> !torch.list<int>
    %49636 = torch.aten.view %49572, %49635 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49636, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
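    // Same page regrouping and batch flattening for the eight per-device V
    // payloads (%49226 ... %49240).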
    %int4_47083 = torch.constant.int 4
    %int16_47084 = torch.constant.int 16
    %int1_47085 = torch.constant.int 1
    %int128_47086 = torch.constant.int 128
    %49637 = torch.prim.ListConstruct %int4_47083, %3095, %int16_47084, %int1_47085, %int128_47086 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49638 = torch.aten.view %49226, %49637 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49638, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47087 = torch.constant.int 4
    %int16_47088 = torch.constant.int 16
    %int1_47089 = torch.constant.int 1
    %int128_47090 = torch.constant.int 128
    %49639 = torch.prim.ListConstruct %int4_47087, %3095, %int16_47088, %int1_47089, %int128_47090 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49640 = torch.aten.view %49228, %49639 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49640, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47091 = torch.constant.int 4
    %int16_47092 = torch.constant.int 16
    %int1_47093 = torch.constant.int 1
    %int128_47094 = torch.constant.int 128
    %49641 = torch.prim.ListConstruct %int4_47091, %3095, %int16_47092, %int1_47093, %int128_47094 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49642 = torch.aten.view %49230, %49641 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49642, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47095 = torch.constant.int 4
    %int16_47096 = torch.constant.int 16
    %int1_47097 = torch.constant.int 1
    %int128_47098 = torch.constant.int 128
    %49643 = torch.prim.ListConstruct %int4_47095, %3095, %int16_47096, %int1_47097, %int128_47098 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49644 = torch.aten.view %49232, %49643 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49644, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47099 = torch.constant.int 4
    %int16_47100 = torch.constant.int 16
    %int1_47101 = torch.constant.int 1
    %int128_47102 = torch.constant.int 128
    %49645 = torch.prim.ListConstruct %int4_47099, %3095, %int16_47100, %int1_47101, %int128_47102 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49646 = torch.aten.view %49234, %49645 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49646, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47103 = torch.constant.int 4
    %int16_47104 = torch.constant.int 16
    %int1_47105 = torch.constant.int 1
    %int128_47106 = torch.constant.int 128
    %49647 = torch.prim.ListConstruct %int4_47103, %3095, %int16_47104, %int1_47105, %int128_47106 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49648 = torch.aten.view %49236, %49647 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49648, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47107 = torch.constant.int 4
    %int16_47108 = torch.constant.int 16
    %int1_47109 = torch.constant.int 1
    %int128_47110 = torch.constant.int 128
    %49649 = torch.prim.ListConstruct %int4_47107, %3095, %int16_47108, %int1_47109, %int128_47110 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49650 = torch.aten.view %49238, %49649 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49650, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47111 = torch.constant.int 4
    %int16_47112 = torch.constant.int 16
    %int1_47113 = torch.constant.int 1
    %int128_47114 = torch.constant.int 128
    %49651 = torch.prim.ListConstruct %int4_47111, %3095, %int16_47112, %int1_47113, %int128_47114 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49652 = torch.aten.view %49240, %49651 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %49652, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_47115 = torch.constant.int 4
    %49653 = torch.aten.mul.int %int4_47115, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47116 = torch.constant.int 16
    %int1_47117 = torch.constant.int 1
    %int128_47118 = torch.constant.int 128
    %49654 = torch.prim.ListConstruct %49653, %int16_47116, %int1_47117, %int128_47118 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49655 = torch.aten.view %49638, %49654 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49655, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47119 = torch.constant.int 4
    %49656 = torch.aten.mul.int %int4_47119, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47120 = torch.constant.int 16
    %int1_47121 = torch.constant.int 1
    %int128_47122 = torch.constant.int 128
    %49657 = torch.prim.ListConstruct %49656, %int16_47120, %int1_47121, %int128_47122 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49658 = torch.aten.view %49640, %49657 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49658, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47123 = torch.constant.int 4
    %49659 = torch.aten.mul.int %int4_47123, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47124 = torch.constant.int 16
    %int1_47125 = torch.constant.int 1
    %int128_47126 = torch.constant.int 128
    %49660 = torch.prim.ListConstruct %49659, %int16_47124, %int1_47125, %int128_47126 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49661 = torch.aten.view %49642, %49660 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49661, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47127 = torch.constant.int 4
    %49662 = torch.aten.mul.int %int4_47127, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47128 = torch.constant.int 16
    %int1_47129 = torch.constant.int 1
    %int128_47130 = torch.constant.int 128
    %49663 = torch.prim.ListConstruct %49662, %int16_47128, %int1_47129, %int128_47130 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49664 = torch.aten.view %49644, %49663 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49664, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47131 = torch.constant.int 4
    %49665 = torch.aten.mul.int %int4_47131, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47132 = torch.constant.int 16
    %int1_47133 = torch.constant.int 1
    %int128_47134 = torch.constant.int 128
    %49666 = torch.prim.ListConstruct %49665, %int16_47132, %int1_47133, %int128_47134 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49667 = torch.aten.view %49646, %49666 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49667, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47135 = torch.constant.int 4
    %49668 = torch.aten.mul.int %int4_47135, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47136 = torch.constant.int 16
    %int1_47137 = torch.constant.int 1
    %int128_47138 = torch.constant.int 128
    %49669 = torch.prim.ListConstruct %49668, %int16_47136, %int1_47137, %int128_47138 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49670 = torch.aten.view %49648, %49669 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49670, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47139 = torch.constant.int 4
    %49671 = torch.aten.mul.int %int4_47139, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47140 = torch.constant.int 16
    %int1_47141 = torch.constant.int 1
    %int128_47142 = torch.constant.int 128
    %49672 = torch.prim.ListConstruct %49671, %int16_47140, %int1_47141, %int128_47142 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49673 = torch.aten.view %49650, %49672 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49673, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_47143 = torch.constant.int 4
    %49674 = torch.aten.mul.int %int4_47143, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_47144 = torch.constant.int 16
    %int1_47145 = torch.constant.int 1
    %int128_47146 = torch.constant.int 128
    %49675 = torch.prim.ListConstruct %49674, %int16_47144, %int1_47145, %int128_47146 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49676 = torch.aten.view %49652, %49675 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49676, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
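    // V slot indices: adding 1 to each K slot (page*64 + 50) yields the
    // adjacent V slot (page*64 + 51, i.e. kv = 1 of the same sub-block); the
    // (4, s0) tensors are then flattened to (4*s0,) as before.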
    %int1_47147 = torch.constant.int 1
    %int1_47148 = torch.constant.int 1
    %49677 = torch.aten.add.Scalar %49565, %int1_47147, %int1_47148 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49677, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_47149 = torch.constant.int 1
    %int1_47150 = torch.constant.int 1
    %49678 = torch.aten.add.Scalar %49566, %int1_47149, %int1_47150 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49678, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_47151 = torch.constant.int 1
    %int1_47152 = torch.constant.int 1
    %49679 = torch.aten.add.Scalar %49567, %int1_47151, %int1_47152 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49679, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_47153 = torch.constant.int 1
    %int1_47154 = torch.constant.int 1
    %49680 = torch.aten.add.Scalar %49568, %int1_47153, %int1_47154 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49680, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_47155 = torch.constant.int 1
    %int1_47156 = torch.constant.int 1
    %49681 = torch.aten.add.Scalar %49569, %int1_47155, %int1_47156 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49681, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_47157 = torch.constant.int 1
    %int1_47158 = torch.constant.int 1
    %49682 = torch.aten.add.Scalar %49570, %int1_47157, %int1_47158 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49682, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_47159 = torch.constant.int 1
    %int1_47160 = torch.constant.int 1
    %49683 = torch.aten.add.Scalar %49571, %int1_47159, %int1_47160 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49683, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_47161 = torch.constant.int 1
    %int1_47162 = torch.constant.int 1
    %49684 = torch.aten.add.Scalar %49572, %int1_47161, %int1_47162 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %49684, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int4_47163 = torch.constant.int 4
    %49685 = torch.aten.mul.int %int4_47163, %3095 : !torch.int, !torch.int -> !torch.int
    %49686 = torch.prim.ListConstruct %49685 : (!torch.int) -> !torch.list<int>
    %49687 = torch.aten.view %49677, %49686 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49687, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47164 = torch.constant.int 4
    %49688 = torch.aten.mul.int %int4_47164, %3095 : !torch.int, !torch.int -> !torch.int
    %49689 = torch.prim.ListConstruct %49688 : (!torch.int) -> !torch.list<int>
    %49690 = torch.aten.view %49678, %49689 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49690, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47165 = torch.constant.int 4
    %49691 = torch.aten.mul.int %int4_47165, %3095 : !torch.int, !torch.int -> !torch.int
    %49692 = torch.prim.ListConstruct %49691 : (!torch.int) -> !torch.list<int>
    %49693 = torch.aten.view %49679, %49692 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49693, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47166 = torch.constant.int 4
    %49694 = torch.aten.mul.int %int4_47166, %3095 : !torch.int, !torch.int -> !torch.int
    %49695 = torch.prim.ListConstruct %49694 : (!torch.int) -> !torch.list<int>
    %49696 = torch.aten.view %49680, %49695 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49696, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47167 = torch.constant.int 4
    %49697 = torch.aten.mul.int %int4_47167, %3095 : !torch.int, !torch.int -> !torch.int
    %49698 = torch.prim.ListConstruct %49697 : (!torch.int) -> !torch.list<int>
    %49699 = torch.aten.view %49681, %49698 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49699, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47168 = torch.constant.int 4
    %49700 = torch.aten.mul.int %int4_47168, %3095 : !torch.int, !torch.int -> !torch.int
    %49701 = torch.prim.ListConstruct %49700 : (!torch.int) -> !torch.list<int>
    %49702 = torch.aten.view %49682, %49701 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49702, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47169 = torch.constant.int 4
    %49703 = torch.aten.mul.int %int4_47169, %3095 : !torch.int, !torch.int -> !torch.int
    %49704 = torch.prim.ListConstruct %49703 : (!torch.int) -> !torch.list<int>
    %49705 = torch.aten.view %49683, %49704 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49705, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_47170 = torch.constant.int 4
    %49706 = torch.aten.mul.int %int4_47170, %3095 : !torch.int, !torch.int -> !torch.int
    %49707 = torch.prim.ListConstruct %49706 : (!torch.int) -> !torch.list<int>
    %49708 = torch.aten.view %49684, %49707 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49708, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
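    // Concatenate K and V slot indices per device into one (8*s0,) index vector.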
    %49709 = torch.prim.ListConstruct %49615, %49687 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_47171 = torch.constant.int 0
    %49710 = torch.aten.cat %49709, %int0_47171 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49710, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %49711 = torch.prim.ListConstruct %49618, %49690 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_47172 = torch.constant.int 0
    %49712 = torch.aten.cat %49711, %int0_47172 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49712, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %49713 = torch.prim.ListConstruct %49621, %49693 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_47173 = torch.constant.int 0
    %49714 = torch.aten.cat %49713, %int0_47173 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49714, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %49715 = torch.prim.ListConstruct %49624, %49696 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_47174 = torch.constant.int 0
    %49716 = torch.aten.cat %49715, %int0_47174 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49716, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %49717 = torch.prim.ListConstruct %49627, %49699 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_47175 = torch.constant.int 0
    %49718 = torch.aten.cat %49717, %int0_47175 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49718, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %49719 = torch.prim.ListConstruct %49630, %49702 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_47176 = torch.constant.int 0
    %49720 = torch.aten.cat %49719, %int0_47176 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49720, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %49721 = torch.prim.ListConstruct %49633, %49705 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_47177 = torch.constant.int 0
    %49722 = torch.aten.cat %49721, %int0_47177 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49722, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %49723 = torch.prim.ListConstruct %49636, %49708 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_47178 = torch.constant.int 0
    %49724 = torch.aten.cat %49723, %int0_47178 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %49724, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
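    // Concatenate the matching K and V payloads into (8*s0, 16, 1, 128) per device.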
    %49725 = torch.prim.ListConstruct %49591, %49655 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_47179 = torch.constant.int 0
    %49726 = torch.aten.cat %49725, %int0_47179 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49726, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49727 = torch.prim.ListConstruct %49594, %49658 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_47180 = torch.constant.int 0
    %49728 = torch.aten.cat %49727, %int0_47180 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49728, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49729 = torch.prim.ListConstruct %49597, %49661 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_47181 = torch.constant.int 0
    %49730 = torch.aten.cat %49729, %int0_47181 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49730, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49731 = torch.prim.ListConstruct %49600, %49664 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_47182 = torch.constant.int 0
    %49732 = torch.aten.cat %49731, %int0_47182 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49732, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49733 = torch.prim.ListConstruct %49603, %49667 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_47183 = torch.constant.int 0
    %49734 = torch.aten.cat %49733, %int0_47183 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49734, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49735 = torch.prim.ListConstruct %49606, %49670 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_47184 = torch.constant.int 0
    %49736 = torch.aten.cat %49735, %int0_47184 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49736, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49737 = torch.prim.ListConstruct %49609, %49673 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_47185 = torch.constant.int 0
    %49738 = torch.aten.cat %49737, %int0_47185 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49738, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49739 = torch.prim.ListConstruct %49612, %49676 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_47186 = torch.constant.int 0
    %49740 = torch.aten.cat %49739, %int0_47186 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49740, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
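    // Per-shard scatter into what appears to be the paged KV cache, repeated
    // for the eight device-local buffers %47891, %47903, %47915, %47927,
    // %47939, %47951, %47963, %47975: view the flat (pages, 131072) buffer as
    // (pages, 32, 2, 16, 1, 128), fold to (pages*64, 16, 1, 128), scatter the
    // new rows with index_put (accumulate = false), then view back to the flat
    // layout. Note the buffer page count is bound to symbol %2337, independent
    // of the written page count %2336. Shard 0 (buffer %47891):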
    %int32_47187 = torch.constant.int 32
    %int2_47188 = torch.constant.int 2
    %int16_47189 = torch.constant.int 16
    %int1_47190 = torch.constant.int 1
    %int128_47191 = torch.constant.int 128
    %49741 = torch.prim.ListConstruct %3023, %int32_47187, %int2_47188, %int16_47189, %int1_47190, %int128_47191 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49742 = torch.aten.view %47891, %49741 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49742, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_47192 = torch.constant.int 32
    %49743 = torch.aten.mul.int %3023, %int32_47192 : !torch.int, !torch.int -> !torch.int
    %int2_47193 = torch.constant.int 2
    %49744 = torch.aten.mul.int %49743, %int2_47193 : !torch.int, !torch.int -> !torch.int
    %int16_47194 = torch.constant.int 16
    %int1_47195 = torch.constant.int 1
    %int128_47196 = torch.constant.int 128
    %49745 = torch.prim.ListConstruct %49744, %int16_47194, %int1_47195, %int128_47196 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49746 = torch.aten.view %49742, %49745 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49746, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49747 = torch.prim.ListConstruct %49710 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_47197 = torch.constant.bool false
    %49748 = torch.aten.index_put %49746, %49747, %49726, %false_47197 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49748, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_47198 = torch.constant.int 32
    %int2_47199 = torch.constant.int 2
    %int16_47200 = torch.constant.int 16
    %int1_47201 = torch.constant.int 1
    %int128_47202 = torch.constant.int 128
    %49749 = torch.prim.ListConstruct %3023, %int32_47198, %int2_47199, %int16_47200, %int1_47201, %int128_47202 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49750 = torch.aten.view %49748, %49749 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49750, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_47203 = torch.constant.int 131072
    %49751 = torch.prim.ListConstruct %3023, %int131072_47203 : (!torch.int, !torch.int) -> !torch.list<int>
    %49752 = torch.aten.view %49750, %49751 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %49752, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
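    // Shard 1 (buffer %47903): same update pattern.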
    %int32_47204 = torch.constant.int 32
    %int2_47205 = torch.constant.int 2
    %int16_47206 = torch.constant.int 16
    %int1_47207 = torch.constant.int 1
    %int128_47208 = torch.constant.int 128
    %49753 = torch.prim.ListConstruct %3026, %int32_47204, %int2_47205, %int16_47206, %int1_47207, %int128_47208 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49754 = torch.aten.view %47903, %49753 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49754, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_47209 = torch.constant.int 32
    %49755 = torch.aten.mul.int %3026, %int32_47209 : !torch.int, !torch.int -> !torch.int
    %int2_47210 = torch.constant.int 2
    %49756 = torch.aten.mul.int %49755, %int2_47210 : !torch.int, !torch.int -> !torch.int
    %int16_47211 = torch.constant.int 16
    %int1_47212 = torch.constant.int 1
    %int128_47213 = torch.constant.int 128
    %49757 = torch.prim.ListConstruct %49756, %int16_47211, %int1_47212, %int128_47213 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49758 = torch.aten.view %49754, %49757 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49758, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49759 = torch.prim.ListConstruct %49712 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_47214 = torch.constant.bool false
    %49760 = torch.aten.index_put %49758, %49759, %49728, %false_47214 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49760, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_47215 = torch.constant.int 32
    %int2_47216 = torch.constant.int 2
    %int16_47217 = torch.constant.int 16
    %int1_47218 = torch.constant.int 1
    %int128_47219 = torch.constant.int 128
    %49761 = torch.prim.ListConstruct %3026, %int32_47215, %int2_47216, %int16_47217, %int1_47218, %int128_47219 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49762 = torch.aten.view %49760, %49761 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49762, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_47220 = torch.constant.int 131072
    %49763 = torch.prim.ListConstruct %3026, %int131072_47220 : (!torch.int, !torch.int) -> !torch.list<int>
    %49764 = torch.aten.view %49762, %49763 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %49764, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
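    // Shard 2 (buffer %47915):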
    %int32_47221 = torch.constant.int 32
    %int2_47222 = torch.constant.int 2
    %int16_47223 = torch.constant.int 16
    %int1_47224 = torch.constant.int 1
    %int128_47225 = torch.constant.int 128
    %49765 = torch.prim.ListConstruct %3029, %int32_47221, %int2_47222, %int16_47223, %int1_47224, %int128_47225 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49766 = torch.aten.view %47915, %49765 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49766, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_47226 = torch.constant.int 32
    %49767 = torch.aten.mul.int %3029, %int32_47226 : !torch.int, !torch.int -> !torch.int
    %int2_47227 = torch.constant.int 2
    %49768 = torch.aten.mul.int %49767, %int2_47227 : !torch.int, !torch.int -> !torch.int
    %int16_47228 = torch.constant.int 16
    %int1_47229 = torch.constant.int 1
    %int128_47230 = torch.constant.int 128
    %49769 = torch.prim.ListConstruct %49768, %int16_47228, %int1_47229, %int128_47230 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49770 = torch.aten.view %49766, %49769 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49770, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49771 = torch.prim.ListConstruct %49714 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_47231 = torch.constant.bool false
    %49772 = torch.aten.index_put %49770, %49771, %49730, %false_47231 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49772, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_47232 = torch.constant.int 32
    %int2_47233 = torch.constant.int 2
    %int16_47234 = torch.constant.int 16
    %int1_47235 = torch.constant.int 1
    %int128_47236 = torch.constant.int 128
    %49773 = torch.prim.ListConstruct %3029, %int32_47232, %int2_47233, %int16_47234, %int1_47235, %int128_47236 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49774 = torch.aten.view %49772, %49773 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49774, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_47237 = torch.constant.int 131072
    %49775 = torch.prim.ListConstruct %3029, %int131072_47237 : (!torch.int, !torch.int) -> !torch.list<int>
    %49776 = torch.aten.view %49774, %49775 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %49776, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
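    // Shard 3 (buffer %47927):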
    %int32_47238 = torch.constant.int 32
    %int2_47239 = torch.constant.int 2
    %int16_47240 = torch.constant.int 16
    %int1_47241 = torch.constant.int 1
    %int128_47242 = torch.constant.int 128
    %49777 = torch.prim.ListConstruct %3032, %int32_47238, %int2_47239, %int16_47240, %int1_47241, %int128_47242 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49778 = torch.aten.view %47927, %49777 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49778, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_47243 = torch.constant.int 32
    %49779 = torch.aten.mul.int %3032, %int32_47243 : !torch.int, !torch.int -> !torch.int
    %int2_47244 = torch.constant.int 2
    %49780 = torch.aten.mul.int %49779, %int2_47244 : !torch.int, !torch.int -> !torch.int
    %int16_47245 = torch.constant.int 16
    %int1_47246 = torch.constant.int 1
    %int128_47247 = torch.constant.int 128
    %49781 = torch.prim.ListConstruct %49780, %int16_47245, %int1_47246, %int128_47247 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49782 = torch.aten.view %49778, %49781 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49782, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49783 = torch.prim.ListConstruct %49716 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_47248 = torch.constant.bool false
    %49784 = torch.aten.index_put %49782, %49783, %49732, %false_47248 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49784, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_47249 = torch.constant.int 32
    %int2_47250 = torch.constant.int 2
    %int16_47251 = torch.constant.int 16
    %int1_47252 = torch.constant.int 1
    %int128_47253 = torch.constant.int 128
    %49785 = torch.prim.ListConstruct %3032, %int32_47249, %int2_47250, %int16_47251, %int1_47252, %int128_47253 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49786 = torch.aten.view %49784, %49785 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49786, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_47254 = torch.constant.int 131072
    %49787 = torch.prim.ListConstruct %3032, %int131072_47254 : (!torch.int, !torch.int) -> !torch.list<int>
    %49788 = torch.aten.view %49786, %49787 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %49788, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
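    // Shard 4 (buffer %47939):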
    %int32_47255 = torch.constant.int 32
    %int2_47256 = torch.constant.int 2
    %int16_47257 = torch.constant.int 16
    %int1_47258 = torch.constant.int 1
    %int128_47259 = torch.constant.int 128
    %49789 = torch.prim.ListConstruct %3035, %int32_47255, %int2_47256, %int16_47257, %int1_47258, %int128_47259 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49790 = torch.aten.view %47939, %49789 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49790, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_47260 = torch.constant.int 32
    %49791 = torch.aten.mul.int %3035, %int32_47260 : !torch.int, !torch.int -> !torch.int
    %int2_47261 = torch.constant.int 2
    %49792 = torch.aten.mul.int %49791, %int2_47261 : !torch.int, !torch.int -> !torch.int
    %int16_47262 = torch.constant.int 16
    %int1_47263 = torch.constant.int 1
    %int128_47264 = torch.constant.int 128
    %49793 = torch.prim.ListConstruct %49792, %int16_47262, %int1_47263, %int128_47264 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49794 = torch.aten.view %49790, %49793 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49794, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49795 = torch.prim.ListConstruct %49718 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_47265 = torch.constant.bool false
    %49796 = torch.aten.index_put %49794, %49795, %49734, %false_47265 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49796, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_47266 = torch.constant.int 32
    %int2_47267 = torch.constant.int 2
    %int16_47268 = torch.constant.int 16
    %int1_47269 = torch.constant.int 1
    %int128_47270 = torch.constant.int 128
    %49797 = torch.prim.ListConstruct %3035, %int32_47266, %int2_47267, %int16_47268, %int1_47269, %int128_47270 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49798 = torch.aten.view %49796, %49797 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49798, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_47271 = torch.constant.int 131072
    %49799 = torch.prim.ListConstruct %3035, %int131072_47271 : (!torch.int, !torch.int) -> !torch.list<int>
    %49800 = torch.aten.view %49798, %49799 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %49800, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
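    // Shard 5 (buffer %47951):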
    %int32_47272 = torch.constant.int 32
    %int2_47273 = torch.constant.int 2
    %int16_47274 = torch.constant.int 16
    %int1_47275 = torch.constant.int 1
    %int128_47276 = torch.constant.int 128
    %49801 = torch.prim.ListConstruct %3038, %int32_47272, %int2_47273, %int16_47274, %int1_47275, %int128_47276 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49802 = torch.aten.view %47951, %49801 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49802, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_47277 = torch.constant.int 32
    %49803 = torch.aten.mul.int %3038, %int32_47277 : !torch.int, !torch.int -> !torch.int
    %int2_47278 = torch.constant.int 2
    %49804 = torch.aten.mul.int %49803, %int2_47278 : !torch.int, !torch.int -> !torch.int
    %int16_47279 = torch.constant.int 16
    %int1_47280 = torch.constant.int 1
    %int128_47281 = torch.constant.int 128
    %49805 = torch.prim.ListConstruct %49804, %int16_47279, %int1_47280, %int128_47281 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49806 = torch.aten.view %49802, %49805 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49806, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49807 = torch.prim.ListConstruct %49720 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_47282 = torch.constant.bool false
    %49808 = torch.aten.index_put %49806, %49807, %49736, %false_47282 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49808, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_47283 = torch.constant.int 32
    %int2_47284 = torch.constant.int 2
    %int16_47285 = torch.constant.int 16
    %int1_47286 = torch.constant.int 1
    %int128_47287 = torch.constant.int 128
    %49809 = torch.prim.ListConstruct %3038, %int32_47283, %int2_47284, %int16_47285, %int1_47286, %int128_47287 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49810 = torch.aten.view %49808, %49809 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49810, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_47288 = torch.constant.int 131072
    %49811 = torch.prim.ListConstruct %3038, %int131072_47288 : (!torch.int, !torch.int) -> !torch.list<int>
    %49812 = torch.aten.view %49810, %49811 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %49812, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
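    // Shard 6 (buffer %47963):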
    %int32_47289 = torch.constant.int 32
    %int2_47290 = torch.constant.int 2
    %int16_47291 = torch.constant.int 16
    %int1_47292 = torch.constant.int 1
    %int128_47293 = torch.constant.int 128
    %49813 = torch.prim.ListConstruct %3041, %int32_47289, %int2_47290, %int16_47291, %int1_47292, %int128_47293 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49814 = torch.aten.view %47963, %49813 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49814, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_47294 = torch.constant.int 32
    %49815 = torch.aten.mul.int %3041, %int32_47294 : !torch.int, !torch.int -> !torch.int
    %int2_47295 = torch.constant.int 2
    %49816 = torch.aten.mul.int %49815, %int2_47295 : !torch.int, !torch.int -> !torch.int
    %int16_47296 = torch.constant.int 16
    %int1_47297 = torch.constant.int 1
    %int128_47298 = torch.constant.int 128
    %49817 = torch.prim.ListConstruct %49816, %int16_47296, %int1_47297, %int128_47298 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49818 = torch.aten.view %49814, %49817 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49818, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49819 = torch.prim.ListConstruct %49722 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_47299 = torch.constant.bool false
    %49820 = torch.aten.index_put %49818, %49819, %49738, %false_47299 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49820, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_47300 = torch.constant.int 32
    %int2_47301 = torch.constant.int 2
    %int16_47302 = torch.constant.int 16
    %int1_47303 = torch.constant.int 1
    %int128_47304 = torch.constant.int 128
    %49821 = torch.prim.ListConstruct %3041, %int32_47300, %int2_47301, %int16_47302, %int1_47303, %int128_47304 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49822 = torch.aten.view %49820, %49821 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49822, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_47305 = torch.constant.int 131072
    %49823 = torch.prim.ListConstruct %3041, %int131072_47305 : (!torch.int, !torch.int) -> !torch.list<int>
    %49824 = torch.aten.view %49822, %49823 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %49824, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_47306 = torch.constant.int 32
    %int2_47307 = torch.constant.int 2
    %int16_47308 = torch.constant.int 16
    %int1_47309 = torch.constant.int 1
    %int128_47310 = torch.constant.int 128
    %49825 = torch.prim.ListConstruct %3044, %int32_47306, %int2_47307, %int16_47308, %int1_47309, %int128_47310 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49826 = torch.aten.view %47975, %49825 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49826, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_47311 = torch.constant.int 32
    %49827 = torch.aten.mul.int %3044, %int32_47311 : !torch.int, !torch.int -> !torch.int
    %int2_47312 = torch.constant.int 2
    %49828 = torch.aten.mul.int %49827, %int2_47312 : !torch.int, !torch.int -> !torch.int
    %int16_47313 = torch.constant.int 16
    %int1_47314 = torch.constant.int 1
    %int128_47315 = torch.constant.int 128
    %49829 = torch.prim.ListConstruct %49828, %int16_47313, %int1_47314, %int128_47315 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49830 = torch.aten.view %49826, %49829 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49830, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %49831 = torch.prim.ListConstruct %49724 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_47316 = torch.constant.bool false
    %49832 = torch.aten.index_put %49830, %49831, %49740, %false_47316 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %49832, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_47317 = torch.constant.int 32
    %int2_47318 = torch.constant.int 2
    %int16_47319 = torch.constant.int 16
    %int1_47320 = torch.constant.int 1
    %int128_47321 = torch.constant.int 128
    %49833 = torch.prim.ListConstruct %3044, %int32_47317, %int2_47318, %int16_47319, %int1_47320, %int128_47321 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49834 = torch.aten.view %49832, %49833 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %49834, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_47322 = torch.constant.int 131072
    %49835 = torch.prim.ListConstruct %3044, %int131072_47322 : (!torch.int, !torch.int) -> !torch.list<int>
    %49836 = torch.aten.view %49834, %49835 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %49836, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
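    // The [4, seq, 1, 128] tensors below appear to be the per-shard K inputs (one KV
    // head each; they later feed the key operand of the attention op). %49837..%49876
    // unsqueeze them to [4, seq, 1, 1, 128], expand the new axis to the 4 query heads
    // served by this shard, and flatten to [4, seq, 4, 128] -- the usual
    // grouped-query-attention "repeat_kv" broadcast, done once per shard.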
    %int-2_47323 = torch.constant.int -2
    %49837 = torch.aten.unsqueeze %49451, %int-2_47323 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47324 = torch.constant.int -2
    %49838 = torch.aten.unsqueeze %49466, %int-2_47324 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47325 = torch.constant.int -2
    %49839 = torch.aten.unsqueeze %49481, %int-2_47325 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47326 = torch.constant.int -2
    %49840 = torch.aten.unsqueeze %49496, %int-2_47326 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47327 = torch.constant.int -2
    %49841 = torch.aten.unsqueeze %49511, %int-2_47327 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47328 = torch.constant.int -2
    %49842 = torch.aten.unsqueeze %49526, %int-2_47328 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47329 = torch.constant.int -2
    %49843 = torch.aten.unsqueeze %49541, %int-2_47329 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47330 = torch.constant.int -2
    %49844 = torch.aten.unsqueeze %49556, %int-2_47330 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int4_47331 = torch.constant.int 4
    %int1_47332 = torch.constant.int 1
    %int4_47333 = torch.constant.int 4
    %int128_47334 = torch.constant.int 128
    %49845 = torch.prim.ListConstruct %int4_47331, %49437, %int1_47332, %int4_47333, %int128_47334 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47335 = torch.constant.bool false
    %49846 = torch.aten.expand %49837, %49845, %false_47335 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49846, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47336 = torch.constant.int 4
    %int1_47337 = torch.constant.int 1
    %int4_47338 = torch.constant.int 4
    %int128_47339 = torch.constant.int 128
    %49847 = torch.prim.ListConstruct %int4_47336, %49437, %int1_47337, %int4_47338, %int128_47339 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47340 = torch.constant.bool false
    %49848 = torch.aten.expand %49838, %49847, %false_47340 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47341 = torch.constant.int 4
    %int1_47342 = torch.constant.int 1
    %int4_47343 = torch.constant.int 4
    %int128_47344 = torch.constant.int 128
    %49849 = torch.prim.ListConstruct %int4_47341, %49437, %int1_47342, %int4_47343, %int128_47344 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47345 = torch.constant.bool false
    %49850 = torch.aten.expand %49839, %49849, %false_47345 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47346 = torch.constant.int 4
    %int1_47347 = torch.constant.int 1
    %int4_47348 = torch.constant.int 4
    %int128_47349 = torch.constant.int 128
    %49851 = torch.prim.ListConstruct %int4_47346, %49437, %int1_47347, %int4_47348, %int128_47349 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47350 = torch.constant.bool false
    %49852 = torch.aten.expand %49840, %49851, %false_47350 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47351 = torch.constant.int 4
    %int1_47352 = torch.constant.int 1
    %int4_47353 = torch.constant.int 4
    %int128_47354 = torch.constant.int 128
    %49853 = torch.prim.ListConstruct %int4_47351, %49437, %int1_47352, %int4_47353, %int128_47354 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47355 = torch.constant.bool false
    %49854 = torch.aten.expand %49841, %49853, %false_47355 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47356 = torch.constant.int 4
    %int1_47357 = torch.constant.int 1
    %int4_47358 = torch.constant.int 4
    %int128_47359 = torch.constant.int 128
    %49855 = torch.prim.ListConstruct %int4_47356, %49437, %int1_47357, %int4_47358, %int128_47359 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47360 = torch.constant.bool false
    %49856 = torch.aten.expand %49842, %49855, %false_47360 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47361 = torch.constant.int 4
    %int1_47362 = torch.constant.int 1
    %int4_47363 = torch.constant.int 4
    %int128_47364 = torch.constant.int 128
    %49857 = torch.prim.ListConstruct %int4_47361, %49437, %int1_47362, %int4_47363, %int128_47364 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47365 = torch.constant.bool false
    %49858 = torch.aten.expand %49843, %49857, %false_47365 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47366 = torch.constant.int 4
    %int1_47367 = torch.constant.int 1
    %int4_47368 = torch.constant.int 4
    %int128_47369 = torch.constant.int 128
    %49859 = torch.prim.ListConstruct %int4_47366, %49437, %int1_47367, %int4_47368, %int128_47369 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47370 = torch.constant.bool false
    %49860 = torch.aten.expand %49844, %49859, %false_47370 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47371 = torch.constant.int 4
    %int4_47372 = torch.constant.int 4
    %int128_47373 = torch.constant.int 128
    %49861 = torch.prim.ListConstruct %int4_47371, %49437, %int4_47372, %int128_47373 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49862 = torch.aten.view %49846, %49861 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47374 = torch.constant.int 4
    %int4_47375 = torch.constant.int 4
    %int128_47376 = torch.constant.int 128
    %49863 = torch.prim.ListConstruct %int4_47374, %49437, %int4_47375, %int128_47376 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49864 = torch.aten.view %49848, %49863 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47377 = torch.constant.int 4
    %int4_47378 = torch.constant.int 4
    %int128_47379 = torch.constant.int 128
    %49865 = torch.prim.ListConstruct %int4_47377, %49437, %int4_47378, %int128_47379 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49866 = torch.aten.view %49850, %49865 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47380 = torch.constant.int 4
    %int4_47381 = torch.constant.int 4
    %int128_47382 = torch.constant.int 128
    %49867 = torch.prim.ListConstruct %int4_47380, %49437, %int4_47381, %int128_47382 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49868 = torch.aten.view %49852, %49867 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47383 = torch.constant.int 4
    %int4_47384 = torch.constant.int 4
    %int128_47385 = torch.constant.int 128
    %49869 = torch.prim.ListConstruct %int4_47383, %49437, %int4_47384, %int128_47385 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49870 = torch.aten.view %49854, %49869 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47386 = torch.constant.int 4
    %int4_47387 = torch.constant.int 4
    %int128_47388 = torch.constant.int 128
    %49871 = torch.prim.ListConstruct %int4_47386, %49437, %int4_47387, %int128_47388 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49872 = torch.aten.view %49856, %49871 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47389 = torch.constant.int 4
    %int4_47390 = torch.constant.int 4
    %int128_47391 = torch.constant.int 128
    %49873 = torch.prim.ListConstruct %int4_47389, %49437, %int4_47390, %int128_47391 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49874 = torch.aten.view %49858, %49873 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47392 = torch.constant.int 4
    %int4_47393 = torch.constant.int 4
    %int128_47394 = torch.constant.int 128
    %49875 = torch.prim.ListConstruct %int4_47392, %49437, %int4_47393, %int128_47394 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49876 = torch.aten.view %49860, %49875 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
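    // Same unsqueeze / expand / flatten sequence, now for what appear to be the
    // per-shard V inputs (%49226..%49240): one KV head broadcast to 4 heads.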
    %int-2_47395 = torch.constant.int -2
    %49877 = torch.aten.unsqueeze %49226, %int-2_47395 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47396 = torch.constant.int -2
    %49878 = torch.aten.unsqueeze %49228, %int-2_47396 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47397 = torch.constant.int -2
    %49879 = torch.aten.unsqueeze %49230, %int-2_47397 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47398 = torch.constant.int -2
    %49880 = torch.aten.unsqueeze %49232, %int-2_47398 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47399 = torch.constant.int -2
    %49881 = torch.aten.unsqueeze %49234, %int-2_47399 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47400 = torch.constant.int -2
    %49882 = torch.aten.unsqueeze %49236, %int-2_47400 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47401 = torch.constant.int -2
    %49883 = torch.aten.unsqueeze %49238, %int-2_47401 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_47402 = torch.constant.int -2
    %49884 = torch.aten.unsqueeze %49240, %int-2_47402 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %49884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_47403 = torch.constant.int 1
    %49885 = torch.aten.size.int %49150, %int1_47403 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_47404 = torch.constant.int 4
    %int1_47405 = torch.constant.int 1
    %int4_47406 = torch.constant.int 4
    %int128_47407 = torch.constant.int 128
    %49886 = torch.prim.ListConstruct %int4_47404, %49885, %int1_47405, %int4_47406, %int128_47407 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47408 = torch.constant.bool false
    %49887 = torch.aten.expand %49877, %49886, %false_47408 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47409 = torch.constant.int 4
    %int1_47410 = torch.constant.int 1
    %int4_47411 = torch.constant.int 4
    %int128_47412 = torch.constant.int 128
    %49888 = torch.prim.ListConstruct %int4_47409, %49885, %int1_47410, %int4_47411, %int128_47412 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47413 = torch.constant.bool false
    %49889 = torch.aten.expand %49878, %49888, %false_47413 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47414 = torch.constant.int 4
    %int1_47415 = torch.constant.int 1
    %int4_47416 = torch.constant.int 4
    %int128_47417 = torch.constant.int 128
    %49890 = torch.prim.ListConstruct %int4_47414, %49885, %int1_47415, %int4_47416, %int128_47417 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47418 = torch.constant.bool false
    %49891 = torch.aten.expand %49879, %49890, %false_47418 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47419 = torch.constant.int 4
    %int1_47420 = torch.constant.int 1
    %int4_47421 = torch.constant.int 4
    %int128_47422 = torch.constant.int 128
    %49892 = torch.prim.ListConstruct %int4_47419, %49885, %int1_47420, %int4_47421, %int128_47422 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47423 = torch.constant.bool false
    %49893 = torch.aten.expand %49880, %49892, %false_47423 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47424 = torch.constant.int 4
    %int1_47425 = torch.constant.int 1
    %int4_47426 = torch.constant.int 4
    %int128_47427 = torch.constant.int 128
    %49894 = torch.prim.ListConstruct %int4_47424, %49885, %int1_47425, %int4_47426, %int128_47427 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47428 = torch.constant.bool false
    %49895 = torch.aten.expand %49881, %49894, %false_47428 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47429 = torch.constant.int 4
    %int1_47430 = torch.constant.int 1
    %int4_47431 = torch.constant.int 4
    %int128_47432 = torch.constant.int 128
    %49896 = torch.prim.ListConstruct %int4_47429, %49885, %int1_47430, %int4_47431, %int128_47432 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47433 = torch.constant.bool false
    %49897 = torch.aten.expand %49882, %49896, %false_47433 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47434 = torch.constant.int 4
    %int1_47435 = torch.constant.int 1
    %int4_47436 = torch.constant.int 4
    %int128_47437 = torch.constant.int 128
    %49898 = torch.prim.ListConstruct %int4_47434, %49885, %int1_47435, %int4_47436, %int128_47437 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47438 = torch.constant.bool false
    %49899 = torch.aten.expand %49883, %49898, %false_47438 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47439 = torch.constant.int 4
    %int1_47440 = torch.constant.int 1
    %int4_47441 = torch.constant.int 4
    %int128_47442 = torch.constant.int 128
    %49900 = torch.prim.ListConstruct %int4_47439, %49885, %int1_47440, %int4_47441, %int128_47442 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_47443 = torch.constant.bool false
    %49901 = torch.aten.expand %49884, %49900, %false_47443 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %49901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_47444 = torch.constant.int 4
    %int4_47445 = torch.constant.int 4
    %int128_47446 = torch.constant.int 128
    %49902 = torch.prim.ListConstruct %int4_47444, %49885, %int4_47445, %int128_47446 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49903 = torch.aten.view %49887, %49902 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47447 = torch.constant.int 4
    %int4_47448 = torch.constant.int 4
    %int128_47449 = torch.constant.int 128
    %49904 = torch.prim.ListConstruct %int4_47447, %49885, %int4_47448, %int128_47449 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49905 = torch.aten.view %49889, %49904 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47450 = torch.constant.int 4
    %int4_47451 = torch.constant.int 4
    %int128_47452 = torch.constant.int 128
    %49906 = torch.prim.ListConstruct %int4_47450, %49885, %int4_47451, %int128_47452 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49907 = torch.aten.view %49891, %49906 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47453 = torch.constant.int 4
    %int4_47454 = torch.constant.int 4
    %int128_47455 = torch.constant.int 128
    %49908 = torch.prim.ListConstruct %int4_47453, %49885, %int4_47454, %int128_47455 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49909 = torch.aten.view %49893, %49908 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47456 = torch.constant.int 4
    %int4_47457 = torch.constant.int 4
    %int128_47458 = torch.constant.int 128
    %49910 = torch.prim.ListConstruct %int4_47456, %49885, %int4_47457, %int128_47458 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49911 = torch.aten.view %49895, %49910 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47459 = torch.constant.int 4
    %int4_47460 = torch.constant.int 4
    %int128_47461 = torch.constant.int 128
    %49912 = torch.prim.ListConstruct %int4_47459, %49885, %int4_47460, %int128_47461 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49913 = torch.aten.view %49897, %49912 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47462 = torch.constant.int 4
    %int4_47463 = torch.constant.int 4
    %int128_47464 = torch.constant.int 128
    %49914 = torch.prim.ListConstruct %int4_47462, %49885, %int4_47463, %int128_47464 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49915 = torch.aten.view %49899, %49914 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_47465 = torch.constant.int 4
    %int4_47466 = torch.constant.int 4
    %int128_47467 = torch.constant.int 128
    %49916 = torch.prim.ListConstruct %int4_47465, %49885, %int4_47466, %int128_47467 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49917 = torch.aten.view %49901, %49916 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
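    // Transpose Q (%49293..%49398), expanded K (%49862..%49876), and expanded V
    // (%49903..%49917) from (batch=4, seq, heads=4, head_dim=128) to
    // (batch, heads, seq, head_dim), the layout the attention calls below expect.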
    %int1_47468 = torch.constant.int 1
    %int2_47469 = torch.constant.int 2
    %49918 = torch.aten.transpose.int %49293, %int1_47468, %int2_47469 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49918, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47470 = torch.constant.int 1
    %int2_47471 = torch.constant.int 2
    %49919 = torch.aten.transpose.int %49308, %int1_47470, %int2_47471 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49919, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47472 = torch.constant.int 1
    %int2_47473 = torch.constant.int 2
    %49920 = torch.aten.transpose.int %49323, %int1_47472, %int2_47473 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49920, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47474 = torch.constant.int 1
    %int2_47475 = torch.constant.int 2
    %49921 = torch.aten.transpose.int %49338, %int1_47474, %int2_47475 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49921, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47476 = torch.constant.int 1
    %int2_47477 = torch.constant.int 2
    %49922 = torch.aten.transpose.int %49353, %int1_47476, %int2_47477 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49922, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47478 = torch.constant.int 1
    %int2_47479 = torch.constant.int 2
    %49923 = torch.aten.transpose.int %49368, %int1_47478, %int2_47479 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49923, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47480 = torch.constant.int 1
    %int2_47481 = torch.constant.int 2
    %49924 = torch.aten.transpose.int %49383, %int1_47480, %int2_47481 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49924, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47482 = torch.constant.int 1
    %int2_47483 = torch.constant.int 2
    %49925 = torch.aten.transpose.int %49398, %int1_47482, %int2_47483 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49925, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47484 = torch.constant.int 1
    %int2_47485 = torch.constant.int 2
    %49926 = torch.aten.transpose.int %49862, %int1_47484, %int2_47485 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49926, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47486 = torch.constant.int 1
    %int2_47487 = torch.constant.int 2
    %49927 = torch.aten.transpose.int %49864, %int1_47486, %int2_47487 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49927, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47488 = torch.constant.int 1
    %int2_47489 = torch.constant.int 2
    %49928 = torch.aten.transpose.int %49866, %int1_47488, %int2_47489 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49928, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47490 = torch.constant.int 1
    %int2_47491 = torch.constant.int 2
    %49929 = torch.aten.transpose.int %49868, %int1_47490, %int2_47491 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49929, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47492 = torch.constant.int 1
    %int2_47493 = torch.constant.int 2
    %49930 = torch.aten.transpose.int %49870, %int1_47492, %int2_47493 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49930, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47494 = torch.constant.int 1
    %int2_47495 = torch.constant.int 2
    %49931 = torch.aten.transpose.int %49872, %int1_47494, %int2_47495 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49931, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47496 = torch.constant.int 1
    %int2_47497 = torch.constant.int 2
    %49932 = torch.aten.transpose.int %49874, %int1_47496, %int2_47497 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49932, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47498 = torch.constant.int 1
    %int2_47499 = torch.constant.int 2
    %49933 = torch.aten.transpose.int %49876, %int1_47498, %int2_47499 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49933, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47500 = torch.constant.int 1
    %int2_47501 = torch.constant.int 2
    %49934 = torch.aten.transpose.int %49903, %int1_47500, %int2_47501 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49934, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47502 = torch.constant.int 1
    %int2_47503 = torch.constant.int 2
    %49935 = torch.aten.transpose.int %49905, %int1_47502, %int2_47503 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49935, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47504 = torch.constant.int 1
    %int2_47505 = torch.constant.int 2
    %49936 = torch.aten.transpose.int %49907, %int1_47504, %int2_47505 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49936, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47506 = torch.constant.int 1
    %int2_47507 = torch.constant.int 2
    %49937 = torch.aten.transpose.int %49909, %int1_47506, %int2_47507 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49937, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47508 = torch.constant.int 1
    %int2_47509 = torch.constant.int 2
    %49938 = torch.aten.transpose.int %49911, %int1_47508, %int2_47509 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49938, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47510 = torch.constant.int 1
    %int2_47511 = torch.constant.int 2
    %49939 = torch.aten.transpose.int %49913, %int1_47510, %int2_47511 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49939, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47512 = torch.constant.int 1
    %int2_47513 = torch.constant.int 2
    %49940 = torch.aten.transpose.int %49915, %int1_47512, %int2_47513 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49940, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_47514 = torch.constant.int 1
    %int2_47515 = torch.constant.int 2
    %49941 = torch.aten.transpose.int %49917, %int1_47514, %int2_47515 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %49941, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
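    // Eight scaled-dot-product flash-attention calls follow, one per device shard,
    // with dropout_p = 0.0, is_causal = true, and no attention mask or explicit
    // scale (the two trailing none operands). Each call also returns a [4, 4, seq]
    // f32 logsumexp alongside the attention output.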
    %float0.000000e00_47516 = torch.constant.float 0.000000e+00
    %true_47517 = torch.constant.bool true
    %none_47518 = torch.constant.none
    %none_47519 = torch.constant.none
    %49942:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%49918, %49926, %49934, %float0.000000e00_47516, %true_47517, %none_47518, %none_47519) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %49942#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_47520 = torch.constant.float 0.000000e+00
    %true_47521 = torch.constant.bool true
    %none_47522 = torch.constant.none
    %none_47523 = torch.constant.none
    %49943:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%49919, %49927, %49935, %float0.000000e00_47520, %true_47521, %none_47522, %none_47523) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %49943#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_47524 = torch.constant.float 0.000000e+00
    %true_47525 = torch.constant.bool true
    %none_47526 = torch.constant.none
    %none_47527 = torch.constant.none
    %49944:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%49920, %49928, %49936, %float0.000000e00_47524, %true_47525, %none_47526, %none_47527) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %49944#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_47528 = torch.constant.float 0.000000e+00
    %true_47529 = torch.constant.bool true
    %none_47530 = torch.constant.none
    %none_47531 = torch.constant.none
    %49945:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%49921, %49929, %49937, %float0.000000e00_47528, %true_47529, %none_47530, %none_47531) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %49945#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_47532 = torch.constant.float 0.000000e+00
    %true_47533 = torch.constant.bool true
    %none_47534 = torch.constant.none
    %none_47535 = torch.constant.none
    %49946:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%49922, %49930, %49938, %float0.000000e00_47532, %true_47533, %none_47534, %none_47535) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %49946#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_47536 = torch.constant.float 0.000000e+00
    %true_47537 = torch.constant.bool true
    %none_47538 = torch.constant.none
    %none_47539 = torch.constant.none
    %49947:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%49923, %49931, %49939, %float0.000000e00_47536, %true_47537, %none_47538, %none_47539) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %49947#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_47540 = torch.constant.float 0.000000e+00
    %true_47541 = torch.constant.bool true
    %none_47542 = torch.constant.none
    %none_47543 = torch.constant.none
    %49948:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%49924, %49932, %49940, %float0.000000e00_47540, %true_47541, %none_47542, %none_47543) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %49948#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_47544 = torch.constant.float 0.000000e+00
    %true_47545 = torch.constant.bool true
    %none_47546 = torch.constant.none
    %none_47547 = torch.constant.none
    %49949:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%49925, %49933, %49941, %float0.000000e00_47544, %true_47545, %none_47546, %none_47547) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %49949#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
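    // Transpose each attention output back to (batch, seq, heads, head_dim).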
    %int1_47548 = torch.constant.int 1
    %int2_47549 = torch.constant.int 2
    %49950 = torch.aten.transpose.int %49942#0, %int1_47548, %int2_47549 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_47550 = torch.constant.int 1
    %int2_47551 = torch.constant.int 2
    %49951 = torch.aten.transpose.int %49943#0, %int1_47550, %int2_47551 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_47552 = torch.constant.int 1
    %int2_47553 = torch.constant.int 2
    %49952 = torch.aten.transpose.int %49944#0, %int1_47552, %int2_47553 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_47554 = torch.constant.int 1
    %int2_47555 = torch.constant.int 2
    %49953 = torch.aten.transpose.int %49945#0, %int1_47554, %int2_47555 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_47556 = torch.constant.int 1
    %int2_47557 = torch.constant.int 2
    %49954 = torch.aten.transpose.int %49946#0, %int1_47556, %int2_47557 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_47558 = torch.constant.int 1
    %int2_47559 = torch.constant.int 2
    %49955 = torch.aten.transpose.int %49947#0, %int1_47558, %int2_47559 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_47560 = torch.constant.int 1
    %int2_47561 = torch.constant.int 2
    %49956 = torch.aten.transpose.int %49948#0, %int1_47560, %int2_47561 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_47562 = torch.constant.int 1
    %int2_47563 = torch.constant.int 2
    %49957 = torch.aten.transpose.int %49949#0, %int1_47562, %int2_47563 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %49957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
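    // Collapse each shard's 4 heads x 128 head_dim into a single 512-wide feature
    // axis: [4, seq, 4, 128] -> [4, seq, 512].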
    %int4_47564 = torch.constant.int 4
    %int512_47565 = torch.constant.int 512
    %49958 = torch.prim.ListConstruct %int4_47564, %49279, %int512_47565 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49959 = torch.aten.view %49950, %49958 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_47566 = torch.constant.int 4
    %int512_47567 = torch.constant.int 512
    %49960 = torch.prim.ListConstruct %int4_47566, %49294, %int512_47567 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49961 = torch.aten.view %49951, %49960 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_47568 = torch.constant.int 4
    %int512_47569 = torch.constant.int 512
    %49962 = torch.prim.ListConstruct %int4_47568, %49309, %int512_47569 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49963 = torch.aten.view %49952, %49962 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_47570 = torch.constant.int 4
    %int512_47571 = torch.constant.int 512
    %49964 = torch.prim.ListConstruct %int4_47570, %49324, %int512_47571 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49965 = torch.aten.view %49953, %49964 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_47572 = torch.constant.int 4
    %int512_47573 = torch.constant.int 512
    %49966 = torch.prim.ListConstruct %int4_47572, %49339, %int512_47573 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49967 = torch.aten.view %49954, %49966 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_47574 = torch.constant.int 4
    %int512_47575 = torch.constant.int 512
    %49968 = torch.prim.ListConstruct %int4_47574, %49354, %int512_47575 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49969 = torch.aten.view %49955, %49968 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_47576 = torch.constant.int 4
    %int512_47577 = torch.constant.int 512
    %49970 = torch.prim.ListConstruct %int4_47576, %49369, %int512_47577 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49971 = torch.aten.view %49956, %49970 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_47578 = torch.constant.int 4
    %int512_47579 = torch.constant.int 512
    %49972 = torch.prim.ListConstruct %int4_47578, %49384, %int512_47579 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49973 = torch.aten.view %49957, %49972 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %49973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
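    // Permute the eight per-shard [4096, 512] weights %1840..%1847 (which appear to
    // be the attention output-projection shards) to [512, 4096] so they can be
    // right-multiplied by the flattened attention outputs below.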
    %int1_47580 = torch.constant.int 1
    %int0_47581 = torch.constant.int 0
    %49974 = torch.prim.ListConstruct %int1_47580, %int0_47581 : (!torch.int, !torch.int) -> !torch.list<int>
    %49975 = torch.aten.permute %1840, %49974 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_47582 = torch.constant.int 1
    %int0_47583 = torch.constant.int 0
    %49976 = torch.prim.ListConstruct %int1_47582, %int0_47583 : (!torch.int, !torch.int) -> !torch.list<int>
    %49977 = torch.aten.permute %1841, %49976 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_47584 = torch.constant.int 1
    %int0_47585 = torch.constant.int 0
    %49978 = torch.prim.ListConstruct %int1_47584, %int0_47585 : (!torch.int, !torch.int) -> !torch.list<int>
    %49979 = torch.aten.permute %1842, %49978 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_47586 = torch.constant.int 1
    %int0_47587 = torch.constant.int 0
    %49980 = torch.prim.ListConstruct %int1_47586, %int0_47587 : (!torch.int, !torch.int) -> !torch.list<int>
    %49981 = torch.aten.permute %1843, %49980 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_47588 = torch.constant.int 1
    %int0_47589 = torch.constant.int 0
    %49982 = torch.prim.ListConstruct %int1_47588, %int0_47589 : (!torch.int, !torch.int) -> !torch.list<int>
    %49983 = torch.aten.permute %1844, %49982 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_47590 = torch.constant.int 1
    %int0_47591 = torch.constant.int 0
    %49984 = torch.prim.ListConstruct %int1_47590, %int0_47591 : (!torch.int, !torch.int) -> !torch.list<int>
    %49985 = torch.aten.permute %1845, %49984 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_47592 = torch.constant.int 1
    %int0_47593 = torch.constant.int 0
    %49986 = torch.prim.ListConstruct %int1_47592, %int0_47593 : (!torch.int, !torch.int) -> !torch.list<int>
    %49987 = torch.aten.permute %1846, %49986 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_47594 = torch.constant.int 1
    %int0_47595 = torch.constant.int 0
    %49988 = torch.prim.ListConstruct %int1_47594, %int0_47595 : (!torch.int, !torch.int) -> !torch.list<int>
    %49989 = torch.aten.permute %1847, %49988 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
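    // Per shard: flatten the attention output to [4*seq, 512], apply the output
    // projection with mm, and view the result back to [4, seq, 4096].
    // Illustrative PyTorch-style sketch (names hypothetical):
    //   y = (attn_out.reshape(4 * seq, 512) @ w_o.T).view(4, seq, 4096)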
    %int4_47596 = torch.constant.int 4
    %49990 = torch.aten.mul.int %int4_47596, %49279 : !torch.int, !torch.int -> !torch.int
    %int512_47597 = torch.constant.int 512
    %49991 = torch.prim.ListConstruct %49990, %int512_47597 : (!torch.int, !torch.int) -> !torch.list<int>
    %49992 = torch.aten.view %49959, %49991 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %49992, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %49993 = torch.aten.mm %49992, %49975 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49993, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_47598 = torch.constant.int 4
    %int4096_47599 = torch.constant.int 4096
    %49994 = torch.prim.ListConstruct %int4_47598, %49279, %int4096_47599 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %49995 = torch.aten.view %49993, %49994 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %49995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_47600 = torch.constant.int 4
    %49996 = torch.aten.mul.int %int4_47600, %49294 : !torch.int, !torch.int -> !torch.int
    %int512_47601 = torch.constant.int 512
    %49997 = torch.prim.ListConstruct %49996, %int512_47601 : (!torch.int, !torch.int) -> !torch.list<int>
    %49998 = torch.aten.view %49961, %49997 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %49998, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %49999 = torch.aten.mm %49998, %49977 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %49999, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_47602 = torch.constant.int 4
    %int4096_47603 = torch.constant.int 4096
    %50000 = torch.prim.ListConstruct %int4_47602, %49294, %int4096_47603 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50001 = torch.aten.view %49999, %50000 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_47604 = torch.constant.int 4
    %50002 = torch.aten.mul.int %int4_47604, %49309 : !torch.int, !torch.int -> !torch.int
    %int512_47605 = torch.constant.int 512
    %50003 = torch.prim.ListConstruct %50002, %int512_47605 : (!torch.int, !torch.int) -> !torch.list<int>
    %50004 = torch.aten.view %49963, %50003 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50004, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %50005 = torch.aten.mm %50004, %49979 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50005, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_47606 = torch.constant.int 4
    %int4096_47607 = torch.constant.int 4096
    %50006 = torch.prim.ListConstruct %int4_47606, %49309, %int4096_47607 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50007 = torch.aten.view %50005, %50006 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_47608 = torch.constant.int 4
    %50008 = torch.aten.mul.int %int4_47608, %49324 : !torch.int, !torch.int -> !torch.int
    %int512_47609 = torch.constant.int 512
    %50009 = torch.prim.ListConstruct %50008, %int512_47609 : (!torch.int, !torch.int) -> !torch.list<int>
    %50010 = torch.aten.view %49965, %50009 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50010, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %50011 = torch.aten.mm %50010, %49981 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50011, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_47610 = torch.constant.int 4
    %int4096_47611 = torch.constant.int 4096
    %50012 = torch.prim.ListConstruct %int4_47610, %49324, %int4096_47611 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50013 = torch.aten.view %50011, %50012 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_47612 = torch.constant.int 4
    %50014 = torch.aten.mul.int %int4_47612, %49339 : !torch.int, !torch.int -> !torch.int
    %int512_47613 = torch.constant.int 512
    %50015 = torch.prim.ListConstruct %50014, %int512_47613 : (!torch.int, !torch.int) -> !torch.list<int>
    %50016 = torch.aten.view %49967, %50015 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50016, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %50017 = torch.aten.mm %50016, %49983 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50017, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_47614 = torch.constant.int 4
    %int4096_47615 = torch.constant.int 4096
    %50018 = torch.prim.ListConstruct %int4_47614, %49339, %int4096_47615 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50019 = torch.aten.view %50017, %50018 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_47616 = torch.constant.int 4
    %50020 = torch.aten.mul.int %int4_47616, %49354 : !torch.int, !torch.int -> !torch.int
    %int512_47617 = torch.constant.int 512
    %50021 = torch.prim.ListConstruct %50020, %int512_47617 : (!torch.int, !torch.int) -> !torch.list<int>
    %50022 = torch.aten.view %49969, %50021 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50022, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %50023 = torch.aten.mm %50022, %49985 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50023, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_47618 = torch.constant.int 4
    %int4096_47619 = torch.constant.int 4096
    %50024 = torch.prim.ListConstruct %int4_47618, %49354, %int4096_47619 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50025 = torch.aten.view %50023, %50024 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_47620 = torch.constant.int 4
    %50026 = torch.aten.mul.int %int4_47620, %49369 : !torch.int, !torch.int -> !torch.int
    %int512_47621 = torch.constant.int 512
    %50027 = torch.prim.ListConstruct %50026, %int512_47621 : (!torch.int, !torch.int) -> !torch.list<int>
    %50028 = torch.aten.view %49971, %50027 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50028, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %50029 = torch.aten.mm %50028, %49987 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50029, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_47622 = torch.constant.int 4
    %int4096_47623 = torch.constant.int 4096
    %50030 = torch.prim.ListConstruct %int4_47622, %49369, %int4096_47623 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50031 = torch.aten.view %50029, %50030 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_47624 = torch.constant.int 4
    %50032 = torch.aten.mul.int %int4_47624, %49384 : !torch.int, !torch.int -> !torch.int
    %int512_47625 = torch.constant.int 512
    %50033 = torch.prim.ListConstruct %50032, %int512_47625 : (!torch.int, !torch.int) -> !torch.list<int>
    %50034 = torch.aten.view %49973, %50033 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50034, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %50035 = torch.aten.mm %50034, %49989 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50035, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_47626 = torch.constant.int 4
    %int4096_47627 = torch.constant.int 4096
    %50036 = torch.prim.ListConstruct %int4_47626, %49384, %int4096_47627 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50037 = torch.aten.view %50035, %50036 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
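    // All eight partial products (%49995, %50001, ..., %50037) are now materialized.
    // What follows is an unrolled all-reduce (sum): for each device in turn, the seven
    // remote partials are copied in with flow.tensor.transfer and accumulated with the
    // local one, so every device ends up holding the same full [4,?,4096] sum.
    // First, device 0's copy: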
    %50038 = torch_c.to_builtin_tensor %50001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47628 = arith.constant 1 : index
    %dim_47629 = tensor.dim %50038, %c1_47628 : tensor<4x?x4096xf16>
    %50039 = flow.tensor.transfer %50038 : tensor<4x?x4096xf16>{%dim_47629} to #hal.device.promise<@__device_0>
    %50040 = torch_c.from_builtin_tensor %50039 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50041 = torch_c.to_builtin_tensor %50007 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47630 = arith.constant 1 : index
    %dim_47631 = tensor.dim %50041, %c1_47630 : tensor<4x?x4096xf16>
    %50042 = flow.tensor.transfer %50041 : tensor<4x?x4096xf16>{%dim_47631} to #hal.device.promise<@__device_0>
    %50043 = torch_c.from_builtin_tensor %50042 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50044 = torch_c.to_builtin_tensor %50013 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47632 = arith.constant 1 : index
    %dim_47633 = tensor.dim %50044, %c1_47632 : tensor<4x?x4096xf16>
    %50045 = flow.tensor.transfer %50044 : tensor<4x?x4096xf16>{%dim_47633} to #hal.device.promise<@__device_0>
    %50046 = torch_c.from_builtin_tensor %50045 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50047 = torch_c.to_builtin_tensor %50019 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47634 = arith.constant 1 : index
    %dim_47635 = tensor.dim %50047, %c1_47634 : tensor<4x?x4096xf16>
    %50048 = flow.tensor.transfer %50047 : tensor<4x?x4096xf16>{%dim_47635} to #hal.device.promise<@__device_0>
    %50049 = torch_c.from_builtin_tensor %50048 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50050 = torch_c.to_builtin_tensor %50025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47636 = arith.constant 1 : index
    %dim_47637 = tensor.dim %50050, %c1_47636 : tensor<4x?x4096xf16>
    %50051 = flow.tensor.transfer %50050 : tensor<4x?x4096xf16>{%dim_47637} to #hal.device.promise<@__device_0>
    %50052 = torch_c.from_builtin_tensor %50051 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50053 = torch_c.to_builtin_tensor %50031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47638 = arith.constant 1 : index
    %dim_47639 = tensor.dim %50053, %c1_47638 : tensor<4x?x4096xf16>
    %50054 = flow.tensor.transfer %50053 : tensor<4x?x4096xf16>{%dim_47639} to #hal.device.promise<@__device_0>
    %50055 = torch_c.from_builtin_tensor %50054 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50056 = torch_c.to_builtin_tensor %50037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47640 = arith.constant 1 : index
    %dim_47641 = tensor.dim %50056, %c1_47640 : tensor<4x?x4096xf16>
    %50057 = flow.tensor.transfer %50056 : tensor<4x?x4096xf16>{%dim_47641} to #hal.device.promise<@__device_0>
    %50058 = torch_c.from_builtin_tensor %50057 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47642 = torch.constant.int 1
    %50059 = torch.aten.add.Tensor %49995, %50040, %int1_47642 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47643 = torch.constant.int 1
    %50060 = torch.aten.add.Tensor %50059, %50043, %int1_47643 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47644 = torch.constant.int 1
    %50061 = torch.aten.add.Tensor %50060, %50046, %int1_47644 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47645 = torch.constant.int 1
    %50062 = torch.aten.add.Tensor %50061, %50049, %int1_47645 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47646 = torch.constant.int 1
    %50063 = torch.aten.add.Tensor %50062, %50052, %int1_47646 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47647 = torch.constant.int 1
    %50064 = torch.aten.add.Tensor %50063, %50055, %int1_47647 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47648 = torch.constant.int 1
    %50065 = torch.aten.add.Tensor %50064, %50058, %int1_47648 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
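    // Device 0's reduced sum is %50065. Same transfer-and-accumulate pattern for device 1: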
    %50066 = torch_c.to_builtin_tensor %49995 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47649 = arith.constant 1 : index
    %dim_47650 = tensor.dim %50066, %c1_47649 : tensor<4x?x4096xf16>
    %50067 = flow.tensor.transfer %50066 : tensor<4x?x4096xf16>{%dim_47650} to #hal.device.promise<@__device_1>
    %50068 = torch_c.from_builtin_tensor %50067 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50069 = torch_c.to_builtin_tensor %50007 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47651 = arith.constant 1 : index
    %dim_47652 = tensor.dim %50069, %c1_47651 : tensor<4x?x4096xf16>
    %50070 = flow.tensor.transfer %50069 : tensor<4x?x4096xf16>{%dim_47652} to #hal.device.promise<@__device_1>
    %50071 = torch_c.from_builtin_tensor %50070 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50072 = torch_c.to_builtin_tensor %50013 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47653 = arith.constant 1 : index
    %dim_47654 = tensor.dim %50072, %c1_47653 : tensor<4x?x4096xf16>
    %50073 = flow.tensor.transfer %50072 : tensor<4x?x4096xf16>{%dim_47654} to #hal.device.promise<@__device_1>
    %50074 = torch_c.from_builtin_tensor %50073 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50075 = torch_c.to_builtin_tensor %50019 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47655 = arith.constant 1 : index
    %dim_47656 = tensor.dim %50075, %c1_47655 : tensor<4x?x4096xf16>
    %50076 = flow.tensor.transfer %50075 : tensor<4x?x4096xf16>{%dim_47656} to #hal.device.promise<@__device_1>
    %50077 = torch_c.from_builtin_tensor %50076 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50078 = torch_c.to_builtin_tensor %50025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47657 = arith.constant 1 : index
    %dim_47658 = tensor.dim %50078, %c1_47657 : tensor<4x?x4096xf16>
    %50079 = flow.tensor.transfer %50078 : tensor<4x?x4096xf16>{%dim_47658} to #hal.device.promise<@__device_1>
    %50080 = torch_c.from_builtin_tensor %50079 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50081 = torch_c.to_builtin_tensor %50031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47659 = arith.constant 1 : index
    %dim_47660 = tensor.dim %50081, %c1_47659 : tensor<4x?x4096xf16>
    %50082 = flow.tensor.transfer %50081 : tensor<4x?x4096xf16>{%dim_47660} to #hal.device.promise<@__device_1>
    %50083 = torch_c.from_builtin_tensor %50082 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50084 = torch_c.to_builtin_tensor %50037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47661 = arith.constant 1 : index
    %dim_47662 = tensor.dim %50084, %c1_47661 : tensor<4x?x4096xf16>
    %50085 = flow.tensor.transfer %50084 : tensor<4x?x4096xf16>{%dim_47662} to #hal.device.promise<@__device_1>
    %50086 = torch_c.from_builtin_tensor %50085 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47663 = torch.constant.int 1
    %50087 = torch.aten.add.Tensor %50068, %50001, %int1_47663 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47664 = torch.constant.int 1
    %50088 = torch.aten.add.Tensor %50087, %50071, %int1_47664 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47665 = torch.constant.int 1
    %50089 = torch.aten.add.Tensor %50088, %50074, %int1_47665 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47666 = torch.constant.int 1
    %50090 = torch.aten.add.Tensor %50089, %50077, %int1_47666 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47667 = torch.constant.int 1
    %50091 = torch.aten.add.Tensor %50090, %50080, %int1_47667 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47668 = torch.constant.int 1
    %50092 = torch.aten.add.Tensor %50091, %50083, %int1_47668 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47669 = torch.constant.int 1
    %50093 = torch.aten.add.Tensor %50092, %50086, %int1_47669 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
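    // Device 1's reduced sum is %50093. Next, device 2: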
    %50094 = torch_c.to_builtin_tensor %49995 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47670 = arith.constant 1 : index
    %dim_47671 = tensor.dim %50094, %c1_47670 : tensor<4x?x4096xf16>
    %50095 = flow.tensor.transfer %50094 : tensor<4x?x4096xf16>{%dim_47671} to #hal.device.promise<@__device_2>
    %50096 = torch_c.from_builtin_tensor %50095 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50097 = torch_c.to_builtin_tensor %50001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47672 = arith.constant 1 : index
    %dim_47673 = tensor.dim %50097, %c1_47672 : tensor<4x?x4096xf16>
    %50098 = flow.tensor.transfer %50097 : tensor<4x?x4096xf16>{%dim_47673} to #hal.device.promise<@__device_2>
    %50099 = torch_c.from_builtin_tensor %50098 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50100 = torch_c.to_builtin_tensor %50013 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47674 = arith.constant 1 : index
    %dim_47675 = tensor.dim %50100, %c1_47674 : tensor<4x?x4096xf16>
    %50101 = flow.tensor.transfer %50100 : tensor<4x?x4096xf16>{%dim_47675} to #hal.device.promise<@__device_2>
    %50102 = torch_c.from_builtin_tensor %50101 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50103 = torch_c.to_builtin_tensor %50019 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47676 = arith.constant 1 : index
    %dim_47677 = tensor.dim %50103, %c1_47676 : tensor<4x?x4096xf16>
    %50104 = flow.tensor.transfer %50103 : tensor<4x?x4096xf16>{%dim_47677} to #hal.device.promise<@__device_2>
    %50105 = torch_c.from_builtin_tensor %50104 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50106 = torch_c.to_builtin_tensor %50025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47678 = arith.constant 1 : index
    %dim_47679 = tensor.dim %50106, %c1_47678 : tensor<4x?x4096xf16>
    %50107 = flow.tensor.transfer %50106 : tensor<4x?x4096xf16>{%dim_47679} to #hal.device.promise<@__device_2>
    %50108 = torch_c.from_builtin_tensor %50107 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50109 = torch_c.to_builtin_tensor %50031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47680 = arith.constant 1 : index
    %dim_47681 = tensor.dim %50109, %c1_47680 : tensor<4x?x4096xf16>
    %50110 = flow.tensor.transfer %50109 : tensor<4x?x4096xf16>{%dim_47681} to #hal.device.promise<@__device_2>
    %50111 = torch_c.from_builtin_tensor %50110 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50112 = torch_c.to_builtin_tensor %50037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47682 = arith.constant 1 : index
    %dim_47683 = tensor.dim %50112, %c1_47682 : tensor<4x?x4096xf16>
    %50113 = flow.tensor.transfer %50112 : tensor<4x?x4096xf16>{%dim_47683} to #hal.device.promise<@__device_2>
    %50114 = torch_c.from_builtin_tensor %50113 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47684 = torch.constant.int 1
    %50115 = torch.aten.add.Tensor %50096, %50099, %int1_47684 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47685 = torch.constant.int 1
    %50116 = torch.aten.add.Tensor %50115, %50007, %int1_47685 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47686 = torch.constant.int 1
    %50117 = torch.aten.add.Tensor %50116, %50102, %int1_47686 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47687 = torch.constant.int 1
    %50118 = torch.aten.add.Tensor %50117, %50105, %int1_47687 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47688 = torch.constant.int 1
    %50119 = torch.aten.add.Tensor %50118, %50108, %int1_47688 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47689 = torch.constant.int 1
    %50120 = torch.aten.add.Tensor %50119, %50111, %int1_47689 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47690 = torch.constant.int 1
    %50121 = torch.aten.add.Tensor %50120, %50114, %int1_47690 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
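    // Device 2's reduced sum is %50121. Next, device 3: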
    %50122 = torch_c.to_builtin_tensor %49995 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47691 = arith.constant 1 : index
    %dim_47692 = tensor.dim %50122, %c1_47691 : tensor<4x?x4096xf16>
    %50123 = flow.tensor.transfer %50122 : tensor<4x?x4096xf16>{%dim_47692} to #hal.device.promise<@__device_3>
    %50124 = torch_c.from_builtin_tensor %50123 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50124, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50125 = torch_c.to_builtin_tensor %50001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47693 = arith.constant 1 : index
    %dim_47694 = tensor.dim %50125, %c1_47693 : tensor<4x?x4096xf16>
    %50126 = flow.tensor.transfer %50125 : tensor<4x?x4096xf16>{%dim_47694} to #hal.device.promise<@__device_3>
    %50127 = torch_c.from_builtin_tensor %50126 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50128 = torch_c.to_builtin_tensor %50007 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47695 = arith.constant 1 : index
    %dim_47696 = tensor.dim %50128, %c1_47695 : tensor<4x?x4096xf16>
    %50129 = flow.tensor.transfer %50128 : tensor<4x?x4096xf16>{%dim_47696} to #hal.device.promise<@__device_3>
    %50130 = torch_c.from_builtin_tensor %50129 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50131 = torch_c.to_builtin_tensor %50019 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47697 = arith.constant 1 : index
    %dim_47698 = tensor.dim %50131, %c1_47697 : tensor<4x?x4096xf16>
    %50132 = flow.tensor.transfer %50131 : tensor<4x?x4096xf16>{%dim_47698} to #hal.device.promise<@__device_3>
    %50133 = torch_c.from_builtin_tensor %50132 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50134 = torch_c.to_builtin_tensor %50025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47699 = arith.constant 1 : index
    %dim_47700 = tensor.dim %50134, %c1_47699 : tensor<4x?x4096xf16>
    %50135 = flow.tensor.transfer %50134 : tensor<4x?x4096xf16>{%dim_47700} to #hal.device.promise<@__device_3>
    %50136 = torch_c.from_builtin_tensor %50135 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50137 = torch_c.to_builtin_tensor %50031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47701 = arith.constant 1 : index
    %dim_47702 = tensor.dim %50137, %c1_47701 : tensor<4x?x4096xf16>
    %50138 = flow.tensor.transfer %50137 : tensor<4x?x4096xf16>{%dim_47702} to #hal.device.promise<@__device_3>
    %50139 = torch_c.from_builtin_tensor %50138 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50140 = torch_c.to_builtin_tensor %50037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47703 = arith.constant 1 : index
    %dim_47704 = tensor.dim %50140, %c1_47703 : tensor<4x?x4096xf16>
    %50141 = flow.tensor.transfer %50140 : tensor<4x?x4096xf16>{%dim_47704} to #hal.device.promise<@__device_3>
    %50142 = torch_c.from_builtin_tensor %50141 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47705 = torch.constant.int 1
    %50143 = torch.aten.add.Tensor %50124, %50127, %int1_47705 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47706 = torch.constant.int 1
    %50144 = torch.aten.add.Tensor %50143, %50130, %int1_47706 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47707 = torch.constant.int 1
    %50145 = torch.aten.add.Tensor %50144, %50013, %int1_47707 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47708 = torch.constant.int 1
    %50146 = torch.aten.add.Tensor %50145, %50133, %int1_47708 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47709 = torch.constant.int 1
    %50147 = torch.aten.add.Tensor %50146, %50136, %int1_47709 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47710 = torch.constant.int 1
    %50148 = torch.aten.add.Tensor %50147, %50139, %int1_47710 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47711 = torch.constant.int 1
    %50149 = torch.aten.add.Tensor %50148, %50142, %int1_47711 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
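    // Device 3's reduced sum is %50149. Next, device 4: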
    %50150 = torch_c.to_builtin_tensor %49995 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47712 = arith.constant 1 : index
    %dim_47713 = tensor.dim %50150, %c1_47712 : tensor<4x?x4096xf16>
    %50151 = flow.tensor.transfer %50150 : tensor<4x?x4096xf16>{%dim_47713} to #hal.device.promise<@__device_4>
    %50152 = torch_c.from_builtin_tensor %50151 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50153 = torch_c.to_builtin_tensor %50001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47714 = arith.constant 1 : index
    %dim_47715 = tensor.dim %50153, %c1_47714 : tensor<4x?x4096xf16>
    %50154 = flow.tensor.transfer %50153 : tensor<4x?x4096xf16>{%dim_47715} to #hal.device.promise<@__device_4>
    %50155 = torch_c.from_builtin_tensor %50154 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50156 = torch_c.to_builtin_tensor %50007 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47716 = arith.constant 1 : index
    %dim_47717 = tensor.dim %50156, %c1_47716 : tensor<4x?x4096xf16>
    %50157 = flow.tensor.transfer %50156 : tensor<4x?x4096xf16>{%dim_47717} to #hal.device.promise<@__device_4>
    %50158 = torch_c.from_builtin_tensor %50157 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50159 = torch_c.to_builtin_tensor %50013 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47718 = arith.constant 1 : index
    %dim_47719 = tensor.dim %50159, %c1_47718 : tensor<4x?x4096xf16>
    %50160 = flow.tensor.transfer %50159 : tensor<4x?x4096xf16>{%dim_47719} to #hal.device.promise<@__device_4>
    %50161 = torch_c.from_builtin_tensor %50160 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50162 = torch_c.to_builtin_tensor %50025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47720 = arith.constant 1 : index
    %dim_47721 = tensor.dim %50162, %c1_47720 : tensor<4x?x4096xf16>
    %50163 = flow.tensor.transfer %50162 : tensor<4x?x4096xf16>{%dim_47721} to #hal.device.promise<@__device_4>
    %50164 = torch_c.from_builtin_tensor %50163 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50165 = torch_c.to_builtin_tensor %50031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47722 = arith.constant 1 : index
    %dim_47723 = tensor.dim %50165, %c1_47722 : tensor<4x?x4096xf16>
    %50166 = flow.tensor.transfer %50165 : tensor<4x?x4096xf16>{%dim_47723} to #hal.device.promise<@__device_4>
    %50167 = torch_c.from_builtin_tensor %50166 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50168 = torch_c.to_builtin_tensor %50037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47724 = arith.constant 1 : index
    %dim_47725 = tensor.dim %50168, %c1_47724 : tensor<4x?x4096xf16>
    %50169 = flow.tensor.transfer %50168 : tensor<4x?x4096xf16>{%dim_47725} to #hal.device.promise<@__device_4>
    %50170 = torch_c.from_builtin_tensor %50169 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47726 = torch.constant.int 1
    %50171 = torch.aten.add.Tensor %50152, %50155, %int1_47726 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47727 = torch.constant.int 1
    %50172 = torch.aten.add.Tensor %50171, %50158, %int1_47727 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47728 = torch.constant.int 1
    %50173 = torch.aten.add.Tensor %50172, %50161, %int1_47728 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47729 = torch.constant.int 1
    %50174 = torch.aten.add.Tensor %50173, %50019, %int1_47729 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47730 = torch.constant.int 1
    %50175 = torch.aten.add.Tensor %50174, %50164, %int1_47730 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47731 = torch.constant.int 1
    %50176 = torch.aten.add.Tensor %50175, %50167, %int1_47731 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47732 = torch.constant.int 1
    %50177 = torch.aten.add.Tensor %50176, %50170, %int1_47732 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
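    // Device 4's reduced sum is %50177. Next, device 5: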
    %50178 = torch_c.to_builtin_tensor %49995 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47733 = arith.constant 1 : index
    %dim_47734 = tensor.dim %50178, %c1_47733 : tensor<4x?x4096xf16>
    %50179 = flow.tensor.transfer %50178 : tensor<4x?x4096xf16>{%dim_47734} to #hal.device.promise<@__device_5>
    %50180 = torch_c.from_builtin_tensor %50179 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50181 = torch_c.to_builtin_tensor %50001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47735 = arith.constant 1 : index
    %dim_47736 = tensor.dim %50181, %c1_47735 : tensor<4x?x4096xf16>
    %50182 = flow.tensor.transfer %50181 : tensor<4x?x4096xf16>{%dim_47736} to #hal.device.promise<@__device_5>
    %50183 = torch_c.from_builtin_tensor %50182 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50184 = torch_c.to_builtin_tensor %50007 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47737 = arith.constant 1 : index
    %dim_47738 = tensor.dim %50184, %c1_47737 : tensor<4x?x4096xf16>
    %50185 = flow.tensor.transfer %50184 : tensor<4x?x4096xf16>{%dim_47738} to #hal.device.promise<@__device_5>
    %50186 = torch_c.from_builtin_tensor %50185 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50187 = torch_c.to_builtin_tensor %50013 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47739 = arith.constant 1 : index
    %dim_47740 = tensor.dim %50187, %c1_47739 : tensor<4x?x4096xf16>
    %50188 = flow.tensor.transfer %50187 : tensor<4x?x4096xf16>{%dim_47740} to #hal.device.promise<@__device_5>
    %50189 = torch_c.from_builtin_tensor %50188 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50190 = torch_c.to_builtin_tensor %50019 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47741 = arith.constant 1 : index
    %dim_47742 = tensor.dim %50190, %c1_47741 : tensor<4x?x4096xf16>
    %50191 = flow.tensor.transfer %50190 : tensor<4x?x4096xf16>{%dim_47742} to #hal.device.promise<@__device_5>
    %50192 = torch_c.from_builtin_tensor %50191 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50193 = torch_c.to_builtin_tensor %50031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47743 = arith.constant 1 : index
    %dim_47744 = tensor.dim %50193, %c1_47743 : tensor<4x?x4096xf16>
    %50194 = flow.tensor.transfer %50193 : tensor<4x?x4096xf16>{%dim_47744} to #hal.device.promise<@__device_5>
    %50195 = torch_c.from_builtin_tensor %50194 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50196 = torch_c.to_builtin_tensor %50037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47745 = arith.constant 1 : index
    %dim_47746 = tensor.dim %50196, %c1_47745 : tensor<4x?x4096xf16>
    %50197 = flow.tensor.transfer %50196 : tensor<4x?x4096xf16>{%dim_47746} to #hal.device.promise<@__device_5>
    %50198 = torch_c.from_builtin_tensor %50197 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47747 = torch.constant.int 1
    %50199 = torch.aten.add.Tensor %50180, %50183, %int1_47747 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47748 = torch.constant.int 1
    %50200 = torch.aten.add.Tensor %50199, %50186, %int1_47748 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47749 = torch.constant.int 1
    %50201 = torch.aten.add.Tensor %50200, %50189, %int1_47749 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47750 = torch.constant.int 1
    %50202 = torch.aten.add.Tensor %50201, %50192, %int1_47750 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47751 = torch.constant.int 1
    %50203 = torch.aten.add.Tensor %50202, %50025, %int1_47751 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47752 = torch.constant.int 1
    %50204 = torch.aten.add.Tensor %50203, %50195, %int1_47752 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47753 = torch.constant.int 1
    %50205 = torch.aten.add.Tensor %50204, %50198, %int1_47753 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
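    // Device 5's reduced sum is %50205. Next, device 6: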
    %50206 = torch_c.to_builtin_tensor %49995 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47754 = arith.constant 1 : index
    %dim_47755 = tensor.dim %50206, %c1_47754 : tensor<4x?x4096xf16>
    %50207 = flow.tensor.transfer %50206 : tensor<4x?x4096xf16>{%dim_47755} to #hal.device.promise<@__device_6>
    %50208 = torch_c.from_builtin_tensor %50207 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50209 = torch_c.to_builtin_tensor %50001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47756 = arith.constant 1 : index
    %dim_47757 = tensor.dim %50209, %c1_47756 : tensor<4x?x4096xf16>
    %50210 = flow.tensor.transfer %50209 : tensor<4x?x4096xf16>{%dim_47757} to #hal.device.promise<@__device_6>
    %50211 = torch_c.from_builtin_tensor %50210 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50211, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50212 = torch_c.to_builtin_tensor %50007 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47758 = arith.constant 1 : index
    %dim_47759 = tensor.dim %50212, %c1_47758 : tensor<4x?x4096xf16>
    %50213 = flow.tensor.transfer %50212 : tensor<4x?x4096xf16>{%dim_47759} to #hal.device.promise<@__device_6>
    %50214 = torch_c.from_builtin_tensor %50213 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50215 = torch_c.to_builtin_tensor %50013 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47760 = arith.constant 1 : index
    %dim_47761 = tensor.dim %50215, %c1_47760 : tensor<4x?x4096xf16>
    %50216 = flow.tensor.transfer %50215 : tensor<4x?x4096xf16>{%dim_47761} to #hal.device.promise<@__device_6>
    %50217 = torch_c.from_builtin_tensor %50216 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50218 = torch_c.to_builtin_tensor %50019 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47762 = arith.constant 1 : index
    %dim_47763 = tensor.dim %50218, %c1_47762 : tensor<4x?x4096xf16>
    %50219 = flow.tensor.transfer %50218 : tensor<4x?x4096xf16>{%dim_47763} to #hal.device.promise<@__device_6>
    %50220 = torch_c.from_builtin_tensor %50219 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50221 = torch_c.to_builtin_tensor %50025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47764 = arith.constant 1 : index
    %dim_47765 = tensor.dim %50221, %c1_47764 : tensor<4x?x4096xf16>
    %50222 = flow.tensor.transfer %50221 : tensor<4x?x4096xf16>{%dim_47765} to #hal.device.promise<@__device_6>
    %50223 = torch_c.from_builtin_tensor %50222 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50224 = torch_c.to_builtin_tensor %50037 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47766 = arith.constant 1 : index
    %dim_47767 = tensor.dim %50224, %c1_47766 : tensor<4x?x4096xf16>
    %50225 = flow.tensor.transfer %50224 : tensor<4x?x4096xf16>{%dim_47767} to #hal.device.promise<@__device_6>
    %50226 = torch_c.from_builtin_tensor %50225 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47768 = torch.constant.int 1
    %50227 = torch.aten.add.Tensor %50208, %50211, %int1_47768 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47769 = torch.constant.int 1
    %50228 = torch.aten.add.Tensor %50227, %50214, %int1_47769 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47770 = torch.constant.int 1
    %50229 = torch.aten.add.Tensor %50228, %50217, %int1_47770 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47771 = torch.constant.int 1
    %50230 = torch.aten.add.Tensor %50229, %50220, %int1_47771 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47772 = torch.constant.int 1
    %50231 = torch.aten.add.Tensor %50230, %50223, %int1_47772 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47773 = torch.constant.int 1
    %50232 = torch.aten.add.Tensor %50231, %50031, %int1_47773 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47774 = torch.constant.int 1
    %50233 = torch.aten.add.Tensor %50232, %50226, %int1_47774 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
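    // Device 6's reduced sum is %50233. Next, device 7: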
    %50234 = torch_c.to_builtin_tensor %49995 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47775 = arith.constant 1 : index
    %dim_47776 = tensor.dim %50234, %c1_47775 : tensor<4x?x4096xf16>
    %50235 = flow.tensor.transfer %50234 : tensor<4x?x4096xf16>{%dim_47776} to #hal.device.promise<@__device_7>
    %50236 = torch_c.from_builtin_tensor %50235 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50237 = torch_c.to_builtin_tensor %50001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47777 = arith.constant 1 : index
    %dim_47778 = tensor.dim %50237, %c1_47777 : tensor<4x?x4096xf16>
    %50238 = flow.tensor.transfer %50237 : tensor<4x?x4096xf16>{%dim_47778} to #hal.device.promise<@__device_7>
    %50239 = torch_c.from_builtin_tensor %50238 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50240 = torch_c.to_builtin_tensor %50007 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47779 = arith.constant 1 : index
    %dim_47780 = tensor.dim %50240, %c1_47779 : tensor<4x?x4096xf16>
    %50241 = flow.tensor.transfer %50240 : tensor<4x?x4096xf16>{%dim_47780} to #hal.device.promise<@__device_7>
    %50242 = torch_c.from_builtin_tensor %50241 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50243 = torch_c.to_builtin_tensor %50013 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47781 = arith.constant 1 : index
    %dim_47782 = tensor.dim %50243, %c1_47781 : tensor<4x?x4096xf16>
    %50244 = flow.tensor.transfer %50243 : tensor<4x?x4096xf16>{%dim_47782} to #hal.device.promise<@__device_7>
    %50245 = torch_c.from_builtin_tensor %50244 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50246 = torch_c.to_builtin_tensor %50019 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47783 = arith.constant 1 : index
    %dim_47784 = tensor.dim %50246, %c1_47783 : tensor<4x?x4096xf16>
    %50247 = flow.tensor.transfer %50246 : tensor<4x?x4096xf16>{%dim_47784} to #hal.device.promise<@__device_7>
    %50248 = torch_c.from_builtin_tensor %50247 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50249 = torch_c.to_builtin_tensor %50025 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47785 = arith.constant 1 : index
    %dim_47786 = tensor.dim %50249, %c1_47785 : tensor<4x?x4096xf16>
    %50250 = flow.tensor.transfer %50249 : tensor<4x?x4096xf16>{%dim_47786} to #hal.device.promise<@__device_7>
    %50251 = torch_c.from_builtin_tensor %50250 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50252 = torch_c.to_builtin_tensor %50031 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_47787 = arith.constant 1 : index
    %dim_47788 = tensor.dim %50252, %c1_47787 : tensor<4x?x4096xf16>
    %50253 = flow.tensor.transfer %50252 : tensor<4x?x4096xf16>{%dim_47788} to #hal.device.promise<@__device_7>
    %50254 = torch_c.from_builtin_tensor %50253 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47789 = torch.constant.int 1
    %50255 = torch.aten.add.Tensor %50236, %50239, %int1_47789 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47790 = torch.constant.int 1
    %50256 = torch.aten.add.Tensor %50255, %50242, %int1_47790 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47791 = torch.constant.int 1
    %50257 = torch.aten.add.Tensor %50256, %50245, %int1_47791 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47792 = torch.constant.int 1
    %50258 = torch.aten.add.Tensor %50257, %50248, %int1_47792 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47793 = torch.constant.int 1
    %50259 = torch.aten.add.Tensor %50258, %50251, %int1_47793 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47794 = torch.constant.int 1
    %50260 = torch.aten.add.Tensor %50259, %50254, %int1_47794 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47795 = torch.constant.int 1
    %50261 = torch.aten.add.Tensor %50260, %50037, %int1_47795 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
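    // Every device now holds its own copy of the reduced sum (%50065, %50093, %50121,
    // %50149, %50177, %50205, %50233, %50261). The eight adds below fold that sum into
    // each device's running activation (%48921 ... %48928), which matches the shape of a
    // transformer residual connection, yielding %50262 ... %50269.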
    %int1_47796 = torch.constant.int 1
    %50262 = torch.aten.add.Tensor %48921, %50065, %int1_47796 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47797 = torch.constant.int 1
    %50263 = torch.aten.add.Tensor %48922, %50093, %int1_47797 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50263, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47798 = torch.constant.int 1
    %50264 = torch.aten.add.Tensor %48923, %50121, %int1_47798 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47799 = torch.constant.int 1
    %50265 = torch.aten.add.Tensor %48924, %50149, %int1_47799 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47800 = torch.constant.int 1
    %50266 = torch.aten.add.Tensor %48925, %50177, %int1_47800 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47801 = torch.constant.int 1
    %50267 = torch.aten.add.Tensor %48926, %50205, %int1_47801 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47802 = torch.constant.int 1
    %50268 = torch.aten.add.Tensor %48927, %50233, %int1_47802 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47803 = torch.constant.int 1
    %50269 = torch.aten.add.Tensor %48928, %50261, %int1_47803 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
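    // ffn_norm-style RMSNorm, computed redundantly on each device.
    // Step 1: upcast f16 -> f32 (torch dtype code 6 = float32).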
    %int6_47804 = torch.constant.int 6
    %50270 = torch.prims.convert_element_type %50262, %int6_47804 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_47805 = torch.constant.int 6
    %50271 = torch.prims.convert_element_type %50263, %int6_47805 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_47806 = torch.constant.int 6
    %50272 = torch.prims.convert_element_type %50264, %int6_47806 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_47807 = torch.constant.int 6
    %50273 = torch.prims.convert_element_type %50265, %int6_47807 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_47808 = torch.constant.int 6
    %50274 = torch.prims.convert_element_type %50266, %int6_47808 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_47809 = torch.constant.int 6
    %50275 = torch.prims.convert_element_type %50267, %int6_47809 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_47810 = torch.constant.int 6
    %50276 = torch.prims.convert_element_type %50268, %int6_47810 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_47811 = torch.constant.int 6
    %50277 = torch.prims.convert_element_type %50269, %int6_47811 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
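    // Step 2: square each element (x^2).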
    %int2_47812 = torch.constant.int 2
    %50278 = torch.aten.pow.Tensor_Scalar %50270, %int2_47812 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_47813 = torch.constant.int 2
    %50279 = torch.aten.pow.Tensor_Scalar %50271, %int2_47813 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_47814 = torch.constant.int 2
    %50280 = torch.aten.pow.Tensor_Scalar %50272, %int2_47814 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_47815 = torch.constant.int 2
    %50281 = torch.aten.pow.Tensor_Scalar %50273, %int2_47815 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_47816 = torch.constant.int 2
    %50282 = torch.aten.pow.Tensor_Scalar %50274, %int2_47816 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_47817 = torch.constant.int 2
    %50283 = torch.aten.pow.Tensor_Scalar %50275, %int2_47817 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_47818 = torch.constant.int 2
    %50284 = torch.aten.pow.Tensor_Scalar %50276, %int2_47818 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_47819 = torch.constant.int 2
    %50285 = torch.aten.pow.Tensor_Scalar %50277, %int2_47819 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
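    // Step 3: mean of x^2 over the hidden dim (dim -1, keepdim=true) -> [4,?,1].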
    %int-1_47820 = torch.constant.int -1
    %50286 = torch.prim.ListConstruct %int-1_47820 : (!torch.int) -> !torch.list<int>
    %true_47821 = torch.constant.bool true
    %none_47822 = torch.constant.none
    %50287 = torch.aten.mean.dim %50278, %50286, %true_47821, %none_47822 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_47823 = torch.constant.int -1
    %50288 = torch.prim.ListConstruct %int-1_47823 : (!torch.int) -> !torch.list<int>
    %true_47824 = torch.constant.bool true
    %none_47825 = torch.constant.none
    %50289 = torch.aten.mean.dim %50279, %50288, %true_47824, %none_47825 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_47826 = torch.constant.int -1
    %50290 = torch.prim.ListConstruct %int-1_47826 : (!torch.int) -> !torch.list<int>
    %true_47827 = torch.constant.bool true
    %none_47828 = torch.constant.none
    %50291 = torch.aten.mean.dim %50280, %50290, %true_47827, %none_47828 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_47829 = torch.constant.int -1
    %50292 = torch.prim.ListConstruct %int-1_47829 : (!torch.int) -> !torch.list<int>
    %true_47830 = torch.constant.bool true
    %none_47831 = torch.constant.none
    %50293 = torch.aten.mean.dim %50281, %50292, %true_47830, %none_47831 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_47832 = torch.constant.int -1
    %50294 = torch.prim.ListConstruct %int-1_47832 : (!torch.int) -> !torch.list<int>
    %true_47833 = torch.constant.bool true
    %none_47834 = torch.constant.none
    %50295 = torch.aten.mean.dim %50282, %50294, %true_47833, %none_47834 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_47835 = torch.constant.int -1
    %50296 = torch.prim.ListConstruct %int-1_47835 : (!torch.int) -> !torch.list<int>
    %true_47836 = torch.constant.bool true
    %none_47837 = torch.constant.none
    %50297 = torch.aten.mean.dim %50283, %50296, %true_47836, %none_47837 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_47838 = torch.constant.int -1
    %50298 = torch.prim.ListConstruct %int-1_47838 : (!torch.int) -> !torch.list<int>
    %true_47839 = torch.constant.bool true
    %none_47840 = torch.constant.none
    %50299 = torch.aten.mean.dim %50284, %50298, %true_47839, %none_47840 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_47841 = torch.constant.int -1
    %50300 = torch.prim.ListConstruct %int-1_47841 : (!torch.int) -> !torch.list<int>
    %true_47842 = torch.constant.bool true
    %none_47843 = torch.constant.none
    %50301 = torch.aten.mean.dim %50285, %50300, %true_47842, %none_47843 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
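    // Step 4: add epsilon; 9.9999997473787516E-6 is 1e-5 rounded to f32.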
    %float9.999990e-06_47844 = torch.constant.float 9.9999997473787516E-6
    %int1_47845 = torch.constant.int 1
    %50302 = torch.aten.add.Scalar %50287, %float9.999990e-06_47844, %int1_47845 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_47846 = torch.constant.float 9.9999997473787516E-6
    %int1_47847 = torch.constant.int 1
    %50303 = torch.aten.add.Scalar %50289, %float9.999990e-06_47846, %int1_47847 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_47848 = torch.constant.float 9.9999997473787516E-6
    %int1_47849 = torch.constant.int 1
    %50304 = torch.aten.add.Scalar %50291, %float9.999990e-06_47848, %int1_47849 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_47850 = torch.constant.float 9.9999997473787516E-6
    %int1_47851 = torch.constant.int 1
    %50305 = torch.aten.add.Scalar %50293, %float9.999990e-06_47850, %int1_47851 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_47852 = torch.constant.float 9.9999997473787516E-6
    %int1_47853 = torch.constant.int 1
    %50306 = torch.aten.add.Scalar %50295, %float9.999990e-06_47852, %int1_47853 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_47854 = torch.constant.float 9.9999997473787516E-6
    %int1_47855 = torch.constant.int 1
    %50307 = torch.aten.add.Scalar %50297, %float9.999990e-06_47854, %int1_47855 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_47856 = torch.constant.float 9.9999997473787516E-6
    %int1_47857 = torch.constant.int 1
    %50308 = torch.aten.add.Scalar %50299, %float9.999990e-06_47856, %int1_47857 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_47858 = torch.constant.float 9.9999997473787516E-6
    %int1_47859 = torch.constant.int 1
    %50309 = torch.aten.add.Scalar %50301, %float9.999990e-06_47858, %int1_47859 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
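    // Step 5: rsqrt(mean(x^2) + eps).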
    %50310 = torch.aten.rsqrt %50302 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50311 = torch.aten.rsqrt %50303 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50312 = torch.aten.rsqrt %50304 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50313 = torch.aten.rsqrt %50305 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50314 = torch.aten.rsqrt %50306 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50315 = torch.aten.rsqrt %50307 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50316 = torch.aten.rsqrt %50308 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50317 = torch.aten.rsqrt %50309 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
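    // Step 6: normalize: x * rsqrt(mean(x^2) + eps).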
    %50318 = torch.aten.mul.Tensor %50270, %50310 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50319 = torch.aten.mul.Tensor %50271, %50311 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50320 = torch.aten.mul.Tensor %50272, %50312 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50321 = torch.aten.mul.Tensor %50273, %50313 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50322 = torch.aten.mul.Tensor %50274, %50314 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50323 = torch.aten.mul.Tensor %50275, %50315 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50324 = torch.aten.mul.Tensor %50276, %50316 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50325 = torch.aten.mul.Tensor %50277, %50317 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
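    // Step 7: scale by the per-device copy of the [4096] norm weight (%1848-%1855).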
    %50326 = torch.aten.mul.Tensor %1848, %50318 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50327 = torch.aten.mul.Tensor %1849, %50319 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50328 = torch.aten.mul.Tensor %1850, %50320 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50329 = torch.aten.mul.Tensor %1851, %50321 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50330 = torch.aten.mul.Tensor %1852, %50322 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50331 = torch.aten.mul.Tensor %1853, %50323 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50332 = torch.aten.mul.Tensor %1854, %50324 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50333 = torch.aten.mul.Tensor %1855, %50325 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
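    // Step 8: downcast the normalized result back to f16 (torch dtype code 5 = float16).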
    %int5_47860 = torch.constant.int 5
    %50334 = torch.prims.convert_element_type %50326, %int5_47860 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_47861 = torch.constant.int 5
    %50335 = torch.prims.convert_element_type %50327, %int5_47861 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_47862 = torch.constant.int 5
    %50336 = torch.prims.convert_element_type %50328, %int5_47862 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_47863 = torch.constant.int 5
    %50337 = torch.prims.convert_element_type %50329, %int5_47863 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_47864 = torch.constant.int 5
    %50338 = torch.prims.convert_element_type %50330, %int5_47864 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_47865 = torch.constant.int 5
    %50339 = torch.prims.convert_element_type %50331, %int5_47865 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_47866 = torch.constant.int 5
    %50340 = torch.prims.convert_element_type %50332, %int5_47866 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_47867 = torch.constant.int 5
    %50341 = torch.prims.convert_element_type %50333, %int5_47867 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
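    // Gated FFN, tensor-parallel over 8 devices. Gate projection first: transpose each
    // [1792,4096] weight shard (%1856-%1863); 1792 per shard is consistent with a
    // 14336-wide FFN dimension split 8 ways.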
    %int1_47868 = torch.constant.int 1
    %int0_47869 = torch.constant.int 0
    %50342 = torch.prim.ListConstruct %int1_47868, %int0_47869 : (!torch.int, !torch.int) -> !torch.list<int>
    %50343 = torch.aten.permute %1856, %50342 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47870 = torch.constant.int 1
    %int0_47871 = torch.constant.int 0
    %50344 = torch.prim.ListConstruct %int1_47870, %int0_47871 : (!torch.int, !torch.int) -> !torch.list<int>
    %50345 = torch.aten.permute %1857, %50344 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47872 = torch.constant.int 1
    %int0_47873 = torch.constant.int 0
    %50346 = torch.prim.ListConstruct %int1_47872, %int0_47873 : (!torch.int, !torch.int) -> !torch.list<int>
    %50347 = torch.aten.permute %1858, %50346 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47874 = torch.constant.int 1
    %int0_47875 = torch.constant.int 0
    %50348 = torch.prim.ListConstruct %int1_47874, %int0_47875 : (!torch.int, !torch.int) -> !torch.list<int>
    %50349 = torch.aten.permute %1859, %50348 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47876 = torch.constant.int 1
    %int0_47877 = torch.constant.int 0
    %50350 = torch.prim.ListConstruct %int1_47876, %int0_47877 : (!torch.int, !torch.int) -> !torch.list<int>
    %50351 = torch.aten.permute %1860, %50350 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47878 = torch.constant.int 1
    %int0_47879 = torch.constant.int 0
    %50352 = torch.prim.ListConstruct %int1_47878, %int0_47879 : (!torch.int, !torch.int) -> !torch.list<int>
    %50353 = torch.aten.permute %1861, %50352 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47880 = torch.constant.int 1
    %int0_47881 = torch.constant.int 0
    %50354 = torch.prim.ListConstruct %int1_47880, %int0_47881 : (!torch.int, !torch.int) -> !torch.list<int>
    %50355 = torch.aten.permute %1862, %50354 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47882 = torch.constant.int 1
    %int0_47883 = torch.constant.int 0
    %50356 = torch.prim.ListConstruct %int1_47882, %int0_47883 : (!torch.int, !torch.int) -> !torch.list<int>
    %50357 = torch.aten.permute %1863, %50356 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
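    // Per-shard gate matmul: flatten [4,?,4096] -> [4*?,4096], mm against the
    // transposed [4096,1792] shard, then reshape back to [4,?,1792].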
    %int4_47884 = torch.constant.int 4
    %50358 = torch.aten.mul.int %int4_47884, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47885 = torch.constant.int 4096
    %50359 = torch.prim.ListConstruct %50358, %int4096_47885 : (!torch.int, !torch.int) -> !torch.list<int>
    %50360 = torch.aten.view %50334, %50359 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50360, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50361 = torch.aten.mm %50360, %50343 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50361, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47886 = torch.constant.int 4
    %int1792_47887 = torch.constant.int 1792
    %50362 = torch.prim.ListConstruct %int4_47886, %2482, %int1792_47887 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50363 = torch.aten.view %50361, %50362 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47888 = torch.constant.int 4
    %50364 = torch.aten.mul.int %int4_47888, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47889 = torch.constant.int 4096
    %50365 = torch.prim.ListConstruct %50364, %int4096_47889 : (!torch.int, !torch.int) -> !torch.list<int>
    %50366 = torch.aten.view %50335, %50365 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50366, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50367 = torch.aten.mm %50366, %50345 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50367, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47890 = torch.constant.int 4
    %int1792_47891 = torch.constant.int 1792
    %50368 = torch.prim.ListConstruct %int4_47890, %2482, %int1792_47891 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50369 = torch.aten.view %50367, %50368 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47892 = torch.constant.int 4
    %50370 = torch.aten.mul.int %int4_47892, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47893 = torch.constant.int 4096
    %50371 = torch.prim.ListConstruct %50370, %int4096_47893 : (!torch.int, !torch.int) -> !torch.list<int>
    %50372 = torch.aten.view %50336, %50371 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50372, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50373 = torch.aten.mm %50372, %50347 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50373, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47894 = torch.constant.int 4
    %int1792_47895 = torch.constant.int 1792
    %50374 = torch.prim.ListConstruct %int4_47894, %2482, %int1792_47895 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50375 = torch.aten.view %50373, %50374 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47896 = torch.constant.int 4
    %50376 = torch.aten.mul.int %int4_47896, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47897 = torch.constant.int 4096
    %50377 = torch.prim.ListConstruct %50376, %int4096_47897 : (!torch.int, !torch.int) -> !torch.list<int>
    %50378 = torch.aten.view %50337, %50377 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50378, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50379 = torch.aten.mm %50378, %50349 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50379, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47898 = torch.constant.int 4
    %int1792_47899 = torch.constant.int 1792
    %50380 = torch.prim.ListConstruct %int4_47898, %2482, %int1792_47899 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50381 = torch.aten.view %50379, %50380 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47900 = torch.constant.int 4
    %50382 = torch.aten.mul.int %int4_47900, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47901 = torch.constant.int 4096
    %50383 = torch.prim.ListConstruct %50382, %int4096_47901 : (!torch.int, !torch.int) -> !torch.list<int>
    %50384 = torch.aten.view %50338, %50383 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50384, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50385 = torch.aten.mm %50384, %50351 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50385, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47902 = torch.constant.int 4
    %int1792_47903 = torch.constant.int 1792
    %50386 = torch.prim.ListConstruct %int4_47902, %2482, %int1792_47903 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50387 = torch.aten.view %50385, %50386 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47904 = torch.constant.int 4
    %50388 = torch.aten.mul.int %int4_47904, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47905 = torch.constant.int 4096
    %50389 = torch.prim.ListConstruct %50388, %int4096_47905 : (!torch.int, !torch.int) -> !torch.list<int>
    %50390 = torch.aten.view %50339, %50389 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50390, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50391 = torch.aten.mm %50390, %50353 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50391, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47906 = torch.constant.int 4
    %int1792_47907 = torch.constant.int 1792
    %50392 = torch.prim.ListConstruct %int4_47906, %2482, %int1792_47907 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50393 = torch.aten.view %50391, %50392 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47908 = torch.constant.int 4
    %50394 = torch.aten.mul.int %int4_47908, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47909 = torch.constant.int 4096
    %50395 = torch.prim.ListConstruct %50394, %int4096_47909 : (!torch.int, !torch.int) -> !torch.list<int>
    %50396 = torch.aten.view %50340, %50395 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50396, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50397 = torch.aten.mm %50396, %50355 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50397, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47910 = torch.constant.int 4
    %int1792_47911 = torch.constant.int 1792
    %50398 = torch.prim.ListConstruct %int4_47910, %2482, %int1792_47911 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50399 = torch.aten.view %50397, %50398 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47912 = torch.constant.int 4
    %50400 = torch.aten.mul.int %int4_47912, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47913 = torch.constant.int 4096
    %50401 = torch.prim.ListConstruct %50400, %int4096_47913 : (!torch.int, !torch.int) -> !torch.list<int>
    %50402 = torch.aten.view %50341, %50401 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50402, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50403 = torch.aten.mm %50402, %50357 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50403, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47914 = torch.constant.int 4
    %int1792_47915 = torch.constant.int 1792
    %50404 = torch.prim.ListConstruct %int4_47914, %2482, %int1792_47915 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50405 = torch.aten.view %50403, %50404 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
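    // SiLU activation on each gate-projection output.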
    %50406 = torch.aten.silu %50363 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50407 = torch.aten.silu %50369 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50408 = torch.aten.silu %50375 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50409 = torch.aten.silu %50381 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50410 = torch.aten.silu %50387 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50411 = torch.aten.silu %50393 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50412 = torch.aten.silu %50399 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50413 = torch.aten.silu %50405 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
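    // Up projection: the same transpose-and-matmul pattern with the second set of
    // [1792,4096] shards (%1864-%1871), applied to the same normalized inputs.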
    %int1_47916 = torch.constant.int 1
    %int0_47917 = torch.constant.int 0
    %50414 = torch.prim.ListConstruct %int1_47916, %int0_47917 : (!torch.int, !torch.int) -> !torch.list<int>
    %50415 = torch.aten.permute %1864, %50414 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47918 = torch.constant.int 1
    %int0_47919 = torch.constant.int 0
    %50416 = torch.prim.ListConstruct %int1_47918, %int0_47919 : (!torch.int, !torch.int) -> !torch.list<int>
    %50417 = torch.aten.permute %1865, %50416 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47920 = torch.constant.int 1
    %int0_47921 = torch.constant.int 0
    %50418 = torch.prim.ListConstruct %int1_47920, %int0_47921 : (!torch.int, !torch.int) -> !torch.list<int>
    %50419 = torch.aten.permute %1866, %50418 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47922 = torch.constant.int 1
    %int0_47923 = torch.constant.int 0
    %50420 = torch.prim.ListConstruct %int1_47922, %int0_47923 : (!torch.int, !torch.int) -> !torch.list<int>
    %50421 = torch.aten.permute %1867, %50420 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47924 = torch.constant.int 1
    %int0_47925 = torch.constant.int 0
    %50422 = torch.prim.ListConstruct %int1_47924, %int0_47925 : (!torch.int, !torch.int) -> !torch.list<int>
    %50423 = torch.aten.permute %1868, %50422 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47926 = torch.constant.int 1
    %int0_47927 = torch.constant.int 0
    %50424 = torch.prim.ListConstruct %int1_47926, %int0_47927 : (!torch.int, !torch.int) -> !torch.list<int>
    %50425 = torch.aten.permute %1869, %50424 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47928 = torch.constant.int 1
    %int0_47929 = torch.constant.int 0
    %50426 = torch.prim.ListConstruct %int1_47928, %int0_47929 : (!torch.int, !torch.int) -> !torch.list<int>
    %50427 = torch.aten.permute %1870, %50426 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_47930 = torch.constant.int 1
    %int0_47931 = torch.constant.int 0
    %50428 = torch.prim.ListConstruct %int1_47930, %int0_47931 : (!torch.int, !torch.int) -> !torch.list<int>
    %50429 = torch.aten.permute %1871, %50428 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
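    // Per-shard up matmul over the normalized activations %50334-%50341.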
    %int4_47932 = torch.constant.int 4
    %50430 = torch.aten.mul.int %int4_47932, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47933 = torch.constant.int 4096
    %50431 = torch.prim.ListConstruct %50430, %int4096_47933 : (!torch.int, !torch.int) -> !torch.list<int>
    %50432 = torch.aten.view %50334, %50431 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50432, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50433 = torch.aten.mm %50432, %50415 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50433, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47934 = torch.constant.int 4
    %int1792_47935 = torch.constant.int 1792
    %50434 = torch.prim.ListConstruct %int4_47934, %2482, %int1792_47935 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50435 = torch.aten.view %50433, %50434 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47936 = torch.constant.int 4
    %50436 = torch.aten.mul.int %int4_47936, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47937 = torch.constant.int 4096
    %50437 = torch.prim.ListConstruct %50436, %int4096_47937 : (!torch.int, !torch.int) -> !torch.list<int>
    %50438 = torch.aten.view %50335, %50437 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50438, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50439 = torch.aten.mm %50438, %50417 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50439, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47938 = torch.constant.int 4
    %int1792_47939 = torch.constant.int 1792
    %50440 = torch.prim.ListConstruct %int4_47938, %2482, %int1792_47939 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50441 = torch.aten.view %50439, %50440 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47940 = torch.constant.int 4
    %50442 = torch.aten.mul.int %int4_47940, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47941 = torch.constant.int 4096
    %50443 = torch.prim.ListConstruct %50442, %int4096_47941 : (!torch.int, !torch.int) -> !torch.list<int>
    %50444 = torch.aten.view %50336, %50443 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50444, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50445 = torch.aten.mm %50444, %50419 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50445, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47942 = torch.constant.int 4
    %int1792_47943 = torch.constant.int 1792
    %50446 = torch.prim.ListConstruct %int4_47942, %2482, %int1792_47943 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50447 = torch.aten.view %50445, %50446 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47944 = torch.constant.int 4
    %50448 = torch.aten.mul.int %int4_47944, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47945 = torch.constant.int 4096
    %50449 = torch.prim.ListConstruct %50448, %int4096_47945 : (!torch.int, !torch.int) -> !torch.list<int>
    %50450 = torch.aten.view %50337, %50449 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50450, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50451 = torch.aten.mm %50450, %50421 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50451, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47946 = torch.constant.int 4
    %int1792_47947 = torch.constant.int 1792
    %50452 = torch.prim.ListConstruct %int4_47946, %2482, %int1792_47947 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50453 = torch.aten.view %50451, %50452 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47948 = torch.constant.int 4
    %50454 = torch.aten.mul.int %int4_47948, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47949 = torch.constant.int 4096
    %50455 = torch.prim.ListConstruct %50454, %int4096_47949 : (!torch.int, !torch.int) -> !torch.list<int>
    %50456 = torch.aten.view %50338, %50455 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50456, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50457 = torch.aten.mm %50456, %50423 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50457, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47950 = torch.constant.int 4
    %int1792_47951 = torch.constant.int 1792
    %50458 = torch.prim.ListConstruct %int4_47950, %2482, %int1792_47951 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50459 = torch.aten.view %50457, %50458 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47952 = torch.constant.int 4
    %50460 = torch.aten.mul.int %int4_47952, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47953 = torch.constant.int 4096
    %50461 = torch.prim.ListConstruct %50460, %int4096_47953 : (!torch.int, !torch.int) -> !torch.list<int>
    %50462 = torch.aten.view %50339, %50461 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50462, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50463 = torch.aten.mm %50462, %50425 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50463, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47954 = torch.constant.int 4
    %int1792_47955 = torch.constant.int 1792
    %50464 = torch.prim.ListConstruct %int4_47954, %2482, %int1792_47955 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50465 = torch.aten.view %50463, %50464 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47956 = torch.constant.int 4
    %50466 = torch.aten.mul.int %int4_47956, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47957 = torch.constant.int 4096
    %50467 = torch.prim.ListConstruct %50466, %int4096_47957 : (!torch.int, !torch.int) -> !torch.list<int>
    %50468 = torch.aten.view %50340, %50467 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50468, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50469 = torch.aten.mm %50468, %50427 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50469, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47958 = torch.constant.int 4
    %int1792_47959 = torch.constant.int 1792
    %50470 = torch.prim.ListConstruct %int4_47958, %2482, %int1792_47959 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50471 = torch.aten.view %50469, %50470 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_47960 = torch.constant.int 4
    %50472 = torch.aten.mul.int %int4_47960, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_47961 = torch.constant.int 4096
    %50473 = torch.prim.ListConstruct %50472, %int4096_47961 : (!torch.int, !torch.int) -> !torch.list<int>
    %50474 = torch.aten.view %50341, %50473 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50474, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50475 = torch.aten.mm %50474, %50429 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50475, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_47962 = torch.constant.int 4
    %int1792_47963 = torch.constant.int 1792
    %50476 = torch.prim.ListConstruct %int4_47962, %2482, %int1792_47963 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50477 = torch.aten.view %50475, %50476 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
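    // Elementwise gating per shard: silu(gate) * up.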
    %50478 = torch.aten.mul.Tensor %50406, %50435 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50479 = torch.aten.mul.Tensor %50407, %50441 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50480 = torch.aten.mul.Tensor %50408, %50447 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50481 = torch.aten.mul.Tensor %50409, %50453 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50482 = torch.aten.mul.Tensor %50410, %50459 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50483 = torch.aten.mul.Tensor %50411, %50465 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50484 = torch.aten.mul.Tensor %50412, %50471 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %50485 = torch.aten.mul.Tensor %50413, %50477 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %50485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
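    // Down projection (row-parallel): transpose the [4096,1792] shards (%1872-%1879).
    // Each device produces a [4,?,4096] partial; presumably these are reduced across
    // devices afterwards, as with the partial sums at the top of this block.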
    %int1_47964 = torch.constant.int 1
    %int0_47965 = torch.constant.int 0
    %50486 = torch.prim.ListConstruct %int1_47964, %int0_47965 : (!torch.int, !torch.int) -> !torch.list<int>
    %50487 = torch.aten.permute %1872, %50486 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_47966 = torch.constant.int 1
    %int0_47967 = torch.constant.int 0
    %50488 = torch.prim.ListConstruct %int1_47966, %int0_47967 : (!torch.int, !torch.int) -> !torch.list<int>
    %50489 = torch.aten.permute %1873, %50488 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_47968 = torch.constant.int 1
    %int0_47969 = torch.constant.int 0
    %50490 = torch.prim.ListConstruct %int1_47968, %int0_47969 : (!torch.int, !torch.int) -> !torch.list<int>
    %50491 = torch.aten.permute %1874, %50490 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_47970 = torch.constant.int 1
    %int0_47971 = torch.constant.int 0
    %50492 = torch.prim.ListConstruct %int1_47970, %int0_47971 : (!torch.int, !torch.int) -> !torch.list<int>
    %50493 = torch.aten.permute %1875, %50492 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_47972 = torch.constant.int 1
    %int0_47973 = torch.constant.int 0
    %50494 = torch.prim.ListConstruct %int1_47972, %int0_47973 : (!torch.int, !torch.int) -> !torch.list<int>
    %50495 = torch.aten.permute %1876, %50494 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_47974 = torch.constant.int 1
    %int0_47975 = torch.constant.int 0
    %50496 = torch.prim.ListConstruct %int1_47974, %int0_47975 : (!torch.int, !torch.int) -> !torch.list<int>
    %50497 = torch.aten.permute %1877, %50496 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_47976 = torch.constant.int 1
    %int0_47977 = torch.constant.int 0
    %50498 = torch.prim.ListConstruct %int1_47976, %int0_47977 : (!torch.int, !torch.int) -> !torch.list<int>
    %50499 = torch.aten.permute %1878, %50498 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_47978 = torch.constant.int 1
    %int0_47979 = torch.constant.int 0
    %50500 = torch.prim.ListConstruct %int1_47978, %int0_47979 : (!torch.int, !torch.int) -> !torch.list<int>
    %50501 = torch.aten.permute %1879, %50500 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
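    // Per-shard down matmul: flatten the gated [4,?,1792] activations and mm with the
    // transposed [1792,4096] shard, reshaping back to [4,?,4096].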
    %int1_47980 = torch.constant.int 1
    %50502 = torch.aten.size.int %50363, %int1_47980 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_47981 = torch.constant.int 4
    %50503 = torch.aten.mul.int %int4_47981, %50502 : !torch.int, !torch.int -> !torch.int
    %int1792_47982 = torch.constant.int 1792
    %50504 = torch.prim.ListConstruct %50503, %int1792_47982 : (!torch.int, !torch.int) -> !torch.list<int>
    %50505 = torch.aten.view %50478, %50504 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50505, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %50506 = torch.aten.mm %50505, %50487 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50506, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_47983 = torch.constant.int 4
    %int4096_47984 = torch.constant.int 4096
    %50507 = torch.prim.ListConstruct %int4_47983, %50502, %int4096_47984 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50508 = torch.aten.view %50506, %50507 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47985 = torch.constant.int 1
    %50509 = torch.aten.size.int %50369, %int1_47985 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_47986 = torch.constant.int 4
    %50510 = torch.aten.mul.int %int4_47986, %50509 : !torch.int, !torch.int -> !torch.int
    %int1792_47987 = torch.constant.int 1792
    %50511 = torch.prim.ListConstruct %50510, %int1792_47987 : (!torch.int, !torch.int) -> !torch.list<int>
    %50512 = torch.aten.view %50479, %50511 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50512, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %50513 = torch.aten.mm %50512, %50489 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50513, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_47988 = torch.constant.int 4
    %int4096_47989 = torch.constant.int 4096
    %50514 = torch.prim.ListConstruct %int4_47988, %50509, %int4096_47989 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50515 = torch.aten.view %50513, %50514 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47990 = torch.constant.int 1
    %50516 = torch.aten.size.int %50375, %int1_47990 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_47991 = torch.constant.int 4
    %50517 = torch.aten.mul.int %int4_47991, %50516 : !torch.int, !torch.int -> !torch.int
    %int1792_47992 = torch.constant.int 1792
    %50518 = torch.prim.ListConstruct %50517, %int1792_47992 : (!torch.int, !torch.int) -> !torch.list<int>
    %50519 = torch.aten.view %50480, %50518 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50519, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %50520 = torch.aten.mm %50519, %50491 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50520, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_47993 = torch.constant.int 4
    %int4096_47994 = torch.constant.int 4096
    %50521 = torch.prim.ListConstruct %int4_47993, %50516, %int4096_47994 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50522 = torch.aten.view %50520, %50521 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_47995 = torch.constant.int 1
    %50523 = torch.aten.size.int %50381, %int1_47995 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_47996 = torch.constant.int 4
    %50524 = torch.aten.mul.int %int4_47996, %50523 : !torch.int, !torch.int -> !torch.int
    %int1792_47997 = torch.constant.int 1792
    %50525 = torch.prim.ListConstruct %50524, %int1792_47997 : (!torch.int, !torch.int) -> !torch.list<int>
    %50526 = torch.aten.view %50481, %50525 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50526, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %50527 = torch.aten.mm %50526, %50493 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50527, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_47998 = torch.constant.int 4
    %int4096_47999 = torch.constant.int 4096
    %50528 = torch.prim.ListConstruct %int4_47998, %50523, %int4096_47999 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50529 = torch.aten.view %50527, %50528 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48000 = torch.constant.int 1
    %50530 = torch.aten.size.int %50387, %int1_48000 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_48001 = torch.constant.int 4
    %50531 = torch.aten.mul.int %int4_48001, %50530 : !torch.int, !torch.int -> !torch.int
    %int1792_48002 = torch.constant.int 1792
    %50532 = torch.prim.ListConstruct %50531, %int1792_48002 : (!torch.int, !torch.int) -> !torch.list<int>
    %50533 = torch.aten.view %50482, %50532 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50533, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %50534 = torch.aten.mm %50533, %50495 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50534, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_48003 = torch.constant.int 4
    %int4096_48004 = torch.constant.int 4096
    %50535 = torch.prim.ListConstruct %int4_48003, %50530, %int4096_48004 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50536 = torch.aten.view %50534, %50535 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48005 = torch.constant.int 1
    %50537 = torch.aten.size.int %50393, %int1_48005 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_48006 = torch.constant.int 4
    %50538 = torch.aten.mul.int %int4_48006, %50537 : !torch.int, !torch.int -> !torch.int
    %int1792_48007 = torch.constant.int 1792
    %50539 = torch.prim.ListConstruct %50538, %int1792_48007 : (!torch.int, !torch.int) -> !torch.list<int>
    %50540 = torch.aten.view %50483, %50539 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50540, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %50541 = torch.aten.mm %50540, %50497 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50541, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_48008 = torch.constant.int 4
    %int4096_48009 = torch.constant.int 4096
    %50542 = torch.prim.ListConstruct %int4_48008, %50537, %int4096_48009 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50543 = torch.aten.view %50541, %50542 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48010 = torch.constant.int 1
    %50544 = torch.aten.size.int %50399, %int1_48010 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_48011 = torch.constant.int 4
    %50545 = torch.aten.mul.int %int4_48011, %50544 : !torch.int, !torch.int -> !torch.int
    %int1792_48012 = torch.constant.int 1792
    %50546 = torch.prim.ListConstruct %50545, %int1792_48012 : (!torch.int, !torch.int) -> !torch.list<int>
    %50547 = torch.aten.view %50484, %50546 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50547, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %50548 = torch.aten.mm %50547, %50499 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50548, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_48013 = torch.constant.int 4
    %int4096_48014 = torch.constant.int 4096
    %50549 = torch.prim.ListConstruct %int4_48013, %50544, %int4096_48014 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50550 = torch.aten.view %50548, %50549 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48015 = torch.constant.int 1
    %50551 = torch.aten.size.int %50405, %int1_48015 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_48016 = torch.constant.int 4
    %50552 = torch.aten.mul.int %int4_48016, %50551 : !torch.int, !torch.int -> !torch.int
    %int1792_48017 = torch.constant.int 1792
    %50553 = torch.prim.ListConstruct %50552, %int1792_48017 : (!torch.int, !torch.int) -> !torch.list<int>
    %50554 = torch.aten.view %50485, %50553 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %50554, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %50555 = torch.aten.mm %50554, %50501 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50555, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_48018 = torch.constant.int 4
    %int4096_48019 = torch.constant.int 4096
    %50556 = torch.prim.ListConstruct %int4_48018, %50551, %int4096_48019 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50557 = torch.aten.view %50555, %50556 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
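    // All eight partial outputs (%50508, %50515, ..., %50557) now exist, one per
    // device. What follows is effectively an unrolled all-reduce: for each
    // device, the seven remote partials are copied in with flow.tensor.transfer
    // (round-tripping through torch_c.to_builtin_tensor / from_builtin_tensor,
    // with tensor.dim supplying the dynamic extent), and then summed with the
    // local partial via a chain of torch.aten.add.Tensor ops, so every device
    // ends up holding the full reduction.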
    %50558 = torch_c.to_builtin_tensor %50515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48020 = arith.constant 1 : index
    %dim_48021 = tensor.dim %50558, %c1_48020 : tensor<4x?x4096xf16>
    %50559 = flow.tensor.transfer %50558 : tensor<4x?x4096xf16>{%dim_48021} to #hal.device.promise<@__device_0>
    %50560 = torch_c.from_builtin_tensor %50559 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50561 = torch_c.to_builtin_tensor %50522 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48022 = arith.constant 1 : index
    %dim_48023 = tensor.dim %50561, %c1_48022 : tensor<4x?x4096xf16>
    %50562 = flow.tensor.transfer %50561 : tensor<4x?x4096xf16>{%dim_48023} to #hal.device.promise<@__device_0>
    %50563 = torch_c.from_builtin_tensor %50562 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50564 = torch_c.to_builtin_tensor %50529 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48024 = arith.constant 1 : index
    %dim_48025 = tensor.dim %50564, %c1_48024 : tensor<4x?x4096xf16>
    %50565 = flow.tensor.transfer %50564 : tensor<4x?x4096xf16>{%dim_48025} to #hal.device.promise<@__device_0>
    %50566 = torch_c.from_builtin_tensor %50565 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50567 = torch_c.to_builtin_tensor %50536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48026 = arith.constant 1 : index
    %dim_48027 = tensor.dim %50567, %c1_48026 : tensor<4x?x4096xf16>
    %50568 = flow.tensor.transfer %50567 : tensor<4x?x4096xf16>{%dim_48027} to #hal.device.promise<@__device_0>
    %50569 = torch_c.from_builtin_tensor %50568 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50570 = torch_c.to_builtin_tensor %50543 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48028 = arith.constant 1 : index
    %dim_48029 = tensor.dim %50570, %c1_48028 : tensor<4x?x4096xf16>
    %50571 = flow.tensor.transfer %50570 : tensor<4x?x4096xf16>{%dim_48029} to #hal.device.promise<@__device_0>
    %50572 = torch_c.from_builtin_tensor %50571 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50573 = torch_c.to_builtin_tensor %50550 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48030 = arith.constant 1 : index
    %dim_48031 = tensor.dim %50573, %c1_48030 : tensor<4x?x4096xf16>
    %50574 = flow.tensor.transfer %50573 : tensor<4x?x4096xf16>{%dim_48031} to #hal.device.promise<@__device_0>
    %50575 = torch_c.from_builtin_tensor %50574 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50576 = torch_c.to_builtin_tensor %50557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48032 = arith.constant 1 : index
    %dim_48033 = tensor.dim %50576, %c1_48032 : tensor<4x?x4096xf16>
    %50577 = flow.tensor.transfer %50576 : tensor<4x?x4096xf16>{%dim_48033} to #hal.device.promise<@__device_0>
    %50578 = torch_c.from_builtin_tensor %50577 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48034 = torch.constant.int 1
    %50579 = torch.aten.add.Tensor %50508, %50560, %int1_48034 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48035 = torch.constant.int 1
    %50580 = torch.aten.add.Tensor %50579, %50563, %int1_48035 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48036 = torch.constant.int 1
    %50581 = torch.aten.add.Tensor %50580, %50566, %int1_48036 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48037 = torch.constant.int 1
    %50582 = torch.aten.add.Tensor %50581, %50569, %int1_48037 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48038 = torch.constant.int 1
    %50583 = torch.aten.add.Tensor %50582, %50572, %int1_48038 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48039 = torch.constant.int 1
    %50584 = torch.aten.add.Tensor %50583, %50575, %int1_48039 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48040 = torch.constant.int 1
    %50585 = torch.aten.add.Tensor %50584, %50578, %int1_48040 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
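    // %50585 is the fully reduced sum on @__device_0. The same
    // transfer-and-accumulate sequence is replicated below for the remaining
    // devices, starting with @__device_1 (result %50613); only the add order
    // differs, since each device uses its own partial in place.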
    %50586 = torch_c.to_builtin_tensor %50508 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48041 = arith.constant 1 : index
    %dim_48042 = tensor.dim %50586, %c1_48041 : tensor<4x?x4096xf16>
    %50587 = flow.tensor.transfer %50586 : tensor<4x?x4096xf16>{%dim_48042} to #hal.device.promise<@__device_1>
    %50588 = torch_c.from_builtin_tensor %50587 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50589 = torch_c.to_builtin_tensor %50522 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48043 = arith.constant 1 : index
    %dim_48044 = tensor.dim %50589, %c1_48043 : tensor<4x?x4096xf16>
    %50590 = flow.tensor.transfer %50589 : tensor<4x?x4096xf16>{%dim_48044} to #hal.device.promise<@__device_1>
    %50591 = torch_c.from_builtin_tensor %50590 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50592 = torch_c.to_builtin_tensor %50529 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48045 = arith.constant 1 : index
    %dim_48046 = tensor.dim %50592, %c1_48045 : tensor<4x?x4096xf16>
    %50593 = flow.tensor.transfer %50592 : tensor<4x?x4096xf16>{%dim_48046} to #hal.device.promise<@__device_1>
    %50594 = torch_c.from_builtin_tensor %50593 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50595 = torch_c.to_builtin_tensor %50536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48047 = arith.constant 1 : index
    %dim_48048 = tensor.dim %50595, %c1_48047 : tensor<4x?x4096xf16>
    %50596 = flow.tensor.transfer %50595 : tensor<4x?x4096xf16>{%dim_48048} to #hal.device.promise<@__device_1>
    %50597 = torch_c.from_builtin_tensor %50596 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50598 = torch_c.to_builtin_tensor %50543 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48049 = arith.constant 1 : index
    %dim_48050 = tensor.dim %50598, %c1_48049 : tensor<4x?x4096xf16>
    %50599 = flow.tensor.transfer %50598 : tensor<4x?x4096xf16>{%dim_48050} to #hal.device.promise<@__device_1>
    %50600 = torch_c.from_builtin_tensor %50599 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50601 = torch_c.to_builtin_tensor %50550 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48051 = arith.constant 1 : index
    %dim_48052 = tensor.dim %50601, %c1_48051 : tensor<4x?x4096xf16>
    %50602 = flow.tensor.transfer %50601 : tensor<4x?x4096xf16>{%dim_48052} to #hal.device.promise<@__device_1>
    %50603 = torch_c.from_builtin_tensor %50602 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50604 = torch_c.to_builtin_tensor %50557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48053 = arith.constant 1 : index
    %dim_48054 = tensor.dim %50604, %c1_48053 : tensor<4x?x4096xf16>
    %50605 = flow.tensor.transfer %50604 : tensor<4x?x4096xf16>{%dim_48054} to #hal.device.promise<@__device_1>
    %50606 = torch_c.from_builtin_tensor %50605 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48055 = torch.constant.int 1
    %50607 = torch.aten.add.Tensor %50588, %50515, %int1_48055 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48056 = torch.constant.int 1
    %50608 = torch.aten.add.Tensor %50607, %50591, %int1_48056 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48057 = torch.constant.int 1
    %50609 = torch.aten.add.Tensor %50608, %50594, %int1_48057 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48058 = torch.constant.int 1
    %50610 = torch.aten.add.Tensor %50609, %50597, %int1_48058 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48059 = torch.constant.int 1
    %50611 = torch.aten.add.Tensor %50610, %50600, %int1_48059 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48060 = torch.constant.int 1
    %50612 = torch.aten.add.Tensor %50611, %50603, %int1_48060 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48061 = torch.constant.int 1
    %50613 = torch.aten.add.Tensor %50612, %50606, %int1_48061 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
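    // @__device_2 copy of the reduction (result %50641).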
    %50614 = torch_c.to_builtin_tensor %50508 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48062 = arith.constant 1 : index
    %dim_48063 = tensor.dim %50614, %c1_48062 : tensor<4x?x4096xf16>
    %50615 = flow.tensor.transfer %50614 : tensor<4x?x4096xf16>{%dim_48063} to #hal.device.promise<@__device_2>
    %50616 = torch_c.from_builtin_tensor %50615 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50617 = torch_c.to_builtin_tensor %50515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48064 = arith.constant 1 : index
    %dim_48065 = tensor.dim %50617, %c1_48064 : tensor<4x?x4096xf16>
    %50618 = flow.tensor.transfer %50617 : tensor<4x?x4096xf16>{%dim_48065} to #hal.device.promise<@__device_2>
    %50619 = torch_c.from_builtin_tensor %50618 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50620 = torch_c.to_builtin_tensor %50529 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48066 = arith.constant 1 : index
    %dim_48067 = tensor.dim %50620, %c1_48066 : tensor<4x?x4096xf16>
    %50621 = flow.tensor.transfer %50620 : tensor<4x?x4096xf16>{%dim_48067} to #hal.device.promise<@__device_2>
    %50622 = torch_c.from_builtin_tensor %50621 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50623 = torch_c.to_builtin_tensor %50536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48068 = arith.constant 1 : index
    %dim_48069 = tensor.dim %50623, %c1_48068 : tensor<4x?x4096xf16>
    %50624 = flow.tensor.transfer %50623 : tensor<4x?x4096xf16>{%dim_48069} to #hal.device.promise<@__device_2>
    %50625 = torch_c.from_builtin_tensor %50624 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50626 = torch_c.to_builtin_tensor %50543 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48070 = arith.constant 1 : index
    %dim_48071 = tensor.dim %50626, %c1_48070 : tensor<4x?x4096xf16>
    %50627 = flow.tensor.transfer %50626 : tensor<4x?x4096xf16>{%dim_48071} to #hal.device.promise<@__device_2>
    %50628 = torch_c.from_builtin_tensor %50627 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50629 = torch_c.to_builtin_tensor %50550 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48072 = arith.constant 1 : index
    %dim_48073 = tensor.dim %50629, %c1_48072 : tensor<4x?x4096xf16>
    %50630 = flow.tensor.transfer %50629 : tensor<4x?x4096xf16>{%dim_48073} to #hal.device.promise<@__device_2>
    %50631 = torch_c.from_builtin_tensor %50630 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50632 = torch_c.to_builtin_tensor %50557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48074 = arith.constant 1 : index
    %dim_48075 = tensor.dim %50632, %c1_48074 : tensor<4x?x4096xf16>
    %50633 = flow.tensor.transfer %50632 : tensor<4x?x4096xf16>{%dim_48075} to #hal.device.promise<@__device_2>
    %50634 = torch_c.from_builtin_tensor %50633 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48076 = torch.constant.int 1
    %50635 = torch.aten.add.Tensor %50616, %50619, %int1_48076 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48077 = torch.constant.int 1
    %50636 = torch.aten.add.Tensor %50635, %50522, %int1_48077 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48078 = torch.constant.int 1
    %50637 = torch.aten.add.Tensor %50636, %50622, %int1_48078 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48079 = torch.constant.int 1
    %50638 = torch.aten.add.Tensor %50637, %50625, %int1_48079 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48080 = torch.constant.int 1
    %50639 = torch.aten.add.Tensor %50638, %50628, %int1_48080 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48081 = torch.constant.int 1
    %50640 = torch.aten.add.Tensor %50639, %50631, %int1_48081 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48082 = torch.constant.int 1
    %50641 = torch.aten.add.Tensor %50640, %50634, %int1_48082 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
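    // @__device_3 copy of the reduction (result %50669).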
    %50642 = torch_c.to_builtin_tensor %50508 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48083 = arith.constant 1 : index
    %dim_48084 = tensor.dim %50642, %c1_48083 : tensor<4x?x4096xf16>
    %50643 = flow.tensor.transfer %50642 : tensor<4x?x4096xf16>{%dim_48084} to #hal.device.promise<@__device_3>
    %50644 = torch_c.from_builtin_tensor %50643 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50645 = torch_c.to_builtin_tensor %50515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48085 = arith.constant 1 : index
    %dim_48086 = tensor.dim %50645, %c1_48085 : tensor<4x?x4096xf16>
    %50646 = flow.tensor.transfer %50645 : tensor<4x?x4096xf16>{%dim_48086} to #hal.device.promise<@__device_3>
    %50647 = torch_c.from_builtin_tensor %50646 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50648 = torch_c.to_builtin_tensor %50522 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48087 = arith.constant 1 : index
    %dim_48088 = tensor.dim %50648, %c1_48087 : tensor<4x?x4096xf16>
    %50649 = flow.tensor.transfer %50648 : tensor<4x?x4096xf16>{%dim_48088} to #hal.device.promise<@__device_3>
    %50650 = torch_c.from_builtin_tensor %50649 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50651 = torch_c.to_builtin_tensor %50536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48089 = arith.constant 1 : index
    %dim_48090 = tensor.dim %50651, %c1_48089 : tensor<4x?x4096xf16>
    %50652 = flow.tensor.transfer %50651 : tensor<4x?x4096xf16>{%dim_48090} to #hal.device.promise<@__device_3>
    %50653 = torch_c.from_builtin_tensor %50652 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50654 = torch_c.to_builtin_tensor %50543 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48091 = arith.constant 1 : index
    %dim_48092 = tensor.dim %50654, %c1_48091 : tensor<4x?x4096xf16>
    %50655 = flow.tensor.transfer %50654 : tensor<4x?x4096xf16>{%dim_48092} to #hal.device.promise<@__device_3>
    %50656 = torch_c.from_builtin_tensor %50655 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50657 = torch_c.to_builtin_tensor %50550 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48093 = arith.constant 1 : index
    %dim_48094 = tensor.dim %50657, %c1_48093 : tensor<4x?x4096xf16>
    %50658 = flow.tensor.transfer %50657 : tensor<4x?x4096xf16>{%dim_48094} to #hal.device.promise<@__device_3>
    %50659 = torch_c.from_builtin_tensor %50658 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50660 = torch_c.to_builtin_tensor %50557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48095 = arith.constant 1 : index
    %dim_48096 = tensor.dim %50660, %c1_48095 : tensor<4x?x4096xf16>
    %50661 = flow.tensor.transfer %50660 : tensor<4x?x4096xf16>{%dim_48096} to #hal.device.promise<@__device_3>
    %50662 = torch_c.from_builtin_tensor %50661 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48097 = torch.constant.int 1
    %50663 = torch.aten.add.Tensor %50644, %50647, %int1_48097 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48098 = torch.constant.int 1
    %50664 = torch.aten.add.Tensor %50663, %50650, %int1_48098 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48099 = torch.constant.int 1
    %50665 = torch.aten.add.Tensor %50664, %50529, %int1_48099 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48100 = torch.constant.int 1
    %50666 = torch.aten.add.Tensor %50665, %50653, %int1_48100 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48101 = torch.constant.int 1
    %50667 = torch.aten.add.Tensor %50666, %50656, %int1_48101 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48102 = torch.constant.int 1
    %50668 = torch.aten.add.Tensor %50667, %50659, %int1_48102 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48103 = torch.constant.int 1
    %50669 = torch.aten.add.Tensor %50668, %50662, %int1_48103 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
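    // @__device_4 copy of the reduction (result %50697).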
    %50670 = torch_c.to_builtin_tensor %50508 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48104 = arith.constant 1 : index
    %dim_48105 = tensor.dim %50670, %c1_48104 : tensor<4x?x4096xf16>
    %50671 = flow.tensor.transfer %50670 : tensor<4x?x4096xf16>{%dim_48105} to #hal.device.promise<@__device_4>
    %50672 = torch_c.from_builtin_tensor %50671 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50673 = torch_c.to_builtin_tensor %50515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48106 = arith.constant 1 : index
    %dim_48107 = tensor.dim %50673, %c1_48106 : tensor<4x?x4096xf16>
    %50674 = flow.tensor.transfer %50673 : tensor<4x?x4096xf16>{%dim_48107} to #hal.device.promise<@__device_4>
    %50675 = torch_c.from_builtin_tensor %50674 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50676 = torch_c.to_builtin_tensor %50522 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48108 = arith.constant 1 : index
    %dim_48109 = tensor.dim %50676, %c1_48108 : tensor<4x?x4096xf16>
    %50677 = flow.tensor.transfer %50676 : tensor<4x?x4096xf16>{%dim_48109} to #hal.device.promise<@__device_4>
    %50678 = torch_c.from_builtin_tensor %50677 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50679 = torch_c.to_builtin_tensor %50529 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48110 = arith.constant 1 : index
    %dim_48111 = tensor.dim %50679, %c1_48110 : tensor<4x?x4096xf16>
    %50680 = flow.tensor.transfer %50679 : tensor<4x?x4096xf16>{%dim_48111} to #hal.device.promise<@__device_4>
    %50681 = torch_c.from_builtin_tensor %50680 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50682 = torch_c.to_builtin_tensor %50543 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48112 = arith.constant 1 : index
    %dim_48113 = tensor.dim %50682, %c1_48112 : tensor<4x?x4096xf16>
    %50683 = flow.tensor.transfer %50682 : tensor<4x?x4096xf16>{%dim_48113} to #hal.device.promise<@__device_4>
    %50684 = torch_c.from_builtin_tensor %50683 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50685 = torch_c.to_builtin_tensor %50550 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48114 = arith.constant 1 : index
    %dim_48115 = tensor.dim %50685, %c1_48114 : tensor<4x?x4096xf16>
    %50686 = flow.tensor.transfer %50685 : tensor<4x?x4096xf16>{%dim_48115} to #hal.device.promise<@__device_4>
    %50687 = torch_c.from_builtin_tensor %50686 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50688 = torch_c.to_builtin_tensor %50557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48116 = arith.constant 1 : index
    %dim_48117 = tensor.dim %50688, %c1_48116 : tensor<4x?x4096xf16>
    %50689 = flow.tensor.transfer %50688 : tensor<4x?x4096xf16>{%dim_48117} to #hal.device.promise<@__device_4>
    %50690 = torch_c.from_builtin_tensor %50689 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48118 = torch.constant.int 1
    %50691 = torch.aten.add.Tensor %50672, %50675, %int1_48118 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48119 = torch.constant.int 1
    %50692 = torch.aten.add.Tensor %50691, %50678, %int1_48119 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48120 = torch.constant.int 1
    %50693 = torch.aten.add.Tensor %50692, %50681, %int1_48120 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48121 = torch.constant.int 1
    %50694 = torch.aten.add.Tensor %50693, %50536, %int1_48121 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48122 = torch.constant.int 1
    %50695 = torch.aten.add.Tensor %50694, %50684, %int1_48122 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48123 = torch.constant.int 1
    %50696 = torch.aten.add.Tensor %50695, %50687, %int1_48123 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48124 = torch.constant.int 1
    %50697 = torch.aten.add.Tensor %50696, %50690, %int1_48124 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
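    // @__device_5 copy of the reduction (result %50725).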
    %50698 = torch_c.to_builtin_tensor %50508 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48125 = arith.constant 1 : index
    %dim_48126 = tensor.dim %50698, %c1_48125 : tensor<4x?x4096xf16>
    %50699 = flow.tensor.transfer %50698 : tensor<4x?x4096xf16>{%dim_48126} to #hal.device.promise<@__device_5>
    %50700 = torch_c.from_builtin_tensor %50699 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50701 = torch_c.to_builtin_tensor %50515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48127 = arith.constant 1 : index
    %dim_48128 = tensor.dim %50701, %c1_48127 : tensor<4x?x4096xf16>
    %50702 = flow.tensor.transfer %50701 : tensor<4x?x4096xf16>{%dim_48128} to #hal.device.promise<@__device_5>
    %50703 = torch_c.from_builtin_tensor %50702 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50704 = torch_c.to_builtin_tensor %50522 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48129 = arith.constant 1 : index
    %dim_48130 = tensor.dim %50704, %c1_48129 : tensor<4x?x4096xf16>
    %50705 = flow.tensor.transfer %50704 : tensor<4x?x4096xf16>{%dim_48130} to #hal.device.promise<@__device_5>
    %50706 = torch_c.from_builtin_tensor %50705 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50707 = torch_c.to_builtin_tensor %50529 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48131 = arith.constant 1 : index
    %dim_48132 = tensor.dim %50707, %c1_48131 : tensor<4x?x4096xf16>
    %50708 = flow.tensor.transfer %50707 : tensor<4x?x4096xf16>{%dim_48132} to #hal.device.promise<@__device_5>
    %50709 = torch_c.from_builtin_tensor %50708 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50710 = torch_c.to_builtin_tensor %50536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48133 = arith.constant 1 : index
    %dim_48134 = tensor.dim %50710, %c1_48133 : tensor<4x?x4096xf16>
    %50711 = flow.tensor.transfer %50710 : tensor<4x?x4096xf16>{%dim_48134} to #hal.device.promise<@__device_5>
    %50712 = torch_c.from_builtin_tensor %50711 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50713 = torch_c.to_builtin_tensor %50550 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48135 = arith.constant 1 : index
    %dim_48136 = tensor.dim %50713, %c1_48135 : tensor<4x?x4096xf16>
    %50714 = flow.tensor.transfer %50713 : tensor<4x?x4096xf16>{%dim_48136} to #hal.device.promise<@__device_5>
    %50715 = torch_c.from_builtin_tensor %50714 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50716 = torch_c.to_builtin_tensor %50557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48137 = arith.constant 1 : index
    %dim_48138 = tensor.dim %50716, %c1_48137 : tensor<4x?x4096xf16>
    %50717 = flow.tensor.transfer %50716 : tensor<4x?x4096xf16>{%dim_48138} to #hal.device.promise<@__device_5>
    %50718 = torch_c.from_builtin_tensor %50717 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48139 = torch.constant.int 1
    %50719 = torch.aten.add.Tensor %50700, %50703, %int1_48139 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48140 = torch.constant.int 1
    %50720 = torch.aten.add.Tensor %50719, %50706, %int1_48140 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48141 = torch.constant.int 1
    %50721 = torch.aten.add.Tensor %50720, %50709, %int1_48141 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48142 = torch.constant.int 1
    %50722 = torch.aten.add.Tensor %50721, %50712, %int1_48142 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48143 = torch.constant.int 1
    %50723 = torch.aten.add.Tensor %50722, %50543, %int1_48143 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48144 = torch.constant.int 1
    %50724 = torch.aten.add.Tensor %50723, %50715, %int1_48144 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48145 = torch.constant.int 1
    %50725 = torch.aten.add.Tensor %50724, %50718, %int1_48145 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
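    // @__device_6 copy of the reduction (result %50753).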
    %50726 = torch_c.to_builtin_tensor %50508 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48146 = arith.constant 1 : index
    %dim_48147 = tensor.dim %50726, %c1_48146 : tensor<4x?x4096xf16>
    %50727 = flow.tensor.transfer %50726 : tensor<4x?x4096xf16>{%dim_48147} to #hal.device.promise<@__device_6>
    %50728 = torch_c.from_builtin_tensor %50727 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50729 = torch_c.to_builtin_tensor %50515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48148 = arith.constant 1 : index
    %dim_48149 = tensor.dim %50729, %c1_48148 : tensor<4x?x4096xf16>
    %50730 = flow.tensor.transfer %50729 : tensor<4x?x4096xf16>{%dim_48149} to #hal.device.promise<@__device_6>
    %50731 = torch_c.from_builtin_tensor %50730 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50732 = torch_c.to_builtin_tensor %50522 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48150 = arith.constant 1 : index
    %dim_48151 = tensor.dim %50732, %c1_48150 : tensor<4x?x4096xf16>
    %50733 = flow.tensor.transfer %50732 : tensor<4x?x4096xf16>{%dim_48151} to #hal.device.promise<@__device_6>
    %50734 = torch_c.from_builtin_tensor %50733 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50734, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50735 = torch_c.to_builtin_tensor %50529 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48152 = arith.constant 1 : index
    %dim_48153 = tensor.dim %50735, %c1_48152 : tensor<4x?x4096xf16>
    %50736 = flow.tensor.transfer %50735 : tensor<4x?x4096xf16>{%dim_48153} to #hal.device.promise<@__device_6>
    %50737 = torch_c.from_builtin_tensor %50736 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50738 = torch_c.to_builtin_tensor %50536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48154 = arith.constant 1 : index
    %dim_48155 = tensor.dim %50738, %c1_48154 : tensor<4x?x4096xf16>
    %50739 = flow.tensor.transfer %50738 : tensor<4x?x4096xf16>{%dim_48155} to #hal.device.promise<@__device_6>
    %50740 = torch_c.from_builtin_tensor %50739 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50741 = torch_c.to_builtin_tensor %50543 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48156 = arith.constant 1 : index
    %dim_48157 = tensor.dim %50741, %c1_48156 : tensor<4x?x4096xf16>
    %50742 = flow.tensor.transfer %50741 : tensor<4x?x4096xf16>{%dim_48157} to #hal.device.promise<@__device_6>
    %50743 = torch_c.from_builtin_tensor %50742 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50744 = torch_c.to_builtin_tensor %50557 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48158 = arith.constant 1 : index
    %dim_48159 = tensor.dim %50744, %c1_48158 : tensor<4x?x4096xf16>
    %50745 = flow.tensor.transfer %50744 : tensor<4x?x4096xf16>{%dim_48159} to #hal.device.promise<@__device_6>
    %50746 = torch_c.from_builtin_tensor %50745 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48160 = torch.constant.int 1
    %50747 = torch.aten.add.Tensor %50728, %50731, %int1_48160 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48161 = torch.constant.int 1
    %50748 = torch.aten.add.Tensor %50747, %50734, %int1_48161 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48162 = torch.constant.int 1
    %50749 = torch.aten.add.Tensor %50748, %50737, %int1_48162 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48163 = torch.constant.int 1
    %50750 = torch.aten.add.Tensor %50749, %50740, %int1_48163 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48164 = torch.constant.int 1
    %50751 = torch.aten.add.Tensor %50750, %50743, %int1_48164 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48165 = torch.constant.int 1
    %50752 = torch.aten.add.Tensor %50751, %50550, %int1_48165 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48166 = torch.constant.int 1
    %50753 = torch.aten.add.Tensor %50752, %50746, %int1_48166 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
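    // @__device_7 copy of the reduction (result %50781).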
    %50754 = torch_c.to_builtin_tensor %50508 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48167 = arith.constant 1 : index
    %dim_48168 = tensor.dim %50754, %c1_48167 : tensor<4x?x4096xf16>
    %50755 = flow.tensor.transfer %50754 : tensor<4x?x4096xf16>{%dim_48168} to #hal.device.promise<@__device_7>
    %50756 = torch_c.from_builtin_tensor %50755 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50757 = torch_c.to_builtin_tensor %50515 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48169 = arith.constant 1 : index
    %dim_48170 = tensor.dim %50757, %c1_48169 : tensor<4x?x4096xf16>
    %50758 = flow.tensor.transfer %50757 : tensor<4x?x4096xf16>{%dim_48170} to #hal.device.promise<@__device_7>
    %50759 = torch_c.from_builtin_tensor %50758 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50760 = torch_c.to_builtin_tensor %50522 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48171 = arith.constant 1 : index
    %dim_48172 = tensor.dim %50760, %c1_48171 : tensor<4x?x4096xf16>
    %50761 = flow.tensor.transfer %50760 : tensor<4x?x4096xf16>{%dim_48172} to #hal.device.promise<@__device_7>
    %50762 = torch_c.from_builtin_tensor %50761 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50763 = torch_c.to_builtin_tensor %50529 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48173 = arith.constant 1 : index
    %dim_48174 = tensor.dim %50763, %c1_48173 : tensor<4x?x4096xf16>
    %50764 = flow.tensor.transfer %50763 : tensor<4x?x4096xf16>{%dim_48174} to #hal.device.promise<@__device_7>
    %50765 = torch_c.from_builtin_tensor %50764 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50766 = torch_c.to_builtin_tensor %50536 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48175 = arith.constant 1 : index
    %dim_48176 = tensor.dim %50766, %c1_48175 : tensor<4x?x4096xf16>
    %50767 = flow.tensor.transfer %50766 : tensor<4x?x4096xf16>{%dim_48176} to #hal.device.promise<@__device_7>
    %50768 = torch_c.from_builtin_tensor %50767 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50769 = torch_c.to_builtin_tensor %50543 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48177 = arith.constant 1 : index
    %dim_48178 = tensor.dim %50769, %c1_48177 : tensor<4x?x4096xf16>
    %50770 = flow.tensor.transfer %50769 : tensor<4x?x4096xf16>{%dim_48178} to #hal.device.promise<@__device_7>
    %50771 = torch_c.from_builtin_tensor %50770 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %50772 = torch_c.to_builtin_tensor %50550 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_48179 = arith.constant 1 : index
    %dim_48180 = tensor.dim %50772, %c1_48179 : tensor<4x?x4096xf16>
    %50773 = flow.tensor.transfer %50772 : tensor<4x?x4096xf16>{%dim_48180} to #hal.device.promise<@__device_7>
    %50774 = torch_c.from_builtin_tensor %50773 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48181 = torch.constant.int 1
    %50775 = torch.aten.add.Tensor %50756, %50759, %int1_48181 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48182 = torch.constant.int 1
    %50776 = torch.aten.add.Tensor %50775, %50762, %int1_48182 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48183 = torch.constant.int 1
    %50777 = torch.aten.add.Tensor %50776, %50765, %int1_48183 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48184 = torch.constant.int 1
    %50778 = torch.aten.add.Tensor %50777, %50768, %int1_48184 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48185 = torch.constant.int 1
    %50779 = torch.aten.add.Tensor %50778, %50771, %int1_48185 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48186 = torch.constant.int 1
    %50780 = torch.aten.add.Tensor %50779, %50774, %int1_48186 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48187 = torch.constant.int 1
    %50781 = torch.aten.add.Tensor %50780, %50557, %int1_48187 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
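    // Annotation: per-device adds, likely the residual connection: each device
    // combines its copy of the incoming activation (%50262..%50269) with its
    // locally reduced sum (%50585, %50613, ..., %50781).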
    %int1_48188 = torch.constant.int 1
    %50782 = torch.aten.add.Tensor %50262, %50585, %int1_48188 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48189 = torch.constant.int 1
    %50783 = torch.aten.add.Tensor %50263, %50613, %int1_48189 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48190 = torch.constant.int 1
    %50784 = torch.aten.add.Tensor %50264, %50641, %int1_48190 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48191 = torch.constant.int 1
    %50785 = torch.aten.add.Tensor %50265, %50669, %int1_48191 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48192 = torch.constant.int 1
    %50786 = torch.aten.add.Tensor %50266, %50697, %int1_48192 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48193 = torch.constant.int 1
    %50787 = torch.aten.add.Tensor %50267, %50725, %int1_48193 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48194 = torch.constant.int 1
    %50788 = torch.aten.add.Tensor %50268, %50753, %int1_48194 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_48195 = torch.constant.int 1
    %50789 = torch.aten.add.Tensor %50269, %50781, %int1_48195 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
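    // Annotation: what follows is RMSNorm, computed independently on each of the
    // eight replicas: upcast to f32 (torch dtype code 6), square, mean over the
    // last dim, add eps, rsqrt, rescale, then downcast back to f16 (dtype code 5).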
    %int6_48196 = torch.constant.int 6
    %50790 = torch.prims.convert_element_type %50782, %int6_48196 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_48197 = torch.constant.int 6
    %50791 = torch.prims.convert_element_type %50783, %int6_48197 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_48198 = torch.constant.int 6
    %50792 = torch.prims.convert_element_type %50784, %int6_48198 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_48199 = torch.constant.int 6
    %50793 = torch.prims.convert_element_type %50785, %int6_48199 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_48200 = torch.constant.int 6
    %50794 = torch.prims.convert_element_type %50786, %int6_48200 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_48201 = torch.constant.int 6
    %50795 = torch.prims.convert_element_type %50787, %int6_48201 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_48202 = torch.constant.int 6
    %50796 = torch.prims.convert_element_type %50788, %int6_48202 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_48203 = torch.constant.int 6
    %50797 = torch.prims.convert_element_type %50789, %int6_48203 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_48204 = torch.constant.int 2
    %50798 = torch.aten.pow.Tensor_Scalar %50790, %int2_48204 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50798, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_48205 = torch.constant.int 2
    %50799 = torch.aten.pow.Tensor_Scalar %50791, %int2_48205 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_48206 = torch.constant.int 2
    %50800 = torch.aten.pow.Tensor_Scalar %50792, %int2_48206 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_48207 = torch.constant.int 2
    %50801 = torch.aten.pow.Tensor_Scalar %50793, %int2_48207 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_48208 = torch.constant.int 2
    %50802 = torch.aten.pow.Tensor_Scalar %50794, %int2_48208 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_48209 = torch.constant.int 2
    %50803 = torch.aten.pow.Tensor_Scalar %50795, %int2_48209 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_48210 = torch.constant.int 2
    %50804 = torch.aten.pow.Tensor_Scalar %50796, %int2_48210 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_48211 = torch.constant.int 2
    %50805 = torch.aten.pow.Tensor_Scalar %50797, %int2_48211 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
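    // Annotation: mean over dim -1 with keepdim=true, per replica:
    // [4,?,4096] f32 -> [4,?,1] f32.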
    %int-1_48212 = torch.constant.int -1
    %50806 = torch.prim.ListConstruct %int-1_48212 : (!torch.int) -> !torch.list<int>
    %true_48213 = torch.constant.bool true
    %none_48214 = torch.constant.none
    %50807 = torch.aten.mean.dim %50798, %50806, %true_48213, %none_48214 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_48215 = torch.constant.int -1
    %50808 = torch.prim.ListConstruct %int-1_48215 : (!torch.int) -> !torch.list<int>
    %true_48216 = torch.constant.bool true
    %none_48217 = torch.constant.none
    %50809 = torch.aten.mean.dim %50799, %50808, %true_48216, %none_48217 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_48218 = torch.constant.int -1
    %50810 = torch.prim.ListConstruct %int-1_48218 : (!torch.int) -> !torch.list<int>
    %true_48219 = torch.constant.bool true
    %none_48220 = torch.constant.none
    %50811 = torch.aten.mean.dim %50800, %50810, %true_48219, %none_48220 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_48221 = torch.constant.int -1
    %50812 = torch.prim.ListConstruct %int-1_48221 : (!torch.int) -> !torch.list<int>
    %true_48222 = torch.constant.bool true
    %none_48223 = torch.constant.none
    %50813 = torch.aten.mean.dim %50801, %50812, %true_48222, %none_48223 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_48224 = torch.constant.int -1
    %50814 = torch.prim.ListConstruct %int-1_48224 : (!torch.int) -> !torch.list<int>
    %true_48225 = torch.constant.bool true
    %none_48226 = torch.constant.none
    %50815 = torch.aten.mean.dim %50802, %50814, %true_48225, %none_48226 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_48227 = torch.constant.int -1
    %50816 = torch.prim.ListConstruct %int-1_48227 : (!torch.int) -> !torch.list<int>
    %true_48228 = torch.constant.bool true
    %none_48229 = torch.constant.none
    %50817 = torch.aten.mean.dim %50803, %50816, %true_48228, %none_48229 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_48230 = torch.constant.int -1
    %50818 = torch.prim.ListConstruct %int-1_48230 : (!torch.int) -> !torch.list<int>
    %true_48231 = torch.constant.bool true
    %none_48232 = torch.constant.none
    %50819 = torch.aten.mean.dim %50804, %50818, %true_48231, %none_48232 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_48233 = torch.constant.int -1
    %50820 = torch.prim.ListConstruct %int-1_48233 : (!torch.int) -> !torch.list<int>
    %true_48234 = torch.constant.bool true
    %none_48235 = torch.constant.none
    %50821 = torch.aten.mean.dim %50805, %50820, %true_48234, %none_48235 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
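    // Annotation: eps = 1e-5 (printed as its f32-rounded value
    // 9.9999997473787516E-6), added to the mean of squares before the rsqrt.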
    %float9.999990e-06_48236 = torch.constant.float 9.9999997473787516E-6
    %int1_48237 = torch.constant.int 1
    %50822 = torch.aten.add.Scalar %50807, %float9.999990e-06_48236, %int1_48237 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_48238 = torch.constant.float 9.9999997473787516E-6
    %int1_48239 = torch.constant.int 1
    %50823 = torch.aten.add.Scalar %50809, %float9.999990e-06_48238, %int1_48239 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_48240 = torch.constant.float 9.9999997473787516E-6
    %int1_48241 = torch.constant.int 1
    %50824 = torch.aten.add.Scalar %50811, %float9.999990e-06_48240, %int1_48241 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_48242 = torch.constant.float 9.9999997473787516E-6
    %int1_48243 = torch.constant.int 1
    %50825 = torch.aten.add.Scalar %50813, %float9.999990e-06_48242, %int1_48243 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_48244 = torch.constant.float 9.9999997473787516E-6
    %int1_48245 = torch.constant.int 1
    %50826 = torch.aten.add.Scalar %50815, %float9.999990e-06_48244, %int1_48245 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_48246 = torch.constant.float 9.9999997473787516E-6
    %int1_48247 = torch.constant.int 1
    %50827 = torch.aten.add.Scalar %50817, %float9.999990e-06_48246, %int1_48247 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_48248 = torch.constant.float 9.9999997473787516E-6
    %int1_48249 = torch.constant.int 1
    %50828 = torch.aten.add.Scalar %50819, %float9.999990e-06_48248, %int1_48249 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_48250 = torch.constant.float 9.9999997473787516E-6
    %int1_48251 = torch.constant.int 1
    %50829 = torch.aten.add.Scalar %50821, %float9.999990e-06_48250, %int1_48251 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
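    // Annotation: rsqrt of (mean(x^2) + eps) gives the per-position
    // normalization factor, shape [4,?,1].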
    %50830 = torch.aten.rsqrt %50822 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50831 = torch.aten.rsqrt %50823 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50832 = torch.aten.rsqrt %50824 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50833 = torch.aten.rsqrt %50825 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50834 = torch.aten.rsqrt %50826 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50835 = torch.aten.rsqrt %50827 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50836 = torch.aten.rsqrt %50828 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %50837 = torch.aten.rsqrt %50829 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %50837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
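    // Annotation: normalize by multiplying the f32 activations with the [4,?,1]
    // rsqrt factor, broadcast over the 4096 feature dim.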
    %50838 = torch.aten.mul.Tensor %50790, %50830 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50839 = torch.aten.mul.Tensor %50791, %50831 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50840 = torch.aten.mul.Tensor %50792, %50832 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50841 = torch.aten.mul.Tensor %50793, %50833 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50842 = torch.aten.mul.Tensor %50794, %50834 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50843 = torch.aten.mul.Tensor %50795, %50835 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50844 = torch.aten.mul.Tensor %50796, %50836 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50845 = torch.aten.mul.Tensor %50797, %50837 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
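    // Annotation: scale by the learned norm weight: %1880..%1887 are the eight
    // per-device replicas of a [4096] f32 weight, likely this layer's attn_norm
    // weight given the attention projections that follow.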
    %50846 = torch.aten.mul.Tensor %1880, %50838 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50846, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50847 = torch.aten.mul.Tensor %1881, %50839 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50848 = torch.aten.mul.Tensor %1882, %50840 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50849 = torch.aten.mul.Tensor %1883, %50841 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50850 = torch.aten.mul.Tensor %1884, %50842 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50851 = torch.aten.mul.Tensor %1885, %50843 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50852 = torch.aten.mul.Tensor %1886, %50844 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %50853 = torch.aten.mul.Tensor %1887, %50845 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %50853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
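    // Annotation: downcast the normalized activations back to f16 (dtype code 5).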
    %int5_48252 = torch.constant.int 5
    %50854 = torch.prims.convert_element_type %50846, %int5_48252 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_48253 = torch.constant.int 5
    %50855 = torch.prims.convert_element_type %50847, %int5_48253 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_48254 = torch.constant.int 5
    %50856 = torch.prims.convert_element_type %50848, %int5_48254 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_48255 = torch.constant.int 5
    %50857 = torch.prims.convert_element_type %50849, %int5_48255 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_48256 = torch.constant.int 5
    %50858 = torch.prims.convert_element_type %50850, %int5_48256 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_48257 = torch.constant.int 5
    %50859 = torch.prims.convert_element_type %50851, %int5_48257 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_48258 = torch.constant.int 5
    %50860 = torch.prims.convert_element_type %50852, %int5_48258 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_48259 = torch.constant.int 5
    %50861 = torch.prims.convert_element_type %50853, %int5_48259 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %50861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
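    // Annotation: likely the attention query projection, tensor-parallel:
    // %1888..%1895 are eight [512,4096] f16 weight shards, each transposed below
    // to [4096,512] for the mm.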
    %int1_48260 = torch.constant.int 1
    %int0_48261 = torch.constant.int 0
    %50862 = torch.prim.ListConstruct %int1_48260, %int0_48261 : (!torch.int, !torch.int) -> !torch.list<int>
    %50863 = torch.aten.permute %1888, %50862 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_48262 = torch.constant.int 1
    %int0_48263 = torch.constant.int 0
    %50864 = torch.prim.ListConstruct %int1_48262, %int0_48263 : (!torch.int, !torch.int) -> !torch.list<int>
    %50865 = torch.aten.permute %1889, %50864 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_48264 = torch.constant.int 1
    %int0_48265 = torch.constant.int 0
    %50866 = torch.prim.ListConstruct %int1_48264, %int0_48265 : (!torch.int, !torch.int) -> !torch.list<int>
    %50867 = torch.aten.permute %1890, %50866 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_48266 = torch.constant.int 1
    %int0_48267 = torch.constant.int 0
    %50868 = torch.prim.ListConstruct %int1_48266, %int0_48267 : (!torch.int, !torch.int) -> !torch.list<int>
    %50869 = torch.aten.permute %1891, %50868 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_48268 = torch.constant.int 1
    %int0_48269 = torch.constant.int 0
    %50870 = torch.prim.ListConstruct %int1_48268, %int0_48269 : (!torch.int, !torch.int) -> !torch.list<int>
    %50871 = torch.aten.permute %1892, %50870 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_48270 = torch.constant.int 1
    %int0_48271 = torch.constant.int 0
    %50872 = torch.prim.ListConstruct %int1_48270, %int0_48271 : (!torch.int, !torch.int) -> !torch.list<int>
    %50873 = torch.aten.permute %1893, %50872 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_48272 = torch.constant.int 1
    %int0_48273 = torch.constant.int 0
    %50874 = torch.prim.ListConstruct %int1_48272, %int0_48273 : (!torch.int, !torch.int) -> !torch.list<int>
    %50875 = torch.aten.permute %1894, %50874 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_48274 = torch.constant.int 1
    %int0_48275 = torch.constant.int 0
    %50876 = torch.prim.ListConstruct %int1_48274, %int0_48275 : (!torch.int, !torch.int) -> !torch.list<int>
    %50877 = torch.aten.permute %1895, %50876 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
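    // Annotation: shard-local matmul pattern, repeated once per device: flatten
    // [4,?,4096] to [4*?,4096], mm against the [4096,512] shard, then reshape the
    // result to [4,?,512].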
    %int4_48276 = torch.constant.int 4
    %50878 = torch.aten.mul.int %int4_48276, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48277 = torch.constant.int 4096
    %50879 = torch.prim.ListConstruct %50878, %int4096_48277 : (!torch.int, !torch.int) -> !torch.list<int>
    %50880 = torch.aten.view %50854, %50879 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50880, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50881 = torch.aten.mm %50880, %50863 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50881, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_48278 = torch.constant.int 4
    %int512_48279 = torch.constant.int 512
    %50882 = torch.prim.ListConstruct %int4_48278, %2482, %int512_48279 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50883 = torch.aten.view %50881, %50882 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %50883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_48280 = torch.constant.int 4
    %50884 = torch.aten.mul.int %int4_48280, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48281 = torch.constant.int 4096
    %50885 = torch.prim.ListConstruct %50884, %int4096_48281 : (!torch.int, !torch.int) -> !torch.list<int>
    %50886 = torch.aten.view %50855, %50885 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50886, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50887 = torch.aten.mm %50886, %50865 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50887, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_48282 = torch.constant.int 4
    %int512_48283 = torch.constant.int 512
    %50888 = torch.prim.ListConstruct %int4_48282, %2482, %int512_48283 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50889 = torch.aten.view %50887, %50888 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %50889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_48284 = torch.constant.int 4
    %50890 = torch.aten.mul.int %int4_48284, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48285 = torch.constant.int 4096
    %50891 = torch.prim.ListConstruct %50890, %int4096_48285 : (!torch.int, !torch.int) -> !torch.list<int>
    %50892 = torch.aten.view %50856, %50891 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50892, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50893 = torch.aten.mm %50892, %50867 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50893, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_48286 = torch.constant.int 4
    %int512_48287 = torch.constant.int 512
    %50894 = torch.prim.ListConstruct %int4_48286, %2482, %int512_48287 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50895 = torch.aten.view %50893, %50894 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %50895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_48288 = torch.constant.int 4
    %50896 = torch.aten.mul.int %int4_48288, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48289 = torch.constant.int 4096
    %50897 = torch.prim.ListConstruct %50896, %int4096_48289 : (!torch.int, !torch.int) -> !torch.list<int>
    %50898 = torch.aten.view %50857, %50897 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50898, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50899 = torch.aten.mm %50898, %50869 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50899, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_48290 = torch.constant.int 4
    %int512_48291 = torch.constant.int 512
    %50900 = torch.prim.ListConstruct %int4_48290, %2482, %int512_48291 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50901 = torch.aten.view %50899, %50900 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %50901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_48292 = torch.constant.int 4
    %50902 = torch.aten.mul.int %int4_48292, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48293 = torch.constant.int 4096
    %50903 = torch.prim.ListConstruct %50902, %int4096_48293 : (!torch.int, !torch.int) -> !torch.list<int>
    %50904 = torch.aten.view %50858, %50903 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50904, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50905 = torch.aten.mm %50904, %50871 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50905, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_48294 = torch.constant.int 4
    %int512_48295 = torch.constant.int 512
    %50906 = torch.prim.ListConstruct %int4_48294, %2482, %int512_48295 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50907 = torch.aten.view %50905, %50906 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %50907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_48296 = torch.constant.int 4
    %50908 = torch.aten.mul.int %int4_48296, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48297 = torch.constant.int 4096
    %50909 = torch.prim.ListConstruct %50908, %int4096_48297 : (!torch.int, !torch.int) -> !torch.list<int>
    %50910 = torch.aten.view %50859, %50909 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50910, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50911 = torch.aten.mm %50910, %50873 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50911, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_48298 = torch.constant.int 4
    %int512_48299 = torch.constant.int 512
    %50912 = torch.prim.ListConstruct %int4_48298, %2482, %int512_48299 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50913 = torch.aten.view %50911, %50912 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %50913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_48300 = torch.constant.int 4
    %50914 = torch.aten.mul.int %int4_48300, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48301 = torch.constant.int 4096
    %50915 = torch.prim.ListConstruct %50914, %int4096_48301 : (!torch.int, !torch.int) -> !torch.list<int>
    %50916 = torch.aten.view %50860, %50915 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50916, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50917 = torch.aten.mm %50916, %50875 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50917, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_48302 = torch.constant.int 4
    %int512_48303 = torch.constant.int 512
    %50918 = torch.prim.ListConstruct %int4_48302, %2482, %int512_48303 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50919 = torch.aten.view %50917, %50918 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %50919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_48304 = torch.constant.int 4
    %50920 = torch.aten.mul.int %int4_48304, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48305 = torch.constant.int 4096
    %50921 = torch.prim.ListConstruct %50920, %int4096_48305 : (!torch.int, !torch.int) -> !torch.list<int>
    %50922 = torch.aten.view %50861, %50921 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50922, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50923 = torch.aten.mm %50922, %50877 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %50923, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_48306 = torch.constant.int 4
    %int512_48307 = torch.constant.int 512
    %50924 = torch.prim.ListConstruct %int4_48306, %2482, %int512_48307 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50925 = torch.aten.view %50923, %50924 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %50925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
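    // Annotation: likely the key projection shards: [128,4096] f16 per device,
    // transposed to [4096,128]. Assuming head_dim 128, this is consistent with one
    // KV head per device (grouped-query attention) versus four query heads
    // (512 = 4 * 128) for the Q shards above.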
    %int1_48308 = torch.constant.int 1
    %int0_48309 = torch.constant.int 0
    %50926 = torch.prim.ListConstruct %int1_48308, %int0_48309 : (!torch.int, !torch.int) -> !torch.list<int>
    %50927 = torch.aten.permute %1896, %50926 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48310 = torch.constant.int 1
    %int0_48311 = torch.constant.int 0
    %50928 = torch.prim.ListConstruct %int1_48310, %int0_48311 : (!torch.int, !torch.int) -> !torch.list<int>
    %50929 = torch.aten.permute %1897, %50928 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48312 = torch.constant.int 1
    %int0_48313 = torch.constant.int 0
    %50930 = torch.prim.ListConstruct %int1_48312, %int0_48313 : (!torch.int, !torch.int) -> !torch.list<int>
    %50931 = torch.aten.permute %1898, %50930 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48314 = torch.constant.int 1
    %int0_48315 = torch.constant.int 0
    %50932 = torch.prim.ListConstruct %int1_48314, %int0_48315 : (!torch.int, !torch.int) -> !torch.list<int>
    %50933 = torch.aten.permute %1899, %50932 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48316 = torch.constant.int 1
    %int0_48317 = torch.constant.int 0
    %50934 = torch.prim.ListConstruct %int1_48316, %int0_48317 : (!torch.int, !torch.int) -> !torch.list<int>
    %50935 = torch.aten.permute %1900, %50934 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48318 = torch.constant.int 1
    %int0_48319 = torch.constant.int 0
    %50936 = torch.prim.ListConstruct %int1_48318, %int0_48319 : (!torch.int, !torch.int) -> !torch.list<int>
    %50937 = torch.aten.permute %1901, %50936 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48320 = torch.constant.int 1
    %int0_48321 = torch.constant.int 0
    %50938 = torch.prim.ListConstruct %int1_48320, %int0_48321 : (!torch.int, !torch.int) -> !torch.list<int>
    %50939 = torch.aten.permute %1902, %50938 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48322 = torch.constant.int 1
    %int0_48323 = torch.constant.int 0
    %50940 = torch.prim.ListConstruct %int1_48322, %int0_48323 : (!torch.int, !torch.int) -> !torch.list<int>
    %50941 = torch.aten.permute %1903, %50940 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_48324 = torch.constant.int 4
    %50942 = torch.aten.mul.int %int4_48324, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48325 = torch.constant.int 4096
    %50943 = torch.prim.ListConstruct %50942, %int4096_48325 : (!torch.int, !torch.int) -> !torch.list<int>
    %50944 = torch.aten.view %50854, %50943 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50944, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50945 = torch.aten.mm %50944, %50927 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %50945, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48326 = torch.constant.int 4
    %int128_48327 = torch.constant.int 128
    %50946 = torch.prim.ListConstruct %int4_48326, %2482, %int128_48327 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50947 = torch.aten.view %50945, %50946 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %50947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48328 = torch.constant.int 4
    %50948 = torch.aten.mul.int %int4_48328, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48329 = torch.constant.int 4096
    %50949 = torch.prim.ListConstruct %50948, %int4096_48329 : (!torch.int, !torch.int) -> !torch.list<int>
    %50950 = torch.aten.view %50855, %50949 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50950, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50951 = torch.aten.mm %50950, %50929 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %50951, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48330 = torch.constant.int 4
    %int128_48331 = torch.constant.int 128
    %50952 = torch.prim.ListConstruct %int4_48330, %2482, %int128_48331 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50953 = torch.aten.view %50951, %50952 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %50953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48332 = torch.constant.int 4
    %50954 = torch.aten.mul.int %int4_48332, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48333 = torch.constant.int 4096
    %50955 = torch.prim.ListConstruct %50954, %int4096_48333 : (!torch.int, !torch.int) -> !torch.list<int>
    %50956 = torch.aten.view %50856, %50955 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50956, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50957 = torch.aten.mm %50956, %50931 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %50957, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48334 = torch.constant.int 4
    %int128_48335 = torch.constant.int 128
    %50958 = torch.prim.ListConstruct %int4_48334, %2482, %int128_48335 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50959 = torch.aten.view %50957, %50958 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %50959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48336 = torch.constant.int 4
    %50960 = torch.aten.mul.int %int4_48336, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48337 = torch.constant.int 4096
    %50961 = torch.prim.ListConstruct %50960, %int4096_48337 : (!torch.int, !torch.int) -> !torch.list<int>
    %50962 = torch.aten.view %50857, %50961 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50962, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50963 = torch.aten.mm %50962, %50933 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %50963, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48338 = torch.constant.int 4
    %int128_48339 = torch.constant.int 128
    %50964 = torch.prim.ListConstruct %int4_48338, %2482, %int128_48339 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50965 = torch.aten.view %50963, %50964 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %50965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48340 = torch.constant.int 4
    %50966 = torch.aten.mul.int %int4_48340, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48341 = torch.constant.int 4096
    %50967 = torch.prim.ListConstruct %50966, %int4096_48341 : (!torch.int, !torch.int) -> !torch.list<int>
    %50968 = torch.aten.view %50858, %50967 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50968, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50969 = torch.aten.mm %50968, %50935 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %50969, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48342 = torch.constant.int 4
    %int128_48343 = torch.constant.int 128
    %50970 = torch.prim.ListConstruct %int4_48342, %2482, %int128_48343 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50971 = torch.aten.view %50969, %50970 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %50971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48344 = torch.constant.int 4
    %50972 = torch.aten.mul.int %int4_48344, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48345 = torch.constant.int 4096
    %50973 = torch.prim.ListConstruct %50972, %int4096_48345 : (!torch.int, !torch.int) -> !torch.list<int>
    %50974 = torch.aten.view %50859, %50973 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50974, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50975 = torch.aten.mm %50974, %50937 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %50975, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48346 = torch.constant.int 4
    %int128_48347 = torch.constant.int 128
    %50976 = torch.prim.ListConstruct %int4_48346, %2482, %int128_48347 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50977 = torch.aten.view %50975, %50976 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %50977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48348 = torch.constant.int 4
    %50978 = torch.aten.mul.int %int4_48348, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48349 = torch.constant.int 4096
    %50979 = torch.prim.ListConstruct %50978, %int4096_48349 : (!torch.int, !torch.int) -> !torch.list<int>
    %50980 = torch.aten.view %50860, %50979 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50980, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50981 = torch.aten.mm %50980, %50939 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %50981, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48350 = torch.constant.int 4
    %int128_48351 = torch.constant.int 128
    %50982 = torch.prim.ListConstruct %int4_48350, %2482, %int128_48351 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50983 = torch.aten.view %50981, %50982 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %50983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48352 = torch.constant.int 4
    %50984 = torch.aten.mul.int %int4_48352, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48353 = torch.constant.int 4096
    %50985 = torch.prim.ListConstruct %50984, %int4096_48353 : (!torch.int, !torch.int) -> !torch.list<int>
    %50986 = torch.aten.view %50861, %50985 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %50986, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %50987 = torch.aten.mm %50986, %50941 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %50987, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48354 = torch.constant.int 4
    %int128_48355 = torch.constant.int 128
    %50988 = torch.prim.ListConstruct %int4_48354, %2482, %int128_48355 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %50989 = torch.aten.view %50987, %50988 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %50989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
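    // Annotation: likely the value projection, following the same per-device
    // [128,4096] -> [4096,128] permute-and-mm pattern as K.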
    %int1_48356 = torch.constant.int 1
    %int0_48357 = torch.constant.int 0
    %50990 = torch.prim.ListConstruct %int1_48356, %int0_48357 : (!torch.int, !torch.int) -> !torch.list<int>
    %50991 = torch.aten.permute %1904, %50990 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48358 = torch.constant.int 1
    %int0_48359 = torch.constant.int 0
    %50992 = torch.prim.ListConstruct %int1_48358, %int0_48359 : (!torch.int, !torch.int) -> !torch.list<int>
    %50993 = torch.aten.permute %1905, %50992 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48360 = torch.constant.int 1
    %int0_48361 = torch.constant.int 0
    %50994 = torch.prim.ListConstruct %int1_48360, %int0_48361 : (!torch.int, !torch.int) -> !torch.list<int>
    %50995 = torch.aten.permute %1906, %50994 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48362 = torch.constant.int 1
    %int0_48363 = torch.constant.int 0
    %50996 = torch.prim.ListConstruct %int1_48362, %int0_48363 : (!torch.int, !torch.int) -> !torch.list<int>
    %50997 = torch.aten.permute %1907, %50996 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48364 = torch.constant.int 1
    %int0_48365 = torch.constant.int 0
    %50998 = torch.prim.ListConstruct %int1_48364, %int0_48365 : (!torch.int, !torch.int) -> !torch.list<int>
    %50999 = torch.aten.permute %1908, %50998 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48366 = torch.constant.int 1
    %int0_48367 = torch.constant.int 0
    %51000 = torch.prim.ListConstruct %int1_48366, %int0_48367 : (!torch.int, !torch.int) -> !torch.list<int>
    %51001 = torch.aten.permute %1909, %51000 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48368 = torch.constant.int 1
    %int0_48369 = torch.constant.int 0
    %51002 = torch.prim.ListConstruct %int1_48368, %int0_48369 : (!torch.int, !torch.int) -> !torch.list<int>
    %51003 = torch.aten.permute %1910, %51002 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_48370 = torch.constant.int 1
    %int0_48371 = torch.constant.int 0
    %51004 = torch.prim.ListConstruct %int1_48370, %int0_48371 : (!torch.int, !torch.int) -> !torch.list<int>
    %51005 = torch.aten.permute %1911, %51004 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_48372 = torch.constant.int 4
    %51006 = torch.aten.mul.int %int4_48372, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48373 = torch.constant.int 4096
    %51007 = torch.prim.ListConstruct %51006, %int4096_48373 : (!torch.int, !torch.int) -> !torch.list<int>
    %51008 = torch.aten.view %50854, %51007 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51008, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %51009 = torch.aten.mm %51008, %50991 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %51009, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48374 = torch.constant.int 4
    %int128_48375 = torch.constant.int 128
    %51010 = torch.prim.ListConstruct %int4_48374, %2482, %int128_48375 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51011 = torch.aten.view %51009, %51010 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %51011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48376 = torch.constant.int 4
    %51012 = torch.aten.mul.int %int4_48376, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48377 = torch.constant.int 4096
    %51013 = torch.prim.ListConstruct %51012, %int4096_48377 : (!torch.int, !torch.int) -> !torch.list<int>
    %51014 = torch.aten.view %50855, %51013 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51014, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %51015 = torch.aten.mm %51014, %50993 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %51015, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48378 = torch.constant.int 4
    %int128_48379 = torch.constant.int 128
    %51016 = torch.prim.ListConstruct %int4_48378, %2482, %int128_48379 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51017 = torch.aten.view %51015, %51016 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %51017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48380 = torch.constant.int 4
    %51018 = torch.aten.mul.int %int4_48380, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48381 = torch.constant.int 4096
    %51019 = torch.prim.ListConstruct %51018, %int4096_48381 : (!torch.int, !torch.int) -> !torch.list<int>
    %51020 = torch.aten.view %50856, %51019 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51020, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %51021 = torch.aten.mm %51020, %50995 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %51021, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48382 = torch.constant.int 4
    %int128_48383 = torch.constant.int 128
    %51022 = torch.prim.ListConstruct %int4_48382, %2482, %int128_48383 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51023 = torch.aten.view %51021, %51022 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %51023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48384 = torch.constant.int 4
    %51024 = torch.aten.mul.int %int4_48384, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48385 = torch.constant.int 4096
    %51025 = torch.prim.ListConstruct %51024, %int4096_48385 : (!torch.int, !torch.int) -> !torch.list<int>
    %51026 = torch.aten.view %50857, %51025 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51026, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %51027 = torch.aten.mm %51026, %50997 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %51027, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48386 = torch.constant.int 4
    %int128_48387 = torch.constant.int 128
    %51028 = torch.prim.ListConstruct %int4_48386, %2482, %int128_48387 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51029 = torch.aten.view %51027, %51028 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %51029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48388 = torch.constant.int 4
    %51030 = torch.aten.mul.int %int4_48388, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48389 = torch.constant.int 4096
    %51031 = torch.prim.ListConstruct %51030, %int4096_48389 : (!torch.int, !torch.int) -> !torch.list<int>
    %51032 = torch.aten.view %50858, %51031 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51032, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %51033 = torch.aten.mm %51032, %50999 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %51033, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48390 = torch.constant.int 4
    %int128_48391 = torch.constant.int 128
    %51034 = torch.prim.ListConstruct %int4_48390, %2482, %int128_48391 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51035 = torch.aten.view %51033, %51034 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %51035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48392 = torch.constant.int 4
    %51036 = torch.aten.mul.int %int4_48392, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48393 = torch.constant.int 4096
    %51037 = torch.prim.ListConstruct %51036, %int4096_48393 : (!torch.int, !torch.int) -> !torch.list<int>
    %51038 = torch.aten.view %50859, %51037 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51038, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %51039 = torch.aten.mm %51038, %51001 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %51039, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48394 = torch.constant.int 4
    %int128_48395 = torch.constant.int 128
    %51040 = torch.prim.ListConstruct %int4_48394, %2482, %int128_48395 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51041 = torch.aten.view %51039, %51040 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %51041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48396 = torch.constant.int 4
    %51042 = torch.aten.mul.int %int4_48396, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48397 = torch.constant.int 4096
    %51043 = torch.prim.ListConstruct %51042, %int4096_48397 : (!torch.int, !torch.int) -> !torch.list<int>
    %51044 = torch.aten.view %50860, %51043 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51044, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %51045 = torch.aten.mm %51044, %51003 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %51045, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48398 = torch.constant.int 4
    %int128_48399 = torch.constant.int 128
    %51046 = torch.prim.ListConstruct %int4_48398, %2482, %int128_48399 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51047 = torch.aten.view %51045, %51046 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %51047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_48400 = torch.constant.int 4
    %51048 = torch.aten.mul.int %int4_48400, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_48401 = torch.constant.int 4096
    %51049 = torch.prim.ListConstruct %51048, %int4096_48401 : (!torch.int, !torch.int) -> !torch.list<int>
    %51050 = torch.aten.view %50861, %51049 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51050, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %51051 = torch.aten.mm %51050, %51005 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %51051, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_48402 = torch.constant.int 4
    %int128_48403 = torch.constant.int 128
    %51052 = torch.prim.ListConstruct %int4_48402, %2482, %int128_48403 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51053 = torch.aten.view %51051, %51052 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %51053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
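    // Above: per-device single-head projections. Each device flattens its replicated
    // [4,?,4096] activation to [?,4096], multiplies by a [4096,128] weight shard, and
    // restores the [4,?,128] batch shape (s0 * 16 tokens per the bound symbolic shape).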
    %int4_48404 = torch.constant.int 4
    %int4_48405 = torch.constant.int 4
    %int128_48406 = torch.constant.int 128
    %51054 = torch.prim.ListConstruct %int4_48404, %2482, %int4_48405, %int128_48406 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51055 = torch.aten.view %50883, %51054 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_48407 = torch.constant.int 4
    %int4_48408 = torch.constant.int 4
    %int128_48409 = torch.constant.int 128
    %51056 = torch.prim.ListConstruct %int4_48407, %2482, %int4_48408, %int128_48409 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51057 = torch.aten.view %50889, %51056 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_48410 = torch.constant.int 4
    %int4_48411 = torch.constant.int 4
    %int128_48412 = torch.constant.int 128
    %51058 = torch.prim.ListConstruct %int4_48410, %2482, %int4_48411, %int128_48412 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51059 = torch.aten.view %50895, %51058 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_48413 = torch.constant.int 4
    %int4_48414 = torch.constant.int 4
    %int128_48415 = torch.constant.int 128
    %51060 = torch.prim.ListConstruct %int4_48413, %2482, %int4_48414, %int128_48415 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51061 = torch.aten.view %50901, %51060 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_48416 = torch.constant.int 4
    %int4_48417 = torch.constant.int 4
    %int128_48418 = torch.constant.int 128
    %51062 = torch.prim.ListConstruct %int4_48416, %2482, %int4_48417, %int128_48418 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51063 = torch.aten.view %50907, %51062 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_48419 = torch.constant.int 4
    %int4_48420 = torch.constant.int 4
    %int128_48421 = torch.constant.int 128
    %51064 = torch.prim.ListConstruct %int4_48419, %2482, %int4_48420, %int128_48421 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51065 = torch.aten.view %50913, %51064 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_48422 = torch.constant.int 4
    %int4_48423 = torch.constant.int 4
    %int128_48424 = torch.constant.int 128
    %51066 = torch.prim.ListConstruct %int4_48422, %2482, %int4_48423, %int128_48424 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51067 = torch.aten.view %50919, %51066 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_48425 = torch.constant.int 4
    %int4_48426 = torch.constant.int 4
    %int128_48427 = torch.constant.int 128
    %51068 = torch.prim.ListConstruct %int4_48425, %2482, %int4_48426, %int128_48427 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51069 = torch.aten.view %50925, %51068 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
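    // The eight [4,?,512] tensors are reinterpreted as [4,?,4,128]: four 128-dim
    // attention heads per device, apparently the sharded query heads (4 x 8 devices
    // = 32 heads in total).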
    %int4_48428 = torch.constant.int 4
    %int1_48429 = torch.constant.int 1
    %int128_48430 = torch.constant.int 128
    %51070 = torch.prim.ListConstruct %int4_48428, %2482, %int1_48429, %int128_48430 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51071 = torch.aten.view %50947, %51070 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48431 = torch.constant.int 4
    %int1_48432 = torch.constant.int 1
    %int128_48433 = torch.constant.int 128
    %51072 = torch.prim.ListConstruct %int4_48431, %2482, %int1_48432, %int128_48433 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51073 = torch.aten.view %50953, %51072 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48434 = torch.constant.int 4
    %int1_48435 = torch.constant.int 1
    %int128_48436 = torch.constant.int 128
    %51074 = torch.prim.ListConstruct %int4_48434, %2482, %int1_48435, %int128_48436 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51075 = torch.aten.view %50959, %51074 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48437 = torch.constant.int 4
    %int1_48438 = torch.constant.int 1
    %int128_48439 = torch.constant.int 128
    %51076 = torch.prim.ListConstruct %int4_48437, %2482, %int1_48438, %int128_48439 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51077 = torch.aten.view %50965, %51076 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48440 = torch.constant.int 4
    %int1_48441 = torch.constant.int 1
    %int128_48442 = torch.constant.int 128
    %51078 = torch.prim.ListConstruct %int4_48440, %2482, %int1_48441, %int128_48442 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51079 = torch.aten.view %50971, %51078 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48443 = torch.constant.int 4
    %int1_48444 = torch.constant.int 1
    %int128_48445 = torch.constant.int 128
    %51080 = torch.prim.ListConstruct %int4_48443, %2482, %int1_48444, %int128_48445 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51081 = torch.aten.view %50977, %51080 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48446 = torch.constant.int 4
    %int1_48447 = torch.constant.int 1
    %int128_48448 = torch.constant.int 128
    %51082 = torch.prim.ListConstruct %int4_48446, %2482, %int1_48447, %int128_48448 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51083 = torch.aten.view %50983, %51082 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48449 = torch.constant.int 4
    %int1_48450 = torch.constant.int 1
    %int128_48451 = torch.constant.int 128
    %51084 = torch.prim.ListConstruct %int4_48449, %2482, %int1_48450, %int128_48451 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51085 = torch.aten.view %50989, %51084 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48452 = torch.constant.int 4
    %int1_48453 = torch.constant.int 1
    %int128_48454 = torch.constant.int 128
    %51086 = torch.prim.ListConstruct %int4_48452, %2482, %int1_48453, %int128_48454 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51087 = torch.aten.view %51011, %51086 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48455 = torch.constant.int 4
    %int1_48456 = torch.constant.int 1
    %int128_48457 = torch.constant.int 128
    %51088 = torch.prim.ListConstruct %int4_48455, %2482, %int1_48456, %int128_48457 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51089 = torch.aten.view %51017, %51088 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48458 = torch.constant.int 4
    %int1_48459 = torch.constant.int 1
    %int128_48460 = torch.constant.int 128
    %51090 = torch.prim.ListConstruct %int4_48458, %2482, %int1_48459, %int128_48460 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51091 = torch.aten.view %51023, %51090 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48461 = torch.constant.int 4
    %int1_48462 = torch.constant.int 1
    %int128_48463 = torch.constant.int 128
    %51092 = torch.prim.ListConstruct %int4_48461, %2482, %int1_48462, %int128_48463 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51093 = torch.aten.view %51029, %51092 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48464 = torch.constant.int 4
    %int1_48465 = torch.constant.int 1
    %int128_48466 = torch.constant.int 128
    %51094 = torch.prim.ListConstruct %int4_48464, %2482, %int1_48465, %int128_48466 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51095 = torch.aten.view %51035, %51094 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48467 = torch.constant.int 4
    %int1_48468 = torch.constant.int 1
    %int128_48469 = torch.constant.int 128
    %51096 = torch.prim.ListConstruct %int4_48467, %2482, %int1_48468, %int128_48469 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51097 = torch.aten.view %51041, %51096 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48470 = torch.constant.int 4
    %int1_48471 = torch.constant.int 1
    %int128_48472 = torch.constant.int 128
    %51098 = torch.prim.ListConstruct %int4_48470, %2482, %int1_48471, %int128_48472 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51099 = torch.aten.view %51047, %51098 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_48473 = torch.constant.int 4
    %int1_48474 = torch.constant.int 1
    %int128_48475 = torch.constant.int 128
    %51100 = torch.prim.ListConstruct %int4_48473, %2482, %int1_48474, %int128_48475 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51101 = torch.aten.view %51053, %51100 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
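    // The sixteen [4,?,128] tensors are likewise unflattened to [4,?,1,128]: one
    // 128-dim head per device, consistent with grouped-query K and V projections.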
    %int131072_48476 = torch.constant.int 131072
    %none_48477 = torch.constant.none
    %none_48478 = torch.constant.none
    %cpu_48479 = torch.constant.device "cpu"
    %false_48480 = torch.constant.bool false
    %51102 = torch.aten.arange %int131072_48476, %none_48477, %none_48478, %cpu_48479, %false_48480 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_48481 = torch.constant.int 0
    %int128_48482 = torch.constant.int 128
    %int2_48483 = torch.constant.int 2
    %none_48484 = torch.constant.none
    %none_48485 = torch.constant.none
    %cpu_48486 = torch.constant.device "cpu"
    %false_48487 = torch.constant.bool false
    %51103 = torch.aten.arange.start_step %int0_48481, %int128_48482, %int2_48483, %none_48484, %none_48485, %cpu_48486, %false_48487 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_48488 = torch.constant.int 0
    %int0_48489 = torch.constant.int 0
    %int64_48490 = torch.constant.int 64
    %int1_48491 = torch.constant.int 1
    %51104 = torch.aten.slice.Tensor %51103, %int0_48488, %int0_48489, %int64_48490, %int1_48491 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_48492 = torch.constant.int 6
    %51105 = torch.prims.convert_element_type %51104, %int6_48492 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_48493 = torch.constant.int 128
    %51106 = torch.aten.div.Scalar %51105, %int128_48493 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_48494 = torch.constant.float 5.000000e+05
    %51107 = torch.aten.pow.Scalar %float5.000000e05_48494, %51106 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %51108 = torch.aten.reciprocal %51107 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_48495 = torch.constant.float 1.000000e+00
    %51109 = torch.aten.mul.Scalar %51108, %float1.000000e00_48495 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_48496 = torch.constant.int 131072
    %int1_48497 = torch.constant.int 1
    %51110 = torch.prim.ListConstruct %int131072_48496, %int1_48497 : (!torch.int, !torch.int) -> !torch.list<int>
    %51111 = torch.aten.view %51102, %51110 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %51112 = torch.aten.mul.Tensor %51111, %51109 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %51113 = torch.aten.cos %51112 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %51114 = torch.aten.sin %51112 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %51115 = torch.aten.complex %51113, %51114 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
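    // Rotary-embedding table: inv_freq = 1 / 500000^(2i/128) for i in 0..63, outer
    // product with positions 0..131071, then cos/sin packed as a [131072,64]
    // complex<f32> tensor.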
    %51116 = torch_c.to_builtin_tensor %51115 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51117 = flow.tensor.transfer %51116 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %51118 = torch_c.from_builtin_tensor %51117 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51119 = torch_c.to_builtin_tensor %51115 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51120 = flow.tensor.transfer %51119 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %51121 = torch_c.from_builtin_tensor %51120 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51122 = torch_c.to_builtin_tensor %51115 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51123 = flow.tensor.transfer %51122 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %51124 = torch_c.from_builtin_tensor %51123 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51125 = torch_c.to_builtin_tensor %51115 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51126 = flow.tensor.transfer %51125 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %51127 = torch_c.from_builtin_tensor %51126 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51128 = torch_c.to_builtin_tensor %51115 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51129 = flow.tensor.transfer %51128 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %51130 = torch_c.from_builtin_tensor %51129 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51131 = torch_c.to_builtin_tensor %51115 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51132 = flow.tensor.transfer %51131 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %51133 = torch_c.from_builtin_tensor %51132 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51134 = torch_c.to_builtin_tensor %51115 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51135 = flow.tensor.transfer %51134 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %51136 = torch_c.from_builtin_tensor %51135 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51137 = torch_c.to_builtin_tensor %51115 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51138 = flow.tensor.transfer %51137 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %51139 = torch_c.from_builtin_tensor %51138 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
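    // The table is materialized on the CPU and replicated to all eight devices via
    // flow.tensor.transfer; %51118 .. %51139 hold the per-device copies.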
    %int1_48498 = torch.constant.int 1
    %51140 = torch.aten.size.int %50883, %int1_48498 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_48499 = torch.constant.int 0
    %51141 = torch.aten.add.int %int0_48499, %51140 : !torch.int, !torch.int -> !torch.int
    %int0_48500 = torch.constant.int 0
    %int0_48501 = torch.constant.int 0
    %int1_48502 = torch.constant.int 1
    %51142 = torch.aten.slice.Tensor %51118, %int0_48500, %int0_48501, %51141, %int1_48502 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51142, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48503 = torch.constant.int 1
    %int0_48504 = torch.constant.int 0
    %int9223372036854775807_48505 = torch.constant.int 9223372036854775807
    %int1_48506 = torch.constant.int 1
    %51143 = torch.aten.slice.Tensor %51142, %int1_48503, %int0_48504, %int9223372036854775807_48505, %int1_48506 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51143, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48507 = torch.constant.int 0
    %51144 = torch.aten.unsqueeze %51143, %int0_48507 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51144, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48508 = torch.constant.int 2
    %51145 = torch.aten.unsqueeze %51144, %int2_48508 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51145, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48509 = torch.constant.int 3
    %int0_48510 = torch.constant.int 0
    %int9223372036854775807_48511 = torch.constant.int 9223372036854775807
    %int1_48512 = torch.constant.int 1
    %51146 = torch.aten.slice.Tensor %51145, %int3_48509, %int0_48510, %int9223372036854775807_48511, %int1_48512 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51146, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51147 = torch_c.to_builtin_tensor %51055 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_48513 = arith.constant 1 : index
    %dim_48514 = tensor.dim %51147, %c1_48513 : tensor<4x?x4x128xf16>
    %51148 = flow.tensor.bitcast %51147 : tensor<4x?x4x128xf16>{%dim_48514} -> tensor<4x?x4x64xcomplex<f16>>{%dim_48514}
    %51149 = torch_c.from_builtin_tensor %51148 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %51149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %51150 = torch.aten.mul.Tensor %51149, %51146 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %51150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %51151 = torch_c.to_builtin_tensor %51150 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_48515 = arith.constant 1 : index
    %dim_48516 = tensor.dim %51151, %c1_48515 : tensor<4x?x4x64xcomplex<f32>>
    %51152 = flow.tensor.bitcast %51151 : tensor<4x?x4x64xcomplex<f32>>{%dim_48516} -> tensor<4x?x4x128xf32>{%dim_48516}
    %51153 = torch_c.from_builtin_tensor %51152 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %51153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_48517 = torch.constant.int 5
    %51154 = torch.prims.convert_element_type %51153, %int5_48517 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
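    // Rotary application for device 0 above: slice the table to the current sequence
    // length, broadcast to [1,?,1,64], bitcast the f16 head pairs to complex<f16>,
    // multiply by the complex table, bitcast back to [4,?,4,128] f32, and truncate
    // to f16. Devices 1..7 repeat the identical sequence below.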
    %int1_48518 = torch.constant.int 1
    %51155 = torch.aten.size.int %50889, %int1_48518 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_48519 = torch.constant.int 0
    %51156 = torch.aten.add.int %int0_48519, %51155 : !torch.int, !torch.int -> !torch.int
    %int0_48520 = torch.constant.int 0
    %int0_48521 = torch.constant.int 0
    %int1_48522 = torch.constant.int 1
    %51157 = torch.aten.slice.Tensor %51121, %int0_48520, %int0_48521, %51156, %int1_48522 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51157, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48523 = torch.constant.int 1
    %int0_48524 = torch.constant.int 0
    %int9223372036854775807_48525 = torch.constant.int 9223372036854775807
    %int1_48526 = torch.constant.int 1
    %51158 = torch.aten.slice.Tensor %51157, %int1_48523, %int0_48524, %int9223372036854775807_48525, %int1_48526 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51158, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48527 = torch.constant.int 0
    %51159 = torch.aten.unsqueeze %51158, %int0_48527 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51159, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48528 = torch.constant.int 2
    %51160 = torch.aten.unsqueeze %51159, %int2_48528 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51160, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48529 = torch.constant.int 3
    %int0_48530 = torch.constant.int 0
    %int9223372036854775807_48531 = torch.constant.int 9223372036854775807
    %int1_48532 = torch.constant.int 1
    %51161 = torch.aten.slice.Tensor %51160, %int3_48529, %int0_48530, %int9223372036854775807_48531, %int1_48532 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51161, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51162 = torch_c.to_builtin_tensor %51057 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_48533 = arith.constant 1 : index
    %dim_48534 = tensor.dim %51162, %c1_48533 : tensor<4x?x4x128xf16>
    %51163 = flow.tensor.bitcast %51162 : tensor<4x?x4x128xf16>{%dim_48534} -> tensor<4x?x4x64xcomplex<f16>>{%dim_48534}
    %51164 = torch_c.from_builtin_tensor %51163 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %51164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %51165 = torch.aten.mul.Tensor %51164, %51161 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %51165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %51166 = torch_c.to_builtin_tensor %51165 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_48535 = arith.constant 1 : index
    %dim_48536 = tensor.dim %51166, %c1_48535 : tensor<4x?x4x64xcomplex<f32>>
    %51167 = flow.tensor.bitcast %51166 : tensor<4x?x4x64xcomplex<f32>>{%dim_48536} -> tensor<4x?x4x128xf32>{%dim_48536}
    %51168 = torch_c.from_builtin_tensor %51167 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %51168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_48537 = torch.constant.int 5
    %51169 = torch.prims.convert_element_type %51168, %int5_48537 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_48538 = torch.constant.int 1
    %51170 = torch.aten.size.int %50895, %int1_48538 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_48539 = torch.constant.int 0
    %51171 = torch.aten.add.int %int0_48539, %51170 : !torch.int, !torch.int -> !torch.int
    %int0_48540 = torch.constant.int 0
    %int0_48541 = torch.constant.int 0
    %int1_48542 = torch.constant.int 1
    %51172 = torch.aten.slice.Tensor %51124, %int0_48540, %int0_48541, %51171, %int1_48542 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51172, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48543 = torch.constant.int 1
    %int0_48544 = torch.constant.int 0
    %int9223372036854775807_48545 = torch.constant.int 9223372036854775807
    %int1_48546 = torch.constant.int 1
    %51173 = torch.aten.slice.Tensor %51172, %int1_48543, %int0_48544, %int9223372036854775807_48545, %int1_48546 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51173, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48547 = torch.constant.int 0
    %51174 = torch.aten.unsqueeze %51173, %int0_48547 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51174, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48548 = torch.constant.int 2
    %51175 = torch.aten.unsqueeze %51174, %int2_48548 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51175, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48549 = torch.constant.int 3
    %int0_48550 = torch.constant.int 0
    %int9223372036854775807_48551 = torch.constant.int 9223372036854775807
    %int1_48552 = torch.constant.int 1
    %51176 = torch.aten.slice.Tensor %51175, %int3_48549, %int0_48550, %int9223372036854775807_48551, %int1_48552 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51176, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51177 = torch_c.to_builtin_tensor %51059 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_48553 = arith.constant 1 : index
    %dim_48554 = tensor.dim %51177, %c1_48553 : tensor<4x?x4x128xf16>
    %51178 = flow.tensor.bitcast %51177 : tensor<4x?x4x128xf16>{%dim_48554} -> tensor<4x?x4x64xcomplex<f16>>{%dim_48554}
    %51179 = torch_c.from_builtin_tensor %51178 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %51179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %51180 = torch.aten.mul.Tensor %51179, %51176 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %51180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %51181 = torch_c.to_builtin_tensor %51180 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_48555 = arith.constant 1 : index
    %dim_48556 = tensor.dim %51181, %c1_48555 : tensor<4x?x4x64xcomplex<f32>>
    %51182 = flow.tensor.bitcast %51181 : tensor<4x?x4x64xcomplex<f32>>{%dim_48556} -> tensor<4x?x4x128xf32>{%dim_48556}
    %51183 = torch_c.from_builtin_tensor %51182 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %51183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_48557 = torch.constant.int 5
    %51184 = torch.prims.convert_element_type %51183, %int5_48557 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_48558 = torch.constant.int 1
    %51185 = torch.aten.size.int %50901, %int1_48558 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_48559 = torch.constant.int 0
    %51186 = torch.aten.add.int %int0_48559, %51185 : !torch.int, !torch.int -> !torch.int
    %int0_48560 = torch.constant.int 0
    %int0_48561 = torch.constant.int 0
    %int1_48562 = torch.constant.int 1
    %51187 = torch.aten.slice.Tensor %51127, %int0_48560, %int0_48561, %51186, %int1_48562 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51187, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48563 = torch.constant.int 1
    %int0_48564 = torch.constant.int 0
    %int9223372036854775807_48565 = torch.constant.int 9223372036854775807
    %int1_48566 = torch.constant.int 1
    %51188 = torch.aten.slice.Tensor %51187, %int1_48563, %int0_48564, %int9223372036854775807_48565, %int1_48566 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51188, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48567 = torch.constant.int 0
    %51189 = torch.aten.unsqueeze %51188, %int0_48567 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51189, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48568 = torch.constant.int 2
    %51190 = torch.aten.unsqueeze %51189, %int2_48568 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51190, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48569 = torch.constant.int 3
    %int0_48570 = torch.constant.int 0
    %int9223372036854775807_48571 = torch.constant.int 9223372036854775807
    %int1_48572 = torch.constant.int 1
    %51191 = torch.aten.slice.Tensor %51190, %int3_48569, %int0_48570, %int9223372036854775807_48571, %int1_48572 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51191, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51192 = torch_c.to_builtin_tensor %51061 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_48573 = arith.constant 1 : index
    %dim_48574 = tensor.dim %51192, %c1_48573 : tensor<4x?x4x128xf16>
    %51193 = flow.tensor.bitcast %51192 : tensor<4x?x4x128xf16>{%dim_48574} -> tensor<4x?x4x64xcomplex<f16>>{%dim_48574}
    %51194 = torch_c.from_builtin_tensor %51193 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %51194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %51195 = torch.aten.mul.Tensor %51194, %51191 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %51195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %51196 = torch_c.to_builtin_tensor %51195 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_48575 = arith.constant 1 : index
    %dim_48576 = tensor.dim %51196, %c1_48575 : tensor<4x?x4x64xcomplex<f32>>
    %51197 = flow.tensor.bitcast %51196 : tensor<4x?x4x64xcomplex<f32>>{%dim_48576} -> tensor<4x?x4x128xf32>{%dim_48576}
    %51198 = torch_c.from_builtin_tensor %51197 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %51198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_48577 = torch.constant.int 5
    %51199 = torch.prims.convert_element_type %51198, %int5_48577 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_48578 = torch.constant.int 1
    %51200 = torch.aten.size.int %50907, %int1_48578 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_48579 = torch.constant.int 0
    %51201 = torch.aten.add.int %int0_48579, %51200 : !torch.int, !torch.int -> !torch.int
    %int0_48580 = torch.constant.int 0
    %int0_48581 = torch.constant.int 0
    %int1_48582 = torch.constant.int 1
    %51202 = torch.aten.slice.Tensor %51130, %int0_48580, %int0_48581, %51201, %int1_48582 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51202, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48583 = torch.constant.int 1
    %int0_48584 = torch.constant.int 0
    %int9223372036854775807_48585 = torch.constant.int 9223372036854775807
    %int1_48586 = torch.constant.int 1
    %51203 = torch.aten.slice.Tensor %51202, %int1_48583, %int0_48584, %int9223372036854775807_48585, %int1_48586 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51203, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48587 = torch.constant.int 0
    %51204 = torch.aten.unsqueeze %51203, %int0_48587 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51204, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48588 = torch.constant.int 2
    %51205 = torch.aten.unsqueeze %51204, %int2_48588 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51205, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48589 = torch.constant.int 3
    %int0_48590 = torch.constant.int 0
    %int9223372036854775807_48591 = torch.constant.int 9223372036854775807
    %int1_48592 = torch.constant.int 1
    %51206 = torch.aten.slice.Tensor %51205, %int3_48589, %int0_48590, %int9223372036854775807_48591, %int1_48592 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51206, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51207 = torch_c.to_builtin_tensor %51063 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_48593 = arith.constant 1 : index
    %dim_48594 = tensor.dim %51207, %c1_48593 : tensor<4x?x4x128xf16>
    %51208 = flow.tensor.bitcast %51207 : tensor<4x?x4x128xf16>{%dim_48594} -> tensor<4x?x4x64xcomplex<f16>>{%dim_48594}
    %51209 = torch_c.from_builtin_tensor %51208 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %51209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %51210 = torch.aten.mul.Tensor %51209, %51206 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %51210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %51211 = torch_c.to_builtin_tensor %51210 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_48595 = arith.constant 1 : index
    %dim_48596 = tensor.dim %51211, %c1_48595 : tensor<4x?x4x64xcomplex<f32>>
    %51212 = flow.tensor.bitcast %51211 : tensor<4x?x4x64xcomplex<f32>>{%dim_48596} -> tensor<4x?x4x128xf32>{%dim_48596}
    %51213 = torch_c.from_builtin_tensor %51212 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %51213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_48597 = torch.constant.int 5
    %51214 = torch.prims.convert_element_type %51213, %int5_48597 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_48598 = torch.constant.int 1
    %51215 = torch.aten.size.int %50913, %int1_48598 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_48599 = torch.constant.int 0
    %51216 = torch.aten.add.int %int0_48599, %51215 : !torch.int, !torch.int -> !torch.int
    %int0_48600 = torch.constant.int 0
    %int0_48601 = torch.constant.int 0
    %int1_48602 = torch.constant.int 1
    %51217 = torch.aten.slice.Tensor %51133, %int0_48600, %int0_48601, %51216, %int1_48602 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51217, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48603 = torch.constant.int 1
    %int0_48604 = torch.constant.int 0
    %int9223372036854775807_48605 = torch.constant.int 9223372036854775807
    %int1_48606 = torch.constant.int 1
    %51218 = torch.aten.slice.Tensor %51217, %int1_48603, %int0_48604, %int9223372036854775807_48605, %int1_48606 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51218, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48607 = torch.constant.int 0
    %51219 = torch.aten.unsqueeze %51218, %int0_48607 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51219, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48608 = torch.constant.int 2
    %51220 = torch.aten.unsqueeze %51219, %int2_48608 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51220, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48609 = torch.constant.int 3
    %int0_48610 = torch.constant.int 0
    %int9223372036854775807_48611 = torch.constant.int 9223372036854775807
    %int1_48612 = torch.constant.int 1
    %51221 = torch.aten.slice.Tensor %51220, %int3_48609, %int0_48610, %int9223372036854775807_48611, %int1_48612 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51221, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51222 = torch_c.to_builtin_tensor %51065 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_48613 = arith.constant 1 : index
    %dim_48614 = tensor.dim %51222, %c1_48613 : tensor<4x?x4x128xf16>
    %51223 = flow.tensor.bitcast %51222 : tensor<4x?x4x128xf16>{%dim_48614} -> tensor<4x?x4x64xcomplex<f16>>{%dim_48614}
    %51224 = torch_c.from_builtin_tensor %51223 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %51224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %51225 = torch.aten.mul.Tensor %51224, %51221 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %51225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %51226 = torch_c.to_builtin_tensor %51225 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_48615 = arith.constant 1 : index
    %dim_48616 = tensor.dim %51226, %c1_48615 : tensor<4x?x4x64xcomplex<f32>>
    %51227 = flow.tensor.bitcast %51226 : tensor<4x?x4x64xcomplex<f32>>{%dim_48616} -> tensor<4x?x4x128xf32>{%dim_48616}
    %51228 = torch_c.from_builtin_tensor %51227 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %51228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_48617 = torch.constant.int 5
    %51229 = torch.prims.convert_element_type %51228, %int5_48617 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_48618 = torch.constant.int 1
    %51230 = torch.aten.size.int %50919, %int1_48618 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_48619 = torch.constant.int 0
    %51231 = torch.aten.add.int %int0_48619, %51230 : !torch.int, !torch.int -> !torch.int
    %int0_48620 = torch.constant.int 0
    %int0_48621 = torch.constant.int 0
    %int1_48622 = torch.constant.int 1
    %51232 = torch.aten.slice.Tensor %51136, %int0_48620, %int0_48621, %51231, %int1_48622 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51232, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48623 = torch.constant.int 1
    %int0_48624 = torch.constant.int 0
    %int9223372036854775807_48625 = torch.constant.int 9223372036854775807
    %int1_48626 = torch.constant.int 1
    %51233 = torch.aten.slice.Tensor %51232, %int1_48623, %int0_48624, %int9223372036854775807_48625, %int1_48626 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51233, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48627 = torch.constant.int 0
    %51234 = torch.aten.unsqueeze %51233, %int0_48627 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51234, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48628 = torch.constant.int 2
    %51235 = torch.aten.unsqueeze %51234, %int2_48628 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51235, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48629 = torch.constant.int 3
    %int0_48630 = torch.constant.int 0
    %int9223372036854775807_48631 = torch.constant.int 9223372036854775807
    %int1_48632 = torch.constant.int 1
    %51236 = torch.aten.slice.Tensor %51235, %int3_48629, %int0_48630, %int9223372036854775807_48631, %int1_48632 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51236, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51237 = torch_c.to_builtin_tensor %51067 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_48633 = arith.constant 1 : index
    %dim_48634 = tensor.dim %51237, %c1_48633 : tensor<4x?x4x128xf16>
    %51238 = flow.tensor.bitcast %51237 : tensor<4x?x4x128xf16>{%dim_48634} -> tensor<4x?x4x64xcomplex<f16>>{%dim_48634}
    %51239 = torch_c.from_builtin_tensor %51238 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %51239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %51240 = torch.aten.mul.Tensor %51239, %51236 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %51240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %51241 = torch_c.to_builtin_tensor %51240 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_48635 = arith.constant 1 : index
    %dim_48636 = tensor.dim %51241, %c1_48635 : tensor<4x?x4x64xcomplex<f32>>
    %51242 = flow.tensor.bitcast %51241 : tensor<4x?x4x64xcomplex<f32>>{%dim_48636} -> tensor<4x?x4x128xf32>{%dim_48636}
    %51243 = torch_c.from_builtin_tensor %51242 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %51243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_48637 = torch.constant.int 5
    %51244 = torch.prims.convert_element_type %51243, %int5_48637 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_48638 = torch.constant.int 1
    %51245 = torch.aten.size.int %50925, %int1_48638 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_48639 = torch.constant.int 0
    %51246 = torch.aten.add.int %int0_48639, %51245 : !torch.int, !torch.int -> !torch.int
    %int0_48640 = torch.constant.int 0
    %int0_48641 = torch.constant.int 0
    %int1_48642 = torch.constant.int 1
    %51247 = torch.aten.slice.Tensor %51139, %int0_48640, %int0_48641, %51246, %int1_48642 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51247, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48643 = torch.constant.int 1
    %int0_48644 = torch.constant.int 0
    %int9223372036854775807_48645 = torch.constant.int 9223372036854775807
    %int1_48646 = torch.constant.int 1
    %51248 = torch.aten.slice.Tensor %51247, %int1_48643, %int0_48644, %int9223372036854775807_48645, %int1_48646 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51248, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48647 = torch.constant.int 0
    %51249 = torch.aten.unsqueeze %51248, %int0_48647 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51249, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48648 = torch.constant.int 2
    %51250 = torch.aten.unsqueeze %51249, %int2_48648 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51250, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48649 = torch.constant.int 3
    %int0_48650 = torch.constant.int 0
    %int9223372036854775807_48651 = torch.constant.int 9223372036854775807
    %int1_48652 = torch.constant.int 1
    %51251 = torch.aten.slice.Tensor %51250, %int3_48649, %int0_48650, %int9223372036854775807_48651, %int1_48652 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51251, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51252 = torch_c.to_builtin_tensor %51069 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_48653 = arith.constant 1 : index
    %dim_48654 = tensor.dim %51252, %c1_48653 : tensor<4x?x4x128xf16>
    %51253 = flow.tensor.bitcast %51252 : tensor<4x?x4x128xf16>{%dim_48654} -> tensor<4x?x4x64xcomplex<f16>>{%dim_48654}
    %51254 = torch_c.from_builtin_tensor %51253 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %51254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %51255 = torch.aten.mul.Tensor %51254, %51251 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %51255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %51256 = torch_c.to_builtin_tensor %51255 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_48655 = arith.constant 1 : index
    %dim_48656 = tensor.dim %51256, %c1_48655 : tensor<4x?x4x64xcomplex<f32>>
    %51257 = flow.tensor.bitcast %51256 : tensor<4x?x4x64xcomplex<f32>>{%dim_48656} -> tensor<4x?x4x128xf32>{%dim_48656}
    %51258 = torch_c.from_builtin_tensor %51257 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %51258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_48657 = torch.constant.int 5
    %51259 = torch.prims.convert_element_type %51258, %int5_48657 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
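    // Rebuild the full RoPE frequency table: positions = arange(131072); inv_freq[k] = 500000^(-2k/128)
    // for k in [0,64); angles = positions[:,None] * inv_freq; table[p,k] = cos(angle) + i*sin(angle),
    // i.e. e^(i*angle), yielding a [131072,64] complex<f32> tensor.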
    %int131072_48658 = torch.constant.int 131072
    %none_48659 = torch.constant.none
    %none_48660 = torch.constant.none
    %cpu_48661 = torch.constant.device "cpu"
    %false_48662 = torch.constant.bool false
    %51260 = torch.aten.arange %int131072_48658, %none_48659, %none_48660, %cpu_48661, %false_48662 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_48663 = torch.constant.int 0
    %int128_48664 = torch.constant.int 128
    %int2_48665 = torch.constant.int 2
    %none_48666 = torch.constant.none
    %none_48667 = torch.constant.none
    %cpu_48668 = torch.constant.device "cpu"
    %false_48669 = torch.constant.bool false
    %51261 = torch.aten.arange.start_step %int0_48663, %int128_48664, %int2_48665, %none_48666, %none_48667, %cpu_48668, %false_48669 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_48670 = torch.constant.int 0
    %int0_48671 = torch.constant.int 0
    %int64_48672 = torch.constant.int 64
    %int1_48673 = torch.constant.int 1
    %51262 = torch.aten.slice.Tensor %51261, %int0_48670, %int0_48671, %int64_48672, %int1_48673 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_48674 = torch.constant.int 6
    %51263 = torch.prims.convert_element_type %51262, %int6_48674 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_48675 = torch.constant.int 128
    %51264 = torch.aten.div.Scalar %51263, %int128_48675 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_48676 = torch.constant.float 5.000000e+05
    %51265 = torch.aten.pow.Scalar %float5.000000e05_48676, %51264 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %51266 = torch.aten.reciprocal %51265 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_48677 = torch.constant.float 1.000000e+00
    %51267 = torch.aten.mul.Scalar %51266, %float1.000000e00_48677 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_48678 = torch.constant.int 131072
    %int1_48679 = torch.constant.int 1
    %51268 = torch.prim.ListConstruct %int131072_48678, %int1_48679 : (!torch.int, !torch.int) -> !torch.list<int>
    %51269 = torch.aten.view %51260, %51268 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %51270 = torch.aten.mul.Tensor %51269, %51267 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %51271 = torch.aten.cos %51270 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %51272 = torch.aten.sin %51270 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %51273 = torch.aten.complex %51271, %51272 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
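    // Fan the complex table out to all eight devices: one to_builtin / flow.tensor.transfer /
    // from_builtin round-trip per #hal.device.promise<@__device_0..7>.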
    %51274 = torch_c.to_builtin_tensor %51273 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51275 = flow.tensor.transfer %51274 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %51276 = torch_c.from_builtin_tensor %51275 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51277 = torch_c.to_builtin_tensor %51273 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51278 = flow.tensor.transfer %51277 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %51279 = torch_c.from_builtin_tensor %51278 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51280 = torch_c.to_builtin_tensor %51273 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51281 = flow.tensor.transfer %51280 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %51282 = torch_c.from_builtin_tensor %51281 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51283 = torch_c.to_builtin_tensor %51273 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51284 = flow.tensor.transfer %51283 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %51285 = torch_c.from_builtin_tensor %51284 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51286 = torch_c.to_builtin_tensor %51273 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51287 = flow.tensor.transfer %51286 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %51288 = torch_c.from_builtin_tensor %51287 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51289 = torch_c.to_builtin_tensor %51273 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51290 = flow.tensor.transfer %51289 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %51291 = torch_c.from_builtin_tensor %51290 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51292 = torch_c.to_builtin_tensor %51273 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51293 = flow.tensor.transfer %51292 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %51294 = torch_c.from_builtin_tensor %51293 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %51295 = torch_c.to_builtin_tensor %51273 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %51296 = flow.tensor.transfer %51295 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %51297 = torch_c.from_builtin_tensor %51296 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
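    // Per-device RoPE on the single-head tensors (likely the K head each device owns under 8-way
    // sharding): read the dynamic sequence length from dim 1 of the matching [4,?,128] f16 tensor,
    // slice the device-local table to [seq,64] and unsqueeze it to [1,?,1,64], then repeat the same
    // bitcast-multiply-bitcast pattern on that device's [4,?,1,128] f16 tensor. Device 0 follows;
    // devices 1 through 7 repeat the sequence verbatim with their own operands.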
    %int1_48680 = torch.constant.int 1
    %51298 = torch.aten.size.int %50947, %int1_48680 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_48681 = torch.constant.int 0
    %51299 = torch.aten.add.int %int0_48681, %51298 : !torch.int, !torch.int -> !torch.int
    %int0_48682 = torch.constant.int 0
    %int0_48683 = torch.constant.int 0
    %int1_48684 = torch.constant.int 1
    %51300 = torch.aten.slice.Tensor %51276, %int0_48682, %int0_48683, %51299, %int1_48684 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51300, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48685 = torch.constant.int 1
    %int0_48686 = torch.constant.int 0
    %int9223372036854775807_48687 = torch.constant.int 9223372036854775807
    %int1_48688 = torch.constant.int 1
    %51301 = torch.aten.slice.Tensor %51300, %int1_48685, %int0_48686, %int9223372036854775807_48687, %int1_48688 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51301, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48689 = torch.constant.int 0
    %51302 = torch.aten.unsqueeze %51301, %int0_48689 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51302, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48690 = torch.constant.int 2
    %51303 = torch.aten.unsqueeze %51302, %int2_48690 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51303, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48691 = torch.constant.int 3
    %int0_48692 = torch.constant.int 0
    %int9223372036854775807_48693 = torch.constant.int 9223372036854775807
    %int1_48694 = torch.constant.int 1
    %51304 = torch.aten.slice.Tensor %51303, %int3_48691, %int0_48692, %int9223372036854775807_48693, %int1_48694 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51304, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51305 = torch_c.to_builtin_tensor %51071 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_48695 = arith.constant 1 : index
    %dim_48696 = tensor.dim %51305, %c1_48695 : tensor<4x?x1x128xf16>
    %51306 = flow.tensor.bitcast %51305 : tensor<4x?x1x128xf16>{%dim_48696} -> tensor<4x?x1x64xcomplex<f16>>{%dim_48696}
    %51307 = torch_c.from_builtin_tensor %51306 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %51307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %51308 = torch.aten.mul.Tensor %51307, %51304 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %51309 = torch_c.to_builtin_tensor %51308 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_48697 = arith.constant 1 : index
    %dim_48698 = tensor.dim %51309, %c1_48697 : tensor<4x?x1x64xcomplex<f32>>
    %51310 = flow.tensor.bitcast %51309 : tensor<4x?x1x64xcomplex<f32>>{%dim_48698} -> tensor<4x?x1x128xf32>{%dim_48698}
    %51311 = torch_c.from_builtin_tensor %51310 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %51311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_48699 = torch.constant.int 5
    %51312 = torch.prims.convert_element_type %51311, %int5_48699 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_48700 = torch.constant.int 1
    %51313 = torch.aten.size.int %50953, %int1_48700 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_48701 = torch.constant.int 0
    %51314 = torch.aten.add.int %int0_48701, %51313 : !torch.int, !torch.int -> !torch.int
    %int0_48702 = torch.constant.int 0
    %int0_48703 = torch.constant.int 0
    %int1_48704 = torch.constant.int 1
    %51315 = torch.aten.slice.Tensor %51279, %int0_48702, %int0_48703, %51314, %int1_48704 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51315, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48705 = torch.constant.int 1
    %int0_48706 = torch.constant.int 0
    %int9223372036854775807_48707 = torch.constant.int 9223372036854775807
    %int1_48708 = torch.constant.int 1
    %51316 = torch.aten.slice.Tensor %51315, %int1_48705, %int0_48706, %int9223372036854775807_48707, %int1_48708 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51316, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48709 = torch.constant.int 0
    %51317 = torch.aten.unsqueeze %51316, %int0_48709 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51317, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48710 = torch.constant.int 2
    %51318 = torch.aten.unsqueeze %51317, %int2_48710 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51318, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48711 = torch.constant.int 3
    %int0_48712 = torch.constant.int 0
    %int9223372036854775807_48713 = torch.constant.int 9223372036854775807
    %int1_48714 = torch.constant.int 1
    %51319 = torch.aten.slice.Tensor %51318, %int3_48711, %int0_48712, %int9223372036854775807_48713, %int1_48714 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51319, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51320 = torch_c.to_builtin_tensor %51073 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_48715 = arith.constant 1 : index
    %dim_48716 = tensor.dim %51320, %c1_48715 : tensor<4x?x1x128xf16>
    %51321 = flow.tensor.bitcast %51320 : tensor<4x?x1x128xf16>{%dim_48716} -> tensor<4x?x1x64xcomplex<f16>>{%dim_48716}
    %51322 = torch_c.from_builtin_tensor %51321 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %51322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %51323 = torch.aten.mul.Tensor %51322, %51319 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %51324 = torch_c.to_builtin_tensor %51323 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_48717 = arith.constant 1 : index
    %dim_48718 = tensor.dim %51324, %c1_48717 : tensor<4x?x1x64xcomplex<f32>>
    %51325 = flow.tensor.bitcast %51324 : tensor<4x?x1x64xcomplex<f32>>{%dim_48718} -> tensor<4x?x1x128xf32>{%dim_48718}
    %51326 = torch_c.from_builtin_tensor %51325 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %51326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_48719 = torch.constant.int 5
    %51327 = torch.prims.convert_element_type %51326, %int5_48719 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_48720 = torch.constant.int 1
    %51328 = torch.aten.size.int %50959, %int1_48720 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_48721 = torch.constant.int 0
    %51329 = torch.aten.add.int %int0_48721, %51328 : !torch.int, !torch.int -> !torch.int
    %int0_48722 = torch.constant.int 0
    %int0_48723 = torch.constant.int 0
    %int1_48724 = torch.constant.int 1
    %51330 = torch.aten.slice.Tensor %51282, %int0_48722, %int0_48723, %51329, %int1_48724 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51330, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48725 = torch.constant.int 1
    %int0_48726 = torch.constant.int 0
    %int9223372036854775807_48727 = torch.constant.int 9223372036854775807
    %int1_48728 = torch.constant.int 1
    %51331 = torch.aten.slice.Tensor %51330, %int1_48725, %int0_48726, %int9223372036854775807_48727, %int1_48728 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51331, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48729 = torch.constant.int 0
    %51332 = torch.aten.unsqueeze %51331, %int0_48729 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51332, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48730 = torch.constant.int 2
    %51333 = torch.aten.unsqueeze %51332, %int2_48730 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51333, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48731 = torch.constant.int 3
    %int0_48732 = torch.constant.int 0
    %int9223372036854775807_48733 = torch.constant.int 9223372036854775807
    %int1_48734 = torch.constant.int 1
    %51334 = torch.aten.slice.Tensor %51333, %int3_48731, %int0_48732, %int9223372036854775807_48733, %int1_48734 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51334, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51335 = torch_c.to_builtin_tensor %51075 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_48735 = arith.constant 1 : index
    %dim_48736 = tensor.dim %51335, %c1_48735 : tensor<4x?x1x128xf16>
    %51336 = flow.tensor.bitcast %51335 : tensor<4x?x1x128xf16>{%dim_48736} -> tensor<4x?x1x64xcomplex<f16>>{%dim_48736}
    %51337 = torch_c.from_builtin_tensor %51336 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %51337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %51338 = torch.aten.mul.Tensor %51337, %51334 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %51339 = torch_c.to_builtin_tensor %51338 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_48737 = arith.constant 1 : index
    %dim_48738 = tensor.dim %51339, %c1_48737 : tensor<4x?x1x64xcomplex<f32>>
    %51340 = flow.tensor.bitcast %51339 : tensor<4x?x1x64xcomplex<f32>>{%dim_48738} -> tensor<4x?x1x128xf32>{%dim_48738}
    %51341 = torch_c.from_builtin_tensor %51340 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %51341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_48739 = torch.constant.int 5
    %51342 = torch.prims.convert_element_type %51341, %int5_48739 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_48740 = torch.constant.int 1
    %51343 = torch.aten.size.int %50965, %int1_48740 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_48741 = torch.constant.int 0
    %51344 = torch.aten.add.int %int0_48741, %51343 : !torch.int, !torch.int -> !torch.int
    %int0_48742 = torch.constant.int 0
    %int0_48743 = torch.constant.int 0
    %int1_48744 = torch.constant.int 1
    %51345 = torch.aten.slice.Tensor %51285, %int0_48742, %int0_48743, %51344, %int1_48744 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51345, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48745 = torch.constant.int 1
    %int0_48746 = torch.constant.int 0
    %int9223372036854775807_48747 = torch.constant.int 9223372036854775807
    %int1_48748 = torch.constant.int 1
    %51346 = torch.aten.slice.Tensor %51345, %int1_48745, %int0_48746, %int9223372036854775807_48747, %int1_48748 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51346, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48749 = torch.constant.int 0
    %51347 = torch.aten.unsqueeze %51346, %int0_48749 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51347, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48750 = torch.constant.int 2
    %51348 = torch.aten.unsqueeze %51347, %int2_48750 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51348, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48751 = torch.constant.int 3
    %int0_48752 = torch.constant.int 0
    %int9223372036854775807_48753 = torch.constant.int 9223372036854775807
    %int1_48754 = torch.constant.int 1
    %51349 = torch.aten.slice.Tensor %51348, %int3_48751, %int0_48752, %int9223372036854775807_48753, %int1_48754 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51349, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51350 = torch_c.to_builtin_tensor %51077 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_48755 = arith.constant 1 : index
    %dim_48756 = tensor.dim %51350, %c1_48755 : tensor<4x?x1x128xf16>
    %51351 = flow.tensor.bitcast %51350 : tensor<4x?x1x128xf16>{%dim_48756} -> tensor<4x?x1x64xcomplex<f16>>{%dim_48756}
    %51352 = torch_c.from_builtin_tensor %51351 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %51352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %51353 = torch.aten.mul.Tensor %51352, %51349 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %51354 = torch_c.to_builtin_tensor %51353 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_48757 = arith.constant 1 : index
    %dim_48758 = tensor.dim %51354, %c1_48757 : tensor<4x?x1x64xcomplex<f32>>
    %51355 = flow.tensor.bitcast %51354 : tensor<4x?x1x64xcomplex<f32>>{%dim_48758} -> tensor<4x?x1x128xf32>{%dim_48758}
    %51356 = torch_c.from_builtin_tensor %51355 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %51356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_48759 = torch.constant.int 5
    %51357 = torch.prims.convert_element_type %51356, %int5_48759 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_48760 = torch.constant.int 1
    %51358 = torch.aten.size.int %50971, %int1_48760 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_48761 = torch.constant.int 0
    %51359 = torch.aten.add.int %int0_48761, %51358 : !torch.int, !torch.int -> !torch.int
    %int0_48762 = torch.constant.int 0
    %int0_48763 = torch.constant.int 0
    %int1_48764 = torch.constant.int 1
    %51360 = torch.aten.slice.Tensor %51288, %int0_48762, %int0_48763, %51359, %int1_48764 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51360, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48765 = torch.constant.int 1
    %int0_48766 = torch.constant.int 0
    %int9223372036854775807_48767 = torch.constant.int 9223372036854775807
    %int1_48768 = torch.constant.int 1
    %51361 = torch.aten.slice.Tensor %51360, %int1_48765, %int0_48766, %int9223372036854775807_48767, %int1_48768 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51361, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48769 = torch.constant.int 0
    %51362 = torch.aten.unsqueeze %51361, %int0_48769 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51362, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48770 = torch.constant.int 2
    %51363 = torch.aten.unsqueeze %51362, %int2_48770 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51363, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48771 = torch.constant.int 3
    %int0_48772 = torch.constant.int 0
    %int9223372036854775807_48773 = torch.constant.int 9223372036854775807
    %int1_48774 = torch.constant.int 1
    %51364 = torch.aten.slice.Tensor %51363, %int3_48771, %int0_48772, %int9223372036854775807_48773, %int1_48774 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51364, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51365 = torch_c.to_builtin_tensor %51079 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_48775 = arith.constant 1 : index
    %dim_48776 = tensor.dim %51365, %c1_48775 : tensor<4x?x1x128xf16>
    %51366 = flow.tensor.bitcast %51365 : tensor<4x?x1x128xf16>{%dim_48776} -> tensor<4x?x1x64xcomplex<f16>>{%dim_48776}
    %51367 = torch_c.from_builtin_tensor %51366 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %51367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %51368 = torch.aten.mul.Tensor %51367, %51364 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %51369 = torch_c.to_builtin_tensor %51368 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_48777 = arith.constant 1 : index
    %dim_48778 = tensor.dim %51369, %c1_48777 : tensor<4x?x1x64xcomplex<f32>>
    %51370 = flow.tensor.bitcast %51369 : tensor<4x?x1x64xcomplex<f32>>{%dim_48778} -> tensor<4x?x1x128xf32>{%dim_48778}
    %51371 = torch_c.from_builtin_tensor %51370 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %51371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_48779 = torch.constant.int 5
    %51372 = torch.prims.convert_element_type %51371, %int5_48779 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_48780 = torch.constant.int 1
    %51373 = torch.aten.size.int %50977, %int1_48780 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_48781 = torch.constant.int 0
    %51374 = torch.aten.add.int %int0_48781, %51373 : !torch.int, !torch.int -> !torch.int
    %int0_48782 = torch.constant.int 0
    %int0_48783 = torch.constant.int 0
    %int1_48784 = torch.constant.int 1
    %51375 = torch.aten.slice.Tensor %51291, %int0_48782, %int0_48783, %51374, %int1_48784 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51375, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48785 = torch.constant.int 1
    %int0_48786 = torch.constant.int 0
    %int9223372036854775807_48787 = torch.constant.int 9223372036854775807
    %int1_48788 = torch.constant.int 1
    %51376 = torch.aten.slice.Tensor %51375, %int1_48785, %int0_48786, %int9223372036854775807_48787, %int1_48788 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51376, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48789 = torch.constant.int 0
    %51377 = torch.aten.unsqueeze %51376, %int0_48789 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51377, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48790 = torch.constant.int 2
    %51378 = torch.aten.unsqueeze %51377, %int2_48790 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51378, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48791 = torch.constant.int 3
    %int0_48792 = torch.constant.int 0
    %int9223372036854775807_48793 = torch.constant.int 9223372036854775807
    %int1_48794 = torch.constant.int 1
    %51379 = torch.aten.slice.Tensor %51378, %int3_48791, %int0_48792, %int9223372036854775807_48793, %int1_48794 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51379, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51380 = torch_c.to_builtin_tensor %51081 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_48795 = arith.constant 1 : index
    %dim_48796 = tensor.dim %51380, %c1_48795 : tensor<4x?x1x128xf16>
    %51381 = flow.tensor.bitcast %51380 : tensor<4x?x1x128xf16>{%dim_48796} -> tensor<4x?x1x64xcomplex<f16>>{%dim_48796}
    %51382 = torch_c.from_builtin_tensor %51381 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %51382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %51383 = torch.aten.mul.Tensor %51382, %51379 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %51384 = torch_c.to_builtin_tensor %51383 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_48797 = arith.constant 1 : index
    %dim_48798 = tensor.dim %51384, %c1_48797 : tensor<4x?x1x64xcomplex<f32>>
    %51385 = flow.tensor.bitcast %51384 : tensor<4x?x1x64xcomplex<f32>>{%dim_48798} -> tensor<4x?x1x128xf32>{%dim_48798}
    %51386 = torch_c.from_builtin_tensor %51385 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %51386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_48799 = torch.constant.int 5
    %51387 = torch.prims.convert_element_type %51386, %int5_48799 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_48800 = torch.constant.int 1
    %51388 = torch.aten.size.int %50983, %int1_48800 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_48801 = torch.constant.int 0
    %51389 = torch.aten.add.int %int0_48801, %51388 : !torch.int, !torch.int -> !torch.int
    %int0_48802 = torch.constant.int 0
    %int0_48803 = torch.constant.int 0
    %int1_48804 = torch.constant.int 1
    %51390 = torch.aten.slice.Tensor %51294, %int0_48802, %int0_48803, %51389, %int1_48804 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51390, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48805 = torch.constant.int 1
    %int0_48806 = torch.constant.int 0
    %int9223372036854775807_48807 = torch.constant.int 9223372036854775807
    %int1_48808 = torch.constant.int 1
    %51391 = torch.aten.slice.Tensor %51390, %int1_48805, %int0_48806, %int9223372036854775807_48807, %int1_48808 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51391, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48809 = torch.constant.int 0
    %51392 = torch.aten.unsqueeze %51391, %int0_48809 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51392, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48810 = torch.constant.int 2
    %51393 = torch.aten.unsqueeze %51392, %int2_48810 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51393, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48811 = torch.constant.int 3
    %int0_48812 = torch.constant.int 0
    %int9223372036854775807_48813 = torch.constant.int 9223372036854775807
    %int1_48814 = torch.constant.int 1
    %51394 = torch.aten.slice.Tensor %51393, %int3_48811, %int0_48812, %int9223372036854775807_48813, %int1_48814 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51394, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51395 = torch_c.to_builtin_tensor %51083 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_48815 = arith.constant 1 : index
    %dim_48816 = tensor.dim %51395, %c1_48815 : tensor<4x?x1x128xf16>
    %51396 = flow.tensor.bitcast %51395 : tensor<4x?x1x128xf16>{%dim_48816} -> tensor<4x?x1x64xcomplex<f16>>{%dim_48816}
    %51397 = torch_c.from_builtin_tensor %51396 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %51397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %51398 = torch.aten.mul.Tensor %51397, %51394 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %51399 = torch_c.to_builtin_tensor %51398 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_48817 = arith.constant 1 : index
    %dim_48818 = tensor.dim %51399, %c1_48817 : tensor<4x?x1x64xcomplex<f32>>
    %51400 = flow.tensor.bitcast %51399 : tensor<4x?x1x64xcomplex<f32>>{%dim_48818} -> tensor<4x?x1x128xf32>{%dim_48818}
    %51401 = torch_c.from_builtin_tensor %51400 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %51401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_48819 = torch.constant.int 5
    %51402 = torch.prims.convert_element_type %51401, %int5_48819 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_48820 = torch.constant.int 1
    %51403 = torch.aten.size.int %50989, %int1_48820 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_48821 = torch.constant.int 0
    %51404 = torch.aten.add.int %int0_48821, %51403 : !torch.int, !torch.int -> !torch.int
    %int0_48822 = torch.constant.int 0
    %int0_48823 = torch.constant.int 0
    %int1_48824 = torch.constant.int 1
    %51405 = torch.aten.slice.Tensor %51297, %int0_48822, %int0_48823, %51404, %int1_48824 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51405, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_48825 = torch.constant.int 1
    %int0_48826 = torch.constant.int 0
    %int9223372036854775807_48827 = torch.constant.int 9223372036854775807
    %int1_48828 = torch.constant.int 1
    %51406 = torch.aten.slice.Tensor %51405, %int1_48825, %int0_48826, %int9223372036854775807_48827, %int1_48828 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %51406, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_48829 = torch.constant.int 0
    %51407 = torch.aten.unsqueeze %51406, %int0_48829 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %51407, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_48830 = torch.constant.int 2
    %51408 = torch.aten.unsqueeze %51407, %int2_48830 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51408, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_48831 = torch.constant.int 3
    %int0_48832 = torch.constant.int 0
    %int9223372036854775807_48833 = torch.constant.int 9223372036854775807
    %int1_48834 = torch.constant.int 1
    %51409 = torch.aten.slice.Tensor %51408, %int3_48831, %int0_48832, %int9223372036854775807_48833, %int1_48834 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51409, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %51410 = torch_c.to_builtin_tensor %51085 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_48835 = arith.constant 1 : index
    %dim_48836 = tensor.dim %51410, %c1_48835 : tensor<4x?x1x128xf16>
    %51411 = flow.tensor.bitcast %51410 : tensor<4x?x1x128xf16>{%dim_48836} -> tensor<4x?x1x64xcomplex<f16>>{%dim_48836}
    %51412 = torch_c.from_builtin_tensor %51411 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %51412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %51413 = torch.aten.mul.Tensor %51412, %51409 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %51413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %51414 = torch_c.to_builtin_tensor %51413 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_48837 = arith.constant 1 : index
    %dim_48838 = tensor.dim %51414, %c1_48837 : tensor<4x?x1x64xcomplex<f32>>
    %51415 = flow.tensor.bitcast %51414 : tensor<4x?x1x64xcomplex<f32>>{%dim_48838} -> tensor<4x?x1x128xf32>{%dim_48838}
    %51416 = torch_c.from_builtin_tensor %51415 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %51416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_48839 = torch.constant.int 5
    %51417 = torch.prims.convert_element_type %51416, %int5_48839 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %51417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
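    // Turn the per-device [4,?] si64 page tables into cache slot indices: scale by 64 and offset by 52.
    // Assumed interpretation: each page holds 64 sub-slots (e.g. 32 transformer blocks x K/V), and 52
    // selects this block's K slot; the IR itself only fixes the stride and offset, not their meaning.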
    %int64_48840 = torch.constant.int 64
    %51418 = torch.aten.mul.Scalar %2364, %int64_48840 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51418, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_48841 = torch.constant.int 64
    %51419 = torch.aten.mul.Scalar %2367, %int64_48841 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51419, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_48842 = torch.constant.int 64
    %51420 = torch.aten.mul.Scalar %2370, %int64_48842 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51420, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_48843 = torch.constant.int 64
    %51421 = torch.aten.mul.Scalar %2373, %int64_48843 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51421, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_48844 = torch.constant.int 64
    %51422 = torch.aten.mul.Scalar %2376, %int64_48844 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51422, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_48845 = torch.constant.int 64
    %51423 = torch.aten.mul.Scalar %2379, %int64_48845 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51423, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_48846 = torch.constant.int 64
    %51424 = torch.aten.mul.Scalar %2382, %int64_48846 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51424, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_48847 = torch.constant.int 64
    %51425 = torch.aten.mul.Scalar %2385, %int64_48847 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51425, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int52 = torch.constant.int 52
    %int1_48848 = torch.constant.int 1
    %51426 = torch.aten.add.Scalar %51418, %int52, %int1_48848 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51426, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int52_48849 = torch.constant.int 52
    %int1_48850 = torch.constant.int 1
    %51427 = torch.aten.add.Scalar %51419, %int52_48849, %int1_48850 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51427, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int52_48851 = torch.constant.int 52
    %int1_48852 = torch.constant.int 1
    %51428 = torch.aten.add.Scalar %51420, %int52_48851, %int1_48852 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51428, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int52_48853 = torch.constant.int 52
    %int1_48854 = torch.constant.int 1
    %51429 = torch.aten.add.Scalar %51421, %int52_48853, %int1_48854 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51429, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int52_48855 = torch.constant.int 52
    %int1_48856 = torch.constant.int 1
    %51430 = torch.aten.add.Scalar %51422, %int52_48855, %int1_48856 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51430, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int52_48857 = torch.constant.int 52
    %int1_48858 = torch.constant.int 1
    %51431 = torch.aten.add.Scalar %51423, %int52_48857, %int1_48858 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51431, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int52_48859 = torch.constant.int 52
    %int1_48860 = torch.constant.int 1
    %51432 = torch.aten.add.Scalar %51424, %int52_48859, %int1_48860 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51432, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int52_48861 = torch.constant.int 52
    %int1_48862 = torch.constant.int 1
    %51433 = torch.aten.add.Scalar %51425, %int52_48861, %int1_48862 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51433, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
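    // Reshape each device's post-RoPE K tensor from [4,?,1,128] to the paged cache layout
    // [4, pages, 16, 1, 128] (the sequence dim, a multiple of 16, splits into pages x 16 tokens;
    // 1 KV head; head dim 128), then collapse the batch and page dims to [?,16,1,128] for the
    // cache write.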
    %int4_48863 = torch.constant.int 4
    %int16_48864 = torch.constant.int 16
    %int1_48865 = torch.constant.int 1
    %int128_48866 = torch.constant.int 128
    %51434 = torch.prim.ListConstruct %int4_48863, %3095, %int16_48864, %int1_48865, %int128_48866 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51435 = torch.aten.view %51312, %51434 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51435, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48867 = torch.constant.int 4
    %int16_48868 = torch.constant.int 16
    %int1_48869 = torch.constant.int 1
    %int128_48870 = torch.constant.int 128
    %51436 = torch.prim.ListConstruct %int4_48867, %3095, %int16_48868, %int1_48869, %int128_48870 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51437 = torch.aten.view %51327, %51436 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51437, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48871 = torch.constant.int 4
    %int16_48872 = torch.constant.int 16
    %int1_48873 = torch.constant.int 1
    %int128_48874 = torch.constant.int 128
    %51438 = torch.prim.ListConstruct %int4_48871, %3095, %int16_48872, %int1_48873, %int128_48874 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51439 = torch.aten.view %51342, %51438 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51439, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48875 = torch.constant.int 4
    %int16_48876 = torch.constant.int 16
    %int1_48877 = torch.constant.int 1
    %int128_48878 = torch.constant.int 128
    %51440 = torch.prim.ListConstruct %int4_48875, %3095, %int16_48876, %int1_48877, %int128_48878 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51441 = torch.aten.view %51357, %51440 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51441, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48879 = torch.constant.int 4
    %int16_48880 = torch.constant.int 16
    %int1_48881 = torch.constant.int 1
    %int128_48882 = torch.constant.int 128
    %51442 = torch.prim.ListConstruct %int4_48879, %3095, %int16_48880, %int1_48881, %int128_48882 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51443 = torch.aten.view %51372, %51442 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51443, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48883 = torch.constant.int 4
    %int16_48884 = torch.constant.int 16
    %int1_48885 = torch.constant.int 1
    %int128_48886 = torch.constant.int 128
    %51444 = torch.prim.ListConstruct %int4_48883, %3095, %int16_48884, %int1_48885, %int128_48886 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51445 = torch.aten.view %51387, %51444 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51445, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48887 = torch.constant.int 4
    %int16_48888 = torch.constant.int 16
    %int1_48889 = torch.constant.int 1
    %int128_48890 = torch.constant.int 128
    %51446 = torch.prim.ListConstruct %int4_48887, %3095, %int16_48888, %int1_48889, %int128_48890 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51447 = torch.aten.view %51402, %51446 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51447, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48891 = torch.constant.int 4
    %int16_48892 = torch.constant.int 16
    %int1_48893 = torch.constant.int 1
    %int128_48894 = torch.constant.int 128
    %51448 = torch.prim.ListConstruct %int4_48891, %3095, %int16_48892, %int1_48893, %int128_48894 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51449 = torch.aten.view %51417, %51448 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51449, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48895 = torch.constant.int 4
    %51450 = torch.aten.mul.int %int4_48895, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48896 = torch.constant.int 16
    %int1_48897 = torch.constant.int 1
    %int128_48898 = torch.constant.int 128
    %51451 = torch.prim.ListConstruct %51450, %int16_48896, %int1_48897, %int128_48898 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51452 = torch.aten.view %51435, %51451 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51452, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48899 = torch.constant.int 4
    %51453 = torch.aten.mul.int %int4_48899, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48900 = torch.constant.int 16
    %int1_48901 = torch.constant.int 1
    %int128_48902 = torch.constant.int 128
    %51454 = torch.prim.ListConstruct %51453, %int16_48900, %int1_48901, %int128_48902 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51455 = torch.aten.view %51437, %51454 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51455, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48903 = torch.constant.int 4
    %51456 = torch.aten.mul.int %int4_48903, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48904 = torch.constant.int 16
    %int1_48905 = torch.constant.int 1
    %int128_48906 = torch.constant.int 128
    %51457 = torch.prim.ListConstruct %51456, %int16_48904, %int1_48905, %int128_48906 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51458 = torch.aten.view %51439, %51457 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51458, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48907 = torch.constant.int 4
    %51459 = torch.aten.mul.int %int4_48907, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48908 = torch.constant.int 16
    %int1_48909 = torch.constant.int 1
    %int128_48910 = torch.constant.int 128
    %51460 = torch.prim.ListConstruct %51459, %int16_48908, %int1_48909, %int128_48910 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51461 = torch.aten.view %51441, %51460 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51461, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48911 = torch.constant.int 4
    %51462 = torch.aten.mul.int %int4_48911, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48912 = torch.constant.int 16
    %int1_48913 = torch.constant.int 1
    %int128_48914 = torch.constant.int 128
    %51463 = torch.prim.ListConstruct %51462, %int16_48912, %int1_48913, %int128_48914 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51464 = torch.aten.view %51443, %51463 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51464, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48915 = torch.constant.int 4
    %51465 = torch.aten.mul.int %int4_48915, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48916 = torch.constant.int 16
    %int1_48917 = torch.constant.int 1
    %int128_48918 = torch.constant.int 128
    %51466 = torch.prim.ListConstruct %51465, %int16_48916, %int1_48917, %int128_48918 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51467 = torch.aten.view %51445, %51466 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51467, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48919 = torch.constant.int 4
    %51468 = torch.aten.mul.int %int4_48919, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48920 = torch.constant.int 16
    %int1_48921 = torch.constant.int 1
    %int128_48922 = torch.constant.int 128
    %51469 = torch.prim.ListConstruct %51468, %int16_48920, %int1_48921, %int128_48922 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51470 = torch.aten.view %51447, %51469 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51470, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48923 = torch.constant.int 4
    %51471 = torch.aten.mul.int %int4_48923, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48924 = torch.constant.int 16
    %int1_48925 = torch.constant.int 1
    %int128_48926 = torch.constant.int 128
    %51472 = torch.prim.ListConstruct %51471, %int16_48924, %int1_48925, %int128_48926 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51473 = torch.aten.view %51449, %51472 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51473, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
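    // Flatten the corresponding [4,?] slot-index tensors to rank-1 [4*pages] so each row of the
    // flattened K pages pairs with exactly one slot index.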
    %int4_48927 = torch.constant.int 4
    %51474 = torch.aten.mul.int %int4_48927, %3095 : !torch.int, !torch.int -> !torch.int
    %51475 = torch.prim.ListConstruct %51474 : (!torch.int) -> !torch.list<int>
    %51476 = torch.aten.view %51426, %51475 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51476, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_48928 = torch.constant.int 4
    %51477 = torch.aten.mul.int %int4_48928, %3095 : !torch.int, !torch.int -> !torch.int
    %51478 = torch.prim.ListConstruct %51477 : (!torch.int) -> !torch.list<int>
    %51479 = torch.aten.view %51427, %51478 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51479, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_48929 = torch.constant.int 4
    %51480 = torch.aten.mul.int %int4_48929, %3095 : !torch.int, !torch.int -> !torch.int
    %51481 = torch.prim.ListConstruct %51480 : (!torch.int) -> !torch.list<int>
    %51482 = torch.aten.view %51428, %51481 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51482, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_48930 = torch.constant.int 4
    %51483 = torch.aten.mul.int %int4_48930, %3095 : !torch.int, !torch.int -> !torch.int
    %51484 = torch.prim.ListConstruct %51483 : (!torch.int) -> !torch.list<int>
    %51485 = torch.aten.view %51429, %51484 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51485, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_48931 = torch.constant.int 4
    %51486 = torch.aten.mul.int %int4_48931, %3095 : !torch.int, !torch.int -> !torch.int
    %51487 = torch.prim.ListConstruct %51486 : (!torch.int) -> !torch.list<int>
    %51488 = torch.aten.view %51430, %51487 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51488, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_48932 = torch.constant.int 4
    %51489 = torch.aten.mul.int %int4_48932, %3095 : !torch.int, !torch.int -> !torch.int
    %51490 = torch.prim.ListConstruct %51489 : (!torch.int) -> !torch.list<int>
    %51491 = torch.aten.view %51431, %51490 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51491, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_48933 = torch.constant.int 4
    %51492 = torch.aten.mul.int %int4_48933, %3095 : !torch.int, !torch.int -> !torch.int
    %51493 = torch.prim.ListConstruct %51492 : (!torch.int) -> !torch.list<int>
    %51494 = torch.aten.view %51432, %51493 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51494, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_48934 = torch.constant.int 4
    %51495 = torch.aten.mul.int %int4_48934, %3095 : !torch.int, !torch.int -> !torch.int
    %51496 = torch.prim.ListConstruct %51495 : (!torch.int) -> !torch.list<int>
    %51497 = torch.aten.view %51433, %51496 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51497, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
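    // Split the dynamic token axis of the new K/V data into pages: each
    // [4, s0 * 16, 1, 128] buffer (%51087..%51101) is viewed as [4, s0, 16, 1, 128],
    // i.e. s0 pages of 16 tokens, 1 KV head per shard, head dim 128 (read off the
    // shapes; the page interpretation is an assumption).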
    %int4_48935 = torch.constant.int 4
    %int16_48936 = torch.constant.int 16
    %int1_48937 = torch.constant.int 1
    %int128_48938 = torch.constant.int 128
    %51498 = torch.prim.ListConstruct %int4_48935, %3095, %int16_48936, %int1_48937, %int128_48938 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51499 = torch.aten.view %51087, %51498 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51499, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48939 = torch.constant.int 4
    %int16_48940 = torch.constant.int 16
    %int1_48941 = torch.constant.int 1
    %int128_48942 = torch.constant.int 128
    %51500 = torch.prim.ListConstruct %int4_48939, %3095, %int16_48940, %int1_48941, %int128_48942 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51501 = torch.aten.view %51089, %51500 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51501, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48943 = torch.constant.int 4
    %int16_48944 = torch.constant.int 16
    %int1_48945 = torch.constant.int 1
    %int128_48946 = torch.constant.int 128
    %51502 = torch.prim.ListConstruct %int4_48943, %3095, %int16_48944, %int1_48945, %int128_48946 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51503 = torch.aten.view %51091, %51502 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51503, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48947 = torch.constant.int 4
    %int16_48948 = torch.constant.int 16
    %int1_48949 = torch.constant.int 1
    %int128_48950 = torch.constant.int 128
    %51504 = torch.prim.ListConstruct %int4_48947, %3095, %int16_48948, %int1_48949, %int128_48950 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51505 = torch.aten.view %51093, %51504 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51505, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48951 = torch.constant.int 4
    %int16_48952 = torch.constant.int 16
    %int1_48953 = torch.constant.int 1
    %int128_48954 = torch.constant.int 128
    %51506 = torch.prim.ListConstruct %int4_48951, %3095, %int16_48952, %int1_48953, %int128_48954 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51507 = torch.aten.view %51095, %51506 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51507, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48955 = torch.constant.int 4
    %int16_48956 = torch.constant.int 16
    %int1_48957 = torch.constant.int 1
    %int128_48958 = torch.constant.int 128
    %51508 = torch.prim.ListConstruct %int4_48955, %3095, %int16_48956, %int1_48957, %int128_48958 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51509 = torch.aten.view %51097, %51508 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51509, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48959 = torch.constant.int 4
    %int16_48960 = torch.constant.int 16
    %int1_48961 = torch.constant.int 1
    %int128_48962 = torch.constant.int 128
    %51510 = torch.prim.ListConstruct %int4_48959, %3095, %int16_48960, %int1_48961, %int128_48962 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51511 = torch.aten.view %51099, %51510 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51511, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_48963 = torch.constant.int 4
    %int16_48964 = torch.constant.int 16
    %int1_48965 = torch.constant.int 1
    %int128_48966 = torch.constant.int 128
    %51512 = torch.prim.ListConstruct %int4_48963, %3095, %int16_48964, %int1_48965, %int128_48966 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51513 = torch.aten.view %51101, %51512 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %51513, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
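    // As with the first group, flatten [4, s0, 16, 1, 128] -> [s0 * 4, 16, 1, 128]
    // so each row holds one page for one (batch, page) pair.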
    %int4_48967 = torch.constant.int 4
    %51514 = torch.aten.mul.int %int4_48967, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48968 = torch.constant.int 16
    %int1_48969 = torch.constant.int 1
    %int128_48970 = torch.constant.int 128
    %51515 = torch.prim.ListConstruct %51514, %int16_48968, %int1_48969, %int128_48970 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51516 = torch.aten.view %51499, %51515 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51516, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48971 = torch.constant.int 4
    %51517 = torch.aten.mul.int %int4_48971, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48972 = torch.constant.int 16
    %int1_48973 = torch.constant.int 1
    %int128_48974 = torch.constant.int 128
    %51518 = torch.prim.ListConstruct %51517, %int16_48972, %int1_48973, %int128_48974 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51519 = torch.aten.view %51501, %51518 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51519, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48975 = torch.constant.int 4
    %51520 = torch.aten.mul.int %int4_48975, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48976 = torch.constant.int 16
    %int1_48977 = torch.constant.int 1
    %int128_48978 = torch.constant.int 128
    %51521 = torch.prim.ListConstruct %51520, %int16_48976, %int1_48977, %int128_48978 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51522 = torch.aten.view %51503, %51521 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51522, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48979 = torch.constant.int 4
    %51523 = torch.aten.mul.int %int4_48979, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48980 = torch.constant.int 16
    %int1_48981 = torch.constant.int 1
    %int128_48982 = torch.constant.int 128
    %51524 = torch.prim.ListConstruct %51523, %int16_48980, %int1_48981, %int128_48982 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51525 = torch.aten.view %51505, %51524 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51525, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48983 = torch.constant.int 4
    %51526 = torch.aten.mul.int %int4_48983, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48984 = torch.constant.int 16
    %int1_48985 = torch.constant.int 1
    %int128_48986 = torch.constant.int 128
    %51527 = torch.prim.ListConstruct %51526, %int16_48984, %int1_48985, %int128_48986 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51528 = torch.aten.view %51507, %51527 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51528, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48987 = torch.constant.int 4
    %51529 = torch.aten.mul.int %int4_48987, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48988 = torch.constant.int 16
    %int1_48989 = torch.constant.int 1
    %int128_48990 = torch.constant.int 128
    %51530 = torch.prim.ListConstruct %51529, %int16_48988, %int1_48989, %int128_48990 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51531 = torch.aten.view %51509, %51530 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51531, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48991 = torch.constant.int 4
    %51532 = torch.aten.mul.int %int4_48991, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48992 = torch.constant.int 16
    %int1_48993 = torch.constant.int 1
    %int128_48994 = torch.constant.int 128
    %51533 = torch.prim.ListConstruct %51532, %int16_48992, %int1_48993, %int128_48994 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51534 = torch.aten.view %51511, %51533 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51534, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_48995 = torch.constant.int 4
    %51535 = torch.aten.mul.int %int4_48995, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_48996 = torch.constant.int 16
    %int1_48997 = torch.constant.int 1
    %int128_48998 = torch.constant.int 128
    %51536 = torch.prim.ListConstruct %51535, %int16_48996, %int1_48997, %int128_48998 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51537 = torch.aten.view %51513, %51536 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51537, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
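    // add.Scalar(+1) on each index tensor likely selects the neighbouring cache slot:
    // with the cache flattened over [pages, 32, 2] below, the K and V planes occupy
    // adjacent rows, so the V indices are the K indices plus one.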
    %int1_48999 = torch.constant.int 1
    %int1_49000 = torch.constant.int 1
    %51538 = torch.aten.add.Scalar %51426, %int1_48999, %int1_49000 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51538, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_49001 = torch.constant.int 1
    %int1_49002 = torch.constant.int 1
    %51539 = torch.aten.add.Scalar %51427, %int1_49001, %int1_49002 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51539, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_49003 = torch.constant.int 1
    %int1_49004 = torch.constant.int 1
    %51540 = torch.aten.add.Scalar %51428, %int1_49003, %int1_49004 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51540, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_49005 = torch.constant.int 1
    %int1_49006 = torch.constant.int 1
    %51541 = torch.aten.add.Scalar %51429, %int1_49005, %int1_49006 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51541, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_49007 = torch.constant.int 1
    %int1_49008 = torch.constant.int 1
    %51542 = torch.aten.add.Scalar %51430, %int1_49007, %int1_49008 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51542, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_49009 = torch.constant.int 1
    %int1_49010 = torch.constant.int 1
    %51543 = torch.aten.add.Scalar %51431, %int1_49009, %int1_49010 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51543, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_49011 = torch.constant.int 1
    %int1_49012 = torch.constant.int 1
    %51544 = torch.aten.add.Scalar %51432, %int1_49011, %int1_49012 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51544, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_49013 = torch.constant.int 1
    %int1_49014 = torch.constant.int 1
    %51545 = torch.aten.add.Scalar %51433, %int1_49013, %int1_49014 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %51545, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
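    // Flatten the incremented index tensors to 1-D, mirroring %51476..%51497 above.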
    %int4_49015 = torch.constant.int 4
    %51546 = torch.aten.mul.int %int4_49015, %3095 : !torch.int, !torch.int -> !torch.int
    %51547 = torch.prim.ListConstruct %51546 : (!torch.int) -> !torch.list<int>
    %51548 = torch.aten.view %51538, %51547 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51548, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_49016 = torch.constant.int 4
    %51549 = torch.aten.mul.int %int4_49016, %3095 : !torch.int, !torch.int -> !torch.int
    %51550 = torch.prim.ListConstruct %51549 : (!torch.int) -> !torch.list<int>
    %51551 = torch.aten.view %51539, %51550 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51551, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_49017 = torch.constant.int 4
    %51552 = torch.aten.mul.int %int4_49017, %3095 : !torch.int, !torch.int -> !torch.int
    %51553 = torch.prim.ListConstruct %51552 : (!torch.int) -> !torch.list<int>
    %51554 = torch.aten.view %51540, %51553 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51554, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_49018 = torch.constant.int 4
    %51555 = torch.aten.mul.int %int4_49018, %3095 : !torch.int, !torch.int -> !torch.int
    %51556 = torch.prim.ListConstruct %51555 : (!torch.int) -> !torch.list<int>
    %51557 = torch.aten.view %51541, %51556 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51557, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_49019 = torch.constant.int 4
    %51558 = torch.aten.mul.int %int4_49019, %3095 : !torch.int, !torch.int -> !torch.int
    %51559 = torch.prim.ListConstruct %51558 : (!torch.int) -> !torch.list<int>
    %51560 = torch.aten.view %51542, %51559 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51560, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_49020 = torch.constant.int 4
    %51561 = torch.aten.mul.int %int4_49020, %3095 : !torch.int, !torch.int -> !torch.int
    %51562 = torch.prim.ListConstruct %51561 : (!torch.int) -> !torch.list<int>
    %51563 = torch.aten.view %51543, %51562 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51563, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_49021 = torch.constant.int 4
    %51564 = torch.aten.mul.int %int4_49021, %3095 : !torch.int, !torch.int -> !torch.int
    %51565 = torch.prim.ListConstruct %51564 : (!torch.int) -> !torch.list<int>
    %51566 = torch.aten.view %51544, %51565 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51566, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_49022 = torch.constant.int 4
    %51567 = torch.aten.mul.int %int4_49022, %3095 : !torch.int, !torch.int -> !torch.int
    %51568 = torch.prim.ListConstruct %51567 : (!torch.int) -> !torch.list<int>
    %51569 = torch.aten.view %51545, %51568 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51569, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
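    // Concatenate the two index vectors per shard ([s0*4] ++ [s0*4] -> [s0*8]) so a
    // single index_put can write both halves of the cache update at once.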
    %51570 = torch.prim.ListConstruct %51476, %51548 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_49023 = torch.constant.int 0
    %51571 = torch.aten.cat %51570, %int0_49023 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51571, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %51572 = torch.prim.ListConstruct %51479, %51551 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_49024 = torch.constant.int 0
    %51573 = torch.aten.cat %51572, %int0_49024 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51573, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %51574 = torch.prim.ListConstruct %51482, %51554 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_49025 = torch.constant.int 0
    %51575 = torch.aten.cat %51574, %int0_49025 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51575, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %51576 = torch.prim.ListConstruct %51485, %51557 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_49026 = torch.constant.int 0
    %51577 = torch.aten.cat %51576, %int0_49026 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51577, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %51578 = torch.prim.ListConstruct %51488, %51560 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_49027 = torch.constant.int 0
    %51579 = torch.aten.cat %51578, %int0_49027 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51579, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %51580 = torch.prim.ListConstruct %51491, %51563 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_49028 = torch.constant.int 0
    %51581 = torch.aten.cat %51580, %int0_49028 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51581, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %51582 = torch.prim.ListConstruct %51494, %51566 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_49029 = torch.constant.int 0
    %51583 = torch.aten.cat %51582, %int0_49029 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51583, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %51584 = torch.prim.ListConstruct %51497, %51569 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_49030 = torch.constant.int 0
    %51585 = torch.aten.cat %51584, %int0_49030 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %51585, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
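    // Concatenate the matching value rows in the same order as the indices,
    // giving [s0*8, 16, 1, 128] of new cache content per shard.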
    %51586 = torch.prim.ListConstruct %51452, %51516 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_49031 = torch.constant.int 0
    %51587 = torch.aten.cat %51586, %int0_49031 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51587, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51588 = torch.prim.ListConstruct %51455, %51519 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_49032 = torch.constant.int 0
    %51589 = torch.aten.cat %51588, %int0_49032 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51589, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51590 = torch.prim.ListConstruct %51458, %51522 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_49033 = torch.constant.int 0
    %51591 = torch.aten.cat %51590, %int0_49033 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51591, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51592 = torch.prim.ListConstruct %51461, %51525 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_49034 = torch.constant.int 0
    %51593 = torch.aten.cat %51592, %int0_49034 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51593, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51594 = torch.prim.ListConstruct %51464, %51528 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_49035 = torch.constant.int 0
    %51595 = torch.aten.cat %51594, %int0_49035 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51595, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51596 = torch.prim.ListConstruct %51467, %51531 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_49036 = torch.constant.int 0
    %51597 = torch.aten.cat %51596, %int0_49036 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51597, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51598 = torch.prim.ListConstruct %51470, %51534 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_49037 = torch.constant.int 0
    %51599 = torch.aten.cat %51598, %int0_49037 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51599, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51600 = torch.prim.ListConstruct %51473, %51537 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_49038 = torch.constant.int 0
    %51601 = torch.aten.cat %51600, %int0_49038 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51601, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
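    // Each shard's flat cache (%49752, %49764, ..., of type [?,131072] f16) is viewed
    // as [pages, 32, 2, 16, 1, 128] (131072 = 32*2*16*128); 32 plausibly indexes the
    // transformer layers and 2 separates K/V. The leading three axes collapse to
    // [pages*64, 16, 1, 128], index_put scatters the new rows, and two views restore
    // the flat layout. A rough PyTorch-style sketch of one repetition (names hypothetical):
    //   c = cache.view(-1, 32, 2, 16, 1, 128).view(-1, 16, 1, 128)
    //   c[slots] = new_rows            # slots: [s0*8], new_rows: [s0*8, 16, 1, 128]
    //   cache = c.view(-1, 32, 2, 16, 1, 128).view(-1, 131072)
    // The pattern repeats eight times below, once per shard.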
    %int32_49039 = torch.constant.int 32
    %int2_49040 = torch.constant.int 2
    %int16_49041 = torch.constant.int 16
    %int1_49042 = torch.constant.int 1
    %int128_49043 = torch.constant.int 128
    %51602 = torch.prim.ListConstruct %3023, %int32_49039, %int2_49040, %int16_49041, %int1_49042, %int128_49043 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51603 = torch.aten.view %49752, %51602 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51603, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_49044 = torch.constant.int 32
    %51604 = torch.aten.mul.int %3023, %int32_49044 : !torch.int, !torch.int -> !torch.int
    %int2_49045 = torch.constant.int 2
    %51605 = torch.aten.mul.int %51604, %int2_49045 : !torch.int, !torch.int -> !torch.int
    %int16_49046 = torch.constant.int 16
    %int1_49047 = torch.constant.int 1
    %int128_49048 = torch.constant.int 128
    %51606 = torch.prim.ListConstruct %51605, %int16_49046, %int1_49047, %int128_49048 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51607 = torch.aten.view %51603, %51606 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51607, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51608 = torch.prim.ListConstruct %51571 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_49049 = torch.constant.bool false
    %51609 = torch.aten.index_put %51607, %51608, %51587, %false_49049 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51609, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_49050 = torch.constant.int 32
    %int2_49051 = torch.constant.int 2
    %int16_49052 = torch.constant.int 16
    %int1_49053 = torch.constant.int 1
    %int128_49054 = torch.constant.int 128
    %51610 = torch.prim.ListConstruct %3023, %int32_49050, %int2_49051, %int16_49052, %int1_49053, %int128_49054 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51611 = torch.aten.view %51609, %51610 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51611, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_49055 = torch.constant.int 131072
    %51612 = torch.prim.ListConstruct %3023, %int131072_49055 : (!torch.int, !torch.int) -> !torch.list<int>
    %51613 = torch.aten.view %51611, %51612 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %51613, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_49056 = torch.constant.int 32
    %int2_49057 = torch.constant.int 2
    %int16_49058 = torch.constant.int 16
    %int1_49059 = torch.constant.int 1
    %int128_49060 = torch.constant.int 128
    %51614 = torch.prim.ListConstruct %3026, %int32_49056, %int2_49057, %int16_49058, %int1_49059, %int128_49060 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51615 = torch.aten.view %49764, %51614 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51615, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_49061 = torch.constant.int 32
    %51616 = torch.aten.mul.int %3026, %int32_49061 : !torch.int, !torch.int -> !torch.int
    %int2_49062 = torch.constant.int 2
    %51617 = torch.aten.mul.int %51616, %int2_49062 : !torch.int, !torch.int -> !torch.int
    %int16_49063 = torch.constant.int 16
    %int1_49064 = torch.constant.int 1
    %int128_49065 = torch.constant.int 128
    %51618 = torch.prim.ListConstruct %51617, %int16_49063, %int1_49064, %int128_49065 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51619 = torch.aten.view %51615, %51618 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51619, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51620 = torch.prim.ListConstruct %51573 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_49066 = torch.constant.bool false
    %51621 = torch.aten.index_put %51619, %51620, %51589, %false_49066 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51621, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_49067 = torch.constant.int 32
    %int2_49068 = torch.constant.int 2
    %int16_49069 = torch.constant.int 16
    %int1_49070 = torch.constant.int 1
    %int128_49071 = torch.constant.int 128
    %51622 = torch.prim.ListConstruct %3026, %int32_49067, %int2_49068, %int16_49069, %int1_49070, %int128_49071 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51623 = torch.aten.view %51621, %51622 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51623, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_49072 = torch.constant.int 131072
    %51624 = torch.prim.ListConstruct %3026, %int131072_49072 : (!torch.int, !torch.int) -> !torch.list<int>
    %51625 = torch.aten.view %51623, %51624 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %51625, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_49073 = torch.constant.int 32
    %int2_49074 = torch.constant.int 2
    %int16_49075 = torch.constant.int 16
    %int1_49076 = torch.constant.int 1
    %int128_49077 = torch.constant.int 128
    %51626 = torch.prim.ListConstruct %3029, %int32_49073, %int2_49074, %int16_49075, %int1_49076, %int128_49077 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51627 = torch.aten.view %49776, %51626 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51627, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_49078 = torch.constant.int 32
    %51628 = torch.aten.mul.int %3029, %int32_49078 : !torch.int, !torch.int -> !torch.int
    %int2_49079 = torch.constant.int 2
    %51629 = torch.aten.mul.int %51628, %int2_49079 : !torch.int, !torch.int -> !torch.int
    %int16_49080 = torch.constant.int 16
    %int1_49081 = torch.constant.int 1
    %int128_49082 = torch.constant.int 128
    %51630 = torch.prim.ListConstruct %51629, %int16_49080, %int1_49081, %int128_49082 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51631 = torch.aten.view %51627, %51630 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51631, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51632 = torch.prim.ListConstruct %51575 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_49083 = torch.constant.bool false
    %51633 = torch.aten.index_put %51631, %51632, %51591, %false_49083 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51633, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_49084 = torch.constant.int 32
    %int2_49085 = torch.constant.int 2
    %int16_49086 = torch.constant.int 16
    %int1_49087 = torch.constant.int 1
    %int128_49088 = torch.constant.int 128
    %51634 = torch.prim.ListConstruct %3029, %int32_49084, %int2_49085, %int16_49086, %int1_49087, %int128_49088 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51635 = torch.aten.view %51633, %51634 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51635, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_49089 = torch.constant.int 131072
    %51636 = torch.prim.ListConstruct %3029, %int131072_49089 : (!torch.int, !torch.int) -> !torch.list<int>
    %51637 = torch.aten.view %51635, %51636 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %51637, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_49090 = torch.constant.int 32
    %int2_49091 = torch.constant.int 2
    %int16_49092 = torch.constant.int 16
    %int1_49093 = torch.constant.int 1
    %int128_49094 = torch.constant.int 128
    %51638 = torch.prim.ListConstruct %3032, %int32_49090, %int2_49091, %int16_49092, %int1_49093, %int128_49094 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51639 = torch.aten.view %49788, %51638 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51639, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_49095 = torch.constant.int 32
    %51640 = torch.aten.mul.int %3032, %int32_49095 : !torch.int, !torch.int -> !torch.int
    %int2_49096 = torch.constant.int 2
    %51641 = torch.aten.mul.int %51640, %int2_49096 : !torch.int, !torch.int -> !torch.int
    %int16_49097 = torch.constant.int 16
    %int1_49098 = torch.constant.int 1
    %int128_49099 = torch.constant.int 128
    %51642 = torch.prim.ListConstruct %51641, %int16_49097, %int1_49098, %int128_49099 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51643 = torch.aten.view %51639, %51642 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51643, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51644 = torch.prim.ListConstruct %51577 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_49100 = torch.constant.bool false
    %51645 = torch.aten.index_put %51643, %51644, %51593, %false_49100 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51645, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_49101 = torch.constant.int 32
    %int2_49102 = torch.constant.int 2
    %int16_49103 = torch.constant.int 16
    %int1_49104 = torch.constant.int 1
    %int128_49105 = torch.constant.int 128
    %51646 = torch.prim.ListConstruct %3032, %int32_49101, %int2_49102, %int16_49103, %int1_49104, %int128_49105 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51647 = torch.aten.view %51645, %51646 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51647, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_49106 = torch.constant.int 131072
    %51648 = torch.prim.ListConstruct %3032, %int131072_49106 : (!torch.int, !torch.int) -> !torch.list<int>
    %51649 = torch.aten.view %51647, %51648 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %51649, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_49107 = torch.constant.int 32
    %int2_49108 = torch.constant.int 2
    %int16_49109 = torch.constant.int 16
    %int1_49110 = torch.constant.int 1
    %int128_49111 = torch.constant.int 128
    %51650 = torch.prim.ListConstruct %3035, %int32_49107, %int2_49108, %int16_49109, %int1_49110, %int128_49111 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51651 = torch.aten.view %49800, %51650 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51651, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_49112 = torch.constant.int 32
    %51652 = torch.aten.mul.int %3035, %int32_49112 : !torch.int, !torch.int -> !torch.int
    %int2_49113 = torch.constant.int 2
    %51653 = torch.aten.mul.int %51652, %int2_49113 : !torch.int, !torch.int -> !torch.int
    %int16_49114 = torch.constant.int 16
    %int1_49115 = torch.constant.int 1
    %int128_49116 = torch.constant.int 128
    %51654 = torch.prim.ListConstruct %51653, %int16_49114, %int1_49115, %int128_49116 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51655 = torch.aten.view %51651, %51654 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51655, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51656 = torch.prim.ListConstruct %51579 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_49117 = torch.constant.bool false
    %51657 = torch.aten.index_put %51655, %51656, %51595, %false_49117 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51657, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_49118 = torch.constant.int 32
    %int2_49119 = torch.constant.int 2
    %int16_49120 = torch.constant.int 16
    %int1_49121 = torch.constant.int 1
    %int128_49122 = torch.constant.int 128
    %51658 = torch.prim.ListConstruct %3035, %int32_49118, %int2_49119, %int16_49120, %int1_49121, %int128_49122 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51659 = torch.aten.view %51657, %51658 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51659, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_49123 = torch.constant.int 131072
    %51660 = torch.prim.ListConstruct %3035, %int131072_49123 : (!torch.int, !torch.int) -> !torch.list<int>
    %51661 = torch.aten.view %51659, %51660 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %51661, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_49124 = torch.constant.int 32
    %int2_49125 = torch.constant.int 2
    %int16_49126 = torch.constant.int 16
    %int1_49127 = torch.constant.int 1
    %int128_49128 = torch.constant.int 128
    %51662 = torch.prim.ListConstruct %3038, %int32_49124, %int2_49125, %int16_49126, %int1_49127, %int128_49128 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51663 = torch.aten.view %49812, %51662 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51663, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_49129 = torch.constant.int 32
    %51664 = torch.aten.mul.int %3038, %int32_49129 : !torch.int, !torch.int -> !torch.int
    %int2_49130 = torch.constant.int 2
    %51665 = torch.aten.mul.int %51664, %int2_49130 : !torch.int, !torch.int -> !torch.int
    %int16_49131 = torch.constant.int 16
    %int1_49132 = torch.constant.int 1
    %int128_49133 = torch.constant.int 128
    %51666 = torch.prim.ListConstruct %51665, %int16_49131, %int1_49132, %int128_49133 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51667 = torch.aten.view %51663, %51666 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51667, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51668 = torch.prim.ListConstruct %51581 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_49134 = torch.constant.bool false
    %51669 = torch.aten.index_put %51667, %51668, %51597, %false_49134 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51669, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_49135 = torch.constant.int 32
    %int2_49136 = torch.constant.int 2
    %int16_49137 = torch.constant.int 16
    %int1_49138 = torch.constant.int 1
    %int128_49139 = torch.constant.int 128
    %51670 = torch.prim.ListConstruct %3038, %int32_49135, %int2_49136, %int16_49137, %int1_49138, %int128_49139 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51671 = torch.aten.view %51669, %51670 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51671, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_49140 = torch.constant.int 131072
    %51672 = torch.prim.ListConstruct %3038, %int131072_49140 : (!torch.int, !torch.int) -> !torch.list<int>
    %51673 = torch.aten.view %51671, %51672 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %51673, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_49141 = torch.constant.int 32
    %int2_49142 = torch.constant.int 2
    %int16_49143 = torch.constant.int 16
    %int1_49144 = torch.constant.int 1
    %int128_49145 = torch.constant.int 128
    %51674 = torch.prim.ListConstruct %3041, %int32_49141, %int2_49142, %int16_49143, %int1_49144, %int128_49145 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51675 = torch.aten.view %49824, %51674 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51675, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_49146 = torch.constant.int 32
    %51676 = torch.aten.mul.int %3041, %int32_49146 : !torch.int, !torch.int -> !torch.int
    %int2_49147 = torch.constant.int 2
    %51677 = torch.aten.mul.int %51676, %int2_49147 : !torch.int, !torch.int -> !torch.int
    %int16_49148 = torch.constant.int 16
    %int1_49149 = torch.constant.int 1
    %int128_49150 = torch.constant.int 128
    %51678 = torch.prim.ListConstruct %51677, %int16_49148, %int1_49149, %int128_49150 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51679 = torch.aten.view %51675, %51678 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51679, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51680 = torch.prim.ListConstruct %51583 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_49151 = torch.constant.bool false
    %51681 = torch.aten.index_put %51679, %51680, %51599, %false_49151 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51681, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_49152 = torch.constant.int 32
    %int2_49153 = torch.constant.int 2
    %int16_49154 = torch.constant.int 16
    %int1_49155 = torch.constant.int 1
    %int128_49156 = torch.constant.int 128
    %51682 = torch.prim.ListConstruct %3041, %int32_49152, %int2_49153, %int16_49154, %int1_49155, %int128_49156 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51683 = torch.aten.view %51681, %51682 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51683, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_49157 = torch.constant.int 131072
    %51684 = torch.prim.ListConstruct %3041, %int131072_49157 : (!torch.int, !torch.int) -> !torch.list<int>
    %51685 = torch.aten.view %51683, %51684 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %51685, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_49158 = torch.constant.int 32
    %int2_49159 = torch.constant.int 2
    %int16_49160 = torch.constant.int 16
    %int1_49161 = torch.constant.int 1
    %int128_49162 = torch.constant.int 128
    %51686 = torch.prim.ListConstruct %3044, %int32_49158, %int2_49159, %int16_49160, %int1_49161, %int128_49162 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51687 = torch.aten.view %49836, %51686 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51687, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_49163 = torch.constant.int 32
    %51688 = torch.aten.mul.int %3044, %int32_49163 : !torch.int, !torch.int -> !torch.int
    %int2_49164 = torch.constant.int 2
    %51689 = torch.aten.mul.int %51688, %int2_49164 : !torch.int, !torch.int -> !torch.int
    %int16_49165 = torch.constant.int 16
    %int1_49166 = torch.constant.int 1
    %int128_49167 = torch.constant.int 128
    %51690 = torch.prim.ListConstruct %51689, %int16_49165, %int1_49166, %int128_49167 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51691 = torch.aten.view %51687, %51690 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51691, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %51692 = torch.prim.ListConstruct %51585 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_49168 = torch.constant.bool false
    %51693 = torch.aten.index_put %51691, %51692, %51601, %false_49168 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %51693, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_49169 = torch.constant.int 32
    %int2_49170 = torch.constant.int 2
    %int16_49171 = torch.constant.int 16
    %int1_49172 = torch.constant.int 1
    %int128_49173 = torch.constant.int 128
    %51694 = torch.prim.ListConstruct %3044, %int32_49169, %int2_49170, %int16_49171, %int1_49172, %int128_49173 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51695 = torch.aten.view %51693, %51694 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %51695, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_49174 = torch.constant.int 131072
    %51696 = torch.prim.ListConstruct %3044, %int131072_49174 : (!torch.int, !torch.int) -> !torch.list<int>
    %51697 = torch.aten.view %51695, %51696 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %51697, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
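    // With all eight shard caches updated, prepare the per-shard K/V sequences
    // (%51312..%51417) for attention: unsqueeze(-2) inserts a broadcast axis,
    // [4, s0*16, 1, 128] -> [4, s0*16, 1, 1, 128], ahead of the head-repeat expand.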
    %int-2_49175 = torch.constant.int -2
    %51698 = torch.aten.unsqueeze %51312, %int-2_49175 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49176 = torch.constant.int -2
    %51699 = torch.aten.unsqueeze %51327, %int-2_49176 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49177 = torch.constant.int -2
    %51700 = torch.aten.unsqueeze %51342, %int-2_49177 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49178 = torch.constant.int -2
    %51701 = torch.aten.unsqueeze %51357, %int-2_49178 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49179 = torch.constant.int -2
    %51702 = torch.aten.unsqueeze %51372, %int-2_49179 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49180 = torch.constant.int -2
    %51703 = torch.aten.unsqueeze %51387, %int-2_49180 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49181 = torch.constant.int -2
    %51704 = torch.aten.unsqueeze %51402, %int-2_49181 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49182 = torch.constant.int -2
    %51705 = torch.aten.unsqueeze %51417, %int-2_49182 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
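    // expand repeats each KV head 4x along the inserted axis, producing
    // [4, s0*16, 1, 4, 128] without copying data; the 1 -> 4 replication suggests
    // grouped-query attention with 4 query heads per KV head on this shard.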
    %int4_49183 = torch.constant.int 4
    %int1_49184 = torch.constant.int 1
    %int4_49185 = torch.constant.int 4
    %int128_49186 = torch.constant.int 128
    %51706 = torch.prim.ListConstruct %int4_49183, %51298, %int1_49184, %int4_49185, %int128_49186 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49187 = torch.constant.bool false
    %51707 = torch.aten.expand %51698, %51706, %false_49187 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49188 = torch.constant.int 4
    %int1_49189 = torch.constant.int 1
    %int4_49190 = torch.constant.int 4
    %int128_49191 = torch.constant.int 128
    %51708 = torch.prim.ListConstruct %int4_49188, %51298, %int1_49189, %int4_49190, %int128_49191 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49192 = torch.constant.bool false
    %51709 = torch.aten.expand %51699, %51708, %false_49192 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49193 = torch.constant.int 4
    %int1_49194 = torch.constant.int 1
    %int4_49195 = torch.constant.int 4
    %int128_49196 = torch.constant.int 128
    %51710 = torch.prim.ListConstruct %int4_49193, %51298, %int1_49194, %int4_49195, %int128_49196 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49197 = torch.constant.bool false
    %51711 = torch.aten.expand %51700, %51710, %false_49197 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49198 = torch.constant.int 4
    %int1_49199 = torch.constant.int 1
    %int4_49200 = torch.constant.int 4
    %int128_49201 = torch.constant.int 128
    %51712 = torch.prim.ListConstruct %int4_49198, %51298, %int1_49199, %int4_49200, %int128_49201 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49202 = torch.constant.bool false
    %51713 = torch.aten.expand %51701, %51712, %false_49202 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49203 = torch.constant.int 4
    %int1_49204 = torch.constant.int 1
    %int4_49205 = torch.constant.int 4
    %int128_49206 = torch.constant.int 128
    %51714 = torch.prim.ListConstruct %int4_49203, %51298, %int1_49204, %int4_49205, %int128_49206 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49207 = torch.constant.bool false
    %51715 = torch.aten.expand %51702, %51714, %false_49207 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49208 = torch.constant.int 4
    %int1_49209 = torch.constant.int 1
    %int4_49210 = torch.constant.int 4
    %int128_49211 = torch.constant.int 128
    %51716 = torch.prim.ListConstruct %int4_49208, %51298, %int1_49209, %int4_49210, %int128_49211 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49212 = torch.constant.bool false
    %51717 = torch.aten.expand %51703, %51716, %false_49212 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49213 = torch.constant.int 4
    %int1_49214 = torch.constant.int 1
    %int4_49215 = torch.constant.int 4
    %int128_49216 = torch.constant.int 128
    %51718 = torch.prim.ListConstruct %int4_49213, %51298, %int1_49214, %int4_49215, %int128_49216 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49217 = torch.constant.bool false
    %51719 = torch.aten.expand %51704, %51718, %false_49217 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49218 = torch.constant.int 4
    %int1_49219 = torch.constant.int 1
    %int4_49220 = torch.constant.int 4
    %int128_49221 = torch.constant.int 128
    %51720 = torch.prim.ListConstruct %int4_49218, %51298, %int1_49219, %int4_49220, %int128_49221 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49222 = torch.constant.bool false
    %51721 = torch.aten.expand %51705, %51720, %false_49222 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
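    // Annotation: collapse the broadcast KV-head axis — each [4, seq, 1, 4, 128]
    // expand result is viewed as [4, seq, 4, 128]. Together with the expands above,
    // this appears to be the repeat of this shard's single KV head across its 4
    // query heads (the usual grouped-query-attention repeat_interleave lowering).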
    %int4_49223 = torch.constant.int 4
    %int4_49224 = torch.constant.int 4
    %int128_49225 = torch.constant.int 128
    %51722 = torch.prim.ListConstruct %int4_49223, %51298, %int4_49224, %int128_49225 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51723 = torch.aten.view %51707, %51722 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49226 = torch.constant.int 4
    %int4_49227 = torch.constant.int 4
    %int128_49228 = torch.constant.int 128
    %51724 = torch.prim.ListConstruct %int4_49226, %51298, %int4_49227, %int128_49228 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51725 = torch.aten.view %51709, %51724 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49229 = torch.constant.int 4
    %int4_49230 = torch.constant.int 4
    %int128_49231 = torch.constant.int 128
    %51726 = torch.prim.ListConstruct %int4_49229, %51298, %int4_49230, %int128_49231 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51727 = torch.aten.view %51711, %51726 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49232 = torch.constant.int 4
    %int4_49233 = torch.constant.int 4
    %int128_49234 = torch.constant.int 128
    %51728 = torch.prim.ListConstruct %int4_49232, %51298, %int4_49233, %int128_49234 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51729 = torch.aten.view %51713, %51728 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49235 = torch.constant.int 4
    %int4_49236 = torch.constant.int 4
    %int128_49237 = torch.constant.int 128
    %51730 = torch.prim.ListConstruct %int4_49235, %51298, %int4_49236, %int128_49237 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51731 = torch.aten.view %51715, %51730 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49238 = torch.constant.int 4
    %int4_49239 = torch.constant.int 4
    %int128_49240 = torch.constant.int 128
    %51732 = torch.prim.ListConstruct %int4_49238, %51298, %int4_49239, %int128_49240 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51733 = torch.aten.view %51717, %51732 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49241 = torch.constant.int 4
    %int4_49242 = torch.constant.int 4
    %int128_49243 = torch.constant.int 128
    %51734 = torch.prim.ListConstruct %int4_49241, %51298, %int4_49242, %int128_49243 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51735 = torch.aten.view %51719, %51734 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49244 = torch.constant.int 4
    %int4_49245 = torch.constant.int 4
    %int128_49246 = torch.constant.int 128
    %51736 = torch.prim.ListConstruct %int4_49244, %51298, %int4_49245, %int128_49246 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51737 = torch.aten.view %51721, %51736 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
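    // Annotation: same pattern for the second KV group — unsqueeze %51087..%51101
    // at dim -2 to [4, seq, 1, 1, 128] in preparation for the expand below; these
    // feed the value operands of the attention calls further down.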
    %int-2_49247 = torch.constant.int -2
    %51738 = torch.aten.unsqueeze %51087, %int-2_49247 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49248 = torch.constant.int -2
    %51739 = torch.aten.unsqueeze %51089, %int-2_49248 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49249 = torch.constant.int -2
    %51740 = torch.aten.unsqueeze %51091, %int-2_49249 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49250 = torch.constant.int -2
    %51741 = torch.aten.unsqueeze %51093, %int-2_49250 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49251 = torch.constant.int -2
    %51742 = torch.aten.unsqueeze %51095, %int-2_49251 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51742, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49252 = torch.constant.int -2
    %51743 = torch.aten.unsqueeze %51097, %int-2_49252 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49253 = torch.constant.int -2
    %51744 = torch.aten.unsqueeze %51099, %int-2_49253 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_49254 = torch.constant.int -2
    %51745 = torch.aten.unsqueeze %51101, %int-2_49254 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %51745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
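    // Annotation: re-read the dynamic sequence length (dim 1 of %51011) and build
    // the expand shape [4, seq, 1, 4, 128] for each of the eight tensors.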
    %int1_49255 = torch.constant.int 1
    %51746 = torch.aten.size.int %51011, %int1_49255 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_49256 = torch.constant.int 4
    %int1_49257 = torch.constant.int 1
    %int4_49258 = torch.constant.int 4
    %int128_49259 = torch.constant.int 128
    %51747 = torch.prim.ListConstruct %int4_49256, %51746, %int1_49257, %int4_49258, %int128_49259 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49260 = torch.constant.bool false
    %51748 = torch.aten.expand %51738, %51747, %false_49260 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49261 = torch.constant.int 4
    %int1_49262 = torch.constant.int 1
    %int4_49263 = torch.constant.int 4
    %int128_49264 = torch.constant.int 128
    %51749 = torch.prim.ListConstruct %int4_49261, %51746, %int1_49262, %int4_49263, %int128_49264 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49265 = torch.constant.bool false
    %51750 = torch.aten.expand %51739, %51749, %false_49265 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49266 = torch.constant.int 4
    %int1_49267 = torch.constant.int 1
    %int4_49268 = torch.constant.int 4
    %int128_49269 = torch.constant.int 128
    %51751 = torch.prim.ListConstruct %int4_49266, %51746, %int1_49267, %int4_49268, %int128_49269 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49270 = torch.constant.bool false
    %51752 = torch.aten.expand %51740, %51751, %false_49270 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49271 = torch.constant.int 4
    %int1_49272 = torch.constant.int 1
    %int4_49273 = torch.constant.int 4
    %int128_49274 = torch.constant.int 128
    %51753 = torch.prim.ListConstruct %int4_49271, %51746, %int1_49272, %int4_49273, %int128_49274 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49275 = torch.constant.bool false
    %51754 = torch.aten.expand %51741, %51753, %false_49275 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49276 = torch.constant.int 4
    %int1_49277 = torch.constant.int 1
    %int4_49278 = torch.constant.int 4
    %int128_49279 = torch.constant.int 128
    %51755 = torch.prim.ListConstruct %int4_49276, %51746, %int1_49277, %int4_49278, %int128_49279 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49280 = torch.constant.bool false
    %51756 = torch.aten.expand %51742, %51755, %false_49280 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49281 = torch.constant.int 4
    %int1_49282 = torch.constant.int 1
    %int4_49283 = torch.constant.int 4
    %int128_49284 = torch.constant.int 128
    %51757 = torch.prim.ListConstruct %int4_49281, %51746, %int1_49282, %int4_49283, %int128_49284 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49285 = torch.constant.bool false
    %51758 = torch.aten.expand %51743, %51757, %false_49285 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49286 = torch.constant.int 4
    %int1_49287 = torch.constant.int 1
    %int4_49288 = torch.constant.int 4
    %int128_49289 = torch.constant.int 128
    %51759 = torch.prim.ListConstruct %int4_49286, %51746, %int1_49287, %int4_49288, %int128_49289 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49290 = torch.constant.bool false
    %51760 = torch.aten.expand %51744, %51759, %false_49290 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_49291 = torch.constant.int 4
    %int1_49292 = torch.constant.int 1
    %int4_49293 = torch.constant.int 4
    %int128_49294 = torch.constant.int 128
    %51761 = torch.prim.ListConstruct %int4_49291, %51746, %int1_49292, %int4_49293, %int128_49294 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_49295 = torch.constant.bool false
    %51762 = torch.aten.expand %51745, %51761, %false_49295 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %51762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
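    // Annotation: as with the first group above, flatten the expanded tensors from
    // [4, seq, 1, 4, 128] back to [4, seq, 4, 128].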
    %int4_49296 = torch.constant.int 4
    %int4_49297 = torch.constant.int 4
    %int128_49298 = torch.constant.int 128
    %51763 = torch.prim.ListConstruct %int4_49296, %51746, %int4_49297, %int128_49298 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51764 = torch.aten.view %51748, %51763 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49299 = torch.constant.int 4
    %int4_49300 = torch.constant.int 4
    %int128_49301 = torch.constant.int 128
    %51765 = torch.prim.ListConstruct %int4_49299, %51746, %int4_49300, %int128_49301 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51766 = torch.aten.view %51750, %51765 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49302 = torch.constant.int 4
    %int4_49303 = torch.constant.int 4
    %int128_49304 = torch.constant.int 128
    %51767 = torch.prim.ListConstruct %int4_49302, %51746, %int4_49303, %int128_49304 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51768 = torch.aten.view %51752, %51767 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49305 = torch.constant.int 4
    %int4_49306 = torch.constant.int 4
    %int128_49307 = torch.constant.int 128
    %51769 = torch.prim.ListConstruct %int4_49305, %51746, %int4_49306, %int128_49307 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51770 = torch.aten.view %51754, %51769 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49308 = torch.constant.int 4
    %int4_49309 = torch.constant.int 4
    %int128_49310 = torch.constant.int 128
    %51771 = torch.prim.ListConstruct %int4_49308, %51746, %int4_49309, %int128_49310 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51772 = torch.aten.view %51756, %51771 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49311 = torch.constant.int 4
    %int4_49312 = torch.constant.int 4
    %int128_49313 = torch.constant.int 128
    %51773 = torch.prim.ListConstruct %int4_49311, %51746, %int4_49312, %int128_49313 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51774 = torch.aten.view %51758, %51773 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49314 = torch.constant.int 4
    %int4_49315 = torch.constant.int 4
    %int128_49316 = torch.constant.int 128
    %51775 = torch.prim.ListConstruct %int4_49314, %51746, %int4_49315, %int128_49316 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51776 = torch.aten.view %51760, %51775 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_49317 = torch.constant.int 4
    %int4_49318 = torch.constant.int 4
    %int128_49319 = torch.constant.int 128
    %51777 = torch.prim.ListConstruct %int4_49317, %51746, %int4_49318, %int128_49319 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51778 = torch.aten.view %51762, %51777 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
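    // Annotation: transpose Q (%51154..%51259), K (%51723..%51737), and
    // V (%51764..%51778) from [batch, seq, heads, head_dim] to
    // [batch, heads, seq, head_dim], the layout the attention op expects.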
    %int1_49320 = torch.constant.int 1
    %int2_49321 = torch.constant.int 2
    %51779 = torch.aten.transpose.int %51154, %int1_49320, %int2_49321 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51779, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49322 = torch.constant.int 1
    %int2_49323 = torch.constant.int 2
    %51780 = torch.aten.transpose.int %51169, %int1_49322, %int2_49323 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51780, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49324 = torch.constant.int 1
    %int2_49325 = torch.constant.int 2
    %51781 = torch.aten.transpose.int %51184, %int1_49324, %int2_49325 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51781, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49326 = torch.constant.int 1
    %int2_49327 = torch.constant.int 2
    %51782 = torch.aten.transpose.int %51199, %int1_49326, %int2_49327 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51782, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49328 = torch.constant.int 1
    %int2_49329 = torch.constant.int 2
    %51783 = torch.aten.transpose.int %51214, %int1_49328, %int2_49329 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51783, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49330 = torch.constant.int 1
    %int2_49331 = torch.constant.int 2
    %51784 = torch.aten.transpose.int %51229, %int1_49330, %int2_49331 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51784, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49332 = torch.constant.int 1
    %int2_49333 = torch.constant.int 2
    %51785 = torch.aten.transpose.int %51244, %int1_49332, %int2_49333 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51785, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49334 = torch.constant.int 1
    %int2_49335 = torch.constant.int 2
    %51786 = torch.aten.transpose.int %51259, %int1_49334, %int2_49335 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51786, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49336 = torch.constant.int 1
    %int2_49337 = torch.constant.int 2
    %51787 = torch.aten.transpose.int %51723, %int1_49336, %int2_49337 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51787, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49338 = torch.constant.int 1
    %int2_49339 = torch.constant.int 2
    %51788 = torch.aten.transpose.int %51725, %int1_49338, %int2_49339 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51788, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49340 = torch.constant.int 1
    %int2_49341 = torch.constant.int 2
    %51789 = torch.aten.transpose.int %51727, %int1_49340, %int2_49341 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51789, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49342 = torch.constant.int 1
    %int2_49343 = torch.constant.int 2
    %51790 = torch.aten.transpose.int %51729, %int1_49342, %int2_49343 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51790, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49344 = torch.constant.int 1
    %int2_49345 = torch.constant.int 2
    %51791 = torch.aten.transpose.int %51731, %int1_49344, %int2_49345 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51791, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49346 = torch.constant.int 1
    %int2_49347 = torch.constant.int 2
    %51792 = torch.aten.transpose.int %51733, %int1_49346, %int2_49347 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51792, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49348 = torch.constant.int 1
    %int2_49349 = torch.constant.int 2
    %51793 = torch.aten.transpose.int %51735, %int1_49348, %int2_49349 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51793, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49350 = torch.constant.int 1
    %int2_49351 = torch.constant.int 2
    %51794 = torch.aten.transpose.int %51737, %int1_49350, %int2_49351 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51794, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49352 = torch.constant.int 1
    %int2_49353 = torch.constant.int 2
    %51795 = torch.aten.transpose.int %51764, %int1_49352, %int2_49353 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51795, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49354 = torch.constant.int 1
    %int2_49355 = torch.constant.int 2
    %51796 = torch.aten.transpose.int %51766, %int1_49354, %int2_49355 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51796, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49356 = torch.constant.int 1
    %int2_49357 = torch.constant.int 2
    %51797 = torch.aten.transpose.int %51768, %int1_49356, %int2_49357 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51797, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49358 = torch.constant.int 1
    %int2_49359 = torch.constant.int 2
    %51798 = torch.aten.transpose.int %51770, %int1_49358, %int2_49359 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51798, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49360 = torch.constant.int 1
    %int2_49361 = torch.constant.int 2
    %51799 = torch.aten.transpose.int %51772, %int1_49360, %int2_49361 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51799, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49362 = torch.constant.int 1
    %int2_49363 = torch.constant.int 2
    %51800 = torch.aten.transpose.int %51774, %int1_49362, %int2_49363 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51800, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49364 = torch.constant.int 1
    %int2_49365 = torch.constant.int 2
    %51801 = torch.aten.transpose.int %51776, %int1_49364, %int2_49365 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51801, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_49366 = torch.constant.int 1
    %int2_49367 = torch.constant.int 2
    %51802 = torch.aten.transpose.int %51778, %int1_49366, %int2_49367 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %51802, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
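    // Annotation: one scaled-dot-product flash-attention call per device shard
    // (8 in total), each with dropout_p = 0.0 and is_causal = true. The second
    // result (the [4, 4, seq] f32 tensor, presumably the attention logsumexp) is
    // never used below.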
    %float0.000000e00_49368 = torch.constant.float 0.000000e+00
    %true_49369 = torch.constant.bool true
    %none_49370 = torch.constant.none
    %none_49371 = torch.constant.none
    %51803:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%51779, %51787, %51795, %float0.000000e00_49368, %true_49369, %none_49370, %none_49371) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %51803#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_49372 = torch.constant.float 0.000000e+00
    %true_49373 = torch.constant.bool true
    %none_49374 = torch.constant.none
    %none_49375 = torch.constant.none
    %51804:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%51780, %51788, %51796, %float0.000000e00_49372, %true_49373, %none_49374, %none_49375) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %51804#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_49376 = torch.constant.float 0.000000e+00
    %true_49377 = torch.constant.bool true
    %none_49378 = torch.constant.none
    %none_49379 = torch.constant.none
    %51805:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%51781, %51789, %51797, %float0.000000e00_49376, %true_49377, %none_49378, %none_49379) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %51805#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_49380 = torch.constant.float 0.000000e+00
    %true_49381 = torch.constant.bool true
    %none_49382 = torch.constant.none
    %none_49383 = torch.constant.none
    %51806:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%51782, %51790, %51798, %float0.000000e00_49380, %true_49381, %none_49382, %none_49383) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %51806#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_49384 = torch.constant.float 0.000000e+00
    %true_49385 = torch.constant.bool true
    %none_49386 = torch.constant.none
    %none_49387 = torch.constant.none
    %51807:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%51783, %51791, %51799, %float0.000000e00_49384, %true_49385, %none_49386, %none_49387) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %51807#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_49388 = torch.constant.float 0.000000e+00
    %true_49389 = torch.constant.bool true
    %none_49390 = torch.constant.none
    %none_49391 = torch.constant.none
    %51808:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%51784, %51792, %51800, %float0.000000e00_49388, %true_49389, %none_49390, %none_49391) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %51808#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_49392 = torch.constant.float 0.000000e+00
    %true_49393 = torch.constant.bool true
    %none_49394 = torch.constant.none
    %none_49395 = torch.constant.none
    %51809:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%51785, %51793, %51801, %float0.000000e00_49392, %true_49393, %none_49394, %none_49395) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %51809#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_49396 = torch.constant.float 0.000000e+00
    %true_49397 = torch.constant.bool true
    %none_49398 = torch.constant.none
    %none_49399 = torch.constant.none
    %51810:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%51786, %51794, %51802, %float0.000000e00_49396, %true_49397, %none_49398, %none_49399) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %51810#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
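    // Annotation: transpose each attention output back to
    // [batch, seq, heads, head_dim].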
    %int1_49400 = torch.constant.int 1
    %int2_49401 = torch.constant.int 2
    %51811 = torch.aten.transpose.int %51803#0, %int1_49400, %int2_49401 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_49402 = torch.constant.int 1
    %int2_49403 = torch.constant.int 2
    %51812 = torch.aten.transpose.int %51804#0, %int1_49402, %int2_49403 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_49404 = torch.constant.int 1
    %int2_49405 = torch.constant.int 2
    %51813 = torch.aten.transpose.int %51805#0, %int1_49404, %int2_49405 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_49406 = torch.constant.int 1
    %int2_49407 = torch.constant.int 2
    %51814 = torch.aten.transpose.int %51806#0, %int1_49406, %int2_49407 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_49408 = torch.constant.int 1
    %int2_49409 = torch.constant.int 2
    %51815 = torch.aten.transpose.int %51807#0, %int1_49408, %int2_49409 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_49410 = torch.constant.int 1
    %int2_49411 = torch.constant.int 2
    %51816 = torch.aten.transpose.int %51808#0, %int1_49410, %int2_49411 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_49412 = torch.constant.int 1
    %int2_49413 = torch.constant.int 2
    %51817 = torch.aten.transpose.int %51809#0, %int1_49412, %int2_49413 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_49414 = torch.constant.int 1
    %int2_49415 = torch.constant.int 2
    %51818 = torch.aten.transpose.int %51810#0, %int1_49414, %int2_49415 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %51818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
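    // Annotation: merge the per-shard head and head_dim axes,
    // [4, seq, 4, 128] -> [4, seq, 512], ahead of the output projection.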
    %int4_49416 = torch.constant.int 4
    %int512_49417 = torch.constant.int 512
    %51819 = torch.prim.ListConstruct %int4_49416, %51140, %int512_49417 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51820 = torch.aten.view %51811, %51819 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %51820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_49418 = torch.constant.int 4
    %int512_49419 = torch.constant.int 512
    %51821 = torch.prim.ListConstruct %int4_49418, %51155, %int512_49419 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51822 = torch.aten.view %51812, %51821 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %51822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_49420 = torch.constant.int 4
    %int512_49421 = torch.constant.int 512
    %51823 = torch.prim.ListConstruct %int4_49420, %51170, %int512_49421 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51824 = torch.aten.view %51813, %51823 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %51824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_49422 = torch.constant.int 4
    %int512_49423 = torch.constant.int 512
    %51825 = torch.prim.ListConstruct %int4_49422, %51185, %int512_49423 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51826 = torch.aten.view %51814, %51825 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %51826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_49424 = torch.constant.int 4
    %int512_49425 = torch.constant.int 512
    %51827 = torch.prim.ListConstruct %int4_49424, %51200, %int512_49425 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51828 = torch.aten.view %51815, %51827 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %51828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_49426 = torch.constant.int 4
    %int512_49427 = torch.constant.int 512
    %51829 = torch.prim.ListConstruct %int4_49426, %51215, %int512_49427 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51830 = torch.aten.view %51816, %51829 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %51830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_49428 = torch.constant.int 4
    %int512_49429 = torch.constant.int 512
    %51831 = torch.prim.ListConstruct %int4_49428, %51230, %int512_49429 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51832 = torch.aten.view %51817, %51831 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %51832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_49430 = torch.constant.int 4
    %int512_49431 = torch.constant.int 512
    %51833 = torch.prim.ListConstruct %int4_49430, %51245, %int512_49431 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51834 = torch.aten.view %51818, %51833 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %51834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
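    // Annotation: transpose each shard's projection weight (%1912..%1919,
    // presumably the per-device attn_output.weight shards) from [4096, 512]
    // to [512, 4096] so it can serve as the RHS of the matmuls below.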
    %int1_49432 = torch.constant.int 1
    %int0_49433 = torch.constant.int 0
    %51835 = torch.prim.ListConstruct %int1_49432, %int0_49433 : (!torch.int, !torch.int) -> !torch.list<int>
    %51836 = torch.aten.permute %1912, %51835 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_49434 = torch.constant.int 1
    %int0_49435 = torch.constant.int 0
    %51837 = torch.prim.ListConstruct %int1_49434, %int0_49435 : (!torch.int, !torch.int) -> !torch.list<int>
    %51838 = torch.aten.permute %1913, %51837 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_49436 = torch.constant.int 1
    %int0_49437 = torch.constant.int 0
    %51839 = torch.prim.ListConstruct %int1_49436, %int0_49437 : (!torch.int, !torch.int) -> !torch.list<int>
    %51840 = torch.aten.permute %1914, %51839 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_49438 = torch.constant.int 1
    %int0_49439 = torch.constant.int 0
    %51841 = torch.prim.ListConstruct %int1_49438, %int0_49439 : (!torch.int, !torch.int) -> !torch.list<int>
    %51842 = torch.aten.permute %1915, %51841 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_49440 = torch.constant.int 1
    %int0_49441 = torch.constant.int 0
    %51843 = torch.prim.ListConstruct %int1_49440, %int0_49441 : (!torch.int, !torch.int) -> !torch.list<int>
    %51844 = torch.aten.permute %1916, %51843 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_49442 = torch.constant.int 1
    %int0_49443 = torch.constant.int 0
    %51845 = torch.prim.ListConstruct %int1_49442, %int0_49443 : (!torch.int, !torch.int) -> !torch.list<int>
    %51846 = torch.aten.permute %1917, %51845 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_49444 = torch.constant.int 1
    %int0_49445 = torch.constant.int 0
    %51847 = torch.prim.ListConstruct %int1_49444, %int0_49445 : (!torch.int, !torch.int) -> !torch.list<int>
    %51848 = torch.aten.permute %1918, %51847 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_49446 = torch.constant.int 1
    %int0_49447 = torch.constant.int 0
    %51849 = torch.prim.ListConstruct %int1_49446, %int0_49447 : (!torch.int, !torch.int) -> !torch.list<int>
    %51850 = torch.aten.permute %1919, %51849 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
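    // Annotation: per-shard output projection — flatten [4, seq, 512] to
    // [4*seq, 512], matmul against the [512, 4096] weight, then restore the
    // [4, seq, 4096] shape.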
    %int4_49448 = torch.constant.int 4
    %51851 = torch.aten.mul.int %int4_49448, %51140 : !torch.int, !torch.int -> !torch.int
    %int512_49449 = torch.constant.int 512
    %51852 = torch.prim.ListConstruct %51851, %int512_49449 : (!torch.int, !torch.int) -> !torch.list<int>
    %51853 = torch.aten.view %51820, %51852 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %51853, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %51854 = torch.aten.mm %51853, %51836 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51854, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49450 = torch.constant.int 4
    %int4096_49451 = torch.constant.int 4096
    %51855 = torch.prim.ListConstruct %int4_49450, %51140, %int4096_49451 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51856 = torch.aten.view %51854, %51855 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_49452 = torch.constant.int 4
    %51857 = torch.aten.mul.int %int4_49452, %51155 : !torch.int, !torch.int -> !torch.int
    %int512_49453 = torch.constant.int 512
    %51858 = torch.prim.ListConstruct %51857, %int512_49453 : (!torch.int, !torch.int) -> !torch.list<int>
    %51859 = torch.aten.view %51822, %51858 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %51859, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %51860 = torch.aten.mm %51859, %51838 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51860, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49454 = torch.constant.int 4
    %int4096_49455 = torch.constant.int 4096
    %51861 = torch.prim.ListConstruct %int4_49454, %51155, %int4096_49455 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51862 = torch.aten.view %51860, %51861 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_49456 = torch.constant.int 4
    %51863 = torch.aten.mul.int %int4_49456, %51170 : !torch.int, !torch.int -> !torch.int
    %int512_49457 = torch.constant.int 512
    %51864 = torch.prim.ListConstruct %51863, %int512_49457 : (!torch.int, !torch.int) -> !torch.list<int>
    %51865 = torch.aten.view %51824, %51864 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %51865, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %51866 = torch.aten.mm %51865, %51840 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51866, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49458 = torch.constant.int 4
    %int4096_49459 = torch.constant.int 4096
    %51867 = torch.prim.ListConstruct %int4_49458, %51170, %int4096_49459 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51868 = torch.aten.view %51866, %51867 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_49460 = torch.constant.int 4
    %51869 = torch.aten.mul.int %int4_49460, %51185 : !torch.int, !torch.int -> !torch.int
    %int512_49461 = torch.constant.int 512
    %51870 = torch.prim.ListConstruct %51869, %int512_49461 : (!torch.int, !torch.int) -> !torch.list<int>
    %51871 = torch.aten.view %51826, %51870 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %51871, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %51872 = torch.aten.mm %51871, %51842 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51872, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49462 = torch.constant.int 4
    %int4096_49463 = torch.constant.int 4096
    %51873 = torch.prim.ListConstruct %int4_49462, %51185, %int4096_49463 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51874 = torch.aten.view %51872, %51873 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_49464 = torch.constant.int 4
    %51875 = torch.aten.mul.int %int4_49464, %51200 : !torch.int, !torch.int -> !torch.int
    %int512_49465 = torch.constant.int 512
    %51876 = torch.prim.ListConstruct %51875, %int512_49465 : (!torch.int, !torch.int) -> !torch.list<int>
    %51877 = torch.aten.view %51828, %51876 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %51877, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %51878 = torch.aten.mm %51877, %51844 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51878, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49466 = torch.constant.int 4
    %int4096_49467 = torch.constant.int 4096
    %51879 = torch.prim.ListConstruct %int4_49466, %51200, %int4096_49467 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51880 = torch.aten.view %51878, %51879 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_49468 = torch.constant.int 4
    %51881 = torch.aten.mul.int %int4_49468, %51215 : !torch.int, !torch.int -> !torch.int
    %int512_49469 = torch.constant.int 512
    %51882 = torch.prim.ListConstruct %51881, %int512_49469 : (!torch.int, !torch.int) -> !torch.list<int>
    %51883 = torch.aten.view %51830, %51882 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %51883, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %51884 = torch.aten.mm %51883, %51846 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51884, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49470 = torch.constant.int 4
    %int4096_49471 = torch.constant.int 4096
    %51885 = torch.prim.ListConstruct %int4_49470, %51215, %int4096_49471 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51886 = torch.aten.view %51884, %51885 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_49472 = torch.constant.int 4
    %51887 = torch.aten.mul.int %int4_49472, %51230 : !torch.int, !torch.int -> !torch.int
    %int512_49473 = torch.constant.int 512
    %51888 = torch.prim.ListConstruct %51887, %int512_49473 : (!torch.int, !torch.int) -> !torch.list<int>
    %51889 = torch.aten.view %51832, %51888 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %51889, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %51890 = torch.aten.mm %51889, %51848 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51890, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49474 = torch.constant.int 4
    %int4096_49475 = torch.constant.int 4096
    %51891 = torch.prim.ListConstruct %int4_49474, %51230, %int4096_49475 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51892 = torch.aten.view %51890, %51891 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_49476 = torch.constant.int 4
    %51893 = torch.aten.mul.int %int4_49476, %51245 : !torch.int, !torch.int -> !torch.int
    %int512_49477 = torch.constant.int 512
    %51894 = torch.prim.ListConstruct %51893, %int512_49477 : (!torch.int, !torch.int) -> !torch.list<int>
    %51895 = torch.aten.view %51834, %51894 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %51895, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %51896 = torch.aten.mm %51895, %51850 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %51896, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49478 = torch.constant.int 4
    %int4096_49479 = torch.constant.int 4096
    %51897 = torch.prim.ListConstruct %int4_49478, %51245, %int4096_49479 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %51898 = torch.aten.view %51896, %51897 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
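    // Annotation: gather the partial projection results from devices 1..7
    // onto @__device_0 (%51862, %51868, ..., %51898 are the per-device partial
    // sums of the column-sharded projection; %51856 is already resident there).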
    %51899 = torch_c.to_builtin_tensor %51862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49480 = arith.constant 1 : index
    %dim_49481 = tensor.dim %51899, %c1_49480 : tensor<4x?x4096xf16>
    %51900 = flow.tensor.transfer %51899 : tensor<4x?x4096xf16>{%dim_49481} to #hal.device.promise<@__device_0>
    %51901 = torch_c.from_builtin_tensor %51900 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51902 = torch_c.to_builtin_tensor %51868 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49482 = arith.constant 1 : index
    %dim_49483 = tensor.dim %51902, %c1_49482 : tensor<4x?x4096xf16>
    %51903 = flow.tensor.transfer %51902 : tensor<4x?x4096xf16>{%dim_49483} to #hal.device.promise<@__device_0>
    %51904 = torch_c.from_builtin_tensor %51903 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51905 = torch_c.to_builtin_tensor %51874 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49484 = arith.constant 1 : index
    %dim_49485 = tensor.dim %51905, %c1_49484 : tensor<4x?x4096xf16>
    %51906 = flow.tensor.transfer %51905 : tensor<4x?x4096xf16>{%dim_49485} to #hal.device.promise<@__device_0>
    %51907 = torch_c.from_builtin_tensor %51906 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51908 = torch_c.to_builtin_tensor %51880 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49486 = arith.constant 1 : index
    %dim_49487 = tensor.dim %51908, %c1_49486 : tensor<4x?x4096xf16>
    %51909 = flow.tensor.transfer %51908 : tensor<4x?x4096xf16>{%dim_49487} to #hal.device.promise<@__device_0>
    %51910 = torch_c.from_builtin_tensor %51909 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51911 = torch_c.to_builtin_tensor %51886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49488 = arith.constant 1 : index
    %dim_49489 = tensor.dim %51911, %c1_49488 : tensor<4x?x4096xf16>
    %51912 = flow.tensor.transfer %51911 : tensor<4x?x4096xf16>{%dim_49489} to #hal.device.promise<@__device_0>
    %51913 = torch_c.from_builtin_tensor %51912 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51914 = torch_c.to_builtin_tensor %51892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49490 = arith.constant 1 : index
    %dim_49491 = tensor.dim %51914, %c1_49490 : tensor<4x?x4096xf16>
    %51915 = flow.tensor.transfer %51914 : tensor<4x?x4096xf16>{%dim_49491} to #hal.device.promise<@__device_0>
    %51916 = torch_c.from_builtin_tensor %51915 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51917 = torch_c.to_builtin_tensor %51898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49492 = arith.constant 1 : index
    %dim_49493 = tensor.dim %51917, %c1_49492 : tensor<4x?x4096xf16>
    %51918 = flow.tensor.transfer %51917 : tensor<4x?x4096xf16>{%dim_49493} to #hal.device.promise<@__device_0>
    %51919 = torch_c.from_builtin_tensor %51918 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
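    // Annotation: sum the eight partials on @__device_0. Together with the
    // transfers above, this is effectively an all-reduce over the
    // tensor-parallel shards, expressed as point-to-point copies plus adds.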
    %int1_49494 = torch.constant.int 1
    %51920 = torch.aten.add.Tensor %51856, %51901, %int1_49494 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49495 = torch.constant.int 1
    %51921 = torch.aten.add.Tensor %51920, %51904, %int1_49495 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49496 = torch.constant.int 1
    %51922 = torch.aten.add.Tensor %51921, %51907, %int1_49496 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49497 = torch.constant.int 1
    %51923 = torch.aten.add.Tensor %51922, %51910, %int1_49497 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49498 = torch.constant.int 1
    %51924 = torch.aten.add.Tensor %51923, %51913, %int1_49498 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49499 = torch.constant.int 1
    %51925 = torch.aten.add.Tensor %51924, %51916, %int1_49499 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49500 = torch.constant.int 1
    %51926 = torch.aten.add.Tensor %51925, %51919, %int1_49500 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
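    // Annotation: the same gather-and-sum is replicated per device; here the
    // partials are copied to @__device_1 (%51929..%51947), with the
    // corresponding add chain following.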
    %51927 = torch_c.to_builtin_tensor %51856 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49501 = arith.constant 1 : index
    %dim_49502 = tensor.dim %51927, %c1_49501 : tensor<4x?x4096xf16>
    %51928 = flow.tensor.transfer %51927 : tensor<4x?x4096xf16>{%dim_49502} to #hal.device.promise<@__device_1>
    %51929 = torch_c.from_builtin_tensor %51928 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51930 = torch_c.to_builtin_tensor %51868 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49503 = arith.constant 1 : index
    %dim_49504 = tensor.dim %51930, %c1_49503 : tensor<4x?x4096xf16>
    %51931 = flow.tensor.transfer %51930 : tensor<4x?x4096xf16>{%dim_49504} to #hal.device.promise<@__device_1>
    %51932 = torch_c.from_builtin_tensor %51931 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51933 = torch_c.to_builtin_tensor %51874 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49505 = arith.constant 1 : index
    %dim_49506 = tensor.dim %51933, %c1_49505 : tensor<4x?x4096xf16>
    %51934 = flow.tensor.transfer %51933 : tensor<4x?x4096xf16>{%dim_49506} to #hal.device.promise<@__device_1>
    %51935 = torch_c.from_builtin_tensor %51934 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51936 = torch_c.to_builtin_tensor %51880 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49507 = arith.constant 1 : index
    %dim_49508 = tensor.dim %51936, %c1_49507 : tensor<4x?x4096xf16>
    %51937 = flow.tensor.transfer %51936 : tensor<4x?x4096xf16>{%dim_49508} to #hal.device.promise<@__device_1>
    %51938 = torch_c.from_builtin_tensor %51937 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51939 = torch_c.to_builtin_tensor %51886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49509 = arith.constant 1 : index
    %dim_49510 = tensor.dim %51939, %c1_49509 : tensor<4x?x4096xf16>
    %51940 = flow.tensor.transfer %51939 : tensor<4x?x4096xf16>{%dim_49510} to #hal.device.promise<@__device_1>
    %51941 = torch_c.from_builtin_tensor %51940 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51942 = torch_c.to_builtin_tensor %51892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49511 = arith.constant 1 : index
    %dim_49512 = tensor.dim %51942, %c1_49511 : tensor<4x?x4096xf16>
    %51943 = flow.tensor.transfer %51942 : tensor<4x?x4096xf16>{%dim_49512} to #hal.device.promise<@__device_1>
    %51944 = torch_c.from_builtin_tensor %51943 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51945 = torch_c.to_builtin_tensor %51898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49513 = arith.constant 1 : index
    %dim_49514 = tensor.dim %51945, %c1_49513 : tensor<4x?x4096xf16>
    %51946 = flow.tensor.transfer %51945 : tensor<4x?x4096xf16>{%dim_49514} to #hal.device.promise<@__device_1>
    %51947 = torch_c.from_builtin_tensor %51946 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49515 = torch.constant.int 1
    %51948 = torch.aten.add.Tensor %51929, %51862, %int1_49515 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49516 = torch.constant.int 1
    %51949 = torch.aten.add.Tensor %51948, %51932, %int1_49516 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49517 = torch.constant.int 1
    %51950 = torch.aten.add.Tensor %51949, %51935, %int1_49517 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49518 = torch.constant.int 1
    %51951 = torch.aten.add.Tensor %51950, %51938, %int1_49518 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49519 = torch.constant.int 1
    %51952 = torch.aten.add.Tensor %51951, %51941, %int1_49519 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49520 = torch.constant.int 1
    %51953 = torch.aten.add.Tensor %51952, %51944, %int1_49520 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49521 = torch.constant.int 1
    %51954 = torch.aten.add.Tensor %51953, %51947, %int1_49521 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
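    // Gather for @__device_2: same pattern; the local term (%51868) needs no transfer.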
    %51955 = torch_c.to_builtin_tensor %51856 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49522 = arith.constant 1 : index
    %dim_49523 = tensor.dim %51955, %c1_49522 : tensor<4x?x4096xf16>
    %51956 = flow.tensor.transfer %51955 : tensor<4x?x4096xf16>{%dim_49523} to #hal.device.promise<@__device_2>
    %51957 = torch_c.from_builtin_tensor %51956 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51958 = torch_c.to_builtin_tensor %51862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49524 = arith.constant 1 : index
    %dim_49525 = tensor.dim %51958, %c1_49524 : tensor<4x?x4096xf16>
    %51959 = flow.tensor.transfer %51958 : tensor<4x?x4096xf16>{%dim_49525} to #hal.device.promise<@__device_2>
    %51960 = torch_c.from_builtin_tensor %51959 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51961 = torch_c.to_builtin_tensor %51874 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49526 = arith.constant 1 : index
    %dim_49527 = tensor.dim %51961, %c1_49526 : tensor<4x?x4096xf16>
    %51962 = flow.tensor.transfer %51961 : tensor<4x?x4096xf16>{%dim_49527} to #hal.device.promise<@__device_2>
    %51963 = torch_c.from_builtin_tensor %51962 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51964 = torch_c.to_builtin_tensor %51880 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49528 = arith.constant 1 : index
    %dim_49529 = tensor.dim %51964, %c1_49528 : tensor<4x?x4096xf16>
    %51965 = flow.tensor.transfer %51964 : tensor<4x?x4096xf16>{%dim_49529} to #hal.device.promise<@__device_2>
    %51966 = torch_c.from_builtin_tensor %51965 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51967 = torch_c.to_builtin_tensor %51886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49530 = arith.constant 1 : index
    %dim_49531 = tensor.dim %51967, %c1_49530 : tensor<4x?x4096xf16>
    %51968 = flow.tensor.transfer %51967 : tensor<4x?x4096xf16>{%dim_49531} to #hal.device.promise<@__device_2>
    %51969 = torch_c.from_builtin_tensor %51968 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51970 = torch_c.to_builtin_tensor %51892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49532 = arith.constant 1 : index
    %dim_49533 = tensor.dim %51970, %c1_49532 : tensor<4x?x4096xf16>
    %51971 = flow.tensor.transfer %51970 : tensor<4x?x4096xf16>{%dim_49533} to #hal.device.promise<@__device_2>
    %51972 = torch_c.from_builtin_tensor %51971 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51973 = torch_c.to_builtin_tensor %51898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49534 = arith.constant 1 : index
    %dim_49535 = tensor.dim %51973, %c1_49534 : tensor<4x?x4096xf16>
    %51974 = flow.tensor.transfer %51973 : tensor<4x?x4096xf16>{%dim_49535} to #hal.device.promise<@__device_2>
    %51975 = torch_c.from_builtin_tensor %51974 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49536 = torch.constant.int 1
    %51976 = torch.aten.add.Tensor %51957, %51960, %int1_49536 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49537 = torch.constant.int 1
    %51977 = torch.aten.add.Tensor %51976, %51868, %int1_49537 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49538 = torch.constant.int 1
    %51978 = torch.aten.add.Tensor %51977, %51963, %int1_49538 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49539 = torch.constant.int 1
    %51979 = torch.aten.add.Tensor %51978, %51966, %int1_49539 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49540 = torch.constant.int 1
    %51980 = torch.aten.add.Tensor %51979, %51969, %int1_49540 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49541 = torch.constant.int 1
    %51981 = torch.aten.add.Tensor %51980, %51972, %int1_49541 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49542 = torch.constant.int 1
    %51982 = torch.aten.add.Tensor %51981, %51975, %int1_49542 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
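    // Gather for @__device_3; the local term (%51874) is added without a transfer.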
    %51983 = torch_c.to_builtin_tensor %51856 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49543 = arith.constant 1 : index
    %dim_49544 = tensor.dim %51983, %c1_49543 : tensor<4x?x4096xf16>
    %51984 = flow.tensor.transfer %51983 : tensor<4x?x4096xf16>{%dim_49544} to #hal.device.promise<@__device_3>
    %51985 = torch_c.from_builtin_tensor %51984 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51985, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51986 = torch_c.to_builtin_tensor %51862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49545 = arith.constant 1 : index
    %dim_49546 = tensor.dim %51986, %c1_49545 : tensor<4x?x4096xf16>
    %51987 = flow.tensor.transfer %51986 : tensor<4x?x4096xf16>{%dim_49546} to #hal.device.promise<@__device_3>
    %51988 = torch_c.from_builtin_tensor %51987 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51989 = torch_c.to_builtin_tensor %51868 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49547 = arith.constant 1 : index
    %dim_49548 = tensor.dim %51989, %c1_49547 : tensor<4x?x4096xf16>
    %51990 = flow.tensor.transfer %51989 : tensor<4x?x4096xf16>{%dim_49548} to #hal.device.promise<@__device_3>
    %51991 = torch_c.from_builtin_tensor %51990 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51992 = torch_c.to_builtin_tensor %51880 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49549 = arith.constant 1 : index
    %dim_49550 = tensor.dim %51992, %c1_49549 : tensor<4x?x4096xf16>
    %51993 = flow.tensor.transfer %51992 : tensor<4x?x4096xf16>{%dim_49550} to #hal.device.promise<@__device_3>
    %51994 = torch_c.from_builtin_tensor %51993 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51995 = torch_c.to_builtin_tensor %51886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49551 = arith.constant 1 : index
    %dim_49552 = tensor.dim %51995, %c1_49551 : tensor<4x?x4096xf16>
    %51996 = flow.tensor.transfer %51995 : tensor<4x?x4096xf16>{%dim_49552} to #hal.device.promise<@__device_3>
    %51997 = torch_c.from_builtin_tensor %51996 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %51997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %51998 = torch_c.to_builtin_tensor %51892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49553 = arith.constant 1 : index
    %dim_49554 = tensor.dim %51998, %c1_49553 : tensor<4x?x4096xf16>
    %51999 = flow.tensor.transfer %51998 : tensor<4x?x4096xf16>{%dim_49554} to #hal.device.promise<@__device_3>
    %52000 = torch_c.from_builtin_tensor %51999 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52001 = torch_c.to_builtin_tensor %51898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49555 = arith.constant 1 : index
    %dim_49556 = tensor.dim %52001, %c1_49555 : tensor<4x?x4096xf16>
    %52002 = flow.tensor.transfer %52001 : tensor<4x?x4096xf16>{%dim_49556} to #hal.device.promise<@__device_3>
    %52003 = torch_c.from_builtin_tensor %52002 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49557 = torch.constant.int 1
    %52004 = torch.aten.add.Tensor %51985, %51988, %int1_49557 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49558 = torch.constant.int 1
    %52005 = torch.aten.add.Tensor %52004, %51991, %int1_49558 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49559 = torch.constant.int 1
    %52006 = torch.aten.add.Tensor %52005, %51874, %int1_49559 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49560 = torch.constant.int 1
    %52007 = torch.aten.add.Tensor %52006, %51994, %int1_49560 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49561 = torch.constant.int 1
    %52008 = torch.aten.add.Tensor %52007, %51997, %int1_49561 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49562 = torch.constant.int 1
    %52009 = torch.aten.add.Tensor %52008, %52000, %int1_49562 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49563 = torch.constant.int 1
    %52010 = torch.aten.add.Tensor %52009, %52003, %int1_49563 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
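    // Gather for @__device_4; the local term (%51880) is added without a transfer.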
    %52011 = torch_c.to_builtin_tensor %51856 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49564 = arith.constant 1 : index
    %dim_49565 = tensor.dim %52011, %c1_49564 : tensor<4x?x4096xf16>
    %52012 = flow.tensor.transfer %52011 : tensor<4x?x4096xf16>{%dim_49565} to #hal.device.promise<@__device_4>
    %52013 = torch_c.from_builtin_tensor %52012 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52014 = torch_c.to_builtin_tensor %51862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49566 = arith.constant 1 : index
    %dim_49567 = tensor.dim %52014, %c1_49566 : tensor<4x?x4096xf16>
    %52015 = flow.tensor.transfer %52014 : tensor<4x?x4096xf16>{%dim_49567} to #hal.device.promise<@__device_4>
    %52016 = torch_c.from_builtin_tensor %52015 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52017 = torch_c.to_builtin_tensor %51868 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49568 = arith.constant 1 : index
    %dim_49569 = tensor.dim %52017, %c1_49568 : tensor<4x?x4096xf16>
    %52018 = flow.tensor.transfer %52017 : tensor<4x?x4096xf16>{%dim_49569} to #hal.device.promise<@__device_4>
    %52019 = torch_c.from_builtin_tensor %52018 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52020 = torch_c.to_builtin_tensor %51874 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49570 = arith.constant 1 : index
    %dim_49571 = tensor.dim %52020, %c1_49570 : tensor<4x?x4096xf16>
    %52021 = flow.tensor.transfer %52020 : tensor<4x?x4096xf16>{%dim_49571} to #hal.device.promise<@__device_4>
    %52022 = torch_c.from_builtin_tensor %52021 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52023 = torch_c.to_builtin_tensor %51886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49572 = arith.constant 1 : index
    %dim_49573 = tensor.dim %52023, %c1_49572 : tensor<4x?x4096xf16>
    %52024 = flow.tensor.transfer %52023 : tensor<4x?x4096xf16>{%dim_49573} to #hal.device.promise<@__device_4>
    %52025 = torch_c.from_builtin_tensor %52024 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52026 = torch_c.to_builtin_tensor %51892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49574 = arith.constant 1 : index
    %dim_49575 = tensor.dim %52026, %c1_49574 : tensor<4x?x4096xf16>
    %52027 = flow.tensor.transfer %52026 : tensor<4x?x4096xf16>{%dim_49575} to #hal.device.promise<@__device_4>
    %52028 = torch_c.from_builtin_tensor %52027 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52029 = torch_c.to_builtin_tensor %51898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49576 = arith.constant 1 : index
    %dim_49577 = tensor.dim %52029, %c1_49576 : tensor<4x?x4096xf16>
    %52030 = flow.tensor.transfer %52029 : tensor<4x?x4096xf16>{%dim_49577} to #hal.device.promise<@__device_4>
    %52031 = torch_c.from_builtin_tensor %52030 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49578 = torch.constant.int 1
    %52032 = torch.aten.add.Tensor %52013, %52016, %int1_49578 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49579 = torch.constant.int 1
    %52033 = torch.aten.add.Tensor %52032, %52019, %int1_49579 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49580 = torch.constant.int 1
    %52034 = torch.aten.add.Tensor %52033, %52022, %int1_49580 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49581 = torch.constant.int 1
    %52035 = torch.aten.add.Tensor %52034, %51880, %int1_49581 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49582 = torch.constant.int 1
    %52036 = torch.aten.add.Tensor %52035, %52025, %int1_49582 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49583 = torch.constant.int 1
    %52037 = torch.aten.add.Tensor %52036, %52028, %int1_49583 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49584 = torch.constant.int 1
    %52038 = torch.aten.add.Tensor %52037, %52031, %int1_49584 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
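    // Gather for @__device_5; the local term (%51886) is added without a transfer.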
    %52039 = torch_c.to_builtin_tensor %51856 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49585 = arith.constant 1 : index
    %dim_49586 = tensor.dim %52039, %c1_49585 : tensor<4x?x4096xf16>
    %52040 = flow.tensor.transfer %52039 : tensor<4x?x4096xf16>{%dim_49586} to #hal.device.promise<@__device_5>
    %52041 = torch_c.from_builtin_tensor %52040 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52042 = torch_c.to_builtin_tensor %51862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49587 = arith.constant 1 : index
    %dim_49588 = tensor.dim %52042, %c1_49587 : tensor<4x?x4096xf16>
    %52043 = flow.tensor.transfer %52042 : tensor<4x?x4096xf16>{%dim_49588} to #hal.device.promise<@__device_5>
    %52044 = torch_c.from_builtin_tensor %52043 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52045 = torch_c.to_builtin_tensor %51868 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49589 = arith.constant 1 : index
    %dim_49590 = tensor.dim %52045, %c1_49589 : tensor<4x?x4096xf16>
    %52046 = flow.tensor.transfer %52045 : tensor<4x?x4096xf16>{%dim_49590} to #hal.device.promise<@__device_5>
    %52047 = torch_c.from_builtin_tensor %52046 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52048 = torch_c.to_builtin_tensor %51874 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49591 = arith.constant 1 : index
    %dim_49592 = tensor.dim %52048, %c1_49591 : tensor<4x?x4096xf16>
    %52049 = flow.tensor.transfer %52048 : tensor<4x?x4096xf16>{%dim_49592} to #hal.device.promise<@__device_5>
    %52050 = torch_c.from_builtin_tensor %52049 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52051 = torch_c.to_builtin_tensor %51880 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49593 = arith.constant 1 : index
    %dim_49594 = tensor.dim %52051, %c1_49593 : tensor<4x?x4096xf16>
    %52052 = flow.tensor.transfer %52051 : tensor<4x?x4096xf16>{%dim_49594} to #hal.device.promise<@__device_5>
    %52053 = torch_c.from_builtin_tensor %52052 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52054 = torch_c.to_builtin_tensor %51892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49595 = arith.constant 1 : index
    %dim_49596 = tensor.dim %52054, %c1_49595 : tensor<4x?x4096xf16>
    %52055 = flow.tensor.transfer %52054 : tensor<4x?x4096xf16>{%dim_49596} to #hal.device.promise<@__device_5>
    %52056 = torch_c.from_builtin_tensor %52055 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52057 = torch_c.to_builtin_tensor %51898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49597 = arith.constant 1 : index
    %dim_49598 = tensor.dim %52057, %c1_49597 : tensor<4x?x4096xf16>
    %52058 = flow.tensor.transfer %52057 : tensor<4x?x4096xf16>{%dim_49598} to #hal.device.promise<@__device_5>
    %52059 = torch_c.from_builtin_tensor %52058 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49599 = torch.constant.int 1
    %52060 = torch.aten.add.Tensor %52041, %52044, %int1_49599 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49600 = torch.constant.int 1
    %52061 = torch.aten.add.Tensor %52060, %52047, %int1_49600 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49601 = torch.constant.int 1
    %52062 = torch.aten.add.Tensor %52061, %52050, %int1_49601 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49602 = torch.constant.int 1
    %52063 = torch.aten.add.Tensor %52062, %52053, %int1_49602 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49603 = torch.constant.int 1
    %52064 = torch.aten.add.Tensor %52063, %51886, %int1_49603 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49604 = torch.constant.int 1
    %52065 = torch.aten.add.Tensor %52064, %52056, %int1_49604 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49605 = torch.constant.int 1
    %52066 = torch.aten.add.Tensor %52065, %52059, %int1_49605 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
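    // Gather for @__device_6; the local term (%51892) is added without a transfer.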
    %52067 = torch_c.to_builtin_tensor %51856 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49606 = arith.constant 1 : index
    %dim_49607 = tensor.dim %52067, %c1_49606 : tensor<4x?x4096xf16>
    %52068 = flow.tensor.transfer %52067 : tensor<4x?x4096xf16>{%dim_49607} to #hal.device.promise<@__device_6>
    %52069 = torch_c.from_builtin_tensor %52068 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52070 = torch_c.to_builtin_tensor %51862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49608 = arith.constant 1 : index
    %dim_49609 = tensor.dim %52070, %c1_49608 : tensor<4x?x4096xf16>
    %52071 = flow.tensor.transfer %52070 : tensor<4x?x4096xf16>{%dim_49609} to #hal.device.promise<@__device_6>
    %52072 = torch_c.from_builtin_tensor %52071 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52073 = torch_c.to_builtin_tensor %51868 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49610 = arith.constant 1 : index
    %dim_49611 = tensor.dim %52073, %c1_49610 : tensor<4x?x4096xf16>
    %52074 = flow.tensor.transfer %52073 : tensor<4x?x4096xf16>{%dim_49611} to #hal.device.promise<@__device_6>
    %52075 = torch_c.from_builtin_tensor %52074 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52076 = torch_c.to_builtin_tensor %51874 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49612 = arith.constant 1 : index
    %dim_49613 = tensor.dim %52076, %c1_49612 : tensor<4x?x4096xf16>
    %52077 = flow.tensor.transfer %52076 : tensor<4x?x4096xf16>{%dim_49613} to #hal.device.promise<@__device_6>
    %52078 = torch_c.from_builtin_tensor %52077 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52079 = torch_c.to_builtin_tensor %51880 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49614 = arith.constant 1 : index
    %dim_49615 = tensor.dim %52079, %c1_49614 : tensor<4x?x4096xf16>
    %52080 = flow.tensor.transfer %52079 : tensor<4x?x4096xf16>{%dim_49615} to #hal.device.promise<@__device_6>
    %52081 = torch_c.from_builtin_tensor %52080 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52082 = torch_c.to_builtin_tensor %51886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49616 = arith.constant 1 : index
    %dim_49617 = tensor.dim %52082, %c1_49616 : tensor<4x?x4096xf16>
    %52083 = flow.tensor.transfer %52082 : tensor<4x?x4096xf16>{%dim_49617} to #hal.device.promise<@__device_6>
    %52084 = torch_c.from_builtin_tensor %52083 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52085 = torch_c.to_builtin_tensor %51898 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49618 = arith.constant 1 : index
    %dim_49619 = tensor.dim %52085, %c1_49618 : tensor<4x?x4096xf16>
    %52086 = flow.tensor.transfer %52085 : tensor<4x?x4096xf16>{%dim_49619} to #hal.device.promise<@__device_6>
    %52087 = torch_c.from_builtin_tensor %52086 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49620 = torch.constant.int 1
    %52088 = torch.aten.add.Tensor %52069, %52072, %int1_49620 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49621 = torch.constant.int 1
    %52089 = torch.aten.add.Tensor %52088, %52075, %int1_49621 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49622 = torch.constant.int 1
    %52090 = torch.aten.add.Tensor %52089, %52078, %int1_49622 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49623 = torch.constant.int 1
    %52091 = torch.aten.add.Tensor %52090, %52081, %int1_49623 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49624 = torch.constant.int 1
    %52092 = torch.aten.add.Tensor %52091, %52084, %int1_49624 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49625 = torch.constant.int 1
    %52093 = torch.aten.add.Tensor %52092, %51892, %int1_49625 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49626 = torch.constant.int 1
    %52094 = torch.aten.add.Tensor %52093, %52087, %int1_49626 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
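    // Gather for @__device_7; the local term (%51898) is added without a transfer.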
    %52095 = torch_c.to_builtin_tensor %51856 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49627 = arith.constant 1 : index
    %dim_49628 = tensor.dim %52095, %c1_49627 : tensor<4x?x4096xf16>
    %52096 = flow.tensor.transfer %52095 : tensor<4x?x4096xf16>{%dim_49628} to #hal.device.promise<@__device_7>
    %52097 = torch_c.from_builtin_tensor %52096 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52098 = torch_c.to_builtin_tensor %51862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49629 = arith.constant 1 : index
    %dim_49630 = tensor.dim %52098, %c1_49629 : tensor<4x?x4096xf16>
    %52099 = flow.tensor.transfer %52098 : tensor<4x?x4096xf16>{%dim_49630} to #hal.device.promise<@__device_7>
    %52100 = torch_c.from_builtin_tensor %52099 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52101 = torch_c.to_builtin_tensor %51868 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49631 = arith.constant 1 : index
    %dim_49632 = tensor.dim %52101, %c1_49631 : tensor<4x?x4096xf16>
    %52102 = flow.tensor.transfer %52101 : tensor<4x?x4096xf16>{%dim_49632} to #hal.device.promise<@__device_7>
    %52103 = torch_c.from_builtin_tensor %52102 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52104 = torch_c.to_builtin_tensor %51874 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49633 = arith.constant 1 : index
    %dim_49634 = tensor.dim %52104, %c1_49633 : tensor<4x?x4096xf16>
    %52105 = flow.tensor.transfer %52104 : tensor<4x?x4096xf16>{%dim_49634} to #hal.device.promise<@__device_7>
    %52106 = torch_c.from_builtin_tensor %52105 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52107 = torch_c.to_builtin_tensor %51880 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49635 = arith.constant 1 : index
    %dim_49636 = tensor.dim %52107, %c1_49635 : tensor<4x?x4096xf16>
    %52108 = flow.tensor.transfer %52107 : tensor<4x?x4096xf16>{%dim_49636} to #hal.device.promise<@__device_7>
    %52109 = torch_c.from_builtin_tensor %52108 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52110 = torch_c.to_builtin_tensor %51886 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49637 = arith.constant 1 : index
    %dim_49638 = tensor.dim %52110, %c1_49637 : tensor<4x?x4096xf16>
    %52111 = flow.tensor.transfer %52110 : tensor<4x?x4096xf16>{%dim_49638} to #hal.device.promise<@__device_7>
    %52112 = torch_c.from_builtin_tensor %52111 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52113 = torch_c.to_builtin_tensor %51892 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49639 = arith.constant 1 : index
    %dim_49640 = tensor.dim %52113, %c1_49639 : tensor<4x?x4096xf16>
    %52114 = flow.tensor.transfer %52113 : tensor<4x?x4096xf16>{%dim_49640} to #hal.device.promise<@__device_7>
    %52115 = torch_c.from_builtin_tensor %52114 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49641 = torch.constant.int 1
    %52116 = torch.aten.add.Tensor %52097, %52100, %int1_49641 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49642 = torch.constant.int 1
    %52117 = torch.aten.add.Tensor %52116, %52103, %int1_49642 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49643 = torch.constant.int 1
    %52118 = torch.aten.add.Tensor %52117, %52106, %int1_49643 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49644 = torch.constant.int 1
    %52119 = torch.aten.add.Tensor %52118, %52109, %int1_49644 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49645 = torch.constant.int 1
    %52120 = torch.aten.add.Tensor %52119, %52112, %int1_49645 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49646 = torch.constant.int 1
    %52121 = torch.aten.add.Tensor %52120, %52115, %int1_49646 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49647 = torch.constant.int 1
    %52122 = torch.aten.add.Tensor %52121, %51898, %int1_49647 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
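    // Per-device residual add: each all-reduced result (%51926 ... %52122) appears to be
    // folded into that device's running hidden state (%50782 ... %50789).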
    %int1_49648 = torch.constant.int 1
    %52123 = torch.aten.add.Tensor %50782, %51926, %int1_49648 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49649 = torch.constant.int 1
    %52124 = torch.aten.add.Tensor %50783, %51954, %int1_49649 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52124, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49650 = torch.constant.int 1
    %52125 = torch.aten.add.Tensor %50784, %51982, %int1_49650 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49651 = torch.constant.int 1
    %52126 = torch.aten.add.Tensor %50785, %52010, %int1_49651 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49652 = torch.constant.int 1
    %52127 = torch.aten.add.Tensor %50786, %52038, %int1_49652 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49653 = torch.constant.int 1
    %52128 = torch.aten.add.Tensor %50787, %52066, %int1_49653 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49654 = torch.constant.int 1
    %52129 = torch.aten.add.Tensor %50788, %52094, %int1_49654 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49655 = torch.constant.int 1
    %52130 = torch.aten.add.Tensor %50789, %52122, %int1_49655 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
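    // What follows matches the RMSNorm pattern x * rsqrt(mean(x^2) + eps), replicated
    // once per device: first upcast f16 -> f32 (dtype code 6) for a stable reduction.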
    %int6_49656 = torch.constant.int 6
    %52131 = torch.prims.convert_element_type %52123, %int6_49656 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_49657 = torch.constant.int 6
    %52132 = torch.prims.convert_element_type %52124, %int6_49657 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_49658 = torch.constant.int 6
    %52133 = torch.prims.convert_element_type %52125, %int6_49658 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_49659 = torch.constant.int 6
    %52134 = torch.prims.convert_element_type %52126, %int6_49659 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_49660 = torch.constant.int 6
    %52135 = torch.prims.convert_element_type %52127, %int6_49660 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_49661 = torch.constant.int 6
    %52136 = torch.prims.convert_element_type %52128, %int6_49661 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_49662 = torch.constant.int 6
    %52137 = torch.prims.convert_element_type %52129, %int6_49662 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_49663 = torch.constant.int 6
    %52138 = torch.prims.convert_element_type %52130, %int6_49663 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
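    // Square each element (x^2) per device.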
    %int2_49664 = torch.constant.int 2
    %52139 = torch.aten.pow.Tensor_Scalar %52131, %int2_49664 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_49665 = torch.constant.int 2
    %52140 = torch.aten.pow.Tensor_Scalar %52132, %int2_49665 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_49666 = torch.constant.int 2
    %52141 = torch.aten.pow.Tensor_Scalar %52133, %int2_49666 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_49667 = torch.constant.int 2
    %52142 = torch.aten.pow.Tensor_Scalar %52134, %int2_49667 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_49668 = torch.constant.int 2
    %52143 = torch.aten.pow.Tensor_Scalar %52135, %int2_49668 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_49669 = torch.constant.int 2
    %52144 = torch.aten.pow.Tensor_Scalar %52136, %int2_49669 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_49670 = torch.constant.int 2
    %52145 = torch.aten.pow.Tensor_Scalar %52137, %int2_49670 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_49671 = torch.constant.int 2
    %52146 = torch.aten.pow.Tensor_Scalar %52138, %int2_49671 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
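    // Mean of x^2 over the last (hidden, 4096) dimension, keepdim=true -> [4,?,1].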
    %int-1_49672 = torch.constant.int -1
    %52147 = torch.prim.ListConstruct %int-1_49672 : (!torch.int) -> !torch.list<int>
    %true_49673 = torch.constant.bool true
    %none_49674 = torch.constant.none
    %52148 = torch.aten.mean.dim %52139, %52147, %true_49673, %none_49674 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_49675 = torch.constant.int -1
    %52149 = torch.prim.ListConstruct %int-1_49675 : (!torch.int) -> !torch.list<int>
    %true_49676 = torch.constant.bool true
    %none_49677 = torch.constant.none
    %52150 = torch.aten.mean.dim %52140, %52149, %true_49676, %none_49677 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_49678 = torch.constant.int -1
    %52151 = torch.prim.ListConstruct %int-1_49678 : (!torch.int) -> !torch.list<int>
    %true_49679 = torch.constant.bool true
    %none_49680 = torch.constant.none
    %52152 = torch.aten.mean.dim %52141, %52151, %true_49679, %none_49680 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_49681 = torch.constant.int -1
    %52153 = torch.prim.ListConstruct %int-1_49681 : (!torch.int) -> !torch.list<int>
    %true_49682 = torch.constant.bool true
    %none_49683 = torch.constant.none
    %52154 = torch.aten.mean.dim %52142, %52153, %true_49682, %none_49683 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_49684 = torch.constant.int -1
    %52155 = torch.prim.ListConstruct %int-1_49684 : (!torch.int) -> !torch.list<int>
    %true_49685 = torch.constant.bool true
    %none_49686 = torch.constant.none
    %52156 = torch.aten.mean.dim %52143, %52155, %true_49685, %none_49686 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_49687 = torch.constant.int -1
    %52157 = torch.prim.ListConstruct %int-1_49687 : (!torch.int) -> !torch.list<int>
    %true_49688 = torch.constant.bool true
    %none_49689 = torch.constant.none
    %52158 = torch.aten.mean.dim %52144, %52157, %true_49688, %none_49689 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_49690 = torch.constant.int -1
    %52159 = torch.prim.ListConstruct %int-1_49690 : (!torch.int) -> !torch.list<int>
    %true_49691 = torch.constant.bool true
    %none_49692 = torch.constant.none
    %52160 = torch.aten.mean.dim %52145, %52159, %true_49691, %none_49692 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_49693 = torch.constant.int -1
    %52161 = torch.prim.ListConstruct %int-1_49693 : (!torch.int) -> !torch.list<int>
    %true_49694 = torch.constant.bool true
    %none_49695 = torch.constant.none
    %52162 = torch.aten.mean.dim %52146, %52161, %true_49694, %none_49695 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
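    // Add eps (~1e-5, stored as its f32 rounding) before the reciprocal square root.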
    %float9.999990e-06_49696 = torch.constant.float 9.9999997473787516E-6
    %int1_49697 = torch.constant.int 1
    %52163 = torch.aten.add.Scalar %52148, %float9.999990e-06_49696, %int1_49697 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_49698 = torch.constant.float 9.9999997473787516E-6
    %int1_49699 = torch.constant.int 1
    %52164 = torch.aten.add.Scalar %52150, %float9.999990e-06_49698, %int1_49699 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_49700 = torch.constant.float 9.9999997473787516E-6
    %int1_49701 = torch.constant.int 1
    %52165 = torch.aten.add.Scalar %52152, %float9.999990e-06_49700, %int1_49701 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_49702 = torch.constant.float 9.9999997473787516E-6
    %int1_49703 = torch.constant.int 1
    %52166 = torch.aten.add.Scalar %52154, %float9.999990e-06_49702, %int1_49703 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_49704 = torch.constant.float 9.9999997473787516E-6
    %int1_49705 = torch.constant.int 1
    %52167 = torch.aten.add.Scalar %52156, %float9.999990e-06_49704, %int1_49705 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_49706 = torch.constant.float 9.9999997473787516E-6
    %int1_49707 = torch.constant.int 1
    %52168 = torch.aten.add.Scalar %52158, %float9.999990e-06_49706, %int1_49707 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_49708 = torch.constant.float 9.9999997473787516E-6
    %int1_49709 = torch.constant.int 1
    %52169 = torch.aten.add.Scalar %52160, %float9.999990e-06_49708, %int1_49709 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_49710 = torch.constant.float 9.9999997473787516E-6
    %int1_49711 = torch.constant.int 1
    %52170 = torch.aten.add.Scalar %52162, %float9.999990e-06_49710, %int1_49711 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52170, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
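    // Per-shard rsqrt(mean(x^2) + eps).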
    %52171 = torch.aten.rsqrt %52163 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52172 = torch.aten.rsqrt %52164 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52173 = torch.aten.rsqrt %52165 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52174 = torch.aten.rsqrt %52166 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52175 = torch.aten.rsqrt %52167 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52176 = torch.aten.rsqrt %52168 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52176, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52177 = torch.aten.rsqrt %52169 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52178 = torch.aten.rsqrt %52170 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
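    // Normalize: scale each shard's f32 activations by its rsqrt factor.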
    %52179 = torch.aten.mul.Tensor %52131, %52171 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52180 = torch.aten.mul.Tensor %52132, %52172 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52181 = torch.aten.mul.Tensor %52133, %52173 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52182 = torch.aten.mul.Tensor %52134, %52174 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52183 = torch.aten.mul.Tensor %52135, %52175 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52184 = torch.aten.mul.Tensor %52136, %52176 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52185 = torch.aten.mul.Tensor %52137, %52177 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52186 = torch.aten.mul.Tensor %52138, %52178 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
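    // Elementwise scale by the per-device [4096] norm weight (%1920..%1927);
    // presumably the block's ffn_norm, given the feed-forward projections that follow.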
    %52187 = torch.aten.mul.Tensor %1920, %52179 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52188 = torch.aten.mul.Tensor %1921, %52180 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52189 = torch.aten.mul.Tensor %1922, %52181 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52190 = torch.aten.mul.Tensor %1923, %52182 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52191 = torch.aten.mul.Tensor %1924, %52183 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52192 = torch.aten.mul.Tensor %1925, %52184 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52193 = torch.aten.mul.Tensor %1926, %52185 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52194 = torch.aten.mul.Tensor %1927, %52186 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
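    // Cast the normalized activations back to f16 (torch dtype code 5) for the FFN matmuls.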
    %int5_49712 = torch.constant.int 5
    %52195 = torch.prims.convert_element_type %52187, %int5_49712 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_49713 = torch.constant.int 5
    %52196 = torch.prims.convert_element_type %52188, %int5_49713 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_49714 = torch.constant.int 5
    %52197 = torch.prims.convert_element_type %52189, %int5_49714 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_49715 = torch.constant.int 5
    %52198 = torch.prims.convert_element_type %52190, %int5_49715 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_49716 = torch.constant.int 5
    %52199 = torch.prims.convert_element_type %52191, %int5_49716 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_49717 = torch.constant.int 5
    %52200 = torch.prims.convert_element_type %52192, %int5_49717 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_49718 = torch.constant.int 5
    %52201 = torch.prims.convert_element_type %52193, %int5_49718 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_49719 = torch.constant.int 5
    %52202 = torch.prims.convert_element_type %52194, %int5_49719 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
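    // Transpose the 8 shards of the first FFN weight ([1792,4096] -> [4096,1792]).
    // 1792 * 8 = 14336, so this appears to be the feed-forward dimension split
    // column-wise across the 8 devices.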
    %int1_49720 = torch.constant.int 1
    %int0_49721 = torch.constant.int 0
    %52203 = torch.prim.ListConstruct %int1_49720, %int0_49721 : (!torch.int, !torch.int) -> !torch.list<int>
    %52204 = torch.aten.permute %1928, %52203 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49722 = torch.constant.int 1
    %int0_49723 = torch.constant.int 0
    %52205 = torch.prim.ListConstruct %int1_49722, %int0_49723 : (!torch.int, !torch.int) -> !torch.list<int>
    %52206 = torch.aten.permute %1929, %52205 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49724 = torch.constant.int 1
    %int0_49725 = torch.constant.int 0
    %52207 = torch.prim.ListConstruct %int1_49724, %int0_49725 : (!torch.int, !torch.int) -> !torch.list<int>
    %52208 = torch.aten.permute %1930, %52207 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49726 = torch.constant.int 1
    %int0_49727 = torch.constant.int 0
    %52209 = torch.prim.ListConstruct %int1_49726, %int0_49727 : (!torch.int, !torch.int) -> !torch.list<int>
    %52210 = torch.aten.permute %1931, %52209 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49728 = torch.constant.int 1
    %int0_49729 = torch.constant.int 0
    %52211 = torch.prim.ListConstruct %int1_49728, %int0_49729 : (!torch.int, !torch.int) -> !torch.list<int>
    %52212 = torch.aten.permute %1932, %52211 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49730 = torch.constant.int 1
    %int0_49731 = torch.constant.int 0
    %52213 = torch.prim.ListConstruct %int1_49730, %int0_49731 : (!torch.int, !torch.int) -> !torch.list<int>
    %52214 = torch.aten.permute %1933, %52213 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49732 = torch.constant.int 1
    %int0_49733 = torch.constant.int 0
    %52215 = torch.prim.ListConstruct %int1_49732, %int0_49733 : (!torch.int, !torch.int) -> !torch.list<int>
    %52216 = torch.aten.permute %1934, %52215 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49734 = torch.constant.int 1
    %int0_49735 = torch.constant.int 0
    %52217 = torch.prim.ListConstruct %int1_49734, %int0_49735 : (!torch.int, !torch.int) -> !torch.list<int>
    %52218 = torch.aten.permute %1935, %52217 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
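    // First-projection matmuls, one per shard (the gate path of a SwiGLU FFN,
    // judging by the torch.aten.silu ops below): flatten [4,?,4096] to [?,4096],
    // mm with the transposed [4096,1792] weight, reshape back to [4,?,1792].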
    %int4_49736 = torch.constant.int 4
    %52219 = torch.aten.mul.int %int4_49736, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49737 = torch.constant.int 4096
    %52220 = torch.prim.ListConstruct %52219, %int4096_49737 : (!torch.int, !torch.int) -> !torch.list<int>
    %52221 = torch.aten.view %52195, %52220 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52221, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52222 = torch.aten.mm %52221, %52204 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52222, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49738 = torch.constant.int 4
    %int1792_49739 = torch.constant.int 1792
    %52223 = torch.prim.ListConstruct %int4_49738, %2482, %int1792_49739 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52224 = torch.aten.view %52222, %52223 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49740 = torch.constant.int 4
    %52225 = torch.aten.mul.int %int4_49740, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49741 = torch.constant.int 4096
    %52226 = torch.prim.ListConstruct %52225, %int4096_49741 : (!torch.int, !torch.int) -> !torch.list<int>
    %52227 = torch.aten.view %52196, %52226 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52227, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52228 = torch.aten.mm %52227, %52206 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52228, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49742 = torch.constant.int 4
    %int1792_49743 = torch.constant.int 1792
    %52229 = torch.prim.ListConstruct %int4_49742, %2482, %int1792_49743 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52230 = torch.aten.view %52228, %52229 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49744 = torch.constant.int 4
    %52231 = torch.aten.mul.int %int4_49744, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49745 = torch.constant.int 4096
    %52232 = torch.prim.ListConstruct %52231, %int4096_49745 : (!torch.int, !torch.int) -> !torch.list<int>
    %52233 = torch.aten.view %52197, %52232 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52233, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52234 = torch.aten.mm %52233, %52208 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52234, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49746 = torch.constant.int 4
    %int1792_49747 = torch.constant.int 1792
    %52235 = torch.prim.ListConstruct %int4_49746, %2482, %int1792_49747 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52236 = torch.aten.view %52234, %52235 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49748 = torch.constant.int 4
    %52237 = torch.aten.mul.int %int4_49748, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49749 = torch.constant.int 4096
    %52238 = torch.prim.ListConstruct %52237, %int4096_49749 : (!torch.int, !torch.int) -> !torch.list<int>
    %52239 = torch.aten.view %52198, %52238 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52239, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52240 = torch.aten.mm %52239, %52210 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52240, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49750 = torch.constant.int 4
    %int1792_49751 = torch.constant.int 1792
    %52241 = torch.prim.ListConstruct %int4_49750, %2482, %int1792_49751 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52242 = torch.aten.view %52240, %52241 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49752 = torch.constant.int 4
    %52243 = torch.aten.mul.int %int4_49752, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49753 = torch.constant.int 4096
    %52244 = torch.prim.ListConstruct %52243, %int4096_49753 : (!torch.int, !torch.int) -> !torch.list<int>
    %52245 = torch.aten.view %52199, %52244 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52245, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52246 = torch.aten.mm %52245, %52212 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52246, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49754 = torch.constant.int 4
    %int1792_49755 = torch.constant.int 1792
    %52247 = torch.prim.ListConstruct %int4_49754, %2482, %int1792_49755 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52248 = torch.aten.view %52246, %52247 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49756 = torch.constant.int 4
    %52249 = torch.aten.mul.int %int4_49756, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49757 = torch.constant.int 4096
    %52250 = torch.prim.ListConstruct %52249, %int4096_49757 : (!torch.int, !torch.int) -> !torch.list<int>
    %52251 = torch.aten.view %52200, %52250 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52251, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52252 = torch.aten.mm %52251, %52214 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52252, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49758 = torch.constant.int 4
    %int1792_49759 = torch.constant.int 1792
    %52253 = torch.prim.ListConstruct %int4_49758, %2482, %int1792_49759 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52254 = torch.aten.view %52252, %52253 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49760 = torch.constant.int 4
    %52255 = torch.aten.mul.int %int4_49760, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49761 = torch.constant.int 4096
    %52256 = torch.prim.ListConstruct %52255, %int4096_49761 : (!torch.int, !torch.int) -> !torch.list<int>
    %52257 = torch.aten.view %52201, %52256 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52257, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52258 = torch.aten.mm %52257, %52216 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52258, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49762 = torch.constant.int 4
    %int1792_49763 = torch.constant.int 1792
    %52259 = torch.prim.ListConstruct %int4_49762, %2482, %int1792_49763 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52260 = torch.aten.view %52258, %52259 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49764 = torch.constant.int 4
    %52261 = torch.aten.mul.int %int4_49764, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49765 = torch.constant.int 4096
    %52262 = torch.prim.ListConstruct %52261, %int4096_49765 : (!torch.int, !torch.int) -> !torch.list<int>
    %52263 = torch.aten.view %52202, %52262 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52263, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52264 = torch.aten.mm %52263, %52218 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52264, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49766 = torch.constant.int 4
    %int1792_49767 = torch.constant.int 1792
    %52265 = torch.prim.ListConstruct %int4_49766, %2482, %int1792_49767 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52266 = torch.aten.view %52264, %52265 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
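    // SiLU activation on each shard's first-projection output.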
    %52267 = torch.aten.silu %52224 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52268 = torch.aten.silu %52230 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52269 = torch.aten.silu %52236 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52270 = torch.aten.silu %52242 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52271 = torch.aten.silu %52248 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52272 = torch.aten.silu %52254 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52273 = torch.aten.silu %52260 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52274 = torch.aten.silu %52266 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
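    // Transpose the 8 shards of the second (up-)projection weight ([1792,4096] -> [4096,1792]).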
    %int1_49768 = torch.constant.int 1
    %int0_49769 = torch.constant.int 0
    %52275 = torch.prim.ListConstruct %int1_49768, %int0_49769 : (!torch.int, !torch.int) -> !torch.list<int>
    %52276 = torch.aten.permute %1936, %52275 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49770 = torch.constant.int 1
    %int0_49771 = torch.constant.int 0
    %52277 = torch.prim.ListConstruct %int1_49770, %int0_49771 : (!torch.int, !torch.int) -> !torch.list<int>
    %52278 = torch.aten.permute %1937, %52277 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49772 = torch.constant.int 1
    %int0_49773 = torch.constant.int 0
    %52279 = torch.prim.ListConstruct %int1_49772, %int0_49773 : (!torch.int, !torch.int) -> !torch.list<int>
    %52280 = torch.aten.permute %1938, %52279 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49774 = torch.constant.int 1
    %int0_49775 = torch.constant.int 0
    %52281 = torch.prim.ListConstruct %int1_49774, %int0_49775 : (!torch.int, !torch.int) -> !torch.list<int>
    %52282 = torch.aten.permute %1939, %52281 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49776 = torch.constant.int 1
    %int0_49777 = torch.constant.int 0
    %52283 = torch.prim.ListConstruct %int1_49776, %int0_49777 : (!torch.int, !torch.int) -> !torch.list<int>
    %52284 = torch.aten.permute %1940, %52283 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49778 = torch.constant.int 1
    %int0_49779 = torch.constant.int 0
    %52285 = torch.prim.ListConstruct %int1_49778, %int0_49779 : (!torch.int, !torch.int) -> !torch.list<int>
    %52286 = torch.aten.permute %1941, %52285 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49780 = torch.constant.int 1
    %int0_49781 = torch.constant.int 0
    %52287 = torch.prim.ListConstruct %int1_49780, %int0_49781 : (!torch.int, !torch.int) -> !torch.list<int>
    %52288 = torch.aten.permute %1942, %52287 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_49782 = torch.constant.int 1
    %int0_49783 = torch.constant.int 0
    %52289 = torch.prim.ListConstruct %int1_49782, %int0_49783 : (!torch.int, !torch.int) -> !torch.list<int>
    %52290 = torch.aten.permute %1943, %52289 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
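    // Up-projection matmuls: the same flatten / mm / reshape pattern as the gate path.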
    %int4_49784 = torch.constant.int 4
    %52291 = torch.aten.mul.int %int4_49784, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49785 = torch.constant.int 4096
    %52292 = torch.prim.ListConstruct %52291, %int4096_49785 : (!torch.int, !torch.int) -> !torch.list<int>
    %52293 = torch.aten.view %52195, %52292 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52293, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52294 = torch.aten.mm %52293, %52276 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52294, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49786 = torch.constant.int 4
    %int1792_49787 = torch.constant.int 1792
    %52295 = torch.prim.ListConstruct %int4_49786, %2482, %int1792_49787 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52296 = torch.aten.view %52294, %52295 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49788 = torch.constant.int 4
    %52297 = torch.aten.mul.int %int4_49788, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49789 = torch.constant.int 4096
    %52298 = torch.prim.ListConstruct %52297, %int4096_49789 : (!torch.int, !torch.int) -> !torch.list<int>
    %52299 = torch.aten.view %52196, %52298 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52299, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52300 = torch.aten.mm %52299, %52278 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52300, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49790 = torch.constant.int 4
    %int1792_49791 = torch.constant.int 1792
    %52301 = torch.prim.ListConstruct %int4_49790, %2482, %int1792_49791 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52302 = torch.aten.view %52300, %52301 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49792 = torch.constant.int 4
    %52303 = torch.aten.mul.int %int4_49792, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49793 = torch.constant.int 4096
    %52304 = torch.prim.ListConstruct %52303, %int4096_49793 : (!torch.int, !torch.int) -> !torch.list<int>
    %52305 = torch.aten.view %52197, %52304 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52305, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52306 = torch.aten.mm %52305, %52280 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52306, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49794 = torch.constant.int 4
    %int1792_49795 = torch.constant.int 1792
    %52307 = torch.prim.ListConstruct %int4_49794, %2482, %int1792_49795 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52308 = torch.aten.view %52306, %52307 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49796 = torch.constant.int 4
    %52309 = torch.aten.mul.int %int4_49796, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49797 = torch.constant.int 4096
    %52310 = torch.prim.ListConstruct %52309, %int4096_49797 : (!torch.int, !torch.int) -> !torch.list<int>
    %52311 = torch.aten.view %52198, %52310 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52311, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52312 = torch.aten.mm %52311, %52282 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52312, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49798 = torch.constant.int 4
    %int1792_49799 = torch.constant.int 1792
    %52313 = torch.prim.ListConstruct %int4_49798, %2482, %int1792_49799 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52314 = torch.aten.view %52312, %52313 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49800 = torch.constant.int 4
    %52315 = torch.aten.mul.int %int4_49800, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49801 = torch.constant.int 4096
    %52316 = torch.prim.ListConstruct %52315, %int4096_49801 : (!torch.int, !torch.int) -> !torch.list<int>
    %52317 = torch.aten.view %52199, %52316 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52317, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52318 = torch.aten.mm %52317, %52284 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52318, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49802 = torch.constant.int 4
    %int1792_49803 = torch.constant.int 1792
    %52319 = torch.prim.ListConstruct %int4_49802, %2482, %int1792_49803 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52320 = torch.aten.view %52318, %52319 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49804 = torch.constant.int 4
    %52321 = torch.aten.mul.int %int4_49804, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49805 = torch.constant.int 4096
    %52322 = torch.prim.ListConstruct %52321, %int4096_49805 : (!torch.int, !torch.int) -> !torch.list<int>
    %52323 = torch.aten.view %52200, %52322 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52323, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52324 = torch.aten.mm %52323, %52286 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52324, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49806 = torch.constant.int 4
    %int1792_49807 = torch.constant.int 1792
    %52325 = torch.prim.ListConstruct %int4_49806, %2482, %int1792_49807 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52326 = torch.aten.view %52324, %52325 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49808 = torch.constant.int 4
    %52327 = torch.aten.mul.int %int4_49808, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49809 = torch.constant.int 4096
    %52328 = torch.prim.ListConstruct %52327, %int4096_49809 : (!torch.int, !torch.int) -> !torch.list<int>
    %52329 = torch.aten.view %52201, %52328 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52329, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52330 = torch.aten.mm %52329, %52288 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52330, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49810 = torch.constant.int 4
    %int1792_49811 = torch.constant.int 1792
    %52331 = torch.prim.ListConstruct %int4_49810, %2482, %int1792_49811 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52332 = torch.aten.view %52330, %52331 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_49812 = torch.constant.int 4
    %52333 = torch.aten.mul.int %int4_49812, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_49813 = torch.constant.int 4096
    %52334 = torch.prim.ListConstruct %52333, %int4096_49813 : (!torch.int, !torch.int) -> !torch.list<int>
    %52335 = torch.aten.view %52202, %52334 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52335, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52336 = torch.aten.mm %52335, %52290 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52336, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_49814 = torch.constant.int 4
    %int1792_49815 = torch.constant.int 1792
    %52337 = torch.prim.ListConstruct %int4_49814, %2482, %int1792_49815 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52338 = torch.aten.view %52336, %52337 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
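    // SwiGLU combine: silu(gate) * up, elementwise per shard.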
    %52339 = torch.aten.mul.Tensor %52267, %52296 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52340 = torch.aten.mul.Tensor %52268, %52302 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52341 = torch.aten.mul.Tensor %52269, %52308 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52342 = torch.aten.mul.Tensor %52270, %52314 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52343 = torch.aten.mul.Tensor %52271, %52320 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52344 = torch.aten.mul.Tensor %52272, %52326 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52345 = torch.aten.mul.Tensor %52273, %52332 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %52346 = torch.aten.mul.Tensor %52274, %52338 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %52346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
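    // Transpose the down-projection weight shards ([4096,1792] -> [1792,4096]).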
    %int1_49816 = torch.constant.int 1
    %int0_49817 = torch.constant.int 0
    %52347 = torch.prim.ListConstruct %int1_49816, %int0_49817 : (!torch.int, !torch.int) -> !torch.list<int>
    %52348 = torch.aten.permute %1944, %52347 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_49818 = torch.constant.int 1
    %int0_49819 = torch.constant.int 0
    %52349 = torch.prim.ListConstruct %int1_49818, %int0_49819 : (!torch.int, !torch.int) -> !torch.list<int>
    %52350 = torch.aten.permute %1945, %52349 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_49820 = torch.constant.int 1
    %int0_49821 = torch.constant.int 0
    %52351 = torch.prim.ListConstruct %int1_49820, %int0_49821 : (!torch.int, !torch.int) -> !torch.list<int>
    %52352 = torch.aten.permute %1946, %52351 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_49822 = torch.constant.int 1
    %int0_49823 = torch.constant.int 0
    %52353 = torch.prim.ListConstruct %int1_49822, %int0_49823 : (!torch.int, !torch.int) -> !torch.list<int>
    %52354 = torch.aten.permute %1947, %52353 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_49824 = torch.constant.int 1
    %int0_49825 = torch.constant.int 0
    %52355 = torch.prim.ListConstruct %int1_49824, %int0_49825 : (!torch.int, !torch.int) -> !torch.list<int>
    %52356 = torch.aten.permute %1948, %52355 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_49826 = torch.constant.int 1
    %int0_49827 = torch.constant.int 0
    %52357 = torch.prim.ListConstruct %int1_49826, %int0_49827 : (!torch.int, !torch.int) -> !torch.list<int>
    %52358 = torch.aten.permute %1949, %52357 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_49828 = torch.constant.int 1
    %int0_49829 = torch.constant.int 0
    %52359 = torch.prim.ListConstruct %int1_49828, %int0_49829 : (!torch.int, !torch.int) -> !torch.list<int>
    %52360 = torch.aten.permute %1950, %52359 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_49830 = torch.constant.int 1
    %int0_49831 = torch.constant.int 0
    %52361 = torch.prim.ListConstruct %int1_49830, %int0_49831 : (!torch.int, !torch.int) -> !torch.list<int>
    %52362 = torch.aten.permute %1951, %52361 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
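    // Down-projection matmuls per shard, back to the 4096 model dimension; here the
    // dynamic sequence dim is read back with torch.aten.size.int instead of being
    // recomputed from %2482.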
    %int1_49832 = torch.constant.int 1
    %52363 = torch.aten.size.int %52224, %int1_49832 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_49833 = torch.constant.int 4
    %52364 = torch.aten.mul.int %int4_49833, %52363 : !torch.int, !torch.int -> !torch.int
    %int1792_49834 = torch.constant.int 1792
    %52365 = torch.prim.ListConstruct %52364, %int1792_49834 : (!torch.int, !torch.int) -> !torch.list<int>
    %52366 = torch.aten.view %52339, %52365 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52366, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %52367 = torch.aten.mm %52366, %52348 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52367, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49835 = torch.constant.int 4
    %int4096_49836 = torch.constant.int 4096
    %52368 = torch.prim.ListConstruct %int4_49835, %52363, %int4096_49836 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52369 = torch.aten.view %52367, %52368 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49837 = torch.constant.int 1
    %52370 = torch.aten.size.int %52230, %int1_49837 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_49838 = torch.constant.int 4
    %52371 = torch.aten.mul.int %int4_49838, %52370 : !torch.int, !torch.int -> !torch.int
    %int1792_49839 = torch.constant.int 1792
    %52372 = torch.prim.ListConstruct %52371, %int1792_49839 : (!torch.int, !torch.int) -> !torch.list<int>
    %52373 = torch.aten.view %52340, %52372 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52373, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %52374 = torch.aten.mm %52373, %52350 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52374, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49840 = torch.constant.int 4
    %int4096_49841 = torch.constant.int 4096
    %52375 = torch.prim.ListConstruct %int4_49840, %52370, %int4096_49841 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52376 = torch.aten.view %52374, %52375 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49842 = torch.constant.int 1
    %52377 = torch.aten.size.int %52236, %int1_49842 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_49843 = torch.constant.int 4
    %52378 = torch.aten.mul.int %int4_49843, %52377 : !torch.int, !torch.int -> !torch.int
    %int1792_49844 = torch.constant.int 1792
    %52379 = torch.prim.ListConstruct %52378, %int1792_49844 : (!torch.int, !torch.int) -> !torch.list<int>
    %52380 = torch.aten.view %52341, %52379 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52380, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %52381 = torch.aten.mm %52380, %52352 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52381, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49845 = torch.constant.int 4
    %int4096_49846 = torch.constant.int 4096
    %52382 = torch.prim.ListConstruct %int4_49845, %52377, %int4096_49846 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52383 = torch.aten.view %52381, %52382 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49847 = torch.constant.int 1
    %52384 = torch.aten.size.int %52242, %int1_49847 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_49848 = torch.constant.int 4
    %52385 = torch.aten.mul.int %int4_49848, %52384 : !torch.int, !torch.int -> !torch.int
    %int1792_49849 = torch.constant.int 1792
    %52386 = torch.prim.ListConstruct %52385, %int1792_49849 : (!torch.int, !torch.int) -> !torch.list<int>
    %52387 = torch.aten.view %52342, %52386 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52387, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %52388 = torch.aten.mm %52387, %52354 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52388, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49850 = torch.constant.int 4
    %int4096_49851 = torch.constant.int 4096
    %52389 = torch.prim.ListConstruct %int4_49850, %52384, %int4096_49851 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52390 = torch.aten.view %52388, %52389 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49852 = torch.constant.int 1
    %52391 = torch.aten.size.int %52248, %int1_49852 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_49853 = torch.constant.int 4
    %52392 = torch.aten.mul.int %int4_49853, %52391 : !torch.int, !torch.int -> !torch.int
    %int1792_49854 = torch.constant.int 1792
    %52393 = torch.prim.ListConstruct %52392, %int1792_49854 : (!torch.int, !torch.int) -> !torch.list<int>
    %52394 = torch.aten.view %52343, %52393 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52394, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %52395 = torch.aten.mm %52394, %52356 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52395, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49855 = torch.constant.int 4
    %int4096_49856 = torch.constant.int 4096
    %52396 = torch.prim.ListConstruct %int4_49855, %52391, %int4096_49856 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52397 = torch.aten.view %52395, %52396 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49857 = torch.constant.int 1
    %52398 = torch.aten.size.int %52254, %int1_49857 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_49858 = torch.constant.int 4
    %52399 = torch.aten.mul.int %int4_49858, %52398 : !torch.int, !torch.int -> !torch.int
    %int1792_49859 = torch.constant.int 1792
    %52400 = torch.prim.ListConstruct %52399, %int1792_49859 : (!torch.int, !torch.int) -> !torch.list<int>
    %52401 = torch.aten.view %52344, %52400 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52401, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %52402 = torch.aten.mm %52401, %52358 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52402, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49860 = torch.constant.int 4
    %int4096_49861 = torch.constant.int 4096
    %52403 = torch.prim.ListConstruct %int4_49860, %52398, %int4096_49861 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52404 = torch.aten.view %52402, %52403 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49862 = torch.constant.int 1
    %52405 = torch.aten.size.int %52260, %int1_49862 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_49863 = torch.constant.int 4
    %52406 = torch.aten.mul.int %int4_49863, %52405 : !torch.int, !torch.int -> !torch.int
    %int1792_49864 = torch.constant.int 1792
    %52407 = torch.prim.ListConstruct %52406, %int1792_49864 : (!torch.int, !torch.int) -> !torch.list<int>
    %52408 = torch.aten.view %52345, %52407 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52408, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %52409 = torch.aten.mm %52408, %52360 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52409, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49865 = torch.constant.int 4
    %int4096_49866 = torch.constant.int 4096
    %52410 = torch.prim.ListConstruct %int4_49865, %52405, %int4096_49866 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52411 = torch.aten.view %52409, %52410 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49867 = torch.constant.int 1
    %52412 = torch.aten.size.int %52266, %int1_49867 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_49868 = torch.constant.int 4
    %52413 = torch.aten.mul.int %int4_49868, %52412 : !torch.int, !torch.int -> !torch.int
    %int1792_49869 = torch.constant.int 1792
    %52414 = torch.prim.ListConstruct %52413, %int1792_49869 : (!torch.int, !torch.int) -> !torch.list<int>
    %52415 = torch.aten.view %52346, %52414 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %52415, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %52416 = torch.aten.mm %52415, %52362 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52416, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_49870 = torch.constant.int 4
    %int4096_49871 = torch.constant.int 4096
    %52417 = torch.prim.ListConstruct %int4_49870, %52412, %int4096_49871 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52418 = torch.aten.view %52416, %52417 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
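    // All-reduce, unrolled: flow.tensor.transfer moves the partial FFN results from
    // devices 1..7 onto @__device_0 (device 0's own partial %52369 needs no transfer).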
    %52419 = torch_c.to_builtin_tensor %52376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49872 = arith.constant 1 : index
    %dim_49873 = tensor.dim %52419, %c1_49872 : tensor<4x?x4096xf16>
    %52420 = flow.tensor.transfer %52419 : tensor<4x?x4096xf16>{%dim_49873} to #hal.device.promise<@__device_0>
    %52421 = torch_c.from_builtin_tensor %52420 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52422 = torch_c.to_builtin_tensor %52383 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49874 = arith.constant 1 : index
    %dim_49875 = tensor.dim %52422, %c1_49874 : tensor<4x?x4096xf16>
    %52423 = flow.tensor.transfer %52422 : tensor<4x?x4096xf16>{%dim_49875} to #hal.device.promise<@__device_0>
    %52424 = torch_c.from_builtin_tensor %52423 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52425 = torch_c.to_builtin_tensor %52390 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49876 = arith.constant 1 : index
    %dim_49877 = tensor.dim %52425, %c1_49876 : tensor<4x?x4096xf16>
    %52426 = flow.tensor.transfer %52425 : tensor<4x?x4096xf16>{%dim_49877} to #hal.device.promise<@__device_0>
    %52427 = torch_c.from_builtin_tensor %52426 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52428 = torch_c.to_builtin_tensor %52397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49878 = arith.constant 1 : index
    %dim_49879 = tensor.dim %52428, %c1_49878 : tensor<4x?x4096xf16>
    %52429 = flow.tensor.transfer %52428 : tensor<4x?x4096xf16>{%dim_49879} to #hal.device.promise<@__device_0>
    %52430 = torch_c.from_builtin_tensor %52429 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52431 = torch_c.to_builtin_tensor %52404 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49880 = arith.constant 1 : index
    %dim_49881 = tensor.dim %52431, %c1_49880 : tensor<4x?x4096xf16>
    %52432 = flow.tensor.transfer %52431 : tensor<4x?x4096xf16>{%dim_49881} to #hal.device.promise<@__device_0>
    %52433 = torch_c.from_builtin_tensor %52432 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52434 = torch_c.to_builtin_tensor %52411 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49882 = arith.constant 1 : index
    %dim_49883 = tensor.dim %52434, %c1_49882 : tensor<4x?x4096xf16>
    %52435 = flow.tensor.transfer %52434 : tensor<4x?x4096xf16>{%dim_49883} to #hal.device.promise<@__device_0>
    %52436 = torch_c.from_builtin_tensor %52435 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52437 = torch_c.to_builtin_tensor %52418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49884 = arith.constant 1 : index
    %dim_49885 = tensor.dim %52437, %c1_49884 : tensor<4x?x4096xf16>
    %52438 = flow.tensor.transfer %52437 : tensor<4x?x4096xf16>{%dim_49885} to #hal.device.promise<@__device_0>
    %52439 = torch_c.from_builtin_tensor %52438 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
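    // Accumulate the eight partials on @__device_0 with a chain of torch.aten.add.Tensor.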
    %int1_49886 = torch.constant.int 1
    %52440 = torch.aten.add.Tensor %52369, %52421, %int1_49886 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49887 = torch.constant.int 1
    %52441 = torch.aten.add.Tensor %52440, %52424, %int1_49887 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49888 = torch.constant.int 1
    %52442 = torch.aten.add.Tensor %52441, %52427, %int1_49888 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49889 = torch.constant.int 1
    %52443 = torch.aten.add.Tensor %52442, %52430, %int1_49889 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49890 = torch.constant.int 1
    %52444 = torch.aten.add.Tensor %52443, %52433, %int1_49890 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49891 = torch.constant.int 1
    %52445 = torch.aten.add.Tensor %52444, %52436, %int1_49891 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49892 = torch.constant.int 1
    %52446 = torch.aten.add.Tensor %52445, %52439, %int1_49892 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
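    // Stage the seven remote partials (%52369, %52383, %52390, %52397, %52404, %52411, %52418)
    // on @__device_1; %52376 already lives there.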
    %52447 = torch_c.to_builtin_tensor %52369 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49893 = arith.constant 1 : index
    %dim_49894 = tensor.dim %52447, %c1_49893 : tensor<4x?x4096xf16>
    %52448 = flow.tensor.transfer %52447 : tensor<4x?x4096xf16>{%dim_49894} to #hal.device.promise<@__device_1>
    %52449 = torch_c.from_builtin_tensor %52448 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52450 = torch_c.to_builtin_tensor %52383 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49895 = arith.constant 1 : index
    %dim_49896 = tensor.dim %52450, %c1_49895 : tensor<4x?x4096xf16>
    %52451 = flow.tensor.transfer %52450 : tensor<4x?x4096xf16>{%dim_49896} to #hal.device.promise<@__device_1>
    %52452 = torch_c.from_builtin_tensor %52451 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52453 = torch_c.to_builtin_tensor %52390 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49897 = arith.constant 1 : index
    %dim_49898 = tensor.dim %52453, %c1_49897 : tensor<4x?x4096xf16>
    %52454 = flow.tensor.transfer %52453 : tensor<4x?x4096xf16>{%dim_49898} to #hal.device.promise<@__device_1>
    %52455 = torch_c.from_builtin_tensor %52454 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52456 = torch_c.to_builtin_tensor %52397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49899 = arith.constant 1 : index
    %dim_49900 = tensor.dim %52456, %c1_49899 : tensor<4x?x4096xf16>
    %52457 = flow.tensor.transfer %52456 : tensor<4x?x4096xf16>{%dim_49900} to #hal.device.promise<@__device_1>
    %52458 = torch_c.from_builtin_tensor %52457 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52459 = torch_c.to_builtin_tensor %52404 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49901 = arith.constant 1 : index
    %dim_49902 = tensor.dim %52459, %c1_49901 : tensor<4x?x4096xf16>
    %52460 = flow.tensor.transfer %52459 : tensor<4x?x4096xf16>{%dim_49902} to #hal.device.promise<@__device_1>
    %52461 = torch_c.from_builtin_tensor %52460 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52462 = torch_c.to_builtin_tensor %52411 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49903 = arith.constant 1 : index
    %dim_49904 = tensor.dim %52462, %c1_49903 : tensor<4x?x4096xf16>
    %52463 = flow.tensor.transfer %52462 : tensor<4x?x4096xf16>{%dim_49904} to #hal.device.promise<@__device_1>
    %52464 = torch_c.from_builtin_tensor %52463 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52465 = torch_c.to_builtin_tensor %52418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49905 = arith.constant 1 : index
    %dim_49906 = tensor.dim %52465, %c1_49905 : tensor<4x?x4096xf16>
    %52466 = flow.tensor.transfer %52465 : tensor<4x?x4096xf16>{%dim_49906} to #hal.device.promise<@__device_1>
    %52467 = torch_c.from_builtin_tensor %52466 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
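    // Accumulate all eight partials on @__device_1, starting from the transferred %52449 and
    // the resident %52376; the final sum is %52474.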
    %int1_49907 = torch.constant.int 1
    %52468 = torch.aten.add.Tensor %52449, %52376, %int1_49907 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49908 = torch.constant.int 1
    %52469 = torch.aten.add.Tensor %52468, %52452, %int1_49908 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49909 = torch.constant.int 1
    %52470 = torch.aten.add.Tensor %52469, %52455, %int1_49909 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49910 = torch.constant.int 1
    %52471 = torch.aten.add.Tensor %52470, %52458, %int1_49910 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49911 = torch.constant.int 1
    %52472 = torch.aten.add.Tensor %52471, %52461, %int1_49911 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49912 = torch.constant.int 1
    %52473 = torch.aten.add.Tensor %52472, %52464, %int1_49912 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49913 = torch.constant.int 1
    %52474 = torch.aten.add.Tensor %52473, %52467, %int1_49913 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
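    // Stage the seven remote partials on @__device_2; %52383 already lives there.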
    %52475 = torch_c.to_builtin_tensor %52369 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49914 = arith.constant 1 : index
    %dim_49915 = tensor.dim %52475, %c1_49914 : tensor<4x?x4096xf16>
    %52476 = flow.tensor.transfer %52475 : tensor<4x?x4096xf16>{%dim_49915} to #hal.device.promise<@__device_2>
    %52477 = torch_c.from_builtin_tensor %52476 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52478 = torch_c.to_builtin_tensor %52376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49916 = arith.constant 1 : index
    %dim_49917 = tensor.dim %52478, %c1_49916 : tensor<4x?x4096xf16>
    %52479 = flow.tensor.transfer %52478 : tensor<4x?x4096xf16>{%dim_49917} to #hal.device.promise<@__device_2>
    %52480 = torch_c.from_builtin_tensor %52479 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52481 = torch_c.to_builtin_tensor %52390 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49918 = arith.constant 1 : index
    %dim_49919 = tensor.dim %52481, %c1_49918 : tensor<4x?x4096xf16>
    %52482 = flow.tensor.transfer %52481 : tensor<4x?x4096xf16>{%dim_49919} to #hal.device.promise<@__device_2>
    %52483 = torch_c.from_builtin_tensor %52482 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52484 = torch_c.to_builtin_tensor %52397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49920 = arith.constant 1 : index
    %dim_49921 = tensor.dim %52484, %c1_49920 : tensor<4x?x4096xf16>
    %52485 = flow.tensor.transfer %52484 : tensor<4x?x4096xf16>{%dim_49921} to #hal.device.promise<@__device_2>
    %52486 = torch_c.from_builtin_tensor %52485 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52487 = torch_c.to_builtin_tensor %52404 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49922 = arith.constant 1 : index
    %dim_49923 = tensor.dim %52487, %c1_49922 : tensor<4x?x4096xf16>
    %52488 = flow.tensor.transfer %52487 : tensor<4x?x4096xf16>{%dim_49923} to #hal.device.promise<@__device_2>
    %52489 = torch_c.from_builtin_tensor %52488 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52490 = torch_c.to_builtin_tensor %52411 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49924 = arith.constant 1 : index
    %dim_49925 = tensor.dim %52490, %c1_49924 : tensor<4x?x4096xf16>
    %52491 = flow.tensor.transfer %52490 : tensor<4x?x4096xf16>{%dim_49925} to #hal.device.promise<@__device_2>
    %52492 = torch_c.from_builtin_tensor %52491 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52493 = torch_c.to_builtin_tensor %52418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49926 = arith.constant 1 : index
    %dim_49927 = tensor.dim %52493, %c1_49926 : tensor<4x?x4096xf16>
    %52494 = flow.tensor.transfer %52493 : tensor<4x?x4096xf16>{%dim_49927} to #hal.device.promise<@__device_2>
    %52495 = torch_c.from_builtin_tensor %52494 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
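    // Accumulate all eight partials on @__device_2 (the resident %52383 enters the chain at
    // the second add); the final sum is %52502.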
    %int1_49928 = torch.constant.int 1
    %52496 = torch.aten.add.Tensor %52477, %52480, %int1_49928 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49929 = torch.constant.int 1
    %52497 = torch.aten.add.Tensor %52496, %52383, %int1_49929 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49930 = torch.constant.int 1
    %52498 = torch.aten.add.Tensor %52497, %52483, %int1_49930 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49931 = torch.constant.int 1
    %52499 = torch.aten.add.Tensor %52498, %52486, %int1_49931 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49932 = torch.constant.int 1
    %52500 = torch.aten.add.Tensor %52499, %52489, %int1_49932 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49933 = torch.constant.int 1
    %52501 = torch.aten.add.Tensor %52500, %52492, %int1_49933 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49934 = torch.constant.int 1
    %52502 = torch.aten.add.Tensor %52501, %52495, %int1_49934 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
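    // Stage the seven remote partials on @__device_3; %52390 already lives there.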
    %52503 = torch_c.to_builtin_tensor %52369 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49935 = arith.constant 1 : index
    %dim_49936 = tensor.dim %52503, %c1_49935 : tensor<4x?x4096xf16>
    %52504 = flow.tensor.transfer %52503 : tensor<4x?x4096xf16>{%dim_49936} to #hal.device.promise<@__device_3>
    %52505 = torch_c.from_builtin_tensor %52504 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52506 = torch_c.to_builtin_tensor %52376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49937 = arith.constant 1 : index
    %dim_49938 = tensor.dim %52506, %c1_49937 : tensor<4x?x4096xf16>
    %52507 = flow.tensor.transfer %52506 : tensor<4x?x4096xf16>{%dim_49938} to #hal.device.promise<@__device_3>
    %52508 = torch_c.from_builtin_tensor %52507 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52509 = torch_c.to_builtin_tensor %52383 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49939 = arith.constant 1 : index
    %dim_49940 = tensor.dim %52509, %c1_49939 : tensor<4x?x4096xf16>
    %52510 = flow.tensor.transfer %52509 : tensor<4x?x4096xf16>{%dim_49940} to #hal.device.promise<@__device_3>
    %52511 = torch_c.from_builtin_tensor %52510 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52512 = torch_c.to_builtin_tensor %52397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49941 = arith.constant 1 : index
    %dim_49942 = tensor.dim %52512, %c1_49941 : tensor<4x?x4096xf16>
    %52513 = flow.tensor.transfer %52512 : tensor<4x?x4096xf16>{%dim_49942} to #hal.device.promise<@__device_3>
    %52514 = torch_c.from_builtin_tensor %52513 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52515 = torch_c.to_builtin_tensor %52404 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49943 = arith.constant 1 : index
    %dim_49944 = tensor.dim %52515, %c1_49943 : tensor<4x?x4096xf16>
    %52516 = flow.tensor.transfer %52515 : tensor<4x?x4096xf16>{%dim_49944} to #hal.device.promise<@__device_3>
    %52517 = torch_c.from_builtin_tensor %52516 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52518 = torch_c.to_builtin_tensor %52411 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49945 = arith.constant 1 : index
    %dim_49946 = tensor.dim %52518, %c1_49945 : tensor<4x?x4096xf16>
    %52519 = flow.tensor.transfer %52518 : tensor<4x?x4096xf16>{%dim_49946} to #hal.device.promise<@__device_3>
    %52520 = torch_c.from_builtin_tensor %52519 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52521 = torch_c.to_builtin_tensor %52418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49947 = arith.constant 1 : index
    %dim_49948 = tensor.dim %52521, %c1_49947 : tensor<4x?x4096xf16>
    %52522 = flow.tensor.transfer %52521 : tensor<4x?x4096xf16>{%dim_49948} to #hal.device.promise<@__device_3>
    %52523 = torch_c.from_builtin_tensor %52522 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
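    // Accumulate all eight partials on @__device_3 (the resident %52390 enters the chain at
    // the third add); the final sum is %52530.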
    %int1_49949 = torch.constant.int 1
    %52524 = torch.aten.add.Tensor %52505, %52508, %int1_49949 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49950 = torch.constant.int 1
    %52525 = torch.aten.add.Tensor %52524, %52511, %int1_49950 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49951 = torch.constant.int 1
    %52526 = torch.aten.add.Tensor %52525, %52390, %int1_49951 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49952 = torch.constant.int 1
    %52527 = torch.aten.add.Tensor %52526, %52514, %int1_49952 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49953 = torch.constant.int 1
    %52528 = torch.aten.add.Tensor %52527, %52517, %int1_49953 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49954 = torch.constant.int 1
    %52529 = torch.aten.add.Tensor %52528, %52520, %int1_49954 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49955 = torch.constant.int 1
    %52530 = torch.aten.add.Tensor %52529, %52523, %int1_49955 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
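    // Stage the seven remote partials on @__device_4; %52397 already lives there.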
    %52531 = torch_c.to_builtin_tensor %52369 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49956 = arith.constant 1 : index
    %dim_49957 = tensor.dim %52531, %c1_49956 : tensor<4x?x4096xf16>
    %52532 = flow.tensor.transfer %52531 : tensor<4x?x4096xf16>{%dim_49957} to #hal.device.promise<@__device_4>
    %52533 = torch_c.from_builtin_tensor %52532 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52534 = torch_c.to_builtin_tensor %52376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49958 = arith.constant 1 : index
    %dim_49959 = tensor.dim %52534, %c1_49958 : tensor<4x?x4096xf16>
    %52535 = flow.tensor.transfer %52534 : tensor<4x?x4096xf16>{%dim_49959} to #hal.device.promise<@__device_4>
    %52536 = torch_c.from_builtin_tensor %52535 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52537 = torch_c.to_builtin_tensor %52383 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49960 = arith.constant 1 : index
    %dim_49961 = tensor.dim %52537, %c1_49960 : tensor<4x?x4096xf16>
    %52538 = flow.tensor.transfer %52537 : tensor<4x?x4096xf16>{%dim_49961} to #hal.device.promise<@__device_4>
    %52539 = torch_c.from_builtin_tensor %52538 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52540 = torch_c.to_builtin_tensor %52390 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49962 = arith.constant 1 : index
    %dim_49963 = tensor.dim %52540, %c1_49962 : tensor<4x?x4096xf16>
    %52541 = flow.tensor.transfer %52540 : tensor<4x?x4096xf16>{%dim_49963} to #hal.device.promise<@__device_4>
    %52542 = torch_c.from_builtin_tensor %52541 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52543 = torch_c.to_builtin_tensor %52404 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49964 = arith.constant 1 : index
    %dim_49965 = tensor.dim %52543, %c1_49964 : tensor<4x?x4096xf16>
    %52544 = flow.tensor.transfer %52543 : tensor<4x?x4096xf16>{%dim_49965} to #hal.device.promise<@__device_4>
    %52545 = torch_c.from_builtin_tensor %52544 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52546 = torch_c.to_builtin_tensor %52411 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49966 = arith.constant 1 : index
    %dim_49967 = tensor.dim %52546, %c1_49966 : tensor<4x?x4096xf16>
    %52547 = flow.tensor.transfer %52546 : tensor<4x?x4096xf16>{%dim_49967} to #hal.device.promise<@__device_4>
    %52548 = torch_c.from_builtin_tensor %52547 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52549 = torch_c.to_builtin_tensor %52418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49968 = arith.constant 1 : index
    %dim_49969 = tensor.dim %52549, %c1_49968 : tensor<4x?x4096xf16>
    %52550 = flow.tensor.transfer %52549 : tensor<4x?x4096xf16>{%dim_49969} to #hal.device.promise<@__device_4>
    %52551 = torch_c.from_builtin_tensor %52550 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
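    // Accumulate all eight partials on @__device_4 (the resident %52397 enters the chain at
    // the fourth add); the final sum is %52558.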
    %int1_49970 = torch.constant.int 1
    %52552 = torch.aten.add.Tensor %52533, %52536, %int1_49970 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49971 = torch.constant.int 1
    %52553 = torch.aten.add.Tensor %52552, %52539, %int1_49971 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49972 = torch.constant.int 1
    %52554 = torch.aten.add.Tensor %52553, %52542, %int1_49972 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49973 = torch.constant.int 1
    %52555 = torch.aten.add.Tensor %52554, %52397, %int1_49973 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49974 = torch.constant.int 1
    %52556 = torch.aten.add.Tensor %52555, %52545, %int1_49974 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49975 = torch.constant.int 1
    %52557 = torch.aten.add.Tensor %52556, %52548, %int1_49975 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49976 = torch.constant.int 1
    %52558 = torch.aten.add.Tensor %52557, %52551, %int1_49976 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
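    // Stage the seven remote partials on @__device_5; %52404 already lives there.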
    %52559 = torch_c.to_builtin_tensor %52369 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49977 = arith.constant 1 : index
    %dim_49978 = tensor.dim %52559, %c1_49977 : tensor<4x?x4096xf16>
    %52560 = flow.tensor.transfer %52559 : tensor<4x?x4096xf16>{%dim_49978} to #hal.device.promise<@__device_5>
    %52561 = torch_c.from_builtin_tensor %52560 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52562 = torch_c.to_builtin_tensor %52376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49979 = arith.constant 1 : index
    %dim_49980 = tensor.dim %52562, %c1_49979 : tensor<4x?x4096xf16>
    %52563 = flow.tensor.transfer %52562 : tensor<4x?x4096xf16>{%dim_49980} to #hal.device.promise<@__device_5>
    %52564 = torch_c.from_builtin_tensor %52563 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52565 = torch_c.to_builtin_tensor %52383 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49981 = arith.constant 1 : index
    %dim_49982 = tensor.dim %52565, %c1_49981 : tensor<4x?x4096xf16>
    %52566 = flow.tensor.transfer %52565 : tensor<4x?x4096xf16>{%dim_49982} to #hal.device.promise<@__device_5>
    %52567 = torch_c.from_builtin_tensor %52566 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52568 = torch_c.to_builtin_tensor %52390 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49983 = arith.constant 1 : index
    %dim_49984 = tensor.dim %52568, %c1_49983 : tensor<4x?x4096xf16>
    %52569 = flow.tensor.transfer %52568 : tensor<4x?x4096xf16>{%dim_49984} to #hal.device.promise<@__device_5>
    %52570 = torch_c.from_builtin_tensor %52569 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52571 = torch_c.to_builtin_tensor %52397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49985 = arith.constant 1 : index
    %dim_49986 = tensor.dim %52571, %c1_49985 : tensor<4x?x4096xf16>
    %52572 = flow.tensor.transfer %52571 : tensor<4x?x4096xf16>{%dim_49986} to #hal.device.promise<@__device_5>
    %52573 = torch_c.from_builtin_tensor %52572 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52574 = torch_c.to_builtin_tensor %52411 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49987 = arith.constant 1 : index
    %dim_49988 = tensor.dim %52574, %c1_49987 : tensor<4x?x4096xf16>
    %52575 = flow.tensor.transfer %52574 : tensor<4x?x4096xf16>{%dim_49988} to #hal.device.promise<@__device_5>
    %52576 = torch_c.from_builtin_tensor %52575 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52577 = torch_c.to_builtin_tensor %52418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49989 = arith.constant 1 : index
    %dim_49990 = tensor.dim %52577, %c1_49989 : tensor<4x?x4096xf16>
    %52578 = flow.tensor.transfer %52577 : tensor<4x?x4096xf16>{%dim_49990} to #hal.device.promise<@__device_5>
    %52579 = torch_c.from_builtin_tensor %52578 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
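    // Accumulate all eight partials on @__device_5 (the resident %52404 enters the chain at
    // the fifth add); the final sum is %52586.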
    %int1_49991 = torch.constant.int 1
    %52580 = torch.aten.add.Tensor %52561, %52564, %int1_49991 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49992 = torch.constant.int 1
    %52581 = torch.aten.add.Tensor %52580, %52567, %int1_49992 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49993 = torch.constant.int 1
    %52582 = torch.aten.add.Tensor %52581, %52570, %int1_49993 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49994 = torch.constant.int 1
    %52583 = torch.aten.add.Tensor %52582, %52573, %int1_49994 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49995 = torch.constant.int 1
    %52584 = torch.aten.add.Tensor %52583, %52404, %int1_49995 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49996 = torch.constant.int 1
    %52585 = torch.aten.add.Tensor %52584, %52576, %int1_49996 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_49997 = torch.constant.int 1
    %52586 = torch.aten.add.Tensor %52585, %52579, %int1_49997 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
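    // Stage the seven remote partials on @__device_6; %52411 already lives there.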
    %52587 = torch_c.to_builtin_tensor %52369 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_49998 = arith.constant 1 : index
    %dim_49999 = tensor.dim %52587, %c1_49998 : tensor<4x?x4096xf16>
    %52588 = flow.tensor.transfer %52587 : tensor<4x?x4096xf16>{%dim_49999} to #hal.device.promise<@__device_6>
    %52589 = torch_c.from_builtin_tensor %52588 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52590 = torch_c.to_builtin_tensor %52376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50000 = arith.constant 1 : index
    %dim_50001 = tensor.dim %52590, %c1_50000 : tensor<4x?x4096xf16>
    %52591 = flow.tensor.transfer %52590 : tensor<4x?x4096xf16>{%dim_50001} to #hal.device.promise<@__device_6>
    %52592 = torch_c.from_builtin_tensor %52591 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52593 = torch_c.to_builtin_tensor %52383 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50002 = arith.constant 1 : index
    %dim_50003 = tensor.dim %52593, %c1_50002 : tensor<4x?x4096xf16>
    %52594 = flow.tensor.transfer %52593 : tensor<4x?x4096xf16>{%dim_50003} to #hal.device.promise<@__device_6>
    %52595 = torch_c.from_builtin_tensor %52594 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52595, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52596 = torch_c.to_builtin_tensor %52390 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50004 = arith.constant 1 : index
    %dim_50005 = tensor.dim %52596, %c1_50004 : tensor<4x?x4096xf16>
    %52597 = flow.tensor.transfer %52596 : tensor<4x?x4096xf16>{%dim_50005} to #hal.device.promise<@__device_6>
    %52598 = torch_c.from_builtin_tensor %52597 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52599 = torch_c.to_builtin_tensor %52397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50006 = arith.constant 1 : index
    %dim_50007 = tensor.dim %52599, %c1_50006 : tensor<4x?x4096xf16>
    %52600 = flow.tensor.transfer %52599 : tensor<4x?x4096xf16>{%dim_50007} to #hal.device.promise<@__device_6>
    %52601 = torch_c.from_builtin_tensor %52600 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52602 = torch_c.to_builtin_tensor %52404 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50008 = arith.constant 1 : index
    %dim_50009 = tensor.dim %52602, %c1_50008 : tensor<4x?x4096xf16>
    %52603 = flow.tensor.transfer %52602 : tensor<4x?x4096xf16>{%dim_50009} to #hal.device.promise<@__device_6>
    %52604 = torch_c.from_builtin_tensor %52603 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52605 = torch_c.to_builtin_tensor %52418 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50010 = arith.constant 1 : index
    %dim_50011 = tensor.dim %52605, %c1_50010 : tensor<4x?x4096xf16>
    %52606 = flow.tensor.transfer %52605 : tensor<4x?x4096xf16>{%dim_50011} to #hal.device.promise<@__device_6>
    %52607 = torch_c.from_builtin_tensor %52606 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
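    // Accumulate all eight partials on @__device_6 (the resident %52411 enters the chain at
    // the sixth add); the final sum is %52614.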
    %int1_50012 = torch.constant.int 1
    %52608 = torch.aten.add.Tensor %52589, %52592, %int1_50012 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50013 = torch.constant.int 1
    %52609 = torch.aten.add.Tensor %52608, %52595, %int1_50013 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50014 = torch.constant.int 1
    %52610 = torch.aten.add.Tensor %52609, %52598, %int1_50014 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50015 = torch.constant.int 1
    %52611 = torch.aten.add.Tensor %52610, %52601, %int1_50015 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50016 = torch.constant.int 1
    %52612 = torch.aten.add.Tensor %52611, %52604, %int1_50016 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50017 = torch.constant.int 1
    %52613 = torch.aten.add.Tensor %52612, %52411, %int1_50017 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50018 = torch.constant.int 1
    %52614 = torch.aten.add.Tensor %52613, %52607, %int1_50018 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
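    // Stage the seven remote partials on @__device_7; %52418 already lives there.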
    %52615 = torch_c.to_builtin_tensor %52369 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50019 = arith.constant 1 : index
    %dim_50020 = tensor.dim %52615, %c1_50019 : tensor<4x?x4096xf16>
    %52616 = flow.tensor.transfer %52615 : tensor<4x?x4096xf16>{%dim_50020} to #hal.device.promise<@__device_7>
    %52617 = torch_c.from_builtin_tensor %52616 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52618 = torch_c.to_builtin_tensor %52376 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50021 = arith.constant 1 : index
    %dim_50022 = tensor.dim %52618, %c1_50021 : tensor<4x?x4096xf16>
    %52619 = flow.tensor.transfer %52618 : tensor<4x?x4096xf16>{%dim_50022} to #hal.device.promise<@__device_7>
    %52620 = torch_c.from_builtin_tensor %52619 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52621 = torch_c.to_builtin_tensor %52383 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50023 = arith.constant 1 : index
    %dim_50024 = tensor.dim %52621, %c1_50023 : tensor<4x?x4096xf16>
    %52622 = flow.tensor.transfer %52621 : tensor<4x?x4096xf16>{%dim_50024} to #hal.device.promise<@__device_7>
    %52623 = torch_c.from_builtin_tensor %52622 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52624 = torch_c.to_builtin_tensor %52390 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50025 = arith.constant 1 : index
    %dim_50026 = tensor.dim %52624, %c1_50025 : tensor<4x?x4096xf16>
    %52625 = flow.tensor.transfer %52624 : tensor<4x?x4096xf16>{%dim_50026} to #hal.device.promise<@__device_7>
    %52626 = torch_c.from_builtin_tensor %52625 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52627 = torch_c.to_builtin_tensor %52397 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50027 = arith.constant 1 : index
    %dim_50028 = tensor.dim %52627, %c1_50027 : tensor<4x?x4096xf16>
    %52628 = flow.tensor.transfer %52627 : tensor<4x?x4096xf16>{%dim_50028} to #hal.device.promise<@__device_7>
    %52629 = torch_c.from_builtin_tensor %52628 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52630 = torch_c.to_builtin_tensor %52404 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50029 = arith.constant 1 : index
    %dim_50030 = tensor.dim %52630, %c1_50029 : tensor<4x?x4096xf16>
    %52631 = flow.tensor.transfer %52630 : tensor<4x?x4096xf16>{%dim_50030} to #hal.device.promise<@__device_7>
    %52632 = torch_c.from_builtin_tensor %52631 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %52633 = torch_c.to_builtin_tensor %52411 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_50031 = arith.constant 1 : index
    %dim_50032 = tensor.dim %52633, %c1_50031 : tensor<4x?x4096xf16>
    %52634 = flow.tensor.transfer %52633 : tensor<4x?x4096xf16>{%dim_50032} to #hal.device.promise<@__device_7>
    %52635 = torch_c.from_builtin_tensor %52634 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
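    // Accumulate all eight partials on @__device_7 (the resident %52418 enters the chain last);
    // the final sum is %52642. This completes the per-device reduction: every device now holds
    // the same total in %52446/%52474/%52502/%52530/%52558/%52586/%52614/%52642.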
    %int1_50033 = torch.constant.int 1
    %52636 = torch.aten.add.Tensor %52617, %52620, %int1_50033 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50034 = torch.constant.int 1
    %52637 = torch.aten.add.Tensor %52636, %52623, %int1_50034 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50035 = torch.constant.int 1
    %52638 = torch.aten.add.Tensor %52637, %52626, %int1_50035 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50036 = torch.constant.int 1
    %52639 = torch.aten.add.Tensor %52638, %52629, %int1_50036 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50037 = torch.constant.int 1
    %52640 = torch.aten.add.Tensor %52639, %52632, %int1_50037 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50038 = torch.constant.int 1
    %52641 = torch.aten.add.Tensor %52640, %52635, %int1_50038 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50039 = torch.constant.int 1
    %52642 = torch.aten.add.Tensor %52641, %52418, %int1_50039 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
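    // Add each device's reduced total to %52123 .. %52130, the per-device copies of an earlier
    // activation; given the surrounding structure this is most plausibly the residual
    // connection feeding the next normalization.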
    %int1_50040 = torch.constant.int 1
    %52643 = torch.aten.add.Tensor %52123, %52446, %int1_50040 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50041 = torch.constant.int 1
    %52644 = torch.aten.add.Tensor %52124, %52474, %int1_50041 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50042 = torch.constant.int 1
    %52645 = torch.aten.add.Tensor %52125, %52502, %int1_50042 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50043 = torch.constant.int 1
    %52646 = torch.aten.add.Tensor %52126, %52530, %int1_50043 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50044 = torch.constant.int 1
    %52647 = torch.aten.add.Tensor %52127, %52558, %int1_50044 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50045 = torch.constant.int 1
    %52648 = torch.aten.add.Tensor %52128, %52586, %int1_50045 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50046 = torch.constant.int 1
    %52649 = torch.aten.add.Tensor %52129, %52614, %int1_50046 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_50047 = torch.constant.int 1
    %52650 = torch.aten.add.Tensor %52130, %52642, %int1_50047 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
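    // Upcast each device's result from f16 to f32 (dtype code 6) before computing the
    // normalization statistics.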
    %int6_50048 = torch.constant.int 6
    %52651 = torch.prims.convert_element_type %52643, %int6_50048 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_50049 = torch.constant.int 6
    %52652 = torch.prims.convert_element_type %52644, %int6_50049 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_50050 = torch.constant.int 6
    %52653 = torch.prims.convert_element_type %52645, %int6_50050 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_50051 = torch.constant.int 6
    %52654 = torch.prims.convert_element_type %52646, %int6_50051 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_50052 = torch.constant.int 6
    %52655 = torch.prims.convert_element_type %52647, %int6_50052 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_50053 = torch.constant.int 6
    %52656 = torch.prims.convert_element_type %52648, %int6_50053 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_50054 = torch.constant.int 6
    %52657 = torch.prims.convert_element_type %52649, %int6_50054 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_50055 = torch.constant.int 6
    %52658 = torch.prims.convert_element_type %52650, %int6_50055 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
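    // Elementwise square (x^2) of each device's f32 activations.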
    %int2_50056 = torch.constant.int 2
    %52659 = torch.aten.pow.Tensor_Scalar %52651, %int2_50056 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52659, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_50057 = torch.constant.int 2
    %52660 = torch.aten.pow.Tensor_Scalar %52652, %int2_50057 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_50058 = torch.constant.int 2
    %52661 = torch.aten.pow.Tensor_Scalar %52653, %int2_50058 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_50059 = torch.constant.int 2
    %52662 = torch.aten.pow.Tensor_Scalar %52654, %int2_50059 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_50060 = torch.constant.int 2
    %52663 = torch.aten.pow.Tensor_Scalar %52655, %int2_50060 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_50061 = torch.constant.int 2
    %52664 = torch.aten.pow.Tensor_Scalar %52656, %int2_50061 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_50062 = torch.constant.int 2
    %52665 = torch.aten.pow.Tensor_Scalar %52657, %int2_50062 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52665, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_50063 = torch.constant.int 2
    %52666 = torch.aten.pow.Tensor_Scalar %52658, %int2_50063 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
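    // Mean of the squares over the hidden dimension (dim -1, keepdim=true), per device,
    // yielding one statistic per token: shape [4, ?, 1].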
    %int-1_50064 = torch.constant.int -1
    %52667 = torch.prim.ListConstruct %int-1_50064 : (!torch.int) -> !torch.list<int>
    %true_50065 = torch.constant.bool true
    %none_50066 = torch.constant.none
    %52668 = torch.aten.mean.dim %52659, %52667, %true_50065, %none_50066 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_50067 = torch.constant.int -1
    %52669 = torch.prim.ListConstruct %int-1_50067 : (!torch.int) -> !torch.list<int>
    %true_50068 = torch.constant.bool true
    %none_50069 = torch.constant.none
    %52670 = torch.aten.mean.dim %52660, %52669, %true_50068, %none_50069 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_50070 = torch.constant.int -1
    %52671 = torch.prim.ListConstruct %int-1_50070 : (!torch.int) -> !torch.list<int>
    %true_50071 = torch.constant.bool true
    %none_50072 = torch.constant.none
    %52672 = torch.aten.mean.dim %52661, %52671, %true_50071, %none_50072 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_50073 = torch.constant.int -1
    %52673 = torch.prim.ListConstruct %int-1_50073 : (!torch.int) -> !torch.list<int>
    %true_50074 = torch.constant.bool true
    %none_50075 = torch.constant.none
    %52674 = torch.aten.mean.dim %52662, %52673, %true_50074, %none_50075 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_50076 = torch.constant.int -1
    %52675 = torch.prim.ListConstruct %int-1_50076 : (!torch.int) -> !torch.list<int>
    %true_50077 = torch.constant.bool true
    %none_50078 = torch.constant.none
    %52676 = torch.aten.mean.dim %52663, %52675, %true_50077, %none_50078 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_50079 = torch.constant.int -1
    %52677 = torch.prim.ListConstruct %int-1_50079 : (!torch.int) -> !torch.list<int>
    %true_50080 = torch.constant.bool true
    %none_50081 = torch.constant.none
    %52678 = torch.aten.mean.dim %52664, %52677, %true_50080, %none_50081 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_50082 = torch.constant.int -1
    %52679 = torch.prim.ListConstruct %int-1_50082 : (!torch.int) -> !torch.list<int>
    %true_50083 = torch.constant.bool true
    %none_50084 = torch.constant.none
    %52680 = torch.aten.mean.dim %52665, %52679, %true_50083, %none_50084 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_50085 = torch.constant.int -1
    %52681 = torch.prim.ListConstruct %int-1_50085 : (!torch.int) -> !torch.list<int>
    %true_50086 = torch.constant.bool true
    %none_50087 = torch.constant.none
    %52682 = torch.aten.mean.dim %52666, %52681, %true_50086, %none_50087 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
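    // Add epsilon (~1e-5) to the mean of squares for numerical stability.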
    %float9.999990e-06_50088 = torch.constant.float 9.9999997473787516E-6
    %int1_50089 = torch.constant.int 1
    %52683 = torch.aten.add.Scalar %52668, %float9.999990e-06_50088, %int1_50089 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_50090 = torch.constant.float 9.9999997473787516E-6
    %int1_50091 = torch.constant.int 1
    %52684 = torch.aten.add.Scalar %52670, %float9.999990e-06_50090, %int1_50091 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_50092 = torch.constant.float 9.9999997473787516E-6
    %int1_50093 = torch.constant.int 1
    %52685 = torch.aten.add.Scalar %52672, %float9.999990e-06_50092, %int1_50093 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_50094 = torch.constant.float 9.9999997473787516E-6
    %int1_50095 = torch.constant.int 1
    %52686 = torch.aten.add.Scalar %52674, %float9.999990e-06_50094, %int1_50095 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_50096 = torch.constant.float 9.9999997473787516E-6
    %int1_50097 = torch.constant.int 1
    %52687 = torch.aten.add.Scalar %52676, %float9.999990e-06_50096, %int1_50097 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_50098 = torch.constant.float 9.9999997473787516E-6
    %int1_50099 = torch.constant.int 1
    %52688 = torch.aten.add.Scalar %52678, %float9.999990e-06_50098, %int1_50099 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_50100 = torch.constant.float 9.9999997473787516E-6
    %int1_50101 = torch.constant.int 1
    %52689 = torch.aten.add.Scalar %52680, %float9.999990e-06_50100, %int1_50101 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_50102 = torch.constant.float 9.9999997473787516E-6
    %int1_50103 = torch.constant.int 1
    %52690 = torch.aten.add.Scalar %52682, %float9.999990e-06_50102, %int1_50103 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52690, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
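    // rsqrt of (mean + eps) yields the per-position normalization factor,
    // one [4,?,1] tensor per shard.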
    %52691 = torch.aten.rsqrt %52683 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52692 = torch.aten.rsqrt %52684 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52693 = torch.aten.rsqrt %52685 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52694 = torch.aten.rsqrt %52686 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52695 = torch.aten.rsqrt %52687 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52696 = torch.aten.rsqrt %52688 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52696, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52697 = torch.aten.rsqrt %52689 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %52698 = torch.aten.rsqrt %52690 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %52698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
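    // Normalize: multiply each shard's f32 activations by its broadcasted
    // rsqrt factor.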
    %52699 = torch.aten.mul.Tensor %52651, %52691 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52700 = torch.aten.mul.Tensor %52652, %52692 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52701 = torch.aten.mul.Tensor %52653, %52693 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52702 = torch.aten.mul.Tensor %52654, %52694 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52703 = torch.aten.mul.Tensor %52655, %52695 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52704 = torch.aten.mul.Tensor %52656, %52696 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52705 = torch.aten.mul.Tensor %52657, %52697 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52706 = torch.aten.mul.Tensor %52658, %52698 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
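    // Scale by the per-device [4096] f32 weight vectors (%1952..%1959),
    // presumably the replicated RMSNorm gamma for this block.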
    %52707 = torch.aten.mul.Tensor %1952, %52699 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52708 = torch.aten.mul.Tensor %1953, %52700 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52709 = torch.aten.mul.Tensor %1954, %52701 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52710 = torch.aten.mul.Tensor %1955, %52702 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52711 = torch.aten.mul.Tensor %1956, %52703 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52712 = torch.aten.mul.Tensor %1957, %52704 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52713 = torch.aten.mul.Tensor %1958, %52705 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %52714 = torch.aten.mul.Tensor %1959, %52706 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %52714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
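    // Cast the normalized f32 activations back to f16 for the projection
    // matmuls below (element type 5 = f16).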
    %int5_50104 = torch.constant.int 5
    %52715 = torch.prims.convert_element_type %52707, %int5_50104 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_50105 = torch.constant.int 5
    %52716 = torch.prims.convert_element_type %52708, %int5_50105 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_50106 = torch.constant.int 5
    %52717 = torch.prims.convert_element_type %52709, %int5_50106 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_50107 = torch.constant.int 5
    %52718 = torch.prims.convert_element_type %52710, %int5_50107 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_50108 = torch.constant.int 5
    %52719 = torch.prims.convert_element_type %52711, %int5_50108 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_50109 = torch.constant.int 5
    %52720 = torch.prims.convert_element_type %52712, %int5_50109 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_50110 = torch.constant.int 5
    %52721 = torch.prims.convert_element_type %52713, %int5_50110 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_50111 = torch.constant.int 5
    %52722 = torch.prims.convert_element_type %52714, %int5_50111 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %52722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
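    // Projection against the [512,4096] weight shards (%1960..%1967), one matmul
    // per device: transpose each weight to [4096,512], flatten [4,?,4096] ->
    // [4*?,4096] since aten.mm is 2-D only, multiply, then restore the [4,?,512]
    // batch shape. Likely the Q projection: 512 = 4 query heads of dim 128 per
    // shard, assuming an 8-way tensor-parallel split of 32 heads.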
    %int1_50112 = torch.constant.int 1
    %int0_50113 = torch.constant.int 0
    %52723 = torch.prim.ListConstruct %int1_50112, %int0_50113 : (!torch.int, !torch.int) -> !torch.list<int>
    %52724 = torch.aten.permute %1960, %52723 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_50114 = torch.constant.int 1
    %int0_50115 = torch.constant.int 0
    %52725 = torch.prim.ListConstruct %int1_50114, %int0_50115 : (!torch.int, !torch.int) -> !torch.list<int>
    %52726 = torch.aten.permute %1961, %52725 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_50116 = torch.constant.int 1
    %int0_50117 = torch.constant.int 0
    %52727 = torch.prim.ListConstruct %int1_50116, %int0_50117 : (!torch.int, !torch.int) -> !torch.list<int>
    %52728 = torch.aten.permute %1962, %52727 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_50118 = torch.constant.int 1
    %int0_50119 = torch.constant.int 0
    %52729 = torch.prim.ListConstruct %int1_50118, %int0_50119 : (!torch.int, !torch.int) -> !torch.list<int>
    %52730 = torch.aten.permute %1963, %52729 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_50120 = torch.constant.int 1
    %int0_50121 = torch.constant.int 0
    %52731 = torch.prim.ListConstruct %int1_50120, %int0_50121 : (!torch.int, !torch.int) -> !torch.list<int>
    %52732 = torch.aten.permute %1964, %52731 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_50122 = torch.constant.int 1
    %int0_50123 = torch.constant.int 0
    %52733 = torch.prim.ListConstruct %int1_50122, %int0_50123 : (!torch.int, !torch.int) -> !torch.list<int>
    %52734 = torch.aten.permute %1965, %52733 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_50124 = torch.constant.int 1
    %int0_50125 = torch.constant.int 0
    %52735 = torch.prim.ListConstruct %int1_50124, %int0_50125 : (!torch.int, !torch.int) -> !torch.list<int>
    %52736 = torch.aten.permute %1966, %52735 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_50126 = torch.constant.int 1
    %int0_50127 = torch.constant.int 0
    %52737 = torch.prim.ListConstruct %int1_50126, %int0_50127 : (!torch.int, !torch.int) -> !torch.list<int>
    %52738 = torch.aten.permute %1967, %52737 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int4_50128 = torch.constant.int 4
    %52739 = torch.aten.mul.int %int4_50128, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50129 = torch.constant.int 4096
    %52740 = torch.prim.ListConstruct %52739, %int4096_50129 : (!torch.int, !torch.int) -> !torch.list<int>
    %52741 = torch.aten.view %52715, %52740 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52741, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52742 = torch.aten.mm %52741, %52724 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %52742, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_50130 = torch.constant.int 4
    %int512_50131 = torch.constant.int 512
    %52743 = torch.prim.ListConstruct %int4_50130, %2482, %int512_50131 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52744 = torch.aten.view %52742, %52743 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %52744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_50132 = torch.constant.int 4
    %52745 = torch.aten.mul.int %int4_50132, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50133 = torch.constant.int 4096
    %52746 = torch.prim.ListConstruct %52745, %int4096_50133 : (!torch.int, !torch.int) -> !torch.list<int>
    %52747 = torch.aten.view %52716, %52746 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52747, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52748 = torch.aten.mm %52747, %52726 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %52748, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_50134 = torch.constant.int 4
    %int512_50135 = torch.constant.int 512
    %52749 = torch.prim.ListConstruct %int4_50134, %2482, %int512_50135 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52750 = torch.aten.view %52748, %52749 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %52750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_50136 = torch.constant.int 4
    %52751 = torch.aten.mul.int %int4_50136, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50137 = torch.constant.int 4096
    %52752 = torch.prim.ListConstruct %52751, %int4096_50137 : (!torch.int, !torch.int) -> !torch.list<int>
    %52753 = torch.aten.view %52717, %52752 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52753, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52754 = torch.aten.mm %52753, %52728 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %52754, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_50138 = torch.constant.int 4
    %int512_50139 = torch.constant.int 512
    %52755 = torch.prim.ListConstruct %int4_50138, %2482, %int512_50139 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52756 = torch.aten.view %52754, %52755 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %52756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_50140 = torch.constant.int 4
    %52757 = torch.aten.mul.int %int4_50140, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50141 = torch.constant.int 4096
    %52758 = torch.prim.ListConstruct %52757, %int4096_50141 : (!torch.int, !torch.int) -> !torch.list<int>
    %52759 = torch.aten.view %52718, %52758 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52759, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52760 = torch.aten.mm %52759, %52730 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %52760, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_50142 = torch.constant.int 4
    %int512_50143 = torch.constant.int 512
    %52761 = torch.prim.ListConstruct %int4_50142, %2482, %int512_50143 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52762 = torch.aten.view %52760, %52761 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %52762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_50144 = torch.constant.int 4
    %52763 = torch.aten.mul.int %int4_50144, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50145 = torch.constant.int 4096
    %52764 = torch.prim.ListConstruct %52763, %int4096_50145 : (!torch.int, !torch.int) -> !torch.list<int>
    %52765 = torch.aten.view %52719, %52764 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52765, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52766 = torch.aten.mm %52765, %52732 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %52766, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_50146 = torch.constant.int 4
    %int512_50147 = torch.constant.int 512
    %52767 = torch.prim.ListConstruct %int4_50146, %2482, %int512_50147 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52768 = torch.aten.view %52766, %52767 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %52768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_50148 = torch.constant.int 4
    %52769 = torch.aten.mul.int %int4_50148, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50149 = torch.constant.int 4096
    %52770 = torch.prim.ListConstruct %52769, %int4096_50149 : (!torch.int, !torch.int) -> !torch.list<int>
    %52771 = torch.aten.view %52720, %52770 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52771, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52772 = torch.aten.mm %52771, %52734 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %52772, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_50150 = torch.constant.int 4
    %int512_50151 = torch.constant.int 512
    %52773 = torch.prim.ListConstruct %int4_50150, %2482, %int512_50151 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52774 = torch.aten.view %52772, %52773 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %52774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_50152 = torch.constant.int 4
    %52775 = torch.aten.mul.int %int4_50152, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50153 = torch.constant.int 4096
    %52776 = torch.prim.ListConstruct %52775, %int4096_50153 : (!torch.int, !torch.int) -> !torch.list<int>
    %52777 = torch.aten.view %52721, %52776 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52777, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52778 = torch.aten.mm %52777, %52736 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %52778, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_50154 = torch.constant.int 4
    %int512_50155 = torch.constant.int 512
    %52779 = torch.prim.ListConstruct %int4_50154, %2482, %int512_50155 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52780 = torch.aten.view %52778, %52779 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %52780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_50156 = torch.constant.int 4
    %52781 = torch.aten.mul.int %int4_50156, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50157 = torch.constant.int 4096
    %52782 = torch.prim.ListConstruct %52781, %int4096_50157 : (!torch.int, !torch.int) -> !torch.list<int>
    %52783 = torch.aten.view %52722, %52782 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52783, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52784 = torch.aten.mm %52783, %52738 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %52784, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_50158 = torch.constant.int 4
    %int512_50159 = torch.constant.int 512
    %52785 = torch.prim.ListConstruct %int4_50158, %2482, %int512_50159 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52786 = torch.aten.view %52784, %52785 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %52786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
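    // Same transpose/flatten/mm/view pattern against the [128,4096] weight
    // shards (%1968..%1975): plausibly the K projection, with one 128-dim
    // KV head per device shard.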
    %int1_50160 = torch.constant.int 1
    %int0_50161 = torch.constant.int 0
    %52787 = torch.prim.ListConstruct %int1_50160, %int0_50161 : (!torch.int, !torch.int) -> !torch.list<int>
    %52788 = torch.aten.permute %1968, %52787 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50162 = torch.constant.int 1
    %int0_50163 = torch.constant.int 0
    %52789 = torch.prim.ListConstruct %int1_50162, %int0_50163 : (!torch.int, !torch.int) -> !torch.list<int>
    %52790 = torch.aten.permute %1969, %52789 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50164 = torch.constant.int 1
    %int0_50165 = torch.constant.int 0
    %52791 = torch.prim.ListConstruct %int1_50164, %int0_50165 : (!torch.int, !torch.int) -> !torch.list<int>
    %52792 = torch.aten.permute %1970, %52791 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50166 = torch.constant.int 1
    %int0_50167 = torch.constant.int 0
    %52793 = torch.prim.ListConstruct %int1_50166, %int0_50167 : (!torch.int, !torch.int) -> !torch.list<int>
    %52794 = torch.aten.permute %1971, %52793 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50168 = torch.constant.int 1
    %int0_50169 = torch.constant.int 0
    %52795 = torch.prim.ListConstruct %int1_50168, %int0_50169 : (!torch.int, !torch.int) -> !torch.list<int>
    %52796 = torch.aten.permute %1972, %52795 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50170 = torch.constant.int 1
    %int0_50171 = torch.constant.int 0
    %52797 = torch.prim.ListConstruct %int1_50170, %int0_50171 : (!torch.int, !torch.int) -> !torch.list<int>
    %52798 = torch.aten.permute %1973, %52797 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50172 = torch.constant.int 1
    %int0_50173 = torch.constant.int 0
    %52799 = torch.prim.ListConstruct %int1_50172, %int0_50173 : (!torch.int, !torch.int) -> !torch.list<int>
    %52800 = torch.aten.permute %1974, %52799 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50174 = torch.constant.int 1
    %int0_50175 = torch.constant.int 0
    %52801 = torch.prim.ListConstruct %int1_50174, %int0_50175 : (!torch.int, !torch.int) -> !torch.list<int>
    %52802 = torch.aten.permute %1975, %52801 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_50176 = torch.constant.int 4
    %52803 = torch.aten.mul.int %int4_50176, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50177 = torch.constant.int 4096
    %52804 = torch.prim.ListConstruct %52803, %int4096_50177 : (!torch.int, !torch.int) -> !torch.list<int>
    %52805 = torch.aten.view %52715, %52804 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52805, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52806 = torch.aten.mm %52805, %52788 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52806, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50178 = torch.constant.int 4
    %int128_50179 = torch.constant.int 128
    %52807 = torch.prim.ListConstruct %int4_50178, %2482, %int128_50179 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52808 = torch.aten.view %52806, %52807 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50180 = torch.constant.int 4
    %52809 = torch.aten.mul.int %int4_50180, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50181 = torch.constant.int 4096
    %52810 = torch.prim.ListConstruct %52809, %int4096_50181 : (!torch.int, !torch.int) -> !torch.list<int>
    %52811 = torch.aten.view %52716, %52810 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52811, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52812 = torch.aten.mm %52811, %52790 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52812, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50182 = torch.constant.int 4
    %int128_50183 = torch.constant.int 128
    %52813 = torch.prim.ListConstruct %int4_50182, %2482, %int128_50183 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52814 = torch.aten.view %52812, %52813 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50184 = torch.constant.int 4
    %52815 = torch.aten.mul.int %int4_50184, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50185 = torch.constant.int 4096
    %52816 = torch.prim.ListConstruct %52815, %int4096_50185 : (!torch.int, !torch.int) -> !torch.list<int>
    %52817 = torch.aten.view %52717, %52816 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52817, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52818 = torch.aten.mm %52817, %52792 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52818, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50186 = torch.constant.int 4
    %int128_50187 = torch.constant.int 128
    %52819 = torch.prim.ListConstruct %int4_50186, %2482, %int128_50187 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52820 = torch.aten.view %52818, %52819 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50188 = torch.constant.int 4
    %52821 = torch.aten.mul.int %int4_50188, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50189 = torch.constant.int 4096
    %52822 = torch.prim.ListConstruct %52821, %int4096_50189 : (!torch.int, !torch.int) -> !torch.list<int>
    %52823 = torch.aten.view %52718, %52822 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52823, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52824 = torch.aten.mm %52823, %52794 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52824, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50190 = torch.constant.int 4
    %int128_50191 = torch.constant.int 128
    %52825 = torch.prim.ListConstruct %int4_50190, %2482, %int128_50191 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52826 = torch.aten.view %52824, %52825 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50192 = torch.constant.int 4
    %52827 = torch.aten.mul.int %int4_50192, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50193 = torch.constant.int 4096
    %52828 = torch.prim.ListConstruct %52827, %int4096_50193 : (!torch.int, !torch.int) -> !torch.list<int>
    %52829 = torch.aten.view %52719, %52828 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52829, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52830 = torch.aten.mm %52829, %52796 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52830, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50194 = torch.constant.int 4
    %int128_50195 = torch.constant.int 128
    %52831 = torch.prim.ListConstruct %int4_50194, %2482, %int128_50195 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52832 = torch.aten.view %52830, %52831 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50196 = torch.constant.int 4
    %52833 = torch.aten.mul.int %int4_50196, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50197 = torch.constant.int 4096
    %52834 = torch.prim.ListConstruct %52833, %int4096_50197 : (!torch.int, !torch.int) -> !torch.list<int>
    %52835 = torch.aten.view %52720, %52834 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52835, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52836 = torch.aten.mm %52835, %52798 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52836, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50198 = torch.constant.int 4
    %int128_50199 = torch.constant.int 128
    %52837 = torch.prim.ListConstruct %int4_50198, %2482, %int128_50199 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52838 = torch.aten.view %52836, %52837 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50200 = torch.constant.int 4
    %52839 = torch.aten.mul.int %int4_50200, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50201 = torch.constant.int 4096
    %52840 = torch.prim.ListConstruct %52839, %int4096_50201 : (!torch.int, !torch.int) -> !torch.list<int>
    %52841 = torch.aten.view %52721, %52840 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52841, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52842 = torch.aten.mm %52841, %52800 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52842, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50202 = torch.constant.int 4
    %int128_50203 = torch.constant.int 128
    %52843 = torch.prim.ListConstruct %int4_50202, %2482, %int128_50203 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52844 = torch.aten.view %52842, %52843 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50204 = torch.constant.int 4
    %52845 = torch.aten.mul.int %int4_50204, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50205 = torch.constant.int 4096
    %52846 = torch.prim.ListConstruct %52845, %int4096_50205 : (!torch.int, !torch.int) -> !torch.list<int>
    %52847 = torch.aten.view %52722, %52846 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52847, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52848 = torch.aten.mm %52847, %52802 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52848, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50206 = torch.constant.int 4
    %int128_50207 = torch.constant.int 128
    %52849 = torch.prim.ListConstruct %int4_50206, %2482, %int128_50207 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52850 = torch.aten.view %52848, %52849 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
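    // A third pass over the remaining [128,4096] weight shards (%1976..%1983):
    // plausibly the V projection, again one 128-dim head per shard.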
    %int1_50208 = torch.constant.int 1
    %int0_50209 = torch.constant.int 0
    %52851 = torch.prim.ListConstruct %int1_50208, %int0_50209 : (!torch.int, !torch.int) -> !torch.list<int>
    %52852 = torch.aten.permute %1976, %52851 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50210 = torch.constant.int 1
    %int0_50211 = torch.constant.int 0
    %52853 = torch.prim.ListConstruct %int1_50210, %int0_50211 : (!torch.int, !torch.int) -> !torch.list<int>
    %52854 = torch.aten.permute %1977, %52853 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50212 = torch.constant.int 1
    %int0_50213 = torch.constant.int 0
    %52855 = torch.prim.ListConstruct %int1_50212, %int0_50213 : (!torch.int, !torch.int) -> !torch.list<int>
    %52856 = torch.aten.permute %1978, %52855 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50214 = torch.constant.int 1
    %int0_50215 = torch.constant.int 0
    %52857 = torch.prim.ListConstruct %int1_50214, %int0_50215 : (!torch.int, !torch.int) -> !torch.list<int>
    %52858 = torch.aten.permute %1979, %52857 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50216 = torch.constant.int 1
    %int0_50217 = torch.constant.int 0
    %52859 = torch.prim.ListConstruct %int1_50216, %int0_50217 : (!torch.int, !torch.int) -> !torch.list<int>
    %52860 = torch.aten.permute %1980, %52859 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50218 = torch.constant.int 1
    %int0_50219 = torch.constant.int 0
    %52861 = torch.prim.ListConstruct %int1_50218, %int0_50219 : (!torch.int, !torch.int) -> !torch.list<int>
    %52862 = torch.aten.permute %1981, %52861 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50220 = torch.constant.int 1
    %int0_50221 = torch.constant.int 0
    %52863 = torch.prim.ListConstruct %int1_50220, %int0_50221 : (!torch.int, !torch.int) -> !torch.list<int>
    %52864 = torch.aten.permute %1982, %52863 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_50222 = torch.constant.int 1
    %int0_50223 = torch.constant.int 0
    %52865 = torch.prim.ListConstruct %int1_50222, %int0_50223 : (!torch.int, !torch.int) -> !torch.list<int>
    %52866 = torch.aten.permute %1983, %52865 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_50224 = torch.constant.int 4
    %52867 = torch.aten.mul.int %int4_50224, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50225 = torch.constant.int 4096
    %52868 = torch.prim.ListConstruct %52867, %int4096_50225 : (!torch.int, !torch.int) -> !torch.list<int>
    %52869 = torch.aten.view %52715, %52868 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52869, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52870 = torch.aten.mm %52869, %52852 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52870, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50226 = torch.constant.int 4
    %int128_50227 = torch.constant.int 128
    %52871 = torch.prim.ListConstruct %int4_50226, %2482, %int128_50227 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52872 = torch.aten.view %52870, %52871 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50228 = torch.constant.int 4
    %52873 = torch.aten.mul.int %int4_50228, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50229 = torch.constant.int 4096
    %52874 = torch.prim.ListConstruct %52873, %int4096_50229 : (!torch.int, !torch.int) -> !torch.list<int>
    %52875 = torch.aten.view %52716, %52874 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52875, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52876 = torch.aten.mm %52875, %52854 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52876, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50230 = torch.constant.int 4
    %int128_50231 = torch.constant.int 128
    %52877 = torch.prim.ListConstruct %int4_50230, %2482, %int128_50231 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52878 = torch.aten.view %52876, %52877 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50232 = torch.constant.int 4
    %52879 = torch.aten.mul.int %int4_50232, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50233 = torch.constant.int 4096
    %52880 = torch.prim.ListConstruct %52879, %int4096_50233 : (!torch.int, !torch.int) -> !torch.list<int>
    %52881 = torch.aten.view %52717, %52880 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52881, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52882 = torch.aten.mm %52881, %52856 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52882, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50234 = torch.constant.int 4
    %int128_50235 = torch.constant.int 128
    %52883 = torch.prim.ListConstruct %int4_50234, %2482, %int128_50235 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52884 = torch.aten.view %52882, %52883 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50236 = torch.constant.int 4
    %52885 = torch.aten.mul.int %int4_50236, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50237 = torch.constant.int 4096
    %52886 = torch.prim.ListConstruct %52885, %int4096_50237 : (!torch.int, !torch.int) -> !torch.list<int>
    %52887 = torch.aten.view %52718, %52886 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52887, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52888 = torch.aten.mm %52887, %52858 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52888, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50238 = torch.constant.int 4
    %int128_50239 = torch.constant.int 128
    %52889 = torch.prim.ListConstruct %int4_50238, %2482, %int128_50239 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52890 = torch.aten.view %52888, %52889 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50240 = torch.constant.int 4
    %52891 = torch.aten.mul.int %int4_50240, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50241 = torch.constant.int 4096
    %52892 = torch.prim.ListConstruct %52891, %int4096_50241 : (!torch.int, !torch.int) -> !torch.list<int>
    %52893 = torch.aten.view %52719, %52892 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52893, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52894 = torch.aten.mm %52893, %52860 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52894, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50242 = torch.constant.int 4
    %int128_50243 = torch.constant.int 128
    %52895 = torch.prim.ListConstruct %int4_50242, %2482, %int128_50243 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52896 = torch.aten.view %52894, %52895 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50244 = torch.constant.int 4
    %52897 = torch.aten.mul.int %int4_50244, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50245 = torch.constant.int 4096
    %52898 = torch.prim.ListConstruct %52897, %int4096_50245 : (!torch.int, !torch.int) -> !torch.list<int>
    %52899 = torch.aten.view %52720, %52898 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52899, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52900 = torch.aten.mm %52899, %52862 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52900, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50246 = torch.constant.int 4
    %int128_50247 = torch.constant.int 128
    %52901 = torch.prim.ListConstruct %int4_50246, %2482, %int128_50247 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52902 = torch.aten.view %52900, %52901 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50248 = torch.constant.int 4
    %52903 = torch.aten.mul.int %int4_50248, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50249 = torch.constant.int 4096
    %52904 = torch.prim.ListConstruct %52903, %int4096_50249 : (!torch.int, !torch.int) -> !torch.list<int>
    %52905 = torch.aten.view %52721, %52904 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52905, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52906 = torch.aten.mm %52905, %52864 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52906, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50250 = torch.constant.int 4
    %int128_50251 = torch.constant.int 128
    %52907 = torch.prim.ListConstruct %int4_50250, %2482, %int128_50251 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52908 = torch.aten.view %52906, %52907 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_50252 = torch.constant.int 4
    %52909 = torch.aten.mul.int %int4_50252, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_50253 = torch.constant.int 4096
    %52910 = torch.prim.ListConstruct %52909, %int4096_50253 : (!torch.int, !torch.int) -> !torch.list<int>
    %52911 = torch.aten.view %52722, %52910 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %52911, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %52912 = torch.aten.mm %52911, %52866 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %52912, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_50254 = torch.constant.int 4
    %int128_50255 = torch.constant.int 128
    %52913 = torch.prim.ListConstruct %int4_50254, %2482, %int128_50255 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52914 = torch.aten.view %52912, %52913 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %52914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
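    // Unflatten the 512-wide results for attention: [4,?,512] -> [4,?,4,128]
    // (batch, seq, heads-per-shard, head_dim), once per shard.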
    %int4_50256 = torch.constant.int 4
    %int4_50257 = torch.constant.int 4
    %int128_50258 = torch.constant.int 128
    %52915 = torch.prim.ListConstruct %int4_50256, %2482, %int4_50257, %int128_50258 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52916 = torch.aten.view %52744, %52915 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %52916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_50259 = torch.constant.int 4
    %int4_50260 = torch.constant.int 4
    %int128_50261 = torch.constant.int 128
    %52917 = torch.prim.ListConstruct %int4_50259, %2482, %int4_50260, %int128_50261 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52918 = torch.aten.view %52750, %52917 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %52918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_50262 = torch.constant.int 4
    %int4_50263 = torch.constant.int 4
    %int128_50264 = torch.constant.int 128
    %52919 = torch.prim.ListConstruct %int4_50262, %2482, %int4_50263, %int128_50264 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52920 = torch.aten.view %52756, %52919 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %52920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_50265 = torch.constant.int 4
    %int4_50266 = torch.constant.int 4
    %int128_50267 = torch.constant.int 128
    %52921 = torch.prim.ListConstruct %int4_50265, %2482, %int4_50266, %int128_50267 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52922 = torch.aten.view %52762, %52921 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %52922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_50268 = torch.constant.int 4
    %int4_50269 = torch.constant.int 4
    %int128_50270 = torch.constant.int 128
    %52923 = torch.prim.ListConstruct %int4_50268, %2482, %int4_50269, %int128_50270 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52924 = torch.aten.view %52768, %52923 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %52924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_50271 = torch.constant.int 4
    %int4_50272 = torch.constant.int 4
    %int128_50273 = torch.constant.int 128
    %52925 = torch.prim.ListConstruct %int4_50271, %2482, %int4_50272, %int128_50273 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52926 = torch.aten.view %52774, %52925 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %52926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_50274 = torch.constant.int 4
    %int4_50275 = torch.constant.int 4
    %int128_50276 = torch.constant.int 128
    %52927 = torch.prim.ListConstruct %int4_50274, %2482, %int4_50275, %int128_50276 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52928 = torch.aten.view %52780, %52927 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %52928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_50277 = torch.constant.int 4
    %int4_50278 = torch.constant.int 4
    %int128_50279 = torch.constant.int 128
    %52929 = torch.prim.ListConstruct %int4_50277, %2482, %int4_50278, %int128_50279 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52930 = torch.aten.view %52786, %52929 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %52930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
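    // Unflatten the first set of 128-wide results: [4,?,128] -> [4,?,1,128],
    // a single KV head per shard.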
    %int4_50280 = torch.constant.int 4
    %int1_50281 = torch.constant.int 1
    %int128_50282 = torch.constant.int 128
    %52931 = torch.prim.ListConstruct %int4_50280, %2482, %int1_50281, %int128_50282 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52932 = torch.aten.view %52808, %52931 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50283 = torch.constant.int 4
    %int1_50284 = torch.constant.int 1
    %int128_50285 = torch.constant.int 128
    %52933 = torch.prim.ListConstruct %int4_50283, %2482, %int1_50284, %int128_50285 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52934 = torch.aten.view %52814, %52933 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50286 = torch.constant.int 4
    %int1_50287 = torch.constant.int 1
    %int128_50288 = torch.constant.int 128
    %52935 = torch.prim.ListConstruct %int4_50286, %2482, %int1_50287, %int128_50288 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52936 = torch.aten.view %52820, %52935 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50289 = torch.constant.int 4
    %int1_50290 = torch.constant.int 1
    %int128_50291 = torch.constant.int 128
    %52937 = torch.prim.ListConstruct %int4_50289, %2482, %int1_50290, %int128_50291 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52938 = torch.aten.view %52826, %52937 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52938, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50292 = torch.constant.int 4
    %int1_50293 = torch.constant.int 1
    %int128_50294 = torch.constant.int 128
    %52939 = torch.prim.ListConstruct %int4_50292, %2482, %int1_50293, %int128_50294 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52940 = torch.aten.view %52832, %52939 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50295 = torch.constant.int 4
    %int1_50296 = torch.constant.int 1
    %int128_50297 = torch.constant.int 128
    %52941 = torch.prim.ListConstruct %int4_50295, %2482, %int1_50296, %int128_50297 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52942 = torch.aten.view %52838, %52941 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50298 = torch.constant.int 4
    %int1_50299 = torch.constant.int 1
    %int128_50300 = torch.constant.int 128
    %52943 = torch.prim.ListConstruct %int4_50298, %2482, %int1_50299, %int128_50300 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52944 = torch.aten.view %52844, %52943 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50301 = torch.constant.int 4
    %int1_50302 = torch.constant.int 1
    %int128_50303 = torch.constant.int 128
    %52945 = torch.prim.ListConstruct %int4_50301, %2482, %int1_50302, %int128_50303 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52946 = torch.aten.view %52850, %52945 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
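    // Unflatten the second set of 128-wide results the same way:
    // [4,?,128] -> [4,?,1,128].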
    %int4_50304 = torch.constant.int 4
    %int1_50305 = torch.constant.int 1
    %int128_50306 = torch.constant.int 128
    %52947 = torch.prim.ListConstruct %int4_50304, %2482, %int1_50305, %int128_50306 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52948 = torch.aten.view %52872, %52947 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50307 = torch.constant.int 4
    %int1_50308 = torch.constant.int 1
    %int128_50309 = torch.constant.int 128
    %52949 = torch.prim.ListConstruct %int4_50307, %2482, %int1_50308, %int128_50309 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52950 = torch.aten.view %52878, %52949 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50310 = torch.constant.int 4
    %int1_50311 = torch.constant.int 1
    %int128_50312 = torch.constant.int 128
    %52951 = torch.prim.ListConstruct %int4_50310, %2482, %int1_50311, %int128_50312 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52952 = torch.aten.view %52884, %52951 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50313 = torch.constant.int 4
    %int1_50314 = torch.constant.int 1
    %int128_50315 = torch.constant.int 128
    %52953 = torch.prim.ListConstruct %int4_50313, %2482, %int1_50314, %int128_50315 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52954 = torch.aten.view %52890, %52953 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50316 = torch.constant.int 4
    %int1_50317 = torch.constant.int 1
    %int128_50318 = torch.constant.int 128
    %52955 = torch.prim.ListConstruct %int4_50316, %2482, %int1_50317, %int128_50318 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52956 = torch.aten.view %52896, %52955 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50319 = torch.constant.int 4
    %int1_50320 = torch.constant.int 1
    %int128_50321 = torch.constant.int 128
    %52957 = torch.prim.ListConstruct %int4_50319, %2482, %int1_50320, %int128_50321 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52958 = torch.aten.view %52902, %52957 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50322 = torch.constant.int 4
    %int1_50323 = torch.constant.int 1
    %int128_50324 = torch.constant.int 128
    %52959 = torch.prim.ListConstruct %int4_50322, %2482, %int1_50323, %int128_50324 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52960 = torch.aten.view %52908, %52959 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_50325 = torch.constant.int 4
    %int1_50326 = torch.constant.int 1
    %int128_50327 = torch.constant.int 128
    %52961 = torch.prim.ListConstruct %int4_50325, %2482, %int1_50326, %int128_50327 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %52962 = torch.aten.view %52914, %52961 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %52962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
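    // Host-side ("cpu") construction of the RoPE rotation table:
    //   inv_freq[i] = 1 / 500000^(2i/128)                            for i = 0..63
    //   table[p,i]  = cos(p * inv_freq[i]) + j*sin(p * inv_freq[i])  for p = 0..131071
    // yielding a [131072,64] complex<f32> tensor covering the maximum context length.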
    %int131072_50328 = torch.constant.int 131072
    %none_50329 = torch.constant.none
    %none_50330 = torch.constant.none
    %cpu_50331 = torch.constant.device "cpu"
    %false_50332 = torch.constant.bool false
    %52963 = torch.aten.arange %int131072_50328, %none_50329, %none_50330, %cpu_50331, %false_50332 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_50333 = torch.constant.int 0
    %int128_50334 = torch.constant.int 128
    %int2_50335 = torch.constant.int 2
    %none_50336 = torch.constant.none
    %none_50337 = torch.constant.none
    %cpu_50338 = torch.constant.device "cpu"
    %false_50339 = torch.constant.bool false
    %52964 = torch.aten.arange.start_step %int0_50333, %int128_50334, %int2_50335, %none_50336, %none_50337, %cpu_50338, %false_50339 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_50340 = torch.constant.int 0
    %int0_50341 = torch.constant.int 0
    %int64_50342 = torch.constant.int 64
    %int1_50343 = torch.constant.int 1
    %52965 = torch.aten.slice.Tensor %52964, %int0_50340, %int0_50341, %int64_50342, %int1_50343 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_50344 = torch.constant.int 6
    %52966 = torch.prims.convert_element_type %52965, %int6_50344 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_50345 = torch.constant.int 128
    %52967 = torch.aten.div.Scalar %52966, %int128_50345 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_50346 = torch.constant.float 5.000000e+05
    %52968 = torch.aten.pow.Scalar %float5.000000e05_50346, %52967 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %52969 = torch.aten.reciprocal %52968 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_50347 = torch.constant.float 1.000000e+00
    %52970 = torch.aten.mul.Scalar %52969, %float1.000000e00_50347 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_50348 = torch.constant.int 131072
    %int1_50349 = torch.constant.int 1
    %52971 = torch.prim.ListConstruct %int131072_50348, %int1_50349 : (!torch.int, !torch.int) -> !torch.list<int>
    %52972 = torch.aten.view %52963, %52971 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %52973 = torch.aten.mul.Tensor %52972, %52970 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %52974 = torch.aten.cos %52973 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %52975 = torch.aten.sin %52973 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %52976 = torch.aten.complex %52974, %52975 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
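    // Replicate the rotation table to all eight devices: each
    // to_builtin_tensor / flow.tensor.transfer / from_builtin_tensor triple
    // materializes one per-device copy for @__device_0 .. @__device_7.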
    %52977 = torch_c.to_builtin_tensor %52976 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %52978 = flow.tensor.transfer %52977 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %52979 = torch_c.from_builtin_tensor %52978 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %52980 = torch_c.to_builtin_tensor %52976 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %52981 = flow.tensor.transfer %52980 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %52982 = torch_c.from_builtin_tensor %52981 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %52983 = torch_c.to_builtin_tensor %52976 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %52984 = flow.tensor.transfer %52983 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %52985 = torch_c.from_builtin_tensor %52984 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %52986 = torch_c.to_builtin_tensor %52976 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %52987 = flow.tensor.transfer %52986 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %52988 = torch_c.from_builtin_tensor %52987 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %52989 = torch_c.to_builtin_tensor %52976 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %52990 = flow.tensor.transfer %52989 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %52991 = torch_c.from_builtin_tensor %52990 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %52992 = torch_c.to_builtin_tensor %52976 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %52993 = flow.tensor.transfer %52992 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %52994 = torch_c.from_builtin_tensor %52993 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %52995 = torch_c.to_builtin_tensor %52976 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %52996 = flow.tensor.transfer %52995 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %52997 = torch_c.from_builtin_tensor %52996 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %52998 = torch_c.to_builtin_tensor %52976 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %52999 = flow.tensor.transfer %52998 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %53000 = torch_c.from_builtin_tensor %52999 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
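    // Apply the rotary embedding to the first 4-head shard (likely the query
    // heads on @__device_0): slice the table to the current sequence length
    // (s0 * 16 positions), broadcast it to [1,?,1,64], bitcast the f16
    // [4,?,4,128] activations to [4,?,4,64] complex<f16>, rotate by complex
    // multiply, bitcast back to [4,?,4,128] f32, and truncate to f16.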
    %int1_50350 = torch.constant.int 1
    %53001 = torch.aten.size.int %52744, %int1_50350 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_50351 = torch.constant.int 0
    %53002 = torch.aten.add.int %int0_50351, %53001 : !torch.int, !torch.int -> !torch.int
    %int0_50352 = torch.constant.int 0
    %int0_50353 = torch.constant.int 0
    %int1_50354 = torch.constant.int 1
    %53003 = torch.aten.slice.Tensor %52979, %int0_50352, %int0_50353, %53002, %int1_50354 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53003, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50355 = torch.constant.int 1
    %int0_50356 = torch.constant.int 0
    %int9223372036854775807_50357 = torch.constant.int 9223372036854775807
    %int1_50358 = torch.constant.int 1
    %53004 = torch.aten.slice.Tensor %53003, %int1_50355, %int0_50356, %int9223372036854775807_50357, %int1_50358 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53004, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50359 = torch.constant.int 0
    %53005 = torch.aten.unsqueeze %53004, %int0_50359 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53005, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50360 = torch.constant.int 2
    %53006 = torch.aten.unsqueeze %53005, %int2_50360 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53006, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50361 = torch.constant.int 3
    %int0_50362 = torch.constant.int 0
    %int9223372036854775807_50363 = torch.constant.int 9223372036854775807
    %int1_50364 = torch.constant.int 1
    %53007 = torch.aten.slice.Tensor %53006, %int3_50361, %int0_50362, %int9223372036854775807_50363, %int1_50364 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53007, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53008 = torch_c.to_builtin_tensor %52916 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_50365 = arith.constant 1 : index
    %dim_50366 = tensor.dim %53008, %c1_50365 : tensor<4x?x4x128xf16>
    %53009 = flow.tensor.bitcast %53008 : tensor<4x?x4x128xf16>{%dim_50366} -> tensor<4x?x4x64xcomplex<f16>>{%dim_50366}
    %53010 = torch_c.from_builtin_tensor %53009 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %53010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %53011 = torch.aten.mul.Tensor %53010, %53007 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %53011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %53012 = torch_c.to_builtin_tensor %53011 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_50367 = arith.constant 1 : index
    %dim_50368 = tensor.dim %53012, %c1_50367 : tensor<4x?x4x64xcomplex<f32>>
    %53013 = flow.tensor.bitcast %53012 : tensor<4x?x4x64xcomplex<f32>>{%dim_50368} -> tensor<4x?x4x128xf32>{%dim_50368}
    %53014 = torch_c.from_builtin_tensor %53013 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %53014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_50369 = torch.constant.int 5
    %53015 = torch.prims.convert_element_type %53014, %int5_50369 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
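    // The same slice / bitcast / complex-multiply / bitcast sequence repeats
    // below for the corresponding shards on @__device_1 through @__device_7.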
    %int1_50370 = torch.constant.int 1
    %53016 = torch.aten.size.int %52750, %int1_50370 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_50371 = torch.constant.int 0
    %53017 = torch.aten.add.int %int0_50371, %53016 : !torch.int, !torch.int -> !torch.int
    %int0_50372 = torch.constant.int 0
    %int0_50373 = torch.constant.int 0
    %int1_50374 = torch.constant.int 1
    %53018 = torch.aten.slice.Tensor %52982, %int0_50372, %int0_50373, %53017, %int1_50374 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53018, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50375 = torch.constant.int 1
    %int0_50376 = torch.constant.int 0
    %int9223372036854775807_50377 = torch.constant.int 9223372036854775807
    %int1_50378 = torch.constant.int 1
    %53019 = torch.aten.slice.Tensor %53018, %int1_50375, %int0_50376, %int9223372036854775807_50377, %int1_50378 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53019, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50379 = torch.constant.int 0
    %53020 = torch.aten.unsqueeze %53019, %int0_50379 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53020, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50380 = torch.constant.int 2
    %53021 = torch.aten.unsqueeze %53020, %int2_50380 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53021, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50381 = torch.constant.int 3
    %int0_50382 = torch.constant.int 0
    %int9223372036854775807_50383 = torch.constant.int 9223372036854775807
    %int1_50384 = torch.constant.int 1
    %53022 = torch.aten.slice.Tensor %53021, %int3_50381, %int0_50382, %int9223372036854775807_50383, %int1_50384 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53022, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53023 = torch_c.to_builtin_tensor %52918 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_50385 = arith.constant 1 : index
    %dim_50386 = tensor.dim %53023, %c1_50385 : tensor<4x?x4x128xf16>
    %53024 = flow.tensor.bitcast %53023 : tensor<4x?x4x128xf16>{%dim_50386} -> tensor<4x?x4x64xcomplex<f16>>{%dim_50386}
    %53025 = torch_c.from_builtin_tensor %53024 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %53025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %53026 = torch.aten.mul.Tensor %53025, %53022 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %53026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %53027 = torch_c.to_builtin_tensor %53026 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_50387 = arith.constant 1 : index
    %dim_50388 = tensor.dim %53027, %c1_50387 : tensor<4x?x4x64xcomplex<f32>>
    %53028 = flow.tensor.bitcast %53027 : tensor<4x?x4x64xcomplex<f32>>{%dim_50388} -> tensor<4x?x4x128xf32>{%dim_50388}
    %53029 = torch_c.from_builtin_tensor %53028 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %53029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_50389 = torch.constant.int 5
    %53030 = torch.prims.convert_element_type %53029, %int5_50389 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_50390 = torch.constant.int 1
    %53031 = torch.aten.size.int %52756, %int1_50390 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_50391 = torch.constant.int 0
    %53032 = torch.aten.add.int %int0_50391, %53031 : !torch.int, !torch.int -> !torch.int
    %int0_50392 = torch.constant.int 0
    %int0_50393 = torch.constant.int 0
    %int1_50394 = torch.constant.int 1
    %53033 = torch.aten.slice.Tensor %52985, %int0_50392, %int0_50393, %53032, %int1_50394 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53033, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50395 = torch.constant.int 1
    %int0_50396 = torch.constant.int 0
    %int9223372036854775807_50397 = torch.constant.int 9223372036854775807
    %int1_50398 = torch.constant.int 1
    %53034 = torch.aten.slice.Tensor %53033, %int1_50395, %int0_50396, %int9223372036854775807_50397, %int1_50398 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53034, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50399 = torch.constant.int 0
    %53035 = torch.aten.unsqueeze %53034, %int0_50399 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53035, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50400 = torch.constant.int 2
    %53036 = torch.aten.unsqueeze %53035, %int2_50400 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53036, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50401 = torch.constant.int 3
    %int0_50402 = torch.constant.int 0
    %int9223372036854775807_50403 = torch.constant.int 9223372036854775807
    %int1_50404 = torch.constant.int 1
    %53037 = torch.aten.slice.Tensor %53036, %int3_50401, %int0_50402, %int9223372036854775807_50403, %int1_50404 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53037, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53038 = torch_c.to_builtin_tensor %52920 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_50405 = arith.constant 1 : index
    %dim_50406 = tensor.dim %53038, %c1_50405 : tensor<4x?x4x128xf16>
    %53039 = flow.tensor.bitcast %53038 : tensor<4x?x4x128xf16>{%dim_50406} -> tensor<4x?x4x64xcomplex<f16>>{%dim_50406}
    %53040 = torch_c.from_builtin_tensor %53039 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %53040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %53041 = torch.aten.mul.Tensor %53040, %53037 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %53041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %53042 = torch_c.to_builtin_tensor %53041 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_50407 = arith.constant 1 : index
    %dim_50408 = tensor.dim %53042, %c1_50407 : tensor<4x?x4x64xcomplex<f32>>
    %53043 = flow.tensor.bitcast %53042 : tensor<4x?x4x64xcomplex<f32>>{%dim_50408} -> tensor<4x?x4x128xf32>{%dim_50408}
    %53044 = torch_c.from_builtin_tensor %53043 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %53044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_50409 = torch.constant.int 5
    %53045 = torch.prims.convert_element_type %53044, %int5_50409 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_50410 = torch.constant.int 1
    %53046 = torch.aten.size.int %52762, %int1_50410 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_50411 = torch.constant.int 0
    %53047 = torch.aten.add.int %int0_50411, %53046 : !torch.int, !torch.int -> !torch.int
    %int0_50412 = torch.constant.int 0
    %int0_50413 = torch.constant.int 0
    %int1_50414 = torch.constant.int 1
    %53048 = torch.aten.slice.Tensor %52988, %int0_50412, %int0_50413, %53047, %int1_50414 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53048, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50415 = torch.constant.int 1
    %int0_50416 = torch.constant.int 0
    %int9223372036854775807_50417 = torch.constant.int 9223372036854775807
    %int1_50418 = torch.constant.int 1
    %53049 = torch.aten.slice.Tensor %53048, %int1_50415, %int0_50416, %int9223372036854775807_50417, %int1_50418 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53049, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50419 = torch.constant.int 0
    %53050 = torch.aten.unsqueeze %53049, %int0_50419 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53050, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50420 = torch.constant.int 2
    %53051 = torch.aten.unsqueeze %53050, %int2_50420 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53051, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50421 = torch.constant.int 3
    %int0_50422 = torch.constant.int 0
    %int9223372036854775807_50423 = torch.constant.int 9223372036854775807
    %int1_50424 = torch.constant.int 1
    %53052 = torch.aten.slice.Tensor %53051, %int3_50421, %int0_50422, %int9223372036854775807_50423, %int1_50424 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53052, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53053 = torch_c.to_builtin_tensor %52922 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_50425 = arith.constant 1 : index
    %dim_50426 = tensor.dim %53053, %c1_50425 : tensor<4x?x4x128xf16>
    %53054 = flow.tensor.bitcast %53053 : tensor<4x?x4x128xf16>{%dim_50426} -> tensor<4x?x4x64xcomplex<f16>>{%dim_50426}
    %53055 = torch_c.from_builtin_tensor %53054 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %53055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %53056 = torch.aten.mul.Tensor %53055, %53052 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %53056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %53057 = torch_c.to_builtin_tensor %53056 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_50427 = arith.constant 1 : index
    %dim_50428 = tensor.dim %53057, %c1_50427 : tensor<4x?x4x64xcomplex<f32>>
    %53058 = flow.tensor.bitcast %53057 : tensor<4x?x4x64xcomplex<f32>>{%dim_50428} -> tensor<4x?x4x128xf32>{%dim_50428}
    %53059 = torch_c.from_builtin_tensor %53058 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %53059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_50429 = torch.constant.int 5
    %53060 = torch.prims.convert_element_type %53059, %int5_50429 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_50430 = torch.constant.int 1
    %53061 = torch.aten.size.int %52768, %int1_50430 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_50431 = torch.constant.int 0
    %53062 = torch.aten.add.int %int0_50431, %53061 : !torch.int, !torch.int -> !torch.int
    %int0_50432 = torch.constant.int 0
    %int0_50433 = torch.constant.int 0
    %int1_50434 = torch.constant.int 1
    %53063 = torch.aten.slice.Tensor %52991, %int0_50432, %int0_50433, %53062, %int1_50434 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53063, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50435 = torch.constant.int 1
    %int0_50436 = torch.constant.int 0
    %int9223372036854775807_50437 = torch.constant.int 9223372036854775807
    %int1_50438 = torch.constant.int 1
    %53064 = torch.aten.slice.Tensor %53063, %int1_50435, %int0_50436, %int9223372036854775807_50437, %int1_50438 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53064, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50439 = torch.constant.int 0
    %53065 = torch.aten.unsqueeze %53064, %int0_50439 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53065, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50440 = torch.constant.int 2
    %53066 = torch.aten.unsqueeze %53065, %int2_50440 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53066, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50441 = torch.constant.int 3
    %int0_50442 = torch.constant.int 0
    %int9223372036854775807_50443 = torch.constant.int 9223372036854775807
    %int1_50444 = torch.constant.int 1
    %53067 = torch.aten.slice.Tensor %53066, %int3_50441, %int0_50442, %int9223372036854775807_50443, %int1_50444 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53067, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53068 = torch_c.to_builtin_tensor %52924 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_50445 = arith.constant 1 : index
    %dim_50446 = tensor.dim %53068, %c1_50445 : tensor<4x?x4x128xf16>
    %53069 = flow.tensor.bitcast %53068 : tensor<4x?x4x128xf16>{%dim_50446} -> tensor<4x?x4x64xcomplex<f16>>{%dim_50446}
    %53070 = torch_c.from_builtin_tensor %53069 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %53070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %53071 = torch.aten.mul.Tensor %53070, %53067 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %53071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %53072 = torch_c.to_builtin_tensor %53071 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_50447 = arith.constant 1 : index
    %dim_50448 = tensor.dim %53072, %c1_50447 : tensor<4x?x4x64xcomplex<f32>>
    %53073 = flow.tensor.bitcast %53072 : tensor<4x?x4x64xcomplex<f32>>{%dim_50448} -> tensor<4x?x4x128xf32>{%dim_50448}
    %53074 = torch_c.from_builtin_tensor %53073 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %53074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_50449 = torch.constant.int 5
    %53075 = torch.prims.convert_element_type %53074, %int5_50449 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_50450 = torch.constant.int 1
    %53076 = torch.aten.size.int %52774, %int1_50450 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_50451 = torch.constant.int 0
    %53077 = torch.aten.add.int %int0_50451, %53076 : !torch.int, !torch.int -> !torch.int
    %int0_50452 = torch.constant.int 0
    %int0_50453 = torch.constant.int 0
    %int1_50454 = torch.constant.int 1
    %53078 = torch.aten.slice.Tensor %52994, %int0_50452, %int0_50453, %53077, %int1_50454 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53078, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50455 = torch.constant.int 1
    %int0_50456 = torch.constant.int 0
    %int9223372036854775807_50457 = torch.constant.int 9223372036854775807
    %int1_50458 = torch.constant.int 1
    %53079 = torch.aten.slice.Tensor %53078, %int1_50455, %int0_50456, %int9223372036854775807_50457, %int1_50458 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53079, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50459 = torch.constant.int 0
    %53080 = torch.aten.unsqueeze %53079, %int0_50459 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53080, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50460 = torch.constant.int 2
    %53081 = torch.aten.unsqueeze %53080, %int2_50460 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53081, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50461 = torch.constant.int 3
    %int0_50462 = torch.constant.int 0
    %int9223372036854775807_50463 = torch.constant.int 9223372036854775807
    %int1_50464 = torch.constant.int 1
    %53082 = torch.aten.slice.Tensor %53081, %int3_50461, %int0_50462, %int9223372036854775807_50463, %int1_50464 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53082, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53083 = torch_c.to_builtin_tensor %52926 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_50465 = arith.constant 1 : index
    %dim_50466 = tensor.dim %53083, %c1_50465 : tensor<4x?x4x128xf16>
    %53084 = flow.tensor.bitcast %53083 : tensor<4x?x4x128xf16>{%dim_50466} -> tensor<4x?x4x64xcomplex<f16>>{%dim_50466}
    %53085 = torch_c.from_builtin_tensor %53084 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %53085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %53086 = torch.aten.mul.Tensor %53085, %53082 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %53086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %53087 = torch_c.to_builtin_tensor %53086 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_50467 = arith.constant 1 : index
    %dim_50468 = tensor.dim %53087, %c1_50467 : tensor<4x?x4x64xcomplex<f32>>
    %53088 = flow.tensor.bitcast %53087 : tensor<4x?x4x64xcomplex<f32>>{%dim_50468} -> tensor<4x?x4x128xf32>{%dim_50468}
    %53089 = torch_c.from_builtin_tensor %53088 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %53089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_50469 = torch.constant.int 5
    %53090 = torch.prims.convert_element_type %53089, %int5_50469 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_50470 = torch.constant.int 1
    %53091 = torch.aten.size.int %52780, %int1_50470 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_50471 = torch.constant.int 0
    %53092 = torch.aten.add.int %int0_50471, %53091 : !torch.int, !torch.int -> !torch.int
    %int0_50472 = torch.constant.int 0
    %int0_50473 = torch.constant.int 0
    %int1_50474 = torch.constant.int 1
    %53093 = torch.aten.slice.Tensor %52997, %int0_50472, %int0_50473, %53092, %int1_50474 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53093, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50475 = torch.constant.int 1
    %int0_50476 = torch.constant.int 0
    %int9223372036854775807_50477 = torch.constant.int 9223372036854775807
    %int1_50478 = torch.constant.int 1
    %53094 = torch.aten.slice.Tensor %53093, %int1_50475, %int0_50476, %int9223372036854775807_50477, %int1_50478 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53094, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50479 = torch.constant.int 0
    %53095 = torch.aten.unsqueeze %53094, %int0_50479 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53095, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50480 = torch.constant.int 2
    %53096 = torch.aten.unsqueeze %53095, %int2_50480 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53096, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50481 = torch.constant.int 3
    %int0_50482 = torch.constant.int 0
    %int9223372036854775807_50483 = torch.constant.int 9223372036854775807
    %int1_50484 = torch.constant.int 1
    %53097 = torch.aten.slice.Tensor %53096, %int3_50481, %int0_50482, %int9223372036854775807_50483, %int1_50484 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53097, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53098 = torch_c.to_builtin_tensor %52928 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_50485 = arith.constant 1 : index
    %dim_50486 = tensor.dim %53098, %c1_50485 : tensor<4x?x4x128xf16>
    %53099 = flow.tensor.bitcast %53098 : tensor<4x?x4x128xf16>{%dim_50486} -> tensor<4x?x4x64xcomplex<f16>>{%dim_50486}
    %53100 = torch_c.from_builtin_tensor %53099 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %53100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %53101 = torch.aten.mul.Tensor %53100, %53097 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %53101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %53102 = torch_c.to_builtin_tensor %53101 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_50487 = arith.constant 1 : index
    %dim_50488 = tensor.dim %53102, %c1_50487 : tensor<4x?x4x64xcomplex<f32>>
    %53103 = flow.tensor.bitcast %53102 : tensor<4x?x4x64xcomplex<f32>>{%dim_50488} -> tensor<4x?x4x128xf32>{%dim_50488}
    %53104 = torch_c.from_builtin_tensor %53103 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %53104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_50489 = torch.constant.int 5
    %53105 = torch.prims.convert_element_type %53104, %int5_50489 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_50490 = torch.constant.int 1
    %53106 = torch.aten.size.int %52786, %int1_50490 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_50491 = torch.constant.int 0
    %53107 = torch.aten.add.int %int0_50491, %53106 : !torch.int, !torch.int -> !torch.int
    %int0_50492 = torch.constant.int 0
    %int0_50493 = torch.constant.int 0
    %int1_50494 = torch.constant.int 1
    %53108 = torch.aten.slice.Tensor %53000, %int0_50492, %int0_50493, %53107, %int1_50494 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53108, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50495 = torch.constant.int 1
    %int0_50496 = torch.constant.int 0
    %int9223372036854775807_50497 = torch.constant.int 9223372036854775807
    %int1_50498 = torch.constant.int 1
    %53109 = torch.aten.slice.Tensor %53108, %int1_50495, %int0_50496, %int9223372036854775807_50497, %int1_50498 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53109, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50499 = torch.constant.int 0
    %53110 = torch.aten.unsqueeze %53109, %int0_50499 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53110, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50500 = torch.constant.int 2
    %53111 = torch.aten.unsqueeze %53110, %int2_50500 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53111, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50501 = torch.constant.int 3
    %int0_50502 = torch.constant.int 0
    %int9223372036854775807_50503 = torch.constant.int 9223372036854775807
    %int1_50504 = torch.constant.int 1
    %53112 = torch.aten.slice.Tensor %53111, %int3_50501, %int0_50502, %int9223372036854775807_50503, %int1_50504 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53112, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53113 = torch_c.to_builtin_tensor %52930 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_50505 = arith.constant 1 : index
    %dim_50506 = tensor.dim %53113, %c1_50505 : tensor<4x?x4x128xf16>
    %53114 = flow.tensor.bitcast %53113 : tensor<4x?x4x128xf16>{%dim_50506} -> tensor<4x?x4x64xcomplex<f16>>{%dim_50506}
    %53115 = torch_c.from_builtin_tensor %53114 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %53115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %53116 = torch.aten.mul.Tensor %53115, %53112 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %53116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %53117 = torch_c.to_builtin_tensor %53116 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_50507 = arith.constant 1 : index
    %dim_50508 = tensor.dim %53117, %c1_50507 : tensor<4x?x4x64xcomplex<f32>>
    %53118 = flow.tensor.bitcast %53117 : tensor<4x?x4x64xcomplex<f32>>{%dim_50508} -> tensor<4x?x4x128xf32>{%dim_50508}
    %53119 = torch_c.from_builtin_tensor %53118 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %53119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_50509 = torch.constant.int 5
    %53120 = torch.prims.convert_element_type %53119, %int5_50509 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
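    // A second, identical RoPE table is constructed and re-broadcast to all
    // eight devices, this time for the single-head [4,?,1,128] shards (likely
    // the key heads, one per device).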
    %int131072_50510 = torch.constant.int 131072
    %none_50511 = torch.constant.none
    %none_50512 = torch.constant.none
    %cpu_50513 = torch.constant.device "cpu"
    %false_50514 = torch.constant.bool false
    %53121 = torch.aten.arange %int131072_50510, %none_50511, %none_50512, %cpu_50513, %false_50514 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_50515 = torch.constant.int 0
    %int128_50516 = torch.constant.int 128
    %int2_50517 = torch.constant.int 2
    %none_50518 = torch.constant.none
    %none_50519 = torch.constant.none
    %cpu_50520 = torch.constant.device "cpu"
    %false_50521 = torch.constant.bool false
    %53122 = torch.aten.arange.start_step %int0_50515, %int128_50516, %int2_50517, %none_50518, %none_50519, %cpu_50520, %false_50521 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_50522 = torch.constant.int 0
    %int0_50523 = torch.constant.int 0
    %int64_50524 = torch.constant.int 64
    %int1_50525 = torch.constant.int 1
    %53123 = torch.aten.slice.Tensor %53122, %int0_50522, %int0_50523, %int64_50524, %int1_50525 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_50526 = torch.constant.int 6
    %53124 = torch.prims.convert_element_type %53123, %int6_50526 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_50527 = torch.constant.int 128
    %53125 = torch.aten.div.Scalar %53124, %int128_50527 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_50528 = torch.constant.float 5.000000e+05
    %53126 = torch.aten.pow.Scalar %float5.000000e05_50528, %53125 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %53127 = torch.aten.reciprocal %53126 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_50529 = torch.constant.float 1.000000e+00
    %53128 = torch.aten.mul.Scalar %53127, %float1.000000e00_50529 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_50530 = torch.constant.int 131072
    %int1_50531 = torch.constant.int 1
    %53129 = torch.prim.ListConstruct %int131072_50530, %int1_50531 : (!torch.int, !torch.int) -> !torch.list<int>
    %53130 = torch.aten.view %53121, %53129 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %53131 = torch.aten.mul.Tensor %53130, %53128 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %53132 = torch.aten.cos %53131 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %53133 = torch.aten.sin %53131 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %53134 = torch.aten.complex %53132, %53133 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
    %53135 = torch_c.to_builtin_tensor %53134 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %53136 = flow.tensor.transfer %53135 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %53137 = torch_c.from_builtin_tensor %53136 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %53138 = torch_c.to_builtin_tensor %53134 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %53139 = flow.tensor.transfer %53138 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %53140 = torch_c.from_builtin_tensor %53139 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %53141 = torch_c.to_builtin_tensor %53134 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %53142 = flow.tensor.transfer %53141 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %53143 = torch_c.from_builtin_tensor %53142 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %53144 = torch_c.to_builtin_tensor %53134 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %53145 = flow.tensor.transfer %53144 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %53146 = torch_c.from_builtin_tensor %53145 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %53147 = torch_c.to_builtin_tensor %53134 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %53148 = flow.tensor.transfer %53147 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %53149 = torch_c.from_builtin_tensor %53148 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %53150 = torch_c.to_builtin_tensor %53134 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %53151 = flow.tensor.transfer %53150 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %53152 = torch_c.from_builtin_tensor %53151 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %53153 = torch_c.to_builtin_tensor %53134 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %53154 = flow.tensor.transfer %53153 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %53155 = torch_c.from_builtin_tensor %53154 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %53156 = torch_c.to_builtin_tensor %53134 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %53157 = flow.tensor.transfer %53156 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %53158 = torch_c.from_builtin_tensor %53157 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
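    // Per-device rotary application to the single-KV-head shards: identical to
    // the 4-head case above except the head dimension is 1
    // ([4,?,1,128] f16 <-> [4,?,1,64] complex<f16>).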
    %int1_50532 = torch.constant.int 1
    %53159 = torch.aten.size.int %52808, %int1_50532 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_50533 = torch.constant.int 0
    %53160 = torch.aten.add.int %int0_50533, %53159 : !torch.int, !torch.int -> !torch.int
    %int0_50534 = torch.constant.int 0
    %int0_50535 = torch.constant.int 0
    %int1_50536 = torch.constant.int 1
    %53161 = torch.aten.slice.Tensor %53137, %int0_50534, %int0_50535, %53160, %int1_50536 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53161, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50537 = torch.constant.int 1
    %int0_50538 = torch.constant.int 0
    %int9223372036854775807_50539 = torch.constant.int 9223372036854775807
    %int1_50540 = torch.constant.int 1
    %53162 = torch.aten.slice.Tensor %53161, %int1_50537, %int0_50538, %int9223372036854775807_50539, %int1_50540 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53162, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50541 = torch.constant.int 0
    %53163 = torch.aten.unsqueeze %53162, %int0_50541 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53163, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50542 = torch.constant.int 2
    %53164 = torch.aten.unsqueeze %53163, %int2_50542 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53164, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50543 = torch.constant.int 3
    %int0_50544 = torch.constant.int 0
    %int9223372036854775807_50545 = torch.constant.int 9223372036854775807
    %int1_50546 = torch.constant.int 1
    %53165 = torch.aten.slice.Tensor %53164, %int3_50543, %int0_50544, %int9223372036854775807_50545, %int1_50546 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53165, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53166 = torch_c.to_builtin_tensor %52932 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_50547 = arith.constant 1 : index
    %dim_50548 = tensor.dim %53166, %c1_50547 : tensor<4x?x1x128xf16>
    %53167 = flow.tensor.bitcast %53166 : tensor<4x?x1x128xf16>{%dim_50548} -> tensor<4x?x1x64xcomplex<f16>>{%dim_50548}
    %53168 = torch_c.from_builtin_tensor %53167 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %53168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %53169 = torch.aten.mul.Tensor %53168, %53165 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %53170 = torch_c.to_builtin_tensor %53169 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_50549 = arith.constant 1 : index
    %dim_50550 = tensor.dim %53170, %c1_50549 : tensor<4x?x1x64xcomplex<f32>>
    %53171 = flow.tensor.bitcast %53170 : tensor<4x?x1x64xcomplex<f32>>{%dim_50550} -> tensor<4x?x1x128xf32>{%dim_50550}
    %53172 = torch_c.from_builtin_tensor %53171 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %53172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_50551 = torch.constant.int 5
    %53173 = torch.prims.convert_element_type %53172, %int5_50551 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %53173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_50552 = torch.constant.int 1
    %53174 = torch.aten.size.int %52814, %int1_50552 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_50553 = torch.constant.int 0
    %53175 = torch.aten.add.int %int0_50553, %53174 : !torch.int, !torch.int -> !torch.int
    %int0_50554 = torch.constant.int 0
    %int0_50555 = torch.constant.int 0
    %int1_50556 = torch.constant.int 1
    %53176 = torch.aten.slice.Tensor %53140, %int0_50554, %int0_50555, %53175, %int1_50556 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53176, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50557 = torch.constant.int 1
    %int0_50558 = torch.constant.int 0
    %int9223372036854775807_50559 = torch.constant.int 9223372036854775807
    %int1_50560 = torch.constant.int 1
    %53177 = torch.aten.slice.Tensor %53176, %int1_50557, %int0_50558, %int9223372036854775807_50559, %int1_50560 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53177, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50561 = torch.constant.int 0
    %53178 = torch.aten.unsqueeze %53177, %int0_50561 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53178, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50562 = torch.constant.int 2
    %53179 = torch.aten.unsqueeze %53178, %int2_50562 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53179, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50563 = torch.constant.int 3
    %int0_50564 = torch.constant.int 0
    %int9223372036854775807_50565 = torch.constant.int 9223372036854775807
    %int1_50566 = torch.constant.int 1
    %53180 = torch.aten.slice.Tensor %53179, %int3_50563, %int0_50564, %int9223372036854775807_50565, %int1_50566 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53180, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53181 = torch_c.to_builtin_tensor %52934 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_50567 = arith.constant 1 : index
    %dim_50568 = tensor.dim %53181, %c1_50567 : tensor<4x?x1x128xf16>
    %53182 = flow.tensor.bitcast %53181 : tensor<4x?x1x128xf16>{%dim_50568} -> tensor<4x?x1x64xcomplex<f16>>{%dim_50568}
    %53183 = torch_c.from_builtin_tensor %53182 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %53183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %53184 = torch.aten.mul.Tensor %53183, %53180 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %53185 = torch_c.to_builtin_tensor %53184 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_50569 = arith.constant 1 : index
    %dim_50570 = tensor.dim %53185, %c1_50569 : tensor<4x?x1x64xcomplex<f32>>
    %53186 = flow.tensor.bitcast %53185 : tensor<4x?x1x64xcomplex<f32>>{%dim_50570} -> tensor<4x?x1x128xf32>{%dim_50570}
    %53187 = torch_c.from_builtin_tensor %53186 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %53187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_50571 = torch.constant.int 5
    %53188 = torch.prims.convert_element_type %53187, %int5_50571 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %53188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
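    // Same rotary pattern, next shard: %52936 multiplied by the sliced table %53143, yielding %53203.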
    %int1_50572 = torch.constant.int 1
    %53189 = torch.aten.size.int %52820, %int1_50572 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_50573 = torch.constant.int 0
    %53190 = torch.aten.add.int %int0_50573, %53189 : !torch.int, !torch.int -> !torch.int
    %int0_50574 = torch.constant.int 0
    %int0_50575 = torch.constant.int 0
    %int1_50576 = torch.constant.int 1
    %53191 = torch.aten.slice.Tensor %53143, %int0_50574, %int0_50575, %53190, %int1_50576 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53191, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50577 = torch.constant.int 1
    %int0_50578 = torch.constant.int 0
    %int9223372036854775807_50579 = torch.constant.int 9223372036854775807
    %int1_50580 = torch.constant.int 1
    %53192 = torch.aten.slice.Tensor %53191, %int1_50577, %int0_50578, %int9223372036854775807_50579, %int1_50580 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53192, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50581 = torch.constant.int 0
    %53193 = torch.aten.unsqueeze %53192, %int0_50581 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53193, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50582 = torch.constant.int 2
    %53194 = torch.aten.unsqueeze %53193, %int2_50582 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53194, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50583 = torch.constant.int 3
    %int0_50584 = torch.constant.int 0
    %int9223372036854775807_50585 = torch.constant.int 9223372036854775807
    %int1_50586 = torch.constant.int 1
    %53195 = torch.aten.slice.Tensor %53194, %int3_50583, %int0_50584, %int9223372036854775807_50585, %int1_50586 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53195, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53196 = torch_c.to_builtin_tensor %52936 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_50587 = arith.constant 1 : index
    %dim_50588 = tensor.dim %53196, %c1_50587 : tensor<4x?x1x128xf16>
    %53197 = flow.tensor.bitcast %53196 : tensor<4x?x1x128xf16>{%dim_50588} -> tensor<4x?x1x64xcomplex<f16>>{%dim_50588}
    %53198 = torch_c.from_builtin_tensor %53197 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %53198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %53199 = torch.aten.mul.Tensor %53198, %53195 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %53200 = torch_c.to_builtin_tensor %53199 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_50589 = arith.constant 1 : index
    %dim_50590 = tensor.dim %53200, %c1_50589 : tensor<4x?x1x64xcomplex<f32>>
    %53201 = flow.tensor.bitcast %53200 : tensor<4x?x1x64xcomplex<f32>>{%dim_50590} -> tensor<4x?x1x128xf32>{%dim_50590}
    %53202 = torch_c.from_builtin_tensor %53201 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %53202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_50591 = torch.constant.int 5
    %53203 = torch.prims.convert_element_type %53202, %int5_50591 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %53203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
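    // Next shard: %52938 x sliced %53146 -> %53218.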
    %int1_50592 = torch.constant.int 1
    %53204 = torch.aten.size.int %52826, %int1_50592 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_50593 = torch.constant.int 0
    %53205 = torch.aten.add.int %int0_50593, %53204 : !torch.int, !torch.int -> !torch.int
    %int0_50594 = torch.constant.int 0
    %int0_50595 = torch.constant.int 0
    %int1_50596 = torch.constant.int 1
    %53206 = torch.aten.slice.Tensor %53146, %int0_50594, %int0_50595, %53205, %int1_50596 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53206, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50597 = torch.constant.int 1
    %int0_50598 = torch.constant.int 0
    %int9223372036854775807_50599 = torch.constant.int 9223372036854775807
    %int1_50600 = torch.constant.int 1
    %53207 = torch.aten.slice.Tensor %53206, %int1_50597, %int0_50598, %int9223372036854775807_50599, %int1_50600 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53207, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50601 = torch.constant.int 0
    %53208 = torch.aten.unsqueeze %53207, %int0_50601 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53208, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50602 = torch.constant.int 2
    %53209 = torch.aten.unsqueeze %53208, %int2_50602 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53209, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50603 = torch.constant.int 3
    %int0_50604 = torch.constant.int 0
    %int9223372036854775807_50605 = torch.constant.int 9223372036854775807
    %int1_50606 = torch.constant.int 1
    %53210 = torch.aten.slice.Tensor %53209, %int3_50603, %int0_50604, %int9223372036854775807_50605, %int1_50606 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53210, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53211 = torch_c.to_builtin_tensor %52938 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_50607 = arith.constant 1 : index
    %dim_50608 = tensor.dim %53211, %c1_50607 : tensor<4x?x1x128xf16>
    %53212 = flow.tensor.bitcast %53211 : tensor<4x?x1x128xf16>{%dim_50608} -> tensor<4x?x1x64xcomplex<f16>>{%dim_50608}
    %53213 = torch_c.from_builtin_tensor %53212 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %53213, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %53214 = torch.aten.mul.Tensor %53213, %53210 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %53215 = torch_c.to_builtin_tensor %53214 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_50609 = arith.constant 1 : index
    %dim_50610 = tensor.dim %53215, %c1_50609 : tensor<4x?x1x64xcomplex<f32>>
    %53216 = flow.tensor.bitcast %53215 : tensor<4x?x1x64xcomplex<f32>>{%dim_50610} -> tensor<4x?x1x128xf32>{%dim_50610}
    %53217 = torch_c.from_builtin_tensor %53216 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %53217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_50611 = torch.constant.int 5
    %53218 = torch.prims.convert_element_type %53217, %int5_50611 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %53218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
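    // Next shard: %52940 x sliced %53149 -> %53233.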
    %int1_50612 = torch.constant.int 1
    %53219 = torch.aten.size.int %52832, %int1_50612 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_50613 = torch.constant.int 0
    %53220 = torch.aten.add.int %int0_50613, %53219 : !torch.int, !torch.int -> !torch.int
    %int0_50614 = torch.constant.int 0
    %int0_50615 = torch.constant.int 0
    %int1_50616 = torch.constant.int 1
    %53221 = torch.aten.slice.Tensor %53149, %int0_50614, %int0_50615, %53220, %int1_50616 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53221, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50617 = torch.constant.int 1
    %int0_50618 = torch.constant.int 0
    %int9223372036854775807_50619 = torch.constant.int 9223372036854775807
    %int1_50620 = torch.constant.int 1
    %53222 = torch.aten.slice.Tensor %53221, %int1_50617, %int0_50618, %int9223372036854775807_50619, %int1_50620 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53222, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50621 = torch.constant.int 0
    %53223 = torch.aten.unsqueeze %53222, %int0_50621 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53223, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50622 = torch.constant.int 2
    %53224 = torch.aten.unsqueeze %53223, %int2_50622 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53224, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50623 = torch.constant.int 3
    %int0_50624 = torch.constant.int 0
    %int9223372036854775807_50625 = torch.constant.int 9223372036854775807
    %int1_50626 = torch.constant.int 1
    %53225 = torch.aten.slice.Tensor %53224, %int3_50623, %int0_50624, %int9223372036854775807_50625, %int1_50626 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53225, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53226 = torch_c.to_builtin_tensor %52940 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_50627 = arith.constant 1 : index
    %dim_50628 = tensor.dim %53226, %c1_50627 : tensor<4x?x1x128xf16>
    %53227 = flow.tensor.bitcast %53226 : tensor<4x?x1x128xf16>{%dim_50628} -> tensor<4x?x1x64xcomplex<f16>>{%dim_50628}
    %53228 = torch_c.from_builtin_tensor %53227 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %53228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %53229 = torch.aten.mul.Tensor %53228, %53225 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %53230 = torch_c.to_builtin_tensor %53229 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_50629 = arith.constant 1 : index
    %dim_50630 = tensor.dim %53230, %c1_50629 : tensor<4x?x1x64xcomplex<f32>>
    %53231 = flow.tensor.bitcast %53230 : tensor<4x?x1x64xcomplex<f32>>{%dim_50630} -> tensor<4x?x1x128xf32>{%dim_50630}
    %53232 = torch_c.from_builtin_tensor %53231 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %53232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_50631 = torch.constant.int 5
    %53233 = torch.prims.convert_element_type %53232, %int5_50631 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %53233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
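    // Next shard: %52942 x sliced %53152 -> %53248.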
    %int1_50632 = torch.constant.int 1
    %53234 = torch.aten.size.int %52838, %int1_50632 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_50633 = torch.constant.int 0
    %53235 = torch.aten.add.int %int0_50633, %53234 : !torch.int, !torch.int -> !torch.int
    %int0_50634 = torch.constant.int 0
    %int0_50635 = torch.constant.int 0
    %int1_50636 = torch.constant.int 1
    %53236 = torch.aten.slice.Tensor %53152, %int0_50634, %int0_50635, %53235, %int1_50636 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53236, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50637 = torch.constant.int 1
    %int0_50638 = torch.constant.int 0
    %int9223372036854775807_50639 = torch.constant.int 9223372036854775807
    %int1_50640 = torch.constant.int 1
    %53237 = torch.aten.slice.Tensor %53236, %int1_50637, %int0_50638, %int9223372036854775807_50639, %int1_50640 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53237, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50641 = torch.constant.int 0
    %53238 = torch.aten.unsqueeze %53237, %int0_50641 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53238, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50642 = torch.constant.int 2
    %53239 = torch.aten.unsqueeze %53238, %int2_50642 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53239, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50643 = torch.constant.int 3
    %int0_50644 = torch.constant.int 0
    %int9223372036854775807_50645 = torch.constant.int 9223372036854775807
    %int1_50646 = torch.constant.int 1
    %53240 = torch.aten.slice.Tensor %53239, %int3_50643, %int0_50644, %int9223372036854775807_50645, %int1_50646 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53240, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53241 = torch_c.to_builtin_tensor %52942 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_50647 = arith.constant 1 : index
    %dim_50648 = tensor.dim %53241, %c1_50647 : tensor<4x?x1x128xf16>
    %53242 = flow.tensor.bitcast %53241 : tensor<4x?x1x128xf16>{%dim_50648} -> tensor<4x?x1x64xcomplex<f16>>{%dim_50648}
    %53243 = torch_c.from_builtin_tensor %53242 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %53243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %53244 = torch.aten.mul.Tensor %53243, %53240 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %53245 = torch_c.to_builtin_tensor %53244 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_50649 = arith.constant 1 : index
    %dim_50650 = tensor.dim %53245, %c1_50649 : tensor<4x?x1x64xcomplex<f32>>
    %53246 = flow.tensor.bitcast %53245 : tensor<4x?x1x64xcomplex<f32>>{%dim_50650} -> tensor<4x?x1x128xf32>{%dim_50650}
    %53247 = torch_c.from_builtin_tensor %53246 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %53247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_50651 = torch.constant.int 5
    %53248 = torch.prims.convert_element_type %53247, %int5_50651 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %53248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
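    // Next shard: %52944 x sliced %53155 -> %53263.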
    %int1_50652 = torch.constant.int 1
    %53249 = torch.aten.size.int %52844, %int1_50652 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_50653 = torch.constant.int 0
    %53250 = torch.aten.add.int %int0_50653, %53249 : !torch.int, !torch.int -> !torch.int
    %int0_50654 = torch.constant.int 0
    %int0_50655 = torch.constant.int 0
    %int1_50656 = torch.constant.int 1
    %53251 = torch.aten.slice.Tensor %53155, %int0_50654, %int0_50655, %53250, %int1_50656 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53251, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50657 = torch.constant.int 1
    %int0_50658 = torch.constant.int 0
    %int9223372036854775807_50659 = torch.constant.int 9223372036854775807
    %int1_50660 = torch.constant.int 1
    %53252 = torch.aten.slice.Tensor %53251, %int1_50657, %int0_50658, %int9223372036854775807_50659, %int1_50660 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53252, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50661 = torch.constant.int 0
    %53253 = torch.aten.unsqueeze %53252, %int0_50661 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53253, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50662 = torch.constant.int 2
    %53254 = torch.aten.unsqueeze %53253, %int2_50662 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53254, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50663 = torch.constant.int 3
    %int0_50664 = torch.constant.int 0
    %int9223372036854775807_50665 = torch.constant.int 9223372036854775807
    %int1_50666 = torch.constant.int 1
    %53255 = torch.aten.slice.Tensor %53254, %int3_50663, %int0_50664, %int9223372036854775807_50665, %int1_50666 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53255, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53256 = torch_c.to_builtin_tensor %52944 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_50667 = arith.constant 1 : index
    %dim_50668 = tensor.dim %53256, %c1_50667 : tensor<4x?x1x128xf16>
    %53257 = flow.tensor.bitcast %53256 : tensor<4x?x1x128xf16>{%dim_50668} -> tensor<4x?x1x64xcomplex<f16>>{%dim_50668}
    %53258 = torch_c.from_builtin_tensor %53257 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %53258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %53259 = torch.aten.mul.Tensor %53258, %53255 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %53260 = torch_c.to_builtin_tensor %53259 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_50669 = arith.constant 1 : index
    %dim_50670 = tensor.dim %53260, %c1_50669 : tensor<4x?x1x64xcomplex<f32>>
    %53261 = flow.tensor.bitcast %53260 : tensor<4x?x1x64xcomplex<f32>>{%dim_50670} -> tensor<4x?x1x128xf32>{%dim_50670}
    %53262 = torch_c.from_builtin_tensor %53261 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %53262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_50671 = torch.constant.int 5
    %53263 = torch.prims.convert_element_type %53262, %int5_50671 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %53263, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
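    // Final shard: %52946 x sliced %53158 -> %53278.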
    %int1_50672 = torch.constant.int 1
    %53264 = torch.aten.size.int %52850, %int1_50672 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_50673 = torch.constant.int 0
    %53265 = torch.aten.add.int %int0_50673, %53264 : !torch.int, !torch.int -> !torch.int
    %int0_50674 = torch.constant.int 0
    %int0_50675 = torch.constant.int 0
    %int1_50676 = torch.constant.int 1
    %53266 = torch.aten.slice.Tensor %53158, %int0_50674, %int0_50675, %53265, %int1_50676 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53266, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_50677 = torch.constant.int 1
    %int0_50678 = torch.constant.int 0
    %int9223372036854775807_50679 = torch.constant.int 9223372036854775807
    %int1_50680 = torch.constant.int 1
    %53267 = torch.aten.slice.Tensor %53266, %int1_50677, %int0_50678, %int9223372036854775807_50679, %int1_50680 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %53267, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_50681 = torch.constant.int 0
    %53268 = torch.aten.unsqueeze %53267, %int0_50681 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %53268, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_50682 = torch.constant.int 2
    %53269 = torch.aten.unsqueeze %53268, %int2_50682 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53269, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_50683 = torch.constant.int 3
    %int0_50684 = torch.constant.int 0
    %int9223372036854775807_50685 = torch.constant.int 9223372036854775807
    %int1_50686 = torch.constant.int 1
    %53270 = torch.aten.slice.Tensor %53269, %int3_50683, %int0_50684, %int9223372036854775807_50685, %int1_50686 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53270, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %53271 = torch_c.to_builtin_tensor %52946 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_50687 = arith.constant 1 : index
    %dim_50688 = tensor.dim %53271, %c1_50687 : tensor<4x?x1x128xf16>
    %53272 = flow.tensor.bitcast %53271 : tensor<4x?x1x128xf16>{%dim_50688} -> tensor<4x?x1x64xcomplex<f16>>{%dim_50688}
    %53273 = torch_c.from_builtin_tensor %53272 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %53273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %53274 = torch.aten.mul.Tensor %53273, %53270 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %53274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %53275 = torch_c.to_builtin_tensor %53274 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_50689 = arith.constant 1 : index
    %dim_50690 = tensor.dim %53275, %c1_50689 : tensor<4x?x1x64xcomplex<f32>>
    %53276 = flow.tensor.bitcast %53275 : tensor<4x?x1x64xcomplex<f32>>{%dim_50690} -> tensor<4x?x1x128xf32>{%dim_50690}
    %53277 = torch_c.from_builtin_tensor %53276 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %53277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_50691 = torch.constant.int 5
    %53278 = torch.prims.convert_element_type %53277, %int5_50691 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %53278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
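    // End of the rotary blocks; what follows prepares scatter indices for the
    // paged cache. Each per-device [4,?]si64 tensor (%2364 ... %2385), likely
    // page ids, is scaled by 64, consistent with 64 cache slots per page
    // (e.g. 32 transformer blocks x {K,V}), though that reading is inferred
    // from the constant alone.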
    %int64_50692 = torch.constant.int 64
    %53279 = torch.aten.mul.Scalar %2364, %int64_50692 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53279, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_50693 = torch.constant.int 64
    %53280 = torch.aten.mul.Scalar %2367, %int64_50693 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53280, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_50694 = torch.constant.int 64
    %53281 = torch.aten.mul.Scalar %2370, %int64_50694 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53281, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_50695 = torch.constant.int 64
    %53282 = torch.aten.mul.Scalar %2373, %int64_50695 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53282, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_50696 = torch.constant.int 64
    %53283 = torch.aten.mul.Scalar %2376, %int64_50696 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53283, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_50697 = torch.constant.int 64
    %53284 = torch.aten.mul.Scalar %2379, %int64_50697 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53284, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_50698 = torch.constant.int 64
    %53285 = torch.aten.mul.Scalar %2382, %int64_50698 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53285, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_50699 = torch.constant.int 64
    %53286 = torch.aten.mul.Scalar %2385, %int64_50699 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53286, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
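    // Adding 54 to every scaled page id picks a fixed slot within each page.
    // Under the 64-slots-per-page reading above, 54 = 2 * 27 would be the K
    // slot of block 27; the matching slot at +1 is computed further below.
    // This is a guess from the constants, not stated anywhere in the IR.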
    %int54 = torch.constant.int 54
    %int1_50700 = torch.constant.int 1
    %53287 = torch.aten.add.Scalar %53279, %int54, %int1_50700 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53287, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int54_50701 = torch.constant.int 54
    %int1_50702 = torch.constant.int 1
    %53288 = torch.aten.add.Scalar %53280, %int54_50701, %int1_50702 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53288, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int54_50703 = torch.constant.int 54
    %int1_50704 = torch.constant.int 1
    %53289 = torch.aten.add.Scalar %53281, %int54_50703, %int1_50704 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53289, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int54_50705 = torch.constant.int 54
    %int1_50706 = torch.constant.int 1
    %53290 = torch.aten.add.Scalar %53282, %int54_50705, %int1_50706 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53290, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int54_50707 = torch.constant.int 54
    %int1_50708 = torch.constant.int 1
    %53291 = torch.aten.add.Scalar %53283, %int54_50707, %int1_50708 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53291, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int54_50709 = torch.constant.int 54
    %int1_50710 = torch.constant.int 1
    %53292 = torch.aten.add.Scalar %53284, %int54_50709, %int1_50710 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53292, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int54_50711 = torch.constant.int 54
    %int1_50712 = torch.constant.int 1
    %53293 = torch.aten.add.Scalar %53285, %int54_50711, %int1_50712 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53293, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int54_50713 = torch.constant.int 54
    %int1_50714 = torch.constant.int 1
    %53294 = torch.aten.add.Scalar %53286, %int54_50713, %int1_50714 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53294, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
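    // Reshape each rotated tensor from [4,?,1,128] to [4,?,16,1,128]: the
    // dynamic sequence dimension (s0 * 16 tokens per the symbolic shape maps)
    // is split into s0 pages of 16 tokens each.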
    %int4_50715 = torch.constant.int 4
    %int16_50716 = torch.constant.int 16
    %int1_50717 = torch.constant.int 1
    %int128_50718 = torch.constant.int 128
    %53295 = torch.prim.ListConstruct %int4_50715, %3095, %int16_50716, %int1_50717, %int128_50718 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53296 = torch.aten.view %53173, %53295 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53296, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50719 = torch.constant.int 4
    %int16_50720 = torch.constant.int 16
    %int1_50721 = torch.constant.int 1
    %int128_50722 = torch.constant.int 128
    %53297 = torch.prim.ListConstruct %int4_50719, %3095, %int16_50720, %int1_50721, %int128_50722 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53298 = torch.aten.view %53188, %53297 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53298, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50723 = torch.constant.int 4
    %int16_50724 = torch.constant.int 16
    %int1_50725 = torch.constant.int 1
    %int128_50726 = torch.constant.int 128
    %53299 = torch.prim.ListConstruct %int4_50723, %3095, %int16_50724, %int1_50725, %int128_50726 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53300 = torch.aten.view %53203, %53299 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53300, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50727 = torch.constant.int 4
    %int16_50728 = torch.constant.int 16
    %int1_50729 = torch.constant.int 1
    %int128_50730 = torch.constant.int 128
    %53301 = torch.prim.ListConstruct %int4_50727, %3095, %int16_50728, %int1_50729, %int128_50730 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53302 = torch.aten.view %53218, %53301 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53302, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50731 = torch.constant.int 4
    %int16_50732 = torch.constant.int 16
    %int1_50733 = torch.constant.int 1
    %int128_50734 = torch.constant.int 128
    %53303 = torch.prim.ListConstruct %int4_50731, %3095, %int16_50732, %int1_50733, %int128_50734 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53304 = torch.aten.view %53233, %53303 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53304, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50735 = torch.constant.int 4
    %int16_50736 = torch.constant.int 16
    %int1_50737 = torch.constant.int 1
    %int128_50738 = torch.constant.int 128
    %53305 = torch.prim.ListConstruct %int4_50735, %3095, %int16_50736, %int1_50737, %int128_50738 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53306 = torch.aten.view %53248, %53305 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53306, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50739 = torch.constant.int 4
    %int16_50740 = torch.constant.int 16
    %int1_50741 = torch.constant.int 1
    %int128_50742 = torch.constant.int 128
    %53307 = torch.prim.ListConstruct %int4_50739, %3095, %int16_50740, %int1_50741, %int128_50742 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53308 = torch.aten.view %53263, %53307 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53308, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50743 = torch.constant.int 4
    %int16_50744 = torch.constant.int 16
    %int1_50745 = torch.constant.int 1
    %int128_50746 = torch.constant.int 128
    %53309 = torch.prim.ListConstruct %int4_50743, %3095, %int16_50744, %int1_50745, %int128_50746 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53310 = torch.aten.view %53278, %53309 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53310, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
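    // Collapse batch and page counts (4 * s0) so each shard becomes a flat
    // [?,16,1,128] list of pages.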
    %int4_50747 = torch.constant.int 4
    %53311 = torch.aten.mul.int %int4_50747, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50748 = torch.constant.int 16
    %int1_50749 = torch.constant.int 1
    %int128_50750 = torch.constant.int 128
    %53312 = torch.prim.ListConstruct %53311, %int16_50748, %int1_50749, %int128_50750 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53313 = torch.aten.view %53296, %53312 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53313, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50751 = torch.constant.int 4
    %53314 = torch.aten.mul.int %int4_50751, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50752 = torch.constant.int 16
    %int1_50753 = torch.constant.int 1
    %int128_50754 = torch.constant.int 128
    %53315 = torch.prim.ListConstruct %53314, %int16_50752, %int1_50753, %int128_50754 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53316 = torch.aten.view %53298, %53315 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53316, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50755 = torch.constant.int 4
    %53317 = torch.aten.mul.int %int4_50755, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50756 = torch.constant.int 16
    %int1_50757 = torch.constant.int 1
    %int128_50758 = torch.constant.int 128
    %53318 = torch.prim.ListConstruct %53317, %int16_50756, %int1_50757, %int128_50758 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53319 = torch.aten.view %53300, %53318 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53319, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50759 = torch.constant.int 4
    %53320 = torch.aten.mul.int %int4_50759, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50760 = torch.constant.int 16
    %int1_50761 = torch.constant.int 1
    %int128_50762 = torch.constant.int 128
    %53321 = torch.prim.ListConstruct %53320, %int16_50760, %int1_50761, %int128_50762 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53322 = torch.aten.view %53302, %53321 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53322, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50763 = torch.constant.int 4
    %53323 = torch.aten.mul.int %int4_50763, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50764 = torch.constant.int 16
    %int1_50765 = torch.constant.int 1
    %int128_50766 = torch.constant.int 128
    %53324 = torch.prim.ListConstruct %53323, %int16_50764, %int1_50765, %int128_50766 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53325 = torch.aten.view %53304, %53324 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53325, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50767 = torch.constant.int 4
    %53326 = torch.aten.mul.int %int4_50767, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50768 = torch.constant.int 16
    %int1_50769 = torch.constant.int 1
    %int128_50770 = torch.constant.int 128
    %53327 = torch.prim.ListConstruct %53326, %int16_50768, %int1_50769, %int128_50770 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53328 = torch.aten.view %53306, %53327 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53328, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50771 = torch.constant.int 4
    %53329 = torch.aten.mul.int %int4_50771, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50772 = torch.constant.int 16
    %int1_50773 = torch.constant.int 1
    %int128_50774 = torch.constant.int 128
    %53330 = torch.prim.ListConstruct %53329, %int16_50772, %int1_50773, %int128_50774 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53331 = torch.aten.view %53308, %53330 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53331, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50775 = torch.constant.int 4
    %53332 = torch.aten.mul.int %int4_50775, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50776 = torch.constant.int 16
    %int1_50777 = torch.constant.int 1
    %int128_50778 = torch.constant.int 128
    %53333 = torch.prim.ListConstruct %53332, %int16_50776, %int1_50777, %int128_50778 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53334 = torch.aten.view %53310, %53333 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53334, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
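    // Flatten the slot-index tensors %53287 ... %53294 from [4,?] to [?] so
    // they pair one-to-one with the flattened pages above.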
    %int4_50779 = torch.constant.int 4
    %53335 = torch.aten.mul.int %int4_50779, %3095 : !torch.int, !torch.int -> !torch.int
    %53336 = torch.prim.ListConstruct %53335 : (!torch.int) -> !torch.list<int>
    %53337 = torch.aten.view %53287, %53336 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53337, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50780 = torch.constant.int 4
    %53338 = torch.aten.mul.int %int4_50780, %3095 : !torch.int, !torch.int -> !torch.int
    %53339 = torch.prim.ListConstruct %53338 : (!torch.int) -> !torch.list<int>
    %53340 = torch.aten.view %53288, %53339 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53340, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50781 = torch.constant.int 4
    %53341 = torch.aten.mul.int %int4_50781, %3095 : !torch.int, !torch.int -> !torch.int
    %53342 = torch.prim.ListConstruct %53341 : (!torch.int) -> !torch.list<int>
    %53343 = torch.aten.view %53289, %53342 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53343, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50782 = torch.constant.int 4
    %53344 = torch.aten.mul.int %int4_50782, %3095 : !torch.int, !torch.int -> !torch.int
    %53345 = torch.prim.ListConstruct %53344 : (!torch.int) -> !torch.list<int>
    %53346 = torch.aten.view %53290, %53345 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53346, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50783 = torch.constant.int 4
    %53347 = torch.aten.mul.int %int4_50783, %3095 : !torch.int, !torch.int -> !torch.int
    %53348 = torch.prim.ListConstruct %53347 : (!torch.int) -> !torch.list<int>
    %53349 = torch.aten.view %53291, %53348 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53349, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50784 = torch.constant.int 4
    %53350 = torch.aten.mul.int %int4_50784, %3095 : !torch.int, !torch.int -> !torch.int
    %53351 = torch.prim.ListConstruct %53350 : (!torch.int) -> !torch.list<int>
    %53352 = torch.aten.view %53292, %53351 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53352, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50785 = torch.constant.int 4
    %53353 = torch.aten.mul.int %int4_50785, %3095 : !torch.int, !torch.int -> !torch.int
    %53354 = torch.prim.ListConstruct %53353 : (!torch.int) -> !torch.list<int>
    %53355 = torch.aten.view %53293, %53354 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53355, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50786 = torch.constant.int 4
    %53356 = torch.aten.mul.int %int4_50786, %3095 : !torch.int, !torch.int -> !torch.int
    %53357 = torch.prim.ListConstruct %53356 : (!torch.int) -> !torch.list<int>
    %53358 = torch.aten.view %53294, %53357 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53358, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
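    // The second set of per-shard tensors (%52948 ... %52962) gets the same
    // [4,?,1,128] -> [4,?,16,1,128] paging view but no rotary multiply,
    // consistent with these being the value shards.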
    %int4_50787 = torch.constant.int 4
    %int16_50788 = torch.constant.int 16
    %int1_50789 = torch.constant.int 1
    %int128_50790 = torch.constant.int 128
    %53359 = torch.prim.ListConstruct %int4_50787, %3095, %int16_50788, %int1_50789, %int128_50790 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53360 = torch.aten.view %52948, %53359 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53360, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50791 = torch.constant.int 4
    %int16_50792 = torch.constant.int 16
    %int1_50793 = torch.constant.int 1
    %int128_50794 = torch.constant.int 128
    %53361 = torch.prim.ListConstruct %int4_50791, %3095, %int16_50792, %int1_50793, %int128_50794 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53362 = torch.aten.view %52950, %53361 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53362, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50795 = torch.constant.int 4
    %int16_50796 = torch.constant.int 16
    %int1_50797 = torch.constant.int 1
    %int128_50798 = torch.constant.int 128
    %53363 = torch.prim.ListConstruct %int4_50795, %3095, %int16_50796, %int1_50797, %int128_50798 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53364 = torch.aten.view %52952, %53363 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53364, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50799 = torch.constant.int 4
    %int16_50800 = torch.constant.int 16
    %int1_50801 = torch.constant.int 1
    %int128_50802 = torch.constant.int 128
    %53365 = torch.prim.ListConstruct %int4_50799, %3095, %int16_50800, %int1_50801, %int128_50802 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53366 = torch.aten.view %52954, %53365 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53366, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50803 = torch.constant.int 4
    %int16_50804 = torch.constant.int 16
    %int1_50805 = torch.constant.int 1
    %int128_50806 = torch.constant.int 128
    %53367 = torch.prim.ListConstruct %int4_50803, %3095, %int16_50804, %int1_50805, %int128_50806 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53368 = torch.aten.view %52956, %53367 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53368, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50807 = torch.constant.int 4
    %int16_50808 = torch.constant.int 16
    %int1_50809 = torch.constant.int 1
    %int128_50810 = torch.constant.int 128
    %53369 = torch.prim.ListConstruct %int4_50807, %3095, %int16_50808, %int1_50809, %int128_50810 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53370 = torch.aten.view %52958, %53369 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53370, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50811 = torch.constant.int 4
    %int16_50812 = torch.constant.int 16
    %int1_50813 = torch.constant.int 1
    %int128_50814 = torch.constant.int 128
    %53371 = torch.prim.ListConstruct %int4_50811, %3095, %int16_50812, %int1_50813, %int128_50814 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53372 = torch.aten.view %52960, %53371 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53372, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_50815 = torch.constant.int 4
    %int16_50816 = torch.constant.int 16
    %int1_50817 = torch.constant.int 1
    %int128_50818 = torch.constant.int 128
    %53373 = torch.prim.ListConstruct %int4_50815, %3095, %int16_50816, %int1_50817, %int128_50818 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53374 = torch.aten.view %52962, %53373 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %53374, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
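    // Flatten the paged views to [?,16,1,128], mirroring the handling above.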
    %int4_50819 = torch.constant.int 4
    %53375 = torch.aten.mul.int %int4_50819, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50820 = torch.constant.int 16
    %int1_50821 = torch.constant.int 1
    %int128_50822 = torch.constant.int 128
    %53376 = torch.prim.ListConstruct %53375, %int16_50820, %int1_50821, %int128_50822 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53377 = torch.aten.view %53360, %53376 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53377, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50823 = torch.constant.int 4
    %53378 = torch.aten.mul.int %int4_50823, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50824 = torch.constant.int 16
    %int1_50825 = torch.constant.int 1
    %int128_50826 = torch.constant.int 128
    %53379 = torch.prim.ListConstruct %53378, %int16_50824, %int1_50825, %int128_50826 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53380 = torch.aten.view %53362, %53379 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53380, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50827 = torch.constant.int 4
    %53381 = torch.aten.mul.int %int4_50827, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50828 = torch.constant.int 16
    %int1_50829 = torch.constant.int 1
    %int128_50830 = torch.constant.int 128
    %53382 = torch.prim.ListConstruct %53381, %int16_50828, %int1_50829, %int128_50830 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53383 = torch.aten.view %53364, %53382 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53383, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50831 = torch.constant.int 4
    %53384 = torch.aten.mul.int %int4_50831, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50832 = torch.constant.int 16
    %int1_50833 = torch.constant.int 1
    %int128_50834 = torch.constant.int 128
    %53385 = torch.prim.ListConstruct %53384, %int16_50832, %int1_50833, %int128_50834 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53386 = torch.aten.view %53366, %53385 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53386, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50835 = torch.constant.int 4
    %53387 = torch.aten.mul.int %int4_50835, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50836 = torch.constant.int 16
    %int1_50837 = torch.constant.int 1
    %int128_50838 = torch.constant.int 128
    %53388 = torch.prim.ListConstruct %53387, %int16_50836, %int1_50837, %int128_50838 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53389 = torch.aten.view %53368, %53388 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53389, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50839 = torch.constant.int 4
    %53390 = torch.aten.mul.int %int4_50839, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50840 = torch.constant.int 16
    %int1_50841 = torch.constant.int 1
    %int128_50842 = torch.constant.int 128
    %53391 = torch.prim.ListConstruct %53390, %int16_50840, %int1_50841, %int128_50842 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53392 = torch.aten.view %53370, %53391 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53392, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50843 = torch.constant.int 4
    %53393 = torch.aten.mul.int %int4_50843, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50844 = torch.constant.int 16
    %int1_50845 = torch.constant.int 1
    %int128_50846 = torch.constant.int 128
    %53394 = torch.prim.ListConstruct %53393, %int16_50844, %int1_50845, %int128_50846 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53395 = torch.aten.view %53372, %53394 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53395, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_50847 = torch.constant.int 4
    %53396 = torch.aten.mul.int %int4_50847, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_50848 = torch.constant.int 16
    %int1_50849 = torch.constant.int 1
    %int128_50850 = torch.constant.int 128
    %53397 = torch.prim.ListConstruct %53396, %int16_50848, %int1_50849, %int128_50850 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53398 = torch.aten.view %53374, %53397 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53398, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
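    // Add 1 to each slot index (%53287 ... %53294), consistent with the
    // second tensor of the pair occupying the slot immediately after the
    // first within a page; again inferred, not stated.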
    %int1_50851 = torch.constant.int 1
    %int1_50852 = torch.constant.int 1
    %53399 = torch.aten.add.Scalar %53287, %int1_50851, %int1_50852 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53399, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_50853 = torch.constant.int 1
    %int1_50854 = torch.constant.int 1
    %53400 = torch.aten.add.Scalar %53288, %int1_50853, %int1_50854 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53400, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_50855 = torch.constant.int 1
    %int1_50856 = torch.constant.int 1
    %53401 = torch.aten.add.Scalar %53289, %int1_50855, %int1_50856 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53401, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_50857 = torch.constant.int 1
    %int1_50858 = torch.constant.int 1
    %53402 = torch.aten.add.Scalar %53290, %int1_50857, %int1_50858 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53402, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_50859 = torch.constant.int 1
    %int1_50860 = torch.constant.int 1
    %53403 = torch.aten.add.Scalar %53291, %int1_50859, %int1_50860 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53403, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_50861 = torch.constant.int 1
    %int1_50862 = torch.constant.int 1
    %53404 = torch.aten.add.Scalar %53292, %int1_50861, %int1_50862 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53404, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_50863 = torch.constant.int 1
    %int1_50864 = torch.constant.int 1
    %53405 = torch.aten.add.Scalar %53293, %int1_50863, %int1_50864 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53405, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_50865 = torch.constant.int 1
    %int1_50866 = torch.constant.int 1
    %53406 = torch.aten.add.Scalar %53294, %int1_50865, %int1_50866 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %53406, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
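    // Each offset index tensor is now flattened from [4, ?] to [?]
    // (s0 * 4), matching the flattened slot dimension of the cache updates.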
    %int4_50867 = torch.constant.int 4
    %53407 = torch.aten.mul.int %int4_50867, %3095 : !torch.int, !torch.int -> !torch.int
    %53408 = torch.prim.ListConstruct %53407 : (!torch.int) -> !torch.list<int>
    %53409 = torch.aten.view %53399, %53408 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53409, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50868 = torch.constant.int 4
    %53410 = torch.aten.mul.int %int4_50868, %3095 : !torch.int, !torch.int -> !torch.int
    %53411 = torch.prim.ListConstruct %53410 : (!torch.int) -> !torch.list<int>
    %53412 = torch.aten.view %53400, %53411 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53412, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50869 = torch.constant.int 4
    %53413 = torch.aten.mul.int %int4_50869, %3095 : !torch.int, !torch.int -> !torch.int
    %53414 = torch.prim.ListConstruct %53413 : (!torch.int) -> !torch.list<int>
    %53415 = torch.aten.view %53401, %53414 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53415, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50870 = torch.constant.int 4
    %53416 = torch.aten.mul.int %int4_50870, %3095 : !torch.int, !torch.int -> !torch.int
    %53417 = torch.prim.ListConstruct %53416 : (!torch.int) -> !torch.list<int>
    %53418 = torch.aten.view %53402, %53417 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53418, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50871 = torch.constant.int 4
    %53419 = torch.aten.mul.int %int4_50871, %3095 : !torch.int, !torch.int -> !torch.int
    %53420 = torch.prim.ListConstruct %53419 : (!torch.int) -> !torch.list<int>
    %53421 = torch.aten.view %53403, %53420 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53421, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50872 = torch.constant.int 4
    %53422 = torch.aten.mul.int %int4_50872, %3095 : !torch.int, !torch.int -> !torch.int
    %53423 = torch.prim.ListConstruct %53422 : (!torch.int) -> !torch.list<int>
    %53424 = torch.aten.view %53404, %53423 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53424, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50873 = torch.constant.int 4
    %53425 = torch.aten.mul.int %int4_50873, %3095 : !torch.int, !torch.int -> !torch.int
    %53426 = torch.prim.ListConstruct %53425 : (!torch.int) -> !torch.list<int>
    %53427 = torch.aten.view %53405, %53426 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53427, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_50874 = torch.constant.int 4
    %53428 = torch.aten.mul.int %int4_50874, %3095 : !torch.int, !torch.int -> !torch.int
    %53429 = torch.prim.ListConstruct %53428 : (!torch.int) -> !torch.list<int>
    %53430 = torch.aten.view %53406, %53429 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53430, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
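    // The flattened index tensors are concatenated pairwise with the earlier
    // set (%53337, %53340, ..., %53358), yielding one [?] index vector of
    // length s0 * 8 per device shard.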
    %53431 = torch.prim.ListConstruct %53337, %53409 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_50875 = torch.constant.int 0
    %53432 = torch.aten.cat %53431, %int0_50875 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53432, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %53433 = torch.prim.ListConstruct %53340, %53412 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_50876 = torch.constant.int 0
    %53434 = torch.aten.cat %53433, %int0_50876 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53434, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %53435 = torch.prim.ListConstruct %53343, %53415 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_50877 = torch.constant.int 0
    %53436 = torch.aten.cat %53435, %int0_50877 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53436, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %53437 = torch.prim.ListConstruct %53346, %53418 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_50878 = torch.constant.int 0
    %53438 = torch.aten.cat %53437, %int0_50878 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53438, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %53439 = torch.prim.ListConstruct %53349, %53421 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_50879 = torch.constant.int 0
    %53440 = torch.aten.cat %53439, %int0_50879 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53440, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %53441 = torch.prim.ListConstruct %53352, %53424 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_50880 = torch.constant.int 0
    %53442 = torch.aten.cat %53441, %int0_50880 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53442, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %53443 = torch.prim.ListConstruct %53355, %53427 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_50881 = torch.constant.int 0
    %53444 = torch.aten.cat %53443, %int0_50881 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53444, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %53445 = torch.prim.ListConstruct %53358, %53430 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_50882 = torch.constant.int 0
    %53446 = torch.aten.cat %53445, %int0_50882 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %53446, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
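    // The same pairwise concatenation is applied to the payload tensors,
    // producing one [?, 16, 1, 128] update tensor (s0 * 8 slots) per shard
    // to match the combined index vectors above.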
    %53447 = torch.prim.ListConstruct %53313, %53377 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_50883 = torch.constant.int 0
    %53448 = torch.aten.cat %53447, %int0_50883 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53448, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53449 = torch.prim.ListConstruct %53316, %53380 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_50884 = torch.constant.int 0
    %53450 = torch.aten.cat %53449, %int0_50884 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53450, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53451 = torch.prim.ListConstruct %53319, %53383 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_50885 = torch.constant.int 0
    %53452 = torch.aten.cat %53451, %int0_50885 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53452, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53453 = torch.prim.ListConstruct %53322, %53386 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_50886 = torch.constant.int 0
    %53454 = torch.aten.cat %53453, %int0_50886 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53454, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53455 = torch.prim.ListConstruct %53325, %53389 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_50887 = torch.constant.int 0
    %53456 = torch.aten.cat %53455, %int0_50887 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53456, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53457 = torch.prim.ListConstruct %53328, %53392 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_50888 = torch.constant.int 0
    %53458 = torch.aten.cat %53457, %int0_50888 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53458, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53459 = torch.prim.ListConstruct %53331, %53395 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_50889 = torch.constant.int 0
    %53460 = torch.aten.cat %53459, %int0_50889 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53460, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53461 = torch.prim.ListConstruct %53334, %53398 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_50890 = torch.constant.int 0
    %53462 = torch.aten.cat %53461, %int0_50890 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53462, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
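    // What follows is repeated once per device shard (eight times): the
    // shard's paged cache buffer (%51613, %51625, ...) is viewed from
    // [?, 131072] as [?, 32, 2, 16, 1, 128], flattened to [?, 16, 1, 128]
    // (s0 * 64 slots), updated in place via torch.aten.index_put with the
    // concatenated indices and payloads, then viewed back to [?, 131072].
    // Note 131072 = 32 * 2 * 16 * 1 * 128, consistent with the view shapes.
    // Roughly, per shard (a PyTorch-flavoured sketch, not part of the IR):
    //   cache = cache.view(-1, 32, 2, 16, 1, 128).view(-1, 16, 1, 128)
    //   cache[indices] = updates   # index_put, accumulate=False
    //   cache = cache.view(-1, 131072)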
    %int32_50891 = torch.constant.int 32
    %int2_50892 = torch.constant.int 2
    %int16_50893 = torch.constant.int 16
    %int1_50894 = torch.constant.int 1
    %int128_50895 = torch.constant.int 128
    %53463 = torch.prim.ListConstruct %3023, %int32_50891, %int2_50892, %int16_50893, %int1_50894, %int128_50895 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53464 = torch.aten.view %51613, %53463 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53464, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_50896 = torch.constant.int 32
    %53465 = torch.aten.mul.int %3023, %int32_50896 : !torch.int, !torch.int -> !torch.int
    %int2_50897 = torch.constant.int 2
    %53466 = torch.aten.mul.int %53465, %int2_50897 : !torch.int, !torch.int -> !torch.int
    %int16_50898 = torch.constant.int 16
    %int1_50899 = torch.constant.int 1
    %int128_50900 = torch.constant.int 128
    %53467 = torch.prim.ListConstruct %53466, %int16_50898, %int1_50899, %int128_50900 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53468 = torch.aten.view %53464, %53467 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53468, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53469 = torch.prim.ListConstruct %53432 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_50901 = torch.constant.bool false
    %53470 = torch.aten.index_put %53468, %53469, %53448, %false_50901 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53470, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_50902 = torch.constant.int 32
    %int2_50903 = torch.constant.int 2
    %int16_50904 = torch.constant.int 16
    %int1_50905 = torch.constant.int 1
    %int128_50906 = torch.constant.int 128
    %53471 = torch.prim.ListConstruct %3023, %int32_50902, %int2_50903, %int16_50904, %int1_50905, %int128_50906 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53472 = torch.aten.view %53470, %53471 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53472, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_50907 = torch.constant.int 131072
    %53473 = torch.prim.ListConstruct %3023, %int131072_50907 : (!torch.int, !torch.int) -> !torch.list<int>
    %53474 = torch.aten.view %53472, %53473 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %53474, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
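    // The remaining seven repetitions below differ only in the buffer
    // (%51625 .. %51697), its size handle (%3026 .. %3044), and the
    // per-shard index/payload pair being scattered.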
    %int32_50908 = torch.constant.int 32
    %int2_50909 = torch.constant.int 2
    %int16_50910 = torch.constant.int 16
    %int1_50911 = torch.constant.int 1
    %int128_50912 = torch.constant.int 128
    %53475 = torch.prim.ListConstruct %3026, %int32_50908, %int2_50909, %int16_50910, %int1_50911, %int128_50912 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53476 = torch.aten.view %51625, %53475 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53476, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_50913 = torch.constant.int 32
    %53477 = torch.aten.mul.int %3026, %int32_50913 : !torch.int, !torch.int -> !torch.int
    %int2_50914 = torch.constant.int 2
    %53478 = torch.aten.mul.int %53477, %int2_50914 : !torch.int, !torch.int -> !torch.int
    %int16_50915 = torch.constant.int 16
    %int1_50916 = torch.constant.int 1
    %int128_50917 = torch.constant.int 128
    %53479 = torch.prim.ListConstruct %53478, %int16_50915, %int1_50916, %int128_50917 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53480 = torch.aten.view %53476, %53479 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53480, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53481 = torch.prim.ListConstruct %53434 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_50918 = torch.constant.bool false
    %53482 = torch.aten.index_put %53480, %53481, %53450, %false_50918 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53482, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_50919 = torch.constant.int 32
    %int2_50920 = torch.constant.int 2
    %int16_50921 = torch.constant.int 16
    %int1_50922 = torch.constant.int 1
    %int128_50923 = torch.constant.int 128
    %53483 = torch.prim.ListConstruct %3026, %int32_50919, %int2_50920, %int16_50921, %int1_50922, %int128_50923 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53484 = torch.aten.view %53482, %53483 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53484, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_50924 = torch.constant.int 131072
    %53485 = torch.prim.ListConstruct %3026, %int131072_50924 : (!torch.int, !torch.int) -> !torch.list<int>
    %53486 = torch.aten.view %53484, %53485 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %53486, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_50925 = torch.constant.int 32
    %int2_50926 = torch.constant.int 2
    %int16_50927 = torch.constant.int 16
    %int1_50928 = torch.constant.int 1
    %int128_50929 = torch.constant.int 128
    %53487 = torch.prim.ListConstruct %3029, %int32_50925, %int2_50926, %int16_50927, %int1_50928, %int128_50929 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53488 = torch.aten.view %51637, %53487 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53488, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_50930 = torch.constant.int 32
    %53489 = torch.aten.mul.int %3029, %int32_50930 : !torch.int, !torch.int -> !torch.int
    %int2_50931 = torch.constant.int 2
    %53490 = torch.aten.mul.int %53489, %int2_50931 : !torch.int, !torch.int -> !torch.int
    %int16_50932 = torch.constant.int 16
    %int1_50933 = torch.constant.int 1
    %int128_50934 = torch.constant.int 128
    %53491 = torch.prim.ListConstruct %53490, %int16_50932, %int1_50933, %int128_50934 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53492 = torch.aten.view %53488, %53491 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53492, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53493 = torch.prim.ListConstruct %53436 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_50935 = torch.constant.bool false
    %53494 = torch.aten.index_put %53492, %53493, %53452, %false_50935 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53494, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_50936 = torch.constant.int 32
    %int2_50937 = torch.constant.int 2
    %int16_50938 = torch.constant.int 16
    %int1_50939 = torch.constant.int 1
    %int128_50940 = torch.constant.int 128
    %53495 = torch.prim.ListConstruct %3029, %int32_50936, %int2_50937, %int16_50938, %int1_50939, %int128_50940 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53496 = torch.aten.view %53494, %53495 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53496, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_50941 = torch.constant.int 131072
    %53497 = torch.prim.ListConstruct %3029, %int131072_50941 : (!torch.int, !torch.int) -> !torch.list<int>
    %53498 = torch.aten.view %53496, %53497 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %53498, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_50942 = torch.constant.int 32
    %int2_50943 = torch.constant.int 2
    %int16_50944 = torch.constant.int 16
    %int1_50945 = torch.constant.int 1
    %int128_50946 = torch.constant.int 128
    %53499 = torch.prim.ListConstruct %3032, %int32_50942, %int2_50943, %int16_50944, %int1_50945, %int128_50946 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53500 = torch.aten.view %51649, %53499 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53500, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_50947 = torch.constant.int 32
    %53501 = torch.aten.mul.int %3032, %int32_50947 : !torch.int, !torch.int -> !torch.int
    %int2_50948 = torch.constant.int 2
    %53502 = torch.aten.mul.int %53501, %int2_50948 : !torch.int, !torch.int -> !torch.int
    %int16_50949 = torch.constant.int 16
    %int1_50950 = torch.constant.int 1
    %int128_50951 = torch.constant.int 128
    %53503 = torch.prim.ListConstruct %53502, %int16_50949, %int1_50950, %int128_50951 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53504 = torch.aten.view %53500, %53503 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53504, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53505 = torch.prim.ListConstruct %53438 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_50952 = torch.constant.bool false
    %53506 = torch.aten.index_put %53504, %53505, %53454, %false_50952 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53506, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_50953 = torch.constant.int 32
    %int2_50954 = torch.constant.int 2
    %int16_50955 = torch.constant.int 16
    %int1_50956 = torch.constant.int 1
    %int128_50957 = torch.constant.int 128
    %53507 = torch.prim.ListConstruct %3032, %int32_50953, %int2_50954, %int16_50955, %int1_50956, %int128_50957 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53508 = torch.aten.view %53506, %53507 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53508, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_50958 = torch.constant.int 131072
    %53509 = torch.prim.ListConstruct %3032, %int131072_50958 : (!torch.int, !torch.int) -> !torch.list<int>
    %53510 = torch.aten.view %53508, %53509 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %53510, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_50959 = torch.constant.int 32
    %int2_50960 = torch.constant.int 2
    %int16_50961 = torch.constant.int 16
    %int1_50962 = torch.constant.int 1
    %int128_50963 = torch.constant.int 128
    %53511 = torch.prim.ListConstruct %3035, %int32_50959, %int2_50960, %int16_50961, %int1_50962, %int128_50963 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53512 = torch.aten.view %51661, %53511 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53512, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_50964 = torch.constant.int 32
    %53513 = torch.aten.mul.int %3035, %int32_50964 : !torch.int, !torch.int -> !torch.int
    %int2_50965 = torch.constant.int 2
    %53514 = torch.aten.mul.int %53513, %int2_50965 : !torch.int, !torch.int -> !torch.int
    %int16_50966 = torch.constant.int 16
    %int1_50967 = torch.constant.int 1
    %int128_50968 = torch.constant.int 128
    %53515 = torch.prim.ListConstruct %53514, %int16_50966, %int1_50967, %int128_50968 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53516 = torch.aten.view %53512, %53515 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53516, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53517 = torch.prim.ListConstruct %53440 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_50969 = torch.constant.bool false
    %53518 = torch.aten.index_put %53516, %53517, %53456, %false_50969 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53518, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_50970 = torch.constant.int 32
    %int2_50971 = torch.constant.int 2
    %int16_50972 = torch.constant.int 16
    %int1_50973 = torch.constant.int 1
    %int128_50974 = torch.constant.int 128
    %53519 = torch.prim.ListConstruct %3035, %int32_50970, %int2_50971, %int16_50972, %int1_50973, %int128_50974 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53520 = torch.aten.view %53518, %53519 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53520, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_50975 = torch.constant.int 131072
    %53521 = torch.prim.ListConstruct %3035, %int131072_50975 : (!torch.int, !torch.int) -> !torch.list<int>
    %53522 = torch.aten.view %53520, %53521 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %53522, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_50976 = torch.constant.int 32
    %int2_50977 = torch.constant.int 2
    %int16_50978 = torch.constant.int 16
    %int1_50979 = torch.constant.int 1
    %int128_50980 = torch.constant.int 128
    %53523 = torch.prim.ListConstruct %3038, %int32_50976, %int2_50977, %int16_50978, %int1_50979, %int128_50980 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53524 = torch.aten.view %51673, %53523 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53524, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_50981 = torch.constant.int 32
    %53525 = torch.aten.mul.int %3038, %int32_50981 : !torch.int, !torch.int -> !torch.int
    %int2_50982 = torch.constant.int 2
    %53526 = torch.aten.mul.int %53525, %int2_50982 : !torch.int, !torch.int -> !torch.int
    %int16_50983 = torch.constant.int 16
    %int1_50984 = torch.constant.int 1
    %int128_50985 = torch.constant.int 128
    %53527 = torch.prim.ListConstruct %53526, %int16_50983, %int1_50984, %int128_50985 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53528 = torch.aten.view %53524, %53527 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53528, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53529 = torch.prim.ListConstruct %53442 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_50986 = torch.constant.bool false
    %53530 = torch.aten.index_put %53528, %53529, %53458, %false_50986 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53530, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_50987 = torch.constant.int 32
    %int2_50988 = torch.constant.int 2
    %int16_50989 = torch.constant.int 16
    %int1_50990 = torch.constant.int 1
    %int128_50991 = torch.constant.int 128
    %53531 = torch.prim.ListConstruct %3038, %int32_50987, %int2_50988, %int16_50989, %int1_50990, %int128_50991 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53532 = torch.aten.view %53530, %53531 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53532, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_50992 = torch.constant.int 131072
    %53533 = torch.prim.ListConstruct %3038, %int131072_50992 : (!torch.int, !torch.int) -> !torch.list<int>
    %53534 = torch.aten.view %53532, %53533 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %53534, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_50993 = torch.constant.int 32
    %int2_50994 = torch.constant.int 2
    %int16_50995 = torch.constant.int 16
    %int1_50996 = torch.constant.int 1
    %int128_50997 = torch.constant.int 128
    %53535 = torch.prim.ListConstruct %3041, %int32_50993, %int2_50994, %int16_50995, %int1_50996, %int128_50997 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53536 = torch.aten.view %51685, %53535 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53536, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_50998 = torch.constant.int 32
    %53537 = torch.aten.mul.int %3041, %int32_50998 : !torch.int, !torch.int -> !torch.int
    %int2_50999 = torch.constant.int 2
    %53538 = torch.aten.mul.int %53537, %int2_50999 : !torch.int, !torch.int -> !torch.int
    %int16_51000 = torch.constant.int 16
    %int1_51001 = torch.constant.int 1
    %int128_51002 = torch.constant.int 128
    %53539 = torch.prim.ListConstruct %53538, %int16_51000, %int1_51001, %int128_51002 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53540 = torch.aten.view %53536, %53539 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53540, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53541 = torch.prim.ListConstruct %53444 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_51003 = torch.constant.bool false
    %53542 = torch.aten.index_put %53540, %53541, %53460, %false_51003 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53542, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_51004 = torch.constant.int 32
    %int2_51005 = torch.constant.int 2
    %int16_51006 = torch.constant.int 16
    %int1_51007 = torch.constant.int 1
    %int128_51008 = torch.constant.int 128
    %53543 = torch.prim.ListConstruct %3041, %int32_51004, %int2_51005, %int16_51006, %int1_51007, %int128_51008 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53544 = torch.aten.view %53542, %53543 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53544, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_51009 = torch.constant.int 131072
    %53545 = torch.prim.ListConstruct %3041, %int131072_51009 : (!torch.int, !torch.int) -> !torch.list<int>
    %53546 = torch.aten.view %53544, %53545 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %53546, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_51010 = torch.constant.int 32
    %int2_51011 = torch.constant.int 2
    %int16_51012 = torch.constant.int 16
    %int1_51013 = torch.constant.int 1
    %int128_51014 = torch.constant.int 128
    %53547 = torch.prim.ListConstruct %3044, %int32_51010, %int2_51011, %int16_51012, %int1_51013, %int128_51014 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53548 = torch.aten.view %51697, %53547 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53548, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_51015 = torch.constant.int 32
    %53549 = torch.aten.mul.int %3044, %int32_51015 : !torch.int, !torch.int -> !torch.int
    %int2_51016 = torch.constant.int 2
    %53550 = torch.aten.mul.int %53549, %int2_51016 : !torch.int, !torch.int -> !torch.int
    %int16_51017 = torch.constant.int 16
    %int1_51018 = torch.constant.int 1
    %int128_51019 = torch.constant.int 128
    %53551 = torch.prim.ListConstruct %53550, %int16_51017, %int1_51018, %int128_51019 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53552 = torch.aten.view %53548, %53551 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53552, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %53553 = torch.prim.ListConstruct %53446 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_51020 = torch.constant.bool false
    %53554 = torch.aten.index_put %53552, %53553, %53462, %false_51020 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %53554, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_51021 = torch.constant.int 32
    %int2_51022 = torch.constant.int 2
    %int16_51023 = torch.constant.int 16
    %int1_51024 = torch.constant.int 1
    %int128_51025 = torch.constant.int 128
    %53555 = torch.prim.ListConstruct %3044, %int32_51021, %int2_51022, %int16_51023, %int1_51024, %int128_51025 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53556 = torch.aten.view %53554, %53555 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %53556, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_51026 = torch.constant.int 131072
    %53557 = torch.prim.ListConstruct %3044, %int131072_51026 : (!torch.int, !torch.int) -> !torch.list<int>
    %53558 = torch.aten.view %53556, %53557 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %53558, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
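    // With the cache writes issued, the eight [4, ?, 1, 128] tensors
    // %53173..%53278 are unsqueezed to [4, ?, 1, 1, 128], expanded to
    // [4, ?, 1, 4, 128], and collapsed to [4, ?, 4, 128]. This is the usual
    // grouped-query broadcast: each single KV head is repeated 4x to line up
    // with the query heads (an interpretation inferred from the shapes).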
    %int-2_51027 = torch.constant.int -2
    %53559 = torch.aten.unsqueeze %53173, %int-2_51027 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51028 = torch.constant.int -2
    %53560 = torch.aten.unsqueeze %53188, %int-2_51028 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51029 = torch.constant.int -2
    %53561 = torch.aten.unsqueeze %53203, %int-2_51029 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51030 = torch.constant.int -2
    %53562 = torch.aten.unsqueeze %53218, %int-2_51030 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51031 = torch.constant.int -2
    %53563 = torch.aten.unsqueeze %53233, %int-2_51031 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51032 = torch.constant.int -2
    %53564 = torch.aten.unsqueeze %53248, %int-2_51032 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51033 = torch.constant.int -2
    %53565 = torch.aten.unsqueeze %53263, %int-2_51033 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51034 = torch.constant.int -2
    %53566 = torch.aten.unsqueeze %53278, %int-2_51034 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int4_51035 = torch.constant.int 4
    %int1_51036 = torch.constant.int 1
    %int4_51037 = torch.constant.int 4
    %int128_51038 = torch.constant.int 128
    %53567 = torch.prim.ListConstruct %int4_51035, %53159, %int1_51036, %int4_51037, %int128_51038 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51039 = torch.constant.bool false
    %53568 = torch.aten.expand %53559, %53567, %false_51039 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51040 = torch.constant.int 4
    %int1_51041 = torch.constant.int 1
    %int4_51042 = torch.constant.int 4
    %int128_51043 = torch.constant.int 128
    %53569 = torch.prim.ListConstruct %int4_51040, %53159, %int1_51041, %int4_51042, %int128_51043 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51044 = torch.constant.bool false
    %53570 = torch.aten.expand %53560, %53569, %false_51044 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51045 = torch.constant.int 4
    %int1_51046 = torch.constant.int 1
    %int4_51047 = torch.constant.int 4
    %int128_51048 = torch.constant.int 128
    %53571 = torch.prim.ListConstruct %int4_51045, %53159, %int1_51046, %int4_51047, %int128_51048 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51049 = torch.constant.bool false
    %53572 = torch.aten.expand %53561, %53571, %false_51049 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51050 = torch.constant.int 4
    %int1_51051 = torch.constant.int 1
    %int4_51052 = torch.constant.int 4
    %int128_51053 = torch.constant.int 128
    %53573 = torch.prim.ListConstruct %int4_51050, %53159, %int1_51051, %int4_51052, %int128_51053 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51054 = torch.constant.bool false
    %53574 = torch.aten.expand %53562, %53573, %false_51054 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51055 = torch.constant.int 4
    %int1_51056 = torch.constant.int 1
    %int4_51057 = torch.constant.int 4
    %int128_51058 = torch.constant.int 128
    %53575 = torch.prim.ListConstruct %int4_51055, %53159, %int1_51056, %int4_51057, %int128_51058 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51059 = torch.constant.bool false
    %53576 = torch.aten.expand %53563, %53575, %false_51059 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51060 = torch.constant.int 4
    %int1_51061 = torch.constant.int 1
    %int4_51062 = torch.constant.int 4
    %int128_51063 = torch.constant.int 128
    %53577 = torch.prim.ListConstruct %int4_51060, %53159, %int1_51061, %int4_51062, %int128_51063 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51064 = torch.constant.bool false
    %53578 = torch.aten.expand %53564, %53577, %false_51064 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51065 = torch.constant.int 4
    %int1_51066 = torch.constant.int 1
    %int4_51067 = torch.constant.int 4
    %int128_51068 = torch.constant.int 128
    %53579 = torch.prim.ListConstruct %int4_51065, %53159, %int1_51066, %int4_51067, %int128_51068 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51069 = torch.constant.bool false
    %53580 = torch.aten.expand %53565, %53579, %false_51069 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51070 = torch.constant.int 4
    %int1_51071 = torch.constant.int 1
    %int4_51072 = torch.constant.int 4
    %int128_51073 = torch.constant.int 128
    %53581 = torch.prim.ListConstruct %int4_51070, %53159, %int1_51071, %int4_51072, %int128_51073 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51074 = torch.constant.bool false
    %53582 = torch.aten.expand %53566, %53581, %false_51074 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51075 = torch.constant.int 4
    %int4_51076 = torch.constant.int 4
    %int128_51077 = torch.constant.int 128
    %53583 = torch.prim.ListConstruct %int4_51075, %53159, %int4_51076, %int128_51077 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53584 = torch.aten.view %53568, %53583 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51078 = torch.constant.int 4
    %int4_51079 = torch.constant.int 4
    %int128_51080 = torch.constant.int 128
    %53585 = torch.prim.ListConstruct %int4_51078, %53159, %int4_51079, %int128_51080 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53586 = torch.aten.view %53570, %53585 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51081 = torch.constant.int 4
    %int4_51082 = torch.constant.int 4
    %int128_51083 = torch.constant.int 128
    %53587 = torch.prim.ListConstruct %int4_51081, %53159, %int4_51082, %int128_51083 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53588 = torch.aten.view %53572, %53587 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51084 = torch.constant.int 4
    %int4_51085 = torch.constant.int 4
    %int128_51086 = torch.constant.int 128
    %53589 = torch.prim.ListConstruct %int4_51084, %53159, %int4_51085, %int128_51086 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53590 = torch.aten.view %53574, %53589 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51087 = torch.constant.int 4
    %int4_51088 = torch.constant.int 4
    %int128_51089 = torch.constant.int 128
    %53591 = torch.prim.ListConstruct %int4_51087, %53159, %int4_51088, %int128_51089 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53592 = torch.aten.view %53576, %53591 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51090 = torch.constant.int 4
    %int4_51091 = torch.constant.int 4
    %int128_51092 = torch.constant.int 128
    %53593 = torch.prim.ListConstruct %int4_51090, %53159, %int4_51091, %int128_51092 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53594 = torch.aten.view %53578, %53593 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51093 = torch.constant.int 4
    %int4_51094 = torch.constant.int 4
    %int128_51095 = torch.constant.int 128
    %53595 = torch.prim.ListConstruct %int4_51093, %53159, %int4_51094, %int128_51095 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53596 = torch.aten.view %53580, %53595 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51096 = torch.constant.int 4
    %int4_51097 = torch.constant.int 4
    %int128_51098 = torch.constant.int 128
    %53597 = torch.prim.ListConstruct %int4_51096, %53159, %int4_51097, %int128_51098 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53598 = torch.aten.view %53582, %53597 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
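    // The second operand set (%52948 .. %52962) now receives the same
    // unsqueeze treatment; by position in the layer this is presumably the
    // value-side counterpart of the tensors broadcast above.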
    %int-2_51099 = torch.constant.int -2
    %53599 = torch.aten.unsqueeze %52948, %int-2_51099 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51100 = torch.constant.int -2
    %53600 = torch.aten.unsqueeze %52950, %int-2_51100 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51101 = torch.constant.int -2
    %53601 = torch.aten.unsqueeze %52952, %int-2_51101 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51102 = torch.constant.int -2
    %53602 = torch.aten.unsqueeze %52954, %int-2_51102 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51103 = torch.constant.int -2
    %53603 = torch.aten.unsqueeze %52956, %int-2_51103 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53603, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51104 = torch.constant.int -2
    %53604 = torch.aten.unsqueeze %52958, %int-2_51104 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51105 = torch.constant.int -2
    %53605 = torch.aten.unsqueeze %52960, %int-2_51105 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_51106 = torch.constant.int -2
    %53606 = torch.aten.unsqueeze %52962, %int-2_51106 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %53606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_51107 = torch.constant.int 1
    %53607 = torch.aten.size.int %52872, %int1_51107 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
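    // %53607 re-reads the dynamic sequence dimension from %52872 and serves
    // as the expand extent for this second set, which is then broadcast to
    // [4, ?, 1, 4, 128] and collapsed to [4, ?, 4, 128] exactly as before.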
    %int4_51108 = torch.constant.int 4
    %int1_51109 = torch.constant.int 1
    %int4_51110 = torch.constant.int 4
    %int128_51111 = torch.constant.int 128
    %53608 = torch.prim.ListConstruct %int4_51108, %53607, %int1_51109, %int4_51110, %int128_51111 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51112 = torch.constant.bool false
    %53609 = torch.aten.expand %53599, %53608, %false_51112 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51113 = torch.constant.int 4
    %int1_51114 = torch.constant.int 1
    %int4_51115 = torch.constant.int 4
    %int128_51116 = torch.constant.int 128
    %53610 = torch.prim.ListConstruct %int4_51113, %53607, %int1_51114, %int4_51115, %int128_51116 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51117 = torch.constant.bool false
    %53611 = torch.aten.expand %53600, %53610, %false_51117 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51118 = torch.constant.int 4
    %int1_51119 = torch.constant.int 1
    %int4_51120 = torch.constant.int 4
    %int128_51121 = torch.constant.int 128
    %53612 = torch.prim.ListConstruct %int4_51118, %53607, %int1_51119, %int4_51120, %int128_51121 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51122 = torch.constant.bool false
    %53613 = torch.aten.expand %53601, %53612, %false_51122 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51123 = torch.constant.int 4
    %int1_51124 = torch.constant.int 1
    %int4_51125 = torch.constant.int 4
    %int128_51126 = torch.constant.int 128
    %53614 = torch.prim.ListConstruct %int4_51123, %53607, %int1_51124, %int4_51125, %int128_51126 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51127 = torch.constant.bool false
    %53615 = torch.aten.expand %53602, %53614, %false_51127 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51128 = torch.constant.int 4
    %int1_51129 = torch.constant.int 1
    %int4_51130 = torch.constant.int 4
    %int128_51131 = torch.constant.int 128
    %53616 = torch.prim.ListConstruct %int4_51128, %53607, %int1_51129, %int4_51130, %int128_51131 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51132 = torch.constant.bool false
    %53617 = torch.aten.expand %53603, %53616, %false_51132 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51133 = torch.constant.int 4
    %int1_51134 = torch.constant.int 1
    %int4_51135 = torch.constant.int 4
    %int128_51136 = torch.constant.int 128
    %53618 = torch.prim.ListConstruct %int4_51133, %53607, %int1_51134, %int4_51135, %int128_51136 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51137 = torch.constant.bool false
    %53619 = torch.aten.expand %53604, %53618, %false_51137 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51138 = torch.constant.int 4
    %int1_51139 = torch.constant.int 1
    %int4_51140 = torch.constant.int 4
    %int128_51141 = torch.constant.int 128
    %53620 = torch.prim.ListConstruct %int4_51138, %53607, %int1_51139, %int4_51140, %int128_51141 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51142 = torch.constant.bool false
    %53621 = torch.aten.expand %53605, %53620, %false_51142 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51143 = torch.constant.int 4
    %int1_51144 = torch.constant.int 1
    %int4_51145 = torch.constant.int 4
    %int128_51146 = torch.constant.int 128
    %53622 = torch.prim.ListConstruct %int4_51143, %53607, %int1_51144, %int4_51145, %int128_51146 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_51147 = torch.constant.bool false
    %53623 = torch.aten.expand %53606, %53622, %false_51147 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %53623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_51148 = torch.constant.int 4
    %int4_51149 = torch.constant.int 4
    %int128_51150 = torch.constant.int 128
    %53624 = torch.prim.ListConstruct %int4_51148, %53607, %int4_51149, %int128_51150 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53625 = torch.aten.view %53609, %53624 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51151 = torch.constant.int 4
    %int4_51152 = torch.constant.int 4
    %int128_51153 = torch.constant.int 128
    %53626 = torch.prim.ListConstruct %int4_51151, %53607, %int4_51152, %int128_51153 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53627 = torch.aten.view %53611, %53626 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51154 = torch.constant.int 4
    %int4_51155 = torch.constant.int 4
    %int128_51156 = torch.constant.int 128
    %53628 = torch.prim.ListConstruct %int4_51154, %53607, %int4_51155, %int128_51156 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53629 = torch.aten.view %53613, %53628 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51157 = torch.constant.int 4
    %int4_51158 = torch.constant.int 4
    %int128_51159 = torch.constant.int 128
    %53630 = torch.prim.ListConstruct %int4_51157, %53607, %int4_51158, %int128_51159 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53631 = torch.aten.view %53615, %53630 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51160 = torch.constant.int 4
    %int4_51161 = torch.constant.int 4
    %int128_51162 = torch.constant.int 128
    %53632 = torch.prim.ListConstruct %int4_51160, %53607, %int4_51161, %int128_51162 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53633 = torch.aten.view %53617, %53632 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51163 = torch.constant.int 4
    %int4_51164 = torch.constant.int 4
    %int128_51165 = torch.constant.int 128
    %53634 = torch.prim.ListConstruct %int4_51163, %53607, %int4_51164, %int128_51165 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53635 = torch.aten.view %53619, %53634 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51166 = torch.constant.int 4
    %int4_51167 = torch.constant.int 4
    %int128_51168 = torch.constant.int 128
    %53636 = torch.prim.ListConstruct %int4_51166, %53607, %int4_51167, %int128_51168 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53637 = torch.aten.view %53621, %53636 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_51169 = torch.constant.int 4
    %int4_51170 = torch.constant.int 4
    %int128_51171 = torch.constant.int 128
    %53638 = torch.prim.ListConstruct %int4_51169, %53607, %int4_51170, %int128_51171 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53639 = torch.aten.view %53623, %53638 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
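    // Transpose Q (%53015...%53120), K (%53584...%53598), and V (%53625...%53639)
    // from [batch, seq, heads, head_dim] to [batch, heads, seq, head_dim], the
    // layout the attention calls below expect; the Q/K/V roles follow from the
    // operand order of those calls.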
    %int1_51172 = torch.constant.int 1
    %int2_51173 = torch.constant.int 2
    %53640 = torch.aten.transpose.int %53015, %int1_51172, %int2_51173 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53640, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51174 = torch.constant.int 1
    %int2_51175 = torch.constant.int 2
    %53641 = torch.aten.transpose.int %53030, %int1_51174, %int2_51175 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53641, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51176 = torch.constant.int 1
    %int2_51177 = torch.constant.int 2
    %53642 = torch.aten.transpose.int %53045, %int1_51176, %int2_51177 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53642, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51178 = torch.constant.int 1
    %int2_51179 = torch.constant.int 2
    %53643 = torch.aten.transpose.int %53060, %int1_51178, %int2_51179 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53643, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51180 = torch.constant.int 1
    %int2_51181 = torch.constant.int 2
    %53644 = torch.aten.transpose.int %53075, %int1_51180, %int2_51181 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53644, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51182 = torch.constant.int 1
    %int2_51183 = torch.constant.int 2
    %53645 = torch.aten.transpose.int %53090, %int1_51182, %int2_51183 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53645, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51184 = torch.constant.int 1
    %int2_51185 = torch.constant.int 2
    %53646 = torch.aten.transpose.int %53105, %int1_51184, %int2_51185 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53646, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51186 = torch.constant.int 1
    %int2_51187 = torch.constant.int 2
    %53647 = torch.aten.transpose.int %53120, %int1_51186, %int2_51187 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53647, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51188 = torch.constant.int 1
    %int2_51189 = torch.constant.int 2
    %53648 = torch.aten.transpose.int %53584, %int1_51188, %int2_51189 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53648, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51190 = torch.constant.int 1
    %int2_51191 = torch.constant.int 2
    %53649 = torch.aten.transpose.int %53586, %int1_51190, %int2_51191 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53649, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51192 = torch.constant.int 1
    %int2_51193 = torch.constant.int 2
    %53650 = torch.aten.transpose.int %53588, %int1_51192, %int2_51193 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53650, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51194 = torch.constant.int 1
    %int2_51195 = torch.constant.int 2
    %53651 = torch.aten.transpose.int %53590, %int1_51194, %int2_51195 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53651, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51196 = torch.constant.int 1
    %int2_51197 = torch.constant.int 2
    %53652 = torch.aten.transpose.int %53592, %int1_51196, %int2_51197 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53652, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51198 = torch.constant.int 1
    %int2_51199 = torch.constant.int 2
    %53653 = torch.aten.transpose.int %53594, %int1_51198, %int2_51199 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53653, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51200 = torch.constant.int 1
    %int2_51201 = torch.constant.int 2
    %53654 = torch.aten.transpose.int %53596, %int1_51200, %int2_51201 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53654, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51202 = torch.constant.int 1
    %int2_51203 = torch.constant.int 2
    %53655 = torch.aten.transpose.int %53598, %int1_51202, %int2_51203 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53655, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51204 = torch.constant.int 1
    %int2_51205 = torch.constant.int 2
    %53656 = torch.aten.transpose.int %53625, %int1_51204, %int2_51205 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53656, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51206 = torch.constant.int 1
    %int2_51207 = torch.constant.int 2
    %53657 = torch.aten.transpose.int %53627, %int1_51206, %int2_51207 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53657, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51208 = torch.constant.int 1
    %int2_51209 = torch.constant.int 2
    %53658 = torch.aten.transpose.int %53629, %int1_51208, %int2_51209 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53658, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51210 = torch.constant.int 1
    %int2_51211 = torch.constant.int 2
    %53659 = torch.aten.transpose.int %53631, %int1_51210, %int2_51211 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53659, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51212 = torch.constant.int 1
    %int2_51213 = torch.constant.int 2
    %53660 = torch.aten.transpose.int %53633, %int1_51212, %int2_51213 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53660, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51214 = torch.constant.int 1
    %int2_51215 = torch.constant.int 2
    %53661 = torch.aten.transpose.int %53635, %int1_51214, %int2_51215 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53661, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51216 = torch.constant.int 1
    %int2_51217 = torch.constant.int 2
    %53662 = torch.aten.transpose.int %53637, %int1_51216, %int2_51217 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53662, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_51218 = torch.constant.int 1
    %int2_51219 = torch.constant.int 2
    %53663 = torch.aten.transpose.int %53639, %int1_51218, %int2_51219 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %53663, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
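    // One causal flash-attention call per shard: dropout_p = 0.0, is_causal =
    // true, and `none` for attn_mask and scale (so PyTorch's default
    // 1/sqrt(head_dim) scaling presumably applies). Each call also returns an
    // f32 logsumexp tensor, which appears unused in this section.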
    %float0.000000e00_51220 = torch.constant.float 0.000000e+00
    %true_51221 = torch.constant.bool true
    %none_51222 = torch.constant.none
    %none_51223 = torch.constant.none
    %53664:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%53640, %53648, %53656, %float0.000000e00_51220, %true_51221, %none_51222, %none_51223) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %53664#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_51224 = torch.constant.float 0.000000e+00
    %true_51225 = torch.constant.bool true
    %none_51226 = torch.constant.none
    %none_51227 = torch.constant.none
    %53665:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%53641, %53649, %53657, %float0.000000e00_51224, %true_51225, %none_51226, %none_51227) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %53665#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_51228 = torch.constant.float 0.000000e+00
    %true_51229 = torch.constant.bool true
    %none_51230 = torch.constant.none
    %none_51231 = torch.constant.none
    %53666:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%53642, %53650, %53658, %float0.000000e00_51228, %true_51229, %none_51230, %none_51231) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %53666#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_51232 = torch.constant.float 0.000000e+00
    %true_51233 = torch.constant.bool true
    %none_51234 = torch.constant.none
    %none_51235 = torch.constant.none
    %53667:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%53643, %53651, %53659, %float0.000000e00_51232, %true_51233, %none_51234, %none_51235) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %53667#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_51236 = torch.constant.float 0.000000e+00
    %true_51237 = torch.constant.bool true
    %none_51238 = torch.constant.none
    %none_51239 = torch.constant.none
    %53668:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%53644, %53652, %53660, %float0.000000e00_51236, %true_51237, %none_51238, %none_51239) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %53668#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_51240 = torch.constant.float 0.000000e+00
    %true_51241 = torch.constant.bool true
    %none_51242 = torch.constant.none
    %none_51243 = torch.constant.none
    %53669:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%53645, %53653, %53661, %float0.000000e00_51240, %true_51241, %none_51242, %none_51243) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %53669#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_51244 = torch.constant.float 0.000000e+00
    %true_51245 = torch.constant.bool true
    %none_51246 = torch.constant.none
    %none_51247 = torch.constant.none
    %53670:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%53646, %53654, %53662, %float0.000000e00_51244, %true_51245, %none_51246, %none_51247) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %53670#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_51248 = torch.constant.float 0.000000e+00
    %true_51249 = torch.constant.bool true
    %none_51250 = torch.constant.none
    %none_51251 = torch.constant.none
    %53671:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%53647, %53655, %53663, %float0.000000e00_51248, %true_51249, %none_51250, %none_51251) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %53671#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
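    // Transpose each attention output back to [batch, seq, heads, head_dim].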
    %int1_51252 = torch.constant.int 1
    %int2_51253 = torch.constant.int 2
    %53672 = torch.aten.transpose.int %53664#0, %int1_51252, %int2_51253 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_51254 = torch.constant.int 1
    %int2_51255 = torch.constant.int 2
    %53673 = torch.aten.transpose.int %53665#0, %int1_51254, %int2_51255 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_51256 = torch.constant.int 1
    %int2_51257 = torch.constant.int 2
    %53674 = torch.aten.transpose.int %53666#0, %int1_51256, %int2_51257 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_51258 = torch.constant.int 1
    %int2_51259 = torch.constant.int 2
    %53675 = torch.aten.transpose.int %53667#0, %int1_51258, %int2_51259 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_51260 = torch.constant.int 1
    %int2_51261 = torch.constant.int 2
    %53676 = torch.aten.transpose.int %53668#0, %int1_51260, %int2_51261 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_51262 = torch.constant.int 1
    %int2_51263 = torch.constant.int 2
    %53677 = torch.aten.transpose.int %53669#0, %int1_51262, %int2_51263 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_51264 = torch.constant.int 1
    %int2_51265 = torch.constant.int 2
    %53678 = torch.aten.transpose.int %53670#0, %int1_51264, %int2_51265 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_51266 = torch.constant.int 1
    %int2_51267 = torch.constant.int 2
    %53679 = torch.aten.transpose.int %53671#0, %int1_51266, %int2_51267 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %53679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
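    // Flatten the per-shard head dimensions: 4 heads x 128 = 512, giving
    // [4, ?, 512] with each shard's own dynamic sequence value
    // (%53001, %53016, %53031, ...).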
    %int4_51268 = torch.constant.int 4
    %int512_51269 = torch.constant.int 512
    %53680 = torch.prim.ListConstruct %int4_51268, %53001, %int512_51269 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53681 = torch.aten.view %53672, %53680 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %53681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_51270 = torch.constant.int 4
    %int512_51271 = torch.constant.int 512
    %53682 = torch.prim.ListConstruct %int4_51270, %53016, %int512_51271 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53683 = torch.aten.view %53673, %53682 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %53683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_51272 = torch.constant.int 4
    %int512_51273 = torch.constant.int 512
    %53684 = torch.prim.ListConstruct %int4_51272, %53031, %int512_51273 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53685 = torch.aten.view %53674, %53684 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %53685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_51274 = torch.constant.int 4
    %int512_51275 = torch.constant.int 512
    %53686 = torch.prim.ListConstruct %int4_51274, %53046, %int512_51275 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53687 = torch.aten.view %53675, %53686 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %53687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_51276 = torch.constant.int 4
    %int512_51277 = torch.constant.int 512
    %53688 = torch.prim.ListConstruct %int4_51276, %53061, %int512_51277 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53689 = torch.aten.view %53676, %53688 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %53689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_51278 = torch.constant.int 4
    %int512_51279 = torch.constant.int 512
    %53690 = torch.prim.ListConstruct %int4_51278, %53076, %int512_51279 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53691 = torch.aten.view %53677, %53690 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %53691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_51280 = torch.constant.int 4
    %int512_51281 = torch.constant.int 512
    %53692 = torch.prim.ListConstruct %int4_51280, %53091, %int512_51281 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53693 = torch.aten.view %53678, %53692 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %53693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_51282 = torch.constant.int 4
    %int512_51283 = torch.constant.int 512
    %53694 = torch.prim.ListConstruct %int4_51282, %53106, %int512_51283 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53695 = torch.aten.view %53679, %53694 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %53695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
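    // Transpose the eight [4096, 512] weight shards (%1984...%1991) to
    // [512, 4096] for the matmuls below. These are presumably the attention
    // output-projection weights, sharded along the input dimension
    // (row-parallel), which is why the partial products must later be summed
    // across devices.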
    %int1_51284 = torch.constant.int 1
    %int0_51285 = torch.constant.int 0
    %53696 = torch.prim.ListConstruct %int1_51284, %int0_51285 : (!torch.int, !torch.int) -> !torch.list<int>
    %53697 = torch.aten.permute %1984, %53696 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_51286 = torch.constant.int 1
    %int0_51287 = torch.constant.int 0
    %53698 = torch.prim.ListConstruct %int1_51286, %int0_51287 : (!torch.int, !torch.int) -> !torch.list<int>
    %53699 = torch.aten.permute %1985, %53698 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_51288 = torch.constant.int 1
    %int0_51289 = torch.constant.int 0
    %53700 = torch.prim.ListConstruct %int1_51288, %int0_51289 : (!torch.int, !torch.int) -> !torch.list<int>
    %53701 = torch.aten.permute %1986, %53700 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_51290 = torch.constant.int 1
    %int0_51291 = torch.constant.int 0
    %53702 = torch.prim.ListConstruct %int1_51290, %int0_51291 : (!torch.int, !torch.int) -> !torch.list<int>
    %53703 = torch.aten.permute %1987, %53702 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_51292 = torch.constant.int 1
    %int0_51293 = torch.constant.int 0
    %53704 = torch.prim.ListConstruct %int1_51292, %int0_51293 : (!torch.int, !torch.int) -> !torch.list<int>
    %53705 = torch.aten.permute %1988, %53704 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_51294 = torch.constant.int 1
    %int0_51295 = torch.constant.int 0
    %53706 = torch.prim.ListConstruct %int1_51294, %int0_51295 : (!torch.int, !torch.int) -> !torch.list<int>
    %53707 = torch.aten.permute %1989, %53706 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_51296 = torch.constant.int 1
    %int0_51297 = torch.constant.int 0
    %53708 = torch.prim.ListConstruct %int1_51296, %int0_51297 : (!torch.int, !torch.int) -> !torch.list<int>
    %53709 = torch.aten.permute %1990, %53708 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_51298 = torch.constant.int 1
    %int0_51299 = torch.constant.int 0
    %53710 = torch.prim.ListConstruct %int1_51298, %int0_51299 : (!torch.int, !torch.int) -> !torch.list<int>
    %53711 = torch.aten.permute %1991, %53710 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
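    // Per-shard output projection: flatten [4, ?, 512] to [4*?, 512], matmul
    // with the [512, 4096] weight shard, and reshape back to [4, ?, 4096].
    // Each result covers only this shard's slice of the input features; the
    // transfers and adds below combine the eight partials.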
    %int4_51300 = torch.constant.int 4
    %53712 = torch.aten.mul.int %int4_51300, %53001 : !torch.int, !torch.int -> !torch.int
    %int512_51301 = torch.constant.int 512
    %53713 = torch.prim.ListConstruct %53712, %int512_51301 : (!torch.int, !torch.int) -> !torch.list<int>
    %53714 = torch.aten.view %53681, %53713 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %53714, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %53715 = torch.aten.mm %53714, %53697 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %53715, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51302 = torch.constant.int 4
    %int4096_51303 = torch.constant.int 4096
    %53716 = torch.prim.ListConstruct %int4_51302, %53001, %int4096_51303 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53717 = torch.aten.view %53715, %53716 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_51304 = torch.constant.int 4
    %53718 = torch.aten.mul.int %int4_51304, %53016 : !torch.int, !torch.int -> !torch.int
    %int512_51305 = torch.constant.int 512
    %53719 = torch.prim.ListConstruct %53718, %int512_51305 : (!torch.int, !torch.int) -> !torch.list<int>
    %53720 = torch.aten.view %53683, %53719 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %53720, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %53721 = torch.aten.mm %53720, %53699 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %53721, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51306 = torch.constant.int 4
    %int4096_51307 = torch.constant.int 4096
    %53722 = torch.prim.ListConstruct %int4_51306, %53016, %int4096_51307 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53723 = torch.aten.view %53721, %53722 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_51308 = torch.constant.int 4
    %53724 = torch.aten.mul.int %int4_51308, %53031 : !torch.int, !torch.int -> !torch.int
    %int512_51309 = torch.constant.int 512
    %53725 = torch.prim.ListConstruct %53724, %int512_51309 : (!torch.int, !torch.int) -> !torch.list<int>
    %53726 = torch.aten.view %53685, %53725 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %53726, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %53727 = torch.aten.mm %53726, %53701 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %53727, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51310 = torch.constant.int 4
    %int4096_51311 = torch.constant.int 4096
    %53728 = torch.prim.ListConstruct %int4_51310, %53031, %int4096_51311 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53729 = torch.aten.view %53727, %53728 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_51312 = torch.constant.int 4
    %53730 = torch.aten.mul.int %int4_51312, %53046 : !torch.int, !torch.int -> !torch.int
    %int512_51313 = torch.constant.int 512
    %53731 = torch.prim.ListConstruct %53730, %int512_51313 : (!torch.int, !torch.int) -> !torch.list<int>
    %53732 = torch.aten.view %53687, %53731 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %53732, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %53733 = torch.aten.mm %53732, %53703 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %53733, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51314 = torch.constant.int 4
    %int4096_51315 = torch.constant.int 4096
    %53734 = torch.prim.ListConstruct %int4_51314, %53046, %int4096_51315 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53735 = torch.aten.view %53733, %53734 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_51316 = torch.constant.int 4
    %53736 = torch.aten.mul.int %int4_51316, %53061 : !torch.int, !torch.int -> !torch.int
    %int512_51317 = torch.constant.int 512
    %53737 = torch.prim.ListConstruct %53736, %int512_51317 : (!torch.int, !torch.int) -> !torch.list<int>
    %53738 = torch.aten.view %53689, %53737 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %53738, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %53739 = torch.aten.mm %53738, %53705 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %53739, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51318 = torch.constant.int 4
    %int4096_51319 = torch.constant.int 4096
    %53740 = torch.prim.ListConstruct %int4_51318, %53061, %int4096_51319 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53741 = torch.aten.view %53739, %53740 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_51320 = torch.constant.int 4
    %53742 = torch.aten.mul.int %int4_51320, %53076 : !torch.int, !torch.int -> !torch.int
    %int512_51321 = torch.constant.int 512
    %53743 = torch.prim.ListConstruct %53742, %int512_51321 : (!torch.int, !torch.int) -> !torch.list<int>
    %53744 = torch.aten.view %53691, %53743 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %53744, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %53745 = torch.aten.mm %53744, %53707 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %53745, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51322 = torch.constant.int 4
    %int4096_51323 = torch.constant.int 4096
    %53746 = torch.prim.ListConstruct %int4_51322, %53076, %int4096_51323 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53747 = torch.aten.view %53745, %53746 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_51324 = torch.constant.int 4
    %53748 = torch.aten.mul.int %int4_51324, %53091 : !torch.int, !torch.int -> !torch.int
    %int512_51325 = torch.constant.int 512
    %53749 = torch.prim.ListConstruct %53748, %int512_51325 : (!torch.int, !torch.int) -> !torch.list<int>
    %53750 = torch.aten.view %53693, %53749 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %53750, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %53751 = torch.aten.mm %53750, %53709 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %53751, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51326 = torch.constant.int 4
    %int4096_51327 = torch.constant.int 4096
    %53752 = torch.prim.ListConstruct %int4_51326, %53091, %int4096_51327 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53753 = torch.aten.view %53751, %53752 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_51328 = torch.constant.int 4
    %53754 = torch.aten.mul.int %int4_51328, %53106 : !torch.int, !torch.int -> !torch.int
    %int512_51329 = torch.constant.int 512
    %53755 = torch.prim.ListConstruct %53754, %int512_51329 : (!torch.int, !torch.int) -> !torch.list<int>
    %53756 = torch.aten.view %53695, %53755 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %53756, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %53757 = torch.aten.mm %53756, %53711 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %53757, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51330 = torch.constant.int 4
    %int4096_51331 = torch.constant.int 4096
    %53758 = torch.prim.ListConstruct %int4_51330, %53106, %int4096_51331 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %53759 = torch.aten.view %53757, %53758 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
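    // Reduction step of an explicitly unrolled all-reduce: copy the seven
    // remote partials (%53723 ... %53759) to @__device_0, round-tripping
    // through builtin tensors so flow.tensor.transfer can carry the dynamic
    // sequence dimension.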
    %53760 = torch_c.to_builtin_tensor %53723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51332 = arith.constant 1 : index
    %dim_51333 = tensor.dim %53760, %c1_51332 : tensor<4x?x4096xf16>
    %53761 = flow.tensor.transfer %53760 : tensor<4x?x4096xf16>{%dim_51333} to #hal.device.promise<@__device_0>
    %53762 = torch_c.from_builtin_tensor %53761 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53763 = torch_c.to_builtin_tensor %53729 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51334 = arith.constant 1 : index
    %dim_51335 = tensor.dim %53763, %c1_51334 : tensor<4x?x4096xf16>
    %53764 = flow.tensor.transfer %53763 : tensor<4x?x4096xf16>{%dim_51335} to #hal.device.promise<@__device_0>
    %53765 = torch_c.from_builtin_tensor %53764 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53766 = torch_c.to_builtin_tensor %53735 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51336 = arith.constant 1 : index
    %dim_51337 = tensor.dim %53766, %c1_51336 : tensor<4x?x4096xf16>
    %53767 = flow.tensor.transfer %53766 : tensor<4x?x4096xf16>{%dim_51337} to #hal.device.promise<@__device_0>
    %53768 = torch_c.from_builtin_tensor %53767 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53769 = torch_c.to_builtin_tensor %53741 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51338 = arith.constant 1 : index
    %dim_51339 = tensor.dim %53769, %c1_51338 : tensor<4x?x4096xf16>
    %53770 = flow.tensor.transfer %53769 : tensor<4x?x4096xf16>{%dim_51339} to #hal.device.promise<@__device_0>
    %53771 = torch_c.from_builtin_tensor %53770 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53772 = torch_c.to_builtin_tensor %53747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51340 = arith.constant 1 : index
    %dim_51341 = tensor.dim %53772, %c1_51340 : tensor<4x?x4096xf16>
    %53773 = flow.tensor.transfer %53772 : tensor<4x?x4096xf16>{%dim_51341} to #hal.device.promise<@__device_0>
    %53774 = torch_c.from_builtin_tensor %53773 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53775 = torch_c.to_builtin_tensor %53753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51342 = arith.constant 1 : index
    %dim_51343 = tensor.dim %53775, %c1_51342 : tensor<4x?x4096xf16>
    %53776 = flow.tensor.transfer %53775 : tensor<4x?x4096xf16>{%dim_51343} to #hal.device.promise<@__device_0>
    %53777 = torch_c.from_builtin_tensor %53776 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53778 = torch_c.to_builtin_tensor %53759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51344 = arith.constant 1 : index
    %dim_51345 = tensor.dim %53778, %c1_51344 : tensor<4x?x4096xf16>
    %53779 = flow.tensor.transfer %53778 : tensor<4x?x4096xf16>{%dim_51345} to #hal.device.promise<@__device_0>
    %53780 = torch_c.from_builtin_tensor %53779 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
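    // Sum all eight partials on @__device_0 (local %53717 plus the seven
    // transferred copies).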
    %int1_51346 = torch.constant.int 1
    %53781 = torch.aten.add.Tensor %53717, %53762, %int1_51346 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51347 = torch.constant.int 1
    %53782 = torch.aten.add.Tensor %53781, %53765, %int1_51347 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51348 = torch.constant.int 1
    %53783 = torch.aten.add.Tensor %53782, %53768, %int1_51348 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51349 = torch.constant.int 1
    %53784 = torch.aten.add.Tensor %53783, %53771, %int1_51349 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51350 = torch.constant.int 1
    %53785 = torch.aten.add.Tensor %53784, %53774, %int1_51350 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51351 = torch.constant.int 1
    %53786 = torch.aten.add.Tensor %53785, %53777, %int1_51351 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51352 = torch.constant.int 1
    %53787 = torch.aten.add.Tensor %53786, %53780, %int1_51352 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
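    // Same pattern for @__device_1: transfer the other seven partials there and
    // sum them with the local %53723, so each device ends up with its own copy
    // of the fully reduced result.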
    %53788 = torch_c.to_builtin_tensor %53717 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51353 = arith.constant 1 : index
    %dim_51354 = tensor.dim %53788, %c1_51353 : tensor<4x?x4096xf16>
    %53789 = flow.tensor.transfer %53788 : tensor<4x?x4096xf16>{%dim_51354} to #hal.device.promise<@__device_1>
    %53790 = torch_c.from_builtin_tensor %53789 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53791 = torch_c.to_builtin_tensor %53729 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51355 = arith.constant 1 : index
    %dim_51356 = tensor.dim %53791, %c1_51355 : tensor<4x?x4096xf16>
    %53792 = flow.tensor.transfer %53791 : tensor<4x?x4096xf16>{%dim_51356} to #hal.device.promise<@__device_1>
    %53793 = torch_c.from_builtin_tensor %53792 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53794 = torch_c.to_builtin_tensor %53735 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51357 = arith.constant 1 : index
    %dim_51358 = tensor.dim %53794, %c1_51357 : tensor<4x?x4096xf16>
    %53795 = flow.tensor.transfer %53794 : tensor<4x?x4096xf16>{%dim_51358} to #hal.device.promise<@__device_1>
    %53796 = torch_c.from_builtin_tensor %53795 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53797 = torch_c.to_builtin_tensor %53741 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51359 = arith.constant 1 : index
    %dim_51360 = tensor.dim %53797, %c1_51359 : tensor<4x?x4096xf16>
    %53798 = flow.tensor.transfer %53797 : tensor<4x?x4096xf16>{%dim_51360} to #hal.device.promise<@__device_1>
    %53799 = torch_c.from_builtin_tensor %53798 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53800 = torch_c.to_builtin_tensor %53747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51361 = arith.constant 1 : index
    %dim_51362 = tensor.dim %53800, %c1_51361 : tensor<4x?x4096xf16>
    %53801 = flow.tensor.transfer %53800 : tensor<4x?x4096xf16>{%dim_51362} to #hal.device.promise<@__device_1>
    %53802 = torch_c.from_builtin_tensor %53801 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53803 = torch_c.to_builtin_tensor %53753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51363 = arith.constant 1 : index
    %dim_51364 = tensor.dim %53803, %c1_51363 : tensor<4x?x4096xf16>
    %53804 = flow.tensor.transfer %53803 : tensor<4x?x4096xf16>{%dim_51364} to #hal.device.promise<@__device_1>
    %53805 = torch_c.from_builtin_tensor %53804 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53806 = torch_c.to_builtin_tensor %53759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51365 = arith.constant 1 : index
    %dim_51366 = tensor.dim %53806, %c1_51365 : tensor<4x?x4096xf16>
    %53807 = flow.tensor.transfer %53806 : tensor<4x?x4096xf16>{%dim_51366} to #hal.device.promise<@__device_1>
    %53808 = torch_c.from_builtin_tensor %53807 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51367 = torch.constant.int 1
    %53809 = torch.aten.add.Tensor %53790, %53723, %int1_51367 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51368 = torch.constant.int 1
    %53810 = torch.aten.add.Tensor %53809, %53793, %int1_51368 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51369 = torch.constant.int 1
    %53811 = torch.aten.add.Tensor %53810, %53796, %int1_51369 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51370 = torch.constant.int 1
    %53812 = torch.aten.add.Tensor %53811, %53799, %int1_51370 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51371 = torch.constant.int 1
    %53813 = torch.aten.add.Tensor %53812, %53802, %int1_51371 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51372 = torch.constant.int 1
    %53814 = torch.aten.add.Tensor %53813, %53805, %int1_51372 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51373 = torch.constant.int 1
    %53815 = torch.aten.add.Tensor %53814, %53808, %int1_51373 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
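    // Repeat for @__device_2 (local partial %53729, used directly without a
    // transfer).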
    %53816 = torch_c.to_builtin_tensor %53717 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51374 = arith.constant 1 : index
    %dim_51375 = tensor.dim %53816, %c1_51374 : tensor<4x?x4096xf16>
    %53817 = flow.tensor.transfer %53816 : tensor<4x?x4096xf16>{%dim_51375} to #hal.device.promise<@__device_2>
    %53818 = torch_c.from_builtin_tensor %53817 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53818, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53819 = torch_c.to_builtin_tensor %53723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51376 = arith.constant 1 : index
    %dim_51377 = tensor.dim %53819, %c1_51376 : tensor<4x?x4096xf16>
    %53820 = flow.tensor.transfer %53819 : tensor<4x?x4096xf16>{%dim_51377} to #hal.device.promise<@__device_2>
    %53821 = torch_c.from_builtin_tensor %53820 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53822 = torch_c.to_builtin_tensor %53735 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51378 = arith.constant 1 : index
    %dim_51379 = tensor.dim %53822, %c1_51378 : tensor<4x?x4096xf16>
    %53823 = flow.tensor.transfer %53822 : tensor<4x?x4096xf16>{%dim_51379} to #hal.device.promise<@__device_2>
    %53824 = torch_c.from_builtin_tensor %53823 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53824, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53825 = torch_c.to_builtin_tensor %53741 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51380 = arith.constant 1 : index
    %dim_51381 = tensor.dim %53825, %c1_51380 : tensor<4x?x4096xf16>
    %53826 = flow.tensor.transfer %53825 : tensor<4x?x4096xf16>{%dim_51381} to #hal.device.promise<@__device_2>
    %53827 = torch_c.from_builtin_tensor %53826 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53828 = torch_c.to_builtin_tensor %53747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51382 = arith.constant 1 : index
    %dim_51383 = tensor.dim %53828, %c1_51382 : tensor<4x?x4096xf16>
    %53829 = flow.tensor.transfer %53828 : tensor<4x?x4096xf16>{%dim_51383} to #hal.device.promise<@__device_2>
    %53830 = torch_c.from_builtin_tensor %53829 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53831 = torch_c.to_builtin_tensor %53753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51384 = arith.constant 1 : index
    %dim_51385 = tensor.dim %53831, %c1_51384 : tensor<4x?x4096xf16>
    %53832 = flow.tensor.transfer %53831 : tensor<4x?x4096xf16>{%dim_51385} to #hal.device.promise<@__device_2>
    %53833 = torch_c.from_builtin_tensor %53832 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53834 = torch_c.to_builtin_tensor %53759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51386 = arith.constant 1 : index
    %dim_51387 = tensor.dim %53834, %c1_51386 : tensor<4x?x4096xf16>
    %53835 = flow.tensor.transfer %53834 : tensor<4x?x4096xf16>{%dim_51387} to #hal.device.promise<@__device_2>
    %53836 = torch_c.from_builtin_tensor %53835 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53836, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51388 = torch.constant.int 1
    %53837 = torch.aten.add.Tensor %53818, %53821, %int1_51388 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51389 = torch.constant.int 1
    %53838 = torch.aten.add.Tensor %53837, %53729, %int1_51389 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51390 = torch.constant.int 1
    %53839 = torch.aten.add.Tensor %53838, %53824, %int1_51390 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51391 = torch.constant.int 1
    %53840 = torch.aten.add.Tensor %53839, %53827, %int1_51391 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51392 = torch.constant.int 1
    %53841 = torch.aten.add.Tensor %53840, %53830, %int1_51392 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51393 = torch.constant.int 1
    %53842 = torch.aten.add.Tensor %53841, %53833, %int1_51393 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51394 = torch.constant.int 1
    %53843 = torch.aten.add.Tensor %53842, %53836, %int1_51394 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
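    // Repeat for @__device_3 (local partial %53735).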
    %53844 = torch_c.to_builtin_tensor %53717 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51395 = arith.constant 1 : index
    %dim_51396 = tensor.dim %53844, %c1_51395 : tensor<4x?x4096xf16>
    %53845 = flow.tensor.transfer %53844 : tensor<4x?x4096xf16>{%dim_51396} to #hal.device.promise<@__device_3>
    %53846 = torch_c.from_builtin_tensor %53845 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53846, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53847 = torch_c.to_builtin_tensor %53723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51397 = arith.constant 1 : index
    %dim_51398 = tensor.dim %53847, %c1_51397 : tensor<4x?x4096xf16>
    %53848 = flow.tensor.transfer %53847 : tensor<4x?x4096xf16>{%dim_51398} to #hal.device.promise<@__device_3>
    %53849 = torch_c.from_builtin_tensor %53848 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53850 = torch_c.to_builtin_tensor %53729 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51399 = arith.constant 1 : index
    %dim_51400 = tensor.dim %53850, %c1_51399 : tensor<4x?x4096xf16>
    %53851 = flow.tensor.transfer %53850 : tensor<4x?x4096xf16>{%dim_51400} to #hal.device.promise<@__device_3>
    %53852 = torch_c.from_builtin_tensor %53851 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53853 = torch_c.to_builtin_tensor %53741 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51401 = arith.constant 1 : index
    %dim_51402 = tensor.dim %53853, %c1_51401 : tensor<4x?x4096xf16>
    %53854 = flow.tensor.transfer %53853 : tensor<4x?x4096xf16>{%dim_51402} to #hal.device.promise<@__device_3>
    %53855 = torch_c.from_builtin_tensor %53854 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53856 = torch_c.to_builtin_tensor %53747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51403 = arith.constant 1 : index
    %dim_51404 = tensor.dim %53856, %c1_51403 : tensor<4x?x4096xf16>
    %53857 = flow.tensor.transfer %53856 : tensor<4x?x4096xf16>{%dim_51404} to #hal.device.promise<@__device_3>
    %53858 = torch_c.from_builtin_tensor %53857 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53859 = torch_c.to_builtin_tensor %53753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51405 = arith.constant 1 : index
    %dim_51406 = tensor.dim %53859, %c1_51405 : tensor<4x?x4096xf16>
    %53860 = flow.tensor.transfer %53859 : tensor<4x?x4096xf16>{%dim_51406} to #hal.device.promise<@__device_3>
    %53861 = torch_c.from_builtin_tensor %53860 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53862 = torch_c.to_builtin_tensor %53759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51407 = arith.constant 1 : index
    %dim_51408 = tensor.dim %53862, %c1_51407 : tensor<4x?x4096xf16>
    %53863 = flow.tensor.transfer %53862 : tensor<4x?x4096xf16>{%dim_51408} to #hal.device.promise<@__device_3>
    %53864 = torch_c.from_builtin_tensor %53863 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51409 = torch.constant.int 1
    %53865 = torch.aten.add.Tensor %53846, %53849, %int1_51409 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51410 = torch.constant.int 1
    %53866 = torch.aten.add.Tensor %53865, %53852, %int1_51410 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51411 = torch.constant.int 1
    %53867 = torch.aten.add.Tensor %53866, %53735, %int1_51411 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51412 = torch.constant.int 1
    %53868 = torch.aten.add.Tensor %53867, %53855, %int1_51412 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51413 = torch.constant.int 1
    %53869 = torch.aten.add.Tensor %53868, %53858, %int1_51413 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51414 = torch.constant.int 1
    %53870 = torch.aten.add.Tensor %53869, %53861, %int1_51414 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51415 = torch.constant.int 1
    %53871 = torch.aten.add.Tensor %53870, %53864, %int1_51415 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
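    // Same gather-and-sum for @__device_4; the local shard here is %53741.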
    %53872 = torch_c.to_builtin_tensor %53717 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51416 = arith.constant 1 : index
    %dim_51417 = tensor.dim %53872, %c1_51416 : tensor<4x?x4096xf16>
    %53873 = flow.tensor.transfer %53872 : tensor<4x?x4096xf16>{%dim_51417} to #hal.device.promise<@__device_4>
    %53874 = torch_c.from_builtin_tensor %53873 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53875 = torch_c.to_builtin_tensor %53723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51418 = arith.constant 1 : index
    %dim_51419 = tensor.dim %53875, %c1_51418 : tensor<4x?x4096xf16>
    %53876 = flow.tensor.transfer %53875 : tensor<4x?x4096xf16>{%dim_51419} to #hal.device.promise<@__device_4>
    %53877 = torch_c.from_builtin_tensor %53876 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53878 = torch_c.to_builtin_tensor %53729 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51420 = arith.constant 1 : index
    %dim_51421 = tensor.dim %53878, %c1_51420 : tensor<4x?x4096xf16>
    %53879 = flow.tensor.transfer %53878 : tensor<4x?x4096xf16>{%dim_51421} to #hal.device.promise<@__device_4>
    %53880 = torch_c.from_builtin_tensor %53879 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53881 = torch_c.to_builtin_tensor %53735 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51422 = arith.constant 1 : index
    %dim_51423 = tensor.dim %53881, %c1_51422 : tensor<4x?x4096xf16>
    %53882 = flow.tensor.transfer %53881 : tensor<4x?x4096xf16>{%dim_51423} to #hal.device.promise<@__device_4>
    %53883 = torch_c.from_builtin_tensor %53882 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53884 = torch_c.to_builtin_tensor %53747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51424 = arith.constant 1 : index
    %dim_51425 = tensor.dim %53884, %c1_51424 : tensor<4x?x4096xf16>
    %53885 = flow.tensor.transfer %53884 : tensor<4x?x4096xf16>{%dim_51425} to #hal.device.promise<@__device_4>
    %53886 = torch_c.from_builtin_tensor %53885 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53887 = torch_c.to_builtin_tensor %53753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51426 = arith.constant 1 : index
    %dim_51427 = tensor.dim %53887, %c1_51426 : tensor<4x?x4096xf16>
    %53888 = flow.tensor.transfer %53887 : tensor<4x?x4096xf16>{%dim_51427} to #hal.device.promise<@__device_4>
    %53889 = torch_c.from_builtin_tensor %53888 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53890 = torch_c.to_builtin_tensor %53759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51428 = arith.constant 1 : index
    %dim_51429 = tensor.dim %53890, %c1_51428 : tensor<4x?x4096xf16>
    %53891 = flow.tensor.transfer %53890 : tensor<4x?x4096xf16>{%dim_51429} to #hal.device.promise<@__device_4>
    %53892 = torch_c.from_builtin_tensor %53891 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51430 = torch.constant.int 1
    %53893 = torch.aten.add.Tensor %53874, %53877, %int1_51430 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51431 = torch.constant.int 1
    %53894 = torch.aten.add.Tensor %53893, %53880, %int1_51431 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51432 = torch.constant.int 1
    %53895 = torch.aten.add.Tensor %53894, %53883, %int1_51432 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51433 = torch.constant.int 1
    %53896 = torch.aten.add.Tensor %53895, %53741, %int1_51433 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51434 = torch.constant.int 1
    %53897 = torch.aten.add.Tensor %53896, %53886, %int1_51434 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51435 = torch.constant.int 1
    %53898 = torch.aten.add.Tensor %53897, %53889, %int1_51435 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51436 = torch.constant.int 1
    %53899 = torch.aten.add.Tensor %53898, %53892, %int1_51436 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
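    // Same gather-and-sum for @__device_5; the local shard here is %53747.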
    %53900 = torch_c.to_builtin_tensor %53717 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51437 = arith.constant 1 : index
    %dim_51438 = tensor.dim %53900, %c1_51437 : tensor<4x?x4096xf16>
    %53901 = flow.tensor.transfer %53900 : tensor<4x?x4096xf16>{%dim_51438} to #hal.device.promise<@__device_5>
    %53902 = torch_c.from_builtin_tensor %53901 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53903 = torch_c.to_builtin_tensor %53723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51439 = arith.constant 1 : index
    %dim_51440 = tensor.dim %53903, %c1_51439 : tensor<4x?x4096xf16>
    %53904 = flow.tensor.transfer %53903 : tensor<4x?x4096xf16>{%dim_51440} to #hal.device.promise<@__device_5>
    %53905 = torch_c.from_builtin_tensor %53904 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53906 = torch_c.to_builtin_tensor %53729 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51441 = arith.constant 1 : index
    %dim_51442 = tensor.dim %53906, %c1_51441 : tensor<4x?x4096xf16>
    %53907 = flow.tensor.transfer %53906 : tensor<4x?x4096xf16>{%dim_51442} to #hal.device.promise<@__device_5>
    %53908 = torch_c.from_builtin_tensor %53907 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53909 = torch_c.to_builtin_tensor %53735 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51443 = arith.constant 1 : index
    %dim_51444 = tensor.dim %53909, %c1_51443 : tensor<4x?x4096xf16>
    %53910 = flow.tensor.transfer %53909 : tensor<4x?x4096xf16>{%dim_51444} to #hal.device.promise<@__device_5>
    %53911 = torch_c.from_builtin_tensor %53910 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53912 = torch_c.to_builtin_tensor %53741 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51445 = arith.constant 1 : index
    %dim_51446 = tensor.dim %53912, %c1_51445 : tensor<4x?x4096xf16>
    %53913 = flow.tensor.transfer %53912 : tensor<4x?x4096xf16>{%dim_51446} to #hal.device.promise<@__device_5>
    %53914 = torch_c.from_builtin_tensor %53913 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53915 = torch_c.to_builtin_tensor %53753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51447 = arith.constant 1 : index
    %dim_51448 = tensor.dim %53915, %c1_51447 : tensor<4x?x4096xf16>
    %53916 = flow.tensor.transfer %53915 : tensor<4x?x4096xf16>{%dim_51448} to #hal.device.promise<@__device_5>
    %53917 = torch_c.from_builtin_tensor %53916 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53918 = torch_c.to_builtin_tensor %53759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51449 = arith.constant 1 : index
    %dim_51450 = tensor.dim %53918, %c1_51449 : tensor<4x?x4096xf16>
    %53919 = flow.tensor.transfer %53918 : tensor<4x?x4096xf16>{%dim_51450} to #hal.device.promise<@__device_5>
    %53920 = torch_c.from_builtin_tensor %53919 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51451 = torch.constant.int 1
    %53921 = torch.aten.add.Tensor %53902, %53905, %int1_51451 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51452 = torch.constant.int 1
    %53922 = torch.aten.add.Tensor %53921, %53908, %int1_51452 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51453 = torch.constant.int 1
    %53923 = torch.aten.add.Tensor %53922, %53911, %int1_51453 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51454 = torch.constant.int 1
    %53924 = torch.aten.add.Tensor %53923, %53914, %int1_51454 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51455 = torch.constant.int 1
    %53925 = torch.aten.add.Tensor %53924, %53747, %int1_51455 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51456 = torch.constant.int 1
    %53926 = torch.aten.add.Tensor %53925, %53917, %int1_51456 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51457 = torch.constant.int 1
    %53927 = torch.aten.add.Tensor %53926, %53920, %int1_51457 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
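    // Same gather-and-sum for @__device_6; the local shard here is %53753.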
    %53928 = torch_c.to_builtin_tensor %53717 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51458 = arith.constant 1 : index
    %dim_51459 = tensor.dim %53928, %c1_51458 : tensor<4x?x4096xf16>
    %53929 = flow.tensor.transfer %53928 : tensor<4x?x4096xf16>{%dim_51459} to #hal.device.promise<@__device_6>
    %53930 = torch_c.from_builtin_tensor %53929 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53931 = torch_c.to_builtin_tensor %53723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51460 = arith.constant 1 : index
    %dim_51461 = tensor.dim %53931, %c1_51460 : tensor<4x?x4096xf16>
    %53932 = flow.tensor.transfer %53931 : tensor<4x?x4096xf16>{%dim_51461} to #hal.device.promise<@__device_6>
    %53933 = torch_c.from_builtin_tensor %53932 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53934 = torch_c.to_builtin_tensor %53729 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51462 = arith.constant 1 : index
    %dim_51463 = tensor.dim %53934, %c1_51462 : tensor<4x?x4096xf16>
    %53935 = flow.tensor.transfer %53934 : tensor<4x?x4096xf16>{%dim_51463} to #hal.device.promise<@__device_6>
    %53936 = torch_c.from_builtin_tensor %53935 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53937 = torch_c.to_builtin_tensor %53735 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51464 = arith.constant 1 : index
    %dim_51465 = tensor.dim %53937, %c1_51464 : tensor<4x?x4096xf16>
    %53938 = flow.tensor.transfer %53937 : tensor<4x?x4096xf16>{%dim_51465} to #hal.device.promise<@__device_6>
    %53939 = torch_c.from_builtin_tensor %53938 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53940 = torch_c.to_builtin_tensor %53741 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51466 = arith.constant 1 : index
    %dim_51467 = tensor.dim %53940, %c1_51466 : tensor<4x?x4096xf16>
    %53941 = flow.tensor.transfer %53940 : tensor<4x?x4096xf16>{%dim_51467} to #hal.device.promise<@__device_6>
    %53942 = torch_c.from_builtin_tensor %53941 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53943 = torch_c.to_builtin_tensor %53747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51468 = arith.constant 1 : index
    %dim_51469 = tensor.dim %53943, %c1_51468 : tensor<4x?x4096xf16>
    %53944 = flow.tensor.transfer %53943 : tensor<4x?x4096xf16>{%dim_51469} to #hal.device.promise<@__device_6>
    %53945 = torch_c.from_builtin_tensor %53944 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53946 = torch_c.to_builtin_tensor %53759 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51470 = arith.constant 1 : index
    %dim_51471 = tensor.dim %53946, %c1_51470 : tensor<4x?x4096xf16>
    %53947 = flow.tensor.transfer %53946 : tensor<4x?x4096xf16>{%dim_51471} to #hal.device.promise<@__device_6>
    %53948 = torch_c.from_builtin_tensor %53947 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51472 = torch.constant.int 1
    %53949 = torch.aten.add.Tensor %53930, %53933, %int1_51472 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51473 = torch.constant.int 1
    %53950 = torch.aten.add.Tensor %53949, %53936, %int1_51473 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51474 = torch.constant.int 1
    %53951 = torch.aten.add.Tensor %53950, %53939, %int1_51474 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51475 = torch.constant.int 1
    %53952 = torch.aten.add.Tensor %53951, %53942, %int1_51475 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51476 = torch.constant.int 1
    %53953 = torch.aten.add.Tensor %53952, %53945, %int1_51476 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51477 = torch.constant.int 1
    %53954 = torch.aten.add.Tensor %53953, %53753, %int1_51477 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51478 = torch.constant.int 1
    %53955 = torch.aten.add.Tensor %53954, %53948, %int1_51478 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
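    // Same gather-and-sum for @__device_7; the local shard here is %53759.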
    %53956 = torch_c.to_builtin_tensor %53717 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51479 = arith.constant 1 : index
    %dim_51480 = tensor.dim %53956, %c1_51479 : tensor<4x?x4096xf16>
    %53957 = flow.tensor.transfer %53956 : tensor<4x?x4096xf16>{%dim_51480} to #hal.device.promise<@__device_7>
    %53958 = torch_c.from_builtin_tensor %53957 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53959 = torch_c.to_builtin_tensor %53723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51481 = arith.constant 1 : index
    %dim_51482 = tensor.dim %53959, %c1_51481 : tensor<4x?x4096xf16>
    %53960 = flow.tensor.transfer %53959 : tensor<4x?x4096xf16>{%dim_51482} to #hal.device.promise<@__device_7>
    %53961 = torch_c.from_builtin_tensor %53960 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53962 = torch_c.to_builtin_tensor %53729 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51483 = arith.constant 1 : index
    %dim_51484 = tensor.dim %53962, %c1_51483 : tensor<4x?x4096xf16>
    %53963 = flow.tensor.transfer %53962 : tensor<4x?x4096xf16>{%dim_51484} to #hal.device.promise<@__device_7>
    %53964 = torch_c.from_builtin_tensor %53963 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53965 = torch_c.to_builtin_tensor %53735 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51485 = arith.constant 1 : index
    %dim_51486 = tensor.dim %53965, %c1_51485 : tensor<4x?x4096xf16>
    %53966 = flow.tensor.transfer %53965 : tensor<4x?x4096xf16>{%dim_51486} to #hal.device.promise<@__device_7>
    %53967 = torch_c.from_builtin_tensor %53966 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53968 = torch_c.to_builtin_tensor %53741 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51487 = arith.constant 1 : index
    %dim_51488 = tensor.dim %53968, %c1_51487 : tensor<4x?x4096xf16>
    %53969 = flow.tensor.transfer %53968 : tensor<4x?x4096xf16>{%dim_51488} to #hal.device.promise<@__device_7>
    %53970 = torch_c.from_builtin_tensor %53969 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53971 = torch_c.to_builtin_tensor %53747 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51489 = arith.constant 1 : index
    %dim_51490 = tensor.dim %53971, %c1_51489 : tensor<4x?x4096xf16>
    %53972 = flow.tensor.transfer %53971 : tensor<4x?x4096xf16>{%dim_51490} to #hal.device.promise<@__device_7>
    %53973 = torch_c.from_builtin_tensor %53972 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %53974 = torch_c.to_builtin_tensor %53753 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51491 = arith.constant 1 : index
    %dim_51492 = tensor.dim %53974, %c1_51491 : tensor<4x?x4096xf16>
    %53975 = flow.tensor.transfer %53974 : tensor<4x?x4096xf16>{%dim_51492} to #hal.device.promise<@__device_7>
    %53976 = torch_c.from_builtin_tensor %53975 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51493 = torch.constant.int 1
    %53977 = torch.aten.add.Tensor %53958, %53961, %int1_51493 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51494 = torch.constant.int 1
    %53978 = torch.aten.add.Tensor %53977, %53964, %int1_51494 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53978, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51495 = torch.constant.int 1
    %53979 = torch.aten.add.Tensor %53978, %53967, %int1_51495 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51496 = torch.constant.int 1
    %53980 = torch.aten.add.Tensor %53979, %53970, %int1_51496 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51497 = torch.constant.int 1
    %53981 = torch.aten.add.Tensor %53980, %53973, %int1_51497 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51498 = torch.constant.int 1
    %53982 = torch.aten.add.Tensor %53981, %53976, %int1_51498 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51499 = torch.constant.int 1
    %53983 = torch.aten.add.Tensor %53982, %53759, %int1_51499 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
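    // Residual connection: add each device's reduced result (%53787 ... %53983) to that device's copy of the layer input (%52643 ... %52650).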
    %int1_51500 = torch.constant.int 1
    %53984 = torch.aten.add.Tensor %52643, %53787, %int1_51500 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51501 = torch.constant.int 1
    %53985 = torch.aten.add.Tensor %52644, %53815, %int1_51501 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53985, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51502 = torch.constant.int 1
    %53986 = torch.aten.add.Tensor %52645, %53843, %int1_51502 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51503 = torch.constant.int 1
    %53987 = torch.aten.add.Tensor %52646, %53871, %int1_51503 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51504 = torch.constant.int 1
    %53988 = torch.aten.add.Tensor %52647, %53899, %int1_51504 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51505 = torch.constant.int 1
    %53989 = torch.aten.add.Tensor %52648, %53927, %int1_51505 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51506 = torch.constant.int 1
    %53990 = torch.aten.add.Tensor %52649, %53955, %int1_51506 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51507 = torch.constant.int 1
    %53991 = torch.aten.add.Tensor %52650, %53983, %int1_51507 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %53991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
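    // RMSNorm, replicated per device: upcast the f16 activations to f32 so the statistics are computed in full precision.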
    %int6_51508 = torch.constant.int 6
    %53992 = torch.prims.convert_element_type %53984, %int6_51508 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %53992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51509 = torch.constant.int 6
    %53993 = torch.prims.convert_element_type %53985, %int6_51509 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %53993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51510 = torch.constant.int 6
    %53994 = torch.prims.convert_element_type %53986, %int6_51510 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %53994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51511 = torch.constant.int 6
    %53995 = torch.prims.convert_element_type %53987, %int6_51511 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %53995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51512 = torch.constant.int 6
    %53996 = torch.prims.convert_element_type %53988, %int6_51512 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %53996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51513 = torch.constant.int 6
    %53997 = torch.prims.convert_element_type %53989, %int6_51513 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %53997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51514 = torch.constant.int 6
    %53998 = torch.prims.convert_element_type %53990, %int6_51514 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %53998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51515 = torch.constant.int 6
    %53999 = torch.prims.convert_element_type %53991, %int6_51515 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %53999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
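    // Square each element.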
    %int2_51516 = torch.constant.int 2
    %54000 = torch.aten.pow.Tensor_Scalar %53992, %int2_51516 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51517 = torch.constant.int 2
    %54001 = torch.aten.pow.Tensor_Scalar %53993, %int2_51517 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51518 = torch.constant.int 2
    %54002 = torch.aten.pow.Tensor_Scalar %53994, %int2_51518 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51519 = torch.constant.int 2
    %54003 = torch.aten.pow.Tensor_Scalar %53995, %int2_51519 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51520 = torch.constant.int 2
    %54004 = torch.aten.pow.Tensor_Scalar %53996, %int2_51520 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51521 = torch.constant.int 2
    %54005 = torch.aten.pow.Tensor_Scalar %53997, %int2_51521 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51522 = torch.constant.int 2
    %54006 = torch.aten.pow.Tensor_Scalar %53998, %int2_51522 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51523 = torch.constant.int 2
    %54007 = torch.aten.pow.Tensor_Scalar %53999, %int2_51523 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
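    // Mean of the squares over the last (4096-wide) dimension, keepdim=true.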
    %int-1_51524 = torch.constant.int -1
    %54008 = torch.prim.ListConstruct %int-1_51524 : (!torch.int) -> !torch.list<int>
    %true_51525 = torch.constant.bool true
    %none_51526 = torch.constant.none
    %54009 = torch.aten.mean.dim %54000, %54008, %true_51525, %none_51526 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51527 = torch.constant.int -1
    %54010 = torch.prim.ListConstruct %int-1_51527 : (!torch.int) -> !torch.list<int>
    %true_51528 = torch.constant.bool true
    %none_51529 = torch.constant.none
    %54011 = torch.aten.mean.dim %54001, %54010, %true_51528, %none_51529 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51530 = torch.constant.int -1
    %54012 = torch.prim.ListConstruct %int-1_51530 : (!torch.int) -> !torch.list<int>
    %true_51531 = torch.constant.bool true
    %none_51532 = torch.constant.none
    %54013 = torch.aten.mean.dim %54002, %54012, %true_51531, %none_51532 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51533 = torch.constant.int -1
    %54014 = torch.prim.ListConstruct %int-1_51533 : (!torch.int) -> !torch.list<int>
    %true_51534 = torch.constant.bool true
    %none_51535 = torch.constant.none
    %54015 = torch.aten.mean.dim %54003, %54014, %true_51534, %none_51535 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51536 = torch.constant.int -1
    %54016 = torch.prim.ListConstruct %int-1_51536 : (!torch.int) -> !torch.list<int>
    %true_51537 = torch.constant.bool true
    %none_51538 = torch.constant.none
    %54017 = torch.aten.mean.dim %54004, %54016, %true_51537, %none_51538 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51539 = torch.constant.int -1
    %54018 = torch.prim.ListConstruct %int-1_51539 : (!torch.int) -> !torch.list<int>
    %true_51540 = torch.constant.bool true
    %none_51541 = torch.constant.none
    %54019 = torch.aten.mean.dim %54005, %54018, %true_51540, %none_51541 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51542 = torch.constant.int -1
    %54020 = torch.prim.ListConstruct %int-1_51542 : (!torch.int) -> !torch.list<int>
    %true_51543 = torch.constant.bool true
    %none_51544 = torch.constant.none
    %54021 = torch.aten.mean.dim %54006, %54020, %true_51543, %none_51544 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51545 = torch.constant.int -1
    %54022 = torch.prim.ListConstruct %int-1_51545 : (!torch.int) -> !torch.list<int>
    %true_51546 = torch.constant.bool true
    %none_51547 = torch.constant.none
    %54023 = torch.aten.mean.dim %54007, %54022, %true_51546, %none_51547 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
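    // Add the epsilon (9.9999997473787516E-6, i.e. 1e-5 rounded to f32) for numerical stability.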
    %float9.999990e-06_51548 = torch.constant.float 9.9999997473787516E-6
    %int1_51549 = torch.constant.int 1
    %54024 = torch.aten.add.Scalar %54009, %float9.999990e-06_51548, %int1_51549 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51550 = torch.constant.float 9.9999997473787516E-6
    %int1_51551 = torch.constant.int 1
    %54025 = torch.aten.add.Scalar %54011, %float9.999990e-06_51550, %int1_51551 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51552 = torch.constant.float 9.9999997473787516E-6
    %int1_51553 = torch.constant.int 1
    %54026 = torch.aten.add.Scalar %54013, %float9.999990e-06_51552, %int1_51553 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51554 = torch.constant.float 9.9999997473787516E-6
    %int1_51555 = torch.constant.int 1
    %54027 = torch.aten.add.Scalar %54015, %float9.999990e-06_51554, %int1_51555 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51556 = torch.constant.float 9.9999997473787516E-6
    %int1_51557 = torch.constant.int 1
    %54028 = torch.aten.add.Scalar %54017, %float9.999990e-06_51556, %int1_51557 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51558 = torch.constant.float 9.9999997473787516E-6
    %int1_51559 = torch.constant.int 1
    %54029 = torch.aten.add.Scalar %54019, %float9.999990e-06_51558, %int1_51559 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51560 = torch.constant.float 9.9999997473787516E-6
    %int1_51561 = torch.constant.int 1
    %54030 = torch.aten.add.Scalar %54021, %float9.999990e-06_51560, %int1_51561 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51562 = torch.constant.float 9.9999997473787516E-6
    %int1_51563 = torch.constant.int 1
    %54031 = torch.aten.add.Scalar %54023, %float9.999990e-06_51562, %int1_51563 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54031, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
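    // Reciprocal square root of the mean square.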
    %54032 = torch.aten.rsqrt %54024 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54033 = torch.aten.rsqrt %54025 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54034 = torch.aten.rsqrt %54026 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54035 = torch.aten.rsqrt %54027 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54036 = torch.aten.rsqrt %54028 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54037 = torch.aten.rsqrt %54029 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54037, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54038 = torch.aten.rsqrt %54030 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54039 = torch.aten.rsqrt %54031 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
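    // Normalize: x * rsqrt(mean(x^2) + eps).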
    %54040 = torch.aten.mul.Tensor %53992, %54032 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54041 = torch.aten.mul.Tensor %53993, %54033 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54042 = torch.aten.mul.Tensor %53994, %54034 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54043 = torch.aten.mul.Tensor %53995, %54035 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54044 = torch.aten.mul.Tensor %53996, %54036 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54045 = torch.aten.mul.Tensor %53997, %54037 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54046 = torch.aten.mul.Tensor %53998, %54038 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54047 = torch.aten.mul.Tensor %53999, %54039 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
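    // Elementwise scale by each device's copy of the [4096] norm weight (%1992 ... %1999).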
    %54048 = torch.aten.mul.Tensor %1992, %54040 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54049 = torch.aten.mul.Tensor %1993, %54041 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54050 = torch.aten.mul.Tensor %1994, %54042 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54051 = torch.aten.mul.Tensor %1995, %54043 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54052 = torch.aten.mul.Tensor %1996, %54044 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54053 = torch.aten.mul.Tensor %1997, %54045 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54054 = torch.aten.mul.Tensor %1998, %54046 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54055 = torch.aten.mul.Tensor %1999, %54047 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
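    // Downcast the normalized result back to f16 for the following matmuls.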
    %int5_51564 = torch.constant.int 5
    %54056 = torch.prims.convert_element_type %54048, %int5_51564 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51565 = torch.constant.int 5
    %54057 = torch.prims.convert_element_type %54049, %int5_51565 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51566 = torch.constant.int 5
    %54058 = torch.prims.convert_element_type %54050, %int5_51566 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51567 = torch.constant.int 5
    %54059 = torch.prims.convert_element_type %54051, %int5_51567 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51568 = torch.constant.int 5
    %54060 = torch.prims.convert_element_type %54052, %int5_51568 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51569 = torch.constant.int 5
    %54061 = torch.prims.convert_element_type %54053, %int5_51569 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51570 = torch.constant.int 5
    %54062 = torch.prims.convert_element_type %54054, %int5_51570 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51571 = torch.constant.int 5
    %54063 = torch.prims.convert_element_type %54055, %int5_51571 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
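    // Transpose the eight [1792,4096] weight shards to [4096,1792] for torch.aten.mm; eight column shards of width 1792 are consistent with a 14336-wide FFN projection split across the devices.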
    %int1_51572 = torch.constant.int 1
    %int0_51573 = torch.constant.int 0
    %54064 = torch.prim.ListConstruct %int1_51572, %int0_51573 : (!torch.int, !torch.int) -> !torch.list<int>
    %54065 = torch.aten.permute %2000, %54064 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51574 = torch.constant.int 1
    %int0_51575 = torch.constant.int 0
    %54066 = torch.prim.ListConstruct %int1_51574, %int0_51575 : (!torch.int, !torch.int) -> !torch.list<int>
    %54067 = torch.aten.permute %2001, %54066 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51576 = torch.constant.int 1
    %int0_51577 = torch.constant.int 0
    %54068 = torch.prim.ListConstruct %int1_51576, %int0_51577 : (!torch.int, !torch.int) -> !torch.list<int>
    %54069 = torch.aten.permute %2002, %54068 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51578 = torch.constant.int 1
    %int0_51579 = torch.constant.int 0
    %54070 = torch.prim.ListConstruct %int1_51578, %int0_51579 : (!torch.int, !torch.int) -> !torch.list<int>
    %54071 = torch.aten.permute %2003, %54070 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51580 = torch.constant.int 1
    %int0_51581 = torch.constant.int 0
    %54072 = torch.prim.ListConstruct %int1_51580, %int0_51581 : (!torch.int, !torch.int) -> !torch.list<int>
    %54073 = torch.aten.permute %2004, %54072 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51582 = torch.constant.int 1
    %int0_51583 = torch.constant.int 0
    %54074 = torch.prim.ListConstruct %int1_51582, %int0_51583 : (!torch.int, !torch.int) -> !torch.list<int>
    %54075 = torch.aten.permute %2005, %54074 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51584 = torch.constant.int 1
    %int0_51585 = torch.constant.int 0
    %54076 = torch.prim.ListConstruct %int1_51584, %int0_51585 : (!torch.int, !torch.int) -> !torch.list<int>
    %54077 = torch.aten.permute %2006, %54076 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51586 = torch.constant.int 1
    %int0_51587 = torch.constant.int 0
    %54078 = torch.prim.ListConstruct %int1_51586, %int0_51587 : (!torch.int, !torch.int) -> !torch.list<int>
    %54079 = torch.aten.permute %2007, %54078 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
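    // Per-shard projection: collapse [4,?,4096] to [4*?,4096], matmul with the transposed shard, then reshape back to [4,?,1792].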
    %int4_51588 = torch.constant.int 4
    %54080 = torch.aten.mul.int %int4_51588, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51589 = torch.constant.int 4096
    %54081 = torch.prim.ListConstruct %54080, %int4096_51589 : (!torch.int, !torch.int) -> !torch.list<int>
    %54082 = torch.aten.view %54056, %54081 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54082, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54083 = torch.aten.mm %54082, %54065 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54083, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51590 = torch.constant.int 4
    %int1792_51591 = torch.constant.int 1792
    %54084 = torch.prim.ListConstruct %int4_51590, %2482, %int1792_51591 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54085 = torch.aten.view %54083, %54084 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51592 = torch.constant.int 4
    %54086 = torch.aten.mul.int %int4_51592, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51593 = torch.constant.int 4096
    %54087 = torch.prim.ListConstruct %54086, %int4096_51593 : (!torch.int, !torch.int) -> !torch.list<int>
    %54088 = torch.aten.view %54057, %54087 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54088, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54089 = torch.aten.mm %54088, %54067 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54089, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51594 = torch.constant.int 4
    %int1792_51595 = torch.constant.int 1792
    %54090 = torch.prim.ListConstruct %int4_51594, %2482, %int1792_51595 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54091 = torch.aten.view %54089, %54090 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51596 = torch.constant.int 4
    %54092 = torch.aten.mul.int %int4_51596, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51597 = torch.constant.int 4096
    %54093 = torch.prim.ListConstruct %54092, %int4096_51597 : (!torch.int, !torch.int) -> !torch.list<int>
    %54094 = torch.aten.view %54058, %54093 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54094, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54095 = torch.aten.mm %54094, %54069 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54095, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51598 = torch.constant.int 4
    %int1792_51599 = torch.constant.int 1792
    %54096 = torch.prim.ListConstruct %int4_51598, %2482, %int1792_51599 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54097 = torch.aten.view %54095, %54096 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51600 = torch.constant.int 4
    %54098 = torch.aten.mul.int %int4_51600, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51601 = torch.constant.int 4096
    %54099 = torch.prim.ListConstruct %54098, %int4096_51601 : (!torch.int, !torch.int) -> !torch.list<int>
    %54100 = torch.aten.view %54059, %54099 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54100, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54101 = torch.aten.mm %54100, %54071 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54101, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51602 = torch.constant.int 4
    %int1792_51603 = torch.constant.int 1792
    %54102 = torch.prim.ListConstruct %int4_51602, %2482, %int1792_51603 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54103 = torch.aten.view %54101, %54102 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51604 = torch.constant.int 4
    %54104 = torch.aten.mul.int %int4_51604, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51605 = torch.constant.int 4096
    %54105 = torch.prim.ListConstruct %54104, %int4096_51605 : (!torch.int, !torch.int) -> !torch.list<int>
    %54106 = torch.aten.view %54060, %54105 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54106, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54107 = torch.aten.mm %54106, %54073 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54107, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51606 = torch.constant.int 4
    %int1792_51607 = torch.constant.int 1792
    %54108 = torch.prim.ListConstruct %int4_51606, %2482, %int1792_51607 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54109 = torch.aten.view %54107, %54108 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51608 = torch.constant.int 4
    %54110 = torch.aten.mul.int %int4_51608, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51609 = torch.constant.int 4096
    %54111 = torch.prim.ListConstruct %54110, %int4096_51609 : (!torch.int, !torch.int) -> !torch.list<int>
    %54112 = torch.aten.view %54061, %54111 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54112, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54113 = torch.aten.mm %54112, %54075 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54113, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51610 = torch.constant.int 4
    %int1792_51611 = torch.constant.int 1792
    %54114 = torch.prim.ListConstruct %int4_51610, %2482, %int1792_51611 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54115 = torch.aten.view %54113, %54114 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54115, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51612 = torch.constant.int 4
    %54116 = torch.aten.mul.int %int4_51612, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51613 = torch.constant.int 4096
    %54117 = torch.prim.ListConstruct %54116, %int4096_51613 : (!torch.int, !torch.int) -> !torch.list<int>
    %54118 = torch.aten.view %54062, %54117 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54118, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54119 = torch.aten.mm %54118, %54077 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54119, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51614 = torch.constant.int 4
    %int1792_51615 = torch.constant.int 1792
    %54120 = torch.prim.ListConstruct %int4_51614, %2482, %int1792_51615 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54121 = torch.aten.view %54119, %54120 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51616 = torch.constant.int 4
    %54122 = torch.aten.mul.int %int4_51616, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51617 = torch.constant.int 4096
    %54123 = torch.prim.ListConstruct %54122, %int4096_51617 : (!torch.int, !torch.int) -> !torch.list<int>
    %54124 = torch.aten.view %54063, %54123 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54124, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54125 = torch.aten.mm %54124, %54079 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54125, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51618 = torch.constant.int 4
    %int1792_51619 = torch.constant.int 1792
    %54126 = torch.prim.ListConstruct %int4_51618, %2482, %int1792_51619 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54127 = torch.aten.view %54125, %54126 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
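    // SiLU nonlinearity applied independently to each shard's projection.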
    %54128 = torch.aten.silu %54085 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54129 = torch.aten.silu %54091 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54130 = torch.aten.silu %54097 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54131 = torch.aten.silu %54103 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54132 = torch.aten.silu %54109 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54133 = torch.aten.silu %54115 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54134 = torch.aten.silu %54121 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54135 = torch.aten.silu %54127 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
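    // Transposes of a second set of weight shards (%2008-%2015); likely the
    // ffn_up weights, given the silu(x) * y combine that follows below.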
    %int1_51620 = torch.constant.int 1
    %int0_51621 = torch.constant.int 0
    %54136 = torch.prim.ListConstruct %int1_51620, %int0_51621 : (!torch.int, !torch.int) -> !torch.list<int>
    %54137 = torch.aten.permute %2008, %54136 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51622 = torch.constant.int 1
    %int0_51623 = torch.constant.int 0
    %54138 = torch.prim.ListConstruct %int1_51622, %int0_51623 : (!torch.int, !torch.int) -> !torch.list<int>
    %54139 = torch.aten.permute %2009, %54138 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51624 = torch.constant.int 1
    %int0_51625 = torch.constant.int 0
    %54140 = torch.prim.ListConstruct %int1_51624, %int0_51625 : (!torch.int, !torch.int) -> !torch.list<int>
    %54141 = torch.aten.permute %2010, %54140 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51626 = torch.constant.int 1
    %int0_51627 = torch.constant.int 0
    %54142 = torch.prim.ListConstruct %int1_51626, %int0_51627 : (!torch.int, !torch.int) -> !torch.list<int>
    %54143 = torch.aten.permute %2011, %54142 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51628 = torch.constant.int 1
    %int0_51629 = torch.constant.int 0
    %54144 = torch.prim.ListConstruct %int1_51628, %int0_51629 : (!torch.int, !torch.int) -> !torch.list<int>
    %54145 = torch.aten.permute %2012, %54144 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51630 = torch.constant.int 1
    %int0_51631 = torch.constant.int 0
    %54146 = torch.prim.ListConstruct %int1_51630, %int0_51631 : (!torch.int, !torch.int) -> !torch.list<int>
    %54147 = torch.aten.permute %2013, %54146 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51632 = torch.constant.int 1
    %int0_51633 = torch.constant.int 0
    %54148 = torch.prim.ListConstruct %int1_51632, %int0_51633 : (!torch.int, !torch.int) -> !torch.list<int>
    %54149 = torch.aten.permute %2014, %54148 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_51634 = torch.constant.int 1
    %int0_51635 = torch.constant.int 0
    %54150 = torch.prim.ListConstruct %int1_51634, %int0_51635 : (!torch.int, !torch.int) -> !torch.list<int>
    %54151 = torch.aten.permute %2015, %54150 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
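    // Second set of per-shard projections, using the same
    // flatten / mm / reshape pattern as above.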
    %int4_51636 = torch.constant.int 4
    %54152 = torch.aten.mul.int %int4_51636, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51637 = torch.constant.int 4096
    %54153 = torch.prim.ListConstruct %54152, %int4096_51637 : (!torch.int, !torch.int) -> !torch.list<int>
    %54154 = torch.aten.view %54056, %54153 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54154, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54155 = torch.aten.mm %54154, %54137 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54155, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51638 = torch.constant.int 4
    %int1792_51639 = torch.constant.int 1792
    %54156 = torch.prim.ListConstruct %int4_51638, %2482, %int1792_51639 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54157 = torch.aten.view %54155, %54156 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51640 = torch.constant.int 4
    %54158 = torch.aten.mul.int %int4_51640, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51641 = torch.constant.int 4096
    %54159 = torch.prim.ListConstruct %54158, %int4096_51641 : (!torch.int, !torch.int) -> !torch.list<int>
    %54160 = torch.aten.view %54057, %54159 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54160, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54161 = torch.aten.mm %54160, %54139 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54161, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51642 = torch.constant.int 4
    %int1792_51643 = torch.constant.int 1792
    %54162 = torch.prim.ListConstruct %int4_51642, %2482, %int1792_51643 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54163 = torch.aten.view %54161, %54162 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51644 = torch.constant.int 4
    %54164 = torch.aten.mul.int %int4_51644, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51645 = torch.constant.int 4096
    %54165 = torch.prim.ListConstruct %54164, %int4096_51645 : (!torch.int, !torch.int) -> !torch.list<int>
    %54166 = torch.aten.view %54058, %54165 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54166, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54167 = torch.aten.mm %54166, %54141 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54167, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51646 = torch.constant.int 4
    %int1792_51647 = torch.constant.int 1792
    %54168 = torch.prim.ListConstruct %int4_51646, %2482, %int1792_51647 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54169 = torch.aten.view %54167, %54168 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51648 = torch.constant.int 4
    %54170 = torch.aten.mul.int %int4_51648, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51649 = torch.constant.int 4096
    %54171 = torch.prim.ListConstruct %54170, %int4096_51649 : (!torch.int, !torch.int) -> !torch.list<int>
    %54172 = torch.aten.view %54059, %54171 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54172, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54173 = torch.aten.mm %54172, %54143 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54173, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51650 = torch.constant.int 4
    %int1792_51651 = torch.constant.int 1792
    %54174 = torch.prim.ListConstruct %int4_51650, %2482, %int1792_51651 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54175 = torch.aten.view %54173, %54174 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51652 = torch.constant.int 4
    %54176 = torch.aten.mul.int %int4_51652, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51653 = torch.constant.int 4096
    %54177 = torch.prim.ListConstruct %54176, %int4096_51653 : (!torch.int, !torch.int) -> !torch.list<int>
    %54178 = torch.aten.view %54060, %54177 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54178, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54179 = torch.aten.mm %54178, %54145 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54179, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51654 = torch.constant.int 4
    %int1792_51655 = torch.constant.int 1792
    %54180 = torch.prim.ListConstruct %int4_51654, %2482, %int1792_51655 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54181 = torch.aten.view %54179, %54180 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51656 = torch.constant.int 4
    %54182 = torch.aten.mul.int %int4_51656, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51657 = torch.constant.int 4096
    %54183 = torch.prim.ListConstruct %54182, %int4096_51657 : (!torch.int, !torch.int) -> !torch.list<int>
    %54184 = torch.aten.view %54061, %54183 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54184, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54185 = torch.aten.mm %54184, %54147 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54185, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51658 = torch.constant.int 4
    %int1792_51659 = torch.constant.int 1792
    %54186 = torch.prim.ListConstruct %int4_51658, %2482, %int1792_51659 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54187 = torch.aten.view %54185, %54186 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51660 = torch.constant.int 4
    %54188 = torch.aten.mul.int %int4_51660, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51661 = torch.constant.int 4096
    %54189 = torch.prim.ListConstruct %54188, %int4096_51661 : (!torch.int, !torch.int) -> !torch.list<int>
    %54190 = torch.aten.view %54062, %54189 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54190, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54191 = torch.aten.mm %54190, %54149 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54191, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51662 = torch.constant.int 4
    %int1792_51663 = torch.constant.int 1792
    %54192 = torch.prim.ListConstruct %int4_51662, %2482, %int1792_51663 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54193 = torch.aten.view %54191, %54192 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_51664 = torch.constant.int 4
    %54194 = torch.aten.mul.int %int4_51664, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51665 = torch.constant.int 4096
    %54195 = torch.prim.ListConstruct %54194, %int4096_51665 : (!torch.int, !torch.int) -> !torch.list<int>
    %54196 = torch.aten.view %54063, %54195 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54196, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54197 = torch.aten.mm %54196, %54151 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54197, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_51666 = torch.constant.int 4
    %int1792_51667 = torch.constant.int 1792
    %54198 = torch.prim.ListConstruct %int4_51666, %2482, %int1792_51667 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54199 = torch.aten.view %54197, %54198 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
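    // SwiGLU-style combine: the silu outputs are multiplied elementwise with
    // the second projection's outputs, shard by shard.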
    %54200 = torch.aten.mul.Tensor %54128, %54157 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54201 = torch.aten.mul.Tensor %54129, %54163 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54201, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54202 = torch.aten.mul.Tensor %54130, %54169 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54203 = torch.aten.mul.Tensor %54131, %54175 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54204 = torch.aten.mul.Tensor %54132, %54181 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54205 = torch.aten.mul.Tensor %54133, %54187 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54206 = torch.aten.mul.Tensor %54134, %54193 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %54207 = torch.aten.mul.Tensor %54135, %54199 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %54207, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
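    // Transposes of the down-projection weight shards
    // ([4096,1792] -> [1792,4096]).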
    %int1_51668 = torch.constant.int 1
    %int0_51669 = torch.constant.int 0
    %54208 = torch.prim.ListConstruct %int1_51668, %int0_51669 : (!torch.int, !torch.int) -> !torch.list<int>
    %54209 = torch.aten.permute %2016, %54208 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_51670 = torch.constant.int 1
    %int0_51671 = torch.constant.int 0
    %54210 = torch.prim.ListConstruct %int1_51670, %int0_51671 : (!torch.int, !torch.int) -> !torch.list<int>
    %54211 = torch.aten.permute %2017, %54210 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_51672 = torch.constant.int 1
    %int0_51673 = torch.constant.int 0
    %54212 = torch.prim.ListConstruct %int1_51672, %int0_51673 : (!torch.int, !torch.int) -> !torch.list<int>
    %54213 = torch.aten.permute %2018, %54212 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_51674 = torch.constant.int 1
    %int0_51675 = torch.constant.int 0
    %54214 = torch.prim.ListConstruct %int1_51674, %int0_51675 : (!torch.int, !torch.int) -> !torch.list<int>
    %54215 = torch.aten.permute %2019, %54214 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_51676 = torch.constant.int 1
    %int0_51677 = torch.constant.int 0
    %54216 = torch.prim.ListConstruct %int1_51676, %int0_51677 : (!torch.int, !torch.int) -> !torch.list<int>
    %54217 = torch.aten.permute %2020, %54216 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_51678 = torch.constant.int 1
    %int0_51679 = torch.constant.int 0
    %54218 = torch.prim.ListConstruct %int1_51678, %int0_51679 : (!torch.int, !torch.int) -> !torch.list<int>
    %54219 = torch.aten.permute %2021, %54218 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_51680 = torch.constant.int 1
    %int0_51681 = torch.constant.int 0
    %54220 = torch.prim.ListConstruct %int1_51680, %int0_51681 : (!torch.int, !torch.int) -> !torch.list<int>
    %54221 = torch.aten.permute %2022, %54220 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_51682 = torch.constant.int 1
    %int0_51683 = torch.constant.int 0
    %54222 = torch.prim.ListConstruct %int1_51682, %int0_51683 : (!torch.int, !torch.int) -> !torch.list<int>
    %54223 = torch.aten.permute %2023, %54222 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
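    // Per-shard down projection: each [4,?,1792] activation is flattened,
    // multiplied by its [1792,4096] weight shard, and reshaped to a
    // [4,?,4096] partial result (%54230 ... %54279) to be summed across shards.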
    %int1_51684 = torch.constant.int 1
    %54224 = torch.aten.size.int %54085, %int1_51684 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_51685 = torch.constant.int 4
    %54225 = torch.aten.mul.int %int4_51685, %54224 : !torch.int, !torch.int -> !torch.int
    %int1792_51686 = torch.constant.int 1792
    %54226 = torch.prim.ListConstruct %54225, %int1792_51686 : (!torch.int, !torch.int) -> !torch.list<int>
    %54227 = torch.aten.view %54200, %54226 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54227, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %54228 = torch.aten.mm %54227, %54209 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54228, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51687 = torch.constant.int 4
    %int4096_51688 = torch.constant.int 4096
    %54229 = torch.prim.ListConstruct %int4_51687, %54224, %int4096_51688 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54230 = torch.aten.view %54228, %54229 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51689 = torch.constant.int 1
    %54231 = torch.aten.size.int %54091, %int1_51689 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_51690 = torch.constant.int 4
    %54232 = torch.aten.mul.int %int4_51690, %54231 : !torch.int, !torch.int -> !torch.int
    %int1792_51691 = torch.constant.int 1792
    %54233 = torch.prim.ListConstruct %54232, %int1792_51691 : (!torch.int, !torch.int) -> !torch.list<int>
    %54234 = torch.aten.view %54201, %54233 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54234, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %54235 = torch.aten.mm %54234, %54211 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54235, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51692 = torch.constant.int 4
    %int4096_51693 = torch.constant.int 4096
    %54236 = torch.prim.ListConstruct %int4_51692, %54231, %int4096_51693 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54237 = torch.aten.view %54235, %54236 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51694 = torch.constant.int 1
    %54238 = torch.aten.size.int %54097, %int1_51694 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_51695 = torch.constant.int 4
    %54239 = torch.aten.mul.int %int4_51695, %54238 : !torch.int, !torch.int -> !torch.int
    %int1792_51696 = torch.constant.int 1792
    %54240 = torch.prim.ListConstruct %54239, %int1792_51696 : (!torch.int, !torch.int) -> !torch.list<int>
    %54241 = torch.aten.view %54202, %54240 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54241, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %54242 = torch.aten.mm %54241, %54213 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54242, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51697 = torch.constant.int 4
    %int4096_51698 = torch.constant.int 4096
    %54243 = torch.prim.ListConstruct %int4_51697, %54238, %int4096_51698 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54244 = torch.aten.view %54242, %54243 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51699 = torch.constant.int 1
    %54245 = torch.aten.size.int %54103, %int1_51699 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_51700 = torch.constant.int 4
    %54246 = torch.aten.mul.int %int4_51700, %54245 : !torch.int, !torch.int -> !torch.int
    %int1792_51701 = torch.constant.int 1792
    %54247 = torch.prim.ListConstruct %54246, %int1792_51701 : (!torch.int, !torch.int) -> !torch.list<int>
    %54248 = torch.aten.view %54203, %54247 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54248, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %54249 = torch.aten.mm %54248, %54215 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54249, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51702 = torch.constant.int 4
    %int4096_51703 = torch.constant.int 4096
    %54250 = torch.prim.ListConstruct %int4_51702, %54245, %int4096_51703 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54251 = torch.aten.view %54249, %54250 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51704 = torch.constant.int 1
    %54252 = torch.aten.size.int %54109, %int1_51704 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_51705 = torch.constant.int 4
    %54253 = torch.aten.mul.int %int4_51705, %54252 : !torch.int, !torch.int -> !torch.int
    %int1792_51706 = torch.constant.int 1792
    %54254 = torch.prim.ListConstruct %54253, %int1792_51706 : (!torch.int, !torch.int) -> !torch.list<int>
    %54255 = torch.aten.view %54204, %54254 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54255, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %54256 = torch.aten.mm %54255, %54217 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54256, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51707 = torch.constant.int 4
    %int4096_51708 = torch.constant.int 4096
    %54257 = torch.prim.ListConstruct %int4_51707, %54252, %int4096_51708 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54258 = torch.aten.view %54256, %54257 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51709 = torch.constant.int 1
    %54259 = torch.aten.size.int %54115, %int1_51709 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_51710 = torch.constant.int 4
    %54260 = torch.aten.mul.int %int4_51710, %54259 : !torch.int, !torch.int -> !torch.int
    %int1792_51711 = torch.constant.int 1792
    %54261 = torch.prim.ListConstruct %54260, %int1792_51711 : (!torch.int, !torch.int) -> !torch.list<int>
    %54262 = torch.aten.view %54205, %54261 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54262, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %54263 = torch.aten.mm %54262, %54219 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54263, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51712 = torch.constant.int 4
    %int4096_51713 = torch.constant.int 4096
    %54264 = torch.prim.ListConstruct %int4_51712, %54259, %int4096_51713 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54265 = torch.aten.view %54263, %54264 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51714 = torch.constant.int 1
    %54266 = torch.aten.size.int %54121, %int1_51714 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_51715 = torch.constant.int 4
    %54267 = torch.aten.mul.int %int4_51715, %54266 : !torch.int, !torch.int -> !torch.int
    %int1792_51716 = torch.constant.int 1792
    %54268 = torch.prim.ListConstruct %54267, %int1792_51716 : (!torch.int, !torch.int) -> !torch.list<int>
    %54269 = torch.aten.view %54206, %54268 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54269, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %54270 = torch.aten.mm %54269, %54221 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54270, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51717 = torch.constant.int 4
    %int4096_51718 = torch.constant.int 4096
    %54271 = torch.prim.ListConstruct %int4_51717, %54266, %int4096_51718 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54272 = torch.aten.view %54270, %54271 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51719 = torch.constant.int 1
    %54273 = torch.aten.size.int %54127, %int1_51719 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_51720 = torch.constant.int 4
    %54274 = torch.aten.mul.int %int4_51720, %54273 : !torch.int, !torch.int -> !torch.int
    %int1792_51721 = torch.constant.int 1792
    %54275 = torch.prim.ListConstruct %54274, %int1792_51721 : (!torch.int, !torch.int) -> !torch.list<int>
    %54276 = torch.aten.view %54207, %54275 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %54276, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %54277 = torch.aten.mm %54276, %54223 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54277, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_51722 = torch.constant.int 4
    %int4096_51723 = torch.constant.int 4096
    %54278 = torch.prim.ListConstruct %int4_51722, %54273, %int4096_51723 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54279 = torch.aten.view %54277, %54278 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
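    // Effectively an all-reduce: the remote partial results are copied to
    // @__device_0 via flow.tensor.transfer and accumulated with chained adds,
    // starting from the local partial %54230.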
    %54280 = torch_c.to_builtin_tensor %54237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51724 = arith.constant 1 : index
    %dim_51725 = tensor.dim %54280, %c1_51724 : tensor<4x?x4096xf16>
    %54281 = flow.tensor.transfer %54280 : tensor<4x?x4096xf16>{%dim_51725} to #hal.device.promise<@__device_0>
    %54282 = torch_c.from_builtin_tensor %54281 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54283 = torch_c.to_builtin_tensor %54244 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51726 = arith.constant 1 : index
    %dim_51727 = tensor.dim %54283, %c1_51726 : tensor<4x?x4096xf16>
    %54284 = flow.tensor.transfer %54283 : tensor<4x?x4096xf16>{%dim_51727} to #hal.device.promise<@__device_0>
    %54285 = torch_c.from_builtin_tensor %54284 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54286 = torch_c.to_builtin_tensor %54251 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51728 = arith.constant 1 : index
    %dim_51729 = tensor.dim %54286, %c1_51728 : tensor<4x?x4096xf16>
    %54287 = flow.tensor.transfer %54286 : tensor<4x?x4096xf16>{%dim_51729} to #hal.device.promise<@__device_0>
    %54288 = torch_c.from_builtin_tensor %54287 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54289 = torch_c.to_builtin_tensor %54258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51730 = arith.constant 1 : index
    %dim_51731 = tensor.dim %54289, %c1_51730 : tensor<4x?x4096xf16>
    %54290 = flow.tensor.transfer %54289 : tensor<4x?x4096xf16>{%dim_51731} to #hal.device.promise<@__device_0>
    %54291 = torch_c.from_builtin_tensor %54290 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54292 = torch_c.to_builtin_tensor %54265 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51732 = arith.constant 1 : index
    %dim_51733 = tensor.dim %54292, %c1_51732 : tensor<4x?x4096xf16>
    %54293 = flow.tensor.transfer %54292 : tensor<4x?x4096xf16>{%dim_51733} to #hal.device.promise<@__device_0>
    %54294 = torch_c.from_builtin_tensor %54293 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54295 = torch_c.to_builtin_tensor %54272 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51734 = arith.constant 1 : index
    %dim_51735 = tensor.dim %54295, %c1_51734 : tensor<4x?x4096xf16>
    %54296 = flow.tensor.transfer %54295 : tensor<4x?x4096xf16>{%dim_51735} to #hal.device.promise<@__device_0>
    %54297 = torch_c.from_builtin_tensor %54296 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54298 = torch_c.to_builtin_tensor %54279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51736 = arith.constant 1 : index
    %dim_51737 = tensor.dim %54298, %c1_51736 : tensor<4x?x4096xf16>
    %54299 = flow.tensor.transfer %54298 : tensor<4x?x4096xf16>{%dim_51737} to #hal.device.promise<@__device_0>
    %54300 = torch_c.from_builtin_tensor %54299 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51738 = torch.constant.int 1
    %54301 = torch.aten.add.Tensor %54230, %54282, %int1_51738 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51739 = torch.constant.int 1
    %54302 = torch.aten.add.Tensor %54301, %54285, %int1_51739 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51740 = torch.constant.int 1
    %54303 = torch.aten.add.Tensor %54302, %54288, %int1_51740 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51741 = torch.constant.int 1
    %54304 = torch.aten.add.Tensor %54303, %54291, %int1_51741 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51742 = torch.constant.int 1
    %54305 = torch.aten.add.Tensor %54304, %54294, %int1_51742 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51743 = torch.constant.int 1
    %54306 = torch.aten.add.Tensor %54305, %54297, %int1_51743 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51744 = torch.constant.int 1
    %54307 = torch.aten.add.Tensor %54306, %54300, %int1_51744 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
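    // The same transfer-and-accumulate reduction, replicated for @__device_1
    // (its local partial %54237 is added without a transfer).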
    %54308 = torch_c.to_builtin_tensor %54230 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51745 = arith.constant 1 : index
    %dim_51746 = tensor.dim %54308, %c1_51745 : tensor<4x?x4096xf16>
    %54309 = flow.tensor.transfer %54308 : tensor<4x?x4096xf16>{%dim_51746} to #hal.device.promise<@__device_1>
    %54310 = torch_c.from_builtin_tensor %54309 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54311 = torch_c.to_builtin_tensor %54244 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51747 = arith.constant 1 : index
    %dim_51748 = tensor.dim %54311, %c1_51747 : tensor<4x?x4096xf16>
    %54312 = flow.tensor.transfer %54311 : tensor<4x?x4096xf16>{%dim_51748} to #hal.device.promise<@__device_1>
    %54313 = torch_c.from_builtin_tensor %54312 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54314 = torch_c.to_builtin_tensor %54251 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51749 = arith.constant 1 : index
    %dim_51750 = tensor.dim %54314, %c1_51749 : tensor<4x?x4096xf16>
    %54315 = flow.tensor.transfer %54314 : tensor<4x?x4096xf16>{%dim_51750} to #hal.device.promise<@__device_1>
    %54316 = torch_c.from_builtin_tensor %54315 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54317 = torch_c.to_builtin_tensor %54258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51751 = arith.constant 1 : index
    %dim_51752 = tensor.dim %54317, %c1_51751 : tensor<4x?x4096xf16>
    %54318 = flow.tensor.transfer %54317 : tensor<4x?x4096xf16>{%dim_51752} to #hal.device.promise<@__device_1>
    %54319 = torch_c.from_builtin_tensor %54318 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54319, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54320 = torch_c.to_builtin_tensor %54265 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51753 = arith.constant 1 : index
    %dim_51754 = tensor.dim %54320, %c1_51753 : tensor<4x?x4096xf16>
    %54321 = flow.tensor.transfer %54320 : tensor<4x?x4096xf16>{%dim_51754} to #hal.device.promise<@__device_1>
    %54322 = torch_c.from_builtin_tensor %54321 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54323 = torch_c.to_builtin_tensor %54272 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51755 = arith.constant 1 : index
    %dim_51756 = tensor.dim %54323, %c1_51755 : tensor<4x?x4096xf16>
    %54324 = flow.tensor.transfer %54323 : tensor<4x?x4096xf16>{%dim_51756} to #hal.device.promise<@__device_1>
    %54325 = torch_c.from_builtin_tensor %54324 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54326 = torch_c.to_builtin_tensor %54279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51757 = arith.constant 1 : index
    %dim_51758 = tensor.dim %54326, %c1_51757 : tensor<4x?x4096xf16>
    %54327 = flow.tensor.transfer %54326 : tensor<4x?x4096xf16>{%dim_51758} to #hal.device.promise<@__device_1>
    %54328 = torch_c.from_builtin_tensor %54327 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51759 = torch.constant.int 1
    %54329 = torch.aten.add.Tensor %54310, %54237, %int1_51759 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51760 = torch.constant.int 1
    %54330 = torch.aten.add.Tensor %54329, %54313, %int1_51760 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51761 = torch.constant.int 1
    %54331 = torch.aten.add.Tensor %54330, %54316, %int1_51761 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51762 = torch.constant.int 1
    %54332 = torch.aten.add.Tensor %54331, %54319, %int1_51762 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51763 = torch.constant.int 1
    %54333 = torch.aten.add.Tensor %54332, %54322, %int1_51763 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51764 = torch.constant.int 1
    %54334 = torch.aten.add.Tensor %54333, %54325, %int1_51764 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51765 = torch.constant.int 1
    %54335 = torch.aten.add.Tensor %54334, %54328, %int1_51765 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
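    // Reduction replica for @__device_2; the local partial %54244 stays in place.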
    %54336 = torch_c.to_builtin_tensor %54230 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51766 = arith.constant 1 : index
    %dim_51767 = tensor.dim %54336, %c1_51766 : tensor<4x?x4096xf16>
    %54337 = flow.tensor.transfer %54336 : tensor<4x?x4096xf16>{%dim_51767} to #hal.device.promise<@__device_2>
    %54338 = torch_c.from_builtin_tensor %54337 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54339 = torch_c.to_builtin_tensor %54237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51768 = arith.constant 1 : index
    %dim_51769 = tensor.dim %54339, %c1_51768 : tensor<4x?x4096xf16>
    %54340 = flow.tensor.transfer %54339 : tensor<4x?x4096xf16>{%dim_51769} to #hal.device.promise<@__device_2>
    %54341 = torch_c.from_builtin_tensor %54340 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54342 = torch_c.to_builtin_tensor %54251 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51770 = arith.constant 1 : index
    %dim_51771 = tensor.dim %54342, %c1_51770 : tensor<4x?x4096xf16>
    %54343 = flow.tensor.transfer %54342 : tensor<4x?x4096xf16>{%dim_51771} to #hal.device.promise<@__device_2>
    %54344 = torch_c.from_builtin_tensor %54343 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54344, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54345 = torch_c.to_builtin_tensor %54258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51772 = arith.constant 1 : index
    %dim_51773 = tensor.dim %54345, %c1_51772 : tensor<4x?x4096xf16>
    %54346 = flow.tensor.transfer %54345 : tensor<4x?x4096xf16>{%dim_51773} to #hal.device.promise<@__device_2>
    %54347 = torch_c.from_builtin_tensor %54346 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54348 = torch_c.to_builtin_tensor %54265 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51774 = arith.constant 1 : index
    %dim_51775 = tensor.dim %54348, %c1_51774 : tensor<4x?x4096xf16>
    %54349 = flow.tensor.transfer %54348 : tensor<4x?x4096xf16>{%dim_51775} to #hal.device.promise<@__device_2>
    %54350 = torch_c.from_builtin_tensor %54349 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54350, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54351 = torch_c.to_builtin_tensor %54272 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51776 = arith.constant 1 : index
    %dim_51777 = tensor.dim %54351, %c1_51776 : tensor<4x?x4096xf16>
    %54352 = flow.tensor.transfer %54351 : tensor<4x?x4096xf16>{%dim_51777} to #hal.device.promise<@__device_2>
    %54353 = torch_c.from_builtin_tensor %54352 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54354 = torch_c.to_builtin_tensor %54279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51778 = arith.constant 1 : index
    %dim_51779 = tensor.dim %54354, %c1_51778 : tensor<4x?x4096xf16>
    %54355 = flow.tensor.transfer %54354 : tensor<4x?x4096xf16>{%dim_51779} to #hal.device.promise<@__device_2>
    %54356 = torch_c.from_builtin_tensor %54355 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54356, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51780 = torch.constant.int 1
    %54357 = torch.aten.add.Tensor %54338, %54341, %int1_51780 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51781 = torch.constant.int 1
    %54358 = torch.aten.add.Tensor %54357, %54244, %int1_51781 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51782 = torch.constant.int 1
    %54359 = torch.aten.add.Tensor %54358, %54344, %int1_51782 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51783 = torch.constant.int 1
    %54360 = torch.aten.add.Tensor %54359, %54347, %int1_51783 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51784 = torch.constant.int 1
    %54361 = torch.aten.add.Tensor %54360, %54350, %int1_51784 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51785 = torch.constant.int 1
    %54362 = torch.aten.add.Tensor %54361, %54353, %int1_51785 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51786 = torch.constant.int 1
    %54363 = torch.aten.add.Tensor %54362, %54356, %int1_51786 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
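    // Same reduction pattern, materialized on @__device_3 (the device-local
    // partial %54251 is added directly, without a flow.tensor.transfer).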
    %54364 = torch_c.to_builtin_tensor %54230 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51787 = arith.constant 1 : index
    %dim_51788 = tensor.dim %54364, %c1_51787 : tensor<4x?x4096xf16>
    %54365 = flow.tensor.transfer %54364 : tensor<4x?x4096xf16>{%dim_51788} to #hal.device.promise<@__device_3>
    %54366 = torch_c.from_builtin_tensor %54365 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54367 = torch_c.to_builtin_tensor %54237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51789 = arith.constant 1 : index
    %dim_51790 = tensor.dim %54367, %c1_51789 : tensor<4x?x4096xf16>
    %54368 = flow.tensor.transfer %54367 : tensor<4x?x4096xf16>{%dim_51790} to #hal.device.promise<@__device_3>
    %54369 = torch_c.from_builtin_tensor %54368 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54370 = torch_c.to_builtin_tensor %54244 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51791 = arith.constant 1 : index
    %dim_51792 = tensor.dim %54370, %c1_51791 : tensor<4x?x4096xf16>
    %54371 = flow.tensor.transfer %54370 : tensor<4x?x4096xf16>{%dim_51792} to #hal.device.promise<@__device_3>
    %54372 = torch_c.from_builtin_tensor %54371 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54373 = torch_c.to_builtin_tensor %54258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51793 = arith.constant 1 : index
    %dim_51794 = tensor.dim %54373, %c1_51793 : tensor<4x?x4096xf16>
    %54374 = flow.tensor.transfer %54373 : tensor<4x?x4096xf16>{%dim_51794} to #hal.device.promise<@__device_3>
    %54375 = torch_c.from_builtin_tensor %54374 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54376 = torch_c.to_builtin_tensor %54265 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51795 = arith.constant 1 : index
    %dim_51796 = tensor.dim %54376, %c1_51795 : tensor<4x?x4096xf16>
    %54377 = flow.tensor.transfer %54376 : tensor<4x?x4096xf16>{%dim_51796} to #hal.device.promise<@__device_3>
    %54378 = torch_c.from_builtin_tensor %54377 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54379 = torch_c.to_builtin_tensor %54272 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51797 = arith.constant 1 : index
    %dim_51798 = tensor.dim %54379, %c1_51797 : tensor<4x?x4096xf16>
    %54380 = flow.tensor.transfer %54379 : tensor<4x?x4096xf16>{%dim_51798} to #hal.device.promise<@__device_3>
    %54381 = torch_c.from_builtin_tensor %54380 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54382 = torch_c.to_builtin_tensor %54279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51799 = arith.constant 1 : index
    %dim_51800 = tensor.dim %54382, %c1_51799 : tensor<4x?x4096xf16>
    %54383 = flow.tensor.transfer %54382 : tensor<4x?x4096xf16>{%dim_51800} to #hal.device.promise<@__device_3>
    %54384 = torch_c.from_builtin_tensor %54383 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51801 = torch.constant.int 1
    %54385 = torch.aten.add.Tensor %54366, %54369, %int1_51801 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51802 = torch.constant.int 1
    %54386 = torch.aten.add.Tensor %54385, %54372, %int1_51802 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51803 = torch.constant.int 1
    %54387 = torch.aten.add.Tensor %54386, %54251, %int1_51803 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51804 = torch.constant.int 1
    %54388 = torch.aten.add.Tensor %54387, %54375, %int1_51804 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51805 = torch.constant.int 1
    %54389 = torch.aten.add.Tensor %54388, %54378, %int1_51805 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51806 = torch.constant.int 1
    %54390 = torch.aten.add.Tensor %54389, %54381, %int1_51806 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51807 = torch.constant.int 1
    %54391 = torch.aten.add.Tensor %54390, %54384, %int1_51807 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
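    // Reduction replica for @__device_4 (%54258 is added without a transfer,
    // so it appears to be resident there already).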
    %54392 = torch_c.to_builtin_tensor %54230 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51808 = arith.constant 1 : index
    %dim_51809 = tensor.dim %54392, %c1_51808 : tensor<4x?x4096xf16>
    %54393 = flow.tensor.transfer %54392 : tensor<4x?x4096xf16>{%dim_51809} to #hal.device.promise<@__device_4>
    %54394 = torch_c.from_builtin_tensor %54393 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54395 = torch_c.to_builtin_tensor %54237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51810 = arith.constant 1 : index
    %dim_51811 = tensor.dim %54395, %c1_51810 : tensor<4x?x4096xf16>
    %54396 = flow.tensor.transfer %54395 : tensor<4x?x4096xf16>{%dim_51811} to #hal.device.promise<@__device_4>
    %54397 = torch_c.from_builtin_tensor %54396 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54398 = torch_c.to_builtin_tensor %54244 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51812 = arith.constant 1 : index
    %dim_51813 = tensor.dim %54398, %c1_51812 : tensor<4x?x4096xf16>
    %54399 = flow.tensor.transfer %54398 : tensor<4x?x4096xf16>{%dim_51813} to #hal.device.promise<@__device_4>
    %54400 = torch_c.from_builtin_tensor %54399 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54401 = torch_c.to_builtin_tensor %54251 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51814 = arith.constant 1 : index
    %dim_51815 = tensor.dim %54401, %c1_51814 : tensor<4x?x4096xf16>
    %54402 = flow.tensor.transfer %54401 : tensor<4x?x4096xf16>{%dim_51815} to #hal.device.promise<@__device_4>
    %54403 = torch_c.from_builtin_tensor %54402 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54404 = torch_c.to_builtin_tensor %54265 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51816 = arith.constant 1 : index
    %dim_51817 = tensor.dim %54404, %c1_51816 : tensor<4x?x4096xf16>
    %54405 = flow.tensor.transfer %54404 : tensor<4x?x4096xf16>{%dim_51817} to #hal.device.promise<@__device_4>
    %54406 = torch_c.from_builtin_tensor %54405 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54407 = torch_c.to_builtin_tensor %54272 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51818 = arith.constant 1 : index
    %dim_51819 = tensor.dim %54407, %c1_51818 : tensor<4x?x4096xf16>
    %54408 = flow.tensor.transfer %54407 : tensor<4x?x4096xf16>{%dim_51819} to #hal.device.promise<@__device_4>
    %54409 = torch_c.from_builtin_tensor %54408 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54410 = torch_c.to_builtin_tensor %54279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51820 = arith.constant 1 : index
    %dim_51821 = tensor.dim %54410, %c1_51820 : tensor<4x?x4096xf16>
    %54411 = flow.tensor.transfer %54410 : tensor<4x?x4096xf16>{%dim_51821} to #hal.device.promise<@__device_4>
    %54412 = torch_c.from_builtin_tensor %54411 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51822 = torch.constant.int 1
    %54413 = torch.aten.add.Tensor %54394, %54397, %int1_51822 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51823 = torch.constant.int 1
    %54414 = torch.aten.add.Tensor %54413, %54400, %int1_51823 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51824 = torch.constant.int 1
    %54415 = torch.aten.add.Tensor %54414, %54403, %int1_51824 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51825 = torch.constant.int 1
    %54416 = torch.aten.add.Tensor %54415, %54258, %int1_51825 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51826 = torch.constant.int 1
    %54417 = torch.aten.add.Tensor %54416, %54406, %int1_51826 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51827 = torch.constant.int 1
    %54418 = torch.aten.add.Tensor %54417, %54409, %int1_51827 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51828 = torch.constant.int 1
    %54419 = torch.aten.add.Tensor %54418, %54412, %int1_51828 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
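    // Reduction replica for @__device_5 (%54265 added transfer-free).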
    %54420 = torch_c.to_builtin_tensor %54230 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51829 = arith.constant 1 : index
    %dim_51830 = tensor.dim %54420, %c1_51829 : tensor<4x?x4096xf16>
    %54421 = flow.tensor.transfer %54420 : tensor<4x?x4096xf16>{%dim_51830} to #hal.device.promise<@__device_5>
    %54422 = torch_c.from_builtin_tensor %54421 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54423 = torch_c.to_builtin_tensor %54237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51831 = arith.constant 1 : index
    %dim_51832 = tensor.dim %54423, %c1_51831 : tensor<4x?x4096xf16>
    %54424 = flow.tensor.transfer %54423 : tensor<4x?x4096xf16>{%dim_51832} to #hal.device.promise<@__device_5>
    %54425 = torch_c.from_builtin_tensor %54424 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54426 = torch_c.to_builtin_tensor %54244 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51833 = arith.constant 1 : index
    %dim_51834 = tensor.dim %54426, %c1_51833 : tensor<4x?x4096xf16>
    %54427 = flow.tensor.transfer %54426 : tensor<4x?x4096xf16>{%dim_51834} to #hal.device.promise<@__device_5>
    %54428 = torch_c.from_builtin_tensor %54427 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54429 = torch_c.to_builtin_tensor %54251 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51835 = arith.constant 1 : index
    %dim_51836 = tensor.dim %54429, %c1_51835 : tensor<4x?x4096xf16>
    %54430 = flow.tensor.transfer %54429 : tensor<4x?x4096xf16>{%dim_51836} to #hal.device.promise<@__device_5>
    %54431 = torch_c.from_builtin_tensor %54430 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54432 = torch_c.to_builtin_tensor %54258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51837 = arith.constant 1 : index
    %dim_51838 = tensor.dim %54432, %c1_51837 : tensor<4x?x4096xf16>
    %54433 = flow.tensor.transfer %54432 : tensor<4x?x4096xf16>{%dim_51838} to #hal.device.promise<@__device_5>
    %54434 = torch_c.from_builtin_tensor %54433 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54435 = torch_c.to_builtin_tensor %54272 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51839 = arith.constant 1 : index
    %dim_51840 = tensor.dim %54435, %c1_51839 : tensor<4x?x4096xf16>
    %54436 = flow.tensor.transfer %54435 : tensor<4x?x4096xf16>{%dim_51840} to #hal.device.promise<@__device_5>
    %54437 = torch_c.from_builtin_tensor %54436 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54438 = torch_c.to_builtin_tensor %54279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51841 = arith.constant 1 : index
    %dim_51842 = tensor.dim %54438, %c1_51841 : tensor<4x?x4096xf16>
    %54439 = flow.tensor.transfer %54438 : tensor<4x?x4096xf16>{%dim_51842} to #hal.device.promise<@__device_5>
    %54440 = torch_c.from_builtin_tensor %54439 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51843 = torch.constant.int 1
    %54441 = torch.aten.add.Tensor %54422, %54425, %int1_51843 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51844 = torch.constant.int 1
    %54442 = torch.aten.add.Tensor %54441, %54428, %int1_51844 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51845 = torch.constant.int 1
    %54443 = torch.aten.add.Tensor %54442, %54431, %int1_51845 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51846 = torch.constant.int 1
    %54444 = torch.aten.add.Tensor %54443, %54434, %int1_51846 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51847 = torch.constant.int 1
    %54445 = torch.aten.add.Tensor %54444, %54265, %int1_51847 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51848 = torch.constant.int 1
    %54446 = torch.aten.add.Tensor %54445, %54437, %int1_51848 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51849 = torch.constant.int 1
    %54447 = torch.aten.add.Tensor %54446, %54440, %int1_51849 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
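    // Reduction replica for @__device_6 (%54272 added transfer-free).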
    %54448 = torch_c.to_builtin_tensor %54230 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51850 = arith.constant 1 : index
    %dim_51851 = tensor.dim %54448, %c1_51850 : tensor<4x?x4096xf16>
    %54449 = flow.tensor.transfer %54448 : tensor<4x?x4096xf16>{%dim_51851} to #hal.device.promise<@__device_6>
    %54450 = torch_c.from_builtin_tensor %54449 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54451 = torch_c.to_builtin_tensor %54237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51852 = arith.constant 1 : index
    %dim_51853 = tensor.dim %54451, %c1_51852 : tensor<4x?x4096xf16>
    %54452 = flow.tensor.transfer %54451 : tensor<4x?x4096xf16>{%dim_51853} to #hal.device.promise<@__device_6>
    %54453 = torch_c.from_builtin_tensor %54452 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54454 = torch_c.to_builtin_tensor %54244 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51854 = arith.constant 1 : index
    %dim_51855 = tensor.dim %54454, %c1_51854 : tensor<4x?x4096xf16>
    %54455 = flow.tensor.transfer %54454 : tensor<4x?x4096xf16>{%dim_51855} to #hal.device.promise<@__device_6>
    %54456 = torch_c.from_builtin_tensor %54455 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54456, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54457 = torch_c.to_builtin_tensor %54251 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51856 = arith.constant 1 : index
    %dim_51857 = tensor.dim %54457, %c1_51856 : tensor<4x?x4096xf16>
    %54458 = flow.tensor.transfer %54457 : tensor<4x?x4096xf16>{%dim_51857} to #hal.device.promise<@__device_6>
    %54459 = torch_c.from_builtin_tensor %54458 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54460 = torch_c.to_builtin_tensor %54258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51858 = arith.constant 1 : index
    %dim_51859 = tensor.dim %54460, %c1_51858 : tensor<4x?x4096xf16>
    %54461 = flow.tensor.transfer %54460 : tensor<4x?x4096xf16>{%dim_51859} to #hal.device.promise<@__device_6>
    %54462 = torch_c.from_builtin_tensor %54461 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54463 = torch_c.to_builtin_tensor %54265 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51860 = arith.constant 1 : index
    %dim_51861 = tensor.dim %54463, %c1_51860 : tensor<4x?x4096xf16>
    %54464 = flow.tensor.transfer %54463 : tensor<4x?x4096xf16>{%dim_51861} to #hal.device.promise<@__device_6>
    %54465 = torch_c.from_builtin_tensor %54464 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54466 = torch_c.to_builtin_tensor %54279 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51862 = arith.constant 1 : index
    %dim_51863 = tensor.dim %54466, %c1_51862 : tensor<4x?x4096xf16>
    %54467 = flow.tensor.transfer %54466 : tensor<4x?x4096xf16>{%dim_51863} to #hal.device.promise<@__device_6>
    %54468 = torch_c.from_builtin_tensor %54467 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51864 = torch.constant.int 1
    %54469 = torch.aten.add.Tensor %54450, %54453, %int1_51864 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51865 = torch.constant.int 1
    %54470 = torch.aten.add.Tensor %54469, %54456, %int1_51865 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51866 = torch.constant.int 1
    %54471 = torch.aten.add.Tensor %54470, %54459, %int1_51866 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51867 = torch.constant.int 1
    %54472 = torch.aten.add.Tensor %54471, %54462, %int1_51867 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51868 = torch.constant.int 1
    %54473 = torch.aten.add.Tensor %54472, %54465, %int1_51868 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51869 = torch.constant.int 1
    %54474 = torch.aten.add.Tensor %54473, %54272, %int1_51869 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51870 = torch.constant.int 1
    %54475 = torch.aten.add.Tensor %54474, %54468, %int1_51870 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
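    // Reduction replica for @__device_7 (%54279 added transfer-free).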
    %54476 = torch_c.to_builtin_tensor %54230 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51871 = arith.constant 1 : index
    %dim_51872 = tensor.dim %54476, %c1_51871 : tensor<4x?x4096xf16>
    %54477 = flow.tensor.transfer %54476 : tensor<4x?x4096xf16>{%dim_51872} to #hal.device.promise<@__device_7>
    %54478 = torch_c.from_builtin_tensor %54477 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54479 = torch_c.to_builtin_tensor %54237 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51873 = arith.constant 1 : index
    %dim_51874 = tensor.dim %54479, %c1_51873 : tensor<4x?x4096xf16>
    %54480 = flow.tensor.transfer %54479 : tensor<4x?x4096xf16>{%dim_51874} to #hal.device.promise<@__device_7>
    %54481 = torch_c.from_builtin_tensor %54480 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54482 = torch_c.to_builtin_tensor %54244 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51875 = arith.constant 1 : index
    %dim_51876 = tensor.dim %54482, %c1_51875 : tensor<4x?x4096xf16>
    %54483 = flow.tensor.transfer %54482 : tensor<4x?x4096xf16>{%dim_51876} to #hal.device.promise<@__device_7>
    %54484 = torch_c.from_builtin_tensor %54483 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54485 = torch_c.to_builtin_tensor %54251 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51877 = arith.constant 1 : index
    %dim_51878 = tensor.dim %54485, %c1_51877 : tensor<4x?x4096xf16>
    %54486 = flow.tensor.transfer %54485 : tensor<4x?x4096xf16>{%dim_51878} to #hal.device.promise<@__device_7>
    %54487 = torch_c.from_builtin_tensor %54486 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54488 = torch_c.to_builtin_tensor %54258 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51879 = arith.constant 1 : index
    %dim_51880 = tensor.dim %54488, %c1_51879 : tensor<4x?x4096xf16>
    %54489 = flow.tensor.transfer %54488 : tensor<4x?x4096xf16>{%dim_51880} to #hal.device.promise<@__device_7>
    %54490 = torch_c.from_builtin_tensor %54489 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54491 = torch_c.to_builtin_tensor %54265 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51881 = arith.constant 1 : index
    %dim_51882 = tensor.dim %54491, %c1_51881 : tensor<4x?x4096xf16>
    %54492 = flow.tensor.transfer %54491 : tensor<4x?x4096xf16>{%dim_51882} to #hal.device.promise<@__device_7>
    %54493 = torch_c.from_builtin_tensor %54492 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %54494 = torch_c.to_builtin_tensor %54272 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_51883 = arith.constant 1 : index
    %dim_51884 = tensor.dim %54494, %c1_51883 : tensor<4x?x4096xf16>
    %54495 = flow.tensor.transfer %54494 : tensor<4x?x4096xf16>{%dim_51884} to #hal.device.promise<@__device_7>
    %54496 = torch_c.from_builtin_tensor %54495 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51885 = torch.constant.int 1
    %54497 = torch.aten.add.Tensor %54478, %54481, %int1_51885 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51886 = torch.constant.int 1
    %54498 = torch.aten.add.Tensor %54497, %54484, %int1_51886 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51887 = torch.constant.int 1
    %54499 = torch.aten.add.Tensor %54498, %54487, %int1_51887 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51888 = torch.constant.int 1
    %54500 = torch.aten.add.Tensor %54499, %54490, %int1_51888 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51889 = torch.constant.int 1
    %54501 = torch.aten.add.Tensor %54500, %54493, %int1_51889 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51890 = torch.constant.int 1
    %54502 = torch.aten.add.Tensor %54501, %54496, %int1_51890 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51891 = torch.constant.int 1
    %54503 = torch.aten.add.Tensor %54502, %54279, %int1_51891 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
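    // Residual connection: add each device's reduced output (%54307, %54335,
    // %54363 ... %54503) to that device's copy of the hidden state
    // (%53984 ... %53991).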
    %int1_51892 = torch.constant.int 1
    %54504 = torch.aten.add.Tensor %53984, %54307, %int1_51892 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51893 = torch.constant.int 1
    %54505 = torch.aten.add.Tensor %53985, %54335, %int1_51893 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51894 = torch.constant.int 1
    %54506 = torch.aten.add.Tensor %53986, %54363, %int1_51894 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51895 = torch.constant.int 1
    %54507 = torch.aten.add.Tensor %53987, %54391, %int1_51895 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51896 = torch.constant.int 1
    %54508 = torch.aten.add.Tensor %53988, %54419, %int1_51896 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51897 = torch.constant.int 1
    %54509 = torch.aten.add.Tensor %53989, %54447, %int1_51897 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51898 = torch.constant.int 1
    %54510 = torch.aten.add.Tensor %53990, %54475, %int1_51898 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_51899 = torch.constant.int 1
    %54511 = torch.aten.add.Tensor %53991, %54503, %int1_51899 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
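    // Next normalization, computed redundantly on every device. First upcast
    // f16 -> f32 (torch dtype 6) so the mean/rsqrt below run at full precision.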
    %int6_51900 = torch.constant.int 6
    %54512 = torch.prims.convert_element_type %54504, %int6_51900 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51901 = torch.constant.int 6
    %54513 = torch.prims.convert_element_type %54505, %int6_51901 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51902 = torch.constant.int 6
    %54514 = torch.prims.convert_element_type %54506, %int6_51902 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51903 = torch.constant.int 6
    %54515 = torch.prims.convert_element_type %54507, %int6_51903 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51904 = torch.constant.int 6
    %54516 = torch.prims.convert_element_type %54508, %int6_51904 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51905 = torch.constant.int 6
    %54517 = torch.prims.convert_element_type %54509, %int6_51905 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51906 = torch.constant.int 6
    %54518 = torch.prims.convert_element_type %54510, %int6_51906 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_51907 = torch.constant.int 6
    %54519 = torch.prims.convert_element_type %54511, %int6_51907 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
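    // Square the activations: the x^2 term of mean(x^2).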
    %int2_51908 = torch.constant.int 2
    %54520 = torch.aten.pow.Tensor_Scalar %54512, %int2_51908 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54520, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51909 = torch.constant.int 2
    %54521 = torch.aten.pow.Tensor_Scalar %54513, %int2_51909 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51910 = torch.constant.int 2
    %54522 = torch.aten.pow.Tensor_Scalar %54514, %int2_51910 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51911 = torch.constant.int 2
    %54523 = torch.aten.pow.Tensor_Scalar %54515, %int2_51911 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51912 = torch.constant.int 2
    %54524 = torch.aten.pow.Tensor_Scalar %54516, %int2_51912 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51913 = torch.constant.int 2
    %54525 = torch.aten.pow.Tensor_Scalar %54517, %int2_51913 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51914 = torch.constant.int 2
    %54526 = torch.aten.pow.Tensor_Scalar %54518, %int2_51914 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54526, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_51915 = torch.constant.int 2
    %54527 = torch.aten.pow.Tensor_Scalar %54519, %int2_51915 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
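    // mean(x^2) over the hidden dimension (dim -1, keepdim = true).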
    %int-1_51916 = torch.constant.int -1
    %54528 = torch.prim.ListConstruct %int-1_51916 : (!torch.int) -> !torch.list<int>
    %true_51917 = torch.constant.bool true
    %none_51918 = torch.constant.none
    %54529 = torch.aten.mean.dim %54520, %54528, %true_51917, %none_51918 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51919 = torch.constant.int -1
    %54530 = torch.prim.ListConstruct %int-1_51919 : (!torch.int) -> !torch.list<int>
    %true_51920 = torch.constant.bool true
    %none_51921 = torch.constant.none
    %54531 = torch.aten.mean.dim %54521, %54530, %true_51920, %none_51921 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51922 = torch.constant.int -1
    %54532 = torch.prim.ListConstruct %int-1_51922 : (!torch.int) -> !torch.list<int>
    %true_51923 = torch.constant.bool true
    %none_51924 = torch.constant.none
    %54533 = torch.aten.mean.dim %54522, %54532, %true_51923, %none_51924 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51925 = torch.constant.int -1
    %54534 = torch.prim.ListConstruct %int-1_51925 : (!torch.int) -> !torch.list<int>
    %true_51926 = torch.constant.bool true
    %none_51927 = torch.constant.none
    %54535 = torch.aten.mean.dim %54523, %54534, %true_51926, %none_51927 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51928 = torch.constant.int -1
    %54536 = torch.prim.ListConstruct %int-1_51928 : (!torch.int) -> !torch.list<int>
    %true_51929 = torch.constant.bool true
    %none_51930 = torch.constant.none
    %54537 = torch.aten.mean.dim %54524, %54536, %true_51929, %none_51930 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51931 = torch.constant.int -1
    %54538 = torch.prim.ListConstruct %int-1_51931 : (!torch.int) -> !torch.list<int>
    %true_51932 = torch.constant.bool true
    %none_51933 = torch.constant.none
    %54539 = torch.aten.mean.dim %54525, %54538, %true_51932, %none_51933 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51934 = torch.constant.int -1
    %54540 = torch.prim.ListConstruct %int-1_51934 : (!torch.int) -> !torch.list<int>
    %true_51935 = torch.constant.bool true
    %none_51936 = torch.constant.none
    %54541 = torch.aten.mean.dim %54526, %54540, %true_51935, %none_51936 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_51937 = torch.constant.int -1
    %54542 = torch.prim.ListConstruct %int-1_51937 : (!torch.int) -> !torch.list<int>
    %true_51938 = torch.constant.bool true
    %none_51939 = torch.constant.none
    %54543 = torch.aten.mean.dim %54527, %54542, %true_51938, %none_51939 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
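    // Add the epsilon (~1e-5, shown at f32 printing precision) before rsqrt.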
    %float9.999990e-06_51940 = torch.constant.float 9.9999997473787516E-6
    %int1_51941 = torch.constant.int 1
    %54544 = torch.aten.add.Scalar %54529, %float9.999990e-06_51940, %int1_51941 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51942 = torch.constant.float 9.9999997473787516E-6
    %int1_51943 = torch.constant.int 1
    %54545 = torch.aten.add.Scalar %54531, %float9.999990e-06_51942, %int1_51943 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51944 = torch.constant.float 9.9999997473787516E-6
    %int1_51945 = torch.constant.int 1
    %54546 = torch.aten.add.Scalar %54533, %float9.999990e-06_51944, %int1_51945 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51946 = torch.constant.float 9.9999997473787516E-6
    %int1_51947 = torch.constant.int 1
    %54547 = torch.aten.add.Scalar %54535, %float9.999990e-06_51946, %int1_51947 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51948 = torch.constant.float 9.9999997473787516E-6
    %int1_51949 = torch.constant.int 1
    %54548 = torch.aten.add.Scalar %54537, %float9.999990e-06_51948, %int1_51949 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51950 = torch.constant.float 9.9999997473787516E-6
    %int1_51951 = torch.constant.int 1
    %54549 = torch.aten.add.Scalar %54539, %float9.999990e-06_51950, %int1_51951 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51952 = torch.constant.float 9.9999997473787516E-6
    %int1_51953 = torch.constant.int 1
    %54550 = torch.aten.add.Scalar %54541, %float9.999990e-06_51952, %int1_51953 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_51954 = torch.constant.float 9.9999997473787516E-6
    %int1_51955 = torch.constant.int 1
    %54551 = torch.aten.add.Scalar %54543, %float9.999990e-06_51954, %int1_51955 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54551, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
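    // rsqrt(mean(x^2) + eps), then multiply into x to normalize -- the core
    // of an RMSNorm.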
    %54552 = torch.aten.rsqrt %54544 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54553 = torch.aten.rsqrt %54545 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54554 = torch.aten.rsqrt %54546 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54555 = torch.aten.rsqrt %54547 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54556 = torch.aten.rsqrt %54548 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54557 = torch.aten.rsqrt %54549 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54557, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54558 = torch.aten.rsqrt %54550 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54559 = torch.aten.rsqrt %54551 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %54559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %54560 = torch.aten.mul.Tensor %54512, %54552 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54561 = torch.aten.mul.Tensor %54513, %54553 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54562 = torch.aten.mul.Tensor %54514, %54554 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54563 = torch.aten.mul.Tensor %54515, %54555 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54564 = torch.aten.mul.Tensor %54516, %54556 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54565 = torch.aten.mul.Tensor %54517, %54557 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54566 = torch.aten.mul.Tensor %54518, %54558 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54567 = torch.aten.mul.Tensor %54519, %54559 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
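    // Scale by the learned 4096-element norm weights (%2024 ... %2031 are
    // per-device replicas; presumably this block's attn_norm, given the Q
    // projection that follows).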
    %54568 = torch.aten.mul.Tensor %2024, %54560 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54569 = torch.aten.mul.Tensor %2025, %54561 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54570 = torch.aten.mul.Tensor %2026, %54562 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54571 = torch.aten.mul.Tensor %2027, %54563 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54572 = torch.aten.mul.Tensor %2028, %54564 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54573 = torch.aten.mul.Tensor %2029, %54565 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54574 = torch.aten.mul.Tensor %2030, %54566 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %54575 = torch.aten.mul.Tensor %2031, %54567 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %54575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
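    // Downcast the normalized activations back to f16 (torch dtype 5).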
    %int5_51956 = torch.constant.int 5
    %54576 = torch.prims.convert_element_type %54568, %int5_51956 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51957 = torch.constant.int 5
    %54577 = torch.prims.convert_element_type %54569, %int5_51957 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51958 = torch.constant.int 5
    %54578 = torch.prims.convert_element_type %54570, %int5_51958 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51959 = torch.constant.int 5
    %54579 = torch.prims.convert_element_type %54571, %int5_51959 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51960 = torch.constant.int 5
    %54580 = torch.prims.convert_element_type %54572, %int5_51960 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51961 = torch.constant.int 5
    %54581 = torch.prims.convert_element_type %54573, %int5_51961 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51962 = torch.constant.int 5
    %54582 = torch.prims.convert_element_type %54574, %int5_51962 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_51963 = torch.constant.int 5
    %54583 = torch.prims.convert_element_type %54575, %int5_51963 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %54583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
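    // Tensor-parallel Q projection: transpose each device's [512,4096] weight
    // shard (consistent with the attn_q shard shape) to [4096,512] for the
    // matmuls below.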
    %int1_51964 = torch.constant.int 1
    %int0_51965 = torch.constant.int 0
    %54584 = torch.prim.ListConstruct %int1_51964, %int0_51965 : (!torch.int, !torch.int) -> !torch.list<int>
    %54585 = torch.aten.permute %2032, %54584 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_51966 = torch.constant.int 1
    %int0_51967 = torch.constant.int 0
    %54586 = torch.prim.ListConstruct %int1_51966, %int0_51967 : (!torch.int, !torch.int) -> !torch.list<int>
    %54587 = torch.aten.permute %2033, %54586 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_51968 = torch.constant.int 1
    %int0_51969 = torch.constant.int 0
    %54588 = torch.prim.ListConstruct %int1_51968, %int0_51969 : (!torch.int, !torch.int) -> !torch.list<int>
    %54589 = torch.aten.permute %2034, %54588 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_51970 = torch.constant.int 1
    %int0_51971 = torch.constant.int 0
    %54590 = torch.prim.ListConstruct %int1_51970, %int0_51971 : (!torch.int, !torch.int) -> !torch.list<int>
    %54591 = torch.aten.permute %2035, %54590 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_51972 = torch.constant.int 1
    %int0_51973 = torch.constant.int 0
    %54592 = torch.prim.ListConstruct %int1_51972, %int0_51973 : (!torch.int, !torch.int) -> !torch.list<int>
    %54593 = torch.aten.permute %2036, %54592 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_51974 = torch.constant.int 1
    %int0_51975 = torch.constant.int 0
    %54594 = torch.prim.ListConstruct %int1_51974, %int0_51975 : (!torch.int, !torch.int) -> !torch.list<int>
    %54595 = torch.aten.permute %2037, %54594 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_51976 = torch.constant.int 1
    %int0_51977 = torch.constant.int 0
    %54596 = torch.prim.ListConstruct %int1_51976, %int0_51977 : (!torch.int, !torch.int) -> !torch.list<int>
    %54597 = torch.aten.permute %2038, %54596 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_51978 = torch.constant.int 1
    %int0_51979 = torch.constant.int 0
    %54598 = torch.prim.ListConstruct %int1_51978, %int0_51979 : (!torch.int, !torch.int) -> !torch.list<int>
    %54599 = torch.aten.permute %2039, %54598 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
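    // Query projection, repeated once per device: flatten [4,?,4096] to
    // [4*?,4096], matmul against the transposed [4096,512] shard, then view
    // back to [4,?,512] (512 = 4 heads x head_dim 128 per shard).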
    %int4_51980 = torch.constant.int 4
    %54600 = torch.aten.mul.int %int4_51980, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51981 = torch.constant.int 4096
    %54601 = torch.prim.ListConstruct %54600, %int4096_51981 : (!torch.int, !torch.int) -> !torch.list<int>
    %54602 = torch.aten.view %54576, %54601 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54602, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54603 = torch.aten.mm %54602, %54585 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %54603, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_51982 = torch.constant.int 4
    %int512_51983 = torch.constant.int 512
    %54604 = torch.prim.ListConstruct %int4_51982, %2482, %int512_51983 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54605 = torch.aten.view %54603, %54604 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %54605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_51984 = torch.constant.int 4
    %54606 = torch.aten.mul.int %int4_51984, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51985 = torch.constant.int 4096
    %54607 = torch.prim.ListConstruct %54606, %int4096_51985 : (!torch.int, !torch.int) -> !torch.list<int>
    %54608 = torch.aten.view %54577, %54607 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54608, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54609 = torch.aten.mm %54608, %54587 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %54609, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_51986 = torch.constant.int 4
    %int512_51987 = torch.constant.int 512
    %54610 = torch.prim.ListConstruct %int4_51986, %2482, %int512_51987 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54611 = torch.aten.view %54609, %54610 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %54611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_51988 = torch.constant.int 4
    %54612 = torch.aten.mul.int %int4_51988, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51989 = torch.constant.int 4096
    %54613 = torch.prim.ListConstruct %54612, %int4096_51989 : (!torch.int, !torch.int) -> !torch.list<int>
    %54614 = torch.aten.view %54578, %54613 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54614, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54615 = torch.aten.mm %54614, %54589 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %54615, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_51990 = torch.constant.int 4
    %int512_51991 = torch.constant.int 512
    %54616 = torch.prim.ListConstruct %int4_51990, %2482, %int512_51991 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54617 = torch.aten.view %54615, %54616 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %54617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_51992 = torch.constant.int 4
    %54618 = torch.aten.mul.int %int4_51992, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51993 = torch.constant.int 4096
    %54619 = torch.prim.ListConstruct %54618, %int4096_51993 : (!torch.int, !torch.int) -> !torch.list<int>
    %54620 = torch.aten.view %54579, %54619 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54620, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54621 = torch.aten.mm %54620, %54591 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %54621, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_51994 = torch.constant.int 4
    %int512_51995 = torch.constant.int 512
    %54622 = torch.prim.ListConstruct %int4_51994, %2482, %int512_51995 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54623 = torch.aten.view %54621, %54622 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %54623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_51996 = torch.constant.int 4
    %54624 = torch.aten.mul.int %int4_51996, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_51997 = torch.constant.int 4096
    %54625 = torch.prim.ListConstruct %54624, %int4096_51997 : (!torch.int, !torch.int) -> !torch.list<int>
    %54626 = torch.aten.view %54580, %54625 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54626, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54627 = torch.aten.mm %54626, %54593 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %54627, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_51998 = torch.constant.int 4
    %int512_51999 = torch.constant.int 512
    %54628 = torch.prim.ListConstruct %int4_51998, %2482, %int512_51999 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54629 = torch.aten.view %54627, %54628 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %54629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_52000 = torch.constant.int 4
    %54630 = torch.aten.mul.int %int4_52000, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52001 = torch.constant.int 4096
    %54631 = torch.prim.ListConstruct %54630, %int4096_52001 : (!torch.int, !torch.int) -> !torch.list<int>
    %54632 = torch.aten.view %54581, %54631 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54632, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54633 = torch.aten.mm %54632, %54595 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %54633, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_52002 = torch.constant.int 4
    %int512_52003 = torch.constant.int 512
    %54634 = torch.prim.ListConstruct %int4_52002, %2482, %int512_52003 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54635 = torch.aten.view %54633, %54634 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %54635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_52004 = torch.constant.int 4
    %54636 = torch.aten.mul.int %int4_52004, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52005 = torch.constant.int 4096
    %54637 = torch.prim.ListConstruct %54636, %int4096_52005 : (!torch.int, !torch.int) -> !torch.list<int>
    %54638 = torch.aten.view %54582, %54637 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54638, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54639 = torch.aten.mm %54638, %54597 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %54639, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_52006 = torch.constant.int 4
    %int512_52007 = torch.constant.int 512
    %54640 = torch.prim.ListConstruct %int4_52006, %2482, %int512_52007 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54641 = torch.aten.view %54639, %54640 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %54641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_52008 = torch.constant.int 4
    %54642 = torch.aten.mul.int %int4_52008, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52009 = torch.constant.int 4096
    %54643 = torch.prim.ListConstruct %54642, %int4096_52009 : (!torch.int, !torch.int) -> !torch.list<int>
    %54644 = torch.aten.view %54583, %54643 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54644, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54645 = torch.aten.mm %54644, %54599 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %54645, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_52010 = torch.constant.int 4
    %int512_52011 = torch.constant.int 512
    %54646 = torch.prim.ListConstruct %int4_52010, %2482, %int512_52011 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54647 = torch.aten.view %54645, %54646 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %54647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
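    // Transpose the eight [128,4096] weight shards %2040..%2047 to [4096,128].
    // The 128-wide per-shard output (one head x head_dim 128) suggests these
    // are the attn_k shards under grouped-query attention.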
    %int1_52012 = torch.constant.int 1
    %int0_52013 = torch.constant.int 0
    %54648 = torch.prim.ListConstruct %int1_52012, %int0_52013 : (!torch.int, !torch.int) -> !torch.list<int>
    %54649 = torch.aten.permute %2040, %54648 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52014 = torch.constant.int 1
    %int0_52015 = torch.constant.int 0
    %54650 = torch.prim.ListConstruct %int1_52014, %int0_52015 : (!torch.int, !torch.int) -> !torch.list<int>
    %54651 = torch.aten.permute %2041, %54650 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52016 = torch.constant.int 1
    %int0_52017 = torch.constant.int 0
    %54652 = torch.prim.ListConstruct %int1_52016, %int0_52017 : (!torch.int, !torch.int) -> !torch.list<int>
    %54653 = torch.aten.permute %2042, %54652 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52018 = torch.constant.int 1
    %int0_52019 = torch.constant.int 0
    %54654 = torch.prim.ListConstruct %int1_52018, %int0_52019 : (!torch.int, !torch.int) -> !torch.list<int>
    %54655 = torch.aten.permute %2043, %54654 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52020 = torch.constant.int 1
    %int0_52021 = torch.constant.int 0
    %54656 = torch.prim.ListConstruct %int1_52020, %int0_52021 : (!torch.int, !torch.int) -> !torch.list<int>
    %54657 = torch.aten.permute %2044, %54656 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52022 = torch.constant.int 1
    %int0_52023 = torch.constant.int 0
    %54658 = torch.prim.ListConstruct %int1_52022, %int0_52023 : (!torch.int, !torch.int) -> !torch.list<int>
    %54659 = torch.aten.permute %2045, %54658 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52024 = torch.constant.int 1
    %int0_52025 = torch.constant.int 0
    %54660 = torch.prim.ListConstruct %int1_52024, %int0_52025 : (!torch.int, !torch.int) -> !torch.list<int>
    %54661 = torch.aten.permute %2046, %54660 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52026 = torch.constant.int 1
    %int0_52027 = torch.constant.int 0
    %54662 = torch.prim.ListConstruct %int1_52026, %int0_52027 : (!torch.int, !torch.int) -> !torch.list<int>
    %54663 = torch.aten.permute %2047, %54662 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_52028 = torch.constant.int 4
    %54664 = torch.aten.mul.int %int4_52028, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52029 = torch.constant.int 4096
    %54665 = torch.prim.ListConstruct %54664, %int4096_52029 : (!torch.int, !torch.int) -> !torch.list<int>
    %54666 = torch.aten.view %54576, %54665 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54666, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54667 = torch.aten.mm %54666, %54649 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54667, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52030 = torch.constant.int 4
    %int128_52031 = torch.constant.int 128
    %54668 = torch.prim.ListConstruct %int4_52030, %2482, %int128_52031 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54669 = torch.aten.view %54667, %54668 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52032 = torch.constant.int 4
    %54670 = torch.aten.mul.int %int4_52032, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52033 = torch.constant.int 4096
    %54671 = torch.prim.ListConstruct %54670, %int4096_52033 : (!torch.int, !torch.int) -> !torch.list<int>
    %54672 = torch.aten.view %54577, %54671 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54672, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54673 = torch.aten.mm %54672, %54651 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54673, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52034 = torch.constant.int 4
    %int128_52035 = torch.constant.int 128
    %54674 = torch.prim.ListConstruct %int4_52034, %2482, %int128_52035 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54675 = torch.aten.view %54673, %54674 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52036 = torch.constant.int 4
    %54676 = torch.aten.mul.int %int4_52036, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52037 = torch.constant.int 4096
    %54677 = torch.prim.ListConstruct %54676, %int4096_52037 : (!torch.int, !torch.int) -> !torch.list<int>
    %54678 = torch.aten.view %54578, %54677 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54678, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54679 = torch.aten.mm %54678, %54653 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54679, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52038 = torch.constant.int 4
    %int128_52039 = torch.constant.int 128
    %54680 = torch.prim.ListConstruct %int4_52038, %2482, %int128_52039 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54681 = torch.aten.view %54679, %54680 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52040 = torch.constant.int 4
    %54682 = torch.aten.mul.int %int4_52040, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52041 = torch.constant.int 4096
    %54683 = torch.prim.ListConstruct %54682, %int4096_52041 : (!torch.int, !torch.int) -> !torch.list<int>
    %54684 = torch.aten.view %54579, %54683 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54684, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54685 = torch.aten.mm %54684, %54655 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54685, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52042 = torch.constant.int 4
    %int128_52043 = torch.constant.int 128
    %54686 = torch.prim.ListConstruct %int4_52042, %2482, %int128_52043 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54687 = torch.aten.view %54685, %54686 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52044 = torch.constant.int 4
    %54688 = torch.aten.mul.int %int4_52044, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52045 = torch.constant.int 4096
    %54689 = torch.prim.ListConstruct %54688, %int4096_52045 : (!torch.int, !torch.int) -> !torch.list<int>
    %54690 = torch.aten.view %54580, %54689 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54690, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54691 = torch.aten.mm %54690, %54657 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54691, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52046 = torch.constant.int 4
    %int128_52047 = torch.constant.int 128
    %54692 = torch.prim.ListConstruct %int4_52046, %2482, %int128_52047 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54693 = torch.aten.view %54691, %54692 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54693, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52048 = torch.constant.int 4
    %54694 = torch.aten.mul.int %int4_52048, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52049 = torch.constant.int 4096
    %54695 = torch.prim.ListConstruct %54694, %int4096_52049 : (!torch.int, !torch.int) -> !torch.list<int>
    %54696 = torch.aten.view %54581, %54695 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54696, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54697 = torch.aten.mm %54696, %54659 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54697, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52050 = torch.constant.int 4
    %int128_52051 = torch.constant.int 128
    %54698 = torch.prim.ListConstruct %int4_52050, %2482, %int128_52051 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54699 = torch.aten.view %54697, %54698 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52052 = torch.constant.int 4
    %54700 = torch.aten.mul.int %int4_52052, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52053 = torch.constant.int 4096
    %54701 = torch.prim.ListConstruct %54700, %int4096_52053 : (!torch.int, !torch.int) -> !torch.list<int>
    %54702 = torch.aten.view %54582, %54701 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54702, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54703 = torch.aten.mm %54702, %54661 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54703, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52054 = torch.constant.int 4
    %int128_52055 = torch.constant.int 128
    %54704 = torch.prim.ListConstruct %int4_52054, %2482, %int128_52055 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54705 = torch.aten.view %54703, %54704 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52056 = torch.constant.int 4
    %54706 = torch.aten.mul.int %int4_52056, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52057 = torch.constant.int 4096
    %54707 = torch.prim.ListConstruct %54706, %int4096_52057 : (!torch.int, !torch.int) -> !torch.list<int>
    %54708 = torch.aten.view %54583, %54707 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54708, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54709 = torch.aten.mm %54708, %54663 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54709, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52058 = torch.constant.int 4
    %int128_52059 = torch.constant.int 128
    %54710 = torch.prim.ListConstruct %int4_52058, %2482, %int128_52059 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54711 = torch.aten.view %54709, %54710 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
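    // Transpose the second set of [128,4096] shards %2048..%2055 (presumably
    // the attn_v projections) and apply the same flatten/matmul/view pattern
    // per device, again yielding [4,?,128] outputs.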
    %int1_52060 = torch.constant.int 1
    %int0_52061 = torch.constant.int 0
    %54712 = torch.prim.ListConstruct %int1_52060, %int0_52061 : (!torch.int, !torch.int) -> !torch.list<int>
    %54713 = torch.aten.permute %2048, %54712 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52062 = torch.constant.int 1
    %int0_52063 = torch.constant.int 0
    %54714 = torch.prim.ListConstruct %int1_52062, %int0_52063 : (!torch.int, !torch.int) -> !torch.list<int>
    %54715 = torch.aten.permute %2049, %54714 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52064 = torch.constant.int 1
    %int0_52065 = torch.constant.int 0
    %54716 = torch.prim.ListConstruct %int1_52064, %int0_52065 : (!torch.int, !torch.int) -> !torch.list<int>
    %54717 = torch.aten.permute %2050, %54716 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52066 = torch.constant.int 1
    %int0_52067 = torch.constant.int 0
    %54718 = torch.prim.ListConstruct %int1_52066, %int0_52067 : (!torch.int, !torch.int) -> !torch.list<int>
    %54719 = torch.aten.permute %2051, %54718 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52068 = torch.constant.int 1
    %int0_52069 = torch.constant.int 0
    %54720 = torch.prim.ListConstruct %int1_52068, %int0_52069 : (!torch.int, !torch.int) -> !torch.list<int>
    %54721 = torch.aten.permute %2052, %54720 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52070 = torch.constant.int 1
    %int0_52071 = torch.constant.int 0
    %54722 = torch.prim.ListConstruct %int1_52070, %int0_52071 : (!torch.int, !torch.int) -> !torch.list<int>
    %54723 = torch.aten.permute %2053, %54722 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52072 = torch.constant.int 1
    %int0_52073 = torch.constant.int 0
    %54724 = torch.prim.ListConstruct %int1_52072, %int0_52073 : (!torch.int, !torch.int) -> !torch.list<int>
    %54725 = torch.aten.permute %2054, %54724 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_52074 = torch.constant.int 1
    %int0_52075 = torch.constant.int 0
    %54726 = torch.prim.ListConstruct %int1_52074, %int0_52075 : (!torch.int, !torch.int) -> !torch.list<int>
    %54727 = torch.aten.permute %2055, %54726 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int4_52076 = torch.constant.int 4
    %54728 = torch.aten.mul.int %int4_52076, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52077 = torch.constant.int 4096
    %54729 = torch.prim.ListConstruct %54728, %int4096_52077 : (!torch.int, !torch.int) -> !torch.list<int>
    %54730 = torch.aten.view %54576, %54729 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54730, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54731 = torch.aten.mm %54730, %54713 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54731, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52078 = torch.constant.int 4
    %int128_52079 = torch.constant.int 128
    %54732 = torch.prim.ListConstruct %int4_52078, %2482, %int128_52079 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54733 = torch.aten.view %54731, %54732 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52080 = torch.constant.int 4
    %54734 = torch.aten.mul.int %int4_52080, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52081 = torch.constant.int 4096
    %54735 = torch.prim.ListConstruct %54734, %int4096_52081 : (!torch.int, !torch.int) -> !torch.list<int>
    %54736 = torch.aten.view %54577, %54735 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54736, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54737 = torch.aten.mm %54736, %54715 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54737, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52082 = torch.constant.int 4
    %int128_52083 = torch.constant.int 128
    %54738 = torch.prim.ListConstruct %int4_52082, %2482, %int128_52083 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54739 = torch.aten.view %54737, %54738 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52084 = torch.constant.int 4
    %54740 = torch.aten.mul.int %int4_52084, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52085 = torch.constant.int 4096
    %54741 = torch.prim.ListConstruct %54740, %int4096_52085 : (!torch.int, !torch.int) -> !torch.list<int>
    %54742 = torch.aten.view %54578, %54741 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54742, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54743 = torch.aten.mm %54742, %54717 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54743, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52086 = torch.constant.int 4
    %int128_52087 = torch.constant.int 128
    %54744 = torch.prim.ListConstruct %int4_52086, %2482, %int128_52087 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54745 = torch.aten.view %54743, %54744 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52088 = torch.constant.int 4
    %54746 = torch.aten.mul.int %int4_52088, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52089 = torch.constant.int 4096
    %54747 = torch.prim.ListConstruct %54746, %int4096_52089 : (!torch.int, !torch.int) -> !torch.list<int>
    %54748 = torch.aten.view %54579, %54747 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54748, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54749 = torch.aten.mm %54748, %54719 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54749, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52090 = torch.constant.int 4
    %int128_52091 = torch.constant.int 128
    %54750 = torch.prim.ListConstruct %int4_52090, %2482, %int128_52091 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54751 = torch.aten.view %54749, %54750 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52092 = torch.constant.int 4
    %54752 = torch.aten.mul.int %int4_52092, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52093 = torch.constant.int 4096
    %54753 = torch.prim.ListConstruct %54752, %int4096_52093 : (!torch.int, !torch.int) -> !torch.list<int>
    %54754 = torch.aten.view %54580, %54753 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54754, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54755 = torch.aten.mm %54754, %54721 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54755, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52094 = torch.constant.int 4
    %int128_52095 = torch.constant.int 128
    %54756 = torch.prim.ListConstruct %int4_52094, %2482, %int128_52095 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54757 = torch.aten.view %54755, %54756 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52096 = torch.constant.int 4
    %54758 = torch.aten.mul.int %int4_52096, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52097 = torch.constant.int 4096
    %54759 = torch.prim.ListConstruct %54758, %int4096_52097 : (!torch.int, !torch.int) -> !torch.list<int>
    %54760 = torch.aten.view %54581, %54759 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54760, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54761 = torch.aten.mm %54760, %54723 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54761, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52098 = torch.constant.int 4
    %int128_52099 = torch.constant.int 128
    %54762 = torch.prim.ListConstruct %int4_52098, %2482, %int128_52099 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54763 = torch.aten.view %54761, %54762 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52100 = torch.constant.int 4
    %54764 = torch.aten.mul.int %int4_52100, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52101 = torch.constant.int 4096
    %54765 = torch.prim.ListConstruct %54764, %int4096_52101 : (!torch.int, !torch.int) -> !torch.list<int>
    %54766 = torch.aten.view %54582, %54765 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54766, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54767 = torch.aten.mm %54766, %54725 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54767, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52102 = torch.constant.int 4
    %int128_52103 = torch.constant.int 128
    %54768 = torch.prim.ListConstruct %int4_52102, %2482, %int128_52103 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54769 = torch.aten.view %54767, %54768 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_52104 = torch.constant.int 4
    %54770 = torch.aten.mul.int %int4_52104, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_52105 = torch.constant.int 4096
    %54771 = torch.prim.ListConstruct %54770, %int4096_52105 : (!torch.int, !torch.int) -> !torch.list<int>
    %54772 = torch.aten.view %54583, %54771 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %54772, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %54773 = torch.aten.mm %54772, %54727 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %54773, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_52106 = torch.constant.int 4
    %int128_52107 = torch.constant.int 128
    %54774 = torch.prim.ListConstruct %int4_52106, %2482, %int128_52107 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54775 = torch.aten.view %54773, %54774 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %54775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
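    // Reshape each per-device query projection from [4,?,512] to [4,?,4,128]:
    // 4 heads per shard, head dimension 128.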
    %int4_52108 = torch.constant.int 4
    %int4_52109 = torch.constant.int 4
    %int128_52110 = torch.constant.int 128
    %54776 = torch.prim.ListConstruct %int4_52108, %2482, %int4_52109, %int128_52110 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54777 = torch.aten.view %54605, %54776 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52111 = torch.constant.int 4
    %int4_52112 = torch.constant.int 4
    %int128_52113 = torch.constant.int 128
    %54778 = torch.prim.ListConstruct %int4_52111, %2482, %int4_52112, %int128_52113 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54779 = torch.aten.view %54611, %54778 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52114 = torch.constant.int 4
    %int4_52115 = torch.constant.int 4
    %int128_52116 = torch.constant.int 128
    %54780 = torch.prim.ListConstruct %int4_52114, %2482, %int4_52115, %int128_52116 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54781 = torch.aten.view %54617, %54780 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52117 = torch.constant.int 4
    %int4_52118 = torch.constant.int 4
    %int128_52119 = torch.constant.int 128
    %54782 = torch.prim.ListConstruct %int4_52117, %2482, %int4_52118, %int128_52119 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54783 = torch.aten.view %54623, %54782 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52120 = torch.constant.int 4
    %int4_52121 = torch.constant.int 4
    %int128_52122 = torch.constant.int 128
    %54784 = torch.prim.ListConstruct %int4_52120, %2482, %int4_52121, %int128_52122 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54785 = torch.aten.view %54629, %54784 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52123 = torch.constant.int 4
    %int4_52124 = torch.constant.int 4
    %int128_52125 = torch.constant.int 128
    %54786 = torch.prim.ListConstruct %int4_52123, %2482, %int4_52124, %int128_52125 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54787 = torch.aten.view %54635, %54786 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52126 = torch.constant.int 4
    %int4_52127 = torch.constant.int 4
    %int128_52128 = torch.constant.int 128
    %54788 = torch.prim.ListConstruct %int4_52126, %2482, %int4_52127, %int128_52128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54789 = torch.aten.view %54641, %54788 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52129 = torch.constant.int 4
    %int4_52130 = torch.constant.int 4
    %int128_52131 = torch.constant.int 128
    %54790 = torch.prim.ListConstruct %int4_52129, %2482, %int4_52130, %int128_52131 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54791 = torch.aten.view %54647, %54790 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
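    // Reshape the first set of 128-wide projections (%54669..%54711) to
    // [4,?,1,128], i.e. a single KV head per shard.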
    %int4_52132 = torch.constant.int 4
    %int1_52133 = torch.constant.int 1
    %int128_52134 = torch.constant.int 128
    %54792 = torch.prim.ListConstruct %int4_52132, %2482, %int1_52133, %int128_52134 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54793 = torch.aten.view %54669, %54792 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52135 = torch.constant.int 4
    %int1_52136 = torch.constant.int 1
    %int128_52137 = torch.constant.int 128
    %54794 = torch.prim.ListConstruct %int4_52135, %2482, %int1_52136, %int128_52137 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54795 = torch.aten.view %54675, %54794 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54795, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52138 = torch.constant.int 4
    %int1_52139 = torch.constant.int 1
    %int128_52140 = torch.constant.int 128
    %54796 = torch.prim.ListConstruct %int4_52138, %2482, %int1_52139, %int128_52140 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54797 = torch.aten.view %54681, %54796 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52141 = torch.constant.int 4
    %int1_52142 = torch.constant.int 1
    %int128_52143 = torch.constant.int 128
    %54798 = torch.prim.ListConstruct %int4_52141, %2482, %int1_52142, %int128_52143 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54799 = torch.aten.view %54687, %54798 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54799, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52144 = torch.constant.int 4
    %int1_52145 = torch.constant.int 1
    %int128_52146 = torch.constant.int 128
    %54800 = torch.prim.ListConstruct %int4_52144, %2482, %int1_52145, %int128_52146 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54801 = torch.aten.view %54693, %54800 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52147 = torch.constant.int 4
    %int1_52148 = torch.constant.int 1
    %int128_52149 = torch.constant.int 128
    %54802 = torch.prim.ListConstruct %int4_52147, %2482, %int1_52148, %int128_52149 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54803 = torch.aten.view %54699, %54802 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52150 = torch.constant.int 4
    %int1_52151 = torch.constant.int 1
    %int128_52152 = torch.constant.int 128
    %54804 = torch.prim.ListConstruct %int4_52150, %2482, %int1_52151, %int128_52152 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54805 = torch.aten.view %54705, %54804 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52153 = torch.constant.int 4
    %int1_52154 = torch.constant.int 1
    %int128_52155 = torch.constant.int 128
    %54806 = torch.prim.ListConstruct %int4_52153, %2482, %int1_52154, %int128_52155 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54807 = torch.aten.view %54711, %54806 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
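    // Identical [4,?,1,128] reshape for the second set (%54733..%54775).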
    %int4_52156 = torch.constant.int 4
    %int1_52157 = torch.constant.int 1
    %int128_52158 = torch.constant.int 128
    %54808 = torch.prim.ListConstruct %int4_52156, %2482, %int1_52157, %int128_52158 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54809 = torch.aten.view %54733, %54808 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52159 = torch.constant.int 4
    %int1_52160 = torch.constant.int 1
    %int128_52161 = torch.constant.int 128
    %54810 = torch.prim.ListConstruct %int4_52159, %2482, %int1_52160, %int128_52161 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54811 = torch.aten.view %54739, %54810 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52162 = torch.constant.int 4
    %int1_52163 = torch.constant.int 1
    %int128_52164 = torch.constant.int 128
    %54812 = torch.prim.ListConstruct %int4_52162, %2482, %int1_52163, %int128_52164 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54813 = torch.aten.view %54745, %54812 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52165 = torch.constant.int 4
    %int1_52166 = torch.constant.int 1
    %int128_52167 = torch.constant.int 128
    %54814 = torch.prim.ListConstruct %int4_52165, %2482, %int1_52166, %int128_52167 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54815 = torch.aten.view %54751, %54814 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52168 = torch.constant.int 4
    %int1_52169 = torch.constant.int 1
    %int128_52170 = torch.constant.int 128
    %54816 = torch.prim.ListConstruct %int4_52168, %2482, %int1_52169, %int128_52170 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54817 = torch.aten.view %54757, %54816 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54817, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52171 = torch.constant.int 4
    %int1_52172 = torch.constant.int 1
    %int128_52173 = torch.constant.int 128
    %54818 = torch.prim.ListConstruct %int4_52171, %2482, %int1_52172, %int128_52173 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54819 = torch.aten.view %54763, %54818 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52174 = torch.constant.int 4
    %int1_52175 = torch.constant.int 1
    %int128_52176 = torch.constant.int 128
    %54820 = torch.prim.ListConstruct %int4_52174, %2482, %int1_52175, %int128_52176 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54821 = torch.aten.view %54769, %54820 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54821, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_52177 = torch.constant.int 4
    %int1_52178 = torch.constant.int 1
    %int128_52179 = torch.constant.int 128
    %54822 = torch.prim.ListConstruct %int4_52177, %2482, %int1_52178, %int128_52179 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %54823 = torch.aten.view %54775, %54822 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %54823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
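    // Build the rotary-embedding table on the host: positions 0..131071,
    // inverse frequencies 1/500000^(2i/128) for i in 0..63, an outer product
    // giving per-(position, frequency) angles, then cos/sin packed into a
    // [131072,64] complex<f32> table of e^(i*theta).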
    %int131072_52180 = torch.constant.int 131072
    %none_52181 = torch.constant.none
    %none_52182 = torch.constant.none
    %cpu_52183 = torch.constant.device "cpu"
    %false_52184 = torch.constant.bool false
    %54824 = torch.aten.arange %int131072_52180, %none_52181, %none_52182, %cpu_52183, %false_52184 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_52185 = torch.constant.int 0
    %int128_52186 = torch.constant.int 128
    %int2_52187 = torch.constant.int 2
    %none_52188 = torch.constant.none
    %none_52189 = torch.constant.none
    %cpu_52190 = torch.constant.device "cpu"
    %false_52191 = torch.constant.bool false
    %54825 = torch.aten.arange.start_step %int0_52185, %int128_52186, %int2_52187, %none_52188, %none_52189, %cpu_52190, %false_52191 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_52192 = torch.constant.int 0
    %int0_52193 = torch.constant.int 0
    %int64_52194 = torch.constant.int 64
    %int1_52195 = torch.constant.int 1
    %54826 = torch.aten.slice.Tensor %54825, %int0_52192, %int0_52193, %int64_52194, %int1_52195 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_52196 = torch.constant.int 6
    %54827 = torch.prims.convert_element_type %54826, %int6_52196 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_52197 = torch.constant.int 128
    %54828 = torch.aten.div.Scalar %54827, %int128_52197 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_52198 = torch.constant.float 5.000000e+05
    %54829 = torch.aten.pow.Scalar %float5.000000e05_52198, %54828 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %54830 = torch.aten.reciprocal %54829 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_52199 = torch.constant.float 1.000000e+00
    %54831 = torch.aten.mul.Scalar %54830, %float1.000000e00_52199 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_52200 = torch.constant.int 131072
    %int1_52201 = torch.constant.int 1
    %54832 = torch.prim.ListConstruct %int131072_52200, %int1_52201 : (!torch.int, !torch.int) -> !torch.list<int>
    %54833 = torch.aten.view %54824, %54832 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %54834 = torch.aten.mul.Tensor %54833, %54831 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %54835 = torch.aten.cos %54834 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %54836 = torch.aten.sin %54834 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %54837 = torch.aten.complex %54835, %54836 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
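    // Replicate the table: convert to a builtin tensor and flow.tensor.transfer
    // one copy to each of @__device_0 .. @__device_7.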
    %54838 = torch_c.to_builtin_tensor %54837 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %54839 = flow.tensor.transfer %54838 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %54840 = torch_c.from_builtin_tensor %54839 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %54841 = torch_c.to_builtin_tensor %54837 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %54842 = flow.tensor.transfer %54841 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %54843 = torch_c.from_builtin_tensor %54842 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %54844 = torch_c.to_builtin_tensor %54837 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %54845 = flow.tensor.transfer %54844 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %54846 = torch_c.from_builtin_tensor %54845 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %54847 = torch_c.to_builtin_tensor %54837 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %54848 = flow.tensor.transfer %54847 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %54849 = torch_c.from_builtin_tensor %54848 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %54850 = torch_c.to_builtin_tensor %54837 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %54851 = flow.tensor.transfer %54850 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %54852 = torch_c.from_builtin_tensor %54851 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %54853 = torch_c.to_builtin_tensor %54837 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %54854 = flow.tensor.transfer %54853 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %54855 = torch_c.from_builtin_tensor %54854 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %54856 = torch_c.to_builtin_tensor %54837 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %54857 = flow.tensor.transfer %54856 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %54858 = torch_c.from_builtin_tensor %54857 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %54859 = torch_c.to_builtin_tensor %54837 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %54860 = flow.tensor.transfer %54859 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %54861 = torch_c.from_builtin_tensor %54860 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
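    // Apply RoPE to the device-0 queries: read the dynamic sequence length
    // from dim 1 of %54605 (position offset 0), slice that many rows from the
    // table, and unsqueeze to [1,?,1,64] so it broadcasts over batch and heads.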
    %int1_52202 = torch.constant.int 1
    %54862 = torch.aten.size.int %54605, %int1_52202 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_52203 = torch.constant.int 0
    %54863 = torch.aten.add.int %int0_52203, %54862 : !torch.int, !torch.int -> !torch.int
    %int0_52204 = torch.constant.int 0
    %int0_52205 = torch.constant.int 0
    %int1_52206 = torch.constant.int 1
    %54864 = torch.aten.slice.Tensor %54840, %int0_52204, %int0_52205, %54863, %int1_52206 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54864, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52207 = torch.constant.int 1
    %int0_52208 = torch.constant.int 0
    %int9223372036854775807_52209 = torch.constant.int 9223372036854775807
    %int1_52210 = torch.constant.int 1
    %54865 = torch.aten.slice.Tensor %54864, %int1_52207, %int0_52208, %int9223372036854775807_52209, %int1_52210 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54865, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52211 = torch.constant.int 0
    %54866 = torch.aten.unsqueeze %54865, %int0_52211 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %54866, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52212 = torch.constant.int 2
    %54867 = torch.aten.unsqueeze %54866, %int2_52212 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54867, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52213 = torch.constant.int 3
    %int0_52214 = torch.constant.int 0
    %int9223372036854775807_52215 = torch.constant.int 9223372036854775807
    %int1_52216 = torch.constant.int 1
    %54868 = torch.aten.slice.Tensor %54867, %int3_52213, %int0_52214, %int9223372036854775807_52215, %int1_52216 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54868, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
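    // Rotation via complex multiply: bitcast the [4,?,4,128] f16 queries to
    // [4,?,4,64] complex<f16> (adjacent element pairs become re/im), multiply
    // elementwise by the broadcast e^(i*theta) table (promoting to
    // complex<f32>), bitcast back to [4,?,4,128] f32, and downcast to f16.
    // The same sequence repeats below for the remaining device shards.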
    %54869 = torch_c.to_builtin_tensor %54777 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_52217 = arith.constant 1 : index
    %dim_52218 = tensor.dim %54869, %c1_52217 : tensor<4x?x4x128xf16>
    %54870 = flow.tensor.bitcast %54869 : tensor<4x?x4x128xf16>{%dim_52218} -> tensor<4x?x4x64xcomplex<f16>>{%dim_52218}
    %54871 = torch_c.from_builtin_tensor %54870 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %54871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %54872 = torch.aten.mul.Tensor %54871, %54868 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %54872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %54873 = torch_c.to_builtin_tensor %54872 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_52219 = arith.constant 1 : index
    %dim_52220 = tensor.dim %54873, %c1_52219 : tensor<4x?x4x64xcomplex<f32>>
    %54874 = flow.tensor.bitcast %54873 : tensor<4x?x4x64xcomplex<f32>>{%dim_52220} -> tensor<4x?x4x128xf32>{%dim_52220}
    %54875 = torch_c.from_builtin_tensor %54874 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %54875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_52221 = torch.constant.int 5
    %54876 = torch.prims.convert_element_type %54875, %int5_52221 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_52222 = torch.constant.int 1
    %54877 = torch.aten.size.int %54611, %int1_52222 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_52223 = torch.constant.int 0
    %54878 = torch.aten.add.int %int0_52223, %54877 : !torch.int, !torch.int -> !torch.int
    %int0_52224 = torch.constant.int 0
    %int0_52225 = torch.constant.int 0
    %int1_52226 = torch.constant.int 1
    %54879 = torch.aten.slice.Tensor %54843, %int0_52224, %int0_52225, %54878, %int1_52226 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54879, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52227 = torch.constant.int 1
    %int0_52228 = torch.constant.int 0
    %int9223372036854775807_52229 = torch.constant.int 9223372036854775807
    %int1_52230 = torch.constant.int 1
    %54880 = torch.aten.slice.Tensor %54879, %int1_52227, %int0_52228, %int9223372036854775807_52229, %int1_52230 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54880, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52231 = torch.constant.int 0
    %54881 = torch.aten.unsqueeze %54880, %int0_52231 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %54881, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52232 = torch.constant.int 2
    %54882 = torch.aten.unsqueeze %54881, %int2_52232 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54882, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52233 = torch.constant.int 3
    %int0_52234 = torch.constant.int 0
    %int9223372036854775807_52235 = torch.constant.int 9223372036854775807
    %int1_52236 = torch.constant.int 1
    %54883 = torch.aten.slice.Tensor %54882, %int3_52233, %int0_52234, %int9223372036854775807_52235, %int1_52236 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54883, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %54884 = torch_c.to_builtin_tensor %54779 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_52237 = arith.constant 1 : index
    %dim_52238 = tensor.dim %54884, %c1_52237 : tensor<4x?x4x128xf16>
    %54885 = flow.tensor.bitcast %54884 : tensor<4x?x4x128xf16>{%dim_52238} -> tensor<4x?x4x64xcomplex<f16>>{%dim_52238}
    %54886 = torch_c.from_builtin_tensor %54885 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %54886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %54887 = torch.aten.mul.Tensor %54886, %54883 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %54887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %54888 = torch_c.to_builtin_tensor %54887 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_52239 = arith.constant 1 : index
    %dim_52240 = tensor.dim %54888, %c1_52239 : tensor<4x?x4x64xcomplex<f32>>
    %54889 = flow.tensor.bitcast %54888 : tensor<4x?x4x64xcomplex<f32>>{%dim_52240} -> tensor<4x?x4x128xf32>{%dim_52240}
    %54890 = torch_c.from_builtin_tensor %54889 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %54890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_52241 = torch.constant.int 5
    %54891 = torch.prims.convert_element_type %54890, %int5_52241 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_52242 = torch.constant.int 1
    %54892 = torch.aten.size.int %54617, %int1_52242 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_52243 = torch.constant.int 0
    %54893 = torch.aten.add.int %int0_52243, %54892 : !torch.int, !torch.int -> !torch.int
    %int0_52244 = torch.constant.int 0
    %int0_52245 = torch.constant.int 0
    %int1_52246 = torch.constant.int 1
    %54894 = torch.aten.slice.Tensor %54846, %int0_52244, %int0_52245, %54893, %int1_52246 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54894, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52247 = torch.constant.int 1
    %int0_52248 = torch.constant.int 0
    %int9223372036854775807_52249 = torch.constant.int 9223372036854775807
    %int1_52250 = torch.constant.int 1
    %54895 = torch.aten.slice.Tensor %54894, %int1_52247, %int0_52248, %int9223372036854775807_52249, %int1_52250 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54895, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52251 = torch.constant.int 0
    %54896 = torch.aten.unsqueeze %54895, %int0_52251 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %54896, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52252 = torch.constant.int 2
    %54897 = torch.aten.unsqueeze %54896, %int2_52252 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54897, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52253 = torch.constant.int 3
    %int0_52254 = torch.constant.int 0
    %int9223372036854775807_52255 = torch.constant.int 9223372036854775807
    %int1_52256 = torch.constant.int 1
    %54898 = torch.aten.slice.Tensor %54897, %int3_52253, %int0_52254, %int9223372036854775807_52255, %int1_52256 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54898, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %54899 = torch_c.to_builtin_tensor %54781 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_52257 = arith.constant 1 : index
    %dim_52258 = tensor.dim %54899, %c1_52257 : tensor<4x?x4x128xf16>
    %54900 = flow.tensor.bitcast %54899 : tensor<4x?x4x128xf16>{%dim_52258} -> tensor<4x?x4x64xcomplex<f16>>{%dim_52258}
    %54901 = torch_c.from_builtin_tensor %54900 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %54901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %54902 = torch.aten.mul.Tensor %54901, %54898 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %54902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %54903 = torch_c.to_builtin_tensor %54902 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_52259 = arith.constant 1 : index
    %dim_52260 = tensor.dim %54903, %c1_52259 : tensor<4x?x4x64xcomplex<f32>>
    %54904 = flow.tensor.bitcast %54903 : tensor<4x?x4x64xcomplex<f32>>{%dim_52260} -> tensor<4x?x4x128xf32>{%dim_52260}
    %54905 = torch_c.from_builtin_tensor %54904 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %54905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_52261 = torch.constant.int 5
    %54906 = torch.prims.convert_element_type %54905, %int5_52261 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_52262 = torch.constant.int 1
    %54907 = torch.aten.size.int %54623, %int1_52262 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_52263 = torch.constant.int 0
    %54908 = torch.aten.add.int %int0_52263, %54907 : !torch.int, !torch.int -> !torch.int
    %int0_52264 = torch.constant.int 0
    %int0_52265 = torch.constant.int 0
    %int1_52266 = torch.constant.int 1
    %54909 = torch.aten.slice.Tensor %54849, %int0_52264, %int0_52265, %54908, %int1_52266 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54909, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52267 = torch.constant.int 1
    %int0_52268 = torch.constant.int 0
    %int9223372036854775807_52269 = torch.constant.int 9223372036854775807
    %int1_52270 = torch.constant.int 1
    %54910 = torch.aten.slice.Tensor %54909, %int1_52267, %int0_52268, %int9223372036854775807_52269, %int1_52270 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54910, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52271 = torch.constant.int 0
    %54911 = torch.aten.unsqueeze %54910, %int0_52271 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %54911, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52272 = torch.constant.int 2
    %54912 = torch.aten.unsqueeze %54911, %int2_52272 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54912, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52273 = torch.constant.int 3
    %int0_52274 = torch.constant.int 0
    %int9223372036854775807_52275 = torch.constant.int 9223372036854775807
    %int1_52276 = torch.constant.int 1
    %54913 = torch.aten.slice.Tensor %54912, %int3_52273, %int0_52274, %int9223372036854775807_52275, %int1_52276 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54913, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %54914 = torch_c.to_builtin_tensor %54783 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_52277 = arith.constant 1 : index
    %dim_52278 = tensor.dim %54914, %c1_52277 : tensor<4x?x4x128xf16>
    %54915 = flow.tensor.bitcast %54914 : tensor<4x?x4x128xf16>{%dim_52278} -> tensor<4x?x4x64xcomplex<f16>>{%dim_52278}
    %54916 = torch_c.from_builtin_tensor %54915 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %54916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %54917 = torch.aten.mul.Tensor %54916, %54913 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %54917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %54918 = torch_c.to_builtin_tensor %54917 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_52279 = arith.constant 1 : index
    %dim_52280 = tensor.dim %54918, %c1_52279 : tensor<4x?x4x64xcomplex<f32>>
    %54919 = flow.tensor.bitcast %54918 : tensor<4x?x4x64xcomplex<f32>>{%dim_52280} -> tensor<4x?x4x128xf32>{%dim_52280}
    %54920 = torch_c.from_builtin_tensor %54919 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %54920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_52281 = torch.constant.int 5
    %54921 = torch.prims.convert_element_type %54920, %int5_52281 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_52282 = torch.constant.int 1
    %54922 = torch.aten.size.int %54629, %int1_52282 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_52283 = torch.constant.int 0
    %54923 = torch.aten.add.int %int0_52283, %54922 : !torch.int, !torch.int -> !torch.int
    %int0_52284 = torch.constant.int 0
    %int0_52285 = torch.constant.int 0
    %int1_52286 = torch.constant.int 1
    %54924 = torch.aten.slice.Tensor %54852, %int0_52284, %int0_52285, %54923, %int1_52286 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54924, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52287 = torch.constant.int 1
    %int0_52288 = torch.constant.int 0
    %int9223372036854775807_52289 = torch.constant.int 9223372036854775807
    %int1_52290 = torch.constant.int 1
    %54925 = torch.aten.slice.Tensor %54924, %int1_52287, %int0_52288, %int9223372036854775807_52289, %int1_52290 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54925, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52291 = torch.constant.int 0
    %54926 = torch.aten.unsqueeze %54925, %int0_52291 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %54926, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52292 = torch.constant.int 2
    %54927 = torch.aten.unsqueeze %54926, %int2_52292 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54927, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52293 = torch.constant.int 3
    %int0_52294 = torch.constant.int 0
    %int9223372036854775807_52295 = torch.constant.int 9223372036854775807
    %int1_52296 = torch.constant.int 1
    %54928 = torch.aten.slice.Tensor %54927, %int3_52293, %int0_52294, %int9223372036854775807_52295, %int1_52296 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54928, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %54929 = torch_c.to_builtin_tensor %54785 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_52297 = arith.constant 1 : index
    %dim_52298 = tensor.dim %54929, %c1_52297 : tensor<4x?x4x128xf16>
    %54930 = flow.tensor.bitcast %54929 : tensor<4x?x4x128xf16>{%dim_52298} -> tensor<4x?x4x64xcomplex<f16>>{%dim_52298}
    %54931 = torch_c.from_builtin_tensor %54930 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %54931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %54932 = torch.aten.mul.Tensor %54931, %54928 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %54932, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %54933 = torch_c.to_builtin_tensor %54932 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_52299 = arith.constant 1 : index
    %dim_52300 = tensor.dim %54933, %c1_52299 : tensor<4x?x4x64xcomplex<f32>>
    %54934 = flow.tensor.bitcast %54933 : tensor<4x?x4x64xcomplex<f32>>{%dim_52300} -> tensor<4x?x4x128xf32>{%dim_52300}
    %54935 = torch_c.from_builtin_tensor %54934 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %54935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_52301 = torch.constant.int 5
    %54936 = torch.prims.convert_element_type %54935, %int5_52301 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_52302 = torch.constant.int 1
    %54937 = torch.aten.size.int %54635, %int1_52302 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_52303 = torch.constant.int 0
    %54938 = torch.aten.add.int %int0_52303, %54937 : !torch.int, !torch.int -> !torch.int
    %int0_52304 = torch.constant.int 0
    %int0_52305 = torch.constant.int 0
    %int1_52306 = torch.constant.int 1
    %54939 = torch.aten.slice.Tensor %54855, %int0_52304, %int0_52305, %54938, %int1_52306 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54939, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52307 = torch.constant.int 1
    %int0_52308 = torch.constant.int 0
    %int9223372036854775807_52309 = torch.constant.int 9223372036854775807
    %int1_52310 = torch.constant.int 1
    %54940 = torch.aten.slice.Tensor %54939, %int1_52307, %int0_52308, %int9223372036854775807_52309, %int1_52310 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54940, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52311 = torch.constant.int 0
    %54941 = torch.aten.unsqueeze %54940, %int0_52311 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %54941, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52312 = torch.constant.int 2
    %54942 = torch.aten.unsqueeze %54941, %int2_52312 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54942, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52313 = torch.constant.int 3
    %int0_52314 = torch.constant.int 0
    %int9223372036854775807_52315 = torch.constant.int 9223372036854775807
    %int1_52316 = torch.constant.int 1
    %54943 = torch.aten.slice.Tensor %54942, %int3_52313, %int0_52314, %int9223372036854775807_52315, %int1_52316 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54943, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %54944 = torch_c.to_builtin_tensor %54787 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_52317 = arith.constant 1 : index
    %dim_52318 = tensor.dim %54944, %c1_52317 : tensor<4x?x4x128xf16>
    %54945 = flow.tensor.bitcast %54944 : tensor<4x?x4x128xf16>{%dim_52318} -> tensor<4x?x4x64xcomplex<f16>>{%dim_52318}
    %54946 = torch_c.from_builtin_tensor %54945 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %54946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %54947 = torch.aten.mul.Tensor %54946, %54943 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %54947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %54948 = torch_c.to_builtin_tensor %54947 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_52319 = arith.constant 1 : index
    %dim_52320 = tensor.dim %54948, %c1_52319 : tensor<4x?x4x64xcomplex<f32>>
    %54949 = flow.tensor.bitcast %54948 : tensor<4x?x4x64xcomplex<f32>>{%dim_52320} -> tensor<4x?x4x128xf32>{%dim_52320}
    %54950 = torch_c.from_builtin_tensor %54949 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %54950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_52321 = torch.constant.int 5
    %54951 = torch.prims.convert_element_type %54950, %int5_52321 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_52322 = torch.constant.int 1
    %54952 = torch.aten.size.int %54641, %int1_52322 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_52323 = torch.constant.int 0
    %54953 = torch.aten.add.int %int0_52323, %54952 : !torch.int, !torch.int -> !torch.int
    %int0_52324 = torch.constant.int 0
    %int0_52325 = torch.constant.int 0
    %int1_52326 = torch.constant.int 1
    %54954 = torch.aten.slice.Tensor %54858, %int0_52324, %int0_52325, %54953, %int1_52326 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54954, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52327 = torch.constant.int 1
    %int0_52328 = torch.constant.int 0
    %int9223372036854775807_52329 = torch.constant.int 9223372036854775807
    %int1_52330 = torch.constant.int 1
    %54955 = torch.aten.slice.Tensor %54954, %int1_52327, %int0_52328, %int9223372036854775807_52329, %int1_52330 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54955, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52331 = torch.constant.int 0
    %54956 = torch.aten.unsqueeze %54955, %int0_52331 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %54956, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52332 = torch.constant.int 2
    %54957 = torch.aten.unsqueeze %54956, %int2_52332 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54957, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52333 = torch.constant.int 3
    %int0_52334 = torch.constant.int 0
    %int9223372036854775807_52335 = torch.constant.int 9223372036854775807
    %int1_52336 = torch.constant.int 1
    %54958 = torch.aten.slice.Tensor %54957, %int3_52333, %int0_52334, %int9223372036854775807_52335, %int1_52336 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54958, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %54959 = torch_c.to_builtin_tensor %54789 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_52337 = arith.constant 1 : index
    %dim_52338 = tensor.dim %54959, %c1_52337 : tensor<4x?x4x128xf16>
    %54960 = flow.tensor.bitcast %54959 : tensor<4x?x4x128xf16>{%dim_52338} -> tensor<4x?x4x64xcomplex<f16>>{%dim_52338}
    %54961 = torch_c.from_builtin_tensor %54960 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %54961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %54962 = torch.aten.mul.Tensor %54961, %54958 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %54962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %54963 = torch_c.to_builtin_tensor %54962 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_52339 = arith.constant 1 : index
    %dim_52340 = tensor.dim %54963, %c1_52339 : tensor<4x?x4x64xcomplex<f32>>
    %54964 = flow.tensor.bitcast %54963 : tensor<4x?x4x64xcomplex<f32>>{%dim_52340} -> tensor<4x?x4x128xf32>{%dim_52340}
    %54965 = torch_c.from_builtin_tensor %54964 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %54965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_52341 = torch.constant.int 5
    %54966 = torch.prims.convert_element_type %54965, %int5_52341 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_52342 = torch.constant.int 1
    %54967 = torch.aten.size.int %54647, %int1_52342 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_52343 = torch.constant.int 0
    %54968 = torch.aten.add.int %int0_52343, %54967 : !torch.int, !torch.int -> !torch.int
    %int0_52344 = torch.constant.int 0
    %int0_52345 = torch.constant.int 0
    %int1_52346 = torch.constant.int 1
    %54969 = torch.aten.slice.Tensor %54861, %int0_52344, %int0_52345, %54968, %int1_52346 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54969, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52347 = torch.constant.int 1
    %int0_52348 = torch.constant.int 0
    %int9223372036854775807_52349 = torch.constant.int 9223372036854775807
    %int1_52350 = torch.constant.int 1
    %54970 = torch.aten.slice.Tensor %54969, %int1_52347, %int0_52348, %int9223372036854775807_52349, %int1_52350 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %54970, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52351 = torch.constant.int 0
    %54971 = torch.aten.unsqueeze %54970, %int0_52351 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %54971, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52352 = torch.constant.int 2
    %54972 = torch.aten.unsqueeze %54971, %int2_52352 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54972, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52353 = torch.constant.int 3
    %int0_52354 = torch.constant.int 0
    %int9223372036854775807_52355 = torch.constant.int 9223372036854775807
    %int1_52356 = torch.constant.int 1
    %54973 = torch.aten.slice.Tensor %54972, %int3_52353, %int0_52354, %int9223372036854775807_52355, %int1_52356 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %54973, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %54974 = torch_c.to_builtin_tensor %54791 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_52357 = arith.constant 1 : index
    %dim_52358 = tensor.dim %54974, %c1_52357 : tensor<4x?x4x128xf16>
    %54975 = flow.tensor.bitcast %54974 : tensor<4x?x4x128xf16>{%dim_52358} -> tensor<4x?x4x64xcomplex<f16>>{%dim_52358}
    %54976 = torch_c.from_builtin_tensor %54975 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %54976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %54977 = torch.aten.mul.Tensor %54976, %54973 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %54977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %54978 = torch_c.to_builtin_tensor %54977 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_52359 = arith.constant 1 : index
    %dim_52360 = tensor.dim %54978, %c1_52359 : tensor<4x?x4x64xcomplex<f32>>
    %54979 = flow.tensor.bitcast %54978 : tensor<4x?x4x64xcomplex<f32>>{%dim_52360} -> tensor<4x?x4x128xf32>{%dim_52360}
    %54980 = torch_c.from_builtin_tensor %54979 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %54980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_52361 = torch.constant.int 5
    %54981 = torch.prims.convert_element_type %54980, %int5_52361 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %54981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
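    // Rebuild the RoPE frequency table from scratch: inverse frequencies
    // 1/500000^(2i/128) for i in 0..63, outer product with positions
    // 0..131071, then cos + i*sin to form the [131072,64] complex<f32> table
    // %54995.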
    %int131072_52362 = torch.constant.int 131072
    %none_52363 = torch.constant.none
    %none_52364 = torch.constant.none
    %cpu_52365 = torch.constant.device "cpu"
    %false_52366 = torch.constant.bool false
    %54982 = torch.aten.arange %int131072_52362, %none_52363, %none_52364, %cpu_52365, %false_52366 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_52367 = torch.constant.int 0
    %int128_52368 = torch.constant.int 128
    %int2_52369 = torch.constant.int 2
    %none_52370 = torch.constant.none
    %none_52371 = torch.constant.none
    %cpu_52372 = torch.constant.device "cpu"
    %false_52373 = torch.constant.bool false
    %54983 = torch.aten.arange.start_step %int0_52367, %int128_52368, %int2_52369, %none_52370, %none_52371, %cpu_52372, %false_52373 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_52374 = torch.constant.int 0
    %int0_52375 = torch.constant.int 0
    %int64_52376 = torch.constant.int 64
    %int1_52377 = torch.constant.int 1
    %54984 = torch.aten.slice.Tensor %54983, %int0_52374, %int0_52375, %int64_52376, %int1_52377 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_52378 = torch.constant.int 6
    %54985 = torch.prims.convert_element_type %54984, %int6_52378 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_52379 = torch.constant.int 128
    %54986 = torch.aten.div.Scalar %54985, %int128_52379 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_52380 = torch.constant.float 5.000000e+05
    %54987 = torch.aten.pow.Scalar %float5.000000e05_52380, %54986 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %54988 = torch.aten.reciprocal %54987 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_52381 = torch.constant.float 1.000000e+00
    %54989 = torch.aten.mul.Scalar %54988, %float1.000000e00_52381 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_52382 = torch.constant.int 131072
    %int1_52383 = torch.constant.int 1
    %54990 = torch.prim.ListConstruct %int131072_52382, %int1_52383 : (!torch.int, !torch.int) -> !torch.list<int>
    %54991 = torch.aten.view %54982, %54990 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %54992 = torch.aten.mul.Tensor %54991, %54989 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %54993 = torch.aten.cos %54992 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %54994 = torch.aten.sin %54992 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %54995 = torch.aten.complex %54993, %54994 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
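    // Copy the table to every device: each to_builtin_tensor /
    // flow.tensor.transfer / from_builtin_tensor triple yields a device-local
    // copy (%54998 on @__device_0 through %55019 on @__device_7).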
    %54996 = torch_c.to_builtin_tensor %54995 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %54997 = flow.tensor.transfer %54996 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %54998 = torch_c.from_builtin_tensor %54997 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %54999 = torch_c.to_builtin_tensor %54995 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %55000 = flow.tensor.transfer %54999 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %55001 = torch_c.from_builtin_tensor %55000 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %55002 = torch_c.to_builtin_tensor %54995 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %55003 = flow.tensor.transfer %55002 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %55004 = torch_c.from_builtin_tensor %55003 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %55005 = torch_c.to_builtin_tensor %54995 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %55006 = flow.tensor.transfer %55005 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %55007 = torch_c.from_builtin_tensor %55006 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %55008 = torch_c.to_builtin_tensor %54995 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %55009 = flow.tensor.transfer %55008 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %55010 = torch_c.from_builtin_tensor %55009 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %55011 = torch_c.to_builtin_tensor %54995 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %55012 = flow.tensor.transfer %55011 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %55013 = torch_c.from_builtin_tensor %55012 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %55014 = torch_c.to_builtin_tensor %54995 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %55015 = flow.tensor.transfer %55014 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %55016 = torch_c.from_builtin_tensor %55015 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %55017 = torch_c.to_builtin_tensor %54995 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %55018 = flow.tensor.transfer %55017 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %55019 = torch_c.from_builtin_tensor %55018 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
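    // The same rotation is now applied per device to the single-head
    // [4,?,1,128] f16 tensors %54793..%54801 (likely the per-shard key or
    // value head of a grouped-query attention layer).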
    %int1_52384 = torch.constant.int 1
    %55020 = torch.aten.size.int %54669, %int1_52384 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_52385 = torch.constant.int 0
    %55021 = torch.aten.add.int %int0_52385, %55020 : !torch.int, !torch.int -> !torch.int
    %int0_52386 = torch.constant.int 0
    %int0_52387 = torch.constant.int 0
    %int1_52388 = torch.constant.int 1
    %55022 = torch.aten.slice.Tensor %54998, %int0_52386, %int0_52387, %55021, %int1_52388 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55022, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52389 = torch.constant.int 1
    %int0_52390 = torch.constant.int 0
    %int9223372036854775807_52391 = torch.constant.int 9223372036854775807
    %int1_52392 = torch.constant.int 1
    %55023 = torch.aten.slice.Tensor %55022, %int1_52389, %int0_52390, %int9223372036854775807_52391, %int1_52392 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55023, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52393 = torch.constant.int 0
    %55024 = torch.aten.unsqueeze %55023, %int0_52393 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %55024, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52394 = torch.constant.int 2
    %55025 = torch.aten.unsqueeze %55024, %int2_52394 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55025, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52395 = torch.constant.int 3
    %int0_52396 = torch.constant.int 0
    %int9223372036854775807_52397 = torch.constant.int 9223372036854775807
    %int1_52398 = torch.constant.int 1
    %55026 = torch.aten.slice.Tensor %55025, %int3_52395, %int0_52396, %int9223372036854775807_52397, %int1_52398 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55026, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %55027 = torch_c.to_builtin_tensor %54793 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_52399 = arith.constant 1 : index
    %dim_52400 = tensor.dim %55027, %c1_52399 : tensor<4x?x1x128xf16>
    %55028 = flow.tensor.bitcast %55027 : tensor<4x?x1x128xf16>{%dim_52400} -> tensor<4x?x1x64xcomplex<f16>>{%dim_52400}
    %55029 = torch_c.from_builtin_tensor %55028 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %55029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %55030 = torch.aten.mul.Tensor %55029, %55026 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %55031 = torch_c.to_builtin_tensor %55030 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_52401 = arith.constant 1 : index
    %dim_52402 = tensor.dim %55031, %c1_52401 : tensor<4x?x1x64xcomplex<f32>>
    %55032 = flow.tensor.bitcast %55031 : tensor<4x?x1x64xcomplex<f32>>{%dim_52402} -> tensor<4x?x1x128xf32>{%dim_52402}
    %55033 = torch_c.from_builtin_tensor %55032 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %55033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_52403 = torch.constant.int 5
    %55034 = torch.prims.convert_element_type %55033, %int5_52403 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %55034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_52404 = torch.constant.int 1
    %55035 = torch.aten.size.int %54675, %int1_52404 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_52405 = torch.constant.int 0
    %55036 = torch.aten.add.int %int0_52405, %55035 : !torch.int, !torch.int -> !torch.int
    %int0_52406 = torch.constant.int 0
    %int0_52407 = torch.constant.int 0
    %int1_52408 = torch.constant.int 1
    %55037 = torch.aten.slice.Tensor %55001, %int0_52406, %int0_52407, %55036, %int1_52408 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55037, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52409 = torch.constant.int 1
    %int0_52410 = torch.constant.int 0
    %int9223372036854775807_52411 = torch.constant.int 9223372036854775807
    %int1_52412 = torch.constant.int 1
    %55038 = torch.aten.slice.Tensor %55037, %int1_52409, %int0_52410, %int9223372036854775807_52411, %int1_52412 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55038, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52413 = torch.constant.int 0
    %55039 = torch.aten.unsqueeze %55038, %int0_52413 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %55039, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52414 = torch.constant.int 2
    %55040 = torch.aten.unsqueeze %55039, %int2_52414 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55040, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52415 = torch.constant.int 3
    %int0_52416 = torch.constant.int 0
    %int9223372036854775807_52417 = torch.constant.int 9223372036854775807
    %int1_52418 = torch.constant.int 1
    %55041 = torch.aten.slice.Tensor %55040, %int3_52415, %int0_52416, %int9223372036854775807_52417, %int1_52418 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55041, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %55042 = torch_c.to_builtin_tensor %54795 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_52419 = arith.constant 1 : index
    %dim_52420 = tensor.dim %55042, %c1_52419 : tensor<4x?x1x128xf16>
    %55043 = flow.tensor.bitcast %55042 : tensor<4x?x1x128xf16>{%dim_52420} -> tensor<4x?x1x64xcomplex<f16>>{%dim_52420}
    %55044 = torch_c.from_builtin_tensor %55043 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %55044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %55045 = torch.aten.mul.Tensor %55044, %55041 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %55046 = torch_c.to_builtin_tensor %55045 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_52421 = arith.constant 1 : index
    %dim_52422 = tensor.dim %55046, %c1_52421 : tensor<4x?x1x64xcomplex<f32>>
    %55047 = flow.tensor.bitcast %55046 : tensor<4x?x1x64xcomplex<f32>>{%dim_52422} -> tensor<4x?x1x128xf32>{%dim_52422}
    %55048 = torch_c.from_builtin_tensor %55047 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %55048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_52423 = torch.constant.int 5
    %55049 = torch.prims.convert_element_type %55048, %int5_52423 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %55049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_52424 = torch.constant.int 1
    %55050 = torch.aten.size.int %54681, %int1_52424 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_52425 = torch.constant.int 0
    %55051 = torch.aten.add.int %int0_52425, %55050 : !torch.int, !torch.int -> !torch.int
    %int0_52426 = torch.constant.int 0
    %int0_52427 = torch.constant.int 0
    %int1_52428 = torch.constant.int 1
    %55052 = torch.aten.slice.Tensor %55004, %int0_52426, %int0_52427, %55051, %int1_52428 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55052, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52429 = torch.constant.int 1
    %int0_52430 = torch.constant.int 0
    %int9223372036854775807_52431 = torch.constant.int 9223372036854775807
    %int1_52432 = torch.constant.int 1
    %55053 = torch.aten.slice.Tensor %55052, %int1_52429, %int0_52430, %int9223372036854775807_52431, %int1_52432 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55053, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52433 = torch.constant.int 0
    %55054 = torch.aten.unsqueeze %55053, %int0_52433 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %55054, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52434 = torch.constant.int 2
    %55055 = torch.aten.unsqueeze %55054, %int2_52434 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55055, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52435 = torch.constant.int 3
    %int0_52436 = torch.constant.int 0
    %int9223372036854775807_52437 = torch.constant.int 9223372036854775807
    %int1_52438 = torch.constant.int 1
    %55056 = torch.aten.slice.Tensor %55055, %int3_52435, %int0_52436, %int9223372036854775807_52437, %int1_52438 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55056, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %55057 = torch_c.to_builtin_tensor %54797 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_52439 = arith.constant 1 : index
    %dim_52440 = tensor.dim %55057, %c1_52439 : tensor<4x?x1x128xf16>
    %55058 = flow.tensor.bitcast %55057 : tensor<4x?x1x128xf16>{%dim_52440} -> tensor<4x?x1x64xcomplex<f16>>{%dim_52440}
    %55059 = torch_c.from_builtin_tensor %55058 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %55059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %55060 = torch.aten.mul.Tensor %55059, %55056 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %55061 = torch_c.to_builtin_tensor %55060 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_52441 = arith.constant 1 : index
    %dim_52442 = tensor.dim %55061, %c1_52441 : tensor<4x?x1x64xcomplex<f32>>
    %55062 = flow.tensor.bitcast %55061 : tensor<4x?x1x64xcomplex<f32>>{%dim_52442} -> tensor<4x?x1x128xf32>{%dim_52442}
    %55063 = torch_c.from_builtin_tensor %55062 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %55063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_52443 = torch.constant.int 5
    %55064 = torch.prims.convert_element_type %55063, %int5_52443 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %55064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_52444 = torch.constant.int 1
    %55065 = torch.aten.size.int %54687, %int1_52444 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_52445 = torch.constant.int 0
    %55066 = torch.aten.add.int %int0_52445, %55065 : !torch.int, !torch.int -> !torch.int
    %int0_52446 = torch.constant.int 0
    %int0_52447 = torch.constant.int 0
    %int1_52448 = torch.constant.int 1
    %55067 = torch.aten.slice.Tensor %55007, %int0_52446, %int0_52447, %55066, %int1_52448 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55067, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52449 = torch.constant.int 1
    %int0_52450 = torch.constant.int 0
    %int9223372036854775807_52451 = torch.constant.int 9223372036854775807
    %int1_52452 = torch.constant.int 1
    %55068 = torch.aten.slice.Tensor %55067, %int1_52449, %int0_52450, %int9223372036854775807_52451, %int1_52452 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55068, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52453 = torch.constant.int 0
    %55069 = torch.aten.unsqueeze %55068, %int0_52453 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %55069, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52454 = torch.constant.int 2
    %55070 = torch.aten.unsqueeze %55069, %int2_52454 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55070, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52455 = torch.constant.int 3
    %int0_52456 = torch.constant.int 0
    %int9223372036854775807_52457 = torch.constant.int 9223372036854775807
    %int1_52458 = torch.constant.int 1
    %55071 = torch.aten.slice.Tensor %55070, %int3_52455, %int0_52456, %int9223372036854775807_52457, %int1_52458 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55071, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %55072 = torch_c.to_builtin_tensor %54799 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_52459 = arith.constant 1 : index
    %dim_52460 = tensor.dim %55072, %c1_52459 : tensor<4x?x1x128xf16>
    %55073 = flow.tensor.bitcast %55072 : tensor<4x?x1x128xf16>{%dim_52460} -> tensor<4x?x1x64xcomplex<f16>>{%dim_52460}
    %55074 = torch_c.from_builtin_tensor %55073 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %55074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %55075 = torch.aten.mul.Tensor %55074, %55071 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %55076 = torch_c.to_builtin_tensor %55075 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_52461 = arith.constant 1 : index
    %dim_52462 = tensor.dim %55076, %c1_52461 : tensor<4x?x1x64xcomplex<f32>>
    %55077 = flow.tensor.bitcast %55076 : tensor<4x?x1x64xcomplex<f32>>{%dim_52462} -> tensor<4x?x1x128xf32>{%dim_52462}
    %55078 = torch_c.from_builtin_tensor %55077 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %55078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_52463 = torch.constant.int 5
    %55079 = torch.prims.convert_element_type %55078, %int5_52463 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %55079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_52464 = torch.constant.int 1
    %55080 = torch.aten.size.int %54693, %int1_52464 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_52465 = torch.constant.int 0
    %55081 = torch.aten.add.int %int0_52465, %55080 : !torch.int, !torch.int -> !torch.int
    %int0_52466 = torch.constant.int 0
    %int0_52467 = torch.constant.int 0
    %int1_52468 = torch.constant.int 1
    %55082 = torch.aten.slice.Tensor %55010, %int0_52466, %int0_52467, %55081, %int1_52468 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55082, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52469 = torch.constant.int 1
    %int0_52470 = torch.constant.int 0
    %int9223372036854775807_52471 = torch.constant.int 9223372036854775807
    %int1_52472 = torch.constant.int 1
    %55083 = torch.aten.slice.Tensor %55082, %int1_52469, %int0_52470, %int9223372036854775807_52471, %int1_52472 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55083, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52473 = torch.constant.int 0
    %55084 = torch.aten.unsqueeze %55083, %int0_52473 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %55084, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52474 = torch.constant.int 2
    %55085 = torch.aten.unsqueeze %55084, %int2_52474 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55085, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52475 = torch.constant.int 3
    %int0_52476 = torch.constant.int 0
    %int9223372036854775807_52477 = torch.constant.int 9223372036854775807
    %int1_52478 = torch.constant.int 1
    %55086 = torch.aten.slice.Tensor %55085, %int3_52475, %int0_52476, %int9223372036854775807_52477, %int1_52478 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55086, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %55087 = torch_c.to_builtin_tensor %54801 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_52479 = arith.constant 1 : index
    %dim_52480 = tensor.dim %55087, %c1_52479 : tensor<4x?x1x128xf16>
    %55088 = flow.tensor.bitcast %55087 : tensor<4x?x1x128xf16>{%dim_52480} -> tensor<4x?x1x64xcomplex<f16>>{%dim_52480}
    %55089 = torch_c.from_builtin_tensor %55088 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %55089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %55090 = torch.aten.mul.Tensor %55089, %55086 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %55091 = torch_c.to_builtin_tensor %55090 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_52481 = arith.constant 1 : index
    %dim_52482 = tensor.dim %55091, %c1_52481 : tensor<4x?x1x64xcomplex<f32>>
    %55092 = flow.tensor.bitcast %55091 : tensor<4x?x1x64xcomplex<f32>>{%dim_52482} -> tensor<4x?x1x128xf32>{%dim_52482}
    %55093 = torch_c.from_builtin_tensor %55092 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %55093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_52483 = torch.constant.int 5
    %55094 = torch.prims.convert_element_type %55093, %int5_52483 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %55094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_52484 = torch.constant.int 1
    %55095 = torch.aten.size.int %54699, %int1_52484 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_52485 = torch.constant.int 0
    %55096 = torch.aten.add.int %int0_52485, %55095 : !torch.int, !torch.int -> !torch.int
    %int0_52486 = torch.constant.int 0
    %int0_52487 = torch.constant.int 0
    %int1_52488 = torch.constant.int 1
    %55097 = torch.aten.slice.Tensor %55013, %int0_52486, %int0_52487, %55096, %int1_52488 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55097, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52489 = torch.constant.int 1
    %int0_52490 = torch.constant.int 0
    %int9223372036854775807_52491 = torch.constant.int 9223372036854775807
    %int1_52492 = torch.constant.int 1
    %55098 = torch.aten.slice.Tensor %55097, %int1_52489, %int0_52490, %int9223372036854775807_52491, %int1_52492 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55098, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52493 = torch.constant.int 0
    %55099 = torch.aten.unsqueeze %55098, %int0_52493 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %55099, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52494 = torch.constant.int 2
    %55100 = torch.aten.unsqueeze %55099, %int2_52494 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55100, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52495 = torch.constant.int 3
    %int0_52496 = torch.constant.int 0
    %int9223372036854775807_52497 = torch.constant.int 9223372036854775807
    %int1_52498 = torch.constant.int 1
    %55101 = torch.aten.slice.Tensor %55100, %int3_52495, %int0_52496, %int9223372036854775807_52497, %int1_52498 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55101, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %55102 = torch_c.to_builtin_tensor %54803 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_52499 = arith.constant 1 : index
    %dim_52500 = tensor.dim %55102, %c1_52499 : tensor<4x?x1x128xf16>
    %55103 = flow.tensor.bitcast %55102 : tensor<4x?x1x128xf16>{%dim_52500} -> tensor<4x?x1x64xcomplex<f16>>{%dim_52500}
    %55104 = torch_c.from_builtin_tensor %55103 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %55104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %55105 = torch.aten.mul.Tensor %55104, %55101 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %55106 = torch_c.to_builtin_tensor %55105 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_52501 = arith.constant 1 : index
    %dim_52502 = tensor.dim %55106, %c1_52501 : tensor<4x?x1x64xcomplex<f32>>
    %55107 = flow.tensor.bitcast %55106 : tensor<4x?x1x64xcomplex<f32>>{%dim_52502} -> tensor<4x?x1x128xf32>{%dim_52502}
    %55108 = torch_c.from_builtin_tensor %55107 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %55108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_52503 = torch.constant.int 5
    %55109 = torch.prims.convert_element_type %55108, %int5_52503 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %55109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_52504 = torch.constant.int 1
    %55110 = torch.aten.size.int %54705, %int1_52504 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_52505 = torch.constant.int 0
    %55111 = torch.aten.add.int %int0_52505, %55110 : !torch.int, !torch.int -> !torch.int
    %int0_52506 = torch.constant.int 0
    %int0_52507 = torch.constant.int 0
    %int1_52508 = torch.constant.int 1
    %55112 = torch.aten.slice.Tensor %55016, %int0_52506, %int0_52507, %55111, %int1_52508 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55112, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52509 = torch.constant.int 1
    %int0_52510 = torch.constant.int 0
    %int9223372036854775807_52511 = torch.constant.int 9223372036854775807
    %int1_52512 = torch.constant.int 1
    %55113 = torch.aten.slice.Tensor %55112, %int1_52509, %int0_52510, %int9223372036854775807_52511, %int1_52512 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55113, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52513 = torch.constant.int 0
    %55114 = torch.aten.unsqueeze %55113, %int0_52513 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %55114, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52514 = torch.constant.int 2
    %55115 = torch.aten.unsqueeze %55114, %int2_52514 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55115, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52515 = torch.constant.int 3
    %int0_52516 = torch.constant.int 0
    %int9223372036854775807_52517 = torch.constant.int 9223372036854775807
    %int1_52518 = torch.constant.int 1
    %55116 = torch.aten.slice.Tensor %55115, %int3_52515, %int0_52516, %int9223372036854775807_52517, %int1_52518 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55116, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %55117 = torch_c.to_builtin_tensor %54805 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_52519 = arith.constant 1 : index
    %dim_52520 = tensor.dim %55117, %c1_52519 : tensor<4x?x1x128xf16>
    %55118 = flow.tensor.bitcast %55117 : tensor<4x?x1x128xf16>{%dim_52520} -> tensor<4x?x1x64xcomplex<f16>>{%dim_52520}
    %55119 = torch_c.from_builtin_tensor %55118 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %55119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %55120 = torch.aten.mul.Tensor %55119, %55116 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %55121 = torch_c.to_builtin_tensor %55120 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_52521 = arith.constant 1 : index
    %dim_52522 = tensor.dim %55121, %c1_52521 : tensor<4x?x1x64xcomplex<f32>>
    %55122 = flow.tensor.bitcast %55121 : tensor<4x?x1x64xcomplex<f32>>{%dim_52522} -> tensor<4x?x1x128xf32>{%dim_52522}
    %55123 = torch_c.from_builtin_tensor %55122 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %55123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_52523 = torch.constant.int 5
    %55124 = torch.prims.convert_element_type %55123, %int5_52523 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %55124, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_52524 = torch.constant.int 1
    %55125 = torch.aten.size.int %54711, %int1_52524 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_52525 = torch.constant.int 0
    %55126 = torch.aten.add.int %int0_52525, %55125 : !torch.int, !torch.int -> !torch.int
    %int0_52526 = torch.constant.int 0
    %int0_52527 = torch.constant.int 0
    %int1_52528 = torch.constant.int 1
    %55127 = torch.aten.slice.Tensor %55019, %int0_52526, %int0_52527, %55126, %int1_52528 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55127, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_52529 = torch.constant.int 1
    %int0_52530 = torch.constant.int 0
    %int9223372036854775807_52531 = torch.constant.int 9223372036854775807
    %int1_52532 = torch.constant.int 1
    %55128 = torch.aten.slice.Tensor %55127, %int1_52529, %int0_52530, %int9223372036854775807_52531, %int1_52532 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %55128, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_52533 = torch.constant.int 0
    %55129 = torch.aten.unsqueeze %55128, %int0_52533 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %55129, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_52534 = torch.constant.int 2
    %55130 = torch.aten.unsqueeze %55129, %int2_52534 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55130, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_52535 = torch.constant.int 3
    %int0_52536 = torch.constant.int 0
    %int9223372036854775807_52537 = torch.constant.int 9223372036854775807
    %int1_52538 = torch.constant.int 1
    %55131 = torch.aten.slice.Tensor %55130, %int3_52535, %int0_52536, %int9223372036854775807_52537, %int1_52538 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55131, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %55132 = torch_c.to_builtin_tensor %54807 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_52539 = arith.constant 1 : index
    %dim_52540 = tensor.dim %55132, %c1_52539 : tensor<4x?x1x128xf16>
    %55133 = flow.tensor.bitcast %55132 : tensor<4x?x1x128xf16>{%dim_52540} -> tensor<4x?x1x64xcomplex<f16>>{%dim_52540}
    %55134 = torch_c.from_builtin_tensor %55133 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %55134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %55135 = torch.aten.mul.Tensor %55134, %55131 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %55135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %55136 = torch_c.to_builtin_tensor %55135 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_52541 = arith.constant 1 : index
    %dim_52542 = tensor.dim %55136, %c1_52541 : tensor<4x?x1x64xcomplex<f32>>
    %55137 = flow.tensor.bitcast %55136 : tensor<4x?x1x64xcomplex<f32>>{%dim_52542} -> tensor<4x?x1x128xf32>{%dim_52542}
    %55138 = torch_c.from_builtin_tensor %55137 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %55138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_52543 = torch.constant.int 5
    %55139 = torch.prims.convert_element_type %55138, %int5_52543 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %55139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
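    // Index computation for the paged cache update: for each of the 8 device shards,
    // scale the per-shard page-index tensors (%2364 ... %2385) by 64. Given the cache
    // view [pages, 32, 2, 16, 1, 128] used further below, 64 = 32 * 2 is plausibly the
    // number of (layer, K/V) slots per page, so this yields each page's base slot index.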
    %int64_52544 = torch.constant.int 64
    %55140 = torch.aten.mul.Scalar %2364, %int64_52544 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55140, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_52545 = torch.constant.int 64
    %55141 = torch.aten.mul.Scalar %2367, %int64_52545 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55141, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_52546 = torch.constant.int 64
    %55142 = torch.aten.mul.Scalar %2370, %int64_52546 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55142, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_52547 = torch.constant.int 64
    %55143 = torch.aten.mul.Scalar %2373, %int64_52547 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55143, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_52548 = torch.constant.int 64
    %55144 = torch.aten.mul.Scalar %2376, %int64_52548 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55144, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_52549 = torch.constant.int 64
    %55145 = torch.aten.mul.Scalar %2379, %int64_52549 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55145, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_52550 = torch.constant.int 64
    %55146 = torch.aten.mul.Scalar %2382, %int64_52550 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55146, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_52551 = torch.constant.int 64
    %55147 = torch.aten.mul.Scalar %2385, %int64_52551 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55147, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
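    // Adding the constant 56 then selects this layer's K slot within each page
    // (consistent with 2 slots per layer at layer index 28; an inferred reading).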
    %int56 = torch.constant.int 56
    %int1_52552 = torch.constant.int 1
    %55148 = torch.aten.add.Scalar %55140, %int56, %int1_52552 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55148, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int56_52553 = torch.constant.int 56
    %int1_52554 = torch.constant.int 1
    %55149 = torch.aten.add.Scalar %55141, %int56_52553, %int1_52554 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55149, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int56_52555 = torch.constant.int 56
    %int1_52556 = torch.constant.int 1
    %55150 = torch.aten.add.Scalar %55142, %int56_52555, %int1_52556 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55150, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int56_52557 = torch.constant.int 56
    %int1_52558 = torch.constant.int 1
    %55151 = torch.aten.add.Scalar %55143, %int56_52557, %int1_52558 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55151, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int56_52559 = torch.constant.int 56
    %int1_52560 = torch.constant.int 1
    %55152 = torch.aten.add.Scalar %55144, %int56_52559, %int1_52560 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55152, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int56_52561 = torch.constant.int 56
    %int1_52562 = torch.constant.int 1
    %55153 = torch.aten.add.Scalar %55145, %int56_52561, %int1_52562 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55153, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int56_52563 = torch.constant.int 56
    %int1_52564 = torch.constant.int 1
    %55154 = torch.aten.add.Scalar %55146, %int56_52563, %int1_52564 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55154, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int56_52565 = torch.constant.int 56
    %int1_52566 = torch.constant.int 1
    %55155 = torch.aten.add.Scalar %55147, %int56_52565, %int1_52566 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55155, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
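    // Reshape each roped K shard from [4, seq, 1, 128] to [4, pages, 16, 1, 128],
    // splitting the sequence dimension into pages of 16 tokens each.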
    %int4_52567 = torch.constant.int 4
    %int16_52568 = torch.constant.int 16
    %int1_52569 = torch.constant.int 1
    %int128_52570 = torch.constant.int 128
    %55156 = torch.prim.ListConstruct %int4_52567, %3095, %int16_52568, %int1_52569, %int128_52570 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55157 = torch.aten.view %55034, %55156 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55157, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52571 = torch.constant.int 4
    %int16_52572 = torch.constant.int 16
    %int1_52573 = torch.constant.int 1
    %int128_52574 = torch.constant.int 128
    %55158 = torch.prim.ListConstruct %int4_52571, %3095, %int16_52572, %int1_52573, %int128_52574 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55159 = torch.aten.view %55049, %55158 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55159, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52575 = torch.constant.int 4
    %int16_52576 = torch.constant.int 16
    %int1_52577 = torch.constant.int 1
    %int128_52578 = torch.constant.int 128
    %55160 = torch.prim.ListConstruct %int4_52575, %3095, %int16_52576, %int1_52577, %int128_52578 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55161 = torch.aten.view %55064, %55160 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55161, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52579 = torch.constant.int 4
    %int16_52580 = torch.constant.int 16
    %int1_52581 = torch.constant.int 1
    %int128_52582 = torch.constant.int 128
    %55162 = torch.prim.ListConstruct %int4_52579, %3095, %int16_52580, %int1_52581, %int128_52582 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55163 = torch.aten.view %55079, %55162 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55163, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52583 = torch.constant.int 4
    %int16_52584 = torch.constant.int 16
    %int1_52585 = torch.constant.int 1
    %int128_52586 = torch.constant.int 128
    %55164 = torch.prim.ListConstruct %int4_52583, %3095, %int16_52584, %int1_52585, %int128_52586 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55165 = torch.aten.view %55094, %55164 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55165, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52587 = torch.constant.int 4
    %int16_52588 = torch.constant.int 16
    %int1_52589 = torch.constant.int 1
    %int128_52590 = torch.constant.int 128
    %55166 = torch.prim.ListConstruct %int4_52587, %3095, %int16_52588, %int1_52589, %int128_52590 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55167 = torch.aten.view %55109, %55166 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55167, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52591 = torch.constant.int 4
    %int16_52592 = torch.constant.int 16
    %int1_52593 = torch.constant.int 1
    %int128_52594 = torch.constant.int 128
    %55168 = torch.prim.ListConstruct %int4_52591, %3095, %int16_52592, %int1_52593, %int128_52594 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55169 = torch.aten.view %55124, %55168 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55169, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52595 = torch.constant.int 4
    %int16_52596 = torch.constant.int 16
    %int1_52597 = torch.constant.int 1
    %int128_52598 = torch.constant.int 128
    %55170 = torch.prim.ListConstruct %int4_52595, %3095, %int16_52596, %int1_52597, %int128_52598 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55171 = torch.aten.view %55139, %55170 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55171, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
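    // Collapse the batch and page dimensions of each K shard into a flat list of
    // [16, 1, 128] token blocks, one per (batch, page) pair, ready for scattering.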
    %int4_52599 = torch.constant.int 4
    %55172 = torch.aten.mul.int %int4_52599, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52600 = torch.constant.int 16
    %int1_52601 = torch.constant.int 1
    %int128_52602 = torch.constant.int 128
    %55173 = torch.prim.ListConstruct %55172, %int16_52600, %int1_52601, %int128_52602 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55174 = torch.aten.view %55157, %55173 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55174, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52603 = torch.constant.int 4
    %55175 = torch.aten.mul.int %int4_52603, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52604 = torch.constant.int 16
    %int1_52605 = torch.constant.int 1
    %int128_52606 = torch.constant.int 128
    %55176 = torch.prim.ListConstruct %55175, %int16_52604, %int1_52605, %int128_52606 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55177 = torch.aten.view %55159, %55176 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55177, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52607 = torch.constant.int 4
    %55178 = torch.aten.mul.int %int4_52607, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52608 = torch.constant.int 16
    %int1_52609 = torch.constant.int 1
    %int128_52610 = torch.constant.int 128
    %55179 = torch.prim.ListConstruct %55178, %int16_52608, %int1_52609, %int128_52610 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55180 = torch.aten.view %55161, %55179 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55180, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52611 = torch.constant.int 4
    %55181 = torch.aten.mul.int %int4_52611, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52612 = torch.constant.int 16
    %int1_52613 = torch.constant.int 1
    %int128_52614 = torch.constant.int 128
    %55182 = torch.prim.ListConstruct %55181, %int16_52612, %int1_52613, %int128_52614 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55183 = torch.aten.view %55163, %55182 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55183, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52615 = torch.constant.int 4
    %55184 = torch.aten.mul.int %int4_52615, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52616 = torch.constant.int 16
    %int1_52617 = torch.constant.int 1
    %int128_52618 = torch.constant.int 128
    %55185 = torch.prim.ListConstruct %55184, %int16_52616, %int1_52617, %int128_52618 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55186 = torch.aten.view %55165, %55185 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55186, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52619 = torch.constant.int 4
    %55187 = torch.aten.mul.int %int4_52619, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52620 = torch.constant.int 16
    %int1_52621 = torch.constant.int 1
    %int128_52622 = torch.constant.int 128
    %55188 = torch.prim.ListConstruct %55187, %int16_52620, %int1_52621, %int128_52622 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55189 = torch.aten.view %55167, %55188 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55189, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52623 = torch.constant.int 4
    %55190 = torch.aten.mul.int %int4_52623, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52624 = torch.constant.int 16
    %int1_52625 = torch.constant.int 1
    %int128_52626 = torch.constant.int 128
    %55191 = torch.prim.ListConstruct %55190, %int16_52624, %int1_52625, %int128_52626 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55192 = torch.aten.view %55169, %55191 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55192, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52627 = torch.constant.int 4
    %55193 = torch.aten.mul.int %int4_52627, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52628 = torch.constant.int 16
    %int1_52629 = torch.constant.int 1
    %int128_52630 = torch.constant.int 128
    %55194 = torch.prim.ListConstruct %55193, %int16_52628, %int1_52629, %int128_52630 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55195 = torch.aten.view %55171, %55194 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55195, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
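    // Flatten the corresponding [4, pages] K-slot index tensors to 1-D so they line
    // up with the flattened K blocks.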
    %int4_52631 = torch.constant.int 4
    %55196 = torch.aten.mul.int %int4_52631, %3095 : !torch.int, !torch.int -> !torch.int
    %55197 = torch.prim.ListConstruct %55196 : (!torch.int) -> !torch.list<int>
    %55198 = torch.aten.view %55148, %55197 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55198, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52632 = torch.constant.int 4
    %55199 = torch.aten.mul.int %int4_52632, %3095 : !torch.int, !torch.int -> !torch.int
    %55200 = torch.prim.ListConstruct %55199 : (!torch.int) -> !torch.list<int>
    %55201 = torch.aten.view %55149, %55200 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55201, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52633 = torch.constant.int 4
    %55202 = torch.aten.mul.int %int4_52633, %3095 : !torch.int, !torch.int -> !torch.int
    %55203 = torch.prim.ListConstruct %55202 : (!torch.int) -> !torch.list<int>
    %55204 = torch.aten.view %55150, %55203 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55204, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52634 = torch.constant.int 4
    %55205 = torch.aten.mul.int %int4_52634, %3095 : !torch.int, !torch.int -> !torch.int
    %55206 = torch.prim.ListConstruct %55205 : (!torch.int) -> !torch.list<int>
    %55207 = torch.aten.view %55151, %55206 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55207, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52635 = torch.constant.int 4
    %55208 = torch.aten.mul.int %int4_52635, %3095 : !torch.int, !torch.int -> !torch.int
    %55209 = torch.prim.ListConstruct %55208 : (!torch.int) -> !torch.list<int>
    %55210 = torch.aten.view %55152, %55209 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55210, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52636 = torch.constant.int 4
    %55211 = torch.aten.mul.int %int4_52636, %3095 : !torch.int, !torch.int -> !torch.int
    %55212 = torch.prim.ListConstruct %55211 : (!torch.int) -> !torch.list<int>
    %55213 = torch.aten.view %55153, %55212 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55213, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52637 = torch.constant.int 4
    %55214 = torch.aten.mul.int %int4_52637, %3095 : !torch.int, !torch.int -> !torch.int
    %55215 = torch.prim.ListConstruct %55214 : (!torch.int) -> !torch.list<int>
    %55216 = torch.aten.view %55154, %55215 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55216, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52638 = torch.constant.int 4
    %55217 = torch.aten.mul.int %int4_52638, %3095 : !torch.int, !torch.int -> !torch.int
    %55218 = torch.prim.ListConstruct %55217 : (!torch.int) -> !torch.list<int>
    %55219 = torch.aten.view %55155, %55218 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55219, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
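    // The value (V) shards (%54809 ... %54823) get the same [4, pages, 16, 1, 128]
    // reshape; note they are cached without the rotary transform.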
    %int4_52639 = torch.constant.int 4
    %int16_52640 = torch.constant.int 16
    %int1_52641 = torch.constant.int 1
    %int128_52642 = torch.constant.int 128
    %55220 = torch.prim.ListConstruct %int4_52639, %3095, %int16_52640, %int1_52641, %int128_52642 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55221 = torch.aten.view %54809, %55220 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55221, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52643 = torch.constant.int 4
    %int16_52644 = torch.constant.int 16
    %int1_52645 = torch.constant.int 1
    %int128_52646 = torch.constant.int 128
    %55222 = torch.prim.ListConstruct %int4_52643, %3095, %int16_52644, %int1_52645, %int128_52646 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55223 = torch.aten.view %54811, %55222 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55223, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52647 = torch.constant.int 4
    %int16_52648 = torch.constant.int 16
    %int1_52649 = torch.constant.int 1
    %int128_52650 = torch.constant.int 128
    %55224 = torch.prim.ListConstruct %int4_52647, %3095, %int16_52648, %int1_52649, %int128_52650 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55225 = torch.aten.view %54813, %55224 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55225, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52651 = torch.constant.int 4
    %int16_52652 = torch.constant.int 16
    %int1_52653 = torch.constant.int 1
    %int128_52654 = torch.constant.int 128
    %55226 = torch.prim.ListConstruct %int4_52651, %3095, %int16_52652, %int1_52653, %int128_52654 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55227 = torch.aten.view %54815, %55226 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55227, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52655 = torch.constant.int 4
    %int16_52656 = torch.constant.int 16
    %int1_52657 = torch.constant.int 1
    %int128_52658 = torch.constant.int 128
    %55228 = torch.prim.ListConstruct %int4_52655, %3095, %int16_52656, %int1_52657, %int128_52658 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55229 = torch.aten.view %54817, %55228 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55229, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52659 = torch.constant.int 4
    %int16_52660 = torch.constant.int 16
    %int1_52661 = torch.constant.int 1
    %int128_52662 = torch.constant.int 128
    %55230 = torch.prim.ListConstruct %int4_52659, %3095, %int16_52660, %int1_52661, %int128_52662 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55231 = torch.aten.view %54819, %55230 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55231, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52663 = torch.constant.int 4
    %int16_52664 = torch.constant.int 16
    %int1_52665 = torch.constant.int 1
    %int128_52666 = torch.constant.int 128
    %55232 = torch.prim.ListConstruct %int4_52663, %3095, %int16_52664, %int1_52665, %int128_52666 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55233 = torch.aten.view %54821, %55232 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55233, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_52667 = torch.constant.int 4
    %int16_52668 = torch.constant.int 16
    %int1_52669 = torch.constant.int 1
    %int128_52670 = torch.constant.int 128
    %55234 = torch.prim.ListConstruct %int4_52667, %3095, %int16_52668, %int1_52669, %int128_52670 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55235 = torch.aten.view %54823, %55234 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %55235, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
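    // ...followed by the same batch/page collapse to a flat [4*pages, 16, 1, 128] list.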
    %int4_52671 = torch.constant.int 4
    %55236 = torch.aten.mul.int %int4_52671, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52672 = torch.constant.int 16
    %int1_52673 = torch.constant.int 1
    %int128_52674 = torch.constant.int 128
    %55237 = torch.prim.ListConstruct %55236, %int16_52672, %int1_52673, %int128_52674 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55238 = torch.aten.view %55221, %55237 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55238, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52675 = torch.constant.int 4
    %55239 = torch.aten.mul.int %int4_52675, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52676 = torch.constant.int 16
    %int1_52677 = torch.constant.int 1
    %int128_52678 = torch.constant.int 128
    %55240 = torch.prim.ListConstruct %55239, %int16_52676, %int1_52677, %int128_52678 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55241 = torch.aten.view %55223, %55240 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55241, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52679 = torch.constant.int 4
    %55242 = torch.aten.mul.int %int4_52679, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52680 = torch.constant.int 16
    %int1_52681 = torch.constant.int 1
    %int128_52682 = torch.constant.int 128
    %55243 = torch.prim.ListConstruct %55242, %int16_52680, %int1_52681, %int128_52682 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55244 = torch.aten.view %55225, %55243 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55244, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52683 = torch.constant.int 4
    %55245 = torch.aten.mul.int %int4_52683, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52684 = torch.constant.int 16
    %int1_52685 = torch.constant.int 1
    %int128_52686 = torch.constant.int 128
    %55246 = torch.prim.ListConstruct %55245, %int16_52684, %int1_52685, %int128_52686 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55247 = torch.aten.view %55227, %55246 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55247, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52687 = torch.constant.int 4
    %55248 = torch.aten.mul.int %int4_52687, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52688 = torch.constant.int 16
    %int1_52689 = torch.constant.int 1
    %int128_52690 = torch.constant.int 128
    %55249 = torch.prim.ListConstruct %55248, %int16_52688, %int1_52689, %int128_52690 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55250 = torch.aten.view %55229, %55249 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55250, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52691 = torch.constant.int 4
    %55251 = torch.aten.mul.int %int4_52691, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52692 = torch.constant.int 16
    %int1_52693 = torch.constant.int 1
    %int128_52694 = torch.constant.int 128
    %55252 = torch.prim.ListConstruct %55251, %int16_52692, %int1_52693, %int128_52694 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55253 = torch.aten.view %55231, %55252 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55253, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52695 = torch.constant.int 4
    %55254 = torch.aten.mul.int %int4_52695, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52696 = torch.constant.int 16
    %int1_52697 = torch.constant.int 1
    %int128_52698 = torch.constant.int 128
    %55255 = torch.prim.ListConstruct %55254, %int16_52696, %int1_52697, %int128_52698 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55256 = torch.aten.view %55233, %55255 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55256, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_52699 = torch.constant.int 4
    %55257 = torch.aten.mul.int %int4_52699, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_52700 = torch.constant.int 16
    %int1_52701 = torch.constant.int 1
    %int128_52702 = torch.constant.int 128
    %55258 = torch.prim.ListConstruct %55257, %int16_52700, %int1_52701, %int128_52702 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55259 = torch.aten.view %55235, %55258 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55259, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
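    // V slots sit immediately after their K slot within a page (inferred from the
    // K/V pairing in the cache layout): add 1 to each per-shard K-slot index tensor
    // to obtain the V-slot indices.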
    %int1_52703 = torch.constant.int 1
    %int1_52704 = torch.constant.int 1
    %55260 = torch.aten.add.Scalar %55148, %int1_52703, %int1_52704 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55260, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_52705 = torch.constant.int 1
    %int1_52706 = torch.constant.int 1
    %55261 = torch.aten.add.Scalar %55149, %int1_52705, %int1_52706 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55261, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_52707 = torch.constant.int 1
    %int1_52708 = torch.constant.int 1
    %55262 = torch.aten.add.Scalar %55150, %int1_52707, %int1_52708 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55262, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_52709 = torch.constant.int 1
    %int1_52710 = torch.constant.int 1
    %55263 = torch.aten.add.Scalar %55151, %int1_52709, %int1_52710 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55263, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_52711 = torch.constant.int 1
    %int1_52712 = torch.constant.int 1
    %55264 = torch.aten.add.Scalar %55152, %int1_52711, %int1_52712 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55264, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_52713 = torch.constant.int 1
    %int1_52714 = torch.constant.int 1
    %55265 = torch.aten.add.Scalar %55153, %int1_52713, %int1_52714 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55265, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_52715 = torch.constant.int 1
    %int1_52716 = torch.constant.int 1
    %55266 = torch.aten.add.Scalar %55154, %int1_52715, %int1_52716 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55266, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_52717 = torch.constant.int 1
    %int1_52718 = torch.constant.int 1
    %55267 = torch.aten.add.Scalar %55155, %int1_52717, %int1_52718 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %55267, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
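    // Flatten the V-slot indices to 1-D, matching the flattened V blocks.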
    %int4_52719 = torch.constant.int 4
    %55268 = torch.aten.mul.int %int4_52719, %3095 : !torch.int, !torch.int -> !torch.int
    %55269 = torch.prim.ListConstruct %55268 : (!torch.int) -> !torch.list<int>
    %55270 = torch.aten.view %55260, %55269 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55270, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52720 = torch.constant.int 4
    %55271 = torch.aten.mul.int %int4_52720, %3095 : !torch.int, !torch.int -> !torch.int
    %55272 = torch.prim.ListConstruct %55271 : (!torch.int) -> !torch.list<int>
    %55273 = torch.aten.view %55261, %55272 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55273, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52721 = torch.constant.int 4
    %55274 = torch.aten.mul.int %int4_52721, %3095 : !torch.int, !torch.int -> !torch.int
    %55275 = torch.prim.ListConstruct %55274 : (!torch.int) -> !torch.list<int>
    %55276 = torch.aten.view %55262, %55275 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55276, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52722 = torch.constant.int 4
    %55277 = torch.aten.mul.int %int4_52722, %3095 : !torch.int, !torch.int -> !torch.int
    %55278 = torch.prim.ListConstruct %55277 : (!torch.int) -> !torch.list<int>
    %55279 = torch.aten.view %55263, %55278 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55279, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52723 = torch.constant.int 4
    %55280 = torch.aten.mul.int %int4_52723, %3095 : !torch.int, !torch.int -> !torch.int
    %55281 = torch.prim.ListConstruct %55280 : (!torch.int) -> !torch.list<int>
    %55282 = torch.aten.view %55264, %55281 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55282, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52724 = torch.constant.int 4
    %55283 = torch.aten.mul.int %int4_52724, %3095 : !torch.int, !torch.int -> !torch.int
    %55284 = torch.prim.ListConstruct %55283 : (!torch.int) -> !torch.list<int>
    %55285 = torch.aten.view %55265, %55284 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55285, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52725 = torch.constant.int 4
    %55286 = torch.aten.mul.int %int4_52725, %3095 : !torch.int, !torch.int -> !torch.int
    %55287 = torch.prim.ListConstruct %55286 : (!torch.int) -> !torch.list<int>
    %55288 = torch.aten.view %55266, %55287 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55288, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_52726 = torch.constant.int 4
    %55289 = torch.aten.mul.int %int4_52726, %3095 : !torch.int, !torch.int -> !torch.int
    %55290 = torch.prim.ListConstruct %55289 : (!torch.int) -> !torch.list<int>
    %55291 = torch.aten.view %55267, %55290 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55291, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
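    // Concatenate each shard's K and V slot indices so a single scatter per device
    // can write both halves of the update.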
    %55292 = torch.prim.ListConstruct %55198, %55270 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_52727 = torch.constant.int 0
    %55293 = torch.aten.cat %55292, %int0_52727 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55293, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %55294 = torch.prim.ListConstruct %55201, %55273 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_52728 = torch.constant.int 0
    %55295 = torch.aten.cat %55294, %int0_52728 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55295, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %55296 = torch.prim.ListConstruct %55204, %55276 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_52729 = torch.constant.int 0
    %55297 = torch.aten.cat %55296, %int0_52729 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55297, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %55298 = torch.prim.ListConstruct %55207, %55279 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_52730 = torch.constant.int 0
    %55299 = torch.aten.cat %55298, %int0_52730 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55299, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %55300 = torch.prim.ListConstruct %55210, %55282 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_52731 = torch.constant.int 0
    %55301 = torch.aten.cat %55300, %int0_52731 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55301, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %55302 = torch.prim.ListConstruct %55213, %55285 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_52732 = torch.constant.int 0
    %55303 = torch.aten.cat %55302, %int0_52732 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55303, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %55304 = torch.prim.ListConstruct %55216, %55288 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_52733 = torch.constant.int 0
    %55305 = torch.aten.cat %55304, %int0_52733 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55305, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %55306 = torch.prim.ListConstruct %55219, %55291 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_52734 = torch.constant.int 0
    %55307 = torch.aten.cat %55306, %int0_52734 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %55307, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
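    // Concatenate the K and V token blocks in the same K-then-V order as their indices.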
    %55308 = torch.prim.ListConstruct %55174, %55238 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_52735 = torch.constant.int 0
    %55309 = torch.aten.cat %55308, %int0_52735 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55309, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55310 = torch.prim.ListConstruct %55177, %55241 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_52736 = torch.constant.int 0
    %55311 = torch.aten.cat %55310, %int0_52736 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55311, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55312 = torch.prim.ListConstruct %55180, %55244 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_52737 = torch.constant.int 0
    %55313 = torch.aten.cat %55312, %int0_52737 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55313, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55314 = torch.prim.ListConstruct %55183, %55247 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_52738 = torch.constant.int 0
    %55315 = torch.aten.cat %55314, %int0_52738 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55315, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55316 = torch.prim.ListConstruct %55186, %55250 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_52739 = torch.constant.int 0
    %55317 = torch.aten.cat %55316, %int0_52739 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55317, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55318 = torch.prim.ListConstruct %55189, %55253 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_52740 = torch.constant.int 0
    %55319 = torch.aten.cat %55318, %int0_52740 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55319, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55320 = torch.prim.ListConstruct %55192, %55256 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_52741 = torch.constant.int 0
    %55321 = torch.aten.cat %55320, %int0_52741 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55321, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55322 = torch.prim.ListConstruct %55195, %55259 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_52742 = torch.constant.int 0
    %55323 = torch.aten.cat %55322, %int0_52742 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55323, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
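    // Cache update, device 0: view the flat [pages, 131072] cache as
    // [pages, 32, 2, 16, 1, 128] (131072 = 32 * 2 * 16 * 1 * 128 elements per page),
    // collapse it to a flat slot list, scatter the concatenated K/V blocks with
    // index_put (accumulate = false), then restore the original flat shape.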
    %int32_52743 = torch.constant.int 32
    %int2_52744 = torch.constant.int 2
    %int16_52745 = torch.constant.int 16
    %int1_52746 = torch.constant.int 1
    %int128_52747 = torch.constant.int 128
    %55324 = torch.prim.ListConstruct %3023, %int32_52743, %int2_52744, %int16_52745, %int1_52746, %int128_52747 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55325 = torch.aten.view %53474, %55324 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55325, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_52748 = torch.constant.int 32
    %55326 = torch.aten.mul.int %3023, %int32_52748 : !torch.int, !torch.int -> !torch.int
    %int2_52749 = torch.constant.int 2
    %55327 = torch.aten.mul.int %55326, %int2_52749 : !torch.int, !torch.int -> !torch.int
    %int16_52750 = torch.constant.int 16
    %int1_52751 = torch.constant.int 1
    %int128_52752 = torch.constant.int 128
    %55328 = torch.prim.ListConstruct %55327, %int16_52750, %int1_52751, %int128_52752 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55329 = torch.aten.view %55325, %55328 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55329, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55330 = torch.prim.ListConstruct %55293 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_52753 = torch.constant.bool false
    %55331 = torch.aten.index_put %55329, %55330, %55309, %false_52753 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55331, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_52754 = torch.constant.int 32
    %int2_52755 = torch.constant.int 2
    %int16_52756 = torch.constant.int 16
    %int1_52757 = torch.constant.int 1
    %int128_52758 = torch.constant.int 128
    %55332 = torch.prim.ListConstruct %3023, %int32_52754, %int2_52755, %int16_52756, %int1_52757, %int128_52758 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55333 = torch.aten.view %55331, %55332 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55333, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_52759 = torch.constant.int 131072
    %55334 = torch.prim.ListConstruct %3023, %int131072_52759 : (!torch.int, !torch.int) -> !torch.list<int>
    %55335 = torch.aten.view %55333, %55334 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %55335, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
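    // Same scatter, device 1 shard: page count %3026, indices %55295, values %55311.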
    %int32_52760 = torch.constant.int 32
    %int2_52761 = torch.constant.int 2
    %int16_52762 = torch.constant.int 16
    %int1_52763 = torch.constant.int 1
    %int128_52764 = torch.constant.int 128
    %55336 = torch.prim.ListConstruct %3026, %int32_52760, %int2_52761, %int16_52762, %int1_52763, %int128_52764 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55337 = torch.aten.view %53486, %55336 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55337, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_52765 = torch.constant.int 32
    %55338 = torch.aten.mul.int %3026, %int32_52765 : !torch.int, !torch.int -> !torch.int
    %int2_52766 = torch.constant.int 2
    %55339 = torch.aten.mul.int %55338, %int2_52766 : !torch.int, !torch.int -> !torch.int
    %int16_52767 = torch.constant.int 16
    %int1_52768 = torch.constant.int 1
    %int128_52769 = torch.constant.int 128
    %55340 = torch.prim.ListConstruct %55339, %int16_52767, %int1_52768, %int128_52769 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55341 = torch.aten.view %55337, %55340 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55341, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55342 = torch.prim.ListConstruct %55295 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_52770 = torch.constant.bool false
    %55343 = torch.aten.index_put %55341, %55342, %55311, %false_52770 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55343, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_52771 = torch.constant.int 32
    %int2_52772 = torch.constant.int 2
    %int16_52773 = torch.constant.int 16
    %int1_52774 = torch.constant.int 1
    %int128_52775 = torch.constant.int 128
    %55344 = torch.prim.ListConstruct %3026, %int32_52771, %int2_52772, %int16_52773, %int1_52774, %int128_52775 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55345 = torch.aten.view %55343, %55344 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55345, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_52776 = torch.constant.int 131072
    %55346 = torch.prim.ListConstruct %3026, %int131072_52776 : (!torch.int, !torch.int) -> !torch.list<int>
    %55347 = torch.aten.view %55345, %55346 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %55347, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
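    // Same scatter, device 2 shard: page count %3029, indices %55297, values %55313.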
    %int32_52777 = torch.constant.int 32
    %int2_52778 = torch.constant.int 2
    %int16_52779 = torch.constant.int 16
    %int1_52780 = torch.constant.int 1
    %int128_52781 = torch.constant.int 128
    %55348 = torch.prim.ListConstruct %3029, %int32_52777, %int2_52778, %int16_52779, %int1_52780, %int128_52781 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55349 = torch.aten.view %53498, %55348 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55349, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_52782 = torch.constant.int 32
    %55350 = torch.aten.mul.int %3029, %int32_52782 : !torch.int, !torch.int -> !torch.int
    %int2_52783 = torch.constant.int 2
    %55351 = torch.aten.mul.int %55350, %int2_52783 : !torch.int, !torch.int -> !torch.int
    %int16_52784 = torch.constant.int 16
    %int1_52785 = torch.constant.int 1
    %int128_52786 = torch.constant.int 128
    %55352 = torch.prim.ListConstruct %55351, %int16_52784, %int1_52785, %int128_52786 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55353 = torch.aten.view %55349, %55352 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55353, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55354 = torch.prim.ListConstruct %55297 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_52787 = torch.constant.bool false
    %55355 = torch.aten.index_put %55353, %55354, %55313, %false_52787 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55355, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_52788 = torch.constant.int 32
    %int2_52789 = torch.constant.int 2
    %int16_52790 = torch.constant.int 16
    %int1_52791 = torch.constant.int 1
    %int128_52792 = torch.constant.int 128
    %55356 = torch.prim.ListConstruct %3029, %int32_52788, %int2_52789, %int16_52790, %int1_52791, %int128_52792 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55357 = torch.aten.view %55355, %55356 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55357, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_52793 = torch.constant.int 131072
    %55358 = torch.prim.ListConstruct %3029, %int131072_52793 : (!torch.int, !torch.int) -> !torch.list<int>
    %55359 = torch.aten.view %55357, %55358 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %55359, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
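    // Same scatter, device 3 shard: page count %3032, indices %55299, values %55315.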
    %int32_52794 = torch.constant.int 32
    %int2_52795 = torch.constant.int 2
    %int16_52796 = torch.constant.int 16
    %int1_52797 = torch.constant.int 1
    %int128_52798 = torch.constant.int 128
    %55360 = torch.prim.ListConstruct %3032, %int32_52794, %int2_52795, %int16_52796, %int1_52797, %int128_52798 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55361 = torch.aten.view %53510, %55360 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55361, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_52799 = torch.constant.int 32
    %55362 = torch.aten.mul.int %3032, %int32_52799 : !torch.int, !torch.int -> !torch.int
    %int2_52800 = torch.constant.int 2
    %55363 = torch.aten.mul.int %55362, %int2_52800 : !torch.int, !torch.int -> !torch.int
    %int16_52801 = torch.constant.int 16
    %int1_52802 = torch.constant.int 1
    %int128_52803 = torch.constant.int 128
    %55364 = torch.prim.ListConstruct %55363, %int16_52801, %int1_52802, %int128_52803 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55365 = torch.aten.view %55361, %55364 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55365, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55366 = torch.prim.ListConstruct %55299 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_52804 = torch.constant.bool false
    %55367 = torch.aten.index_put %55365, %55366, %55315, %false_52804 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55367, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_52805 = torch.constant.int 32
    %int2_52806 = torch.constant.int 2
    %int16_52807 = torch.constant.int 16
    %int1_52808 = torch.constant.int 1
    %int128_52809 = torch.constant.int 128
    %55368 = torch.prim.ListConstruct %3032, %int32_52805, %int2_52806, %int16_52807, %int1_52808, %int128_52809 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55369 = torch.aten.view %55367, %55368 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55369, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_52810 = torch.constant.int 131072
    %55370 = torch.prim.ListConstruct %3032, %int131072_52810 : (!torch.int, !torch.int) -> !torch.list<int>
    %55371 = torch.aten.view %55369, %55370 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %55371, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
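    // Same scatter, device 4 shard: page count %3035, indices %55301, values %55317.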
    %int32_52811 = torch.constant.int 32
    %int2_52812 = torch.constant.int 2
    %int16_52813 = torch.constant.int 16
    %int1_52814 = torch.constant.int 1
    %int128_52815 = torch.constant.int 128
    %55372 = torch.prim.ListConstruct %3035, %int32_52811, %int2_52812, %int16_52813, %int1_52814, %int128_52815 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55373 = torch.aten.view %53522, %55372 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55373, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_52816 = torch.constant.int 32
    %55374 = torch.aten.mul.int %3035, %int32_52816 : !torch.int, !torch.int -> !torch.int
    %int2_52817 = torch.constant.int 2
    %55375 = torch.aten.mul.int %55374, %int2_52817 : !torch.int, !torch.int -> !torch.int
    %int16_52818 = torch.constant.int 16
    %int1_52819 = torch.constant.int 1
    %int128_52820 = torch.constant.int 128
    %55376 = torch.prim.ListConstruct %55375, %int16_52818, %int1_52819, %int128_52820 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55377 = torch.aten.view %55373, %55376 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55377, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55378 = torch.prim.ListConstruct %55301 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_52821 = torch.constant.bool false
    %55379 = torch.aten.index_put %55377, %55378, %55317, %false_52821 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55379, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_52822 = torch.constant.int 32
    %int2_52823 = torch.constant.int 2
    %int16_52824 = torch.constant.int 16
    %int1_52825 = torch.constant.int 1
    %int128_52826 = torch.constant.int 128
    %55380 = torch.prim.ListConstruct %3035, %int32_52822, %int2_52823, %int16_52824, %int1_52825, %int128_52826 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55381 = torch.aten.view %55379, %55380 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55381, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_52827 = torch.constant.int 131072
    %55382 = torch.prim.ListConstruct %3035, %int131072_52827 : (!torch.int, !torch.int) -> !torch.list<int>
    %55383 = torch.aten.view %55381, %55382 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %55383, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
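    // Same scatter, device 5 shard: page count %3038, indices %55303, values %55319.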
    %int32_52828 = torch.constant.int 32
    %int2_52829 = torch.constant.int 2
    %int16_52830 = torch.constant.int 16
    %int1_52831 = torch.constant.int 1
    %int128_52832 = torch.constant.int 128
    %55384 = torch.prim.ListConstruct %3038, %int32_52828, %int2_52829, %int16_52830, %int1_52831, %int128_52832 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55385 = torch.aten.view %53534, %55384 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55385, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_52833 = torch.constant.int 32
    %55386 = torch.aten.mul.int %3038, %int32_52833 : !torch.int, !torch.int -> !torch.int
    %int2_52834 = torch.constant.int 2
    %55387 = torch.aten.mul.int %55386, %int2_52834 : !torch.int, !torch.int -> !torch.int
    %int16_52835 = torch.constant.int 16
    %int1_52836 = torch.constant.int 1
    %int128_52837 = torch.constant.int 128
    %55388 = torch.prim.ListConstruct %55387, %int16_52835, %int1_52836, %int128_52837 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55389 = torch.aten.view %55385, %55388 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55389, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55390 = torch.prim.ListConstruct %55303 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_52838 = torch.constant.bool false
    %55391 = torch.aten.index_put %55389, %55390, %55319, %false_52838 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55391, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_52839 = torch.constant.int 32
    %int2_52840 = torch.constant.int 2
    %int16_52841 = torch.constant.int 16
    %int1_52842 = torch.constant.int 1
    %int128_52843 = torch.constant.int 128
    %55392 = torch.prim.ListConstruct %3038, %int32_52839, %int2_52840, %int16_52841, %int1_52842, %int128_52843 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55393 = torch.aten.view %55391, %55392 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55393, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_52844 = torch.constant.int 131072
    %55394 = torch.prim.ListConstruct %3038, %int131072_52844 : (!torch.int, !torch.int) -> !torch.list<int>
    %55395 = torch.aten.view %55393, %55394 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %55395, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
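    // Same scatter, device 6 shard: page count %3041, indices %55305, values %55321.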
    %int32_52845 = torch.constant.int 32
    %int2_52846 = torch.constant.int 2
    %int16_52847 = torch.constant.int 16
    %int1_52848 = torch.constant.int 1
    %int128_52849 = torch.constant.int 128
    %55396 = torch.prim.ListConstruct %3041, %int32_52845, %int2_52846, %int16_52847, %int1_52848, %int128_52849 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55397 = torch.aten.view %53546, %55396 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55397, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_52850 = torch.constant.int 32
    %55398 = torch.aten.mul.int %3041, %int32_52850 : !torch.int, !torch.int -> !torch.int
    %int2_52851 = torch.constant.int 2
    %55399 = torch.aten.mul.int %55398, %int2_52851 : !torch.int, !torch.int -> !torch.int
    %int16_52852 = torch.constant.int 16
    %int1_52853 = torch.constant.int 1
    %int128_52854 = torch.constant.int 128
    %55400 = torch.prim.ListConstruct %55399, %int16_52852, %int1_52853, %int128_52854 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55401 = torch.aten.view %55397, %55400 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55401, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55402 = torch.prim.ListConstruct %55305 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_52855 = torch.constant.bool false
    %55403 = torch.aten.index_put %55401, %55402, %55321, %false_52855 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55403, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_52856 = torch.constant.int 32
    %int2_52857 = torch.constant.int 2
    %int16_52858 = torch.constant.int 16
    %int1_52859 = torch.constant.int 1
    %int128_52860 = torch.constant.int 128
    %55404 = torch.prim.ListConstruct %3041, %int32_52856, %int2_52857, %int16_52858, %int1_52859, %int128_52860 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55405 = torch.aten.view %55403, %55404 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55405, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_52861 = torch.constant.int 131072
    %55406 = torch.prim.ListConstruct %3041, %int131072_52861 : (!torch.int, !torch.int) -> !torch.list<int>
    %55407 = torch.aten.view %55405, %55406 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %55407, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
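    // Same scatter, device 7 shard: page count %3044, indices %55307, values %55323.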
    %int32_52862 = torch.constant.int 32
    %int2_52863 = torch.constant.int 2
    %int16_52864 = torch.constant.int 16
    %int1_52865 = torch.constant.int 1
    %int128_52866 = torch.constant.int 128
    %55408 = torch.prim.ListConstruct %3044, %int32_52862, %int2_52863, %int16_52864, %int1_52865, %int128_52866 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55409 = torch.aten.view %53558, %55408 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55409, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_52867 = torch.constant.int 32
    %55410 = torch.aten.mul.int %3044, %int32_52867 : !torch.int, !torch.int -> !torch.int
    %int2_52868 = torch.constant.int 2
    %55411 = torch.aten.mul.int %55410, %int2_52868 : !torch.int, !torch.int -> !torch.int
    %int16_52869 = torch.constant.int 16
    %int1_52870 = torch.constant.int 1
    %int128_52871 = torch.constant.int 128
    %55412 = torch.prim.ListConstruct %55411, %int16_52869, %int1_52870, %int128_52871 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55413 = torch.aten.view %55409, %55412 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55413, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %55414 = torch.prim.ListConstruct %55307 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_52872 = torch.constant.bool false
    %55415 = torch.aten.index_put %55413, %55414, %55323, %false_52872 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %55415, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_52873 = torch.constant.int 32
    %int2_52874 = torch.constant.int 2
    %int16_52875 = torch.constant.int 16
    %int1_52876 = torch.constant.int 1
    %int128_52877 = torch.constant.int 128
    %55416 = torch.prim.ListConstruct %3044, %int32_52873, %int2_52874, %int16_52875, %int1_52876, %int128_52877 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55417 = torch.aten.view %55415, %55416 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %55417, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_52878 = torch.constant.int 131072
    %55418 = torch.prim.ListConstruct %3044, %int131072_52878 : (!torch.int, !torch.int) -> !torch.list<int>
    %55419 = torch.aten.view %55417, %55418 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %55419, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
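    // All eight shard caches are updated; attention inputs are assembled next.
    // Each current K tensor [4, seq, 1, 128] gains a broadcast axis at dim -2.
    // Together with the expand/view that follows, this is the standard GQA
    // "repeat_kv": the shard's single KV head is reused by its 4 query heads
    // (n_rep = 4, inferred from the 1 -> 4 expand below).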
    %int-2_52879 = torch.constant.int -2
    %55420 = torch.aten.unsqueeze %55034, %int-2_52879 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52880 = torch.constant.int -2
    %55421 = torch.aten.unsqueeze %55049, %int-2_52880 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52881 = torch.constant.int -2
    %55422 = torch.aten.unsqueeze %55064, %int-2_52881 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52882 = torch.constant.int -2
    %55423 = torch.aten.unsqueeze %55079, %int-2_52882 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52883 = torch.constant.int -2
    %55424 = torch.aten.unsqueeze %55094, %int-2_52883 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52884 = torch.constant.int -2
    %55425 = torch.aten.unsqueeze %55109, %int-2_52884 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52885 = torch.constant.int -2
    %55426 = torch.aten.unsqueeze %55124, %int-2_52885 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52886 = torch.constant.int -2
    %55427 = torch.aten.unsqueeze %55139, %int-2_52886 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
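    // Broadcast the singleton head axis to 4 and collapse the two head axes.
    // PyTorch-level reading (assumption, hypothetical names):
    //   k = k.unsqueeze(-2).expand(4, seq, 1, 4, 128).reshape(4, seq, 4, 128)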
    %int4_52887 = torch.constant.int 4
    %int1_52888 = torch.constant.int 1
    %int4_52889 = torch.constant.int 4
    %int128_52890 = torch.constant.int 128
    %55428 = torch.prim.ListConstruct %int4_52887, %55020, %int1_52888, %int4_52889, %int128_52890 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52891 = torch.constant.bool false
    %55429 = torch.aten.expand %55420, %55428, %false_52891 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52892 = torch.constant.int 4
    %int1_52893 = torch.constant.int 1
    %int4_52894 = torch.constant.int 4
    %int128_52895 = torch.constant.int 128
    %55430 = torch.prim.ListConstruct %int4_52892, %55020, %int1_52893, %int4_52894, %int128_52895 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52896 = torch.constant.bool false
    %55431 = torch.aten.expand %55421, %55430, %false_52896 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52897 = torch.constant.int 4
    %int1_52898 = torch.constant.int 1
    %int4_52899 = torch.constant.int 4
    %int128_52900 = torch.constant.int 128
    %55432 = torch.prim.ListConstruct %int4_52897, %55020, %int1_52898, %int4_52899, %int128_52900 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52901 = torch.constant.bool false
    %55433 = torch.aten.expand %55422, %55432, %false_52901 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52902 = torch.constant.int 4
    %int1_52903 = torch.constant.int 1
    %int4_52904 = torch.constant.int 4
    %int128_52905 = torch.constant.int 128
    %55434 = torch.prim.ListConstruct %int4_52902, %55020, %int1_52903, %int4_52904, %int128_52905 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52906 = torch.constant.bool false
    %55435 = torch.aten.expand %55423, %55434, %false_52906 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52907 = torch.constant.int 4
    %int1_52908 = torch.constant.int 1
    %int4_52909 = torch.constant.int 4
    %int128_52910 = torch.constant.int 128
    %55436 = torch.prim.ListConstruct %int4_52907, %55020, %int1_52908, %int4_52909, %int128_52910 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52911 = torch.constant.bool false
    %55437 = torch.aten.expand %55424, %55436, %false_52911 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52912 = torch.constant.int 4
    %int1_52913 = torch.constant.int 1
    %int4_52914 = torch.constant.int 4
    %int128_52915 = torch.constant.int 128
    %55438 = torch.prim.ListConstruct %int4_52912, %55020, %int1_52913, %int4_52914, %int128_52915 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52916 = torch.constant.bool false
    %55439 = torch.aten.expand %55425, %55438, %false_52916 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52917 = torch.constant.int 4
    %int1_52918 = torch.constant.int 1
    %int4_52919 = torch.constant.int 4
    %int128_52920 = torch.constant.int 128
    %55440 = torch.prim.ListConstruct %int4_52917, %55020, %int1_52918, %int4_52919, %int128_52920 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52921 = torch.constant.bool false
    %55441 = torch.aten.expand %55426, %55440, %false_52921 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52922 = torch.constant.int 4
    %int1_52923 = torch.constant.int 1
    %int4_52924 = torch.constant.int 4
    %int128_52925 = torch.constant.int 128
    %55442 = torch.prim.ListConstruct %int4_52922, %55020, %int1_52923, %int4_52924, %int128_52925 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52926 = torch.constant.bool false
    %55443 = torch.aten.expand %55427, %55442, %false_52926 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52927 = torch.constant.int 4
    %int4_52928 = torch.constant.int 4
    %int128_52929 = torch.constant.int 128
    %55444 = torch.prim.ListConstruct %int4_52927, %55020, %int4_52928, %int128_52929 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55445 = torch.aten.view %55429, %55444 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52930 = torch.constant.int 4
    %int4_52931 = torch.constant.int 4
    %int128_52932 = torch.constant.int 128
    %55446 = torch.prim.ListConstruct %int4_52930, %55020, %int4_52931, %int128_52932 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55447 = torch.aten.view %55431, %55446 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52933 = torch.constant.int 4
    %int4_52934 = torch.constant.int 4
    %int128_52935 = torch.constant.int 128
    %55448 = torch.prim.ListConstruct %int4_52933, %55020, %int4_52934, %int128_52935 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55449 = torch.aten.view %55433, %55448 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52936 = torch.constant.int 4
    %int4_52937 = torch.constant.int 4
    %int128_52938 = torch.constant.int 128
    %55450 = torch.prim.ListConstruct %int4_52936, %55020, %int4_52937, %int128_52938 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55451 = torch.aten.view %55435, %55450 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52939 = torch.constant.int 4
    %int4_52940 = torch.constant.int 4
    %int128_52941 = torch.constant.int 128
    %55452 = torch.prim.ListConstruct %int4_52939, %55020, %int4_52940, %int128_52941 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55453 = torch.aten.view %55437, %55452 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52942 = torch.constant.int 4
    %int4_52943 = torch.constant.int 4
    %int128_52944 = torch.constant.int 128
    %55454 = torch.prim.ListConstruct %int4_52942, %55020, %int4_52943, %int128_52944 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55455 = torch.aten.view %55439, %55454 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52945 = torch.constant.int 4
    %int4_52946 = torch.constant.int 4
    %int128_52947 = torch.constant.int 128
    %55456 = torch.prim.ListConstruct %int4_52945, %55020, %int4_52946, %int128_52947 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55457 = torch.aten.view %55441, %55456 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_52948 = torch.constant.int 4
    %int4_52949 = torch.constant.int 4
    %int128_52950 = torch.constant.int 128
    %55458 = torch.prim.ListConstruct %int4_52948, %55020, %int4_52949, %int128_52950 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55459 = torch.aten.view %55443, %55458 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
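    // The value tensors (%54809 ... %54823) get the identical unsqueeze ->
    // expand -> view broadcast; the dynamic sequence length is re-read from
    // dim 1 of %54733 below instead of reusing %55020.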
    %int-2_52951 = torch.constant.int -2
    %55460 = torch.aten.unsqueeze %54809, %int-2_52951 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52952 = torch.constant.int -2
    %55461 = torch.aten.unsqueeze %54811, %int-2_52952 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52953 = torch.constant.int -2
    %55462 = torch.aten.unsqueeze %54813, %int-2_52953 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55462, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52954 = torch.constant.int -2
    %55463 = torch.aten.unsqueeze %54815, %int-2_52954 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52955 = torch.constant.int -2
    %55464 = torch.aten.unsqueeze %54817, %int-2_52955 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55464, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52956 = torch.constant.int -2
    %55465 = torch.aten.unsqueeze %54819, %int-2_52956 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52957 = torch.constant.int -2
    %55466 = torch.aten.unsqueeze %54821, %int-2_52957 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_52958 = torch.constant.int -2
    %55467 = torch.aten.unsqueeze %54823, %int-2_52958 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %55467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_52959 = torch.constant.int 1
    %55468 = torch.aten.size.int %54733, %int1_52959 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_52960 = torch.constant.int 4
    %int1_52961 = torch.constant.int 1
    %int4_52962 = torch.constant.int 4
    %int128_52963 = torch.constant.int 128
    %55469 = torch.prim.ListConstruct %int4_52960, %55468, %int1_52961, %int4_52962, %int128_52963 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52964 = torch.constant.bool false
    %55470 = torch.aten.expand %55460, %55469, %false_52964 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52965 = torch.constant.int 4
    %int1_52966 = torch.constant.int 1
    %int4_52967 = torch.constant.int 4
    %int128_52968 = torch.constant.int 128
    %55471 = torch.prim.ListConstruct %int4_52965, %55468, %int1_52966, %int4_52967, %int128_52968 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52969 = torch.constant.bool false
    %55472 = torch.aten.expand %55461, %55471, %false_52969 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52970 = torch.constant.int 4
    %int1_52971 = torch.constant.int 1
    %int4_52972 = torch.constant.int 4
    %int128_52973 = torch.constant.int 128
    %55473 = torch.prim.ListConstruct %int4_52970, %55468, %int1_52971, %int4_52972, %int128_52973 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52974 = torch.constant.bool false
    %55474 = torch.aten.expand %55462, %55473, %false_52974 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52975 = torch.constant.int 4
    %int1_52976 = torch.constant.int 1
    %int4_52977 = torch.constant.int 4
    %int128_52978 = torch.constant.int 128
    %55475 = torch.prim.ListConstruct %int4_52975, %55468, %int1_52976, %int4_52977, %int128_52978 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52979 = torch.constant.bool false
    %55476 = torch.aten.expand %55463, %55475, %false_52979 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52980 = torch.constant.int 4
    %int1_52981 = torch.constant.int 1
    %int4_52982 = torch.constant.int 4
    %int128_52983 = torch.constant.int 128
    %55477 = torch.prim.ListConstruct %int4_52980, %55468, %int1_52981, %int4_52982, %int128_52983 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52984 = torch.constant.bool false
    %55478 = torch.aten.expand %55464, %55477, %false_52984 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52985 = torch.constant.int 4
    %int1_52986 = torch.constant.int 1
    %int4_52987 = torch.constant.int 4
    %int128_52988 = torch.constant.int 128
    %55479 = torch.prim.ListConstruct %int4_52985, %55468, %int1_52986, %int4_52987, %int128_52988 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52989 = torch.constant.bool false
    %55480 = torch.aten.expand %55465, %55479, %false_52989 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52990 = torch.constant.int 4
    %int1_52991 = torch.constant.int 1
    %int4_52992 = torch.constant.int 4
    %int128_52993 = torch.constant.int 128
    %55481 = torch.prim.ListConstruct %int4_52990, %55468, %int1_52991, %int4_52992, %int128_52993 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52994 = torch.constant.bool false
    %55482 = torch.aten.expand %55466, %55481, %false_52994 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_52995 = torch.constant.int 4
    %int1_52996 = torch.constant.int 1
    %int4_52997 = torch.constant.int 4
    %int128_52998 = torch.constant.int 128
    %55483 = torch.prim.ListConstruct %int4_52995, %55468, %int1_52996, %int4_52997, %int128_52998 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_52999 = torch.constant.bool false
    %55484 = torch.aten.expand %55467, %55483, %false_52999 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %55484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_53000 = torch.constant.int 4
    %int4_53001 = torch.constant.int 4
    %int128_53002 = torch.constant.int 128
    %55485 = torch.prim.ListConstruct %int4_53000, %55468, %int4_53001, %int128_53002 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55486 = torch.aten.view %55470, %55485 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53003 = torch.constant.int 4
    %int4_53004 = torch.constant.int 4
    %int128_53005 = torch.constant.int 128
    %55487 = torch.prim.ListConstruct %int4_53003, %55468, %int4_53004, %int128_53005 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55488 = torch.aten.view %55472, %55487 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53006 = torch.constant.int 4
    %int4_53007 = torch.constant.int 4
    %int128_53008 = torch.constant.int 128
    %55489 = torch.prim.ListConstruct %int4_53006, %55468, %int4_53007, %int128_53008 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55490 = torch.aten.view %55474, %55489 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53009 = torch.constant.int 4
    %int4_53010 = torch.constant.int 4
    %int128_53011 = torch.constant.int 128
    %55491 = torch.prim.ListConstruct %int4_53009, %55468, %int4_53010, %int128_53011 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55492 = torch.aten.view %55476, %55491 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53012 = torch.constant.int 4
    %int4_53013 = torch.constant.int 4
    %int128_53014 = torch.constant.int 128
    %55493 = torch.prim.ListConstruct %int4_53012, %55468, %int4_53013, %int128_53014 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55494 = torch.aten.view %55478, %55493 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53015 = torch.constant.int 4
    %int4_53016 = torch.constant.int 4
    %int128_53017 = torch.constant.int 128
    %55495 = torch.prim.ListConstruct %int4_53015, %55468, %int4_53016, %int128_53017 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55496 = torch.aten.view %55480, %55495 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53018 = torch.constant.int 4
    %int4_53019 = torch.constant.int 4
    %int128_53020 = torch.constant.int 128
    %55497 = torch.prim.ListConstruct %int4_53018, %55468, %int4_53019, %int128_53020 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55498 = torch.aten.view %55482, %55497 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53021 = torch.constant.int 4
    %int4_53022 = torch.constant.int 4
    %int128_53023 = torch.constant.int 128
    %55499 = torch.prim.ListConstruct %int4_53021, %55468, %int4_53022, %int128_53023 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55500 = torch.aten.view %55484, %55499 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
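    // Transpose dims 1 and 2 of every Q (%54876 ... %54981), broadcast-K, and
    // broadcast-V tensor: [4, seq, 4, 128] -> [4, 4, seq, 128], putting heads
    // ahead of sequence as the attention op below expects.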
    %int1_53024 = torch.constant.int 1
    %int2_53025 = torch.constant.int 2
    %55501 = torch.aten.transpose.int %54876, %int1_53024, %int2_53025 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55501, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53026 = torch.constant.int 1
    %int2_53027 = torch.constant.int 2
    %55502 = torch.aten.transpose.int %54891, %int1_53026, %int2_53027 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55502, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53028 = torch.constant.int 1
    %int2_53029 = torch.constant.int 2
    %55503 = torch.aten.transpose.int %54906, %int1_53028, %int2_53029 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55503, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53030 = torch.constant.int 1
    %int2_53031 = torch.constant.int 2
    %55504 = torch.aten.transpose.int %54921, %int1_53030, %int2_53031 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55504, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53032 = torch.constant.int 1
    %int2_53033 = torch.constant.int 2
    %55505 = torch.aten.transpose.int %54936, %int1_53032, %int2_53033 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55505, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53034 = torch.constant.int 1
    %int2_53035 = torch.constant.int 2
    %55506 = torch.aten.transpose.int %54951, %int1_53034, %int2_53035 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55506, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53036 = torch.constant.int 1
    %int2_53037 = torch.constant.int 2
    %55507 = torch.aten.transpose.int %54966, %int1_53036, %int2_53037 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55507, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53038 = torch.constant.int 1
    %int2_53039 = torch.constant.int 2
    %55508 = torch.aten.transpose.int %54981, %int1_53038, %int2_53039 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55508, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53040 = torch.constant.int 1
    %int2_53041 = torch.constant.int 2
    %55509 = torch.aten.transpose.int %55445, %int1_53040, %int2_53041 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55509, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53042 = torch.constant.int 1
    %int2_53043 = torch.constant.int 2
    %55510 = torch.aten.transpose.int %55447, %int1_53042, %int2_53043 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55510, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53044 = torch.constant.int 1
    %int2_53045 = torch.constant.int 2
    %55511 = torch.aten.transpose.int %55449, %int1_53044, %int2_53045 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55511, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53046 = torch.constant.int 1
    %int2_53047 = torch.constant.int 2
    %55512 = torch.aten.transpose.int %55451, %int1_53046, %int2_53047 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55512, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53048 = torch.constant.int 1
    %int2_53049 = torch.constant.int 2
    %55513 = torch.aten.transpose.int %55453, %int1_53048, %int2_53049 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55513, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53050 = torch.constant.int 1
    %int2_53051 = torch.constant.int 2
    %55514 = torch.aten.transpose.int %55455, %int1_53050, %int2_53051 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55514, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53052 = torch.constant.int 1
    %int2_53053 = torch.constant.int 2
    %55515 = torch.aten.transpose.int %55457, %int1_53052, %int2_53053 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55515, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53054 = torch.constant.int 1
    %int2_53055 = torch.constant.int 2
    %55516 = torch.aten.transpose.int %55459, %int1_53054, %int2_53055 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55516, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53056 = torch.constant.int 1
    %int2_53057 = torch.constant.int 2
    %55517 = torch.aten.transpose.int %55486, %int1_53056, %int2_53057 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55517, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53058 = torch.constant.int 1
    %int2_53059 = torch.constant.int 2
    %55518 = torch.aten.transpose.int %55488, %int1_53058, %int2_53059 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55518, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53060 = torch.constant.int 1
    %int2_53061 = torch.constant.int 2
    %55519 = torch.aten.transpose.int %55490, %int1_53060, %int2_53061 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55519, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53062 = torch.constant.int 1
    %int2_53063 = torch.constant.int 2
    %55520 = torch.aten.transpose.int %55492, %int1_53062, %int2_53063 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55520, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53064 = torch.constant.int 1
    %int2_53065 = torch.constant.int 2
    %55521 = torch.aten.transpose.int %55494, %int1_53064, %int2_53065 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55521, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53066 = torch.constant.int 1
    %int2_53067 = torch.constant.int 2
    %55522 = torch.aten.transpose.int %55496, %int1_53066, %int2_53067 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55522, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53068 = torch.constant.int 1
    %int2_53069 = torch.constant.int 2
    %55523 = torch.aten.transpose.int %55498, %int1_53068, %int2_53069 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55523, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_53070 = torch.constant.int 1
    %int2_53071 = torch.constant.int 2
    %55524 = torch.aten.transpose.int %55500, %int1_53070, %int2_53071 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %55524, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
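    // One fused attention call per device shard: dropout 0.0, is_causal = true,
    // and no attention mask or scale override (the two torch.none operands);
    // each call returns (attention output, logsumexp). PyTorch-level
    // equivalent (assumption):
    //   out, lse = torch.ops.aten._scaled_dot_product_flash_attention_for_cpu(
    //       q, k, v, dropout_p=0.0, is_causal=True)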
    %float0.000000e00_53072 = torch.constant.float 0.000000e+00
    %true_53073 = torch.constant.bool true
    %none_53074 = torch.constant.none
    %none_53075 = torch.constant.none
    %55525:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%55501, %55509, %55517, %float0.000000e00_53072, %true_53073, %none_53074, %none_53075) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %55525#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_53076 = torch.constant.float 0.000000e+00
    %true_53077 = torch.constant.bool true
    %none_53078 = torch.constant.none
    %none_53079 = torch.constant.none
    %55526:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%55502, %55510, %55518, %float0.000000e00_53076, %true_53077, %none_53078, %none_53079) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %55526#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_53080 = torch.constant.float 0.000000e+00
    %true_53081 = torch.constant.bool true
    %none_53082 = torch.constant.none
    %none_53083 = torch.constant.none
    %55527:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%55503, %55511, %55519, %float0.000000e00_53080, %true_53081, %none_53082, %none_53083) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %55527#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_53084 = torch.constant.float 0.000000e+00
    %true_53085 = torch.constant.bool true
    %none_53086 = torch.constant.none
    %none_53087 = torch.constant.none
    %55528:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%55504, %55512, %55520, %float0.000000e00_53084, %true_53085, %none_53086, %none_53087) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %55528#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_53088 = torch.constant.float 0.000000e+00
    %true_53089 = torch.constant.bool true
    %none_53090 = torch.constant.none
    %none_53091 = torch.constant.none
    %55529:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%55505, %55513, %55521, %float0.000000e00_53088, %true_53089, %none_53090, %none_53091) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %55529#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_53092 = torch.constant.float 0.000000e+00
    %true_53093 = torch.constant.bool true
    %none_53094 = torch.constant.none
    %none_53095 = torch.constant.none
    %55530:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%55506, %55514, %55522, %float0.000000e00_53092, %true_53093, %none_53094, %none_53095) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %55530#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_53096 = torch.constant.float 0.000000e+00
    %true_53097 = torch.constant.bool true
    %none_53098 = torch.constant.none
    %none_53099 = torch.constant.none
    %55531:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%55507, %55515, %55523, %float0.000000e00_53096, %true_53097, %none_53098, %none_53099) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %55531#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_53100 = torch.constant.float 0.000000e+00
    %true_53101 = torch.constant.bool true
    %none_53102 = torch.constant.none
    %none_53103 = torch.constant.none
    %55532:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%55508, %55516, %55524, %float0.000000e00_53100, %true_53101, %none_53102, %none_53103) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %55532#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
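    // Transpose each attention output from [4, 4, seq, 128] back to [4, seq, 4, 128]
    // so the head dimension can be folded back into the feature dimension.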
    %int1_53104 = torch.constant.int 1
    %int2_53105 = torch.constant.int 2
    %55533 = torch.aten.transpose.int %55525#0, %int1_53104, %int2_53105 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_53106 = torch.constant.int 1
    %int2_53107 = torch.constant.int 2
    %55534 = torch.aten.transpose.int %55526#0, %int1_53106, %int2_53107 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_53108 = torch.constant.int 1
    %int2_53109 = torch.constant.int 2
    %55535 = torch.aten.transpose.int %55527#0, %int1_53108, %int2_53109 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_53110 = torch.constant.int 1
    %int2_53111 = torch.constant.int 2
    %55536 = torch.aten.transpose.int %55528#0, %int1_53110, %int2_53111 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_53112 = torch.constant.int 1
    %int2_53113 = torch.constant.int 2
    %55537 = torch.aten.transpose.int %55529#0, %int1_53112, %int2_53113 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_53114 = torch.constant.int 1
    %int2_53115 = torch.constant.int 2
    %55538 = torch.aten.transpose.int %55530#0, %int1_53114, %int2_53115 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_53116 = torch.constant.int 1
    %int2_53117 = torch.constant.int 2
    %55539 = torch.aten.transpose.int %55531#0, %int1_53116, %int2_53117 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_53118 = torch.constant.int 1
    %int2_53119 = torch.constant.int 2
    %55540 = torch.aten.transpose.int %55532#0, %int1_53118, %int2_53119 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %55540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
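    // Collapse the 4 heads x 128 features of each shard into one 512-wide dimension: [4, seq, 512].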
    %int4_53120 = torch.constant.int 4
    %int512_53121 = torch.constant.int 512
    %55541 = torch.prim.ListConstruct %int4_53120, %54862, %int512_53121 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55542 = torch.aten.view %55533, %55541 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %55542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53122 = torch.constant.int 4
    %int512_53123 = torch.constant.int 512
    %55543 = torch.prim.ListConstruct %int4_53122, %54877, %int512_53123 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55544 = torch.aten.view %55534, %55543 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %55544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53124 = torch.constant.int 4
    %int512_53125 = torch.constant.int 512
    %55545 = torch.prim.ListConstruct %int4_53124, %54892, %int512_53125 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55546 = torch.aten.view %55535, %55545 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %55546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53126 = torch.constant.int 4
    %int512_53127 = torch.constant.int 512
    %55547 = torch.prim.ListConstruct %int4_53126, %54907, %int512_53127 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55548 = torch.aten.view %55536, %55547 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %55548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53128 = torch.constant.int 4
    %int512_53129 = torch.constant.int 512
    %55549 = torch.prim.ListConstruct %int4_53128, %54922, %int512_53129 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55550 = torch.aten.view %55537, %55549 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %55550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53130 = torch.constant.int 4
    %int512_53131 = torch.constant.int 512
    %55551 = torch.prim.ListConstruct %int4_53130, %54937, %int512_53131 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55552 = torch.aten.view %55538, %55551 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %55552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53132 = torch.constant.int 4
    %int512_53133 = torch.constant.int 512
    %55553 = torch.prim.ListConstruct %int4_53132, %54952, %int512_53133 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55554 = torch.aten.view %55539, %55553 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %55554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53134 = torch.constant.int 4
    %int512_53135 = torch.constant.int 512
    %55555 = torch.prim.ListConstruct %int4_53134, %54967, %int512_53135 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55556 = torch.aten.view %55540, %55555 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %55556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
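    // Permute each per-shard projection weight (%2056-%2063) from [4096, 512] to
    // [512, 4096] for use as the right-hand side of the torch.aten.mm ops below.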
    %int1_53136 = torch.constant.int 1
    %int0_53137 = torch.constant.int 0
    %55557 = torch.prim.ListConstruct %int1_53136, %int0_53137 : (!torch.int, !torch.int) -> !torch.list<int>
    %55558 = torch.aten.permute %2056, %55557 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_53138 = torch.constant.int 1
    %int0_53139 = torch.constant.int 0
    %55559 = torch.prim.ListConstruct %int1_53138, %int0_53139 : (!torch.int, !torch.int) -> !torch.list<int>
    %55560 = torch.aten.permute %2057, %55559 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_53140 = torch.constant.int 1
    %int0_53141 = torch.constant.int 0
    %55561 = torch.prim.ListConstruct %int1_53140, %int0_53141 : (!torch.int, !torch.int) -> !torch.list<int>
    %55562 = torch.aten.permute %2058, %55561 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_53142 = torch.constant.int 1
    %int0_53143 = torch.constant.int 0
    %55563 = torch.prim.ListConstruct %int1_53142, %int0_53143 : (!torch.int, !torch.int) -> !torch.list<int>
    %55564 = torch.aten.permute %2059, %55563 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_53144 = torch.constant.int 1
    %int0_53145 = torch.constant.int 0
    %55565 = torch.prim.ListConstruct %int1_53144, %int0_53145 : (!torch.int, !torch.int) -> !torch.list<int>
    %55566 = torch.aten.permute %2060, %55565 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_53146 = torch.constant.int 1
    %int0_53147 = torch.constant.int 0
    %55567 = torch.prim.ListConstruct %int1_53146, %int0_53147 : (!torch.int, !torch.int) -> !torch.list<int>
    %55568 = torch.aten.permute %2061, %55567 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_53148 = torch.constant.int 1
    %int0_53149 = torch.constant.int 0
    %55569 = torch.prim.ListConstruct %int1_53148, %int0_53149 : (!torch.int, !torch.int) -> !torch.list<int>
    %55570 = torch.aten.permute %2062, %55569 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_53150 = torch.constant.int 1
    %int0_53151 = torch.constant.int 0
    %55571 = torch.prim.ListConstruct %int1_53150, %int0_53151 : (!torch.int, !torch.int) -> !torch.list<int>
    %55572 = torch.aten.permute %2063, %55571 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
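    // Per-shard output projection: flatten [4, seq, 512] -> [4*seq, 512], matmul with the
    // permuted [512, 4096] weight, then view back to [4, seq, 4096]. Each shard produces a
    // partial result that is summed across all eight devices in the blocks that follow.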
    %int4_53152 = torch.constant.int 4
    %55573 = torch.aten.mul.int %int4_53152, %54862 : !torch.int, !torch.int -> !torch.int
    %int512_53153 = torch.constant.int 512
    %55574 = torch.prim.ListConstruct %55573, %int512_53153 : (!torch.int, !torch.int) -> !torch.list<int>
    %55575 = torch.aten.view %55542, %55574 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %55575, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %55576 = torch.aten.mm %55575, %55558 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55576, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53154 = torch.constant.int 4
    %int4096_53155 = torch.constant.int 4096
    %55577 = torch.prim.ListConstruct %int4_53154, %54862, %int4096_53155 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55578 = torch.aten.view %55576, %55577 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_53156 = torch.constant.int 4
    %55579 = torch.aten.mul.int %int4_53156, %54877 : !torch.int, !torch.int -> !torch.int
    %int512_53157 = torch.constant.int 512
    %55580 = torch.prim.ListConstruct %55579, %int512_53157 : (!torch.int, !torch.int) -> !torch.list<int>
    %55581 = torch.aten.view %55544, %55580 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %55581, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %55582 = torch.aten.mm %55581, %55560 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55582, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53158 = torch.constant.int 4
    %int4096_53159 = torch.constant.int 4096
    %55583 = torch.prim.ListConstruct %int4_53158, %54877, %int4096_53159 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55584 = torch.aten.view %55582, %55583 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_53160 = torch.constant.int 4
    %55585 = torch.aten.mul.int %int4_53160, %54892 : !torch.int, !torch.int -> !torch.int
    %int512_53161 = torch.constant.int 512
    %55586 = torch.prim.ListConstruct %55585, %int512_53161 : (!torch.int, !torch.int) -> !torch.list<int>
    %55587 = torch.aten.view %55546, %55586 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %55587, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %55588 = torch.aten.mm %55587, %55562 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55588, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53162 = torch.constant.int 4
    %int4096_53163 = torch.constant.int 4096
    %55589 = torch.prim.ListConstruct %int4_53162, %54892, %int4096_53163 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55590 = torch.aten.view %55588, %55589 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_53164 = torch.constant.int 4
    %55591 = torch.aten.mul.int %int4_53164, %54907 : !torch.int, !torch.int -> !torch.int
    %int512_53165 = torch.constant.int 512
    %55592 = torch.prim.ListConstruct %55591, %int512_53165 : (!torch.int, !torch.int) -> !torch.list<int>
    %55593 = torch.aten.view %55548, %55592 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %55593, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %55594 = torch.aten.mm %55593, %55564 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55594, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53166 = torch.constant.int 4
    %int4096_53167 = torch.constant.int 4096
    %55595 = torch.prim.ListConstruct %int4_53166, %54907, %int4096_53167 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55596 = torch.aten.view %55594, %55595 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_53168 = torch.constant.int 4
    %55597 = torch.aten.mul.int %int4_53168, %54922 : !torch.int, !torch.int -> !torch.int
    %int512_53169 = torch.constant.int 512
    %55598 = torch.prim.ListConstruct %55597, %int512_53169 : (!torch.int, !torch.int) -> !torch.list<int>
    %55599 = torch.aten.view %55550, %55598 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %55599, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %55600 = torch.aten.mm %55599, %55566 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55600, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53170 = torch.constant.int 4
    %int4096_53171 = torch.constant.int 4096
    %55601 = torch.prim.ListConstruct %int4_53170, %54922, %int4096_53171 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55602 = torch.aten.view %55600, %55601 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_53172 = torch.constant.int 4
    %55603 = torch.aten.mul.int %int4_53172, %54937 : !torch.int, !torch.int -> !torch.int
    %int512_53173 = torch.constant.int 512
    %55604 = torch.prim.ListConstruct %55603, %int512_53173 : (!torch.int, !torch.int) -> !torch.list<int>
    %55605 = torch.aten.view %55552, %55604 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %55605, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %55606 = torch.aten.mm %55605, %55568 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55606, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53174 = torch.constant.int 4
    %int4096_53175 = torch.constant.int 4096
    %55607 = torch.prim.ListConstruct %int4_53174, %54937, %int4096_53175 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55608 = torch.aten.view %55606, %55607 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_53176 = torch.constant.int 4
    %55609 = torch.aten.mul.int %int4_53176, %54952 : !torch.int, !torch.int -> !torch.int
    %int512_53177 = torch.constant.int 512
    %55610 = torch.prim.ListConstruct %55609, %int512_53177 : (!torch.int, !torch.int) -> !torch.list<int>
    %55611 = torch.aten.view %55554, %55610 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %55611, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %55612 = torch.aten.mm %55611, %55570 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55612, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53178 = torch.constant.int 4
    %int4096_53179 = torch.constant.int 4096
    %55613 = torch.prim.ListConstruct %int4_53178, %54952, %int4096_53179 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55614 = torch.aten.view %55612, %55613 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_53180 = torch.constant.int 4
    %55615 = torch.aten.mul.int %int4_53180, %54967 : !torch.int, !torch.int -> !torch.int
    %int512_53181 = torch.constant.int 512
    %55616 = torch.prim.ListConstruct %55615, %int512_53181 : (!torch.int, !torch.int) -> !torch.list<int>
    %55617 = torch.aten.view %55556, %55616 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %55617, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %55618 = torch.aten.mm %55617, %55572 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55618, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53182 = torch.constant.int 4
    %int4096_53183 = torch.constant.int 4096
    %55619 = torch.prim.ListConstruct %int4_53182, %54967, %int4096_53183 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55620 = torch.aten.view %55618, %55619 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
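    // Reduction for @__device_0: transfer the other seven partial results to device 0 and
    // accumulate them onto the local partial (%55578) with a chain of adds -- effectively
    // an unrolled all-reduce(sum).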
    %55621 = torch_c.to_builtin_tensor %55584 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53184 = arith.constant 1 : index
    %dim_53185 = tensor.dim %55621, %c1_53184 : tensor<4x?x4096xf16>
    %55622 = flow.tensor.transfer %55621 : tensor<4x?x4096xf16>{%dim_53185} to #hal.device.promise<@__device_0>
    %55623 = torch_c.from_builtin_tensor %55622 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55624 = torch_c.to_builtin_tensor %55590 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53186 = arith.constant 1 : index
    %dim_53187 = tensor.dim %55624, %c1_53186 : tensor<4x?x4096xf16>
    %55625 = flow.tensor.transfer %55624 : tensor<4x?x4096xf16>{%dim_53187} to #hal.device.promise<@__device_0>
    %55626 = torch_c.from_builtin_tensor %55625 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55627 = torch_c.to_builtin_tensor %55596 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53188 = arith.constant 1 : index
    %dim_53189 = tensor.dim %55627, %c1_53188 : tensor<4x?x4096xf16>
    %55628 = flow.tensor.transfer %55627 : tensor<4x?x4096xf16>{%dim_53189} to #hal.device.promise<@__device_0>
    %55629 = torch_c.from_builtin_tensor %55628 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55630 = torch_c.to_builtin_tensor %55602 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53190 = arith.constant 1 : index
    %dim_53191 = tensor.dim %55630, %c1_53190 : tensor<4x?x4096xf16>
    %55631 = flow.tensor.transfer %55630 : tensor<4x?x4096xf16>{%dim_53191} to #hal.device.promise<@__device_0>
    %55632 = torch_c.from_builtin_tensor %55631 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55633 = torch_c.to_builtin_tensor %55608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53192 = arith.constant 1 : index
    %dim_53193 = tensor.dim %55633, %c1_53192 : tensor<4x?x4096xf16>
    %55634 = flow.tensor.transfer %55633 : tensor<4x?x4096xf16>{%dim_53193} to #hal.device.promise<@__device_0>
    %55635 = torch_c.from_builtin_tensor %55634 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55636 = torch_c.to_builtin_tensor %55614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53194 = arith.constant 1 : index
    %dim_53195 = tensor.dim %55636, %c1_53194 : tensor<4x?x4096xf16>
    %55637 = flow.tensor.transfer %55636 : tensor<4x?x4096xf16>{%dim_53195} to #hal.device.promise<@__device_0>
    %55638 = torch_c.from_builtin_tensor %55637 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55639 = torch_c.to_builtin_tensor %55620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53196 = arith.constant 1 : index
    %dim_53197 = tensor.dim %55639, %c1_53196 : tensor<4x?x4096xf16>
    %55640 = flow.tensor.transfer %55639 : tensor<4x?x4096xf16>{%dim_53197} to #hal.device.promise<@__device_0>
    %55641 = torch_c.from_builtin_tensor %55640 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53198 = torch.constant.int 1
    %55642 = torch.aten.add.Tensor %55578, %55623, %int1_53198 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53199 = torch.constant.int 1
    %55643 = torch.aten.add.Tensor %55642, %55626, %int1_53199 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53200 = torch.constant.int 1
    %55644 = torch.aten.add.Tensor %55643, %55629, %int1_53200 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53201 = torch.constant.int 1
    %55645 = torch.aten.add.Tensor %55644, %55632, %int1_53201 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53202 = torch.constant.int 1
    %55646 = torch.aten.add.Tensor %55645, %55635, %int1_53202 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53203 = torch.constant.int 1
    %55647 = torch.aten.add.Tensor %55646, %55638, %int1_53203 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53204 = torch.constant.int 1
    %55648 = torch.aten.add.Tensor %55647, %55641, %int1_53204 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
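    // Reduction for @__device_1: same transfer-and-accumulate pattern; %55584 stays local.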
    %55649 = torch_c.to_builtin_tensor %55578 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53205 = arith.constant 1 : index
    %dim_53206 = tensor.dim %55649, %c1_53205 : tensor<4x?x4096xf16>
    %55650 = flow.tensor.transfer %55649 : tensor<4x?x4096xf16>{%dim_53206} to #hal.device.promise<@__device_1>
    %55651 = torch_c.from_builtin_tensor %55650 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55652 = torch_c.to_builtin_tensor %55590 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53207 = arith.constant 1 : index
    %dim_53208 = tensor.dim %55652, %c1_53207 : tensor<4x?x4096xf16>
    %55653 = flow.tensor.transfer %55652 : tensor<4x?x4096xf16>{%dim_53208} to #hal.device.promise<@__device_1>
    %55654 = torch_c.from_builtin_tensor %55653 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55655 = torch_c.to_builtin_tensor %55596 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53209 = arith.constant 1 : index
    %dim_53210 = tensor.dim %55655, %c1_53209 : tensor<4x?x4096xf16>
    %55656 = flow.tensor.transfer %55655 : tensor<4x?x4096xf16>{%dim_53210} to #hal.device.promise<@__device_1>
    %55657 = torch_c.from_builtin_tensor %55656 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55658 = torch_c.to_builtin_tensor %55602 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53211 = arith.constant 1 : index
    %dim_53212 = tensor.dim %55658, %c1_53211 : tensor<4x?x4096xf16>
    %55659 = flow.tensor.transfer %55658 : tensor<4x?x4096xf16>{%dim_53212} to #hal.device.promise<@__device_1>
    %55660 = torch_c.from_builtin_tensor %55659 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55661 = torch_c.to_builtin_tensor %55608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53213 = arith.constant 1 : index
    %dim_53214 = tensor.dim %55661, %c1_53213 : tensor<4x?x4096xf16>
    %55662 = flow.tensor.transfer %55661 : tensor<4x?x4096xf16>{%dim_53214} to #hal.device.promise<@__device_1>
    %55663 = torch_c.from_builtin_tensor %55662 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55663, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55664 = torch_c.to_builtin_tensor %55614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53215 = arith.constant 1 : index
    %dim_53216 = tensor.dim %55664, %c1_53215 : tensor<4x?x4096xf16>
    %55665 = flow.tensor.transfer %55664 : tensor<4x?x4096xf16>{%dim_53216} to #hal.device.promise<@__device_1>
    %55666 = torch_c.from_builtin_tensor %55665 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55667 = torch_c.to_builtin_tensor %55620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53217 = arith.constant 1 : index
    %dim_53218 = tensor.dim %55667, %c1_53217 : tensor<4x?x4096xf16>
    %55668 = flow.tensor.transfer %55667 : tensor<4x?x4096xf16>{%dim_53218} to #hal.device.promise<@__device_1>
    %55669 = torch_c.from_builtin_tensor %55668 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53219 = torch.constant.int 1
    %55670 = torch.aten.add.Tensor %55651, %55584, %int1_53219 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53220 = torch.constant.int 1
    %55671 = torch.aten.add.Tensor %55670, %55654, %int1_53220 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53221 = torch.constant.int 1
    %55672 = torch.aten.add.Tensor %55671, %55657, %int1_53221 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53222 = torch.constant.int 1
    %55673 = torch.aten.add.Tensor %55672, %55660, %int1_53222 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53223 = torch.constant.int 1
    %55674 = torch.aten.add.Tensor %55673, %55663, %int1_53223 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53224 = torch.constant.int 1
    %55675 = torch.aten.add.Tensor %55674, %55666, %int1_53224 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53225 = torch.constant.int 1
    %55676 = torch.aten.add.Tensor %55675, %55669, %int1_53225 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
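    // Reduction for @__device_2: same pattern; %55590 stays local.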
    %55677 = torch_c.to_builtin_tensor %55578 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53226 = arith.constant 1 : index
    %dim_53227 = tensor.dim %55677, %c1_53226 : tensor<4x?x4096xf16>
    %55678 = flow.tensor.transfer %55677 : tensor<4x?x4096xf16>{%dim_53227} to #hal.device.promise<@__device_2>
    %55679 = torch_c.from_builtin_tensor %55678 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55679, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55680 = torch_c.to_builtin_tensor %55584 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53228 = arith.constant 1 : index
    %dim_53229 = tensor.dim %55680, %c1_53228 : tensor<4x?x4096xf16>
    %55681 = flow.tensor.transfer %55680 : tensor<4x?x4096xf16>{%dim_53229} to #hal.device.promise<@__device_2>
    %55682 = torch_c.from_builtin_tensor %55681 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55683 = torch_c.to_builtin_tensor %55596 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53230 = arith.constant 1 : index
    %dim_53231 = tensor.dim %55683, %c1_53230 : tensor<4x?x4096xf16>
    %55684 = flow.tensor.transfer %55683 : tensor<4x?x4096xf16>{%dim_53231} to #hal.device.promise<@__device_2>
    %55685 = torch_c.from_builtin_tensor %55684 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55685, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55686 = torch_c.to_builtin_tensor %55602 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53232 = arith.constant 1 : index
    %dim_53233 = tensor.dim %55686, %c1_53232 : tensor<4x?x4096xf16>
    %55687 = flow.tensor.transfer %55686 : tensor<4x?x4096xf16>{%dim_53233} to #hal.device.promise<@__device_2>
    %55688 = torch_c.from_builtin_tensor %55687 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55689 = torch_c.to_builtin_tensor %55608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53234 = arith.constant 1 : index
    %dim_53235 = tensor.dim %55689, %c1_53234 : tensor<4x?x4096xf16>
    %55690 = flow.tensor.transfer %55689 : tensor<4x?x4096xf16>{%dim_53235} to #hal.device.promise<@__device_2>
    %55691 = torch_c.from_builtin_tensor %55690 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55692 = torch_c.to_builtin_tensor %55614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53236 = arith.constant 1 : index
    %dim_53237 = tensor.dim %55692, %c1_53236 : tensor<4x?x4096xf16>
    %55693 = flow.tensor.transfer %55692 : tensor<4x?x4096xf16>{%dim_53237} to #hal.device.promise<@__device_2>
    %55694 = torch_c.from_builtin_tensor %55693 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55694, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55695 = torch_c.to_builtin_tensor %55620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53238 = arith.constant 1 : index
    %dim_53239 = tensor.dim %55695, %c1_53238 : tensor<4x?x4096xf16>
    %55696 = flow.tensor.transfer %55695 : tensor<4x?x4096xf16>{%dim_53239} to #hal.device.promise<@__device_2>
    %55697 = torch_c.from_builtin_tensor %55696 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55697, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53240 = torch.constant.int 1
    %55698 = torch.aten.add.Tensor %55679, %55682, %int1_53240 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53241 = torch.constant.int 1
    %55699 = torch.aten.add.Tensor %55698, %55590, %int1_53241 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53242 = torch.constant.int 1
    %55700 = torch.aten.add.Tensor %55699, %55685, %int1_53242 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53243 = torch.constant.int 1
    %55701 = torch.aten.add.Tensor %55700, %55688, %int1_53243 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53244 = torch.constant.int 1
    %55702 = torch.aten.add.Tensor %55701, %55691, %int1_53244 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53245 = torch.constant.int 1
    %55703 = torch.aten.add.Tensor %55702, %55694, %int1_53245 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53246 = torch.constant.int 1
    %55704 = torch.aten.add.Tensor %55703, %55697, %int1_53246 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
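    // Reduction for @__device_3: same pattern; %55596 stays local.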
    %55705 = torch_c.to_builtin_tensor %55578 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53247 = arith.constant 1 : index
    %dim_53248 = tensor.dim %55705, %c1_53247 : tensor<4x?x4096xf16>
    %55706 = flow.tensor.transfer %55705 : tensor<4x?x4096xf16>{%dim_53248} to #hal.device.promise<@__device_3>
    %55707 = torch_c.from_builtin_tensor %55706 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55708 = torch_c.to_builtin_tensor %55584 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53249 = arith.constant 1 : index
    %dim_53250 = tensor.dim %55708, %c1_53249 : tensor<4x?x4096xf16>
    %55709 = flow.tensor.transfer %55708 : tensor<4x?x4096xf16>{%dim_53250} to #hal.device.promise<@__device_3>
    %55710 = torch_c.from_builtin_tensor %55709 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55711 = torch_c.to_builtin_tensor %55590 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53251 = arith.constant 1 : index
    %dim_53252 = tensor.dim %55711, %c1_53251 : tensor<4x?x4096xf16>
    %55712 = flow.tensor.transfer %55711 : tensor<4x?x4096xf16>{%dim_53252} to #hal.device.promise<@__device_3>
    %55713 = torch_c.from_builtin_tensor %55712 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55714 = torch_c.to_builtin_tensor %55602 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53253 = arith.constant 1 : index
    %dim_53254 = tensor.dim %55714, %c1_53253 : tensor<4x?x4096xf16>
    %55715 = flow.tensor.transfer %55714 : tensor<4x?x4096xf16>{%dim_53254} to #hal.device.promise<@__device_3>
    %55716 = torch_c.from_builtin_tensor %55715 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55717 = torch_c.to_builtin_tensor %55608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53255 = arith.constant 1 : index
    %dim_53256 = tensor.dim %55717, %c1_53255 : tensor<4x?x4096xf16>
    %55718 = flow.tensor.transfer %55717 : tensor<4x?x4096xf16>{%dim_53256} to #hal.device.promise<@__device_3>
    %55719 = torch_c.from_builtin_tensor %55718 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55720 = torch_c.to_builtin_tensor %55614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53257 = arith.constant 1 : index
    %dim_53258 = tensor.dim %55720, %c1_53257 : tensor<4x?x4096xf16>
    %55721 = flow.tensor.transfer %55720 : tensor<4x?x4096xf16>{%dim_53258} to #hal.device.promise<@__device_3>
    %55722 = torch_c.from_builtin_tensor %55721 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55723 = torch_c.to_builtin_tensor %55620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53259 = arith.constant 1 : index
    %dim_53260 = tensor.dim %55723, %c1_53259 : tensor<4x?x4096xf16>
    %55724 = flow.tensor.transfer %55723 : tensor<4x?x4096xf16>{%dim_53260} to #hal.device.promise<@__device_3>
    %55725 = torch_c.from_builtin_tensor %55724 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53261 = torch.constant.int 1
    %55726 = torch.aten.add.Tensor %55707, %55710, %int1_53261 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53262 = torch.constant.int 1
    %55727 = torch.aten.add.Tensor %55726, %55713, %int1_53262 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53263 = torch.constant.int 1
    %55728 = torch.aten.add.Tensor %55727, %55596, %int1_53263 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53264 = torch.constant.int 1
    %55729 = torch.aten.add.Tensor %55728, %55716, %int1_53264 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53265 = torch.constant.int 1
    %55730 = torch.aten.add.Tensor %55729, %55719, %int1_53265 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55730, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53266 = torch.constant.int 1
    %55731 = torch.aten.add.Tensor %55730, %55722, %int1_53266 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53267 = torch.constant.int 1
    %55732 = torch.aten.add.Tensor %55731, %55725, %int1_53267 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
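    // Reduction for @__device_4: same pattern; %55602 stays local.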
    %55733 = torch_c.to_builtin_tensor %55578 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53268 = arith.constant 1 : index
    %dim_53269 = tensor.dim %55733, %c1_53268 : tensor<4x?x4096xf16>
    %55734 = flow.tensor.transfer %55733 : tensor<4x?x4096xf16>{%dim_53269} to #hal.device.promise<@__device_4>
    %55735 = torch_c.from_builtin_tensor %55734 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55736 = torch_c.to_builtin_tensor %55584 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53270 = arith.constant 1 : index
    %dim_53271 = tensor.dim %55736, %c1_53270 : tensor<4x?x4096xf16>
    %55737 = flow.tensor.transfer %55736 : tensor<4x?x4096xf16>{%dim_53271} to #hal.device.promise<@__device_4>
    %55738 = torch_c.from_builtin_tensor %55737 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55739 = torch_c.to_builtin_tensor %55590 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53272 = arith.constant 1 : index
    %dim_53273 = tensor.dim %55739, %c1_53272 : tensor<4x?x4096xf16>
    %55740 = flow.tensor.transfer %55739 : tensor<4x?x4096xf16>{%dim_53273} to #hal.device.promise<@__device_4>
    %55741 = torch_c.from_builtin_tensor %55740 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55742 = torch_c.to_builtin_tensor %55596 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53274 = arith.constant 1 : index
    %dim_53275 = tensor.dim %55742, %c1_53274 : tensor<4x?x4096xf16>
    %55743 = flow.tensor.transfer %55742 : tensor<4x?x4096xf16>{%dim_53275} to #hal.device.promise<@__device_4>
    %55744 = torch_c.from_builtin_tensor %55743 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55745 = torch_c.to_builtin_tensor %55608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53276 = arith.constant 1 : index
    %dim_53277 = tensor.dim %55745, %c1_53276 : tensor<4x?x4096xf16>
    %55746 = flow.tensor.transfer %55745 : tensor<4x?x4096xf16>{%dim_53277} to #hal.device.promise<@__device_4>
    %55747 = torch_c.from_builtin_tensor %55746 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55748 = torch_c.to_builtin_tensor %55614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53278 = arith.constant 1 : index
    %dim_53279 = tensor.dim %55748, %c1_53278 : tensor<4x?x4096xf16>
    %55749 = flow.tensor.transfer %55748 : tensor<4x?x4096xf16>{%dim_53279} to #hal.device.promise<@__device_4>
    %55750 = torch_c.from_builtin_tensor %55749 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55751 = torch_c.to_builtin_tensor %55620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53280 = arith.constant 1 : index
    %dim_53281 = tensor.dim %55751, %c1_53280 : tensor<4x?x4096xf16>
    %55752 = flow.tensor.transfer %55751 : tensor<4x?x4096xf16>{%dim_53281} to #hal.device.promise<@__device_4>
    %55753 = torch_c.from_builtin_tensor %55752 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53282 = torch.constant.int 1
    %55754 = torch.aten.add.Tensor %55735, %55738, %int1_53282 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53283 = torch.constant.int 1
    %55755 = torch.aten.add.Tensor %55754, %55741, %int1_53283 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53284 = torch.constant.int 1
    %55756 = torch.aten.add.Tensor %55755, %55744, %int1_53284 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53285 = torch.constant.int 1
    %55757 = torch.aten.add.Tensor %55756, %55602, %int1_53285 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53286 = torch.constant.int 1
    %55758 = torch.aten.add.Tensor %55757, %55747, %int1_53286 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53287 = torch.constant.int 1
    %55759 = torch.aten.add.Tensor %55758, %55750, %int1_53287 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53288 = torch.constant.int 1
    %55760 = torch.aten.add.Tensor %55759, %55753, %int1_53288 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
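    // Reduction for @__device_5: same pattern; %55608 stays local.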
    %55761 = torch_c.to_builtin_tensor %55578 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53289 = arith.constant 1 : index
    %dim_53290 = tensor.dim %55761, %c1_53289 : tensor<4x?x4096xf16>
    %55762 = flow.tensor.transfer %55761 : tensor<4x?x4096xf16>{%dim_53290} to #hal.device.promise<@__device_5>
    %55763 = torch_c.from_builtin_tensor %55762 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55764 = torch_c.to_builtin_tensor %55584 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53291 = arith.constant 1 : index
    %dim_53292 = tensor.dim %55764, %c1_53291 : tensor<4x?x4096xf16>
    %55765 = flow.tensor.transfer %55764 : tensor<4x?x4096xf16>{%dim_53292} to #hal.device.promise<@__device_5>
    %55766 = torch_c.from_builtin_tensor %55765 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55767 = torch_c.to_builtin_tensor %55590 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53293 = arith.constant 1 : index
    %dim_53294 = tensor.dim %55767, %c1_53293 : tensor<4x?x4096xf16>
    %55768 = flow.tensor.transfer %55767 : tensor<4x?x4096xf16>{%dim_53294} to #hal.device.promise<@__device_5>
    %55769 = torch_c.from_builtin_tensor %55768 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55770 = torch_c.to_builtin_tensor %55596 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53295 = arith.constant 1 : index
    %dim_53296 = tensor.dim %55770, %c1_53295 : tensor<4x?x4096xf16>
    %55771 = flow.tensor.transfer %55770 : tensor<4x?x4096xf16>{%dim_53296} to #hal.device.promise<@__device_5>
    %55772 = torch_c.from_builtin_tensor %55771 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55773 = torch_c.to_builtin_tensor %55602 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53297 = arith.constant 1 : index
    %dim_53298 = tensor.dim %55773, %c1_53297 : tensor<4x?x4096xf16>
    %55774 = flow.tensor.transfer %55773 : tensor<4x?x4096xf16>{%dim_53298} to #hal.device.promise<@__device_5>
    %55775 = torch_c.from_builtin_tensor %55774 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55776 = torch_c.to_builtin_tensor %55614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53299 = arith.constant 1 : index
    %dim_53300 = tensor.dim %55776, %c1_53299 : tensor<4x?x4096xf16>
    %55777 = flow.tensor.transfer %55776 : tensor<4x?x4096xf16>{%dim_53300} to #hal.device.promise<@__device_5>
    %55778 = torch_c.from_builtin_tensor %55777 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55779 = torch_c.to_builtin_tensor %55620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53301 = arith.constant 1 : index
    %dim_53302 = tensor.dim %55779, %c1_53301 : tensor<4x?x4096xf16>
    %55780 = flow.tensor.transfer %55779 : tensor<4x?x4096xf16>{%dim_53302} to #hal.device.promise<@__device_5>
    %55781 = torch_c.from_builtin_tensor %55780 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53303 = torch.constant.int 1
    %55782 = torch.aten.add.Tensor %55763, %55766, %int1_53303 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53304 = torch.constant.int 1
    %55783 = torch.aten.add.Tensor %55782, %55769, %int1_53304 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53305 = torch.constant.int 1
    %55784 = torch.aten.add.Tensor %55783, %55772, %int1_53305 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53306 = torch.constant.int 1
    %55785 = torch.aten.add.Tensor %55784, %55775, %int1_53306 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53307 = torch.constant.int 1
    %55786 = torch.aten.add.Tensor %55785, %55608, %int1_53307 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53308 = torch.constant.int 1
    %55787 = torch.aten.add.Tensor %55786, %55778, %int1_53308 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53309 = torch.constant.int 1
    %55788 = torch.aten.add.Tensor %55787, %55781, %int1_53309 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
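    // Reduction leg for @__device_6: same pattern, with the local partial %55614
    // added directly and the other seven partials transferred in.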
    %55789 = torch_c.to_builtin_tensor %55578 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53310 = arith.constant 1 : index
    %dim_53311 = tensor.dim %55789, %c1_53310 : tensor<4x?x4096xf16>
    %55790 = flow.tensor.transfer %55789 : tensor<4x?x4096xf16>{%dim_53311} to #hal.device.promise<@__device_6>
    %55791 = torch_c.from_builtin_tensor %55790 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55792 = torch_c.to_builtin_tensor %55584 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53312 = arith.constant 1 : index
    %dim_53313 = tensor.dim %55792, %c1_53312 : tensor<4x?x4096xf16>
    %55793 = flow.tensor.transfer %55792 : tensor<4x?x4096xf16>{%dim_53313} to #hal.device.promise<@__device_6>
    %55794 = torch_c.from_builtin_tensor %55793 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55795 = torch_c.to_builtin_tensor %55590 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53314 = arith.constant 1 : index
    %dim_53315 = tensor.dim %55795, %c1_53314 : tensor<4x?x4096xf16>
    %55796 = flow.tensor.transfer %55795 : tensor<4x?x4096xf16>{%dim_53315} to #hal.device.promise<@__device_6>
    %55797 = torch_c.from_builtin_tensor %55796 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55798 = torch_c.to_builtin_tensor %55596 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53316 = arith.constant 1 : index
    %dim_53317 = tensor.dim %55798, %c1_53316 : tensor<4x?x4096xf16>
    %55799 = flow.tensor.transfer %55798 : tensor<4x?x4096xf16>{%dim_53317} to #hal.device.promise<@__device_6>
    %55800 = torch_c.from_builtin_tensor %55799 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55801 = torch_c.to_builtin_tensor %55602 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53318 = arith.constant 1 : index
    %dim_53319 = tensor.dim %55801, %c1_53318 : tensor<4x?x4096xf16>
    %55802 = flow.tensor.transfer %55801 : tensor<4x?x4096xf16>{%dim_53319} to #hal.device.promise<@__device_6>
    %55803 = torch_c.from_builtin_tensor %55802 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55804 = torch_c.to_builtin_tensor %55608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53320 = arith.constant 1 : index
    %dim_53321 = tensor.dim %55804, %c1_53320 : tensor<4x?x4096xf16>
    %55805 = flow.tensor.transfer %55804 : tensor<4x?x4096xf16>{%dim_53321} to #hal.device.promise<@__device_6>
    %55806 = torch_c.from_builtin_tensor %55805 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55807 = torch_c.to_builtin_tensor %55620 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53322 = arith.constant 1 : index
    %dim_53323 = tensor.dim %55807, %c1_53322 : tensor<4x?x4096xf16>
    %55808 = flow.tensor.transfer %55807 : tensor<4x?x4096xf16>{%dim_53323} to #hal.device.promise<@__device_6>
    %55809 = torch_c.from_builtin_tensor %55808 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55809, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53324 = torch.constant.int 1
    %55810 = torch.aten.add.Tensor %55791, %55794, %int1_53324 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53325 = torch.constant.int 1
    %55811 = torch.aten.add.Tensor %55810, %55797, %int1_53325 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53326 = torch.constant.int 1
    %55812 = torch.aten.add.Tensor %55811, %55800, %int1_53326 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53327 = torch.constant.int 1
    %55813 = torch.aten.add.Tensor %55812, %55803, %int1_53327 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53328 = torch.constant.int 1
    %55814 = torch.aten.add.Tensor %55813, %55806, %int1_53328 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55814, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53329 = torch.constant.int 1
    %55815 = torch.aten.add.Tensor %55814, %55614, %int1_53329 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53330 = torch.constant.int 1
    %55816 = torch.aten.add.Tensor %55815, %55809, %int1_53330 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
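    // Reduction leg for @__device_7: local partial %55620, remaining seven transferred in.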
    %55817 = torch_c.to_builtin_tensor %55578 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53331 = arith.constant 1 : index
    %dim_53332 = tensor.dim %55817, %c1_53331 : tensor<4x?x4096xf16>
    %55818 = flow.tensor.transfer %55817 : tensor<4x?x4096xf16>{%dim_53332} to #hal.device.promise<@__device_7>
    %55819 = torch_c.from_builtin_tensor %55818 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55820 = torch_c.to_builtin_tensor %55584 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53333 = arith.constant 1 : index
    %dim_53334 = tensor.dim %55820, %c1_53333 : tensor<4x?x4096xf16>
    %55821 = flow.tensor.transfer %55820 : tensor<4x?x4096xf16>{%dim_53334} to #hal.device.promise<@__device_7>
    %55822 = torch_c.from_builtin_tensor %55821 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55823 = torch_c.to_builtin_tensor %55590 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53335 = arith.constant 1 : index
    %dim_53336 = tensor.dim %55823, %c1_53335 : tensor<4x?x4096xf16>
    %55824 = flow.tensor.transfer %55823 : tensor<4x?x4096xf16>{%dim_53336} to #hal.device.promise<@__device_7>
    %55825 = torch_c.from_builtin_tensor %55824 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55826 = torch_c.to_builtin_tensor %55596 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53337 = arith.constant 1 : index
    %dim_53338 = tensor.dim %55826, %c1_53337 : tensor<4x?x4096xf16>
    %55827 = flow.tensor.transfer %55826 : tensor<4x?x4096xf16>{%dim_53338} to #hal.device.promise<@__device_7>
    %55828 = torch_c.from_builtin_tensor %55827 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55829 = torch_c.to_builtin_tensor %55602 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53339 = arith.constant 1 : index
    %dim_53340 = tensor.dim %55829, %c1_53339 : tensor<4x?x4096xf16>
    %55830 = flow.tensor.transfer %55829 : tensor<4x?x4096xf16>{%dim_53340} to #hal.device.promise<@__device_7>
    %55831 = torch_c.from_builtin_tensor %55830 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55832 = torch_c.to_builtin_tensor %55608 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53341 = arith.constant 1 : index
    %dim_53342 = tensor.dim %55832, %c1_53341 : tensor<4x?x4096xf16>
    %55833 = flow.tensor.transfer %55832 : tensor<4x?x4096xf16>{%dim_53342} to #hal.device.promise<@__device_7>
    %55834 = torch_c.from_builtin_tensor %55833 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %55835 = torch_c.to_builtin_tensor %55614 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53343 = arith.constant 1 : index
    %dim_53344 = tensor.dim %55835, %c1_53343 : tensor<4x?x4096xf16>
    %55836 = flow.tensor.transfer %55835 : tensor<4x?x4096xf16>{%dim_53344} to #hal.device.promise<@__device_7>
    %55837 = torch_c.from_builtin_tensor %55836 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53345 = torch.constant.int 1
    %55838 = torch.aten.add.Tensor %55819, %55822, %int1_53345 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53346 = torch.constant.int 1
    %55839 = torch.aten.add.Tensor %55838, %55825, %int1_53346 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55839, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53347 = torch.constant.int 1
    %55840 = torch.aten.add.Tensor %55839, %55828, %int1_53347 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55840, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53348 = torch.constant.int 1
    %55841 = torch.aten.add.Tensor %55840, %55831, %int1_53348 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53349 = torch.constant.int 1
    %55842 = torch.aten.add.Tensor %55841, %55834, %int1_53349 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53350 = torch.constant.int 1
    %55843 = torch.aten.add.Tensor %55842, %55837, %int1_53350 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53351 = torch.constant.int 1
    %55844 = torch.aten.add.Tensor %55843, %55620, %int1_53351 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
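    // Residual connection, one add per device: the reduced outputs (%55648, %55676,
    // ..., %55844 -- likely the attention-block output, summed above) are added to the
    // incoming per-device hidden states %54504 ... %54511.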
    %int1_53352 = torch.constant.int 1
    %55845 = torch.aten.add.Tensor %54504, %55648, %int1_53352 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53353 = torch.constant.int 1
    %55846 = torch.aten.add.Tensor %54505, %55676, %int1_53353 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55846, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53354 = torch.constant.int 1
    %55847 = torch.aten.add.Tensor %54506, %55704, %int1_53354 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53355 = torch.constant.int 1
    %55848 = torch.aten.add.Tensor %54507, %55732, %int1_53355 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53356 = torch.constant.int 1
    %55849 = torch.aten.add.Tensor %54508, %55760, %int1_53356 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53357 = torch.constant.int 1
    %55850 = torch.aten.add.Tensor %54509, %55788, %int1_53357 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53358 = torch.constant.int 1
    %55851 = torch.aten.add.Tensor %54510, %55816, %int1_53358 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53359 = torch.constant.int 1
    %55852 = torch.aten.add.Tensor %54511, %55844, %int1_53359 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
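    // RMSNorm over the hidden dimension, replicated on all eight devices:
    // upcast to f32 (dtype 6), square, mean over dim -1, add eps, rsqrt,
    // rescale, multiply by the norm weight, then cast back to f16 (dtype 5).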
    %int6_53360 = torch.constant.int 6
    %55853 = torch.prims.convert_element_type %55845, %int6_53360 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53361 = torch.constant.int 6
    %55854 = torch.prims.convert_element_type %55846, %int6_53361 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53362 = torch.constant.int 6
    %55855 = torch.prims.convert_element_type %55847, %int6_53362 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53363 = torch.constant.int 6
    %55856 = torch.prims.convert_element_type %55848, %int6_53363 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53364 = torch.constant.int 6
    %55857 = torch.prims.convert_element_type %55849, %int6_53364 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53365 = torch.constant.int 6
    %55858 = torch.prims.convert_element_type %55850, %int6_53365 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53366 = torch.constant.int 6
    %55859 = torch.prims.convert_element_type %55851, %int6_53366 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53367 = torch.constant.int 6
    %55860 = torch.prims.convert_element_type %55852, %int6_53367 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
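    // Square each element (x^2).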
    %int2_53368 = torch.constant.int 2
    %55861 = torch.aten.pow.Tensor_Scalar %55853, %int2_53368 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53369 = torch.constant.int 2
    %55862 = torch.aten.pow.Tensor_Scalar %55854, %int2_53369 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53370 = torch.constant.int 2
    %55863 = torch.aten.pow.Tensor_Scalar %55855, %int2_53370 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53371 = torch.constant.int 2
    %55864 = torch.aten.pow.Tensor_Scalar %55856, %int2_53371 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55864, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53372 = torch.constant.int 2
    %55865 = torch.aten.pow.Tensor_Scalar %55857, %int2_53372 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53373 = torch.constant.int 2
    %55866 = torch.aten.pow.Tensor_Scalar %55858, %int2_53373 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53374 = torch.constant.int 2
    %55867 = torch.aten.pow.Tensor_Scalar %55859, %int2_53374 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55867, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53375 = torch.constant.int 2
    %55868 = torch.aten.pow.Tensor_Scalar %55860, %int2_53375 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
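    // Mean of the squares over the hidden dim (dim -1, keepdim=true) -> [4,?,1].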
    %int-1_53376 = torch.constant.int -1
    %55869 = torch.prim.ListConstruct %int-1_53376 : (!torch.int) -> !torch.list<int>
    %true_53377 = torch.constant.bool true
    %none_53378 = torch.constant.none
    %55870 = torch.aten.mean.dim %55861, %55869, %true_53377, %none_53378 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55870, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53379 = torch.constant.int -1
    %55871 = torch.prim.ListConstruct %int-1_53379 : (!torch.int) -> !torch.list<int>
    %true_53380 = torch.constant.bool true
    %none_53381 = torch.constant.none
    %55872 = torch.aten.mean.dim %55862, %55871, %true_53380, %none_53381 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53382 = torch.constant.int -1
    %55873 = torch.prim.ListConstruct %int-1_53382 : (!torch.int) -> !torch.list<int>
    %true_53383 = torch.constant.bool true
    %none_53384 = torch.constant.none
    %55874 = torch.aten.mean.dim %55863, %55873, %true_53383, %none_53384 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53385 = torch.constant.int -1
    %55875 = torch.prim.ListConstruct %int-1_53385 : (!torch.int) -> !torch.list<int>
    %true_53386 = torch.constant.bool true
    %none_53387 = torch.constant.none
    %55876 = torch.aten.mean.dim %55864, %55875, %true_53386, %none_53387 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55876, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53388 = torch.constant.int -1
    %55877 = torch.prim.ListConstruct %int-1_53388 : (!torch.int) -> !torch.list<int>
    %true_53389 = torch.constant.bool true
    %none_53390 = torch.constant.none
    %55878 = torch.aten.mean.dim %55865, %55877, %true_53389, %none_53390 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53391 = torch.constant.int -1
    %55879 = torch.prim.ListConstruct %int-1_53391 : (!torch.int) -> !torch.list<int>
    %true_53392 = torch.constant.bool true
    %none_53393 = torch.constant.none
    %55880 = torch.aten.mean.dim %55866, %55879, %true_53392, %none_53393 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53394 = torch.constant.int -1
    %55881 = torch.prim.ListConstruct %int-1_53394 : (!torch.int) -> !torch.list<int>
    %true_53395 = torch.constant.bool true
    %none_53396 = torch.constant.none
    %55882 = torch.aten.mean.dim %55867, %55881, %true_53395, %none_53396 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55882, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53397 = torch.constant.int -1
    %55883 = torch.prim.ListConstruct %int-1_53397 : (!torch.int) -> !torch.list<int>
    %true_53398 = torch.constant.bool true
    %none_53399 = torch.constant.none
    %55884 = torch.aten.mean.dim %55868, %55883, %true_53398, %none_53399 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
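    // Add eps (~1e-5) for numerical stability.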
    %float9.999990e-06_53400 = torch.constant.float 9.9999997473787516E-6
    %int1_53401 = torch.constant.int 1
    %55885 = torch.aten.add.Scalar %55870, %float9.999990e-06_53400, %int1_53401 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53402 = torch.constant.float 9.9999997473787516E-6
    %int1_53403 = torch.constant.int 1
    %55886 = torch.aten.add.Scalar %55872, %float9.999990e-06_53402, %int1_53403 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53404 = torch.constant.float 9.9999997473787516E-6
    %int1_53405 = torch.constant.int 1
    %55887 = torch.aten.add.Scalar %55874, %float9.999990e-06_53404, %int1_53405 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53406 = torch.constant.float 9.9999997473787516E-6
    %int1_53407 = torch.constant.int 1
    %55888 = torch.aten.add.Scalar %55876, %float9.999990e-06_53406, %int1_53407 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53408 = torch.constant.float 9.9999997473787516E-6
    %int1_53409 = torch.constant.int 1
    %55889 = torch.aten.add.Scalar %55878, %float9.999990e-06_53408, %int1_53409 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53410 = torch.constant.float 9.9999997473787516E-6
    %int1_53411 = torch.constant.int 1
    %55890 = torch.aten.add.Scalar %55880, %float9.999990e-06_53410, %int1_53411 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53412 = torch.constant.float 9.9999997473787516E-6
    %int1_53413 = torch.constant.int 1
    %55891 = torch.aten.add.Scalar %55882, %float9.999990e-06_53412, %int1_53413 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53414 = torch.constant.float 9.9999997473787516E-6
    %int1_53415 = torch.constant.int 1
    %55892 = torch.aten.add.Scalar %55884, %float9.999990e-06_53414, %int1_53415 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55892, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
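    // rsqrt gives 1 / sqrt(mean(x^2) + eps).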
    %55893 = torch.aten.rsqrt %55885 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %55894 = torch.aten.rsqrt %55886 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %55895 = torch.aten.rsqrt %55887 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %55896 = torch.aten.rsqrt %55888 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %55897 = torch.aten.rsqrt %55889 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %55898 = torch.aten.rsqrt %55890 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55898, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %55899 = torch.aten.rsqrt %55891 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %55900 = torch.aten.rsqrt %55892 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %55900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
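    // Normalize: x * rsqrt(mean(x^2) + eps).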
    %55901 = torch.aten.mul.Tensor %55853, %55893 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55901, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55902 = torch.aten.mul.Tensor %55854, %55894 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55903 = torch.aten.mul.Tensor %55855, %55895 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55904 = torch.aten.mul.Tensor %55856, %55896 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55904, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55905 = torch.aten.mul.Tensor %55857, %55897 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55906 = torch.aten.mul.Tensor %55858, %55898 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55907 = torch.aten.mul.Tensor %55859, %55899 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55907, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55908 = torch.aten.mul.Tensor %55860, %55900 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
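    // Apply the learned per-channel scale (%2064 ... %2071, one replica per device) --
    // likely the blk.*.ffn_norm.weight globals.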
    %55909 = torch.aten.mul.Tensor %2064, %55901 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55910 = torch.aten.mul.Tensor %2065, %55902 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55911 = torch.aten.mul.Tensor %2066, %55903 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55912 = torch.aten.mul.Tensor %2067, %55904 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55913 = torch.aten.mul.Tensor %2068, %55905 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55914 = torch.aten.mul.Tensor %2069, %55906 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55915 = torch.aten.mul.Tensor %2070, %55907 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %55916 = torch.aten.mul.Tensor %2071, %55908 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %55916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
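    // Cast the normalized activations back to f16 (dtype 5).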
    %int5_53416 = torch.constant.int 5
    %55917 = torch.prims.convert_element_type %55909, %int5_53416 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53417 = torch.constant.int 5
    %55918 = torch.prims.convert_element_type %55910, %int5_53417 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53418 = torch.constant.int 5
    %55919 = torch.prims.convert_element_type %55911, %int5_53418 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53419 = torch.constant.int 5
    %55920 = torch.prims.convert_element_type %55912, %int5_53419 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53420 = torch.constant.int 5
    %55921 = torch.prims.convert_element_type %55913, %int5_53420 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53421 = torch.constant.int 5
    %55922 = torch.prims.convert_element_type %55914, %int5_53421 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53422 = torch.constant.int 5
    %55923 = torch.prims.convert_element_type %55915, %int5_53422 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53423 = torch.constant.int 5
    %55924 = torch.prims.convert_element_type %55916, %int5_53423 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %55924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
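    // FFN first projection: transpose each [1792,4096] weight shard to [4096,1792].
    // 1792 = 14336 / 8, consistent with the FFN intermediate dim column-sharded
    // across the eight devices (likely the ffn_gate shards).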
    %int1_53424 = torch.constant.int 1
    %int0_53425 = torch.constant.int 0
    %55925 = torch.prim.ListConstruct %int1_53424, %int0_53425 : (!torch.int, !torch.int) -> !torch.list<int>
    %55926 = torch.aten.permute %2072, %55925 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53426 = torch.constant.int 1
    %int0_53427 = torch.constant.int 0
    %55927 = torch.prim.ListConstruct %int1_53426, %int0_53427 : (!torch.int, !torch.int) -> !torch.list<int>
    %55928 = torch.aten.permute %2073, %55927 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53428 = torch.constant.int 1
    %int0_53429 = torch.constant.int 0
    %55929 = torch.prim.ListConstruct %int1_53428, %int0_53429 : (!torch.int, !torch.int) -> !torch.list<int>
    %55930 = torch.aten.permute %2074, %55929 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53430 = torch.constant.int 1
    %int0_53431 = torch.constant.int 0
    %55931 = torch.prim.ListConstruct %int1_53430, %int0_53431 : (!torch.int, !torch.int) -> !torch.list<int>
    %55932 = torch.aten.permute %2075, %55931 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53432 = torch.constant.int 1
    %int0_53433 = torch.constant.int 0
    %55933 = torch.prim.ListConstruct %int1_53432, %int0_53433 : (!torch.int, !torch.int) -> !torch.list<int>
    %55934 = torch.aten.permute %2076, %55933 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53434 = torch.constant.int 1
    %int0_53435 = torch.constant.int 0
    %55935 = torch.prim.ListConstruct %int1_53434, %int0_53435 : (!torch.int, !torch.int) -> !torch.list<int>
    %55936 = torch.aten.permute %2077, %55935 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53436 = torch.constant.int 1
    %int0_53437 = torch.constant.int 0
    %55937 = torch.prim.ListConstruct %int1_53436, %int0_53437 : (!torch.int, !torch.int) -> !torch.list<int>
    %55938 = torch.aten.permute %2078, %55937 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53438 = torch.constant.int 1
    %int0_53439 = torch.constant.int 0
    %55939 = torch.prim.ListConstruct %int1_53438, %int0_53439 : (!torch.int, !torch.int) -> !torch.list<int>
    %55940 = torch.aten.permute %2079, %55939 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
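    // Per device: flatten [4,?,4096] to [4*?,4096], matmul against the transposed
    // shard, and reshape the result back to [4,?,1792].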
    %int4_53440 = torch.constant.int 4
    %55941 = torch.aten.mul.int %int4_53440, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53441 = torch.constant.int 4096
    %55942 = torch.prim.ListConstruct %55941, %int4096_53441 : (!torch.int, !torch.int) -> !torch.list<int>
    %55943 = torch.aten.view %55917, %55942 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55943, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %55944 = torch.aten.mm %55943, %55926 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %55944, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53442 = torch.constant.int 4
    %int1792_53443 = torch.constant.int 1792
    %55945 = torch.prim.ListConstruct %int4_53442, %2482, %int1792_53443 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55946 = torch.aten.view %55944, %55945 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53444 = torch.constant.int 4
    %55947 = torch.aten.mul.int %int4_53444, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53445 = torch.constant.int 4096
    %55948 = torch.prim.ListConstruct %55947, %int4096_53445 : (!torch.int, !torch.int) -> !torch.list<int>
    %55949 = torch.aten.view %55918, %55948 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55949, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %55950 = torch.aten.mm %55949, %55928 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %55950, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53446 = torch.constant.int 4
    %int1792_53447 = torch.constant.int 1792
    %55951 = torch.prim.ListConstruct %int4_53446, %2482, %int1792_53447 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55952 = torch.aten.view %55950, %55951 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53448 = torch.constant.int 4
    %55953 = torch.aten.mul.int %int4_53448, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53449 = torch.constant.int 4096
    %55954 = torch.prim.ListConstruct %55953, %int4096_53449 : (!torch.int, !torch.int) -> !torch.list<int>
    %55955 = torch.aten.view %55919, %55954 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55955, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %55956 = torch.aten.mm %55955, %55930 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %55956, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53450 = torch.constant.int 4
    %int1792_53451 = torch.constant.int 1792
    %55957 = torch.prim.ListConstruct %int4_53450, %2482, %int1792_53451 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55958 = torch.aten.view %55956, %55957 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53452 = torch.constant.int 4
    %55959 = torch.aten.mul.int %int4_53452, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53453 = torch.constant.int 4096
    %55960 = torch.prim.ListConstruct %55959, %int4096_53453 : (!torch.int, !torch.int) -> !torch.list<int>
    %55961 = torch.aten.view %55920, %55960 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55961, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %55962 = torch.aten.mm %55961, %55932 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %55962, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53454 = torch.constant.int 4
    %int1792_53455 = torch.constant.int 1792
    %55963 = torch.prim.ListConstruct %int4_53454, %2482, %int1792_53455 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55964 = torch.aten.view %55962, %55963 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53456 = torch.constant.int 4
    %55965 = torch.aten.mul.int %int4_53456, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53457 = torch.constant.int 4096
    %55966 = torch.prim.ListConstruct %55965, %int4096_53457 : (!torch.int, !torch.int) -> !torch.list<int>
    %55967 = torch.aten.view %55921, %55966 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55967, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %55968 = torch.aten.mm %55967, %55934 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %55968, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53458 = torch.constant.int 4
    %int1792_53459 = torch.constant.int 1792
    %55969 = torch.prim.ListConstruct %int4_53458, %2482, %int1792_53459 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55970 = torch.aten.view %55968, %55969 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53460 = torch.constant.int 4
    %55971 = torch.aten.mul.int %int4_53460, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53461 = torch.constant.int 4096
    %55972 = torch.prim.ListConstruct %55971, %int4096_53461 : (!torch.int, !torch.int) -> !torch.list<int>
    %55973 = torch.aten.view %55922, %55972 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55973, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %55974 = torch.aten.mm %55973, %55936 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %55974, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53462 = torch.constant.int 4
    %int1792_53463 = torch.constant.int 1792
    %55975 = torch.prim.ListConstruct %int4_53462, %2482, %int1792_53463 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55976 = torch.aten.view %55974, %55975 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55976, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53464 = torch.constant.int 4
    %55977 = torch.aten.mul.int %int4_53464, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53465 = torch.constant.int 4096
    %55978 = torch.prim.ListConstruct %55977, %int4096_53465 : (!torch.int, !torch.int) -> !torch.list<int>
    %55979 = torch.aten.view %55923, %55978 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55979, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %55980 = torch.aten.mm %55979, %55938 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %55980, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53466 = torch.constant.int 4
    %int1792_53467 = torch.constant.int 1792
    %55981 = torch.prim.ListConstruct %int4_53466, %2482, %int1792_53467 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55982 = torch.aten.view %55980, %55981 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55982, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53468 = torch.constant.int 4
    %55983 = torch.aten.mul.int %int4_53468, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53469 = torch.constant.int 4096
    %55984 = torch.prim.ListConstruct %55983, %int4096_53469 : (!torch.int, !torch.int) -> !torch.list<int>
    %55985 = torch.aten.view %55924, %55984 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %55985, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %55986 = torch.aten.mm %55985, %55940 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %55986, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53470 = torch.constant.int 4
    %int1792_53471 = torch.constant.int 1792
    %55987 = torch.prim.ListConstruct %int4_53470, %2482, %int1792_53471 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %55988 = torch.aten.view %55986, %55987 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
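    // SiLU on each gate projection; together with the up projection below this
    // forms the gate path of a SwiGLU-style FFN.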
    %55989 = torch.aten.silu %55946 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %55990 = torch.aten.silu %55952 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %55991 = torch.aten.silu %55958 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %55992 = torch.aten.silu %55964 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %55993 = torch.aten.silu %55970 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %55994 = torch.aten.silu %55976 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %55995 = torch.aten.silu %55982 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %55996 = torch.aten.silu %55988 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %55996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
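    // Second set of [1792,4096] shards transposed -- presumably the ffn_up weights,
    // whose outputs are combined elementwise with silu(gate) further below.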
    %int1_53472 = torch.constant.int 1
    %int0_53473 = torch.constant.int 0
    %55997 = torch.prim.ListConstruct %int1_53472, %int0_53473 : (!torch.int, !torch.int) -> !torch.list<int>
    %55998 = torch.aten.permute %2080, %55997 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53474 = torch.constant.int 1
    %int0_53475 = torch.constant.int 0
    %55999 = torch.prim.ListConstruct %int1_53474, %int0_53475 : (!torch.int, !torch.int) -> !torch.list<int>
    %56000 = torch.aten.permute %2081, %55999 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53476 = torch.constant.int 1
    %int0_53477 = torch.constant.int 0
    %56001 = torch.prim.ListConstruct %int1_53476, %int0_53477 : (!torch.int, !torch.int) -> !torch.list<int>
    %56002 = torch.aten.permute %2082, %56001 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53478 = torch.constant.int 1
    %int0_53479 = torch.constant.int 0
    %56003 = torch.prim.ListConstruct %int1_53478, %int0_53479 : (!torch.int, !torch.int) -> !torch.list<int>
    %56004 = torch.aten.permute %2083, %56003 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53480 = torch.constant.int 1
    %int0_53481 = torch.constant.int 0
    %56005 = torch.prim.ListConstruct %int1_53480, %int0_53481 : (!torch.int, !torch.int) -> !torch.list<int>
    %56006 = torch.aten.permute %2084, %56005 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53482 = torch.constant.int 1
    %int0_53483 = torch.constant.int 0
    %56007 = torch.prim.ListConstruct %int1_53482, %int0_53483 : (!torch.int, !torch.int) -> !torch.list<int>
    %56008 = torch.aten.permute %2085, %56007 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53484 = torch.constant.int 1
    %int0_53485 = torch.constant.int 0
    %56009 = torch.prim.ListConstruct %int1_53484, %int0_53485 : (!torch.int, !torch.int) -> !torch.list<int>
    %56010 = torch.aten.permute %2086, %56009 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_53486 = torch.constant.int 1
    %int0_53487 = torch.constant.int 0
    %56011 = torch.prim.ListConstruct %int1_53486, %int0_53487 : (!torch.int, !torch.int) -> !torch.list<int>
    %56012 = torch.aten.permute %2087, %56011 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
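    // Up-projection matmuls: same flatten / mm / reshape pattern, once per device.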
    %int4_53488 = torch.constant.int 4
    %56013 = torch.aten.mul.int %int4_53488, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53489 = torch.constant.int 4096
    %56014 = torch.prim.ListConstruct %56013, %int4096_53489 : (!torch.int, !torch.int) -> !torch.list<int>
    %56015 = torch.aten.view %55917, %56014 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56015, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56016 = torch.aten.mm %56015, %55998 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56016, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53490 = torch.constant.int 4
    %int1792_53491 = torch.constant.int 1792
    %56017 = torch.prim.ListConstruct %int4_53490, %2482, %int1792_53491 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56018 = torch.aten.view %56016, %56017 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53492 = torch.constant.int 4
    %56019 = torch.aten.mul.int %int4_53492, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53493 = torch.constant.int 4096
    %56020 = torch.prim.ListConstruct %56019, %int4096_53493 : (!torch.int, !torch.int) -> !torch.list<int>
    %56021 = torch.aten.view %55918, %56020 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56021, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56022 = torch.aten.mm %56021, %56000 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56022, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53494 = torch.constant.int 4
    %int1792_53495 = torch.constant.int 1792
    %56023 = torch.prim.ListConstruct %int4_53494, %2482, %int1792_53495 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56024 = torch.aten.view %56022, %56023 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53496 = torch.constant.int 4
    %56025 = torch.aten.mul.int %int4_53496, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53497 = torch.constant.int 4096
    %56026 = torch.prim.ListConstruct %56025, %int4096_53497 : (!torch.int, !torch.int) -> !torch.list<int>
    %56027 = torch.aten.view %55919, %56026 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56027, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56028 = torch.aten.mm %56027, %56002 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56028, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53498 = torch.constant.int 4
    %int1792_53499 = torch.constant.int 1792
    %56029 = torch.prim.ListConstruct %int4_53498, %2482, %int1792_53499 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56030 = torch.aten.view %56028, %56029 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53500 = torch.constant.int 4
    %56031 = torch.aten.mul.int %int4_53500, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53501 = torch.constant.int 4096
    %56032 = torch.prim.ListConstruct %56031, %int4096_53501 : (!torch.int, !torch.int) -> !torch.list<int>
    %56033 = torch.aten.view %55920, %56032 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56033, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56034 = torch.aten.mm %56033, %56004 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56034, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53502 = torch.constant.int 4
    %int1792_53503 = torch.constant.int 1792
    %56035 = torch.prim.ListConstruct %int4_53502, %2482, %int1792_53503 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56036 = torch.aten.view %56034, %56035 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53504 = torch.constant.int 4
    %56037 = torch.aten.mul.int %int4_53504, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53505 = torch.constant.int 4096
    %56038 = torch.prim.ListConstruct %56037, %int4096_53505 : (!torch.int, !torch.int) -> !torch.list<int>
    %56039 = torch.aten.view %55921, %56038 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56039, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56040 = torch.aten.mm %56039, %56006 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56040, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53506 = torch.constant.int 4
    %int1792_53507 = torch.constant.int 1792
    %56041 = torch.prim.ListConstruct %int4_53506, %2482, %int1792_53507 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56042 = torch.aten.view %56040, %56041 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53508 = torch.constant.int 4
    %56043 = torch.aten.mul.int %int4_53508, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53509 = torch.constant.int 4096
    %56044 = torch.prim.ListConstruct %56043, %int4096_53509 : (!torch.int, !torch.int) -> !torch.list<int>
    %56045 = torch.aten.view %55922, %56044 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56045, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56046 = torch.aten.mm %56045, %56008 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56046, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53510 = torch.constant.int 4
    %int1792_53511 = torch.constant.int 1792
    %56047 = torch.prim.ListConstruct %int4_53510, %2482, %int1792_53511 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56048 = torch.aten.view %56046, %56047 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53512 = torch.constant.int 4
    %56049 = torch.aten.mul.int %int4_53512, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53513 = torch.constant.int 4096
    %56050 = torch.prim.ListConstruct %56049, %int4096_53513 : (!torch.int, !torch.int) -> !torch.list<int>
    %56051 = torch.aten.view %55923, %56050 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56051, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56052 = torch.aten.mm %56051, %56010 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56052, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53514 = torch.constant.int 4
    %int1792_53515 = torch.constant.int 1792
    %56053 = torch.prim.ListConstruct %int4_53514, %2482, %int1792_53515 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56054 = torch.aten.view %56052, %56053 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_53516 = torch.constant.int 4
    %56055 = torch.aten.mul.int %int4_53516, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53517 = torch.constant.int 4096
    %56056 = torch.prim.ListConstruct %56055, %int4096_53517 : (!torch.int, !torch.int) -> !torch.list<int>
    %56057 = torch.aten.view %55924, %56056 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56057, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56058 = torch.aten.mm %56057, %56012 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56058, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_53518 = torch.constant.int 4
    %int1792_53519 = torch.constant.int 1792
    %56059 = torch.prim.ListConstruct %int4_53518, %2482, %int1792_53519 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56060 = torch.aten.view %56058, %56059 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
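    // SwiGLU-style gating: multiply each device's silu(gate) output elementwise by its
    // up-projection output, still in the per-shard [4,?,1792] layout.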
    %56061 = torch.aten.mul.Tensor %55989, %56018 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %56062 = torch.aten.mul.Tensor %55990, %56024 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %56063 = torch.aten.mul.Tensor %55991, %56030 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %56064 = torch.aten.mul.Tensor %55992, %56036 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %56065 = torch.aten.mul.Tensor %55993, %56042 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %56066 = torch.aten.mul.Tensor %55994, %56048 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %56067 = torch.aten.mul.Tensor %55995, %56054 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %56068 = torch.aten.mul.Tensor %55996, %56060 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %56068, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
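    // Transpose the per-device down-projection shards %2088..%2095 ([4096,1792] -> [1792,4096]).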
    %int1_53520 = torch.constant.int 1
    %int0_53521 = torch.constant.int 0
    %56069 = torch.prim.ListConstruct %int1_53520, %int0_53521 : (!torch.int, !torch.int) -> !torch.list<int>
    %56070 = torch.aten.permute %2088, %56069 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_53522 = torch.constant.int 1
    %int0_53523 = torch.constant.int 0
    %56071 = torch.prim.ListConstruct %int1_53522, %int0_53523 : (!torch.int, !torch.int) -> !torch.list<int>
    %56072 = torch.aten.permute %2089, %56071 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_53524 = torch.constant.int 1
    %int0_53525 = torch.constant.int 0
    %56073 = torch.prim.ListConstruct %int1_53524, %int0_53525 : (!torch.int, !torch.int) -> !torch.list<int>
    %56074 = torch.aten.permute %2090, %56073 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_53526 = torch.constant.int 1
    %int0_53527 = torch.constant.int 0
    %56075 = torch.prim.ListConstruct %int1_53526, %int0_53527 : (!torch.int, !torch.int) -> !torch.list<int>
    %56076 = torch.aten.permute %2091, %56075 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_53528 = torch.constant.int 1
    %int0_53529 = torch.constant.int 0
    %56077 = torch.prim.ListConstruct %int1_53528, %int0_53529 : (!torch.int, !torch.int) -> !torch.list<int>
    %56078 = torch.aten.permute %2092, %56077 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_53530 = torch.constant.int 1
    %int0_53531 = torch.constant.int 0
    %56079 = torch.prim.ListConstruct %int1_53530, %int0_53531 : (!torch.int, !torch.int) -> !torch.list<int>
    %56080 = torch.aten.permute %2093, %56079 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_53532 = torch.constant.int 1
    %int0_53533 = torch.constant.int 0
    %56081 = torch.prim.ListConstruct %int1_53532, %int0_53533 : (!torch.int, !torch.int) -> !torch.list<int>
    %56082 = torch.aten.permute %2094, %56081 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_53534 = torch.constant.int 1
    %int0_53535 = torch.constant.int 0
    %56083 = torch.prim.ListConstruct %int1_53534, %int0_53535 : (!torch.int, !torch.int) -> !torch.list<int>
    %56084 = torch.aten.permute %2095, %56083 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
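    // Down projection per shard: flatten to [?,1792], matmul back to width 4096, and
    // reshape to [4,?,4096]. Each device now holds a partial sum of the full FFN output.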
    %int1_53536 = torch.constant.int 1
    %56085 = torch.aten.size.int %55946, %int1_53536 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_53537 = torch.constant.int 4
    %56086 = torch.aten.mul.int %int4_53537, %56085 : !torch.int, !torch.int -> !torch.int
    %int1792_53538 = torch.constant.int 1792
    %56087 = torch.prim.ListConstruct %56086, %int1792_53538 : (!torch.int, !torch.int) -> !torch.list<int>
    %56088 = torch.aten.view %56061, %56087 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56088, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %56089 = torch.aten.mm %56088, %56070 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56089, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53539 = torch.constant.int 4
    %int4096_53540 = torch.constant.int 4096
    %56090 = torch.prim.ListConstruct %int4_53539, %56085, %int4096_53540 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56091 = torch.aten.view %56089, %56090 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53541 = torch.constant.int 1
    %56092 = torch.aten.size.int %55952, %int1_53541 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_53542 = torch.constant.int 4
    %56093 = torch.aten.mul.int %int4_53542, %56092 : !torch.int, !torch.int -> !torch.int
    %int1792_53543 = torch.constant.int 1792
    %56094 = torch.prim.ListConstruct %56093, %int1792_53543 : (!torch.int, !torch.int) -> !torch.list<int>
    %56095 = torch.aten.view %56062, %56094 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56095, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %56096 = torch.aten.mm %56095, %56072 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56096, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53544 = torch.constant.int 4
    %int4096_53545 = torch.constant.int 4096
    %56097 = torch.prim.ListConstruct %int4_53544, %56092, %int4096_53545 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56098 = torch.aten.view %56096, %56097 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53546 = torch.constant.int 1
    %56099 = torch.aten.size.int %55958, %int1_53546 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_53547 = torch.constant.int 4
    %56100 = torch.aten.mul.int %int4_53547, %56099 : !torch.int, !torch.int -> !torch.int
    %int1792_53548 = torch.constant.int 1792
    %56101 = torch.prim.ListConstruct %56100, %int1792_53548 : (!torch.int, !torch.int) -> !torch.list<int>
    %56102 = torch.aten.view %56063, %56101 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56102, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %56103 = torch.aten.mm %56102, %56074 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56103, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53549 = torch.constant.int 4
    %int4096_53550 = torch.constant.int 4096
    %56104 = torch.prim.ListConstruct %int4_53549, %56099, %int4096_53550 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56105 = torch.aten.view %56103, %56104 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53551 = torch.constant.int 1
    %56106 = torch.aten.size.int %55964, %int1_53551 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_53552 = torch.constant.int 4
    %56107 = torch.aten.mul.int %int4_53552, %56106 : !torch.int, !torch.int -> !torch.int
    %int1792_53553 = torch.constant.int 1792
    %56108 = torch.prim.ListConstruct %56107, %int1792_53553 : (!torch.int, !torch.int) -> !torch.list<int>
    %56109 = torch.aten.view %56064, %56108 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56109, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %56110 = torch.aten.mm %56109, %56076 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56110, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53554 = torch.constant.int 4
    %int4096_53555 = torch.constant.int 4096
    %56111 = torch.prim.ListConstruct %int4_53554, %56106, %int4096_53555 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56112 = torch.aten.view %56110, %56111 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53556 = torch.constant.int 1
    %56113 = torch.aten.size.int %55970, %int1_53556 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_53557 = torch.constant.int 4
    %56114 = torch.aten.mul.int %int4_53557, %56113 : !torch.int, !torch.int -> !torch.int
    %int1792_53558 = torch.constant.int 1792
    %56115 = torch.prim.ListConstruct %56114, %int1792_53558 : (!torch.int, !torch.int) -> !torch.list<int>
    %56116 = torch.aten.view %56065, %56115 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56116, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %56117 = torch.aten.mm %56116, %56078 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56117, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53559 = torch.constant.int 4
    %int4096_53560 = torch.constant.int 4096
    %56118 = torch.prim.ListConstruct %int4_53559, %56113, %int4096_53560 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56119 = torch.aten.view %56117, %56118 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53561 = torch.constant.int 1
    %56120 = torch.aten.size.int %55976, %int1_53561 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_53562 = torch.constant.int 4
    %56121 = torch.aten.mul.int %int4_53562, %56120 : !torch.int, !torch.int -> !torch.int
    %int1792_53563 = torch.constant.int 1792
    %56122 = torch.prim.ListConstruct %56121, %int1792_53563 : (!torch.int, !torch.int) -> !torch.list<int>
    %56123 = torch.aten.view %56066, %56122 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56123, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %56124 = torch.aten.mm %56123, %56080 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56124, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53564 = torch.constant.int 4
    %int4096_53565 = torch.constant.int 4096
    %56125 = torch.prim.ListConstruct %int4_53564, %56120, %int4096_53565 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56126 = torch.aten.view %56124, %56125 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53566 = torch.constant.int 1
    %56127 = torch.aten.size.int %55982, %int1_53566 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_53567 = torch.constant.int 4
    %56128 = torch.aten.mul.int %int4_53567, %56127 : !torch.int, !torch.int -> !torch.int
    %int1792_53568 = torch.constant.int 1792
    %56129 = torch.prim.ListConstruct %56128, %int1792_53568 : (!torch.int, !torch.int) -> !torch.list<int>
    %56130 = torch.aten.view %56067, %56129 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56130, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %56131 = torch.aten.mm %56130, %56082 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56131, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53569 = torch.constant.int 4
    %int4096_53570 = torch.constant.int 4096
    %56132 = torch.prim.ListConstruct %int4_53569, %56127, %int4096_53570 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56133 = torch.aten.view %56131, %56132 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53571 = torch.constant.int 1
    %56134 = torch.aten.size.int %55988, %int1_53571 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_53572 = torch.constant.int 4
    %56135 = torch.aten.mul.int %int4_53572, %56134 : !torch.int, !torch.int -> !torch.int
    %int1792_53573 = torch.constant.int 1792
    %56136 = torch.prim.ListConstruct %56135, %int1792_53573 : (!torch.int, !torch.int) -> !torch.list<int>
    %56137 = torch.aten.view %56068, %56136 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %56137, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %56138 = torch.aten.mm %56137, %56084 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56138, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_53574 = torch.constant.int 4
    %int4096_53575 = torch.constant.int 4096
    %56139 = torch.prim.ListConstruct %int4_53574, %56134, %int4096_53575 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56140 = torch.aten.view %56138, %56139 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
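    // What follows appears to be an all-reduce of the 8 partial outputs, spelled out as
    // explicit flow.tensor.transfer ops plus chained adds: for each device in turn, gather
    // the seven remote partials and accumulate them with the local one.
    // First, @__device_0 (its local partial %56091 needs no transfer).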
    %56141 = torch_c.to_builtin_tensor %56098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53576 = arith.constant 1 : index
    %dim_53577 = tensor.dim %56141, %c1_53576 : tensor<4x?x4096xf16>
    %56142 = flow.tensor.transfer %56141 : tensor<4x?x4096xf16>{%dim_53577} to #hal.device.promise<@__device_0>
    %56143 = torch_c.from_builtin_tensor %56142 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56144 = torch_c.to_builtin_tensor %56105 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53578 = arith.constant 1 : index
    %dim_53579 = tensor.dim %56144, %c1_53578 : tensor<4x?x4096xf16>
    %56145 = flow.tensor.transfer %56144 : tensor<4x?x4096xf16>{%dim_53579} to #hal.device.promise<@__device_0>
    %56146 = torch_c.from_builtin_tensor %56145 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56147 = torch_c.to_builtin_tensor %56112 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53580 = arith.constant 1 : index
    %dim_53581 = tensor.dim %56147, %c1_53580 : tensor<4x?x4096xf16>
    %56148 = flow.tensor.transfer %56147 : tensor<4x?x4096xf16>{%dim_53581} to #hal.device.promise<@__device_0>
    %56149 = torch_c.from_builtin_tensor %56148 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56150 = torch_c.to_builtin_tensor %56119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53582 = arith.constant 1 : index
    %dim_53583 = tensor.dim %56150, %c1_53582 : tensor<4x?x4096xf16>
    %56151 = flow.tensor.transfer %56150 : tensor<4x?x4096xf16>{%dim_53583} to #hal.device.promise<@__device_0>
    %56152 = torch_c.from_builtin_tensor %56151 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56153 = torch_c.to_builtin_tensor %56126 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53584 = arith.constant 1 : index
    %dim_53585 = tensor.dim %56153, %c1_53584 : tensor<4x?x4096xf16>
    %56154 = flow.tensor.transfer %56153 : tensor<4x?x4096xf16>{%dim_53585} to #hal.device.promise<@__device_0>
    %56155 = torch_c.from_builtin_tensor %56154 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56156 = torch_c.to_builtin_tensor %56133 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53586 = arith.constant 1 : index
    %dim_53587 = tensor.dim %56156, %c1_53586 : tensor<4x?x4096xf16>
    %56157 = flow.tensor.transfer %56156 : tensor<4x?x4096xf16>{%dim_53587} to #hal.device.promise<@__device_0>
    %56158 = torch_c.from_builtin_tensor %56157 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56159 = torch_c.to_builtin_tensor %56140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53588 = arith.constant 1 : index
    %dim_53589 = tensor.dim %56159, %c1_53588 : tensor<4x?x4096xf16>
    %56160 = flow.tensor.transfer %56159 : tensor<4x?x4096xf16>{%dim_53589} to #hal.device.promise<@__device_0>
    %56161 = torch_c.from_builtin_tensor %56160 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53590 = torch.constant.int 1
    %56162 = torch.aten.add.Tensor %56091, %56143, %int1_53590 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53591 = torch.constant.int 1
    %56163 = torch.aten.add.Tensor %56162, %56146, %int1_53591 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53592 = torch.constant.int 1
    %56164 = torch.aten.add.Tensor %56163, %56149, %int1_53592 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53593 = torch.constant.int 1
    %56165 = torch.aten.add.Tensor %56164, %56152, %int1_53593 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53594 = torch.constant.int 1
    %56166 = torch.aten.add.Tensor %56165, %56155, %int1_53594 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53595 = torch.constant.int 1
    %56167 = torch.aten.add.Tensor %56166, %56158, %int1_53595 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53596 = torch.constant.int 1
    %56168 = torch.aten.add.Tensor %56167, %56161, %int1_53596 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
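    // Gather-and-accumulate for @__device_1 (its local partial %56098 needs no transfer).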
    %56169 = torch_c.to_builtin_tensor %56091 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53597 = arith.constant 1 : index
    %dim_53598 = tensor.dim %56169, %c1_53597 : tensor<4x?x4096xf16>
    %56170 = flow.tensor.transfer %56169 : tensor<4x?x4096xf16>{%dim_53598} to #hal.device.promise<@__device_1>
    %56171 = torch_c.from_builtin_tensor %56170 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56172 = torch_c.to_builtin_tensor %56105 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53599 = arith.constant 1 : index
    %dim_53600 = tensor.dim %56172, %c1_53599 : tensor<4x?x4096xf16>
    %56173 = flow.tensor.transfer %56172 : tensor<4x?x4096xf16>{%dim_53600} to #hal.device.promise<@__device_1>
    %56174 = torch_c.from_builtin_tensor %56173 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56174, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56175 = torch_c.to_builtin_tensor %56112 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53601 = arith.constant 1 : index
    %dim_53602 = tensor.dim %56175, %c1_53601 : tensor<4x?x4096xf16>
    %56176 = flow.tensor.transfer %56175 : tensor<4x?x4096xf16>{%dim_53602} to #hal.device.promise<@__device_1>
    %56177 = torch_c.from_builtin_tensor %56176 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56178 = torch_c.to_builtin_tensor %56119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53603 = arith.constant 1 : index
    %dim_53604 = tensor.dim %56178, %c1_53603 : tensor<4x?x4096xf16>
    %56179 = flow.tensor.transfer %56178 : tensor<4x?x4096xf16>{%dim_53604} to #hal.device.promise<@__device_1>
    %56180 = torch_c.from_builtin_tensor %56179 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56180, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56181 = torch_c.to_builtin_tensor %56126 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53605 = arith.constant 1 : index
    %dim_53606 = tensor.dim %56181, %c1_53605 : tensor<4x?x4096xf16>
    %56182 = flow.tensor.transfer %56181 : tensor<4x?x4096xf16>{%dim_53606} to #hal.device.promise<@__device_1>
    %56183 = torch_c.from_builtin_tensor %56182 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56184 = torch_c.to_builtin_tensor %56133 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53607 = arith.constant 1 : index
    %dim_53608 = tensor.dim %56184, %c1_53607 : tensor<4x?x4096xf16>
    %56185 = flow.tensor.transfer %56184 : tensor<4x?x4096xf16>{%dim_53608} to #hal.device.promise<@__device_1>
    %56186 = torch_c.from_builtin_tensor %56185 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56187 = torch_c.to_builtin_tensor %56140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53609 = arith.constant 1 : index
    %dim_53610 = tensor.dim %56187, %c1_53609 : tensor<4x?x4096xf16>
    %56188 = flow.tensor.transfer %56187 : tensor<4x?x4096xf16>{%dim_53610} to #hal.device.promise<@__device_1>
    %56189 = torch_c.from_builtin_tensor %56188 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53611 = torch.constant.int 1
    %56190 = torch.aten.add.Tensor %56171, %56098, %int1_53611 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53612 = torch.constant.int 1
    %56191 = torch.aten.add.Tensor %56190, %56174, %int1_53612 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53613 = torch.constant.int 1
    %56192 = torch.aten.add.Tensor %56191, %56177, %int1_53613 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53614 = torch.constant.int 1
    %56193 = torch.aten.add.Tensor %56192, %56180, %int1_53614 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53615 = torch.constant.int 1
    %56194 = torch.aten.add.Tensor %56193, %56183, %int1_53615 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53616 = torch.constant.int 1
    %56195 = torch.aten.add.Tensor %56194, %56186, %int1_53616 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53617 = torch.constant.int 1
    %56196 = torch.aten.add.Tensor %56195, %56189, %int1_53617 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
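    // Gather-and-accumulate for @__device_2 (local partial %56105).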
    %56197 = torch_c.to_builtin_tensor %56091 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53618 = arith.constant 1 : index
    %dim_53619 = tensor.dim %56197, %c1_53618 : tensor<4x?x4096xf16>
    %56198 = flow.tensor.transfer %56197 : tensor<4x?x4096xf16>{%dim_53619} to #hal.device.promise<@__device_2>
    %56199 = torch_c.from_builtin_tensor %56198 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56199, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56200 = torch_c.to_builtin_tensor %56098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53620 = arith.constant 1 : index
    %dim_53621 = tensor.dim %56200, %c1_53620 : tensor<4x?x4096xf16>
    %56201 = flow.tensor.transfer %56200 : tensor<4x?x4096xf16>{%dim_53621} to #hal.device.promise<@__device_2>
    %56202 = torch_c.from_builtin_tensor %56201 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56203 = torch_c.to_builtin_tensor %56112 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53622 = arith.constant 1 : index
    %dim_53623 = tensor.dim %56203, %c1_53622 : tensor<4x?x4096xf16>
    %56204 = flow.tensor.transfer %56203 : tensor<4x?x4096xf16>{%dim_53623} to #hal.device.promise<@__device_2>
    %56205 = torch_c.from_builtin_tensor %56204 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56205, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56206 = torch_c.to_builtin_tensor %56119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53624 = arith.constant 1 : index
    %dim_53625 = tensor.dim %56206, %c1_53624 : tensor<4x?x4096xf16>
    %56207 = flow.tensor.transfer %56206 : tensor<4x?x4096xf16>{%dim_53625} to #hal.device.promise<@__device_2>
    %56208 = torch_c.from_builtin_tensor %56207 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56209 = torch_c.to_builtin_tensor %56126 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53626 = arith.constant 1 : index
    %dim_53627 = tensor.dim %56209, %c1_53626 : tensor<4x?x4096xf16>
    %56210 = flow.tensor.transfer %56209 : tensor<4x?x4096xf16>{%dim_53627} to #hal.device.promise<@__device_2>
    %56211 = torch_c.from_builtin_tensor %56210 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56211, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56212 = torch_c.to_builtin_tensor %56133 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53628 = arith.constant 1 : index
    %dim_53629 = tensor.dim %56212, %c1_53628 : tensor<4x?x4096xf16>
    %56213 = flow.tensor.transfer %56212 : tensor<4x?x4096xf16>{%dim_53629} to #hal.device.promise<@__device_2>
    %56214 = torch_c.from_builtin_tensor %56213 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56215 = torch_c.to_builtin_tensor %56140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53630 = arith.constant 1 : index
    %dim_53631 = tensor.dim %56215, %c1_53630 : tensor<4x?x4096xf16>
    %56216 = flow.tensor.transfer %56215 : tensor<4x?x4096xf16>{%dim_53631} to #hal.device.promise<@__device_2>
    %56217 = torch_c.from_builtin_tensor %56216 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56217, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53632 = torch.constant.int 1
    %56218 = torch.aten.add.Tensor %56199, %56202, %int1_53632 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53633 = torch.constant.int 1
    %56219 = torch.aten.add.Tensor %56218, %56105, %int1_53633 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53634 = torch.constant.int 1
    %56220 = torch.aten.add.Tensor %56219, %56205, %int1_53634 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53635 = torch.constant.int 1
    %56221 = torch.aten.add.Tensor %56220, %56208, %int1_53635 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53636 = torch.constant.int 1
    %56222 = torch.aten.add.Tensor %56221, %56211, %int1_53636 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53637 = torch.constant.int 1
    %56223 = torch.aten.add.Tensor %56222, %56214, %int1_53637 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53638 = torch.constant.int 1
    %56224 = torch.aten.add.Tensor %56223, %56217, %int1_53638 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
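    // Gather-and-accumulate for @__device_3 (local partial %56112).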
    %56225 = torch_c.to_builtin_tensor %56091 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53639 = arith.constant 1 : index
    %dim_53640 = tensor.dim %56225, %c1_53639 : tensor<4x?x4096xf16>
    %56226 = flow.tensor.transfer %56225 : tensor<4x?x4096xf16>{%dim_53640} to #hal.device.promise<@__device_3>
    %56227 = torch_c.from_builtin_tensor %56226 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56228 = torch_c.to_builtin_tensor %56098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53641 = arith.constant 1 : index
    %dim_53642 = tensor.dim %56228, %c1_53641 : tensor<4x?x4096xf16>
    %56229 = flow.tensor.transfer %56228 : tensor<4x?x4096xf16>{%dim_53642} to #hal.device.promise<@__device_3>
    %56230 = torch_c.from_builtin_tensor %56229 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56231 = torch_c.to_builtin_tensor %56105 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53643 = arith.constant 1 : index
    %dim_53644 = tensor.dim %56231, %c1_53643 : tensor<4x?x4096xf16>
    %56232 = flow.tensor.transfer %56231 : tensor<4x?x4096xf16>{%dim_53644} to #hal.device.promise<@__device_3>
    %56233 = torch_c.from_builtin_tensor %56232 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56234 = torch_c.to_builtin_tensor %56119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53645 = arith.constant 1 : index
    %dim_53646 = tensor.dim %56234, %c1_53645 : tensor<4x?x4096xf16>
    %56235 = flow.tensor.transfer %56234 : tensor<4x?x4096xf16>{%dim_53646} to #hal.device.promise<@__device_3>
    %56236 = torch_c.from_builtin_tensor %56235 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56237 = torch_c.to_builtin_tensor %56126 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53647 = arith.constant 1 : index
    %dim_53648 = tensor.dim %56237, %c1_53647 : tensor<4x?x4096xf16>
    %56238 = flow.tensor.transfer %56237 : tensor<4x?x4096xf16>{%dim_53648} to #hal.device.promise<@__device_3>
    %56239 = torch_c.from_builtin_tensor %56238 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56240 = torch_c.to_builtin_tensor %56133 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53649 = arith.constant 1 : index
    %dim_53650 = tensor.dim %56240, %c1_53649 : tensor<4x?x4096xf16>
    %56241 = flow.tensor.transfer %56240 : tensor<4x?x4096xf16>{%dim_53650} to #hal.device.promise<@__device_3>
    %56242 = torch_c.from_builtin_tensor %56241 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56243 = torch_c.to_builtin_tensor %56140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53651 = arith.constant 1 : index
    %dim_53652 = tensor.dim %56243, %c1_53651 : tensor<4x?x4096xf16>
    %56244 = flow.tensor.transfer %56243 : tensor<4x?x4096xf16>{%dim_53652} to #hal.device.promise<@__device_3>
    %56245 = torch_c.from_builtin_tensor %56244 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53653 = torch.constant.int 1
    %56246 = torch.aten.add.Tensor %56227, %56230, %int1_53653 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53654 = torch.constant.int 1
    %56247 = torch.aten.add.Tensor %56246, %56233, %int1_53654 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53655 = torch.constant.int 1
    %56248 = torch.aten.add.Tensor %56247, %56112, %int1_53655 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53656 = torch.constant.int 1
    %56249 = torch.aten.add.Tensor %56248, %56236, %int1_53656 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53657 = torch.constant.int 1
    %56250 = torch.aten.add.Tensor %56249, %56239, %int1_53657 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56250, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53658 = torch.constant.int 1
    %56251 = torch.aten.add.Tensor %56250, %56242, %int1_53658 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53659 = torch.constant.int 1
    %56252 = torch.aten.add.Tensor %56251, %56245, %int1_53659 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
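    // Gather-and-accumulate for @__device_4 (local partial %56119).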
    %56253 = torch_c.to_builtin_tensor %56091 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53660 = arith.constant 1 : index
    %dim_53661 = tensor.dim %56253, %c1_53660 : tensor<4x?x4096xf16>
    %56254 = flow.tensor.transfer %56253 : tensor<4x?x4096xf16>{%dim_53661} to #hal.device.promise<@__device_4>
    %56255 = torch_c.from_builtin_tensor %56254 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56256 = torch_c.to_builtin_tensor %56098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53662 = arith.constant 1 : index
    %dim_53663 = tensor.dim %56256, %c1_53662 : tensor<4x?x4096xf16>
    %56257 = flow.tensor.transfer %56256 : tensor<4x?x4096xf16>{%dim_53663} to #hal.device.promise<@__device_4>
    %56258 = torch_c.from_builtin_tensor %56257 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56259 = torch_c.to_builtin_tensor %56105 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53664 = arith.constant 1 : index
    %dim_53665 = tensor.dim %56259, %c1_53664 : tensor<4x?x4096xf16>
    %56260 = flow.tensor.transfer %56259 : tensor<4x?x4096xf16>{%dim_53665} to #hal.device.promise<@__device_4>
    %56261 = torch_c.from_builtin_tensor %56260 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56262 = torch_c.to_builtin_tensor %56112 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53666 = arith.constant 1 : index
    %dim_53667 = tensor.dim %56262, %c1_53666 : tensor<4x?x4096xf16>
    %56263 = flow.tensor.transfer %56262 : tensor<4x?x4096xf16>{%dim_53667} to #hal.device.promise<@__device_4>
    %56264 = torch_c.from_builtin_tensor %56263 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56265 = torch_c.to_builtin_tensor %56126 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53668 = arith.constant 1 : index
    %dim_53669 = tensor.dim %56265, %c1_53668 : tensor<4x?x4096xf16>
    %56266 = flow.tensor.transfer %56265 : tensor<4x?x4096xf16>{%dim_53669} to #hal.device.promise<@__device_4>
    %56267 = torch_c.from_builtin_tensor %56266 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56268 = torch_c.to_builtin_tensor %56133 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53670 = arith.constant 1 : index
    %dim_53671 = tensor.dim %56268, %c1_53670 : tensor<4x?x4096xf16>
    %56269 = flow.tensor.transfer %56268 : tensor<4x?x4096xf16>{%dim_53671} to #hal.device.promise<@__device_4>
    %56270 = torch_c.from_builtin_tensor %56269 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56271 = torch_c.to_builtin_tensor %56140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53672 = arith.constant 1 : index
    %dim_53673 = tensor.dim %56271, %c1_53672 : tensor<4x?x4096xf16>
    %56272 = flow.tensor.transfer %56271 : tensor<4x?x4096xf16>{%dim_53673} to #hal.device.promise<@__device_4>
    %56273 = torch_c.from_builtin_tensor %56272 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53674 = torch.constant.int 1
    %56274 = torch.aten.add.Tensor %56255, %56258, %int1_53674 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53675 = torch.constant.int 1
    %56275 = torch.aten.add.Tensor %56274, %56261, %int1_53675 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53676 = torch.constant.int 1
    %56276 = torch.aten.add.Tensor %56275, %56264, %int1_53676 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53677 = torch.constant.int 1
    %56277 = torch.aten.add.Tensor %56276, %56119, %int1_53677 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53678 = torch.constant.int 1
    %56278 = torch.aten.add.Tensor %56277, %56267, %int1_53678 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53679 = torch.constant.int 1
    %56279 = torch.aten.add.Tensor %56278, %56270, %int1_53679 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53680 = torch.constant.int 1
    %56280 = torch.aten.add.Tensor %56279, %56273, %int1_53680 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
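    // Gather-and-accumulate for @__device_5 (local partial %56126).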
    %56281 = torch_c.to_builtin_tensor %56091 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53681 = arith.constant 1 : index
    %dim_53682 = tensor.dim %56281, %c1_53681 : tensor<4x?x4096xf16>
    %56282 = flow.tensor.transfer %56281 : tensor<4x?x4096xf16>{%dim_53682} to #hal.device.promise<@__device_5>
    %56283 = torch_c.from_builtin_tensor %56282 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56284 = torch_c.to_builtin_tensor %56098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53683 = arith.constant 1 : index
    %dim_53684 = tensor.dim %56284, %c1_53683 : tensor<4x?x4096xf16>
    %56285 = flow.tensor.transfer %56284 : tensor<4x?x4096xf16>{%dim_53684} to #hal.device.promise<@__device_5>
    %56286 = torch_c.from_builtin_tensor %56285 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56287 = torch_c.to_builtin_tensor %56105 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53685 = arith.constant 1 : index
    %dim_53686 = tensor.dim %56287, %c1_53685 : tensor<4x?x4096xf16>
    %56288 = flow.tensor.transfer %56287 : tensor<4x?x4096xf16>{%dim_53686} to #hal.device.promise<@__device_5>
    %56289 = torch_c.from_builtin_tensor %56288 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56290 = torch_c.to_builtin_tensor %56112 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53687 = arith.constant 1 : index
    %dim_53688 = tensor.dim %56290, %c1_53687 : tensor<4x?x4096xf16>
    %56291 = flow.tensor.transfer %56290 : tensor<4x?x4096xf16>{%dim_53688} to #hal.device.promise<@__device_5>
    %56292 = torch_c.from_builtin_tensor %56291 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56293 = torch_c.to_builtin_tensor %56119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53689 = arith.constant 1 : index
    %dim_53690 = tensor.dim %56293, %c1_53689 : tensor<4x?x4096xf16>
    %56294 = flow.tensor.transfer %56293 : tensor<4x?x4096xf16>{%dim_53690} to #hal.device.promise<@__device_5>
    %56295 = torch_c.from_builtin_tensor %56294 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56296 = torch_c.to_builtin_tensor %56133 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53691 = arith.constant 1 : index
    %dim_53692 = tensor.dim %56296, %c1_53691 : tensor<4x?x4096xf16>
    %56297 = flow.tensor.transfer %56296 : tensor<4x?x4096xf16>{%dim_53692} to #hal.device.promise<@__device_5>
    %56298 = torch_c.from_builtin_tensor %56297 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56299 = torch_c.to_builtin_tensor %56140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53693 = arith.constant 1 : index
    %dim_53694 = tensor.dim %56299, %c1_53693 : tensor<4x?x4096xf16>
    %56300 = flow.tensor.transfer %56299 : tensor<4x?x4096xf16>{%dim_53694} to #hal.device.promise<@__device_5>
    %56301 = torch_c.from_builtin_tensor %56300 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
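    // @__device_5: eight-way sum; resident %56126 joins the chain at %56306.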
    %int1_53695 = torch.constant.int 1
    %56302 = torch.aten.add.Tensor %56283, %56286, %int1_53695 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53696 = torch.constant.int 1
    %56303 = torch.aten.add.Tensor %56302, %56289, %int1_53696 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53697 = torch.constant.int 1
    %56304 = torch.aten.add.Tensor %56303, %56292, %int1_53697 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53698 = torch.constant.int 1
    %56305 = torch.aten.add.Tensor %56304, %56295, %int1_53698 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53699 = torch.constant.int 1
    %56306 = torch.aten.add.Tensor %56305, %56126, %int1_53699 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53700 = torch.constant.int 1
    %56307 = torch.aten.add.Tensor %56306, %56298, %int1_53700 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56307, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53701 = torch.constant.int 1
    %56308 = torch.aten.add.Tensor %56307, %56301, %int1_53701 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
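    // @__device_6: gather the seven remote partials (local %56133 stays put).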
    %56309 = torch_c.to_builtin_tensor %56091 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53702 = arith.constant 1 : index
    %dim_53703 = tensor.dim %56309, %c1_53702 : tensor<4x?x4096xf16>
    %56310 = flow.tensor.transfer %56309 : tensor<4x?x4096xf16>{%dim_53703} to #hal.device.promise<@__device_6>
    %56311 = torch_c.from_builtin_tensor %56310 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56312 = torch_c.to_builtin_tensor %56098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53704 = arith.constant 1 : index
    %dim_53705 = tensor.dim %56312, %c1_53704 : tensor<4x?x4096xf16>
    %56313 = flow.tensor.transfer %56312 : tensor<4x?x4096xf16>{%dim_53705} to #hal.device.promise<@__device_6>
    %56314 = torch_c.from_builtin_tensor %56313 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56315 = torch_c.to_builtin_tensor %56105 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53706 = arith.constant 1 : index
    %dim_53707 = tensor.dim %56315, %c1_53706 : tensor<4x?x4096xf16>
    %56316 = flow.tensor.transfer %56315 : tensor<4x?x4096xf16>{%dim_53707} to #hal.device.promise<@__device_6>
    %56317 = torch_c.from_builtin_tensor %56316 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56317, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56318 = torch_c.to_builtin_tensor %56112 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53708 = arith.constant 1 : index
    %dim_53709 = tensor.dim %56318, %c1_53708 : tensor<4x?x4096xf16>
    %56319 = flow.tensor.transfer %56318 : tensor<4x?x4096xf16>{%dim_53709} to #hal.device.promise<@__device_6>
    %56320 = torch_c.from_builtin_tensor %56319 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56321 = torch_c.to_builtin_tensor %56119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53710 = arith.constant 1 : index
    %dim_53711 = tensor.dim %56321, %c1_53710 : tensor<4x?x4096xf16>
    %56322 = flow.tensor.transfer %56321 : tensor<4x?x4096xf16>{%dim_53711} to #hal.device.promise<@__device_6>
    %56323 = torch_c.from_builtin_tensor %56322 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56324 = torch_c.to_builtin_tensor %56126 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53712 = arith.constant 1 : index
    %dim_53713 = tensor.dim %56324, %c1_53712 : tensor<4x?x4096xf16>
    %56325 = flow.tensor.transfer %56324 : tensor<4x?x4096xf16>{%dim_53713} to #hal.device.promise<@__device_6>
    %56326 = torch_c.from_builtin_tensor %56325 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56327 = torch_c.to_builtin_tensor %56140 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53714 = arith.constant 1 : index
    %dim_53715 = tensor.dim %56327, %c1_53714 : tensor<4x?x4096xf16>
    %56328 = flow.tensor.transfer %56327 : tensor<4x?x4096xf16>{%dim_53715} to #hal.device.promise<@__device_6>
    %56329 = torch_c.from_builtin_tensor %56328 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56329, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
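    // @__device_6: eight-way sum; resident %56133 joins the chain at %56335.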
    %int1_53716 = torch.constant.int 1
    %56330 = torch.aten.add.Tensor %56311, %56314, %int1_53716 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53717 = torch.constant.int 1
    %56331 = torch.aten.add.Tensor %56330, %56317, %int1_53717 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53718 = torch.constant.int 1
    %56332 = torch.aten.add.Tensor %56331, %56320, %int1_53718 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56332, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53719 = torch.constant.int 1
    %56333 = torch.aten.add.Tensor %56332, %56323, %int1_53719 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53720 = torch.constant.int 1
    %56334 = torch.aten.add.Tensor %56333, %56326, %int1_53720 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53721 = torch.constant.int 1
    %56335 = torch.aten.add.Tensor %56334, %56133, %int1_53721 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53722 = torch.constant.int 1
    %56336 = torch.aten.add.Tensor %56335, %56329, %int1_53722 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
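    // @__device_7: gather the seven remote partials (local %56140 stays put).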
    %56337 = torch_c.to_builtin_tensor %56091 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53723 = arith.constant 1 : index
    %dim_53724 = tensor.dim %56337, %c1_53723 : tensor<4x?x4096xf16>
    %56338 = flow.tensor.transfer %56337 : tensor<4x?x4096xf16>{%dim_53724} to #hal.device.promise<@__device_7>
    %56339 = torch_c.from_builtin_tensor %56338 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56340 = torch_c.to_builtin_tensor %56098 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53725 = arith.constant 1 : index
    %dim_53726 = tensor.dim %56340, %c1_53725 : tensor<4x?x4096xf16>
    %56341 = flow.tensor.transfer %56340 : tensor<4x?x4096xf16>{%dim_53726} to #hal.device.promise<@__device_7>
    %56342 = torch_c.from_builtin_tensor %56341 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56343 = torch_c.to_builtin_tensor %56105 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53727 = arith.constant 1 : index
    %dim_53728 = tensor.dim %56343, %c1_53727 : tensor<4x?x4096xf16>
    %56344 = flow.tensor.transfer %56343 : tensor<4x?x4096xf16>{%dim_53728} to #hal.device.promise<@__device_7>
    %56345 = torch_c.from_builtin_tensor %56344 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56346 = torch_c.to_builtin_tensor %56112 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53729 = arith.constant 1 : index
    %dim_53730 = tensor.dim %56346, %c1_53729 : tensor<4x?x4096xf16>
    %56347 = flow.tensor.transfer %56346 : tensor<4x?x4096xf16>{%dim_53730} to #hal.device.promise<@__device_7>
    %56348 = torch_c.from_builtin_tensor %56347 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56349 = torch_c.to_builtin_tensor %56119 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53731 = arith.constant 1 : index
    %dim_53732 = tensor.dim %56349, %c1_53731 : tensor<4x?x4096xf16>
    %56350 = flow.tensor.transfer %56349 : tensor<4x?x4096xf16>{%dim_53732} to #hal.device.promise<@__device_7>
    %56351 = torch_c.from_builtin_tensor %56350 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56352 = torch_c.to_builtin_tensor %56126 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53733 = arith.constant 1 : index
    %dim_53734 = tensor.dim %56352, %c1_53733 : tensor<4x?x4096xf16>
    %56353 = flow.tensor.transfer %56352 : tensor<4x?x4096xf16>{%dim_53734} to #hal.device.promise<@__device_7>
    %56354 = torch_c.from_builtin_tensor %56353 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %56355 = torch_c.to_builtin_tensor %56133 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_53735 = arith.constant 1 : index
    %dim_53736 = tensor.dim %56355, %c1_53735 : tensor<4x?x4096xf16>
    %56356 = flow.tensor.transfer %56355 : tensor<4x?x4096xf16>{%dim_53736} to #hal.device.promise<@__device_7>
    %56357 = torch_c.from_builtin_tensor %56356 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
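    // @__device_7: eight-way sum; resident %56140 joins the chain at %56364.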
    %int1_53737 = torch.constant.int 1
    %56358 = torch.aten.add.Tensor %56339, %56342, %int1_53737 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53738 = torch.constant.int 1
    %56359 = torch.aten.add.Tensor %56358, %56345, %int1_53738 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53739 = torch.constant.int 1
    %56360 = torch.aten.add.Tensor %56359, %56348, %int1_53739 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53740 = torch.constant.int 1
    %56361 = torch.aten.add.Tensor %56360, %56351, %int1_53740 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53741 = torch.constant.int 1
    %56362 = torch.aten.add.Tensor %56361, %56354, %int1_53741 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53742 = torch.constant.int 1
    %56363 = torch.aten.add.Tensor %56362, %56357, %int1_53742 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53743 = torch.constant.int 1
    %56364 = torch.aten.add.Tensor %56363, %56140, %int1_53743 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
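    // Residual connection: add each device's reduced activation (%56168,
    // %56196, %56224, %56252, %56280, %56308, %56336, %56364) to that
    // device's copy of the block input (%55845 .. %55852).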
    %int1_53744 = torch.constant.int 1
    %56365 = torch.aten.add.Tensor %55845, %56168, %int1_53744 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53745 = torch.constant.int 1
    %56366 = torch.aten.add.Tensor %55846, %56196, %int1_53745 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53746 = torch.constant.int 1
    %56367 = torch.aten.add.Tensor %55847, %56224, %int1_53746 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53747 = torch.constant.int 1
    %56368 = torch.aten.add.Tensor %55848, %56252, %int1_53747 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53748 = torch.constant.int 1
    %56369 = torch.aten.add.Tensor %55849, %56280, %int1_53748 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53749 = torch.constant.int 1
    %56370 = torch.aten.add.Tensor %55850, %56308, %int1_53749 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53750 = torch.constant.int 1
    %56371 = torch.aten.add.Tensor %55851, %56336, %int1_53750 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_53751 = torch.constant.int 1
    %56372 = torch.aten.add.Tensor %55852, %56364, %int1_53751 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
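    // RMSNorm of each residual stream, replicated per device:
    //   y = x * rsqrt(mean(x^2, dim=-1) + eps) * w
    // Steps below: upcast f16 -> f32 (torch dtype 6), square, mean with
    // keepdim, add eps, rsqrt, scale, multiply by the norm weight, then
    // downcast back to f16 (torch dtype 5).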
    %int6_53752 = torch.constant.int 6
    %56373 = torch.prims.convert_element_type %56365, %int6_53752 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53753 = torch.constant.int 6
    %56374 = torch.prims.convert_element_type %56366, %int6_53753 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53754 = torch.constant.int 6
    %56375 = torch.prims.convert_element_type %56367, %int6_53754 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56375, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53755 = torch.constant.int 6
    %56376 = torch.prims.convert_element_type %56368, %int6_53755 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53756 = torch.constant.int 6
    %56377 = torch.prims.convert_element_type %56369, %int6_53756 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53757 = torch.constant.int 6
    %56378 = torch.prims.convert_element_type %56370, %int6_53757 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53758 = torch.constant.int 6
    %56379 = torch.prims.convert_element_type %56371, %int6_53758 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_53759 = torch.constant.int 6
    %56380 = torch.prims.convert_element_type %56372, %int6_53759 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
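    // x^2, then mean over the last (4096-wide) dimension with keepdim=true,
    // yielding [4,?,1] per-token second moments.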
    %int2_53760 = torch.constant.int 2
    %56381 = torch.aten.pow.Tensor_Scalar %56373, %int2_53760 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56381, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53761 = torch.constant.int 2
    %56382 = torch.aten.pow.Tensor_Scalar %56374, %int2_53761 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53762 = torch.constant.int 2
    %56383 = torch.aten.pow.Tensor_Scalar %56375, %int2_53762 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53763 = torch.constant.int 2
    %56384 = torch.aten.pow.Tensor_Scalar %56376, %int2_53763 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53764 = torch.constant.int 2
    %56385 = torch.aten.pow.Tensor_Scalar %56377, %int2_53764 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53765 = torch.constant.int 2
    %56386 = torch.aten.pow.Tensor_Scalar %56378, %int2_53765 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53766 = torch.constant.int 2
    %56387 = torch.aten.pow.Tensor_Scalar %56379, %int2_53766 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56387, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_53767 = torch.constant.int 2
    %56388 = torch.aten.pow.Tensor_Scalar %56380, %int2_53767 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int-1_53768 = torch.constant.int -1
    %56389 = torch.prim.ListConstruct %int-1_53768 : (!torch.int) -> !torch.list<int>
    %true_53769 = torch.constant.bool true
    %none_53770 = torch.constant.none
    %56390 = torch.aten.mean.dim %56381, %56389, %true_53769, %none_53770 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53771 = torch.constant.int -1
    %56391 = torch.prim.ListConstruct %int-1_53771 : (!torch.int) -> !torch.list<int>
    %true_53772 = torch.constant.bool true
    %none_53773 = torch.constant.none
    %56392 = torch.aten.mean.dim %56382, %56391, %true_53772, %none_53773 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53774 = torch.constant.int -1
    %56393 = torch.prim.ListConstruct %int-1_53774 : (!torch.int) -> !torch.list<int>
    %true_53775 = torch.constant.bool true
    %none_53776 = torch.constant.none
    %56394 = torch.aten.mean.dim %56383, %56393, %true_53775, %none_53776 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53777 = torch.constant.int -1
    %56395 = torch.prim.ListConstruct %int-1_53777 : (!torch.int) -> !torch.list<int>
    %true_53778 = torch.constant.bool true
    %none_53779 = torch.constant.none
    %56396 = torch.aten.mean.dim %56384, %56395, %true_53778, %none_53779 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53780 = torch.constant.int -1
    %56397 = torch.prim.ListConstruct %int-1_53780 : (!torch.int) -> !torch.list<int>
    %true_53781 = torch.constant.bool true
    %none_53782 = torch.constant.none
    %56398 = torch.aten.mean.dim %56385, %56397, %true_53781, %none_53782 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53783 = torch.constant.int -1
    %56399 = torch.prim.ListConstruct %int-1_53783 : (!torch.int) -> !torch.list<int>
    %true_53784 = torch.constant.bool true
    %none_53785 = torch.constant.none
    %56400 = torch.aten.mean.dim %56386, %56399, %true_53784, %none_53785 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53786 = torch.constant.int -1
    %56401 = torch.prim.ListConstruct %int-1_53786 : (!torch.int) -> !torch.list<int>
    %true_53787 = torch.constant.bool true
    %none_53788 = torch.constant.none
    %56402 = torch.aten.mean.dim %56387, %56401, %true_53787, %none_53788 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_53789 = torch.constant.int -1
    %56403 = torch.prim.ListConstruct %int-1_53789 : (!torch.int) -> !torch.list<int>
    %true_53790 = torch.constant.bool true
    %none_53791 = torch.constant.none
    %56404 = torch.aten.mean.dim %56388, %56403, %true_53790, %none_53791 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
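    // Add eps (1e-05, stored as its nearest f32 value 9.9999997e-06) to the
    // mean of squares before taking the reciprocal square root.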
    %float9.999990e-06_53792 = torch.constant.float 9.9999997473787516E-6
    %int1_53793 = torch.constant.int 1
    %56405 = torch.aten.add.Scalar %56390, %float9.999990e-06_53792, %int1_53793 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53794 = torch.constant.float 9.9999997473787516E-6
    %int1_53795 = torch.constant.int 1
    %56406 = torch.aten.add.Scalar %56392, %float9.999990e-06_53794, %int1_53795 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53796 = torch.constant.float 9.9999997473787516E-6
    %int1_53797 = torch.constant.int 1
    %56407 = torch.aten.add.Scalar %56394, %float9.999990e-06_53796, %int1_53797 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53798 = torch.constant.float 9.9999997473787516E-6
    %int1_53799 = torch.constant.int 1
    %56408 = torch.aten.add.Scalar %56396, %float9.999990e-06_53798, %int1_53799 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53800 = torch.constant.float 9.9999997473787516E-6
    %int1_53801 = torch.constant.int 1
    %56409 = torch.aten.add.Scalar %56398, %float9.999990e-06_53800, %int1_53801 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53802 = torch.constant.float 9.9999997473787516E-6
    %int1_53803 = torch.constant.int 1
    %56410 = torch.aten.add.Scalar %56400, %float9.999990e-06_53802, %int1_53803 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53804 = torch.constant.float 9.9999997473787516E-6
    %int1_53805 = torch.constant.int 1
    %56411 = torch.aten.add.Scalar %56402, %float9.999990e-06_53804, %int1_53805 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_53806 = torch.constant.float 9.9999997473787516E-6
    %int1_53807 = torch.constant.int 1
    %56412 = torch.aten.add.Scalar %56404, %float9.999990e-06_53806, %int1_53807 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56412, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
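    // rsqrt of the stabilized mean, then multiply back into the f32
    // activations to normalize them.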
    %56413 = torch.aten.rsqrt %56405 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %56414 = torch.aten.rsqrt %56406 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %56415 = torch.aten.rsqrt %56407 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %56416 = torch.aten.rsqrt %56408 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %56417 = torch.aten.rsqrt %56409 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %56418 = torch.aten.rsqrt %56410 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56418, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %56419 = torch.aten.rsqrt %56411 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %56420 = torch.aten.rsqrt %56412 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %56420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %56421 = torch.aten.mul.Tensor %56373, %56413 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56422 = torch.aten.mul.Tensor %56374, %56414 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56423 = torch.aten.mul.Tensor %56375, %56415 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56424 = torch.aten.mul.Tensor %56376, %56416 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56425 = torch.aten.mul.Tensor %56377, %56417 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56426 = torch.aten.mul.Tensor %56378, %56418 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56427 = torch.aten.mul.Tensor %56379, %56419 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56428 = torch.aten.mul.Tensor %56380, %56420 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
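    // Scale by the per-device norm weights %2096 .. %2103 : tensor<4096xf32>.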
    %56429 = torch.aten.mul.Tensor %2096, %56421 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56430 = torch.aten.mul.Tensor %2097, %56422 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56431 = torch.aten.mul.Tensor %2098, %56423 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56432 = torch.aten.mul.Tensor %2099, %56424 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56433 = torch.aten.mul.Tensor %2100, %56425 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56434 = torch.aten.mul.Tensor %2101, %56426 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56435 = torch.aten.mul.Tensor %2102, %56427 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %56436 = torch.aten.mul.Tensor %2103, %56428 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %56436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
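    // Downcast the normalized activations to f16 before the projections.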
    %int5_53808 = torch.constant.int 5
    %56437 = torch.prims.convert_element_type %56429, %int5_53808 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53809 = torch.constant.int 5
    %56438 = torch.prims.convert_element_type %56430, %int5_53809 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53810 = torch.constant.int 5
    %56439 = torch.prims.convert_element_type %56431, %int5_53810 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53811 = torch.constant.int 5
    %56440 = torch.prims.convert_element_type %56432, %int5_53811 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53812 = torch.constant.int 5
    %56441 = torch.prims.convert_element_type %56433, %int5_53812 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53813 = torch.constant.int 5
    %56442 = torch.prims.convert_element_type %56434, %int5_53813 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53814 = torch.constant.int 5
    %56443 = torch.prims.convert_element_type %56435, %int5_53814 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_53815 = torch.constant.int 5
    %56444 = torch.prims.convert_element_type %56436, %int5_53815 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %56444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
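    // Tensor-parallel projection, presumably attn_q: each device transposes
    // its 512x4096 weight shard (4096 output rows / 8 devices = 512, matching
    // the attn_q shard shapes declared at module scope) to 4096x512.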
    %int1_53816 = torch.constant.int 1
    %int0_53817 = torch.constant.int 0
    %56445 = torch.prim.ListConstruct %int1_53816, %int0_53817 : (!torch.int, !torch.int) -> !torch.list<int>
    %56446 = torch.aten.permute %2104, %56445 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_53818 = torch.constant.int 1
    %int0_53819 = torch.constant.int 0
    %56447 = torch.prim.ListConstruct %int1_53818, %int0_53819 : (!torch.int, !torch.int) -> !torch.list<int>
    %56448 = torch.aten.permute %2105, %56447 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_53820 = torch.constant.int 1
    %int0_53821 = torch.constant.int 0
    %56449 = torch.prim.ListConstruct %int1_53820, %int0_53821 : (!torch.int, !torch.int) -> !torch.list<int>
    %56450 = torch.aten.permute %2106, %56449 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_53822 = torch.constant.int 1
    %int0_53823 = torch.constant.int 0
    %56451 = torch.prim.ListConstruct %int1_53822, %int0_53823 : (!torch.int, !torch.int) -> !torch.list<int>
    %56452 = torch.aten.permute %2107, %56451 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_53824 = torch.constant.int 1
    %int0_53825 = torch.constant.int 0
    %56453 = torch.prim.ListConstruct %int1_53824, %int0_53825 : (!torch.int, !torch.int) -> !torch.list<int>
    %56454 = torch.aten.permute %2108, %56453 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_53826 = torch.constant.int 1
    %int0_53827 = torch.constant.int 0
    %56455 = torch.prim.ListConstruct %int1_53826, %int0_53827 : (!torch.int, !torch.int) -> !torch.list<int>
    %56456 = torch.aten.permute %2109, %56455 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_53828 = torch.constant.int 1
    %int0_53829 = torch.constant.int 0
    %56457 = torch.prim.ListConstruct %int1_53828, %int0_53829 : (!torch.int, !torch.int) -> !torch.list<int>
    %56458 = torch.aten.permute %2110, %56457 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_53830 = torch.constant.int 1
    %int0_53831 = torch.constant.int 0
    %56459 = torch.prim.ListConstruct %int1_53830, %int0_53831 : (!torch.int, !torch.int) -> !torch.list<int>
    %56460 = torch.aten.permute %2111, %56459 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
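    // Per device: collapse [4,?,4096] to [4*?,4096], torch.aten.mm against
    // the transposed 4096x512 shard, then reshape the result to [4,?,512].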
    %int4_53832 = torch.constant.int 4
    %56461 = torch.aten.mul.int %int4_53832, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53833 = torch.constant.int 4096
    %56462 = torch.prim.ListConstruct %56461, %int4096_53833 : (!torch.int, !torch.int) -> !torch.list<int>
    %56463 = torch.aten.view %56437, %56462 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56463, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56464 = torch.aten.mm %56463, %56446 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %56464, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_53834 = torch.constant.int 4
    %int512_53835 = torch.constant.int 512
    %56465 = torch.prim.ListConstruct %int4_53834, %2482, %int512_53835 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56466 = torch.aten.view %56464, %56465 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %56466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53836 = torch.constant.int 4
    %56467 = torch.aten.mul.int %int4_53836, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53837 = torch.constant.int 4096
    %56468 = torch.prim.ListConstruct %56467, %int4096_53837 : (!torch.int, !torch.int) -> !torch.list<int>
    %56469 = torch.aten.view %56438, %56468 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56469, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56470 = torch.aten.mm %56469, %56448 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %56470, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_53838 = torch.constant.int 4
    %int512_53839 = torch.constant.int 512
    %56471 = torch.prim.ListConstruct %int4_53838, %2482, %int512_53839 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56472 = torch.aten.view %56470, %56471 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %56472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53840 = torch.constant.int 4
    %56473 = torch.aten.mul.int %int4_53840, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53841 = torch.constant.int 4096
    %56474 = torch.prim.ListConstruct %56473, %int4096_53841 : (!torch.int, !torch.int) -> !torch.list<int>
    %56475 = torch.aten.view %56439, %56474 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56475, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56476 = torch.aten.mm %56475, %56450 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %56476, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_53842 = torch.constant.int 4
    %int512_53843 = torch.constant.int 512
    %56477 = torch.prim.ListConstruct %int4_53842, %2482, %int512_53843 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56478 = torch.aten.view %56476, %56477 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %56478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53844 = torch.constant.int 4
    %56479 = torch.aten.mul.int %int4_53844, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53845 = torch.constant.int 4096
    %56480 = torch.prim.ListConstruct %56479, %int4096_53845 : (!torch.int, !torch.int) -> !torch.list<int>
    %56481 = torch.aten.view %56440, %56480 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56481, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56482 = torch.aten.mm %56481, %56452 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %56482, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_53846 = torch.constant.int 4
    %int512_53847 = torch.constant.int 512
    %56483 = torch.prim.ListConstruct %int4_53846, %2482, %int512_53847 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56484 = torch.aten.view %56482, %56483 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %56484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53848 = torch.constant.int 4
    %56485 = torch.aten.mul.int %int4_53848, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53849 = torch.constant.int 4096
    %56486 = torch.prim.ListConstruct %56485, %int4096_53849 : (!torch.int, !torch.int) -> !torch.list<int>
    %56487 = torch.aten.view %56441, %56486 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56487, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56488 = torch.aten.mm %56487, %56454 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %56488, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_53850 = torch.constant.int 4
    %int512_53851 = torch.constant.int 512
    %56489 = torch.prim.ListConstruct %int4_53850, %2482, %int512_53851 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56490 = torch.aten.view %56488, %56489 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %56490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53852 = torch.constant.int 4
    %56491 = torch.aten.mul.int %int4_53852, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53853 = torch.constant.int 4096
    %56492 = torch.prim.ListConstruct %56491, %int4096_53853 : (!torch.int, !torch.int) -> !torch.list<int>
    %56493 = torch.aten.view %56442, %56492 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56493, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56494 = torch.aten.mm %56493, %56456 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %56494, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_53854 = torch.constant.int 4
    %int512_53855 = torch.constant.int 512
    %56495 = torch.prim.ListConstruct %int4_53854, %2482, %int512_53855 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56496 = torch.aten.view %56494, %56495 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %56496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53856 = torch.constant.int 4
    %56497 = torch.aten.mul.int %int4_53856, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53857 = torch.constant.int 4096
    %56498 = torch.prim.ListConstruct %56497, %int4096_53857 : (!torch.int, !torch.int) -> !torch.list<int>
    %56499 = torch.aten.view %56443, %56498 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56499, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56500 = torch.aten.mm %56499, %56458 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %56500, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_53858 = torch.constant.int 4
    %int512_53859 = torch.constant.int 512
    %56501 = torch.prim.ListConstruct %int4_53858, %2482, %int512_53859 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56502 = torch.aten.view %56500, %56501 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %56502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_53860 = torch.constant.int 4
    %56503 = torch.aten.mul.int %int4_53860, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53861 = torch.constant.int 4096
    %56504 = torch.prim.ListConstruct %56503, %int4096_53861 : (!torch.int, !torch.int) -> !torch.list<int>
    %56505 = torch.aten.view %56444, %56504 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56505, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56506 = torch.aten.mm %56505, %56460 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %56506, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_53862 = torch.constant.int 4
    %int512_53863 = torch.constant.int 512
    %56507 = torch.prim.ListConstruct %int4_53862, %2482, %int512_53863 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56508 = torch.aten.view %56506, %56507 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %56508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
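    // Next projection set: 128x4096 shards (1024 / 8 devices = 128),
    // consistent with a grouped-query K or V projection; K likely comes
    // first in this pattern.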
    %int1_53864 = torch.constant.int 1
    %int0_53865 = torch.constant.int 0
    %56509 = torch.prim.ListConstruct %int1_53864, %int0_53865 : (!torch.int, !torch.int) -> !torch.list<int>
    %56510 = torch.aten.permute %2112, %56509 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53866 = torch.constant.int 1
    %int0_53867 = torch.constant.int 0
    %56511 = torch.prim.ListConstruct %int1_53866, %int0_53867 : (!torch.int, !torch.int) -> !torch.list<int>
    %56512 = torch.aten.permute %2113, %56511 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53868 = torch.constant.int 1
    %int0_53869 = torch.constant.int 0
    %56513 = torch.prim.ListConstruct %int1_53868, %int0_53869 : (!torch.int, !torch.int) -> !torch.list<int>
    %56514 = torch.aten.permute %2114, %56513 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53870 = torch.constant.int 1
    %int0_53871 = torch.constant.int 0
    %56515 = torch.prim.ListConstruct %int1_53870, %int0_53871 : (!torch.int, !torch.int) -> !torch.list<int>
    %56516 = torch.aten.permute %2115, %56515 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53872 = torch.constant.int 1
    %int0_53873 = torch.constant.int 0
    %56517 = torch.prim.ListConstruct %int1_53872, %int0_53873 : (!torch.int, !torch.int) -> !torch.list<int>
    %56518 = torch.aten.permute %2116, %56517 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53874 = torch.constant.int 1
    %int0_53875 = torch.constant.int 0
    %56519 = torch.prim.ListConstruct %int1_53874, %int0_53875 : (!torch.int, !torch.int) -> !torch.list<int>
    %56520 = torch.aten.permute %2117, %56519 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53876 = torch.constant.int 1
    %int0_53877 = torch.constant.int 0
    %56521 = torch.prim.ListConstruct %int1_53876, %int0_53877 : (!torch.int, !torch.int) -> !torch.list<int>
    %56522 = torch.aten.permute %2118, %56521 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53878 = torch.constant.int 1
    %int0_53879 = torch.constant.int 0
    %56523 = torch.prim.ListConstruct %int1_53878, %int0_53879 : (!torch.int, !torch.int) -> !torch.list<int>
    %56524 = torch.aten.permute %2119, %56523 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
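    // Same flatten / mm / reshape sequence as above, producing [4,?,128]
    // per-device projections.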
    %int4_53880 = torch.constant.int 4
    %56525 = torch.aten.mul.int %int4_53880, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53881 = torch.constant.int 4096
    %56526 = torch.prim.ListConstruct %56525, %int4096_53881 : (!torch.int, !torch.int) -> !torch.list<int>
    %56527 = torch.aten.view %56437, %56526 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56527, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56528 = torch.aten.mm %56527, %56510 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56528, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53882 = torch.constant.int 4
    %int128_53883 = torch.constant.int 128
    %56529 = torch.prim.ListConstruct %int4_53882, %2482, %int128_53883 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56530 = torch.aten.view %56528, %56529 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53884 = torch.constant.int 4
    %56531 = torch.aten.mul.int %int4_53884, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53885 = torch.constant.int 4096
    %56532 = torch.prim.ListConstruct %56531, %int4096_53885 : (!torch.int, !torch.int) -> !torch.list<int>
    %56533 = torch.aten.view %56438, %56532 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56533, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56534 = torch.aten.mm %56533, %56512 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56534, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53886 = torch.constant.int 4
    %int128_53887 = torch.constant.int 128
    %56535 = torch.prim.ListConstruct %int4_53886, %2482, %int128_53887 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56536 = torch.aten.view %56534, %56535 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53888 = torch.constant.int 4
    %56537 = torch.aten.mul.int %int4_53888, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53889 = torch.constant.int 4096
    %56538 = torch.prim.ListConstruct %56537, %int4096_53889 : (!torch.int, !torch.int) -> !torch.list<int>
    %56539 = torch.aten.view %56439, %56538 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56539, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56540 = torch.aten.mm %56539, %56514 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56540, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53890 = torch.constant.int 4
    %int128_53891 = torch.constant.int 128
    %56541 = torch.prim.ListConstruct %int4_53890, %2482, %int128_53891 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56542 = torch.aten.view %56540, %56541 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56542, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53892 = torch.constant.int 4
    %56543 = torch.aten.mul.int %int4_53892, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53893 = torch.constant.int 4096
    %56544 = torch.prim.ListConstruct %56543, %int4096_53893 : (!torch.int, !torch.int) -> !torch.list<int>
    %56545 = torch.aten.view %56440, %56544 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56545, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56546 = torch.aten.mm %56545, %56516 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56546, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53894 = torch.constant.int 4
    %int128_53895 = torch.constant.int 128
    %56547 = torch.prim.ListConstruct %int4_53894, %2482, %int128_53895 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56548 = torch.aten.view %56546, %56547 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53896 = torch.constant.int 4
    %56549 = torch.aten.mul.int %int4_53896, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53897 = torch.constant.int 4096
    %56550 = torch.prim.ListConstruct %56549, %int4096_53897 : (!torch.int, !torch.int) -> !torch.list<int>
    %56551 = torch.aten.view %56441, %56550 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56551, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56552 = torch.aten.mm %56551, %56518 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56552, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53898 = torch.constant.int 4
    %int128_53899 = torch.constant.int 128
    %56553 = torch.prim.ListConstruct %int4_53898, %2482, %int128_53899 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56554 = torch.aten.view %56552, %56553 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56554, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53900 = torch.constant.int 4
    %56555 = torch.aten.mul.int %int4_53900, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53901 = torch.constant.int 4096
    %56556 = torch.prim.ListConstruct %56555, %int4096_53901 : (!torch.int, !torch.int) -> !torch.list<int>
    %56557 = torch.aten.view %56442, %56556 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56557, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56558 = torch.aten.mm %56557, %56520 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56558, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53902 = torch.constant.int 4
    %int128_53903 = torch.constant.int 128
    %56559 = torch.prim.ListConstruct %int4_53902, %2482, %int128_53903 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56560 = torch.aten.view %56558, %56559 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53904 = torch.constant.int 4
    %56561 = torch.aten.mul.int %int4_53904, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53905 = torch.constant.int 4096
    %56562 = torch.prim.ListConstruct %56561, %int4096_53905 : (!torch.int, !torch.int) -> !torch.list<int>
    %56563 = torch.aten.view %56443, %56562 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56563, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56564 = torch.aten.mm %56563, %56522 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56564, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53906 = torch.constant.int 4
    %int128_53907 = torch.constant.int 128
    %56565 = torch.prim.ListConstruct %int4_53906, %2482, %int128_53907 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56566 = torch.aten.view %56564, %56565 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53908 = torch.constant.int 4
    %56567 = torch.aten.mul.int %int4_53908, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53909 = torch.constant.int 4096
    %56568 = torch.prim.ListConstruct %56567, %int4096_53909 : (!torch.int, !torch.int) -> !torch.list<int>
    %56569 = torch.aten.view %56444, %56568 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56569, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56570 = torch.aten.mm %56569, %56524 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56570, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53910 = torch.constant.int 4
    %int128_53911 = torch.constant.int 128
    %56571 = torch.prim.ListConstruct %int4_53910, %2482, %int128_53911 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56572 = torch.aten.view %56570, %56571 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
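    // A second group of eight [128,4096] shards is transposed the same way
    // (presumably attn_v, mirroring the projection pattern above).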
    %int1_53912 = torch.constant.int 1
    %int0_53913 = torch.constant.int 0
    %56573 = torch.prim.ListConstruct %int1_53912, %int0_53913 : (!torch.int, !torch.int) -> !torch.list<int>
    %56574 = torch.aten.permute %2120, %56573 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53914 = torch.constant.int 1
    %int0_53915 = torch.constant.int 0
    %56575 = torch.prim.ListConstruct %int1_53914, %int0_53915 : (!torch.int, !torch.int) -> !torch.list<int>
    %56576 = torch.aten.permute %2121, %56575 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53916 = torch.constant.int 1
    %int0_53917 = torch.constant.int 0
    %56577 = torch.prim.ListConstruct %int1_53916, %int0_53917 : (!torch.int, !torch.int) -> !torch.list<int>
    %56578 = torch.aten.permute %2122, %56577 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53918 = torch.constant.int 1
    %int0_53919 = torch.constant.int 0
    %56579 = torch.prim.ListConstruct %int1_53918, %int0_53919 : (!torch.int, !torch.int) -> !torch.list<int>
    %56580 = torch.aten.permute %2123, %56579 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53920 = torch.constant.int 1
    %int0_53921 = torch.constant.int 0
    %56581 = torch.prim.ListConstruct %int1_53920, %int0_53921 : (!torch.int, !torch.int) -> !torch.list<int>
    %56582 = torch.aten.permute %2124, %56581 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53922 = torch.constant.int 1
    %int0_53923 = torch.constant.int 0
    %56583 = torch.prim.ListConstruct %int1_53922, %int0_53923 : (!torch.int, !torch.int) -> !torch.list<int>
    %56584 = torch.aten.permute %2125, %56583 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53924 = torch.constant.int 1
    %int0_53925 = torch.constant.int 0
    %56585 = torch.prim.ListConstruct %int1_53924, %int0_53925 : (!torch.int, !torch.int) -> !torch.list<int>
    %56586 = torch.aten.permute %2126, %56585 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_53926 = torch.constant.int 1
    %int0_53927 = torch.constant.int 0
    %56587 = torch.prim.ListConstruct %int1_53926, %int0_53927 : (!torch.int, !torch.int) -> !torch.list<int>
    %56588 = torch.aten.permute %2127, %56587 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
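    // Same flatten/matmul/unflatten sequence as above, yielding one [4,?,128]
    // result per device.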
    %int4_53928 = torch.constant.int 4
    %56589 = torch.aten.mul.int %int4_53928, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53929 = torch.constant.int 4096
    %56590 = torch.prim.ListConstruct %56589, %int4096_53929 : (!torch.int, !torch.int) -> !torch.list<int>
    %56591 = torch.aten.view %56437, %56590 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56591, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56592 = torch.aten.mm %56591, %56574 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56592, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53930 = torch.constant.int 4
    %int128_53931 = torch.constant.int 128
    %56593 = torch.prim.ListConstruct %int4_53930, %2482, %int128_53931 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56594 = torch.aten.view %56592, %56593 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53932 = torch.constant.int 4
    %56595 = torch.aten.mul.int %int4_53932, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53933 = torch.constant.int 4096
    %56596 = torch.prim.ListConstruct %56595, %int4096_53933 : (!torch.int, !torch.int) -> !torch.list<int>
    %56597 = torch.aten.view %56438, %56596 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56597, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56598 = torch.aten.mm %56597, %56576 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56598, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53934 = torch.constant.int 4
    %int128_53935 = torch.constant.int 128
    %56599 = torch.prim.ListConstruct %int4_53934, %2482, %int128_53935 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56600 = torch.aten.view %56598, %56599 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53936 = torch.constant.int 4
    %56601 = torch.aten.mul.int %int4_53936, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53937 = torch.constant.int 4096
    %56602 = torch.prim.ListConstruct %56601, %int4096_53937 : (!torch.int, !torch.int) -> !torch.list<int>
    %56603 = torch.aten.view %56439, %56602 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56603, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56604 = torch.aten.mm %56603, %56578 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56604, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53938 = torch.constant.int 4
    %int128_53939 = torch.constant.int 128
    %56605 = torch.prim.ListConstruct %int4_53938, %2482, %int128_53939 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56606 = torch.aten.view %56604, %56605 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53940 = torch.constant.int 4
    %56607 = torch.aten.mul.int %int4_53940, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53941 = torch.constant.int 4096
    %56608 = torch.prim.ListConstruct %56607, %int4096_53941 : (!torch.int, !torch.int) -> !torch.list<int>
    %56609 = torch.aten.view %56440, %56608 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56609, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56610 = torch.aten.mm %56609, %56580 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56610, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53942 = torch.constant.int 4
    %int128_53943 = torch.constant.int 128
    %56611 = torch.prim.ListConstruct %int4_53942, %2482, %int128_53943 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56612 = torch.aten.view %56610, %56611 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53944 = torch.constant.int 4
    %56613 = torch.aten.mul.int %int4_53944, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53945 = torch.constant.int 4096
    %56614 = torch.prim.ListConstruct %56613, %int4096_53945 : (!torch.int, !torch.int) -> !torch.list<int>
    %56615 = torch.aten.view %56441, %56614 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56615, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56616 = torch.aten.mm %56615, %56582 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56616, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53946 = torch.constant.int 4
    %int128_53947 = torch.constant.int 128
    %56617 = torch.prim.ListConstruct %int4_53946, %2482, %int128_53947 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56618 = torch.aten.view %56616, %56617 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53948 = torch.constant.int 4
    %56619 = torch.aten.mul.int %int4_53948, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53949 = torch.constant.int 4096
    %56620 = torch.prim.ListConstruct %56619, %int4096_53949 : (!torch.int, !torch.int) -> !torch.list<int>
    %56621 = torch.aten.view %56442, %56620 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56621, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56622 = torch.aten.mm %56621, %56584 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56622, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53950 = torch.constant.int 4
    %int128_53951 = torch.constant.int 128
    %56623 = torch.prim.ListConstruct %int4_53950, %2482, %int128_53951 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56624 = torch.aten.view %56622, %56623 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53952 = torch.constant.int 4
    %56625 = torch.aten.mul.int %int4_53952, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53953 = torch.constant.int 4096
    %56626 = torch.prim.ListConstruct %56625, %int4096_53953 : (!torch.int, !torch.int) -> !torch.list<int>
    %56627 = torch.aten.view %56443, %56626 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56627, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56628 = torch.aten.mm %56627, %56586 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56628, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53954 = torch.constant.int 4
    %int128_53955 = torch.constant.int 128
    %56629 = torch.prim.ListConstruct %int4_53954, %2482, %int128_53955 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56630 = torch.aten.view %56628, %56629 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_53956 = torch.constant.int 4
    %56631 = torch.aten.mul.int %int4_53956, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_53957 = torch.constant.int 4096
    %56632 = torch.prim.ListConstruct %56631, %int4096_53957 : (!torch.int, !torch.int) -> !torch.list<int>
    %56633 = torch.aten.view %56444, %56632 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %56633, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %56634 = torch.aten.mm %56633, %56588 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %56634, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_53958 = torch.constant.int 4
    %int128_53959 = torch.constant.int 128
    %56635 = torch.prim.ListConstruct %int4_53958, %2482, %int128_53959 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56636 = torch.aten.view %56634, %56635 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %56636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
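    // Reshape the eight [4,?,512] projection outputs to [4,?,4,128]:
    // four 128-dim attention heads per device.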
    %int4_53960 = torch.constant.int 4
    %int4_53961 = torch.constant.int 4
    %int128_53962 = torch.constant.int 128
    %56637 = torch.prim.ListConstruct %int4_53960, %2482, %int4_53961, %int128_53962 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56638 = torch.aten.view %56466, %56637 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53963 = torch.constant.int 4
    %int4_53964 = torch.constant.int 4
    %int128_53965 = torch.constant.int 128
    %56639 = torch.prim.ListConstruct %int4_53963, %2482, %int4_53964, %int128_53965 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56640 = torch.aten.view %56472, %56639 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53966 = torch.constant.int 4
    %int4_53967 = torch.constant.int 4
    %int128_53968 = torch.constant.int 128
    %56641 = torch.prim.ListConstruct %int4_53966, %2482, %int4_53967, %int128_53968 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56642 = torch.aten.view %56478, %56641 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53969 = torch.constant.int 4
    %int4_53970 = torch.constant.int 4
    %int128_53971 = torch.constant.int 128
    %56643 = torch.prim.ListConstruct %int4_53969, %2482, %int4_53970, %int128_53971 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56644 = torch.aten.view %56484, %56643 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53972 = torch.constant.int 4
    %int4_53973 = torch.constant.int 4
    %int128_53974 = torch.constant.int 128
    %56645 = torch.prim.ListConstruct %int4_53972, %2482, %int4_53973, %int128_53974 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56646 = torch.aten.view %56490, %56645 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53975 = torch.constant.int 4
    %int4_53976 = torch.constant.int 4
    %int128_53977 = torch.constant.int 128
    %56647 = torch.prim.ListConstruct %int4_53975, %2482, %int4_53976, %int128_53977 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56648 = torch.aten.view %56496, %56647 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53978 = torch.constant.int 4
    %int4_53979 = torch.constant.int 4
    %int128_53980 = torch.constant.int 128
    %56649 = torch.prim.ListConstruct %int4_53978, %2482, %int4_53979, %int128_53980 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56650 = torch.aten.view %56502, %56649 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_53981 = torch.constant.int 4
    %int4_53982 = torch.constant.int 4
    %int128_53983 = torch.constant.int 128
    %56651 = torch.prim.ListConstruct %int4_53981, %2482, %int4_53982, %int128_53983 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56652 = torch.aten.view %56508, %56651 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
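    // Reshape the sixteen [4,?,128] outputs to [4,?,1,128]: a single KV head
    // per device, consistent with grouped-query attention.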
    %int4_53984 = torch.constant.int 4
    %int1_53985 = torch.constant.int 1
    %int128_53986 = torch.constant.int 128
    %56653 = torch.prim.ListConstruct %int4_53984, %2482, %int1_53985, %int128_53986 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56654 = torch.aten.view %56530, %56653 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_53987 = torch.constant.int 4
    %int1_53988 = torch.constant.int 1
    %int128_53989 = torch.constant.int 128
    %56655 = torch.prim.ListConstruct %int4_53987, %2482, %int1_53988, %int128_53989 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56656 = torch.aten.view %56536, %56655 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56656, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_53990 = torch.constant.int 4
    %int1_53991 = torch.constant.int 1
    %int128_53992 = torch.constant.int 128
    %56657 = torch.prim.ListConstruct %int4_53990, %2482, %int1_53991, %int128_53992 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56658 = torch.aten.view %56542, %56657 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_53993 = torch.constant.int 4
    %int1_53994 = torch.constant.int 1
    %int128_53995 = torch.constant.int 128
    %56659 = torch.prim.ListConstruct %int4_53993, %2482, %int1_53994, %int128_53995 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56660 = torch.aten.view %56548, %56659 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56660, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_53996 = torch.constant.int 4
    %int1_53997 = torch.constant.int 1
    %int128_53998 = torch.constant.int 128
    %56661 = torch.prim.ListConstruct %int4_53996, %2482, %int1_53997, %int128_53998 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56662 = torch.aten.view %56554, %56661 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_53999 = torch.constant.int 4
    %int1_54000 = torch.constant.int 1
    %int128_54001 = torch.constant.int 128
    %56663 = torch.prim.ListConstruct %int4_53999, %2482, %int1_54000, %int128_54001 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56664 = torch.aten.view %56560, %56663 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_54002 = torch.constant.int 4
    %int1_54003 = torch.constant.int 1
    %int128_54004 = torch.constant.int 128
    %56665 = torch.prim.ListConstruct %int4_54002, %2482, %int1_54003, %int128_54004 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56666 = torch.aten.view %56566, %56665 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56666, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_54005 = torch.constant.int 4
    %int1_54006 = torch.constant.int 1
    %int128_54007 = torch.constant.int 128
    %56667 = torch.prim.ListConstruct %int4_54005, %2482, %int1_54006, %int128_54007 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56668 = torch.aten.view %56572, %56667 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_54008 = torch.constant.int 4
    %int1_54009 = torch.constant.int 1
    %int128_54010 = torch.constant.int 128
    %56669 = torch.prim.ListConstruct %int4_54008, %2482, %int1_54009, %int128_54010 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56670 = torch.aten.view %56594, %56669 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_54011 = torch.constant.int 4
    %int1_54012 = torch.constant.int 1
    %int128_54013 = torch.constant.int 128
    %56671 = torch.prim.ListConstruct %int4_54011, %2482, %int1_54012, %int128_54013 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56672 = torch.aten.view %56600, %56671 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_54014 = torch.constant.int 4
    %int1_54015 = torch.constant.int 1
    %int128_54016 = torch.constant.int 128
    %56673 = torch.prim.ListConstruct %int4_54014, %2482, %int1_54015, %int128_54016 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56674 = torch.aten.view %56606, %56673 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_54017 = torch.constant.int 4
    %int1_54018 = torch.constant.int 1
    %int128_54019 = torch.constant.int 128
    %56675 = torch.prim.ListConstruct %int4_54017, %2482, %int1_54018, %int128_54019 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56676 = torch.aten.view %56612, %56675 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_54020 = torch.constant.int 4
    %int1_54021 = torch.constant.int 1
    %int128_54022 = torch.constant.int 128
    %56677 = torch.prim.ListConstruct %int4_54020, %2482, %int1_54021, %int128_54022 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56678 = torch.aten.view %56618, %56677 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56678, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_54023 = torch.constant.int 4
    %int1_54024 = torch.constant.int 1
    %int128_54025 = torch.constant.int 128
    %56679 = torch.prim.ListConstruct %int4_54023, %2482, %int1_54024, %int128_54025 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56680 = torch.aten.view %56624, %56679 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_54026 = torch.constant.int 4
    %int1_54027 = torch.constant.int 1
    %int128_54028 = torch.constant.int 128
    %56681 = torch.prim.ListConstruct %int4_54026, %2482, %int1_54027, %int128_54028 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56682 = torch.aten.view %56630, %56681 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56682, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_54029 = torch.constant.int 4
    %int1_54030 = torch.constant.int 1
    %int128_54031 = torch.constant.int 128
    %56683 = torch.prim.ListConstruct %int4_54029, %2482, %int1_54030, %int128_54031 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %56684 = torch.aten.view %56636, %56683 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
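    // Build the rotary-embedding table on the host ("cpu"):
    //   inv_freq[i] = 1 / 500000^(2i/128)         for i = 0..63
    //   angle[p,i]  = p * inv_freq[i]             for p = 0..131071
    //   table[p,i]  = cos(angle) + j*sin(angle)   -> [131072,64] complex<f32>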
    %int131072_54032 = torch.constant.int 131072
    %none_54033 = torch.constant.none
    %none_54034 = torch.constant.none
    %cpu_54035 = torch.constant.device "cpu"
    %false_54036 = torch.constant.bool false
    %56685 = torch.aten.arange %int131072_54032, %none_54033, %none_54034, %cpu_54035, %false_54036 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_54037 = torch.constant.int 0
    %int128_54038 = torch.constant.int 128
    %int2_54039 = torch.constant.int 2
    %none_54040 = torch.constant.none
    %none_54041 = torch.constant.none
    %cpu_54042 = torch.constant.device "cpu"
    %false_54043 = torch.constant.bool false
    %56686 = torch.aten.arange.start_step %int0_54037, %int128_54038, %int2_54039, %none_54040, %none_54041, %cpu_54042, %false_54043 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_54044 = torch.constant.int 0
    %int0_54045 = torch.constant.int 0
    %int64_54046 = torch.constant.int 64
    %int1_54047 = torch.constant.int 1
    %56687 = torch.aten.slice.Tensor %56686, %int0_54044, %int0_54045, %int64_54046, %int1_54047 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_54048 = torch.constant.int 6
    %56688 = torch.prims.convert_element_type %56687, %int6_54048 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_54049 = torch.constant.int 128
    %56689 = torch.aten.div.Scalar %56688, %int128_54049 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_54050 = torch.constant.float 5.000000e+05
    %56690 = torch.aten.pow.Scalar %float5.000000e05_54050, %56689 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %56691 = torch.aten.reciprocal %56690 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_54051 = torch.constant.float 1.000000e+00
    %56692 = torch.aten.mul.Scalar %56691, %float1.000000e00_54051 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_54052 = torch.constant.int 131072
    %int1_54053 = torch.constant.int 1
    %56693 = torch.prim.ListConstruct %int131072_54052, %int1_54053 : (!torch.int, !torch.int) -> !torch.list<int>
    %56694 = torch.aten.view %56685, %56693 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %56695 = torch.aten.mul.Tensor %56694, %56692 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %56696 = torch.aten.cos %56695 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %56697 = torch.aten.sin %56695 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %56698 = torch.aten.complex %56696, %56697 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
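    // Replicate the [131072,64] complex table onto all eight logical devices.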
    %56699 = torch_c.to_builtin_tensor %56698 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56700 = flow.tensor.transfer %56699 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %56701 = torch_c.from_builtin_tensor %56700 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56702 = torch_c.to_builtin_tensor %56698 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56703 = flow.tensor.transfer %56702 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %56704 = torch_c.from_builtin_tensor %56703 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56705 = torch_c.to_builtin_tensor %56698 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56706 = flow.tensor.transfer %56705 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %56707 = torch_c.from_builtin_tensor %56706 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56708 = torch_c.to_builtin_tensor %56698 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56709 = flow.tensor.transfer %56708 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %56710 = torch_c.from_builtin_tensor %56709 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56711 = torch_c.to_builtin_tensor %56698 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56712 = flow.tensor.transfer %56711 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %56713 = torch_c.from_builtin_tensor %56712 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56714 = torch_c.to_builtin_tensor %56698 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56715 = flow.tensor.transfer %56714 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %56716 = torch_c.from_builtin_tensor %56715 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56717 = torch_c.to_builtin_tensor %56698 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56718 = flow.tensor.transfer %56717 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %56719 = torch_c.from_builtin_tensor %56718 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56720 = torch_c.to_builtin_tensor %56698 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56721 = flow.tensor.transfer %56720 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %56722 = torch_c.from_builtin_tensor %56721 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
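    // Apply the rotary embedding per device: slice the table to the current
    // sequence length (s0 * 16 rows), broadcast it as [1,?,1,64], bitcast the
    // [4,?,4,128] f16 activation to [4,?,4,64] complex<f16>, multiply, bitcast
    // back to f32 pairs, and truncate to f16.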
    %int1_54054 = torch.constant.int 1
    %56723 = torch.aten.size.int %56466, %int1_54054 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_54055 = torch.constant.int 0
    %56724 = torch.aten.add.int %int0_54055, %56723 : !torch.int, !torch.int -> !torch.int
    %int0_54056 = torch.constant.int 0
    %int0_54057 = torch.constant.int 0
    %int1_54058 = torch.constant.int 1
    %56725 = torch.aten.slice.Tensor %56701, %int0_54056, %int0_54057, %56724, %int1_54058 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56725, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54059 = torch.constant.int 1
    %int0_54060 = torch.constant.int 0
    %int9223372036854775807_54061 = torch.constant.int 9223372036854775807
    %int1_54062 = torch.constant.int 1
    %56726 = torch.aten.slice.Tensor %56725, %int1_54059, %int0_54060, %int9223372036854775807_54061, %int1_54062 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56726, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54063 = torch.constant.int 0
    %56727 = torch.aten.unsqueeze %56726, %int0_54063 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56727, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54064 = torch.constant.int 2
    %56728 = torch.aten.unsqueeze %56727, %int2_54064 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56728, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54065 = torch.constant.int 3
    %int0_54066 = torch.constant.int 0
    %int9223372036854775807_54067 = torch.constant.int 9223372036854775807
    %int1_54068 = torch.constant.int 1
    %56729 = torch.aten.slice.Tensor %56728, %int3_54065, %int0_54066, %int9223372036854775807_54067, %int1_54068 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56729, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56730 = torch_c.to_builtin_tensor %56638 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_54069 = arith.constant 1 : index
    %dim_54070 = tensor.dim %56730, %c1_54069 : tensor<4x?x4x128xf16>
    %56731 = flow.tensor.bitcast %56730 : tensor<4x?x4x128xf16>{%dim_54070} -> tensor<4x?x4x64xcomplex<f16>>{%dim_54070}
    %56732 = torch_c.from_builtin_tensor %56731 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %56732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %56733 = torch.aten.mul.Tensor %56732, %56729 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %56733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %56734 = torch_c.to_builtin_tensor %56733 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_54071 = arith.constant 1 : index
    %dim_54072 = tensor.dim %56734, %c1_54071 : tensor<4x?x4x64xcomplex<f32>>
    %56735 = flow.tensor.bitcast %56734 : tensor<4x?x4x64xcomplex<f32>>{%dim_54072} -> tensor<4x?x4x128xf32>{%dim_54072}
    %56736 = torch_c.from_builtin_tensor %56735 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %56736, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_54073 = torch.constant.int 5
    %56737 = torch.prims.convert_element_type %56736, %int5_54073 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
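    // The same slice/bitcast/multiply sequence is repeated below for each
    // remaining device.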
    %int1_54074 = torch.constant.int 1
    %56738 = torch.aten.size.int %56472, %int1_54074 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_54075 = torch.constant.int 0
    %56739 = torch.aten.add.int %int0_54075, %56738 : !torch.int, !torch.int -> !torch.int
    %int0_54076 = torch.constant.int 0
    %int0_54077 = torch.constant.int 0
    %int1_54078 = torch.constant.int 1
    %56740 = torch.aten.slice.Tensor %56704, %int0_54076, %int0_54077, %56739, %int1_54078 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56740, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54079 = torch.constant.int 1
    %int0_54080 = torch.constant.int 0
    %int9223372036854775807_54081 = torch.constant.int 9223372036854775807
    %int1_54082 = torch.constant.int 1
    %56741 = torch.aten.slice.Tensor %56740, %int1_54079, %int0_54080, %int9223372036854775807_54081, %int1_54082 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56741, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54083 = torch.constant.int 0
    %56742 = torch.aten.unsqueeze %56741, %int0_54083 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56742, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54084 = torch.constant.int 2
    %56743 = torch.aten.unsqueeze %56742, %int2_54084 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56743, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54085 = torch.constant.int 3
    %int0_54086 = torch.constant.int 0
    %int9223372036854775807_54087 = torch.constant.int 9223372036854775807
    %int1_54088 = torch.constant.int 1
    %56744 = torch.aten.slice.Tensor %56743, %int3_54085, %int0_54086, %int9223372036854775807_54087, %int1_54088 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56744, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56745 = torch_c.to_builtin_tensor %56640 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_54089 = arith.constant 1 : index
    %dim_54090 = tensor.dim %56745, %c1_54089 : tensor<4x?x4x128xf16>
    %56746 = flow.tensor.bitcast %56745 : tensor<4x?x4x128xf16>{%dim_54090} -> tensor<4x?x4x64xcomplex<f16>>{%dim_54090}
    %56747 = torch_c.from_builtin_tensor %56746 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %56747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %56748 = torch.aten.mul.Tensor %56747, %56744 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %56748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %56749 = torch_c.to_builtin_tensor %56748 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_54091 = arith.constant 1 : index
    %dim_54092 = tensor.dim %56749, %c1_54091 : tensor<4x?x4x64xcomplex<f32>>
    %56750 = flow.tensor.bitcast %56749 : tensor<4x?x4x64xcomplex<f32>>{%dim_54092} -> tensor<4x?x4x128xf32>{%dim_54092}
    %56751 = torch_c.from_builtin_tensor %56750 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %56751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_54093 = torch.constant.int 5
    %56752 = torch.prims.convert_element_type %56751, %int5_54093 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_54094 = torch.constant.int 1
    %56753 = torch.aten.size.int %56478, %int1_54094 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_54095 = torch.constant.int 0
    %56754 = torch.aten.add.int %int0_54095, %56753 : !torch.int, !torch.int -> !torch.int
    %int0_54096 = torch.constant.int 0
    %int0_54097 = torch.constant.int 0
    %int1_54098 = torch.constant.int 1
    %56755 = torch.aten.slice.Tensor %56707, %int0_54096, %int0_54097, %56754, %int1_54098 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56755, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54099 = torch.constant.int 1
    %int0_54100 = torch.constant.int 0
    %int9223372036854775807_54101 = torch.constant.int 9223372036854775807
    %int1_54102 = torch.constant.int 1
    %56756 = torch.aten.slice.Tensor %56755, %int1_54099, %int0_54100, %int9223372036854775807_54101, %int1_54102 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56756, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54103 = torch.constant.int 0
    %56757 = torch.aten.unsqueeze %56756, %int0_54103 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56757, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54104 = torch.constant.int 2
    %56758 = torch.aten.unsqueeze %56757, %int2_54104 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56758, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54105 = torch.constant.int 3
    %int0_54106 = torch.constant.int 0
    %int9223372036854775807_54107 = torch.constant.int 9223372036854775807
    %int1_54108 = torch.constant.int 1
    %56759 = torch.aten.slice.Tensor %56758, %int3_54105, %int0_54106, %int9223372036854775807_54107, %int1_54108 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56759, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56760 = torch_c.to_builtin_tensor %56642 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_54109 = arith.constant 1 : index
    %dim_54110 = tensor.dim %56760, %c1_54109 : tensor<4x?x4x128xf16>
    %56761 = flow.tensor.bitcast %56760 : tensor<4x?x4x128xf16>{%dim_54110} -> tensor<4x?x4x64xcomplex<f16>>{%dim_54110}
    %56762 = torch_c.from_builtin_tensor %56761 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %56762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %56763 = torch.aten.mul.Tensor %56762, %56759 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %56763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %56764 = torch_c.to_builtin_tensor %56763 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_54111 = arith.constant 1 : index
    %dim_54112 = tensor.dim %56764, %c1_54111 : tensor<4x?x4x64xcomplex<f32>>
    %56765 = flow.tensor.bitcast %56764 : tensor<4x?x4x64xcomplex<f32>>{%dim_54112} -> tensor<4x?x4x128xf32>{%dim_54112}
    %56766 = torch_c.from_builtin_tensor %56765 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %56766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_54113 = torch.constant.int 5
    %56767 = torch.prims.convert_element_type %56766, %int5_54113 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_54114 = torch.constant.int 1
    %56768 = torch.aten.size.int %56484, %int1_54114 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_54115 = torch.constant.int 0
    %56769 = torch.aten.add.int %int0_54115, %56768 : !torch.int, !torch.int -> !torch.int
    %int0_54116 = torch.constant.int 0
    %int0_54117 = torch.constant.int 0
    %int1_54118 = torch.constant.int 1
    %56770 = torch.aten.slice.Tensor %56710, %int0_54116, %int0_54117, %56769, %int1_54118 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56770, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54119 = torch.constant.int 1
    %int0_54120 = torch.constant.int 0
    %int9223372036854775807_54121 = torch.constant.int 9223372036854775807
    %int1_54122 = torch.constant.int 1
    %56771 = torch.aten.slice.Tensor %56770, %int1_54119, %int0_54120, %int9223372036854775807_54121, %int1_54122 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56771, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54123 = torch.constant.int 0
    %56772 = torch.aten.unsqueeze %56771, %int0_54123 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56772, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54124 = torch.constant.int 2
    %56773 = torch.aten.unsqueeze %56772, %int2_54124 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56773, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54125 = torch.constant.int 3
    %int0_54126 = torch.constant.int 0
    %int9223372036854775807_54127 = torch.constant.int 9223372036854775807
    %int1_54128 = torch.constant.int 1
    %56774 = torch.aten.slice.Tensor %56773, %int3_54125, %int0_54126, %int9223372036854775807_54127, %int1_54128 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56774, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56775 = torch_c.to_builtin_tensor %56644 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_54129 = arith.constant 1 : index
    %dim_54130 = tensor.dim %56775, %c1_54129 : tensor<4x?x4x128xf16>
    %56776 = flow.tensor.bitcast %56775 : tensor<4x?x4x128xf16>{%dim_54130} -> tensor<4x?x4x64xcomplex<f16>>{%dim_54130}
    %56777 = torch_c.from_builtin_tensor %56776 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %56777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %56778 = torch.aten.mul.Tensor %56777, %56774 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %56778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %56779 = torch_c.to_builtin_tensor %56778 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_54131 = arith.constant 1 : index
    %dim_54132 = tensor.dim %56779, %c1_54131 : tensor<4x?x4x64xcomplex<f32>>
    %56780 = flow.tensor.bitcast %56779 : tensor<4x?x4x64xcomplex<f32>>{%dim_54132} -> tensor<4x?x4x128xf32>{%dim_54132}
    %56781 = torch_c.from_builtin_tensor %56780 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %56781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_54133 = torch.constant.int 5
    %56782 = torch.prims.convert_element_type %56781, %int5_54133 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
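    // NOTE: The block above, and the analogous blocks that follow, appear to apply
    // rotary position embedding (RoPE) to one attention shard: the dynamic sequence
    // length is read off the activation, the precomputed [131072, 64] complex<f32>
    // rotation table is sliced to that length and unsqueezed to [1, seq, 1, 64] for
    // broadcasting, the [4, seq, H, 128] f16 activation is bitcast to
    // [4, seq, H, 64] complex<f16> (flow.tensor.bitcast pairs adjacent f16 values
    // into real/imaginary components), the two are multiplied elementwise, and the
    // product is bitcast back to f16 pairs. A minimal NumPy-style sketch of one such
    // application (hypothetical helper code, not part of this module; it emulates
    // the bitcasts up to precision):
    //   xc  = x.astype(np.float32).reshape(4, s, H, 64, 2)   # x: f16 [4, s, H, 128]
    //   xc  = xc[..., 0] + 1j * xc[..., 1]                   # the f16->complex bitcast
    //   rot = table[:s][None, :, None, :]                    # [1, s, 1, 64] complex
    //   y   = xc * rot
    //   out = np.stack([y.real, y.imag], -1).reshape(4, s, H, 128).astype(np.float16)
    // The same instruction sequence repeats verbatim for each remaining shard below.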
    %int1_54134 = torch.constant.int 1
    %56783 = torch.aten.size.int %56490, %int1_54134 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_54135 = torch.constant.int 0
    %56784 = torch.aten.add.int %int0_54135, %56783 : !torch.int, !torch.int -> !torch.int
    %int0_54136 = torch.constant.int 0
    %int0_54137 = torch.constant.int 0
    %int1_54138 = torch.constant.int 1
    %56785 = torch.aten.slice.Tensor %56713, %int0_54136, %int0_54137, %56784, %int1_54138 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56785, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54139 = torch.constant.int 1
    %int0_54140 = torch.constant.int 0
    %int9223372036854775807_54141 = torch.constant.int 9223372036854775807
    %int1_54142 = torch.constant.int 1
    %56786 = torch.aten.slice.Tensor %56785, %int1_54139, %int0_54140, %int9223372036854775807_54141, %int1_54142 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56786, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54143 = torch.constant.int 0
    %56787 = torch.aten.unsqueeze %56786, %int0_54143 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56787, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54144 = torch.constant.int 2
    %56788 = torch.aten.unsqueeze %56787, %int2_54144 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56788, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54145 = torch.constant.int 3
    %int0_54146 = torch.constant.int 0
    %int9223372036854775807_54147 = torch.constant.int 9223372036854775807
    %int1_54148 = torch.constant.int 1
    %56789 = torch.aten.slice.Tensor %56788, %int3_54145, %int0_54146, %int9223372036854775807_54147, %int1_54148 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56789, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56790 = torch_c.to_builtin_tensor %56646 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_54149 = arith.constant 1 : index
    %dim_54150 = tensor.dim %56790, %c1_54149 : tensor<4x?x4x128xf16>
    %56791 = flow.tensor.bitcast %56790 : tensor<4x?x4x128xf16>{%dim_54150} -> tensor<4x?x4x64xcomplex<f16>>{%dim_54150}
    %56792 = torch_c.from_builtin_tensor %56791 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %56792, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %56793 = torch.aten.mul.Tensor %56792, %56789 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %56793, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %56794 = torch_c.to_builtin_tensor %56793 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_54151 = arith.constant 1 : index
    %dim_54152 = tensor.dim %56794, %c1_54151 : tensor<4x?x4x64xcomplex<f32>>
    %56795 = flow.tensor.bitcast %56794 : tensor<4x?x4x64xcomplex<f32>>{%dim_54152} -> tensor<4x?x4x128xf32>{%dim_54152}
    %56796 = torch_c.from_builtin_tensor %56795 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %56796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_54153 = torch.constant.int 5
    %56797 = torch.prims.convert_element_type %56796, %int5_54153 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_54154 = torch.constant.int 1
    %56798 = torch.aten.size.int %56496, %int1_54154 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_54155 = torch.constant.int 0
    %56799 = torch.aten.add.int %int0_54155, %56798 : !torch.int, !torch.int -> !torch.int
    %int0_54156 = torch.constant.int 0
    %int0_54157 = torch.constant.int 0
    %int1_54158 = torch.constant.int 1
    %56800 = torch.aten.slice.Tensor %56716, %int0_54156, %int0_54157, %56799, %int1_54158 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56800, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54159 = torch.constant.int 1
    %int0_54160 = torch.constant.int 0
    %int9223372036854775807_54161 = torch.constant.int 9223372036854775807
    %int1_54162 = torch.constant.int 1
    %56801 = torch.aten.slice.Tensor %56800, %int1_54159, %int0_54160, %int9223372036854775807_54161, %int1_54162 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56801, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54163 = torch.constant.int 0
    %56802 = torch.aten.unsqueeze %56801, %int0_54163 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56802, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54164 = torch.constant.int 2
    %56803 = torch.aten.unsqueeze %56802, %int2_54164 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56803, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54165 = torch.constant.int 3
    %int0_54166 = torch.constant.int 0
    %int9223372036854775807_54167 = torch.constant.int 9223372036854775807
    %int1_54168 = torch.constant.int 1
    %56804 = torch.aten.slice.Tensor %56803, %int3_54165, %int0_54166, %int9223372036854775807_54167, %int1_54168 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56804, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56805 = torch_c.to_builtin_tensor %56648 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_54169 = arith.constant 1 : index
    %dim_54170 = tensor.dim %56805, %c1_54169 : tensor<4x?x4x128xf16>
    %56806 = flow.tensor.bitcast %56805 : tensor<4x?x4x128xf16>{%dim_54170} -> tensor<4x?x4x64xcomplex<f16>>{%dim_54170}
    %56807 = torch_c.from_builtin_tensor %56806 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %56807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %56808 = torch.aten.mul.Tensor %56807, %56804 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %56808, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %56809 = torch_c.to_builtin_tensor %56808 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_54171 = arith.constant 1 : index
    %dim_54172 = tensor.dim %56809, %c1_54171 : tensor<4x?x4x64xcomplex<f32>>
    %56810 = flow.tensor.bitcast %56809 : tensor<4x?x4x64xcomplex<f32>>{%dim_54172} -> tensor<4x?x4x128xf32>{%dim_54172}
    %56811 = torch_c.from_builtin_tensor %56810 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %56811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_54173 = torch.constant.int 5
    %56812 = torch.prims.convert_element_type %56811, %int5_54173 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_54174 = torch.constant.int 1
    %56813 = torch.aten.size.int %56502, %int1_54174 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_54175 = torch.constant.int 0
    %56814 = torch.aten.add.int %int0_54175, %56813 : !torch.int, !torch.int -> !torch.int
    %int0_54176 = torch.constant.int 0
    %int0_54177 = torch.constant.int 0
    %int1_54178 = torch.constant.int 1
    %56815 = torch.aten.slice.Tensor %56719, %int0_54176, %int0_54177, %56814, %int1_54178 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56815, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54179 = torch.constant.int 1
    %int0_54180 = torch.constant.int 0
    %int9223372036854775807_54181 = torch.constant.int 9223372036854775807
    %int1_54182 = torch.constant.int 1
    %56816 = torch.aten.slice.Tensor %56815, %int1_54179, %int0_54180, %int9223372036854775807_54181, %int1_54182 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56816, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54183 = torch.constant.int 0
    %56817 = torch.aten.unsqueeze %56816, %int0_54183 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56817, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54184 = torch.constant.int 2
    %56818 = torch.aten.unsqueeze %56817, %int2_54184 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56818, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54185 = torch.constant.int 3
    %int0_54186 = torch.constant.int 0
    %int9223372036854775807_54187 = torch.constant.int 9223372036854775807
    %int1_54188 = torch.constant.int 1
    %56819 = torch.aten.slice.Tensor %56818, %int3_54185, %int0_54186, %int9223372036854775807_54187, %int1_54188 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56819, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56820 = torch_c.to_builtin_tensor %56650 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_54189 = arith.constant 1 : index
    %dim_54190 = tensor.dim %56820, %c1_54189 : tensor<4x?x4x128xf16>
    %56821 = flow.tensor.bitcast %56820 : tensor<4x?x4x128xf16>{%dim_54190} -> tensor<4x?x4x64xcomplex<f16>>{%dim_54190}
    %56822 = torch_c.from_builtin_tensor %56821 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %56822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %56823 = torch.aten.mul.Tensor %56822, %56819 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %56823, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %56824 = torch_c.to_builtin_tensor %56823 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_54191 = arith.constant 1 : index
    %dim_54192 = tensor.dim %56824, %c1_54191 : tensor<4x?x4x64xcomplex<f32>>
    %56825 = flow.tensor.bitcast %56824 : tensor<4x?x4x64xcomplex<f32>>{%dim_54192} -> tensor<4x?x4x128xf32>{%dim_54192}
    %56826 = torch_c.from_builtin_tensor %56825 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %56826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_54193 = torch.constant.int 5
    %56827 = torch.prims.convert_element_type %56826, %int5_54193 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_54194 = torch.constant.int 1
    %56828 = torch.aten.size.int %56508, %int1_54194 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_54195 = torch.constant.int 0
    %56829 = torch.aten.add.int %int0_54195, %56828 : !torch.int, !torch.int -> !torch.int
    %int0_54196 = torch.constant.int 0
    %int0_54197 = torch.constant.int 0
    %int1_54198 = torch.constant.int 1
    %56830 = torch.aten.slice.Tensor %56722, %int0_54196, %int0_54197, %56829, %int1_54198 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56830, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54199 = torch.constant.int 1
    %int0_54200 = torch.constant.int 0
    %int9223372036854775807_54201 = torch.constant.int 9223372036854775807
    %int1_54202 = torch.constant.int 1
    %56831 = torch.aten.slice.Tensor %56830, %int1_54199, %int0_54200, %int9223372036854775807_54201, %int1_54202 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56831, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54203 = torch.constant.int 0
    %56832 = torch.aten.unsqueeze %56831, %int0_54203 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56832, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54204 = torch.constant.int 2
    %56833 = torch.aten.unsqueeze %56832, %int2_54204 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56833, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54205 = torch.constant.int 3
    %int0_54206 = torch.constant.int 0
    %int9223372036854775807_54207 = torch.constant.int 9223372036854775807
    %int1_54208 = torch.constant.int 1
    %56834 = torch.aten.slice.Tensor %56833, %int3_54205, %int0_54206, %int9223372036854775807_54207, %int1_54208 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56834, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56835 = torch_c.to_builtin_tensor %56652 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_54209 = arith.constant 1 : index
    %dim_54210 = tensor.dim %56835, %c1_54209 : tensor<4x?x4x128xf16>
    %56836 = flow.tensor.bitcast %56835 : tensor<4x?x4x128xf16>{%dim_54210} -> tensor<4x?x4x64xcomplex<f16>>{%dim_54210}
    %56837 = torch_c.from_builtin_tensor %56836 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %56837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %56838 = torch.aten.mul.Tensor %56837, %56834 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %56838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %56839 = torch_c.to_builtin_tensor %56838 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_54211 = arith.constant 1 : index
    %dim_54212 = tensor.dim %56839, %c1_54211 : tensor<4x?x4x64xcomplex<f32>>
    %56840 = flow.tensor.bitcast %56839 : tensor<4x?x4x64xcomplex<f32>>{%dim_54212} -> tensor<4x?x4x128xf32>{%dim_54212}
    %56841 = torch_c.from_builtin_tensor %56840 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %56841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_54213 = torch.constant.int 5
    %56842 = torch.prims.convert_element_type %56841, %int5_54213 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %56842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
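    // NOTE: The ops below rebuild the RoPE frequency table on the host ("cpu"):
    // positions arange(131072), inverse frequencies 1 / 500000^(k/128) for even
    // k in [0, 128), an outer product into [131072, 64] angles, then cos + i*sin
    // assembled into a complex<f32> table. Equivalent NumPy-style sketch
    // (illustrative only):
    //   inv_freq = 1.0 / (500000.0 ** (np.arange(0, 128, 2)[:64] / 128.0))
    //   angles   = np.arange(131072)[:, None] * inv_freq[None, :]
    //   table    = np.cos(angles) + 1j * np.sin(angles)      # [131072, 64]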
    %int131072_54214 = torch.constant.int 131072
    %none_54215 = torch.constant.none
    %none_54216 = torch.constant.none
    %cpu_54217 = torch.constant.device "cpu"
    %false_54218 = torch.constant.bool false
    %56843 = torch.aten.arange %int131072_54214, %none_54215, %none_54216, %cpu_54217, %false_54218 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_54219 = torch.constant.int 0
    %int128_54220 = torch.constant.int 128
    %int2_54221 = torch.constant.int 2
    %none_54222 = torch.constant.none
    %none_54223 = torch.constant.none
    %cpu_54224 = torch.constant.device "cpu"
    %false_54225 = torch.constant.bool false
    %56844 = torch.aten.arange.start_step %int0_54219, %int128_54220, %int2_54221, %none_54222, %none_54223, %cpu_54224, %false_54225 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_54226 = torch.constant.int 0
    %int0_54227 = torch.constant.int 0
    %int64_54228 = torch.constant.int 64
    %int1_54229 = torch.constant.int 1
    %56845 = torch.aten.slice.Tensor %56844, %int0_54226, %int0_54227, %int64_54228, %int1_54229 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_54230 = torch.constant.int 6
    %56846 = torch.prims.convert_element_type %56845, %int6_54230 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_54231 = torch.constant.int 128
    %56847 = torch.aten.div.Scalar %56846, %int128_54231 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_54232 = torch.constant.float 5.000000e+05
    %56848 = torch.aten.pow.Scalar %float5.000000e05_54232, %56847 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %56849 = torch.aten.reciprocal %56848 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_54233 = torch.constant.float 1.000000e+00
    %56850 = torch.aten.mul.Scalar %56849, %float1.000000e00_54233 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_54234 = torch.constant.int 131072
    %int1_54235 = torch.constant.int 1
    %56851 = torch.prim.ListConstruct %int131072_54234, %int1_54235 : (!torch.int, !torch.int) -> !torch.list<int>
    %56852 = torch.aten.view %56843, %56851 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %56853 = torch.aten.mul.Tensor %56852, %56850 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %56854 = torch.aten.cos %56853 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %56855 = torch.aten.sin %56853 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %56856 = torch.aten.complex %56854, %56855 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
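    // NOTE: The host-computed table is replicated to all eight devices via
    // flow.tensor.transfer (one copy per #hal.device.promise<@__device_N>), so
    // each shard can slice a local copy in the per-device blocks that follow.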
    %56857 = torch_c.to_builtin_tensor %56856 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56858 = flow.tensor.transfer %56857 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %56859 = torch_c.from_builtin_tensor %56858 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56860 = torch_c.to_builtin_tensor %56856 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56861 = flow.tensor.transfer %56860 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %56862 = torch_c.from_builtin_tensor %56861 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56863 = torch_c.to_builtin_tensor %56856 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56864 = flow.tensor.transfer %56863 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %56865 = torch_c.from_builtin_tensor %56864 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56866 = torch_c.to_builtin_tensor %56856 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56867 = flow.tensor.transfer %56866 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %56868 = torch_c.from_builtin_tensor %56867 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56869 = torch_c.to_builtin_tensor %56856 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56870 = flow.tensor.transfer %56869 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %56871 = torch_c.from_builtin_tensor %56870 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56872 = torch_c.to_builtin_tensor %56856 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56873 = flow.tensor.transfer %56872 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %56874 = torch_c.from_builtin_tensor %56873 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56875 = torch_c.to_builtin_tensor %56856 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56876 = flow.tensor.transfer %56875 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %56877 = torch_c.from_builtin_tensor %56876 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %56878 = torch_c.to_builtin_tensor %56856 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %56879 = flow.tensor.transfer %56878 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %56880 = torch_c.from_builtin_tensor %56879 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
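    // NOTE: What follows repeats the slice/broadcast/complex-multiply RoPE pattern
    // once per device (@__device_0 through @__device_7), this time on [4, ?, 1, 128]
    // activations; the size-1 head dimension is consistent with one KV head per
    // shard after tensor-parallel sharding (an inference from the shapes, not
    // something the IR states).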
    %int1_54236 = torch.constant.int 1
    %56881 = torch.aten.size.int %56530, %int1_54236 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_54237 = torch.constant.int 0
    %56882 = torch.aten.add.int %int0_54237, %56881 : !torch.int, !torch.int -> !torch.int
    %int0_54238 = torch.constant.int 0
    %int0_54239 = torch.constant.int 0
    %int1_54240 = torch.constant.int 1
    %56883 = torch.aten.slice.Tensor %56859, %int0_54238, %int0_54239, %56882, %int1_54240 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56883, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54241 = torch.constant.int 1
    %int0_54242 = torch.constant.int 0
    %int9223372036854775807_54243 = torch.constant.int 9223372036854775807
    %int1_54244 = torch.constant.int 1
    %56884 = torch.aten.slice.Tensor %56883, %int1_54241, %int0_54242, %int9223372036854775807_54243, %int1_54244 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56884, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54245 = torch.constant.int 0
    %56885 = torch.aten.unsqueeze %56884, %int0_54245 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56885, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54246 = torch.constant.int 2
    %56886 = torch.aten.unsqueeze %56885, %int2_54246 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56886, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54247 = torch.constant.int 3
    %int0_54248 = torch.constant.int 0
    %int9223372036854775807_54249 = torch.constant.int 9223372036854775807
    %int1_54250 = torch.constant.int 1
    %56887 = torch.aten.slice.Tensor %56886, %int3_54247, %int0_54248, %int9223372036854775807_54249, %int1_54250 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56887, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56888 = torch_c.to_builtin_tensor %56654 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_54251 = arith.constant 1 : index
    %dim_54252 = tensor.dim %56888, %c1_54251 : tensor<4x?x1x128xf16>
    %56889 = flow.tensor.bitcast %56888 : tensor<4x?x1x128xf16>{%dim_54252} -> tensor<4x?x1x64xcomplex<f16>>{%dim_54252}
    %56890 = torch_c.from_builtin_tensor %56889 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %56890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %56891 = torch.aten.mul.Tensor %56890, %56887 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %56892 = torch_c.to_builtin_tensor %56891 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_54253 = arith.constant 1 : index
    %dim_54254 = tensor.dim %56892, %c1_54253 : tensor<4x?x1x64xcomplex<f32>>
    %56893 = flow.tensor.bitcast %56892 : tensor<4x?x1x64xcomplex<f32>>{%dim_54254} -> tensor<4x?x1x128xf32>{%dim_54254}
    %56894 = torch_c.from_builtin_tensor %56893 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %56894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_54255 = torch.constant.int 5
    %56895 = torch.prims.convert_element_type %56894, %int5_54255 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56895, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_54256 = torch.constant.int 1
    %56896 = torch.aten.size.int %56536, %int1_54256 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_54257 = torch.constant.int 0
    %56897 = torch.aten.add.int %int0_54257, %56896 : !torch.int, !torch.int -> !torch.int
    %int0_54258 = torch.constant.int 0
    %int0_54259 = torch.constant.int 0
    %int1_54260 = torch.constant.int 1
    %56898 = torch.aten.slice.Tensor %56862, %int0_54258, %int0_54259, %56897, %int1_54260 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56898, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54261 = torch.constant.int 1
    %int0_54262 = torch.constant.int 0
    %int9223372036854775807_54263 = torch.constant.int 9223372036854775807
    %int1_54264 = torch.constant.int 1
    %56899 = torch.aten.slice.Tensor %56898, %int1_54261, %int0_54262, %int9223372036854775807_54263, %int1_54264 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56899, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54265 = torch.constant.int 0
    %56900 = torch.aten.unsqueeze %56899, %int0_54265 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56900, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54266 = torch.constant.int 2
    %56901 = torch.aten.unsqueeze %56900, %int2_54266 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56901, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54267 = torch.constant.int 3
    %int0_54268 = torch.constant.int 0
    %int9223372036854775807_54269 = torch.constant.int 9223372036854775807
    %int1_54270 = torch.constant.int 1
    %56902 = torch.aten.slice.Tensor %56901, %int3_54267, %int0_54268, %int9223372036854775807_54269, %int1_54270 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56902, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56903 = torch_c.to_builtin_tensor %56656 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_54271 = arith.constant 1 : index
    %dim_54272 = tensor.dim %56903, %c1_54271 : tensor<4x?x1x128xf16>
    %56904 = flow.tensor.bitcast %56903 : tensor<4x?x1x128xf16>{%dim_54272} -> tensor<4x?x1x64xcomplex<f16>>{%dim_54272}
    %56905 = torch_c.from_builtin_tensor %56904 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %56905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %56906 = torch.aten.mul.Tensor %56905, %56902 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %56907 = torch_c.to_builtin_tensor %56906 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_54273 = arith.constant 1 : index
    %dim_54274 = tensor.dim %56907, %c1_54273 : tensor<4x?x1x64xcomplex<f32>>
    %56908 = flow.tensor.bitcast %56907 : tensor<4x?x1x64xcomplex<f32>>{%dim_54274} -> tensor<4x?x1x128xf32>{%dim_54274}
    %56909 = torch_c.from_builtin_tensor %56908 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %56909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_54275 = torch.constant.int 5
    %56910 = torch.prims.convert_element_type %56909, %int5_54275 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56910, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_54276 = torch.constant.int 1
    %56911 = torch.aten.size.int %56542, %int1_54276 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_54277 = torch.constant.int 0
    %56912 = torch.aten.add.int %int0_54277, %56911 : !torch.int, !torch.int -> !torch.int
    %int0_54278 = torch.constant.int 0
    %int0_54279 = torch.constant.int 0
    %int1_54280 = torch.constant.int 1
    %56913 = torch.aten.slice.Tensor %56865, %int0_54278, %int0_54279, %56912, %int1_54280 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56913, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54281 = torch.constant.int 1
    %int0_54282 = torch.constant.int 0
    %int9223372036854775807_54283 = torch.constant.int 9223372036854775807
    %int1_54284 = torch.constant.int 1
    %56914 = torch.aten.slice.Tensor %56913, %int1_54281, %int0_54282, %int9223372036854775807_54283, %int1_54284 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56914, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54285 = torch.constant.int 0
    %56915 = torch.aten.unsqueeze %56914, %int0_54285 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56915, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54286 = torch.constant.int 2
    %56916 = torch.aten.unsqueeze %56915, %int2_54286 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56916, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54287 = torch.constant.int 3
    %int0_54288 = torch.constant.int 0
    %int9223372036854775807_54289 = torch.constant.int 9223372036854775807
    %int1_54290 = torch.constant.int 1
    %56917 = torch.aten.slice.Tensor %56916, %int3_54287, %int0_54288, %int9223372036854775807_54289, %int1_54290 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56917, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56918 = torch_c.to_builtin_tensor %56658 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_54291 = arith.constant 1 : index
    %dim_54292 = tensor.dim %56918, %c1_54291 : tensor<4x?x1x128xf16>
    %56919 = flow.tensor.bitcast %56918 : tensor<4x?x1x128xf16>{%dim_54292} -> tensor<4x?x1x64xcomplex<f16>>{%dim_54292}
    %56920 = torch_c.from_builtin_tensor %56919 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %56920, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %56921 = torch.aten.mul.Tensor %56920, %56917 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %56922 = torch_c.to_builtin_tensor %56921 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_54293 = arith.constant 1 : index
    %dim_54294 = tensor.dim %56922, %c1_54293 : tensor<4x?x1x64xcomplex<f32>>
    %56923 = flow.tensor.bitcast %56922 : tensor<4x?x1x64xcomplex<f32>>{%dim_54294} -> tensor<4x?x1x128xf32>{%dim_54294}
    %56924 = torch_c.from_builtin_tensor %56923 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %56924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_54295 = torch.constant.int 5
    %56925 = torch.prims.convert_element_type %56924, %int5_54295 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_54296 = torch.constant.int 1
    %56926 = torch.aten.size.int %56548, %int1_54296 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_54297 = torch.constant.int 0
    %56927 = torch.aten.add.int %int0_54297, %56926 : !torch.int, !torch.int -> !torch.int
    %int0_54298 = torch.constant.int 0
    %int0_54299 = torch.constant.int 0
    %int1_54300 = torch.constant.int 1
    %56928 = torch.aten.slice.Tensor %56868, %int0_54298, %int0_54299, %56927, %int1_54300 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56928, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54301 = torch.constant.int 1
    %int0_54302 = torch.constant.int 0
    %int9223372036854775807_54303 = torch.constant.int 9223372036854775807
    %int1_54304 = torch.constant.int 1
    %56929 = torch.aten.slice.Tensor %56928, %int1_54301, %int0_54302, %int9223372036854775807_54303, %int1_54304 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56929, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54305 = torch.constant.int 0
    %56930 = torch.aten.unsqueeze %56929, %int0_54305 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56930, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54306 = torch.constant.int 2
    %56931 = torch.aten.unsqueeze %56930, %int2_54306 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56931, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54307 = torch.constant.int 3
    %int0_54308 = torch.constant.int 0
    %int9223372036854775807_54309 = torch.constant.int 9223372036854775807
    %int1_54310 = torch.constant.int 1
    %56932 = torch.aten.slice.Tensor %56931, %int3_54307, %int0_54308, %int9223372036854775807_54309, %int1_54310 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56932, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56933 = torch_c.to_builtin_tensor %56660 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_54311 = arith.constant 1 : index
    %dim_54312 = tensor.dim %56933, %c1_54311 : tensor<4x?x1x128xf16>
    %56934 = flow.tensor.bitcast %56933 : tensor<4x?x1x128xf16>{%dim_54312} -> tensor<4x?x1x64xcomplex<f16>>{%dim_54312}
    %56935 = torch_c.from_builtin_tensor %56934 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %56935, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %56936 = torch.aten.mul.Tensor %56935, %56932 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %56937 = torch_c.to_builtin_tensor %56936 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_54313 = arith.constant 1 : index
    %dim_54314 = tensor.dim %56937, %c1_54313 : tensor<4x?x1x64xcomplex<f32>>
    %56938 = flow.tensor.bitcast %56937 : tensor<4x?x1x64xcomplex<f32>>{%dim_54314} -> tensor<4x?x1x128xf32>{%dim_54314}
    %56939 = torch_c.from_builtin_tensor %56938 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %56939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_54315 = torch.constant.int 5
    %56940 = torch.prims.convert_element_type %56939, %int5_54315 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_54316 = torch.constant.int 1
    %56941 = torch.aten.size.int %56554, %int1_54316 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_54317 = torch.constant.int 0
    %56942 = torch.aten.add.int %int0_54317, %56941 : !torch.int, !torch.int -> !torch.int
    %int0_54318 = torch.constant.int 0
    %int0_54319 = torch.constant.int 0
    %int1_54320 = torch.constant.int 1
    %56943 = torch.aten.slice.Tensor %56871, %int0_54318, %int0_54319, %56942, %int1_54320 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56943, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54321 = torch.constant.int 1
    %int0_54322 = torch.constant.int 0
    %int9223372036854775807_54323 = torch.constant.int 9223372036854775807
    %int1_54324 = torch.constant.int 1
    %56944 = torch.aten.slice.Tensor %56943, %int1_54321, %int0_54322, %int9223372036854775807_54323, %int1_54324 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56944, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54325 = torch.constant.int 0
    %56945 = torch.aten.unsqueeze %56944, %int0_54325 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56945, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54326 = torch.constant.int 2
    %56946 = torch.aten.unsqueeze %56945, %int2_54326 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56946, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54327 = torch.constant.int 3
    %int0_54328 = torch.constant.int 0
    %int9223372036854775807_54329 = torch.constant.int 9223372036854775807
    %int1_54330 = torch.constant.int 1
    %56947 = torch.aten.slice.Tensor %56946, %int3_54327, %int0_54328, %int9223372036854775807_54329, %int1_54330 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56947, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56948 = torch_c.to_builtin_tensor %56662 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_54331 = arith.constant 1 : index
    %dim_54332 = tensor.dim %56948, %c1_54331 : tensor<4x?x1x128xf16>
    %56949 = flow.tensor.bitcast %56948 : tensor<4x?x1x128xf16>{%dim_54332} -> tensor<4x?x1x64xcomplex<f16>>{%dim_54332}
    %56950 = torch_c.from_builtin_tensor %56949 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %56950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %56951 = torch.aten.mul.Tensor %56950, %56947 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %56952 = torch_c.to_builtin_tensor %56951 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_54333 = arith.constant 1 : index
    %dim_54334 = tensor.dim %56952, %c1_54333 : tensor<4x?x1x64xcomplex<f32>>
    %56953 = flow.tensor.bitcast %56952 : tensor<4x?x1x64xcomplex<f32>>{%dim_54334} -> tensor<4x?x1x128xf32>{%dim_54334}
    %56954 = torch_c.from_builtin_tensor %56953 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %56954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_54335 = torch.constant.int 5
    %56955 = torch.prims.convert_element_type %56954, %int5_54335 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_54336 = torch.constant.int 1
    %56956 = torch.aten.size.int %56560, %int1_54336 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_54337 = torch.constant.int 0
    %56957 = torch.aten.add.int %int0_54337, %56956 : !torch.int, !torch.int -> !torch.int
    %int0_54338 = torch.constant.int 0
    %int0_54339 = torch.constant.int 0
    %int1_54340 = torch.constant.int 1
    %56958 = torch.aten.slice.Tensor %56874, %int0_54338, %int0_54339, %56957, %int1_54340 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56958, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54341 = torch.constant.int 1
    %int0_54342 = torch.constant.int 0
    %int9223372036854775807_54343 = torch.constant.int 9223372036854775807
    %int1_54344 = torch.constant.int 1
    %56959 = torch.aten.slice.Tensor %56958, %int1_54341, %int0_54342, %int9223372036854775807_54343, %int1_54344 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56959, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54345 = torch.constant.int 0
    %56960 = torch.aten.unsqueeze %56959, %int0_54345 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56960, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54346 = torch.constant.int 2
    %56961 = torch.aten.unsqueeze %56960, %int2_54346 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56961, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54347 = torch.constant.int 3
    %int0_54348 = torch.constant.int 0
    %int9223372036854775807_54349 = torch.constant.int 9223372036854775807
    %int1_54350 = torch.constant.int 1
    %56962 = torch.aten.slice.Tensor %56961, %int3_54347, %int0_54348, %int9223372036854775807_54349, %int1_54350 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56962, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56963 = torch_c.to_builtin_tensor %56664 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_54351 = arith.constant 1 : index
    %dim_54352 = tensor.dim %56963, %c1_54351 : tensor<4x?x1x128xf16>
    %56964 = flow.tensor.bitcast %56963 : tensor<4x?x1x128xf16>{%dim_54352} -> tensor<4x?x1x64xcomplex<f16>>{%dim_54352}
    %56965 = torch_c.from_builtin_tensor %56964 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %56965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %56966 = torch.aten.mul.Tensor %56965, %56962 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %56967 = torch_c.to_builtin_tensor %56966 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_54353 = arith.constant 1 : index
    %dim_54354 = tensor.dim %56967, %c1_54353 : tensor<4x?x1x64xcomplex<f32>>
    %56968 = flow.tensor.bitcast %56967 : tensor<4x?x1x64xcomplex<f32>>{%dim_54354} -> tensor<4x?x1x128xf32>{%dim_54354}
    %56969 = torch_c.from_builtin_tensor %56968 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %56969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_54355 = torch.constant.int 5
    %56970 = torch.prims.convert_element_type %56969, %int5_54355 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_54356 = torch.constant.int 1
    %56971 = torch.aten.size.int %56566, %int1_54356 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_54357 = torch.constant.int 0
    %56972 = torch.aten.add.int %int0_54357, %56971 : !torch.int, !torch.int -> !torch.int
    %int0_54358 = torch.constant.int 0
    %int0_54359 = torch.constant.int 0
    %int1_54360 = torch.constant.int 1
    %56973 = torch.aten.slice.Tensor %56877, %int0_54358, %int0_54359, %56972, %int1_54360 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56973, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54361 = torch.constant.int 1
    %int0_54362 = torch.constant.int 0
    %int9223372036854775807_54363 = torch.constant.int 9223372036854775807
    %int1_54364 = torch.constant.int 1
    %56974 = torch.aten.slice.Tensor %56973, %int1_54361, %int0_54362, %int9223372036854775807_54363, %int1_54364 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56974, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54365 = torch.constant.int 0
    %56975 = torch.aten.unsqueeze %56974, %int0_54365 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56975, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54366 = torch.constant.int 2
    %56976 = torch.aten.unsqueeze %56975, %int2_54366 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56976, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54367 = torch.constant.int 3
    %int0_54368 = torch.constant.int 0
    %int9223372036854775807_54369 = torch.constant.int 9223372036854775807
    %int1_54370 = torch.constant.int 1
    %56977 = torch.aten.slice.Tensor %56976, %int3_54367, %int0_54368, %int9223372036854775807_54369, %int1_54370 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56977, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56978 = torch_c.to_builtin_tensor %56666 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_54371 = arith.constant 1 : index
    %dim_54372 = tensor.dim %56978, %c1_54371 : tensor<4x?x1x128xf16>
    %56979 = flow.tensor.bitcast %56978 : tensor<4x?x1x128xf16>{%dim_54372} -> tensor<4x?x1x64xcomplex<f16>>{%dim_54372}
    %56980 = torch_c.from_builtin_tensor %56979 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %56980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %56981 = torch.aten.mul.Tensor %56980, %56977 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %56982 = torch_c.to_builtin_tensor %56981 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_54373 = arith.constant 1 : index
    %dim_54374 = tensor.dim %56982, %c1_54373 : tensor<4x?x1x64xcomplex<f32>>
    %56983 = flow.tensor.bitcast %56982 : tensor<4x?x1x64xcomplex<f32>>{%dim_54374} -> tensor<4x?x1x128xf32>{%dim_54374}
    %56984 = torch_c.from_builtin_tensor %56983 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %56984, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_54375 = torch.constant.int 5
    %56985 = torch.prims.convert_element_type %56984, %int5_54375 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %56985, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_54376 = torch.constant.int 1
    %56986 = torch.aten.size.int %56572, %int1_54376 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_54377 = torch.constant.int 0
    %56987 = torch.aten.add.int %int0_54377, %56986 : !torch.int, !torch.int -> !torch.int
    %int0_54378 = torch.constant.int 0
    %int0_54379 = torch.constant.int 0
    %int1_54380 = torch.constant.int 1
    %56988 = torch.aten.slice.Tensor %56880, %int0_54378, %int0_54379, %56987, %int1_54380 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56988, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_54381 = torch.constant.int 1
    %int0_54382 = torch.constant.int 0
    %int9223372036854775807_54383 = torch.constant.int 9223372036854775807
    %int1_54384 = torch.constant.int 1
    %56989 = torch.aten.slice.Tensor %56988, %int1_54381, %int0_54382, %int9223372036854775807_54383, %int1_54384 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %56989, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_54385 = torch.constant.int 0
    %56990 = torch.aten.unsqueeze %56989, %int0_54385 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %56990, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_54386 = torch.constant.int 2
    %56991 = torch.aten.unsqueeze %56990, %int2_54386 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56991, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_54387 = torch.constant.int 3
    %int0_54388 = torch.constant.int 0
    %int9223372036854775807_54389 = torch.constant.int 9223372036854775807
    %int1_54390 = torch.constant.int 1
    %56992 = torch.aten.slice.Tensor %56991, %int3_54387, %int0_54388, %int9223372036854775807_54389, %int1_54390 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56992, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %56993 = torch_c.to_builtin_tensor %56668 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_54391 = arith.constant 1 : index
    %dim_54392 = tensor.dim %56993, %c1_54391 : tensor<4x?x1x128xf16>
    %56994 = flow.tensor.bitcast %56993 : tensor<4x?x1x128xf16>{%dim_54392} -> tensor<4x?x1x64xcomplex<f16>>{%dim_54392}
    %56995 = torch_c.from_builtin_tensor %56994 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %56995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %56996 = torch.aten.mul.Tensor %56995, %56992 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %56996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %56997 = torch_c.to_builtin_tensor %56996 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_54393 = arith.constant 1 : index
    %dim_54394 = tensor.dim %56997, %c1_54393 : tensor<4x?x1x64xcomplex<f32>>
    %56998 = flow.tensor.bitcast %56997 : tensor<4x?x1x64xcomplex<f32>>{%dim_54394} -> tensor<4x?x1x128xf32>{%dim_54394}
    %56999 = torch_c.from_builtin_tensor %56998 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %56999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_54395 = torch.constant.int 5
    %57000 = torch.prims.convert_element_type %56999, %int5_54395 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %57000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
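    // Scale the eight per-device page-ID tensors (%2364 ... %2385, [4, ?] si64)
    // by 64, the number of (layer, K/V) sub-slots per cache page (32 x 2 in the
    // [?, 32, 2, 16, 1, 128] cache views below), turning page IDs into flat
    // slot-base offsets.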
    %int64_54396 = torch.constant.int 64
    %57001 = torch.aten.mul.Scalar %2364, %int64_54396 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57001, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_54397 = torch.constant.int 64
    %57002 = torch.aten.mul.Scalar %2367, %int64_54397 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57002, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_54398 = torch.constant.int 64
    %57003 = torch.aten.mul.Scalar %2370, %int64_54398 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57003, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_54399 = torch.constant.int 64
    %57004 = torch.aten.mul.Scalar %2373, %int64_54399 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57004, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_54400 = torch.constant.int 64
    %57005 = torch.aten.mul.Scalar %2376, %int64_54400 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57005, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_54401 = torch.constant.int 64
    %57006 = torch.aten.mul.Scalar %2379, %int64_54401 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57006, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_54402 = torch.constant.int 64
    %57007 = torch.aten.mul.Scalar %2382, %int64_54402 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57007, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_54403 = torch.constant.int 64
    %57008 = torch.aten.mul.Scalar %2385, %int64_54403 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57008, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
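    // Add the constant slot offset 58 to each scaled index. With two slots per
    // layer (K then V), 58 = 2 * 29, suggesting the key slot of layer 29; the
    // matching value indices (+1) are computed further down.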
    %int58 = torch.constant.int 58
    %int1_54404 = torch.constant.int 1
    %57009 = torch.aten.add.Scalar %57001, %int58, %int1_54404 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57009, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int58_54405 = torch.constant.int 58
    %int1_54406 = torch.constant.int 1
    %57010 = torch.aten.add.Scalar %57002, %int58_54405, %int1_54406 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57010, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int58_54407 = torch.constant.int 58
    %int1_54408 = torch.constant.int 1
    %57011 = torch.aten.add.Scalar %57003, %int58_54407, %int1_54408 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57011, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int58_54409 = torch.constant.int 58
    %int1_54410 = torch.constant.int 1
    %57012 = torch.aten.add.Scalar %57004, %int58_54409, %int1_54410 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57012, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int58_54411 = torch.constant.int 58
    %int1_54412 = torch.constant.int 1
    %57013 = torch.aten.add.Scalar %57005, %int58_54411, %int1_54412 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57013, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int58_54413 = torch.constant.int 58
    %int1_54414 = torch.constant.int 1
    %57014 = torch.aten.add.Scalar %57006, %int58_54413, %int1_54414 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57014, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int58_54415 = torch.constant.int 58
    %int1_54416 = torch.constant.int 1
    %57015 = torch.aten.add.Scalar %57007, %int58_54415, %int1_54416 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57015, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int58_54417 = torch.constant.int 58
    %int1_54418 = torch.constant.int 1
    %57016 = torch.aten.add.Scalar %57008, %int58_54417, %int1_54418 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57016, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
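    // Reshape each rotated K shard from [4, ?, 1, 128] (? = s0*16 tokens) to
    // [4, ?, 16, 1, 128], grouping tokens into pages of 16.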
    %int4_54419 = torch.constant.int 4
    %int16_54420 = torch.constant.int 16
    %int1_54421 = torch.constant.int 1
    %int128_54422 = torch.constant.int 128
    %57017 = torch.prim.ListConstruct %int4_54419, %3095, %int16_54420, %int1_54421, %int128_54422 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57018 = torch.aten.view %56895, %57017 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57018, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54423 = torch.constant.int 4
    %int16_54424 = torch.constant.int 16
    %int1_54425 = torch.constant.int 1
    %int128_54426 = torch.constant.int 128
    %57019 = torch.prim.ListConstruct %int4_54423, %3095, %int16_54424, %int1_54425, %int128_54426 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57020 = torch.aten.view %56910, %57019 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57020, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54427 = torch.constant.int 4
    %int16_54428 = torch.constant.int 16
    %int1_54429 = torch.constant.int 1
    %int128_54430 = torch.constant.int 128
    %57021 = torch.prim.ListConstruct %int4_54427, %3095, %int16_54428, %int1_54429, %int128_54430 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57022 = torch.aten.view %56925, %57021 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57022, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54431 = torch.constant.int 4
    %int16_54432 = torch.constant.int 16
    %int1_54433 = torch.constant.int 1
    %int128_54434 = torch.constant.int 128
    %57023 = torch.prim.ListConstruct %int4_54431, %3095, %int16_54432, %int1_54433, %int128_54434 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57024 = torch.aten.view %56940, %57023 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57024, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54435 = torch.constant.int 4
    %int16_54436 = torch.constant.int 16
    %int1_54437 = torch.constant.int 1
    %int128_54438 = torch.constant.int 128
    %57025 = torch.prim.ListConstruct %int4_54435, %3095, %int16_54436, %int1_54437, %int128_54438 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57026 = torch.aten.view %56955, %57025 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57026, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54439 = torch.constant.int 4
    %int16_54440 = torch.constant.int 16
    %int1_54441 = torch.constant.int 1
    %int128_54442 = torch.constant.int 128
    %57027 = torch.prim.ListConstruct %int4_54439, %3095, %int16_54440, %int1_54441, %int128_54442 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57028 = torch.aten.view %56970, %57027 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57028, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54443 = torch.constant.int 4
    %int16_54444 = torch.constant.int 16
    %int1_54445 = torch.constant.int 1
    %int128_54446 = torch.constant.int 128
    %57029 = torch.prim.ListConstruct %int4_54443, %3095, %int16_54444, %int1_54445, %int128_54446 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57030 = torch.aten.view %56985, %57029 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57030, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54447 = torch.constant.int 4
    %int16_54448 = torch.constant.int 16
    %int1_54449 = torch.constant.int 1
    %int128_54450 = torch.constant.int 128
    %57031 = torch.prim.ListConstruct %int4_54447, %3095, %int16_54448, %int1_54449, %int128_54450 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57032 = torch.aten.view %57000, %57031 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57032, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
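    // Collapse batch and page dims: [4, ?, 16, 1, 128] -> [s0*4, 16, 1, 128],
    // one row per (batch, page) pair, ready for the scatter below.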
    %int4_54451 = torch.constant.int 4
    %57033 = torch.aten.mul.int %int4_54451, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54452 = torch.constant.int 16
    %int1_54453 = torch.constant.int 1
    %int128_54454 = torch.constant.int 128
    %57034 = torch.prim.ListConstruct %57033, %int16_54452, %int1_54453, %int128_54454 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57035 = torch.aten.view %57018, %57034 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57035, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54455 = torch.constant.int 4
    %57036 = torch.aten.mul.int %int4_54455, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54456 = torch.constant.int 16
    %int1_54457 = torch.constant.int 1
    %int128_54458 = torch.constant.int 128
    %57037 = torch.prim.ListConstruct %57036, %int16_54456, %int1_54457, %int128_54458 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57038 = torch.aten.view %57020, %57037 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57038, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54459 = torch.constant.int 4
    %57039 = torch.aten.mul.int %int4_54459, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54460 = torch.constant.int 16
    %int1_54461 = torch.constant.int 1
    %int128_54462 = torch.constant.int 128
    %57040 = torch.prim.ListConstruct %57039, %int16_54460, %int1_54461, %int128_54462 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57041 = torch.aten.view %57022, %57040 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57041, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54463 = torch.constant.int 4
    %57042 = torch.aten.mul.int %int4_54463, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54464 = torch.constant.int 16
    %int1_54465 = torch.constant.int 1
    %int128_54466 = torch.constant.int 128
    %57043 = torch.prim.ListConstruct %57042, %int16_54464, %int1_54465, %int128_54466 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57044 = torch.aten.view %57024, %57043 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57044, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54467 = torch.constant.int 4
    %57045 = torch.aten.mul.int %int4_54467, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54468 = torch.constant.int 16
    %int1_54469 = torch.constant.int 1
    %int128_54470 = torch.constant.int 128
    %57046 = torch.prim.ListConstruct %57045, %int16_54468, %int1_54469, %int128_54470 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57047 = torch.aten.view %57026, %57046 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57047, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54471 = torch.constant.int 4
    %57048 = torch.aten.mul.int %int4_54471, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54472 = torch.constant.int 16
    %int1_54473 = torch.constant.int 1
    %int128_54474 = torch.constant.int 128
    %57049 = torch.prim.ListConstruct %57048, %int16_54472, %int1_54473, %int128_54474 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57050 = torch.aten.view %57028, %57049 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57050, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54475 = torch.constant.int 4
    %57051 = torch.aten.mul.int %int4_54475, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54476 = torch.constant.int 16
    %int1_54477 = torch.constant.int 1
    %int128_54478 = torch.constant.int 128
    %57052 = torch.prim.ListConstruct %57051, %int16_54476, %int1_54477, %int128_54478 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57053 = torch.aten.view %57030, %57052 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57053, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54479 = torch.constant.int 4
    %57054 = torch.aten.mul.int %int4_54479, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54480 = torch.constant.int 16
    %int1_54481 = torch.constant.int 1
    %int128_54482 = torch.constant.int 128
    %57055 = torch.prim.ListConstruct %57054, %int16_54480, %int1_54481, %int128_54482 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57056 = torch.aten.view %57032, %57055 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57056, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
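    // Flatten the key slot indices [4, ?] -> [s0*4] to line up with the
    // flattened payload rows.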
    %int4_54483 = torch.constant.int 4
    %57057 = torch.aten.mul.int %int4_54483, %3095 : !torch.int, !torch.int -> !torch.int
    %57058 = torch.prim.ListConstruct %57057 : (!torch.int) -> !torch.list<int>
    %57059 = torch.aten.view %57009, %57058 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57059, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54484 = torch.constant.int 4
    %57060 = torch.aten.mul.int %int4_54484, %3095 : !torch.int, !torch.int -> !torch.int
    %57061 = torch.prim.ListConstruct %57060 : (!torch.int) -> !torch.list<int>
    %57062 = torch.aten.view %57010, %57061 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57062, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54485 = torch.constant.int 4
    %57063 = torch.aten.mul.int %int4_54485, %3095 : !torch.int, !torch.int -> !torch.int
    %57064 = torch.prim.ListConstruct %57063 : (!torch.int) -> !torch.list<int>
    %57065 = torch.aten.view %57011, %57064 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57065, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54486 = torch.constant.int 4
    %57066 = torch.aten.mul.int %int4_54486, %3095 : !torch.int, !torch.int -> !torch.int
    %57067 = torch.prim.ListConstruct %57066 : (!torch.int) -> !torch.list<int>
    %57068 = torch.aten.view %57012, %57067 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57068, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54487 = torch.constant.int 4
    %57069 = torch.aten.mul.int %int4_54487, %3095 : !torch.int, !torch.int -> !torch.int
    %57070 = torch.prim.ListConstruct %57069 : (!torch.int) -> !torch.list<int>
    %57071 = torch.aten.view %57013, %57070 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57071, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54488 = torch.constant.int 4
    %57072 = torch.aten.mul.int %int4_54488, %3095 : !torch.int, !torch.int -> !torch.int
    %57073 = torch.prim.ListConstruct %57072 : (!torch.int) -> !torch.list<int>
    %57074 = torch.aten.view %57014, %57073 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57074, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54489 = torch.constant.int 4
    %57075 = torch.aten.mul.int %int4_54489, %3095 : !torch.int, !torch.int -> !torch.int
    %57076 = torch.prim.ListConstruct %57075 : (!torch.int) -> !torch.list<int>
    %57077 = torch.aten.view %57015, %57076 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57077, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54490 = torch.constant.int 4
    %57078 = torch.aten.mul.int %int4_54490, %3095 : !torch.int, !torch.int -> !torch.int
    %57079 = torch.prim.ListConstruct %57078 : (!torch.int) -> !torch.list<int>
    %57080 = torch.aten.view %57016, %57079 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57080, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
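    // Same paging reshape for the eight V shards (%56670 ... %56684):
    // [4, ?, 1, 128] -> [4, ?, 16, 1, 128].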
    %int4_54491 = torch.constant.int 4
    %int16_54492 = torch.constant.int 16
    %int1_54493 = torch.constant.int 1
    %int128_54494 = torch.constant.int 128
    %57081 = torch.prim.ListConstruct %int4_54491, %3095, %int16_54492, %int1_54493, %int128_54494 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57082 = torch.aten.view %56670, %57081 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57082, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54495 = torch.constant.int 4
    %int16_54496 = torch.constant.int 16
    %int1_54497 = torch.constant.int 1
    %int128_54498 = torch.constant.int 128
    %57083 = torch.prim.ListConstruct %int4_54495, %3095, %int16_54496, %int1_54497, %int128_54498 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57084 = torch.aten.view %56672, %57083 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57084, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54499 = torch.constant.int 4
    %int16_54500 = torch.constant.int 16
    %int1_54501 = torch.constant.int 1
    %int128_54502 = torch.constant.int 128
    %57085 = torch.prim.ListConstruct %int4_54499, %3095, %int16_54500, %int1_54501, %int128_54502 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57086 = torch.aten.view %56674, %57085 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57086, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54503 = torch.constant.int 4
    %int16_54504 = torch.constant.int 16
    %int1_54505 = torch.constant.int 1
    %int128_54506 = torch.constant.int 128
    %57087 = torch.prim.ListConstruct %int4_54503, %3095, %int16_54504, %int1_54505, %int128_54506 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57088 = torch.aten.view %56676, %57087 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57088, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54507 = torch.constant.int 4
    %int16_54508 = torch.constant.int 16
    %int1_54509 = torch.constant.int 1
    %int128_54510 = torch.constant.int 128
    %57089 = torch.prim.ListConstruct %int4_54507, %3095, %int16_54508, %int1_54509, %int128_54510 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57090 = torch.aten.view %56678, %57089 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57090, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54511 = torch.constant.int 4
    %int16_54512 = torch.constant.int 16
    %int1_54513 = torch.constant.int 1
    %int128_54514 = torch.constant.int 128
    %57091 = torch.prim.ListConstruct %int4_54511, %3095, %int16_54512, %int1_54513, %int128_54514 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57092 = torch.aten.view %56680, %57091 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57092, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54515 = torch.constant.int 4
    %int16_54516 = torch.constant.int 16
    %int1_54517 = torch.constant.int 1
    %int128_54518 = torch.constant.int 128
    %57093 = torch.prim.ListConstruct %int4_54515, %3095, %int16_54516, %int1_54517, %int128_54518 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57094 = torch.aten.view %56682, %57093 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57094, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_54519 = torch.constant.int 4
    %int16_54520 = torch.constant.int 16
    %int1_54521 = torch.constant.int 1
    %int128_54522 = torch.constant.int 128
    %57095 = torch.prim.ListConstruct %int4_54519, %3095, %int16_54520, %int1_54521, %int128_54522 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57096 = torch.aten.view %56684, %57095 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %57096, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
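    // Collapse the V shards to [s0*4, 16, 1, 128] as well.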
    %int4_54523 = torch.constant.int 4
    %57097 = torch.aten.mul.int %int4_54523, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54524 = torch.constant.int 16
    %int1_54525 = torch.constant.int 1
    %int128_54526 = torch.constant.int 128
    %57098 = torch.prim.ListConstruct %57097, %int16_54524, %int1_54525, %int128_54526 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57099 = torch.aten.view %57082, %57098 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57099, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54527 = torch.constant.int 4
    %57100 = torch.aten.mul.int %int4_54527, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54528 = torch.constant.int 16
    %int1_54529 = torch.constant.int 1
    %int128_54530 = torch.constant.int 128
    %57101 = torch.prim.ListConstruct %57100, %int16_54528, %int1_54529, %int128_54530 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57102 = torch.aten.view %57084, %57101 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57102, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54531 = torch.constant.int 4
    %57103 = torch.aten.mul.int %int4_54531, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54532 = torch.constant.int 16
    %int1_54533 = torch.constant.int 1
    %int128_54534 = torch.constant.int 128
    %57104 = torch.prim.ListConstruct %57103, %int16_54532, %int1_54533, %int128_54534 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57105 = torch.aten.view %57086, %57104 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57105, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54535 = torch.constant.int 4
    %57106 = torch.aten.mul.int %int4_54535, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54536 = torch.constant.int 16
    %int1_54537 = torch.constant.int 1
    %int128_54538 = torch.constant.int 128
    %57107 = torch.prim.ListConstruct %57106, %int16_54536, %int1_54537, %int128_54538 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57108 = torch.aten.view %57088, %57107 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57108, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54539 = torch.constant.int 4
    %57109 = torch.aten.mul.int %int4_54539, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54540 = torch.constant.int 16
    %int1_54541 = torch.constant.int 1
    %int128_54542 = torch.constant.int 128
    %57110 = torch.prim.ListConstruct %57109, %int16_54540, %int1_54541, %int128_54542 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57111 = torch.aten.view %57090, %57110 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57111, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54543 = torch.constant.int 4
    %57112 = torch.aten.mul.int %int4_54543, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54544 = torch.constant.int 16
    %int1_54545 = torch.constant.int 1
    %int128_54546 = torch.constant.int 128
    %57113 = torch.prim.ListConstruct %57112, %int16_54544, %int1_54545, %int128_54546 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57114 = torch.aten.view %57092, %57113 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57114, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54547 = torch.constant.int 4
    %57115 = torch.aten.mul.int %int4_54547, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54548 = torch.constant.int 16
    %int1_54549 = torch.constant.int 1
    %int128_54550 = torch.constant.int 128
    %57116 = torch.prim.ListConstruct %57115, %int16_54548, %int1_54549, %int128_54550 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57117 = torch.aten.view %57094, %57116 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57117, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_54551 = torch.constant.int 4
    %57118 = torch.aten.mul.int %int4_54551, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_54552 = torch.constant.int 16
    %int1_54553 = torch.constant.int 1
    %int128_54554 = torch.constant.int 128
    %57119 = torch.prim.ListConstruct %57118, %int16_54552, %int1_54553, %int128_54554 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57120 = torch.aten.view %57096, %57119 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57120, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
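    // Value slot indices: key slot + 1, since slots alternate K, V within a
    // layer.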
    %int1_54555 = torch.constant.int 1
    %int1_54556 = torch.constant.int 1
    %57121 = torch.aten.add.Scalar %57009, %int1_54555, %int1_54556 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57121, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_54557 = torch.constant.int 1
    %int1_54558 = torch.constant.int 1
    %57122 = torch.aten.add.Scalar %57010, %int1_54557, %int1_54558 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57122, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_54559 = torch.constant.int 1
    %int1_54560 = torch.constant.int 1
    %57123 = torch.aten.add.Scalar %57011, %int1_54559, %int1_54560 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57123, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_54561 = torch.constant.int 1
    %int1_54562 = torch.constant.int 1
    %57124 = torch.aten.add.Scalar %57012, %int1_54561, %int1_54562 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57124, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_54563 = torch.constant.int 1
    %int1_54564 = torch.constant.int 1
    %57125 = torch.aten.add.Scalar %57013, %int1_54563, %int1_54564 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57125, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_54565 = torch.constant.int 1
    %int1_54566 = torch.constant.int 1
    %57126 = torch.aten.add.Scalar %57014, %int1_54565, %int1_54566 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57126, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_54567 = torch.constant.int 1
    %int1_54568 = torch.constant.int 1
    %57127 = torch.aten.add.Scalar %57015, %int1_54567, %int1_54568 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57127, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_54569 = torch.constant.int 1
    %int1_54570 = torch.constant.int 1
    %57128 = torch.aten.add.Scalar %57016, %int1_54569, %int1_54570 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %57128, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
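    // Flatten the value slot indices [4, ?] -> [s0*4].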
    %int4_54571 = torch.constant.int 4
    %57129 = torch.aten.mul.int %int4_54571, %3095 : !torch.int, !torch.int -> !torch.int
    %57130 = torch.prim.ListConstruct %57129 : (!torch.int) -> !torch.list<int>
    %57131 = torch.aten.view %57121, %57130 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57131, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54572 = torch.constant.int 4
    %57132 = torch.aten.mul.int %int4_54572, %3095 : !torch.int, !torch.int -> !torch.int
    %57133 = torch.prim.ListConstruct %57132 : (!torch.int) -> !torch.list<int>
    %57134 = torch.aten.view %57122, %57133 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57134, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54573 = torch.constant.int 4
    %57135 = torch.aten.mul.int %int4_54573, %3095 : !torch.int, !torch.int -> !torch.int
    %57136 = torch.prim.ListConstruct %57135 : (!torch.int) -> !torch.list<int>
    %57137 = torch.aten.view %57123, %57136 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57137, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54574 = torch.constant.int 4
    %57138 = torch.aten.mul.int %int4_54574, %3095 : !torch.int, !torch.int -> !torch.int
    %57139 = torch.prim.ListConstruct %57138 : (!torch.int) -> !torch.list<int>
    %57140 = torch.aten.view %57124, %57139 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57140, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54575 = torch.constant.int 4
    %57141 = torch.aten.mul.int %int4_54575, %3095 : !torch.int, !torch.int -> !torch.int
    %57142 = torch.prim.ListConstruct %57141 : (!torch.int) -> !torch.list<int>
    %57143 = torch.aten.view %57125, %57142 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57143, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54576 = torch.constant.int 4
    %57144 = torch.aten.mul.int %int4_54576, %3095 : !torch.int, !torch.int -> !torch.int
    %57145 = torch.prim.ListConstruct %57144 : (!torch.int) -> !torch.list<int>
    %57146 = torch.aten.view %57126, %57145 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57146, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54577 = torch.constant.int 4
    %57147 = torch.aten.mul.int %int4_54577, %3095 : !torch.int, !torch.int -> !torch.int
    %57148 = torch.prim.ListConstruct %57147 : (!torch.int) -> !torch.list<int>
    %57149 = torch.aten.view %57127, %57148 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57149, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_54578 = torch.constant.int 4
    %57150 = torch.aten.mul.int %int4_54578, %3095 : !torch.int, !torch.int -> !torch.int
    %57151 = torch.prim.ListConstruct %57150 : (!torch.int) -> !torch.list<int>
    %57152 = torch.aten.view %57128, %57151 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57152, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
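    // Concatenate key and value slot indices per device -> [s0*8] entries.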
    %57153 = torch.prim.ListConstruct %57059, %57131 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_54579 = torch.constant.int 0
    %57154 = torch.aten.cat %57153, %int0_54579 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57154, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %57155 = torch.prim.ListConstruct %57062, %57134 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_54580 = torch.constant.int 0
    %57156 = torch.aten.cat %57155, %int0_54580 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57156, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %57157 = torch.prim.ListConstruct %57065, %57137 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_54581 = torch.constant.int 0
    %57158 = torch.aten.cat %57157, %int0_54581 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57158, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %57159 = torch.prim.ListConstruct %57068, %57140 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_54582 = torch.constant.int 0
    %57160 = torch.aten.cat %57159, %int0_54582 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57160, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %57161 = torch.prim.ListConstruct %57071, %57143 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_54583 = torch.constant.int 0
    %57162 = torch.aten.cat %57161, %int0_54583 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57162, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %57163 = torch.prim.ListConstruct %57074, %57146 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_54584 = torch.constant.int 0
    %57164 = torch.aten.cat %57163, %int0_54584 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57164, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %57165 = torch.prim.ListConstruct %57077, %57149 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_54585 = torch.constant.int 0
    %57166 = torch.aten.cat %57165, %int0_54585 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57166, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %57167 = torch.prim.ListConstruct %57080, %57152 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_54586 = torch.constant.int 0
    %57168 = torch.aten.cat %57167, %int0_54586 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %57168, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
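    // Concatenate the matching K and V payload rows per device
    // -> [s0*8, 16, 1, 128].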
    %57169 = torch.prim.ListConstruct %57035, %57099 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_54587 = torch.constant.int 0
    %57170 = torch.aten.cat %57169, %int0_54587 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57170, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57171 = torch.prim.ListConstruct %57038, %57102 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_54588 = torch.constant.int 0
    %57172 = torch.aten.cat %57171, %int0_54588 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57172, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57173 = torch.prim.ListConstruct %57041, %57105 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_54589 = torch.constant.int 0
    %57174 = torch.aten.cat %57173, %int0_54589 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57174, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57175 = torch.prim.ListConstruct %57044, %57108 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_54590 = torch.constant.int 0
    %57176 = torch.aten.cat %57175, %int0_54590 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57176, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57177 = torch.prim.ListConstruct %57047, %57111 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_54591 = torch.constant.int 0
    %57178 = torch.aten.cat %57177, %int0_54591 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57178, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57179 = torch.prim.ListConstruct %57050, %57114 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_54592 = torch.constant.int 0
    %57180 = torch.aten.cat %57179, %int0_54592 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57180, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57181 = torch.prim.ListConstruct %57053, %57117 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_54593 = torch.constant.int 0
    %57182 = torch.aten.cat %57181, %int0_54593 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57182, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57183 = torch.prim.ListConstruct %57056, %57120 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_54594 = torch.constant.int 0
    %57184 = torch.aten.cat %57183, %int0_54594 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57184, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
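    // Scatter into the first device-local paged KV cache (%55335): view the
    // flat [?, 131072] f16 slab as [?, 32, 2, 16, 1, 128] (pages x layers x
    // K/V x tokens x head x dim), flatten to [s0*64, 16, 1, 128] slot rows,
    // write the concatenated K/V rows with a non-accumulating index_put, then
    // view back to [?, 131072]. The same pattern repeats for the remaining
    // device-local caches below.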
    %int32_54595 = torch.constant.int 32
    %int2_54596 = torch.constant.int 2
    %int16_54597 = torch.constant.int 16
    %int1_54598 = torch.constant.int 1
    %int128_54599 = torch.constant.int 128
    %57185 = torch.prim.ListConstruct %3023, %int32_54595, %int2_54596, %int16_54597, %int1_54598, %int128_54599 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57186 = torch.aten.view %55335, %57185 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57186, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_54600 = torch.constant.int 32
    %57187 = torch.aten.mul.int %3023, %int32_54600 : !torch.int, !torch.int -> !torch.int
    %int2_54601 = torch.constant.int 2
    %57188 = torch.aten.mul.int %57187, %int2_54601 : !torch.int, !torch.int -> !torch.int
    %int16_54602 = torch.constant.int 16
    %int1_54603 = torch.constant.int 1
    %int128_54604 = torch.constant.int 128
    %57189 = torch.prim.ListConstruct %57188, %int16_54602, %int1_54603, %int128_54604 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57190 = torch.aten.view %57186, %57189 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57190, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57191 = torch.prim.ListConstruct %57154 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_54605 = torch.constant.bool false
    %57192 = torch.aten.index_put %57190, %57191, %57170, %false_54605 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57192, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_54606 = torch.constant.int 32
    %int2_54607 = torch.constant.int 2
    %int16_54608 = torch.constant.int 16
    %int1_54609 = torch.constant.int 1
    %int128_54610 = torch.constant.int 128
    %57193 = torch.prim.ListConstruct %3023, %int32_54606, %int2_54607, %int16_54608, %int1_54609, %int128_54610 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57194 = torch.aten.view %57192, %57193 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57194, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_54611 = torch.constant.int 131072
    %57195 = torch.prim.ListConstruct %3023, %int131072_54611 : (!torch.int, !torch.int) -> !torch.list<int>
    %57196 = torch.aten.view %57194, %57195 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %57196, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
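    // Second cache slab (%55347): same view / index_put / view-back scatter.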
    %int32_54612 = torch.constant.int 32
    %int2_54613 = torch.constant.int 2
    %int16_54614 = torch.constant.int 16
    %int1_54615 = torch.constant.int 1
    %int128_54616 = torch.constant.int 128
    %57197 = torch.prim.ListConstruct %3026, %int32_54612, %int2_54613, %int16_54614, %int1_54615, %int128_54616 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57198 = torch.aten.view %55347, %57197 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57198, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_54617 = torch.constant.int 32
    %57199 = torch.aten.mul.int %3026, %int32_54617 : !torch.int, !torch.int -> !torch.int
    %int2_54618 = torch.constant.int 2
    %57200 = torch.aten.mul.int %57199, %int2_54618 : !torch.int, !torch.int -> !torch.int
    %int16_54619 = torch.constant.int 16
    %int1_54620 = torch.constant.int 1
    %int128_54621 = torch.constant.int 128
    %57201 = torch.prim.ListConstruct %57200, %int16_54619, %int1_54620, %int128_54621 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57202 = torch.aten.view %57198, %57201 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57202, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57203 = torch.prim.ListConstruct %57156 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_54622 = torch.constant.bool false
    %57204 = torch.aten.index_put %57202, %57203, %57172, %false_54622 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57204, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_54623 = torch.constant.int 32
    %int2_54624 = torch.constant.int 2
    %int16_54625 = torch.constant.int 16
    %int1_54626 = torch.constant.int 1
    %int128_54627 = torch.constant.int 128
    %57205 = torch.prim.ListConstruct %3026, %int32_54623, %int2_54624, %int16_54625, %int1_54626, %int128_54627 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57206 = torch.aten.view %57204, %57205 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57206, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_54628 = torch.constant.int 131072
    %57207 = torch.prim.ListConstruct %3026, %int131072_54628 : (!torch.int, !torch.int) -> !torch.list<int>
    %57208 = torch.aten.view %57206, %57207 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %57208, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
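    // Third cache slab (%55359): same scatter.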
    %int32_54629 = torch.constant.int 32
    %int2_54630 = torch.constant.int 2
    %int16_54631 = torch.constant.int 16
    %int1_54632 = torch.constant.int 1
    %int128_54633 = torch.constant.int 128
    %57209 = torch.prim.ListConstruct %3029, %int32_54629, %int2_54630, %int16_54631, %int1_54632, %int128_54633 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57210 = torch.aten.view %55359, %57209 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57210, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_54634 = torch.constant.int 32
    %57211 = torch.aten.mul.int %3029, %int32_54634 : !torch.int, !torch.int -> !torch.int
    %int2_54635 = torch.constant.int 2
    %57212 = torch.aten.mul.int %57211, %int2_54635 : !torch.int, !torch.int -> !torch.int
    %int16_54636 = torch.constant.int 16
    %int1_54637 = torch.constant.int 1
    %int128_54638 = torch.constant.int 128
    %57213 = torch.prim.ListConstruct %57212, %int16_54636, %int1_54637, %int128_54638 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57214 = torch.aten.view %57210, %57213 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57214, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57215 = torch.prim.ListConstruct %57158 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_54639 = torch.constant.bool false
    %57216 = torch.aten.index_put %57214, %57215, %57174, %false_54639 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57216, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_54640 = torch.constant.int 32
    %int2_54641 = torch.constant.int 2
    %int16_54642 = torch.constant.int 16
    %int1_54643 = torch.constant.int 1
    %int128_54644 = torch.constant.int 128
    %57217 = torch.prim.ListConstruct %3029, %int32_54640, %int2_54641, %int16_54642, %int1_54643, %int128_54644 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57218 = torch.aten.view %57216, %57217 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57218, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_54645 = torch.constant.int 131072
    %57219 = torch.prim.ListConstruct %3029, %int131072_54645 : (!torch.int, !torch.int) -> !torch.list<int>
    %57220 = torch.aten.view %57218, %57219 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %57220, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
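    // Fourth cache slab (%55371): same scatter.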
    %int32_54646 = torch.constant.int 32
    %int2_54647 = torch.constant.int 2
    %int16_54648 = torch.constant.int 16
    %int1_54649 = torch.constant.int 1
    %int128_54650 = torch.constant.int 128
    %57221 = torch.prim.ListConstruct %3032, %int32_54646, %int2_54647, %int16_54648, %int1_54649, %int128_54650 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57222 = torch.aten.view %55371, %57221 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57222, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_54651 = torch.constant.int 32
    %57223 = torch.aten.mul.int %3032, %int32_54651 : !torch.int, !torch.int -> !torch.int
    %int2_54652 = torch.constant.int 2
    %57224 = torch.aten.mul.int %57223, %int2_54652 : !torch.int, !torch.int -> !torch.int
    %int16_54653 = torch.constant.int 16
    %int1_54654 = torch.constant.int 1
    %int128_54655 = torch.constant.int 128
    %57225 = torch.prim.ListConstruct %57224, %int16_54653, %int1_54654, %int128_54655 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57226 = torch.aten.view %57222, %57225 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57226, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57227 = torch.prim.ListConstruct %57160 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_54656 = torch.constant.bool false
    %57228 = torch.aten.index_put %57226, %57227, %57176, %false_54656 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57228, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_54657 = torch.constant.int 32
    %int2_54658 = torch.constant.int 2
    %int16_54659 = torch.constant.int 16
    %int1_54660 = torch.constant.int 1
    %int128_54661 = torch.constant.int 128
    %57229 = torch.prim.ListConstruct %3032, %int32_54657, %int2_54658, %int16_54659, %int1_54660, %int128_54661 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57230 = torch.aten.view %57228, %57229 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57230, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_54662 = torch.constant.int 131072
    %57231 = torch.prim.ListConstruct %3032, %int131072_54662 : (!torch.int, !torch.int) -> !torch.list<int>
    %57232 = torch.aten.view %57230, %57231 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %57232, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
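    // Fifth cache slab (%55383): same scatter.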
    %int32_54663 = torch.constant.int 32
    %int2_54664 = torch.constant.int 2
    %int16_54665 = torch.constant.int 16
    %int1_54666 = torch.constant.int 1
    %int128_54667 = torch.constant.int 128
    %57233 = torch.prim.ListConstruct %3035, %int32_54663, %int2_54664, %int16_54665, %int1_54666, %int128_54667 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57234 = torch.aten.view %55383, %57233 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57234, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_54668 = torch.constant.int 32
    %57235 = torch.aten.mul.int %3035, %int32_54668 : !torch.int, !torch.int -> !torch.int
    %int2_54669 = torch.constant.int 2
    %57236 = torch.aten.mul.int %57235, %int2_54669 : !torch.int, !torch.int -> !torch.int
    %int16_54670 = torch.constant.int 16
    %int1_54671 = torch.constant.int 1
    %int128_54672 = torch.constant.int 128
    %57237 = torch.prim.ListConstruct %57236, %int16_54670, %int1_54671, %int128_54672 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57238 = torch.aten.view %57234, %57237 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57238, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57239 = torch.prim.ListConstruct %57162 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_54673 = torch.constant.bool false
    %57240 = torch.aten.index_put %57238, %57239, %57178, %false_54673 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57240, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_54674 = torch.constant.int 32
    %int2_54675 = torch.constant.int 2
    %int16_54676 = torch.constant.int 16
    %int1_54677 = torch.constant.int 1
    %int128_54678 = torch.constant.int 128
    %57241 = torch.prim.ListConstruct %3035, %int32_54674, %int2_54675, %int16_54676, %int1_54677, %int128_54678 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57242 = torch.aten.view %57240, %57241 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57242, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_54679 = torch.constant.int 131072
    %57243 = torch.prim.ListConstruct %3035, %int131072_54679 : (!torch.int, !torch.int) -> !torch.list<int>
    %57244 = torch.aten.view %57242, %57243 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %57244, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_54680 = torch.constant.int 32
    %int2_54681 = torch.constant.int 2
    %int16_54682 = torch.constant.int 16
    %int1_54683 = torch.constant.int 1
    %int128_54684 = torch.constant.int 128
    %57245 = torch.prim.ListConstruct %3038, %int32_54680, %int2_54681, %int16_54682, %int1_54683, %int128_54684 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57246 = torch.aten.view %55395, %57245 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57246, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_54685 = torch.constant.int 32
    %57247 = torch.aten.mul.int %3038, %int32_54685 : !torch.int, !torch.int -> !torch.int
    %int2_54686 = torch.constant.int 2
    %57248 = torch.aten.mul.int %57247, %int2_54686 : !torch.int, !torch.int -> !torch.int
    %int16_54687 = torch.constant.int 16
    %int1_54688 = torch.constant.int 1
    %int128_54689 = torch.constant.int 128
    %57249 = torch.prim.ListConstruct %57248, %int16_54687, %int1_54688, %int128_54689 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57250 = torch.aten.view %57246, %57249 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57250, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57251 = torch.prim.ListConstruct %57164 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_54690 = torch.constant.bool false
    %57252 = torch.aten.index_put %57250, %57251, %57180, %false_54690 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57252, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_54691 = torch.constant.int 32
    %int2_54692 = torch.constant.int 2
    %int16_54693 = torch.constant.int 16
    %int1_54694 = torch.constant.int 1
    %int128_54695 = torch.constant.int 128
    %57253 = torch.prim.ListConstruct %3038, %int32_54691, %int2_54692, %int16_54693, %int1_54694, %int128_54695 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57254 = torch.aten.view %57252, %57253 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57254, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_54696 = torch.constant.int 131072
    %57255 = torch.prim.ListConstruct %3038, %int131072_54696 : (!torch.int, !torch.int) -> !torch.list<int>
    %57256 = torch.aten.view %57254, %57255 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %57256, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_54697 = torch.constant.int 32
    %int2_54698 = torch.constant.int 2
    %int16_54699 = torch.constant.int 16
    %int1_54700 = torch.constant.int 1
    %int128_54701 = torch.constant.int 128
    %57257 = torch.prim.ListConstruct %3041, %int32_54697, %int2_54698, %int16_54699, %int1_54700, %int128_54701 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57258 = torch.aten.view %55407, %57257 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57258, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_54702 = torch.constant.int 32
    %57259 = torch.aten.mul.int %3041, %int32_54702 : !torch.int, !torch.int -> !torch.int
    %int2_54703 = torch.constant.int 2
    %57260 = torch.aten.mul.int %57259, %int2_54703 : !torch.int, !torch.int -> !torch.int
    %int16_54704 = torch.constant.int 16
    %int1_54705 = torch.constant.int 1
    %int128_54706 = torch.constant.int 128
    %57261 = torch.prim.ListConstruct %57260, %int16_54704, %int1_54705, %int128_54706 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57262 = torch.aten.view %57258, %57261 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57262, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57263 = torch.prim.ListConstruct %57166 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_54707 = torch.constant.bool false
    %57264 = torch.aten.index_put %57262, %57263, %57182, %false_54707 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57264, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_54708 = torch.constant.int 32
    %int2_54709 = torch.constant.int 2
    %int16_54710 = torch.constant.int 16
    %int1_54711 = torch.constant.int 1
    %int128_54712 = torch.constant.int 128
    %57265 = torch.prim.ListConstruct %3041, %int32_54708, %int2_54709, %int16_54710, %int1_54711, %int128_54712 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57266 = torch.aten.view %57264, %57265 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57266, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_54713 = torch.constant.int 131072
    %57267 = torch.prim.ListConstruct %3041, %int131072_54713 : (!torch.int, !torch.int) -> !torch.list<int>
    %57268 = torch.aten.view %57266, %57267 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %57268, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_54714 = torch.constant.int 32
    %int2_54715 = torch.constant.int 2
    %int16_54716 = torch.constant.int 16
    %int1_54717 = torch.constant.int 1
    %int128_54718 = torch.constant.int 128
    %57269 = torch.prim.ListConstruct %3044, %int32_54714, %int2_54715, %int16_54716, %int1_54717, %int128_54718 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57270 = torch.aten.view %55419, %57269 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57270, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_54719 = torch.constant.int 32
    %57271 = torch.aten.mul.int %3044, %int32_54719 : !torch.int, !torch.int -> !torch.int
    %int2_54720 = torch.constant.int 2
    %57272 = torch.aten.mul.int %57271, %int2_54720 : !torch.int, !torch.int -> !torch.int
    %int16_54721 = torch.constant.int 16
    %int1_54722 = torch.constant.int 1
    %int128_54723 = torch.constant.int 128
    %57273 = torch.prim.ListConstruct %57272, %int16_54721, %int1_54722, %int128_54723 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57274 = torch.aten.view %57270, %57273 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57274, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %57275 = torch.prim.ListConstruct %57168 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_54724 = torch.constant.bool false
    %57276 = torch.aten.index_put %57274, %57275, %57184, %false_54724 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %57276, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_54725 = torch.constant.int 32
    %int2_54726 = torch.constant.int 2
    %int16_54727 = torch.constant.int 16
    %int1_54728 = torch.constant.int 1
    %int128_54729 = torch.constant.int 128
    %57277 = torch.prim.ListConstruct %3044, %int32_54725, %int2_54726, %int16_54727, %int1_54728, %int128_54729 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57278 = torch.aten.view %57276, %57277 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %57278, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_54730 = torch.constant.int 131072
    %57279 = torch.prim.ListConstruct %3044, %int131072_54730 : (!torch.int, !torch.int) -> !torch.list<int>
    %57280 = torch.aten.view %57278, %57279 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %57280, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
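    // Cache write-backs end here. The unsqueeze/expand/view sequence below
    // appears to implement grouped-query attention head replication for the
    // eight per-shard key tensors, broadcasting the single KV head across the
    // four query heads of each shard. Roughly, per tensor (a sketch):
    //
    //   k = k.unsqueeze(-2)            # [4, s, 1, 128] -> [4, s, 1, 1, 128]
    //   k = k.expand(4, s, 1, 4, 128)  # broadcast to 4 query heads
    //   k = k.reshape(4, s, 4, 128)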
    %int-2_54731 = torch.constant.int -2
    %57281 = torch.aten.unsqueeze %56895, %int-2_54731 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54732 = torch.constant.int -2
    %57282 = torch.aten.unsqueeze %56910, %int-2_54732 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54733 = torch.constant.int -2
    %57283 = torch.aten.unsqueeze %56925, %int-2_54733 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54734 = torch.constant.int -2
    %57284 = torch.aten.unsqueeze %56940, %int-2_54734 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54735 = torch.constant.int -2
    %57285 = torch.aten.unsqueeze %56955, %int-2_54735 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54736 = torch.constant.int -2
    %57286 = torch.aten.unsqueeze %56970, %int-2_54736 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54737 = torch.constant.int -2
    %57287 = torch.aten.unsqueeze %56985, %int-2_54737 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54738 = torch.constant.int -2
    %57288 = torch.aten.unsqueeze %57000, %int-2_54738 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int4_54739 = torch.constant.int 4
    %int1_54740 = torch.constant.int 1
    %int4_54741 = torch.constant.int 4
    %int128_54742 = torch.constant.int 128
    %57289 = torch.prim.ListConstruct %int4_54739, %56881, %int1_54740, %int4_54741, %int128_54742 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54743 = torch.constant.bool false
    %57290 = torch.aten.expand %57281, %57289, %false_54743 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54744 = torch.constant.int 4
    %int1_54745 = torch.constant.int 1
    %int4_54746 = torch.constant.int 4
    %int128_54747 = torch.constant.int 128
    %57291 = torch.prim.ListConstruct %int4_54744, %56881, %int1_54745, %int4_54746, %int128_54747 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54748 = torch.constant.bool false
    %57292 = torch.aten.expand %57282, %57291, %false_54748 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54749 = torch.constant.int 4
    %int1_54750 = torch.constant.int 1
    %int4_54751 = torch.constant.int 4
    %int128_54752 = torch.constant.int 128
    %57293 = torch.prim.ListConstruct %int4_54749, %56881, %int1_54750, %int4_54751, %int128_54752 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54753 = torch.constant.bool false
    %57294 = torch.aten.expand %57283, %57293, %false_54753 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54754 = torch.constant.int 4
    %int1_54755 = torch.constant.int 1
    %int4_54756 = torch.constant.int 4
    %int128_54757 = torch.constant.int 128
    %57295 = torch.prim.ListConstruct %int4_54754, %56881, %int1_54755, %int4_54756, %int128_54757 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54758 = torch.constant.bool false
    %57296 = torch.aten.expand %57284, %57295, %false_54758 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54759 = torch.constant.int 4
    %int1_54760 = torch.constant.int 1
    %int4_54761 = torch.constant.int 4
    %int128_54762 = torch.constant.int 128
    %57297 = torch.prim.ListConstruct %int4_54759, %56881, %int1_54760, %int4_54761, %int128_54762 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54763 = torch.constant.bool false
    %57298 = torch.aten.expand %57285, %57297, %false_54763 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54764 = torch.constant.int 4
    %int1_54765 = torch.constant.int 1
    %int4_54766 = torch.constant.int 4
    %int128_54767 = torch.constant.int 128
    %57299 = torch.prim.ListConstruct %int4_54764, %56881, %int1_54765, %int4_54766, %int128_54767 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54768 = torch.constant.bool false
    %57300 = torch.aten.expand %57286, %57299, %false_54768 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54769 = torch.constant.int 4
    %int1_54770 = torch.constant.int 1
    %int4_54771 = torch.constant.int 4
    %int128_54772 = torch.constant.int 128
    %57301 = torch.prim.ListConstruct %int4_54769, %56881, %int1_54770, %int4_54771, %int128_54772 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54773 = torch.constant.bool false
    %57302 = torch.aten.expand %57287, %57301, %false_54773 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54774 = torch.constant.int 4
    %int1_54775 = torch.constant.int 1
    %int4_54776 = torch.constant.int 4
    %int128_54777 = torch.constant.int 128
    %57303 = torch.prim.ListConstruct %int4_54774, %56881, %int1_54775, %int4_54776, %int128_54777 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54778 = torch.constant.bool false
    %57304 = torch.aten.expand %57288, %57303, %false_54778 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
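    // With the key heads broadcast, the views below collapse each
    // [4,?,1,4,128] key back to [4,?,4,128].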
    %int4_54779 = torch.constant.int 4
    %int4_54780 = torch.constant.int 4
    %int128_54781 = torch.constant.int 128
    %57305 = torch.prim.ListConstruct %int4_54779, %56881, %int4_54780, %int128_54781 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57306 = torch.aten.view %57290, %57305 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54782 = torch.constant.int 4
    %int4_54783 = torch.constant.int 4
    %int128_54784 = torch.constant.int 128
    %57307 = torch.prim.ListConstruct %int4_54782, %56881, %int4_54783, %int128_54784 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57308 = torch.aten.view %57292, %57307 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54785 = torch.constant.int 4
    %int4_54786 = torch.constant.int 4
    %int128_54787 = torch.constant.int 128
    %57309 = torch.prim.ListConstruct %int4_54785, %56881, %int4_54786, %int128_54787 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57310 = torch.aten.view %57294, %57309 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54788 = torch.constant.int 4
    %int4_54789 = torch.constant.int 4
    %int128_54790 = torch.constant.int 128
    %57311 = torch.prim.ListConstruct %int4_54788, %56881, %int4_54789, %int128_54790 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57312 = torch.aten.view %57296, %57311 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54791 = torch.constant.int 4
    %int4_54792 = torch.constant.int 4
    %int128_54793 = torch.constant.int 128
    %57313 = torch.prim.ListConstruct %int4_54791, %56881, %int4_54792, %int128_54793 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57314 = torch.aten.view %57298, %57313 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54794 = torch.constant.int 4
    %int4_54795 = torch.constant.int 4
    %int128_54796 = torch.constant.int 128
    %57315 = torch.prim.ListConstruct %int4_54794, %56881, %int4_54795, %int128_54796 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57316 = torch.aten.view %57300, %57315 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54797 = torch.constant.int 4
    %int4_54798 = torch.constant.int 4
    %int128_54799 = torch.constant.int 128
    %57317 = torch.prim.ListConstruct %int4_54797, %56881, %int4_54798, %int128_54799 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57318 = torch.aten.view %57302, %57317 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54800 = torch.constant.int 4
    %int4_54801 = torch.constant.int 4
    %int128_54802 = torch.constant.int 128
    %57319 = torch.prim.ListConstruct %int4_54800, %56881, %int4_54801, %int128_54802 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57320 = torch.aten.view %57304, %57319 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57320, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
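    // The same unsqueeze/expand/view replication is now applied to the eight
    // per-shard value tensors, again taking [4,?,1,128] to [4,?,4,128];
    // %57329 re-reads the dynamic sequence length from %56594 instead of
    // reusing %56881.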
    %int-2_54803 = torch.constant.int -2
    %57321 = torch.aten.unsqueeze %56670, %int-2_54803 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54804 = torch.constant.int -2
    %57322 = torch.aten.unsqueeze %56672, %int-2_54804 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54805 = torch.constant.int -2
    %57323 = torch.aten.unsqueeze %56674, %int-2_54805 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57323, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54806 = torch.constant.int -2
    %57324 = torch.aten.unsqueeze %56676, %int-2_54806 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54807 = torch.constant.int -2
    %57325 = torch.aten.unsqueeze %56678, %int-2_54807 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57325, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54808 = torch.constant.int -2
    %57326 = torch.aten.unsqueeze %56680, %int-2_54808 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57326, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54809 = torch.constant.int -2
    %57327 = torch.aten.unsqueeze %56682, %int-2_54809 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_54810 = torch.constant.int -2
    %57328 = torch.aten.unsqueeze %56684, %int-2_54810 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %57328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_54811 = torch.constant.int 1
    %57329 = torch.aten.size.int %56594, %int1_54811 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_54812 = torch.constant.int 4
    %int1_54813 = torch.constant.int 1
    %int4_54814 = torch.constant.int 4
    %int128_54815 = torch.constant.int 128
    %57330 = torch.prim.ListConstruct %int4_54812, %57329, %int1_54813, %int4_54814, %int128_54815 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54816 = torch.constant.bool false
    %57331 = torch.aten.expand %57321, %57330, %false_54816 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57331, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54817 = torch.constant.int 4
    %int1_54818 = torch.constant.int 1
    %int4_54819 = torch.constant.int 4
    %int128_54820 = torch.constant.int 128
    %57332 = torch.prim.ListConstruct %int4_54817, %57329, %int1_54818, %int4_54819, %int128_54820 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54821 = torch.constant.bool false
    %57333 = torch.aten.expand %57322, %57332, %false_54821 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54822 = torch.constant.int 4
    %int1_54823 = torch.constant.int 1
    %int4_54824 = torch.constant.int 4
    %int128_54825 = torch.constant.int 128
    %57334 = torch.prim.ListConstruct %int4_54822, %57329, %int1_54823, %int4_54824, %int128_54825 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54826 = torch.constant.bool false
    %57335 = torch.aten.expand %57323, %57334, %false_54826 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57335, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54827 = torch.constant.int 4
    %int1_54828 = torch.constant.int 1
    %int4_54829 = torch.constant.int 4
    %int128_54830 = torch.constant.int 128
    %57336 = torch.prim.ListConstruct %int4_54827, %57329, %int1_54828, %int4_54829, %int128_54830 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54831 = torch.constant.bool false
    %57337 = torch.aten.expand %57324, %57336, %false_54831 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54832 = torch.constant.int 4
    %int1_54833 = torch.constant.int 1
    %int4_54834 = torch.constant.int 4
    %int128_54835 = torch.constant.int 128
    %57338 = torch.prim.ListConstruct %int4_54832, %57329, %int1_54833, %int4_54834, %int128_54835 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54836 = torch.constant.bool false
    %57339 = torch.aten.expand %57325, %57338, %false_54836 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54837 = torch.constant.int 4
    %int1_54838 = torch.constant.int 1
    %int4_54839 = torch.constant.int 4
    %int128_54840 = torch.constant.int 128
    %57340 = torch.prim.ListConstruct %int4_54837, %57329, %int1_54838, %int4_54839, %int128_54840 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54841 = torch.constant.bool false
    %57341 = torch.aten.expand %57326, %57340, %false_54841 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54842 = torch.constant.int 4
    %int1_54843 = torch.constant.int 1
    %int4_54844 = torch.constant.int 4
    %int128_54845 = torch.constant.int 128
    %57342 = torch.prim.ListConstruct %int4_54842, %57329, %int1_54843, %int4_54844, %int128_54845 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54846 = torch.constant.bool false
    %57343 = torch.aten.expand %57327, %57342, %false_54846 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54847 = torch.constant.int 4
    %int1_54848 = torch.constant.int 1
    %int4_54849 = torch.constant.int 4
    %int128_54850 = torch.constant.int 128
    %57344 = torch.prim.ListConstruct %int4_54847, %57329, %int1_54848, %int4_54849, %int128_54850 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_54851 = torch.constant.bool false
    %57345 = torch.aten.expand %57328, %57344, %false_54851 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %57345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_54852 = torch.constant.int 4
    %int4_54853 = torch.constant.int 4
    %int128_54854 = torch.constant.int 128
    %57346 = torch.prim.ListConstruct %int4_54852, %57329, %int4_54853, %int128_54854 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57347 = torch.aten.view %57331, %57346 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57347, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54855 = torch.constant.int 4
    %int4_54856 = torch.constant.int 4
    %int128_54857 = torch.constant.int 128
    %57348 = torch.prim.ListConstruct %int4_54855, %57329, %int4_54856, %int128_54857 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57349 = torch.aten.view %57333, %57348 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54858 = torch.constant.int 4
    %int4_54859 = torch.constant.int 4
    %int128_54860 = torch.constant.int 128
    %57350 = torch.prim.ListConstruct %int4_54858, %57329, %int4_54859, %int128_54860 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57351 = torch.aten.view %57335, %57350 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54861 = torch.constant.int 4
    %int4_54862 = torch.constant.int 4
    %int128_54863 = torch.constant.int 128
    %57352 = torch.prim.ListConstruct %int4_54861, %57329, %int4_54862, %int128_54863 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57353 = torch.aten.view %57337, %57352 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57353, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54864 = torch.constant.int 4
    %int4_54865 = torch.constant.int 4
    %int128_54866 = torch.constant.int 128
    %57354 = torch.prim.ListConstruct %int4_54864, %57329, %int4_54865, %int128_54866 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57355 = torch.aten.view %57339, %57354 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54867 = torch.constant.int 4
    %int4_54868 = torch.constant.int 4
    %int128_54869 = torch.constant.int 128
    %57356 = torch.prim.ListConstruct %int4_54867, %57329, %int4_54868, %int128_54869 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57357 = torch.aten.view %57341, %57356 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54870 = torch.constant.int 4
    %int4_54871 = torch.constant.int 4
    %int128_54872 = torch.constant.int 128
    %57358 = torch.prim.ListConstruct %int4_54870, %57329, %int4_54871, %int128_54872 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57359 = torch.aten.view %57343, %57358 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57359, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_54873 = torch.constant.int 4
    %int4_54874 = torch.constant.int 4
    %int128_54875 = torch.constant.int 128
    %57360 = torch.prim.ListConstruct %int4_54873, %57329, %int4_54874, %int128_54875 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57361 = torch.aten.view %57345, %57360 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
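    // The transposes below move Q, K, and V from [batch=4, seq, heads=4,
    // head_dim=128] to the [4,4,?,128] (batch, heads, seq, head_dim) layout
    // expected by the attention kernel.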
    %int1_54876 = torch.constant.int 1
    %int2_54877 = torch.constant.int 2
    %57362 = torch.aten.transpose.int %56737, %int1_54876, %int2_54877 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57362, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54878 = torch.constant.int 1
    %int2_54879 = torch.constant.int 2
    %57363 = torch.aten.transpose.int %56752, %int1_54878, %int2_54879 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57363, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54880 = torch.constant.int 1
    %int2_54881 = torch.constant.int 2
    %57364 = torch.aten.transpose.int %56767, %int1_54880, %int2_54881 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57364, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54882 = torch.constant.int 1
    %int2_54883 = torch.constant.int 2
    %57365 = torch.aten.transpose.int %56782, %int1_54882, %int2_54883 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57365, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54884 = torch.constant.int 1
    %int2_54885 = torch.constant.int 2
    %57366 = torch.aten.transpose.int %56797, %int1_54884, %int2_54885 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57366, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54886 = torch.constant.int 1
    %int2_54887 = torch.constant.int 2
    %57367 = torch.aten.transpose.int %56812, %int1_54886, %int2_54887 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57367, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54888 = torch.constant.int 1
    %int2_54889 = torch.constant.int 2
    %57368 = torch.aten.transpose.int %56827, %int1_54888, %int2_54889 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57368, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54890 = torch.constant.int 1
    %int2_54891 = torch.constant.int 2
    %57369 = torch.aten.transpose.int %56842, %int1_54890, %int2_54891 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57369, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54892 = torch.constant.int 1
    %int2_54893 = torch.constant.int 2
    %57370 = torch.aten.transpose.int %57306, %int1_54892, %int2_54893 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57370, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54894 = torch.constant.int 1
    %int2_54895 = torch.constant.int 2
    %57371 = torch.aten.transpose.int %57308, %int1_54894, %int2_54895 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57371, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54896 = torch.constant.int 1
    %int2_54897 = torch.constant.int 2
    %57372 = torch.aten.transpose.int %57310, %int1_54896, %int2_54897 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57372, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54898 = torch.constant.int 1
    %int2_54899 = torch.constant.int 2
    %57373 = torch.aten.transpose.int %57312, %int1_54898, %int2_54899 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57373, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54900 = torch.constant.int 1
    %int2_54901 = torch.constant.int 2
    %57374 = torch.aten.transpose.int %57314, %int1_54900, %int2_54901 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57374, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54902 = torch.constant.int 1
    %int2_54903 = torch.constant.int 2
    %57375 = torch.aten.transpose.int %57316, %int1_54902, %int2_54903 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57375, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54904 = torch.constant.int 1
    %int2_54905 = torch.constant.int 2
    %57376 = torch.aten.transpose.int %57318, %int1_54904, %int2_54905 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57376, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54906 = torch.constant.int 1
    %int2_54907 = torch.constant.int 2
    %57377 = torch.aten.transpose.int %57320, %int1_54906, %int2_54907 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57377, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54908 = torch.constant.int 1
    %int2_54909 = torch.constant.int 2
    %57378 = torch.aten.transpose.int %57347, %int1_54908, %int2_54909 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57378, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54910 = torch.constant.int 1
    %int2_54911 = torch.constant.int 2
    %57379 = torch.aten.transpose.int %57349, %int1_54910, %int2_54911 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57379, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54912 = torch.constant.int 1
    %int2_54913 = torch.constant.int 2
    %57380 = torch.aten.transpose.int %57351, %int1_54912, %int2_54913 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57380, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54914 = torch.constant.int 1
    %int2_54915 = torch.constant.int 2
    %57381 = torch.aten.transpose.int %57353, %int1_54914, %int2_54915 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57381, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54916 = torch.constant.int 1
    %int2_54917 = torch.constant.int 2
    %57382 = torch.aten.transpose.int %57355, %int1_54916, %int2_54917 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57382, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54918 = torch.constant.int 1
    %int2_54919 = torch.constant.int 2
    %57383 = torch.aten.transpose.int %57357, %int1_54918, %int2_54919 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57383, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54920 = torch.constant.int 1
    %int2_54921 = torch.constant.int 2
    %57384 = torch.aten.transpose.int %57359, %int1_54920, %int2_54921 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57384, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_54922 = torch.constant.int 1
    %int2_54923 = torch.constant.int 2
    %57385 = torch.aten.transpose.int %57361, %int1_54922, %int2_54923 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %57385, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
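    // Eight causal flash-attention calls follow, presumably one per shard.
    // Each takes (q, k, v, dropout_p = 0.0, is_causal = true,
    // attn_mask = none, scale = none) and returns the attention output plus
    // an f32 logsumexp. A rough PyTorch equivalent (a sketch, not the exact
    // lowering):
    //
    //   out, lse = torch.ops.aten._scaled_dot_product_flash_attention_for_cpu(
    //       q, k, v, dropout_p=0.0, is_causal=True)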
    %float0.000000e00_54924 = torch.constant.float 0.000000e+00
    %true_54925 = torch.constant.bool true
    %none_54926 = torch.constant.none
    %none_54927 = torch.constant.none
    %57386:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%57362, %57370, %57378, %float0.000000e00_54924, %true_54925, %none_54926, %none_54927) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %57386#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_54928 = torch.constant.float 0.000000e+00
    %true_54929 = torch.constant.bool true
    %none_54930 = torch.constant.none
    %none_54931 = torch.constant.none
    %57387:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%57363, %57371, %57379, %float0.000000e00_54928, %true_54929, %none_54930, %none_54931) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %57387#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_54932 = torch.constant.float 0.000000e+00
    %true_54933 = torch.constant.bool true
    %none_54934 = torch.constant.none
    %none_54935 = torch.constant.none
    %57388:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%57364, %57372, %57380, %float0.000000e00_54932, %true_54933, %none_54934, %none_54935) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %57388#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_54936 = torch.constant.float 0.000000e+00
    %true_54937 = torch.constant.bool true
    %none_54938 = torch.constant.none
    %none_54939 = torch.constant.none
    %57389:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%57365, %57373, %57381, %float0.000000e00_54936, %true_54937, %none_54938, %none_54939) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %57389#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_54940 = torch.constant.float 0.000000e+00
    %true_54941 = torch.constant.bool true
    %none_54942 = torch.constant.none
    %none_54943 = torch.constant.none
    %57390:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%57366, %57374, %57382, %float0.000000e00_54940, %true_54941, %none_54942, %none_54943) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %57390#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_54944 = torch.constant.float 0.000000e+00
    %true_54945 = torch.constant.bool true
    %none_54946 = torch.constant.none
    %none_54947 = torch.constant.none
    %57391:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%57367, %57375, %57383, %float0.000000e00_54944, %true_54945, %none_54946, %none_54947) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %57391#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_54948 = torch.constant.float 0.000000e+00
    %true_54949 = torch.constant.bool true
    %none_54950 = torch.constant.none
    %none_54951 = torch.constant.none
    %57392:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%57368, %57376, %57384, %float0.000000e00_54948, %true_54949, %none_54950, %none_54951) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %57392#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_54952 = torch.constant.float 0.000000e+00
    %true_54953 = torch.constant.bool true
    %none_54954 = torch.constant.none
    %none_54955 = torch.constant.none
    %57393:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%57369, %57377, %57385, %float0.000000e00_54952, %true_54953, %none_54954, %none_54955) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %57393#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
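    // The attention results are transposed back from [4,4,?,128] to
    // [4,?,4,128] so the head dimension can be folded into the feature axis.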
    %int1_54956 = torch.constant.int 1
    %int2_54957 = torch.constant.int 2
    %57394 = torch.aten.transpose.int %57386#0, %int1_54956, %int2_54957 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_54958 = torch.constant.int 1
    %int2_54959 = torch.constant.int 2
    %57395 = torch.aten.transpose.int %57387#0, %int1_54958, %int2_54959 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_54960 = torch.constant.int 1
    %int2_54961 = torch.constant.int 2
    %57396 = torch.aten.transpose.int %57388#0, %int1_54960, %int2_54961 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_54962 = torch.constant.int 1
    %int2_54963 = torch.constant.int 2
    %57397 = torch.aten.transpose.int %57389#0, %int1_54962, %int2_54963 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_54964 = torch.constant.int 1
    %int2_54965 = torch.constant.int 2
    %57398 = torch.aten.transpose.int %57390#0, %int1_54964, %int2_54965 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_54966 = torch.constant.int 1
    %int2_54967 = torch.constant.int 2
    %57399 = torch.aten.transpose.int %57391#0, %int1_54966, %int2_54967 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_54968 = torch.constant.int 1
    %int2_54969 = torch.constant.int 2
    %57400 = torch.aten.transpose.int %57392#0, %int1_54968, %int2_54969 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_54970 = torch.constant.int 1
    %int2_54971 = torch.constant.int 2
    %57401 = torch.aten.transpose.int %57393#0, %int1_54970, %int2_54971 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %57401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
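    // The views below merge the 4 heads of 128 features into a single
    // 512-wide feature axis: [4,?,4,128] -> [4,?,512].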
    %int4_54972 = torch.constant.int 4
    %int512_54973 = torch.constant.int 512
    %57402 = torch.prim.ListConstruct %int4_54972, %56723, %int512_54973 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57403 = torch.aten.view %57394, %57402 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %57403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_54974 = torch.constant.int 4
    %int512_54975 = torch.constant.int 512
    %57404 = torch.prim.ListConstruct %int4_54974, %56738, %int512_54975 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57405 = torch.aten.view %57395, %57404 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %57405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_54976 = torch.constant.int 4
    %int512_54977 = torch.constant.int 512
    %57406 = torch.prim.ListConstruct %int4_54976, %56753, %int512_54977 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57407 = torch.aten.view %57396, %57406 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %57407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_54978 = torch.constant.int 4
    %int512_54979 = torch.constant.int 512
    %57408 = torch.prim.ListConstruct %int4_54978, %56768, %int512_54979 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57409 = torch.aten.view %57397, %57408 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %57409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_54980 = torch.constant.int 4
    %int512_54981 = torch.constant.int 512
    %57410 = torch.prim.ListConstruct %int4_54980, %56783, %int512_54981 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57411 = torch.aten.view %57398, %57410 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %57411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_54982 = torch.constant.int 4
    %int512_54983 = torch.constant.int 512
    %57412 = torch.prim.ListConstruct %int4_54982, %56798, %int512_54983 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57413 = torch.aten.view %57399, %57412 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %57413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_54984 = torch.constant.int 4
    %int512_54985 = torch.constant.int 512
    %57414 = torch.prim.ListConstruct %int4_54984, %56813, %int512_54985 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57415 = torch.aten.view %57400, %57414 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %57415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_54986 = torch.constant.int 4
    %int512_54987 = torch.constant.int 512
    %57416 = torch.prim.ListConstruct %int4_54986, %56828, %int512_54987 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57417 = torch.aten.view %57401, %57416 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %57417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
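    // The permutes below transpose the eight per-shard output-projection
    // weights from [4096,512] to [512,4096], preparing plain matmuls of the
    // form y = x @ w.T.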
    %int1_54988 = torch.constant.int 1
    %int0_54989 = torch.constant.int 0
    %57418 = torch.prim.ListConstruct %int1_54988, %int0_54989 : (!torch.int, !torch.int) -> !torch.list<int>
    %57419 = torch.aten.permute %2128, %57418 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_54990 = torch.constant.int 1
    %int0_54991 = torch.constant.int 0
    %57420 = torch.prim.ListConstruct %int1_54990, %int0_54991 : (!torch.int, !torch.int) -> !torch.list<int>
    %57421 = torch.aten.permute %2129, %57420 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_54992 = torch.constant.int 1
    %int0_54993 = torch.constant.int 0
    %57422 = torch.prim.ListConstruct %int1_54992, %int0_54993 : (!torch.int, !torch.int) -> !torch.list<int>
    %57423 = torch.aten.permute %2130, %57422 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_54994 = torch.constant.int 1
    %int0_54995 = torch.constant.int 0
    %57424 = torch.prim.ListConstruct %int1_54994, %int0_54995 : (!torch.int, !torch.int) -> !torch.list<int>
    %57425 = torch.aten.permute %2131, %57424 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_54996 = torch.constant.int 1
    %int0_54997 = torch.constant.int 0
    %57426 = torch.prim.ListConstruct %int1_54996, %int0_54997 : (!torch.int, !torch.int) -> !torch.list<int>
    %57427 = torch.aten.permute %2132, %57426 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_54998 = torch.constant.int 1
    %int0_54999 = torch.constant.int 0
    %57428 = torch.prim.ListConstruct %int1_54998, %int0_54999 : (!torch.int, !torch.int) -> !torch.list<int>
    %57429 = torch.aten.permute %2133, %57428 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_55000 = torch.constant.int 1
    %int0_55001 = torch.constant.int 0
    %57430 = torch.prim.ListConstruct %int1_55000, %int0_55001 : (!torch.int, !torch.int) -> !torch.list<int>
    %57431 = torch.aten.permute %2134, %57430 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_55002 = torch.constant.int 1
    %int0_55003 = torch.constant.int 0
    %57432 = torch.prim.ListConstruct %int1_55002, %int0_55003 : (!torch.int, !torch.int) -> !torch.list<int>
    %57433 = torch.aten.permute %2135, %57432 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
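    // Per-device projection, repeated once per shard (0-7): collapse each
    // [4,?,512] shard to [?,512] (the 4 x (s0*16) leading dims flatten to s0*64),
    // matmul against the transposed [512,4096] weight shard, then expand the
    // [?,4096] result back to [4,?,4096].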
    %int4_55004 = torch.constant.int 4
    %57434 = torch.aten.mul.int %int4_55004, %56723 : !torch.int, !torch.int -> !torch.int
    %int512_55005 = torch.constant.int 512
    %57435 = torch.prim.ListConstruct %57434, %int512_55005 : (!torch.int, !torch.int) -> !torch.list<int>
    %57436 = torch.aten.view %57403, %57435 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %57436, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %57437 = torch.aten.mm %57436, %57419 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57437, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55006 = torch.constant.int 4
    %int4096_55007 = torch.constant.int 4096
    %57438 = torch.prim.ListConstruct %int4_55006, %56723, %int4096_55007 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57439 = torch.aten.view %57437, %57438 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_55008 = torch.constant.int 4
    %57440 = torch.aten.mul.int %int4_55008, %56738 : !torch.int, !torch.int -> !torch.int
    %int512_55009 = torch.constant.int 512
    %57441 = torch.prim.ListConstruct %57440, %int512_55009 : (!torch.int, !torch.int) -> !torch.list<int>
    %57442 = torch.aten.view %57405, %57441 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %57442, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %57443 = torch.aten.mm %57442, %57421 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57443, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55010 = torch.constant.int 4
    %int4096_55011 = torch.constant.int 4096
    %57444 = torch.prim.ListConstruct %int4_55010, %56738, %int4096_55011 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57445 = torch.aten.view %57443, %57444 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_55012 = torch.constant.int 4
    %57446 = torch.aten.mul.int %int4_55012, %56753 : !torch.int, !torch.int -> !torch.int
    %int512_55013 = torch.constant.int 512
    %57447 = torch.prim.ListConstruct %57446, %int512_55013 : (!torch.int, !torch.int) -> !torch.list<int>
    %57448 = torch.aten.view %57407, %57447 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %57448, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %57449 = torch.aten.mm %57448, %57423 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57449, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55014 = torch.constant.int 4
    %int4096_55015 = torch.constant.int 4096
    %57450 = torch.prim.ListConstruct %int4_55014, %56753, %int4096_55015 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57451 = torch.aten.view %57449, %57450 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_55016 = torch.constant.int 4
    %57452 = torch.aten.mul.int %int4_55016, %56768 : !torch.int, !torch.int -> !torch.int
    %int512_55017 = torch.constant.int 512
    %57453 = torch.prim.ListConstruct %57452, %int512_55017 : (!torch.int, !torch.int) -> !torch.list<int>
    %57454 = torch.aten.view %57409, %57453 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %57454, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %57455 = torch.aten.mm %57454, %57425 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57455, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55018 = torch.constant.int 4
    %int4096_55019 = torch.constant.int 4096
    %57456 = torch.prim.ListConstruct %int4_55018, %56768, %int4096_55019 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57457 = torch.aten.view %57455, %57456 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_55020 = torch.constant.int 4
    %57458 = torch.aten.mul.int %int4_55020, %56783 : !torch.int, !torch.int -> !torch.int
    %int512_55021 = torch.constant.int 512
    %57459 = torch.prim.ListConstruct %57458, %int512_55021 : (!torch.int, !torch.int) -> !torch.list<int>
    %57460 = torch.aten.view %57411, %57459 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %57460, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %57461 = torch.aten.mm %57460, %57427 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57461, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55022 = torch.constant.int 4
    %int4096_55023 = torch.constant.int 4096
    %57462 = torch.prim.ListConstruct %int4_55022, %56783, %int4096_55023 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57463 = torch.aten.view %57461, %57462 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_55024 = torch.constant.int 4
    %57464 = torch.aten.mul.int %int4_55024, %56798 : !torch.int, !torch.int -> !torch.int
    %int512_55025 = torch.constant.int 512
    %57465 = torch.prim.ListConstruct %57464, %int512_55025 : (!torch.int, !torch.int) -> !torch.list<int>
    %57466 = torch.aten.view %57413, %57465 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %57466, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %57467 = torch.aten.mm %57466, %57429 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57467, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55026 = torch.constant.int 4
    %int4096_55027 = torch.constant.int 4096
    %57468 = torch.prim.ListConstruct %int4_55026, %56798, %int4096_55027 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57469 = torch.aten.view %57467, %57468 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_55028 = torch.constant.int 4
    %57470 = torch.aten.mul.int %int4_55028, %56813 : !torch.int, !torch.int -> !torch.int
    %int512_55029 = torch.constant.int 512
    %57471 = torch.prim.ListConstruct %57470, %int512_55029 : (!torch.int, !torch.int) -> !torch.list<int>
    %57472 = torch.aten.view %57415, %57471 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %57472, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %57473 = torch.aten.mm %57472, %57431 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57473, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55030 = torch.constant.int 4
    %int4096_55031 = torch.constant.int 4096
    %57474 = torch.prim.ListConstruct %int4_55030, %56813, %int4096_55031 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57475 = torch.aten.view %57473, %57474 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_55032 = torch.constant.int 4
    %57476 = torch.aten.mul.int %int4_55032, %56828 : !torch.int, !torch.int -> !torch.int
    %int512_55033 = torch.constant.int 512
    %57477 = torch.prim.ListConstruct %57476, %int512_55033 : (!torch.int, !torch.int) -> !torch.list<int>
    %57478 = torch.aten.view %57417, %57477 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %57478, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %57479 = torch.aten.mm %57478, %57433 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57479, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55034 = torch.constant.int 4
    %int4096_55035 = torch.constant.int 4096
    %57480 = torch.prim.ListConstruct %int4_55034, %56828, %int4096_55035 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57481 = torch.aten.view %57479, %57480 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
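    // What follows is effectively an unrolled all-reduce of the eight partial
    // projection results: for each device d, the other seven partials are copied
    // over with flow.tensor.transfer and accumulated elementwise, so every device
    // ends up holding the full [4,?,4096] sum. First step: reduce onto
    // @__device_0 (local partial %57439 needs no transfer).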
    %57482 = torch_c.to_builtin_tensor %57445 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55036 = arith.constant 1 : index
    %dim_55037 = tensor.dim %57482, %c1_55036 : tensor<4x?x4096xf16>
    %57483 = flow.tensor.transfer %57482 : tensor<4x?x4096xf16>{%dim_55037} to #hal.device.promise<@__device_0>
    %57484 = torch_c.from_builtin_tensor %57483 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57485 = torch_c.to_builtin_tensor %57451 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55038 = arith.constant 1 : index
    %dim_55039 = tensor.dim %57485, %c1_55038 : tensor<4x?x4096xf16>
    %57486 = flow.tensor.transfer %57485 : tensor<4x?x4096xf16>{%dim_55039} to #hal.device.promise<@__device_0>
    %57487 = torch_c.from_builtin_tensor %57486 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57488 = torch_c.to_builtin_tensor %57457 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55040 = arith.constant 1 : index
    %dim_55041 = tensor.dim %57488, %c1_55040 : tensor<4x?x4096xf16>
    %57489 = flow.tensor.transfer %57488 : tensor<4x?x4096xf16>{%dim_55041} to #hal.device.promise<@__device_0>
    %57490 = torch_c.from_builtin_tensor %57489 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57491 = torch_c.to_builtin_tensor %57463 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55042 = arith.constant 1 : index
    %dim_55043 = tensor.dim %57491, %c1_55042 : tensor<4x?x4096xf16>
    %57492 = flow.tensor.transfer %57491 : tensor<4x?x4096xf16>{%dim_55043} to #hal.device.promise<@__device_0>
    %57493 = torch_c.from_builtin_tensor %57492 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57494 = torch_c.to_builtin_tensor %57469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55044 = arith.constant 1 : index
    %dim_55045 = tensor.dim %57494, %c1_55044 : tensor<4x?x4096xf16>
    %57495 = flow.tensor.transfer %57494 : tensor<4x?x4096xf16>{%dim_55045} to #hal.device.promise<@__device_0>
    %57496 = torch_c.from_builtin_tensor %57495 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57497 = torch_c.to_builtin_tensor %57475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55046 = arith.constant 1 : index
    %dim_55047 = tensor.dim %57497, %c1_55046 : tensor<4x?x4096xf16>
    %57498 = flow.tensor.transfer %57497 : tensor<4x?x4096xf16>{%dim_55047} to #hal.device.promise<@__device_0>
    %57499 = torch_c.from_builtin_tensor %57498 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57500 = torch_c.to_builtin_tensor %57481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55048 = arith.constant 1 : index
    %dim_55049 = tensor.dim %57500, %c1_55048 : tensor<4x?x4096xf16>
    %57501 = flow.tensor.transfer %57500 : tensor<4x?x4096xf16>{%dim_55049} to #hal.device.promise<@__device_0>
    %57502 = torch_c.from_builtin_tensor %57501 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55050 = torch.constant.int 1
    %57503 = torch.aten.add.Tensor %57439, %57484, %int1_55050 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55051 = torch.constant.int 1
    %57504 = torch.aten.add.Tensor %57503, %57487, %int1_55051 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55052 = torch.constant.int 1
    %57505 = torch.aten.add.Tensor %57504, %57490, %int1_55052 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55053 = torch.constant.int 1
    %57506 = torch.aten.add.Tensor %57505, %57493, %int1_55053 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55054 = torch.constant.int 1
    %57507 = torch.aten.add.Tensor %57506, %57496, %int1_55054 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55055 = torch.constant.int 1
    %57508 = torch.aten.add.Tensor %57507, %57499, %int1_55055 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55056 = torch.constant.int 1
    %57509 = torch.aten.add.Tensor %57508, %57502, %int1_55056 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
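    // Reduce onto @__device_1 (local partial %57445 needs no transfer).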
    %57510 = torch_c.to_builtin_tensor %57439 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55057 = arith.constant 1 : index
    %dim_55058 = tensor.dim %57510, %c1_55057 : tensor<4x?x4096xf16>
    %57511 = flow.tensor.transfer %57510 : tensor<4x?x4096xf16>{%dim_55058} to #hal.device.promise<@__device_1>
    %57512 = torch_c.from_builtin_tensor %57511 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57512, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57513 = torch_c.to_builtin_tensor %57451 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55059 = arith.constant 1 : index
    %dim_55060 = tensor.dim %57513, %c1_55059 : tensor<4x?x4096xf16>
    %57514 = flow.tensor.transfer %57513 : tensor<4x?x4096xf16>{%dim_55060} to #hal.device.promise<@__device_1>
    %57515 = torch_c.from_builtin_tensor %57514 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57516 = torch_c.to_builtin_tensor %57457 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55061 = arith.constant 1 : index
    %dim_55062 = tensor.dim %57516, %c1_55061 : tensor<4x?x4096xf16>
    %57517 = flow.tensor.transfer %57516 : tensor<4x?x4096xf16>{%dim_55062} to #hal.device.promise<@__device_1>
    %57518 = torch_c.from_builtin_tensor %57517 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57519 = torch_c.to_builtin_tensor %57463 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55063 = arith.constant 1 : index
    %dim_55064 = tensor.dim %57519, %c1_55063 : tensor<4x?x4096xf16>
    %57520 = flow.tensor.transfer %57519 : tensor<4x?x4096xf16>{%dim_55064} to #hal.device.promise<@__device_1>
    %57521 = torch_c.from_builtin_tensor %57520 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57522 = torch_c.to_builtin_tensor %57469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55065 = arith.constant 1 : index
    %dim_55066 = tensor.dim %57522, %c1_55065 : tensor<4x?x4096xf16>
    %57523 = flow.tensor.transfer %57522 : tensor<4x?x4096xf16>{%dim_55066} to #hal.device.promise<@__device_1>
    %57524 = torch_c.from_builtin_tensor %57523 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57524, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57525 = torch_c.to_builtin_tensor %57475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55067 = arith.constant 1 : index
    %dim_55068 = tensor.dim %57525, %c1_55067 : tensor<4x?x4096xf16>
    %57526 = flow.tensor.transfer %57525 : tensor<4x?x4096xf16>{%dim_55068} to #hal.device.promise<@__device_1>
    %57527 = torch_c.from_builtin_tensor %57526 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57528 = torch_c.to_builtin_tensor %57481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55069 = arith.constant 1 : index
    %dim_55070 = tensor.dim %57528, %c1_55069 : tensor<4x?x4096xf16>
    %57529 = flow.tensor.transfer %57528 : tensor<4x?x4096xf16>{%dim_55070} to #hal.device.promise<@__device_1>
    %57530 = torch_c.from_builtin_tensor %57529 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55071 = torch.constant.int 1
    %57531 = torch.aten.add.Tensor %57512, %57445, %int1_55071 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55072 = torch.constant.int 1
    %57532 = torch.aten.add.Tensor %57531, %57515, %int1_55072 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55073 = torch.constant.int 1
    %57533 = torch.aten.add.Tensor %57532, %57518, %int1_55073 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55074 = torch.constant.int 1
    %57534 = torch.aten.add.Tensor %57533, %57521, %int1_55074 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55075 = torch.constant.int 1
    %57535 = torch.aten.add.Tensor %57534, %57524, %int1_55075 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55076 = torch.constant.int 1
    %57536 = torch.aten.add.Tensor %57535, %57527, %int1_55076 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55077 = torch.constant.int 1
    %57537 = torch.aten.add.Tensor %57536, %57530, %int1_55077 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
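    // Reduce onto @__device_2 (local partial %57451 needs no transfer).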
    %57538 = torch_c.to_builtin_tensor %57439 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55078 = arith.constant 1 : index
    %dim_55079 = tensor.dim %57538, %c1_55078 : tensor<4x?x4096xf16>
    %57539 = flow.tensor.transfer %57538 : tensor<4x?x4096xf16>{%dim_55079} to #hal.device.promise<@__device_2>
    %57540 = torch_c.from_builtin_tensor %57539 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57540, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57541 = torch_c.to_builtin_tensor %57445 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55080 = arith.constant 1 : index
    %dim_55081 = tensor.dim %57541, %c1_55080 : tensor<4x?x4096xf16>
    %57542 = flow.tensor.transfer %57541 : tensor<4x?x4096xf16>{%dim_55081} to #hal.device.promise<@__device_2>
    %57543 = torch_c.from_builtin_tensor %57542 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57544 = torch_c.to_builtin_tensor %57457 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55082 = arith.constant 1 : index
    %dim_55083 = tensor.dim %57544, %c1_55082 : tensor<4x?x4096xf16>
    %57545 = flow.tensor.transfer %57544 : tensor<4x?x4096xf16>{%dim_55083} to #hal.device.promise<@__device_2>
    %57546 = torch_c.from_builtin_tensor %57545 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57546, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57547 = torch_c.to_builtin_tensor %57463 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55084 = arith.constant 1 : index
    %dim_55085 = tensor.dim %57547, %c1_55084 : tensor<4x?x4096xf16>
    %57548 = flow.tensor.transfer %57547 : tensor<4x?x4096xf16>{%dim_55085} to #hal.device.promise<@__device_2>
    %57549 = torch_c.from_builtin_tensor %57548 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57550 = torch_c.to_builtin_tensor %57469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55086 = arith.constant 1 : index
    %dim_55087 = tensor.dim %57550, %c1_55086 : tensor<4x?x4096xf16>
    %57551 = flow.tensor.transfer %57550 : tensor<4x?x4096xf16>{%dim_55087} to #hal.device.promise<@__device_2>
    %57552 = torch_c.from_builtin_tensor %57551 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57552, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57553 = torch_c.to_builtin_tensor %57475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55088 = arith.constant 1 : index
    %dim_55089 = tensor.dim %57553, %c1_55088 : tensor<4x?x4096xf16>
    %57554 = flow.tensor.transfer %57553 : tensor<4x?x4096xf16>{%dim_55089} to #hal.device.promise<@__device_2>
    %57555 = torch_c.from_builtin_tensor %57554 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57555, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57556 = torch_c.to_builtin_tensor %57481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55090 = arith.constant 1 : index
    %dim_55091 = tensor.dim %57556, %c1_55090 : tensor<4x?x4096xf16>
    %57557 = flow.tensor.transfer %57556 : tensor<4x?x4096xf16>{%dim_55091} to #hal.device.promise<@__device_2>
    %57558 = torch_c.from_builtin_tensor %57557 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57558, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55092 = torch.constant.int 1
    %57559 = torch.aten.add.Tensor %57540, %57543, %int1_55092 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55093 = torch.constant.int 1
    %57560 = torch.aten.add.Tensor %57559, %57451, %int1_55093 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55094 = torch.constant.int 1
    %57561 = torch.aten.add.Tensor %57560, %57546, %int1_55094 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55095 = torch.constant.int 1
    %57562 = torch.aten.add.Tensor %57561, %57549, %int1_55095 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55096 = torch.constant.int 1
    %57563 = torch.aten.add.Tensor %57562, %57552, %int1_55096 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55097 = torch.constant.int 1
    %57564 = torch.aten.add.Tensor %57563, %57555, %int1_55097 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55098 = torch.constant.int 1
    %57565 = torch.aten.add.Tensor %57564, %57558, %int1_55098 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
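    // Reduce onto @__device_3 (local partial %57457 needs no transfer).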
    %57566 = torch_c.to_builtin_tensor %57439 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55099 = arith.constant 1 : index
    %dim_55100 = tensor.dim %57566, %c1_55099 : tensor<4x?x4096xf16>
    %57567 = flow.tensor.transfer %57566 : tensor<4x?x4096xf16>{%dim_55100} to #hal.device.promise<@__device_3>
    %57568 = torch_c.from_builtin_tensor %57567 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57569 = torch_c.to_builtin_tensor %57445 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55101 = arith.constant 1 : index
    %dim_55102 = tensor.dim %57569, %c1_55101 : tensor<4x?x4096xf16>
    %57570 = flow.tensor.transfer %57569 : tensor<4x?x4096xf16>{%dim_55102} to #hal.device.promise<@__device_3>
    %57571 = torch_c.from_builtin_tensor %57570 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57572 = torch_c.to_builtin_tensor %57451 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55103 = arith.constant 1 : index
    %dim_55104 = tensor.dim %57572, %c1_55103 : tensor<4x?x4096xf16>
    %57573 = flow.tensor.transfer %57572 : tensor<4x?x4096xf16>{%dim_55104} to #hal.device.promise<@__device_3>
    %57574 = torch_c.from_builtin_tensor %57573 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57575 = torch_c.to_builtin_tensor %57463 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55105 = arith.constant 1 : index
    %dim_55106 = tensor.dim %57575, %c1_55105 : tensor<4x?x4096xf16>
    %57576 = flow.tensor.transfer %57575 : tensor<4x?x4096xf16>{%dim_55106} to #hal.device.promise<@__device_3>
    %57577 = torch_c.from_builtin_tensor %57576 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57578 = torch_c.to_builtin_tensor %57469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55107 = arith.constant 1 : index
    %dim_55108 = tensor.dim %57578, %c1_55107 : tensor<4x?x4096xf16>
    %57579 = flow.tensor.transfer %57578 : tensor<4x?x4096xf16>{%dim_55108} to #hal.device.promise<@__device_3>
    %57580 = torch_c.from_builtin_tensor %57579 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57581 = torch_c.to_builtin_tensor %57475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55109 = arith.constant 1 : index
    %dim_55110 = tensor.dim %57581, %c1_55109 : tensor<4x?x4096xf16>
    %57582 = flow.tensor.transfer %57581 : tensor<4x?x4096xf16>{%dim_55110} to #hal.device.promise<@__device_3>
    %57583 = torch_c.from_builtin_tensor %57582 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57584 = torch_c.to_builtin_tensor %57481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55111 = arith.constant 1 : index
    %dim_55112 = tensor.dim %57584, %c1_55111 : tensor<4x?x4096xf16>
    %57585 = flow.tensor.transfer %57584 : tensor<4x?x4096xf16>{%dim_55112} to #hal.device.promise<@__device_3>
    %57586 = torch_c.from_builtin_tensor %57585 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55113 = torch.constant.int 1
    %57587 = torch.aten.add.Tensor %57568, %57571, %int1_55113 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55114 = torch.constant.int 1
    %57588 = torch.aten.add.Tensor %57587, %57574, %int1_55114 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55115 = torch.constant.int 1
    %57589 = torch.aten.add.Tensor %57588, %57457, %int1_55115 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55116 = torch.constant.int 1
    %57590 = torch.aten.add.Tensor %57589, %57577, %int1_55116 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55117 = torch.constant.int 1
    %57591 = torch.aten.add.Tensor %57590, %57580, %int1_55117 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57591, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55118 = torch.constant.int 1
    %57592 = torch.aten.add.Tensor %57591, %57583, %int1_55118 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55119 = torch.constant.int 1
    %57593 = torch.aten.add.Tensor %57592, %57586, %int1_55119 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
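    // Reduce onto @__device_4 (local partial %57463 needs no transfer).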
    %57594 = torch_c.to_builtin_tensor %57439 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55120 = arith.constant 1 : index
    %dim_55121 = tensor.dim %57594, %c1_55120 : tensor<4x?x4096xf16>
    %57595 = flow.tensor.transfer %57594 : tensor<4x?x4096xf16>{%dim_55121} to #hal.device.promise<@__device_4>
    %57596 = torch_c.from_builtin_tensor %57595 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57597 = torch_c.to_builtin_tensor %57445 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55122 = arith.constant 1 : index
    %dim_55123 = tensor.dim %57597, %c1_55122 : tensor<4x?x4096xf16>
    %57598 = flow.tensor.transfer %57597 : tensor<4x?x4096xf16>{%dim_55123} to #hal.device.promise<@__device_4>
    %57599 = torch_c.from_builtin_tensor %57598 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57599, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57600 = torch_c.to_builtin_tensor %57451 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55124 = arith.constant 1 : index
    %dim_55125 = tensor.dim %57600, %c1_55124 : tensor<4x?x4096xf16>
    %57601 = flow.tensor.transfer %57600 : tensor<4x?x4096xf16>{%dim_55125} to #hal.device.promise<@__device_4>
    %57602 = torch_c.from_builtin_tensor %57601 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57603 = torch_c.to_builtin_tensor %57457 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55126 = arith.constant 1 : index
    %dim_55127 = tensor.dim %57603, %c1_55126 : tensor<4x?x4096xf16>
    %57604 = flow.tensor.transfer %57603 : tensor<4x?x4096xf16>{%dim_55127} to #hal.device.promise<@__device_4>
    %57605 = torch_c.from_builtin_tensor %57604 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57605, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57606 = torch_c.to_builtin_tensor %57469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55128 = arith.constant 1 : index
    %dim_55129 = tensor.dim %57606, %c1_55128 : tensor<4x?x4096xf16>
    %57607 = flow.tensor.transfer %57606 : tensor<4x?x4096xf16>{%dim_55129} to #hal.device.promise<@__device_4>
    %57608 = torch_c.from_builtin_tensor %57607 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57609 = torch_c.to_builtin_tensor %57475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55130 = arith.constant 1 : index
    %dim_55131 = tensor.dim %57609, %c1_55130 : tensor<4x?x4096xf16>
    %57610 = flow.tensor.transfer %57609 : tensor<4x?x4096xf16>{%dim_55131} to #hal.device.promise<@__device_4>
    %57611 = torch_c.from_builtin_tensor %57610 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57612 = torch_c.to_builtin_tensor %57481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55132 = arith.constant 1 : index
    %dim_55133 = tensor.dim %57612, %c1_55132 : tensor<4x?x4096xf16>
    %57613 = flow.tensor.transfer %57612 : tensor<4x?x4096xf16>{%dim_55133} to #hal.device.promise<@__device_4>
    %57614 = torch_c.from_builtin_tensor %57613 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55134 = torch.constant.int 1
    %57615 = torch.aten.add.Tensor %57596, %57599, %int1_55134 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55135 = torch.constant.int 1
    %57616 = torch.aten.add.Tensor %57615, %57602, %int1_55135 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55136 = torch.constant.int 1
    %57617 = torch.aten.add.Tensor %57616, %57605, %int1_55136 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55137 = torch.constant.int 1
    %57618 = torch.aten.add.Tensor %57617, %57463, %int1_55137 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55138 = torch.constant.int 1
    %57619 = torch.aten.add.Tensor %57618, %57608, %int1_55138 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55139 = torch.constant.int 1
    %57620 = torch.aten.add.Tensor %57619, %57611, %int1_55139 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55140 = torch.constant.int 1
    %57621 = torch.aten.add.Tensor %57620, %57614, %int1_55140 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
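    // Reduce onto @__device_5 (local partial %57469 needs no transfer).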
    %57622 = torch_c.to_builtin_tensor %57439 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55141 = arith.constant 1 : index
    %dim_55142 = tensor.dim %57622, %c1_55141 : tensor<4x?x4096xf16>
    %57623 = flow.tensor.transfer %57622 : tensor<4x?x4096xf16>{%dim_55142} to #hal.device.promise<@__device_5>
    %57624 = torch_c.from_builtin_tensor %57623 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57625 = torch_c.to_builtin_tensor %57445 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55143 = arith.constant 1 : index
    %dim_55144 = tensor.dim %57625, %c1_55143 : tensor<4x?x4096xf16>
    %57626 = flow.tensor.transfer %57625 : tensor<4x?x4096xf16>{%dim_55144} to #hal.device.promise<@__device_5>
    %57627 = torch_c.from_builtin_tensor %57626 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57628 = torch_c.to_builtin_tensor %57451 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55145 = arith.constant 1 : index
    %dim_55146 = tensor.dim %57628, %c1_55145 : tensor<4x?x4096xf16>
    %57629 = flow.tensor.transfer %57628 : tensor<4x?x4096xf16>{%dim_55146} to #hal.device.promise<@__device_5>
    %57630 = torch_c.from_builtin_tensor %57629 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57631 = torch_c.to_builtin_tensor %57457 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55147 = arith.constant 1 : index
    %dim_55148 = tensor.dim %57631, %c1_55147 : tensor<4x?x4096xf16>
    %57632 = flow.tensor.transfer %57631 : tensor<4x?x4096xf16>{%dim_55148} to #hal.device.promise<@__device_5>
    %57633 = torch_c.from_builtin_tensor %57632 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57634 = torch_c.to_builtin_tensor %57463 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55149 = arith.constant 1 : index
    %dim_55150 = tensor.dim %57634, %c1_55149 : tensor<4x?x4096xf16>
    %57635 = flow.tensor.transfer %57634 : tensor<4x?x4096xf16>{%dim_55150} to #hal.device.promise<@__device_5>
    %57636 = torch_c.from_builtin_tensor %57635 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57637 = torch_c.to_builtin_tensor %57475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55151 = arith.constant 1 : index
    %dim_55152 = tensor.dim %57637, %c1_55151 : tensor<4x?x4096xf16>
    %57638 = flow.tensor.transfer %57637 : tensor<4x?x4096xf16>{%dim_55152} to #hal.device.promise<@__device_5>
    %57639 = torch_c.from_builtin_tensor %57638 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57640 = torch_c.to_builtin_tensor %57481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55153 = arith.constant 1 : index
    %dim_55154 = tensor.dim %57640, %c1_55153 : tensor<4x?x4096xf16>
    %57641 = flow.tensor.transfer %57640 : tensor<4x?x4096xf16>{%dim_55154} to #hal.device.promise<@__device_5>
    %57642 = torch_c.from_builtin_tensor %57641 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55155 = torch.constant.int 1
    %57643 = torch.aten.add.Tensor %57624, %57627, %int1_55155 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55156 = torch.constant.int 1
    %57644 = torch.aten.add.Tensor %57643, %57630, %int1_55156 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55157 = torch.constant.int 1
    %57645 = torch.aten.add.Tensor %57644, %57633, %int1_55157 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55158 = torch.constant.int 1
    %57646 = torch.aten.add.Tensor %57645, %57636, %int1_55158 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55159 = torch.constant.int 1
    %57647 = torch.aten.add.Tensor %57646, %57469, %int1_55159 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55160 = torch.constant.int 1
    %57648 = torch.aten.add.Tensor %57647, %57639, %int1_55160 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55161 = torch.constant.int 1
    %57649 = torch.aten.add.Tensor %57648, %57642, %int1_55161 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
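    // Reduce onto @__device_6 (local partial %57475 needs no transfer).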
    %57650 = torch_c.to_builtin_tensor %57439 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55162 = arith.constant 1 : index
    %dim_55163 = tensor.dim %57650, %c1_55162 : tensor<4x?x4096xf16>
    %57651 = flow.tensor.transfer %57650 : tensor<4x?x4096xf16>{%dim_55163} to #hal.device.promise<@__device_6>
    %57652 = torch_c.from_builtin_tensor %57651 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57652, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57653 = torch_c.to_builtin_tensor %57445 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55164 = arith.constant 1 : index
    %dim_55165 = tensor.dim %57653, %c1_55164 : tensor<4x?x4096xf16>
    %57654 = flow.tensor.transfer %57653 : tensor<4x?x4096xf16>{%dim_55165} to #hal.device.promise<@__device_6>
    %57655 = torch_c.from_builtin_tensor %57654 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57655, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57656 = torch_c.to_builtin_tensor %57451 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55166 = arith.constant 1 : index
    %dim_55167 = tensor.dim %57656, %c1_55166 : tensor<4x?x4096xf16>
    %57657 = flow.tensor.transfer %57656 : tensor<4x?x4096xf16>{%dim_55167} to #hal.device.promise<@__device_6>
    %57658 = torch_c.from_builtin_tensor %57657 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57659 = torch_c.to_builtin_tensor %57457 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55168 = arith.constant 1 : index
    %dim_55169 = tensor.dim %57659, %c1_55168 : tensor<4x?x4096xf16>
    %57660 = flow.tensor.transfer %57659 : tensor<4x?x4096xf16>{%dim_55169} to #hal.device.promise<@__device_6>
    %57661 = torch_c.from_builtin_tensor %57660 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57662 = torch_c.to_builtin_tensor %57463 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55170 = arith.constant 1 : index
    %dim_55171 = tensor.dim %57662, %c1_55170 : tensor<4x?x4096xf16>
    %57663 = flow.tensor.transfer %57662 : tensor<4x?x4096xf16>{%dim_55171} to #hal.device.promise<@__device_6>
    %57664 = torch_c.from_builtin_tensor %57663 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57664, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57665 = torch_c.to_builtin_tensor %57469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55172 = arith.constant 1 : index
    %dim_55173 = tensor.dim %57665, %c1_55172 : tensor<4x?x4096xf16>
    %57666 = flow.tensor.transfer %57665 : tensor<4x?x4096xf16>{%dim_55173} to #hal.device.promise<@__device_6>
    %57667 = torch_c.from_builtin_tensor %57666 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57667, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57668 = torch_c.to_builtin_tensor %57481 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55174 = arith.constant 1 : index
    %dim_55175 = tensor.dim %57668, %c1_55174 : tensor<4x?x4096xf16>
    %57669 = flow.tensor.transfer %57668 : tensor<4x?x4096xf16>{%dim_55175} to #hal.device.promise<@__device_6>
    %57670 = torch_c.from_builtin_tensor %57669 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57670, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55176 = torch.constant.int 1
    %57671 = torch.aten.add.Tensor %57652, %57655, %int1_55176 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57671, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55177 = torch.constant.int 1
    %57672 = torch.aten.add.Tensor %57671, %57658, %int1_55177 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55178 = torch.constant.int 1
    %57673 = torch.aten.add.Tensor %57672, %57661, %int1_55178 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55179 = torch.constant.int 1
    %57674 = torch.aten.add.Tensor %57673, %57664, %int1_55179 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55180 = torch.constant.int 1
    %57675 = torch.aten.add.Tensor %57674, %57667, %int1_55180 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57675, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55181 = torch.constant.int 1
    %57676 = torch.aten.add.Tensor %57675, %57475, %int1_55181 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55182 = torch.constant.int 1
    %57677 = torch.aten.add.Tensor %57676, %57670, %int1_55182 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
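    // Reduce onto @__device_7 (local partial %57481 needs no transfer).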
    %57678 = torch_c.to_builtin_tensor %57439 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55183 = arith.constant 1 : index
    %dim_55184 = tensor.dim %57678, %c1_55183 : tensor<4x?x4096xf16>
    %57679 = flow.tensor.transfer %57678 : tensor<4x?x4096xf16>{%dim_55184} to #hal.device.promise<@__device_7>
    %57680 = torch_c.from_builtin_tensor %57679 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57681 = torch_c.to_builtin_tensor %57445 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55185 = arith.constant 1 : index
    %dim_55186 = tensor.dim %57681, %c1_55185 : tensor<4x?x4096xf16>
    %57682 = flow.tensor.transfer %57681 : tensor<4x?x4096xf16>{%dim_55186} to #hal.device.promise<@__device_7>
    %57683 = torch_c.from_builtin_tensor %57682 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57684 = torch_c.to_builtin_tensor %57451 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55187 = arith.constant 1 : index
    %dim_55188 = tensor.dim %57684, %c1_55187 : tensor<4x?x4096xf16>
    %57685 = flow.tensor.transfer %57684 : tensor<4x?x4096xf16>{%dim_55188} to #hal.device.promise<@__device_7>
    %57686 = torch_c.from_builtin_tensor %57685 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57687 = torch_c.to_builtin_tensor %57457 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55189 = arith.constant 1 : index
    %dim_55190 = tensor.dim %57687, %c1_55189 : tensor<4x?x4096xf16>
    %57688 = flow.tensor.transfer %57687 : tensor<4x?x4096xf16>{%dim_55190} to #hal.device.promise<@__device_7>
    %57689 = torch_c.from_builtin_tensor %57688 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57689, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57690 = torch_c.to_builtin_tensor %57463 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55191 = arith.constant 1 : index
    %dim_55192 = tensor.dim %57690, %c1_55191 : tensor<4x?x4096xf16>
    %57691 = flow.tensor.transfer %57690 : tensor<4x?x4096xf16>{%dim_55192} to #hal.device.promise<@__device_7>
    %57692 = torch_c.from_builtin_tensor %57691 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57693 = torch_c.to_builtin_tensor %57469 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55193 = arith.constant 1 : index
    %dim_55194 = tensor.dim %57693, %c1_55193 : tensor<4x?x4096xf16>
    %57694 = flow.tensor.transfer %57693 : tensor<4x?x4096xf16>{%dim_55194} to #hal.device.promise<@__device_7>
    %57695 = torch_c.from_builtin_tensor %57694 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %57696 = torch_c.to_builtin_tensor %57475 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55195 = arith.constant 1 : index
    %dim_55196 = tensor.dim %57696, %c1_55195 : tensor<4x?x4096xf16>
    %57697 = flow.tensor.transfer %57696 : tensor<4x?x4096xf16>{%dim_55196} to #hal.device.promise<@__device_7>
    %57698 = torch_c.from_builtin_tensor %57697 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
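    // Accumulate the seven transferred partials plus the device-local term (%57481)
    // to finish the reduction on @__device_7.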
    %int1_55197 = torch.constant.int 1
    %57699 = torch.aten.add.Tensor %57680, %57683, %int1_55197 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55198 = torch.constant.int 1
    %57700 = torch.aten.add.Tensor %57699, %57686, %int1_55198 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57700, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55199 = torch.constant.int 1
    %57701 = torch.aten.add.Tensor %57700, %57689, %int1_55199 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57701, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55200 = torch.constant.int 1
    %57702 = torch.aten.add.Tensor %57701, %57692, %int1_55200 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55201 = torch.constant.int 1
    %57703 = torch.aten.add.Tensor %57702, %57695, %int1_55201 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55202 = torch.constant.int 1
    %57704 = torch.aten.add.Tensor %57703, %57698, %int1_55202 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55203 = torch.constant.int 1
    %57705 = torch.aten.add.Tensor %57704, %57481, %int1_55203 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57705, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
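    // Residual connection: on each device, add the reduced output (%57509 ... %57705)
    // back into that device's hidden state (%56365 ... %56372).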
    %int1_55204 = torch.constant.int 1
    %57706 = torch.aten.add.Tensor %56365, %57509, %int1_55204 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55205 = torch.constant.int 1
    %57707 = torch.aten.add.Tensor %56366, %57537, %int1_55205 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55206 = torch.constant.int 1
    %57708 = torch.aten.add.Tensor %56367, %57565, %int1_55206 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57708, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55207 = torch.constant.int 1
    %57709 = torch.aten.add.Tensor %56368, %57593, %int1_55207 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55208 = torch.constant.int 1
    %57710 = torch.aten.add.Tensor %56369, %57621, %int1_55208 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55209 = torch.constant.int 1
    %57711 = torch.aten.add.Tensor %56370, %57649, %int1_55209 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55210 = torch.constant.int 1
    %57712 = torch.aten.add.Tensor %56371, %57677, %int1_55210 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55211 = torch.constant.int 1
    %57713 = torch.aten.add.Tensor %56372, %57705, %int1_55211 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
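    // The next ops match an RMSNorm over the post-residual hidden states, replicated per device.
    // Step 1: upcast f16 -> f32 (torch dtype code 6 is float32).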
    %int6_55212 = torch.constant.int 6
    %57714 = torch.prims.convert_element_type %57706, %int6_55212 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55213 = torch.constant.int 6
    %57715 = torch.prims.convert_element_type %57707, %int6_55213 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55214 = torch.constant.int 6
    %57716 = torch.prims.convert_element_type %57708, %int6_55214 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55215 = torch.constant.int 6
    %57717 = torch.prims.convert_element_type %57709, %int6_55215 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55216 = torch.constant.int 6
    %57718 = torch.prims.convert_element_type %57710, %int6_55216 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55217 = torch.constant.int 6
    %57719 = torch.prims.convert_element_type %57711, %int6_55217 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57719, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55218 = torch.constant.int 6
    %57720 = torch.prims.convert_element_type %57712, %int6_55218 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57720, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55219 = torch.constant.int 6
    %57721 = torch.prims.convert_element_type %57713, %int6_55219 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
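    // Step 2: square each element.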
    %int2_55220 = torch.constant.int 2
    %57722 = torch.aten.pow.Tensor_Scalar %57714, %int2_55220 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55221 = torch.constant.int 2
    %57723 = torch.aten.pow.Tensor_Scalar %57715, %int2_55221 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55222 = torch.constant.int 2
    %57724 = torch.aten.pow.Tensor_Scalar %57716, %int2_55222 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57724, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55223 = torch.constant.int 2
    %57725 = torch.aten.pow.Tensor_Scalar %57717, %int2_55223 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57725, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55224 = torch.constant.int 2
    %57726 = torch.aten.pow.Tensor_Scalar %57718, %int2_55224 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55225 = torch.constant.int 2
    %57727 = torch.aten.pow.Tensor_Scalar %57719, %int2_55225 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57727, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55226 = torch.constant.int 2
    %57728 = torch.aten.pow.Tensor_Scalar %57720, %int2_55226 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57728, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55227 = torch.constant.int 2
    %57729 = torch.aten.pow.Tensor_Scalar %57721, %int2_55227 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
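    // Step 3: mean of squares over the hidden dimension (dim -1, keepdim = true).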
    %int-1_55228 = torch.constant.int -1
    %57730 = torch.prim.ListConstruct %int-1_55228 : (!torch.int) -> !torch.list<int>
    %true_55229 = torch.constant.bool true
    %none_55230 = torch.constant.none
    %57731 = torch.aten.mean.dim %57722, %57730, %true_55229, %none_55230 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57731, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55231 = torch.constant.int -1
    %57732 = torch.prim.ListConstruct %int-1_55231 : (!torch.int) -> !torch.list<int>
    %true_55232 = torch.constant.bool true
    %none_55233 = torch.constant.none
    %57733 = torch.aten.mean.dim %57723, %57732, %true_55232, %none_55233 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57733, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55234 = torch.constant.int -1
    %57734 = torch.prim.ListConstruct %int-1_55234 : (!torch.int) -> !torch.list<int>
    %true_55235 = torch.constant.bool true
    %none_55236 = torch.constant.none
    %57735 = torch.aten.mean.dim %57724, %57734, %true_55235, %none_55236 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55237 = torch.constant.int -1
    %57736 = torch.prim.ListConstruct %int-1_55237 : (!torch.int) -> !torch.list<int>
    %true_55238 = torch.constant.bool true
    %none_55239 = torch.constant.none
    %57737 = torch.aten.mean.dim %57725, %57736, %true_55238, %none_55239 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57737, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55240 = torch.constant.int -1
    %57738 = torch.prim.ListConstruct %int-1_55240 : (!torch.int) -> !torch.list<int>
    %true_55241 = torch.constant.bool true
    %none_55242 = torch.constant.none
    %57739 = torch.aten.mean.dim %57726, %57738, %true_55241, %none_55242 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57739, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55243 = torch.constant.int -1
    %57740 = torch.prim.ListConstruct %int-1_55243 : (!torch.int) -> !torch.list<int>
    %true_55244 = torch.constant.bool true
    %none_55245 = torch.constant.none
    %57741 = torch.aten.mean.dim %57727, %57740, %true_55244, %none_55245 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55246 = torch.constant.int -1
    %57742 = torch.prim.ListConstruct %int-1_55246 : (!torch.int) -> !torch.list<int>
    %true_55247 = torch.constant.bool true
    %none_55248 = torch.constant.none
    %57743 = torch.aten.mean.dim %57728, %57742, %true_55247, %none_55248 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57743, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55249 = torch.constant.int -1
    %57744 = torch.prim.ListConstruct %int-1_55249 : (!torch.int) -> !torch.list<int>
    %true_55250 = torch.constant.bool true
    %none_55251 = torch.constant.none
    %57745 = torch.aten.mean.dim %57729, %57744, %true_55250, %none_55251 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
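    // Step 4: add the variance epsilon (9.9999997e-6, i.e. 1e-5 rounded to f32).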
    %float9.999990e-06_55252 = torch.constant.float 9.9999997473787516E-6
    %int1_55253 = torch.constant.int 1
    %57746 = torch.aten.add.Scalar %57731, %float9.999990e-06_55252, %int1_55253 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55254 = torch.constant.float 9.9999997473787516E-6
    %int1_55255 = torch.constant.int 1
    %57747 = torch.aten.add.Scalar %57733, %float9.999990e-06_55254, %int1_55255 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55256 = torch.constant.float 9.9999997473787516E-6
    %int1_55257 = torch.constant.int 1
    %57748 = torch.aten.add.Scalar %57735, %float9.999990e-06_55256, %int1_55257 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55258 = torch.constant.float 9.9999997473787516E-6
    %int1_55259 = torch.constant.int 1
    %57749 = torch.aten.add.Scalar %57737, %float9.999990e-06_55258, %int1_55259 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55260 = torch.constant.float 9.9999997473787516E-6
    %int1_55261 = torch.constant.int 1
    %57750 = torch.aten.add.Scalar %57739, %float9.999990e-06_55260, %int1_55261 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55262 = torch.constant.float 9.9999997473787516E-6
    %int1_55263 = torch.constant.int 1
    %57751 = torch.aten.add.Scalar %57741, %float9.999990e-06_55262, %int1_55263 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55264 = torch.constant.float 9.9999997473787516E-6
    %int1_55265 = torch.constant.int 1
    %57752 = torch.aten.add.Scalar %57743, %float9.999990e-06_55264, %int1_55265 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55266 = torch.constant.float 9.9999997473787516E-6
    %int1_55267 = torch.constant.int 1
    %57753 = torch.aten.add.Scalar %57745, %float9.999990e-06_55266, %int1_55267 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57753, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
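    // Step 5: reciprocal square root of (mean-of-squares + eps).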
    %57754 = torch.aten.rsqrt %57746 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %57755 = torch.aten.rsqrt %57747 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %57756 = torch.aten.rsqrt %57748 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %57757 = torch.aten.rsqrt %57749 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %57758 = torch.aten.rsqrt %57750 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %57759 = torch.aten.rsqrt %57751 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57759, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %57760 = torch.aten.rsqrt %57752 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %57761 = torch.aten.rsqrt %57753 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %57761, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
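    // Step 6: normalize the activations by the reciprocal RMS.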
    %57762 = torch.aten.mul.Tensor %57714, %57754 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57762, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57763 = torch.aten.mul.Tensor %57715, %57755 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57764 = torch.aten.mul.Tensor %57716, %57756 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57765 = torch.aten.mul.Tensor %57717, %57757 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57765, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57766 = torch.aten.mul.Tensor %57718, %57758 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57767 = torch.aten.mul.Tensor %57719, %57759 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57768 = torch.aten.mul.Tensor %57720, %57760 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57768, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57769 = torch.aten.mul.Tensor %57721, %57761 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
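    // Step 7: apply the learned norm scale (%2136 ... %2143; one replica per device).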
    %57770 = torch.aten.mul.Tensor %2136, %57762 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57771 = torch.aten.mul.Tensor %2137, %57763 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57772 = torch.aten.mul.Tensor %2138, %57764 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57773 = torch.aten.mul.Tensor %2139, %57765 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57774 = torch.aten.mul.Tensor %2140, %57766 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57775 = torch.aten.mul.Tensor %2141, %57767 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57776 = torch.aten.mul.Tensor %2142, %57768 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %57777 = torch.aten.mul.Tensor %2143, %57769 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %57777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
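    // Step 8: downcast back to f16 (torch dtype code 5 is float16).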
    %int5_55268 = torch.constant.int 5
    %57778 = torch.prims.convert_element_type %57770, %int5_55268 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55269 = torch.constant.int 5
    %57779 = torch.prims.convert_element_type %57771, %int5_55269 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55270 = torch.constant.int 5
    %57780 = torch.prims.convert_element_type %57772, %int5_55270 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57780, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55271 = torch.constant.int 5
    %57781 = torch.prims.convert_element_type %57773, %int5_55271 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55272 = torch.constant.int 5
    %57782 = torch.prims.convert_element_type %57774, %int5_55272 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55273 = torch.constant.int 5
    %57783 = torch.prims.convert_element_type %57775, %int5_55273 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55274 = torch.constant.int 5
    %57784 = torch.prims.convert_element_type %57776, %int5_55274 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55275 = torch.constant.int 5
    %57785 = torch.prims.convert_element_type %57777, %int5_55275 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
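    // FFN first matmul, column-sharded across the eight devices: each [1792,4096] weight shard
    // (%2144 ... %2151; presumably the gate projection of a SwiGLU MLP, 8 x 1792 = 14336 total)
    // is transposed to [4096,1792], the input is flattened from [4,?,4096] to [4*?,4096],
    // multiplied, and the result reshaped back to [4,?,1792].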
    %int1_55276 = torch.constant.int 1
    %int0_55277 = torch.constant.int 0
    %57786 = torch.prim.ListConstruct %int1_55276, %int0_55277 : (!torch.int, !torch.int) -> !torch.list<int>
    %57787 = torch.aten.permute %2144, %57786 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55278 = torch.constant.int 1
    %int0_55279 = torch.constant.int 0
    %57788 = torch.prim.ListConstruct %int1_55278, %int0_55279 : (!torch.int, !torch.int) -> !torch.list<int>
    %57789 = torch.aten.permute %2145, %57788 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55280 = torch.constant.int 1
    %int0_55281 = torch.constant.int 0
    %57790 = torch.prim.ListConstruct %int1_55280, %int0_55281 : (!torch.int, !torch.int) -> !torch.list<int>
    %57791 = torch.aten.permute %2146, %57790 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55282 = torch.constant.int 1
    %int0_55283 = torch.constant.int 0
    %57792 = torch.prim.ListConstruct %int1_55282, %int0_55283 : (!torch.int, !torch.int) -> !torch.list<int>
    %57793 = torch.aten.permute %2147, %57792 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55284 = torch.constant.int 1
    %int0_55285 = torch.constant.int 0
    %57794 = torch.prim.ListConstruct %int1_55284, %int0_55285 : (!torch.int, !torch.int) -> !torch.list<int>
    %57795 = torch.aten.permute %2148, %57794 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55286 = torch.constant.int 1
    %int0_55287 = torch.constant.int 0
    %57796 = torch.prim.ListConstruct %int1_55286, %int0_55287 : (!torch.int, !torch.int) -> !torch.list<int>
    %57797 = torch.aten.permute %2149, %57796 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55288 = torch.constant.int 1
    %int0_55289 = torch.constant.int 0
    %57798 = torch.prim.ListConstruct %int1_55288, %int0_55289 : (!torch.int, !torch.int) -> !torch.list<int>
    %57799 = torch.aten.permute %2150, %57798 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55290 = torch.constant.int 1
    %int0_55291 = torch.constant.int 0
    %57800 = torch.prim.ListConstruct %int1_55290, %int0_55291 : (!torch.int, !torch.int) -> !torch.list<int>
    %57801 = torch.aten.permute %2151, %57800 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int4_55292 = torch.constant.int 4
    %57802 = torch.aten.mul.int %int4_55292, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55293 = torch.constant.int 4096
    %57803 = torch.prim.ListConstruct %57802, %int4096_55293 : (!torch.int, !torch.int) -> !torch.list<int>
    %57804 = torch.aten.view %57778, %57803 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57804, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57805 = torch.aten.mm %57804, %57787 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57805, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55294 = torch.constant.int 4
    %int1792_55295 = torch.constant.int 1792
    %57806 = torch.prim.ListConstruct %int4_55294, %2482, %int1792_55295 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57807 = torch.aten.view %57805, %57806 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55296 = torch.constant.int 4
    %57808 = torch.aten.mul.int %int4_55296, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55297 = torch.constant.int 4096
    %57809 = torch.prim.ListConstruct %57808, %int4096_55297 : (!torch.int, !torch.int) -> !torch.list<int>
    %57810 = torch.aten.view %57779, %57809 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57810, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57811 = torch.aten.mm %57810, %57789 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57811, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55298 = torch.constant.int 4
    %int1792_55299 = torch.constant.int 1792
    %57812 = torch.prim.ListConstruct %int4_55298, %2482, %int1792_55299 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57813 = torch.aten.view %57811, %57812 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55300 = torch.constant.int 4
    %57814 = torch.aten.mul.int %int4_55300, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55301 = torch.constant.int 4096
    %57815 = torch.prim.ListConstruct %57814, %int4096_55301 : (!torch.int, !torch.int) -> !torch.list<int>
    %57816 = torch.aten.view %57780, %57815 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57816, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57817 = torch.aten.mm %57816, %57791 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57817, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55302 = torch.constant.int 4
    %int1792_55303 = torch.constant.int 1792
    %57818 = torch.prim.ListConstruct %int4_55302, %2482, %int1792_55303 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57819 = torch.aten.view %57817, %57818 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55304 = torch.constant.int 4
    %57820 = torch.aten.mul.int %int4_55304, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55305 = torch.constant.int 4096
    %57821 = torch.prim.ListConstruct %57820, %int4096_55305 : (!torch.int, !torch.int) -> !torch.list<int>
    %57822 = torch.aten.view %57781, %57821 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57822, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57823 = torch.aten.mm %57822, %57793 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57823, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55306 = torch.constant.int 4
    %int1792_55307 = torch.constant.int 1792
    %57824 = torch.prim.ListConstruct %int4_55306, %2482, %int1792_55307 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57825 = torch.aten.view %57823, %57824 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55308 = torch.constant.int 4
    %57826 = torch.aten.mul.int %int4_55308, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55309 = torch.constant.int 4096
    %57827 = torch.prim.ListConstruct %57826, %int4096_55309 : (!torch.int, !torch.int) -> !torch.list<int>
    %57828 = torch.aten.view %57782, %57827 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57828, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57829 = torch.aten.mm %57828, %57795 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57829, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55310 = torch.constant.int 4
    %int1792_55311 = torch.constant.int 1792
    %57830 = torch.prim.ListConstruct %int4_55310, %2482, %int1792_55311 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57831 = torch.aten.view %57829, %57830 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55312 = torch.constant.int 4
    %57832 = torch.aten.mul.int %int4_55312, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55313 = torch.constant.int 4096
    %57833 = torch.prim.ListConstruct %57832, %int4096_55313 : (!torch.int, !torch.int) -> !torch.list<int>
    %57834 = torch.aten.view %57783, %57833 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57834, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57835 = torch.aten.mm %57834, %57797 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57835, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55314 = torch.constant.int 4
    %int1792_55315 = torch.constant.int 1792
    %57836 = torch.prim.ListConstruct %int4_55314, %2482, %int1792_55315 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57837 = torch.aten.view %57835, %57836 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57837, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55316 = torch.constant.int 4
    %57838 = torch.aten.mul.int %int4_55316, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55317 = torch.constant.int 4096
    %57839 = torch.prim.ListConstruct %57838, %int4096_55317 : (!torch.int, !torch.int) -> !torch.list<int>
    %57840 = torch.aten.view %57784, %57839 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57840, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57841 = torch.aten.mm %57840, %57799 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57841, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55318 = torch.constant.int 4
    %int1792_55319 = torch.constant.int 1792
    %57842 = torch.prim.ListConstruct %int4_55318, %2482, %int1792_55319 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57843 = torch.aten.view %57841, %57842 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57843, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55320 = torch.constant.int 4
    %57844 = torch.aten.mul.int %int4_55320, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55321 = torch.constant.int 4096
    %57845 = torch.prim.ListConstruct %57844, %int4096_55321 : (!torch.int, !torch.int) -> !torch.list<int>
    %57846 = torch.aten.view %57785, %57845 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57846, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57847 = torch.aten.mm %57846, %57801 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57847, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55322 = torch.constant.int 4
    %int1792_55323 = torch.constant.int 1792
    %57848 = torch.prim.ListConstruct %int4_55322, %2482, %int1792_55323 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57849 = torch.aten.view %57847, %57848 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57849, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
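    // SiLU activation on the first projection's outputs, consistent with the gate branch of SwiGLU.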
    %57850 = torch.aten.silu %57807 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57851 = torch.aten.silu %57813 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57851, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57852 = torch.aten.silu %57819 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57852, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57853 = torch.aten.silu %57825 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57854 = torch.aten.silu %57831 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57854, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57855 = torch.aten.silu %57837 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57856 = torch.aten.silu %57843 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57857 = torch.aten.silu %57849 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
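    // Second FFN matmul with weight shards %2152 ... %2159 (presumably the up projection),
    // using the same transpose/flatten/mm/reshape pattern on the same normalized input.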
    %int1_55324 = torch.constant.int 1
    %int0_55325 = torch.constant.int 0
    %57858 = torch.prim.ListConstruct %int1_55324, %int0_55325 : (!torch.int, !torch.int) -> !torch.list<int>
    %57859 = torch.aten.permute %2152, %57858 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55326 = torch.constant.int 1
    %int0_55327 = torch.constant.int 0
    %57860 = torch.prim.ListConstruct %int1_55326, %int0_55327 : (!torch.int, !torch.int) -> !torch.list<int>
    %57861 = torch.aten.permute %2153, %57860 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55328 = torch.constant.int 1
    %int0_55329 = torch.constant.int 0
    %57862 = torch.prim.ListConstruct %int1_55328, %int0_55329 : (!torch.int, !torch.int) -> !torch.list<int>
    %57863 = torch.aten.permute %2154, %57862 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55330 = torch.constant.int 1
    %int0_55331 = torch.constant.int 0
    %57864 = torch.prim.ListConstruct %int1_55330, %int0_55331 : (!torch.int, !torch.int) -> !torch.list<int>
    %57865 = torch.aten.permute %2155, %57864 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55332 = torch.constant.int 1
    %int0_55333 = torch.constant.int 0
    %57866 = torch.prim.ListConstruct %int1_55332, %int0_55333 : (!torch.int, !torch.int) -> !torch.list<int>
    %57867 = torch.aten.permute %2156, %57866 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55334 = torch.constant.int 1
    %int0_55335 = torch.constant.int 0
    %57868 = torch.prim.ListConstruct %int1_55334, %int0_55335 : (!torch.int, !torch.int) -> !torch.list<int>
    %57869 = torch.aten.permute %2157, %57868 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55336 = torch.constant.int 1
    %int0_55337 = torch.constant.int 0
    %57870 = torch.prim.ListConstruct %int1_55336, %int0_55337 : (!torch.int, !torch.int) -> !torch.list<int>
    %57871 = torch.aten.permute %2158, %57870 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_55338 = torch.constant.int 1
    %int0_55339 = torch.constant.int 0
    %57872 = torch.prim.ListConstruct %int1_55338, %int0_55339 : (!torch.int, !torch.int) -> !torch.list<int>
    %57873 = torch.aten.permute %2159, %57872 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int4_55340 = torch.constant.int 4
    %57874 = torch.aten.mul.int %int4_55340, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55341 = torch.constant.int 4096
    %57875 = torch.prim.ListConstruct %57874, %int4096_55341 : (!torch.int, !torch.int) -> !torch.list<int>
    %57876 = torch.aten.view %57778, %57875 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57876, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57877 = torch.aten.mm %57876, %57859 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57877, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55342 = torch.constant.int 4
    %int1792_55343 = torch.constant.int 1792
    %57878 = torch.prim.ListConstruct %int4_55342, %2482, %int1792_55343 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57879 = torch.aten.view %57877, %57878 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57879, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55344 = torch.constant.int 4
    %57880 = torch.aten.mul.int %int4_55344, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55345 = torch.constant.int 4096
    %57881 = torch.prim.ListConstruct %57880, %int4096_55345 : (!torch.int, !torch.int) -> !torch.list<int>
    %57882 = torch.aten.view %57779, %57881 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57882, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57883 = torch.aten.mm %57882, %57861 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57883, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55346 = torch.constant.int 4
    %int1792_55347 = torch.constant.int 1792
    %57884 = torch.prim.ListConstruct %int4_55346, %2482, %int1792_55347 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57885 = torch.aten.view %57883, %57884 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55348 = torch.constant.int 4
    %57886 = torch.aten.mul.int %int4_55348, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55349 = torch.constant.int 4096
    %57887 = torch.prim.ListConstruct %57886, %int4096_55349 : (!torch.int, !torch.int) -> !torch.list<int>
    %57888 = torch.aten.view %57780, %57887 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57888, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57889 = torch.aten.mm %57888, %57863 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57889, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55350 = torch.constant.int 4
    %int1792_55351 = torch.constant.int 1792
    %57890 = torch.prim.ListConstruct %int4_55350, %2482, %int1792_55351 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57891 = torch.aten.view %57889, %57890 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55352 = torch.constant.int 4
    %57892 = torch.aten.mul.int %int4_55352, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55353 = torch.constant.int 4096
    %57893 = torch.prim.ListConstruct %57892, %int4096_55353 : (!torch.int, !torch.int) -> !torch.list<int>
    %57894 = torch.aten.view %57781, %57893 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57894, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57895 = torch.aten.mm %57894, %57865 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57895, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55354 = torch.constant.int 4
    %int1792_55355 = torch.constant.int 1792
    %57896 = torch.prim.ListConstruct %int4_55354, %2482, %int1792_55355 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57897 = torch.aten.view %57895, %57896 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55356 = torch.constant.int 4
    %57898 = torch.aten.mul.int %int4_55356, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55357 = torch.constant.int 4096
    %57899 = torch.prim.ListConstruct %57898, %int4096_55357 : (!torch.int, !torch.int) -> !torch.list<int>
    %57900 = torch.aten.view %57782, %57899 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57900, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57901 = torch.aten.mm %57900, %57867 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57901, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55358 = torch.constant.int 4
    %int1792_55359 = torch.constant.int 1792
    %57902 = torch.prim.ListConstruct %int4_55358, %2482, %int1792_55359 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57903 = torch.aten.view %57901, %57902 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55360 = torch.constant.int 4
    %57904 = torch.aten.mul.int %int4_55360, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55361 = torch.constant.int 4096
    %57905 = torch.prim.ListConstruct %57904, %int4096_55361 : (!torch.int, !torch.int) -> !torch.list<int>
    %57906 = torch.aten.view %57783, %57905 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57906, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57907 = torch.aten.mm %57906, %57869 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57907, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55362 = torch.constant.int 4
    %int1792_55363 = torch.constant.int 1792
    %57908 = torch.prim.ListConstruct %int4_55362, %2482, %int1792_55363 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57909 = torch.aten.view %57907, %57908 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55364 = torch.constant.int 4
    %57910 = torch.aten.mul.int %int4_55364, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55365 = torch.constant.int 4096
    %57911 = torch.prim.ListConstruct %57910, %int4096_55365 : (!torch.int, !torch.int) -> !torch.list<int>
    %57912 = torch.aten.view %57784, %57911 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57912, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57913 = torch.aten.mm %57912, %57871 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57913, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55366 = torch.constant.int 4
    %int1792_55367 = torch.constant.int 1792
    %57914 = torch.prim.ListConstruct %int4_55366, %2482, %int1792_55367 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57915 = torch.aten.view %57913, %57914 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_55368 = torch.constant.int 4
    %57916 = torch.aten.mul.int %int4_55368, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55369 = torch.constant.int 4096
    %57917 = torch.prim.ListConstruct %57916, %int4096_55369 : (!torch.int, !torch.int) -> !torch.list<int>
    %57918 = torch.aten.view %57785, %57917 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57918, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %57919 = torch.aten.mm %57918, %57873 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57919, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_55370 = torch.constant.int 4
    %int1792_55371 = torch.constant.int 1792
    %57920 = torch.prim.ListConstruct %int4_55370, %2482, %int1792_55371 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57921 = torch.aten.view %57919, %57920 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
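    // Gating: elementwise product silu(gate) * up, computed per device shard.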
    %57922 = torch.aten.mul.Tensor %57850, %57879 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57923 = torch.aten.mul.Tensor %57851, %57885 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57923, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57924 = torch.aten.mul.Tensor %57852, %57891 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57925 = torch.aten.mul.Tensor %57853, %57897 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57926 = torch.aten.mul.Tensor %57854, %57903 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57926, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57927 = torch.aten.mul.Tensor %57855, %57909 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57928 = torch.aten.mul.Tensor %57856, %57915 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %57929 = torch.aten.mul.Tensor %57857, %57921 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %57929, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
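    // FFN down projection: weight shards %2160 ... %2167 of shape [4096,1792] are transposed to
    // [1792,4096], mapping each device's [?,1792] shard back to the 4096-wide hidden size. These
    // per-device partials are presumably summed across devices afterwards, as in the reduction above.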
    %int1_55372 = torch.constant.int 1
    %int0_55373 = torch.constant.int 0
    %57930 = torch.prim.ListConstruct %int1_55372, %int0_55373 : (!torch.int, !torch.int) -> !torch.list<int>
    %57931 = torch.aten.permute %2160, %57930 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_55374 = torch.constant.int 1
    %int0_55375 = torch.constant.int 0
    %57932 = torch.prim.ListConstruct %int1_55374, %int0_55375 : (!torch.int, !torch.int) -> !torch.list<int>
    %57933 = torch.aten.permute %2161, %57932 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_55376 = torch.constant.int 1
    %int0_55377 = torch.constant.int 0
    %57934 = torch.prim.ListConstruct %int1_55376, %int0_55377 : (!torch.int, !torch.int) -> !torch.list<int>
    %57935 = torch.aten.permute %2162, %57934 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_55378 = torch.constant.int 1
    %int0_55379 = torch.constant.int 0
    %57936 = torch.prim.ListConstruct %int1_55378, %int0_55379 : (!torch.int, !torch.int) -> !torch.list<int>
    %57937 = torch.aten.permute %2163, %57936 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_55380 = torch.constant.int 1
    %int0_55381 = torch.constant.int 0
    %57938 = torch.prim.ListConstruct %int1_55380, %int0_55381 : (!torch.int, !torch.int) -> !torch.list<int>
    %57939 = torch.aten.permute %2164, %57938 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_55382 = torch.constant.int 1
    %int0_55383 = torch.constant.int 0
    %57940 = torch.prim.ListConstruct %int1_55382, %int0_55383 : (!torch.int, !torch.int) -> !torch.list<int>
    %57941 = torch.aten.permute %2165, %57940 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_55384 = torch.constant.int 1
    %int0_55385 = torch.constant.int 0
    %57942 = torch.prim.ListConstruct %int1_55384, %int0_55385 : (!torch.int, !torch.int) -> !torch.list<int>
    %57943 = torch.aten.permute %2166, %57942 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_55386 = torch.constant.int 1
    %int0_55387 = torch.constant.int 0
    %57944 = torch.prim.ListConstruct %int1_55386, %int0_55387 : (!torch.int, !torch.int) -> !torch.list<int>
    %57945 = torch.aten.permute %2167, %57944 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
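    // Each of the eight blocks below repeats the same shape bookkeeping:
    //   1. read the dynamic sequence length (dim 1) of the shard activation,
    //   2. flatten [4,?,1792] to [?,1792] with ? = 4 * seq (hence the s0 * 64
    //      symbolic bound versus s0 * 16 on the 3-D form),
    //   3. matmul against the transposed shard weight [1792,4096],
    //   4. reshape back to [4,?,4096].
    // The results %57952, %57959, %57966, %57973, %57980, %57987, %57994 and
    // %58001 are unreduced per-device partial outputs of the projection.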
    %int1_55388 = torch.constant.int 1
    %57946 = torch.aten.size.int %57807, %int1_55388 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_55389 = torch.constant.int 4
    %57947 = torch.aten.mul.int %int4_55389, %57946 : !torch.int, !torch.int -> !torch.int
    %int1792_55390 = torch.constant.int 1792
    %57948 = torch.prim.ListConstruct %57947, %int1792_55390 : (!torch.int, !torch.int) -> !torch.list<int>
    %57949 = torch.aten.view %57922, %57948 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57949, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %57950 = torch.aten.mm %57949, %57931 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57950, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55391 = torch.constant.int 4
    %int4096_55392 = torch.constant.int 4096
    %57951 = torch.prim.ListConstruct %int4_55391, %57946, %int4096_55392 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57952 = torch.aten.view %57950, %57951 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55393 = torch.constant.int 1
    %57953 = torch.aten.size.int %57813, %int1_55393 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_55394 = torch.constant.int 4
    %57954 = torch.aten.mul.int %int4_55394, %57953 : !torch.int, !torch.int -> !torch.int
    %int1792_55395 = torch.constant.int 1792
    %57955 = torch.prim.ListConstruct %57954, %int1792_55395 : (!torch.int, !torch.int) -> !torch.list<int>
    %57956 = torch.aten.view %57923, %57955 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57956, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %57957 = torch.aten.mm %57956, %57933 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57957, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55396 = torch.constant.int 4
    %int4096_55397 = torch.constant.int 4096
    %57958 = torch.prim.ListConstruct %int4_55396, %57953, %int4096_55397 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57959 = torch.aten.view %57957, %57958 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55398 = torch.constant.int 1
    %57960 = torch.aten.size.int %57819, %int1_55398 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_55399 = torch.constant.int 4
    %57961 = torch.aten.mul.int %int4_55399, %57960 : !torch.int, !torch.int -> !torch.int
    %int1792_55400 = torch.constant.int 1792
    %57962 = torch.prim.ListConstruct %57961, %int1792_55400 : (!torch.int, !torch.int) -> !torch.list<int>
    %57963 = torch.aten.view %57924, %57962 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57963, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %57964 = torch.aten.mm %57963, %57935 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57964, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55401 = torch.constant.int 4
    %int4096_55402 = torch.constant.int 4096
    %57965 = torch.prim.ListConstruct %int4_55401, %57960, %int4096_55402 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57966 = torch.aten.view %57964, %57965 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55403 = torch.constant.int 1
    %57967 = torch.aten.size.int %57825, %int1_55403 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_55404 = torch.constant.int 4
    %57968 = torch.aten.mul.int %int4_55404, %57967 : !torch.int, !torch.int -> !torch.int
    %int1792_55405 = torch.constant.int 1792
    %57969 = torch.prim.ListConstruct %57968, %int1792_55405 : (!torch.int, !torch.int) -> !torch.list<int>
    %57970 = torch.aten.view %57925, %57969 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57970, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %57971 = torch.aten.mm %57970, %57937 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57971, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55406 = torch.constant.int 4
    %int4096_55407 = torch.constant.int 4096
    %57972 = torch.prim.ListConstruct %int4_55406, %57967, %int4096_55407 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57973 = torch.aten.view %57971, %57972 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55408 = torch.constant.int 1
    %57974 = torch.aten.size.int %57831, %int1_55408 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_55409 = torch.constant.int 4
    %57975 = torch.aten.mul.int %int4_55409, %57974 : !torch.int, !torch.int -> !torch.int
    %int1792_55410 = torch.constant.int 1792
    %57976 = torch.prim.ListConstruct %57975, %int1792_55410 : (!torch.int, !torch.int) -> !torch.list<int>
    %57977 = torch.aten.view %57926, %57976 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57977, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %57978 = torch.aten.mm %57977, %57939 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57978, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55411 = torch.constant.int 4
    %int4096_55412 = torch.constant.int 4096
    %57979 = torch.prim.ListConstruct %int4_55411, %57974, %int4096_55412 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57980 = torch.aten.view %57978, %57979 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55413 = torch.constant.int 1
    %57981 = torch.aten.size.int %57837, %int1_55413 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_55414 = torch.constant.int 4
    %57982 = torch.aten.mul.int %int4_55414, %57981 : !torch.int, !torch.int -> !torch.int
    %int1792_55415 = torch.constant.int 1792
    %57983 = torch.prim.ListConstruct %57982, %int1792_55415 : (!torch.int, !torch.int) -> !torch.list<int>
    %57984 = torch.aten.view %57927, %57983 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57984, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %57985 = torch.aten.mm %57984, %57941 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57985, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55416 = torch.constant.int 4
    %int4096_55417 = torch.constant.int 4096
    %57986 = torch.prim.ListConstruct %int4_55416, %57981, %int4096_55417 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57987 = torch.aten.view %57985, %57986 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55418 = torch.constant.int 1
    %57988 = torch.aten.size.int %57843, %int1_55418 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_55419 = torch.constant.int 4
    %57989 = torch.aten.mul.int %int4_55419, %57988 : !torch.int, !torch.int -> !torch.int
    %int1792_55420 = torch.constant.int 1792
    %57990 = torch.prim.ListConstruct %57989, %int1792_55420 : (!torch.int, !torch.int) -> !torch.list<int>
    %57991 = torch.aten.view %57928, %57990 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57991, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %57992 = torch.aten.mm %57991, %57943 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57992, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55421 = torch.constant.int 4
    %int4096_55422 = torch.constant.int 4096
    %57993 = torch.prim.ListConstruct %int4_55421, %57988, %int4096_55422 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %57994 = torch.aten.view %57992, %57993 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %57994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55423 = torch.constant.int 1
    %57995 = torch.aten.size.int %57849, %int1_55423 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_55424 = torch.constant.int 4
    %57996 = torch.aten.mul.int %int4_55424, %57995 : !torch.int, !torch.int -> !torch.int
    %int1792_55425 = torch.constant.int 1792
    %57997 = torch.prim.ListConstruct %57996, %int1792_55425 : (!torch.int, !torch.int) -> !torch.list<int>
    %57998 = torch.aten.view %57929, %57997 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %57998, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %57999 = torch.aten.mm %57998, %57945 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %57999, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_55426 = torch.constant.int 4
    %int4096_55427 = torch.constant.int 4096
    %58000 = torch.prim.ListConstruct %int4_55426, %57995, %int4096_55427 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58001 = torch.aten.view %57999, %58000 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
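    // The remainder of this section sums those eight partials into a full
    // [4,?,4096] activation on every device: an all-reduce spelled out as
    // explicit flow.tensor.transfer copies plus chains of torch.aten.add.Tensor.
    // Sketch of the pattern (pseudocode, not part of the IR):
    //   for d in 0..7:
    //     out[d] = sum(partial[i] if i == d else transfer(partial[i], to=d))
    // The @__device_0 round comes first: seven remote partials are copied in,
    // and the locally resident %57952 anchors the add chain.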
    %58002 = torch_c.to_builtin_tensor %57959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55428 = arith.constant 1 : index
    %dim_55429 = tensor.dim %58002, %c1_55428 : tensor<4x?x4096xf16>
    %58003 = flow.tensor.transfer %58002 : tensor<4x?x4096xf16>{%dim_55429} to #hal.device.promise<@__device_0>
    %58004 = torch_c.from_builtin_tensor %58003 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58005 = torch_c.to_builtin_tensor %57966 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55430 = arith.constant 1 : index
    %dim_55431 = tensor.dim %58005, %c1_55430 : tensor<4x?x4096xf16>
    %58006 = flow.tensor.transfer %58005 : tensor<4x?x4096xf16>{%dim_55431} to #hal.device.promise<@__device_0>
    %58007 = torch_c.from_builtin_tensor %58006 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58008 = torch_c.to_builtin_tensor %57973 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55432 = arith.constant 1 : index
    %dim_55433 = tensor.dim %58008, %c1_55432 : tensor<4x?x4096xf16>
    %58009 = flow.tensor.transfer %58008 : tensor<4x?x4096xf16>{%dim_55433} to #hal.device.promise<@__device_0>
    %58010 = torch_c.from_builtin_tensor %58009 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58011 = torch_c.to_builtin_tensor %57980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55434 = arith.constant 1 : index
    %dim_55435 = tensor.dim %58011, %c1_55434 : tensor<4x?x4096xf16>
    %58012 = flow.tensor.transfer %58011 : tensor<4x?x4096xf16>{%dim_55435} to #hal.device.promise<@__device_0>
    %58013 = torch_c.from_builtin_tensor %58012 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58014 = torch_c.to_builtin_tensor %57987 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55436 = arith.constant 1 : index
    %dim_55437 = tensor.dim %58014, %c1_55436 : tensor<4x?x4096xf16>
    %58015 = flow.tensor.transfer %58014 : tensor<4x?x4096xf16>{%dim_55437} to #hal.device.promise<@__device_0>
    %58016 = torch_c.from_builtin_tensor %58015 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58017 = torch_c.to_builtin_tensor %57994 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55438 = arith.constant 1 : index
    %dim_55439 = tensor.dim %58017, %c1_55438 : tensor<4x?x4096xf16>
    %58018 = flow.tensor.transfer %58017 : tensor<4x?x4096xf16>{%dim_55439} to #hal.device.promise<@__device_0>
    %58019 = torch_c.from_builtin_tensor %58018 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58020 = torch_c.to_builtin_tensor %58001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55440 = arith.constant 1 : index
    %dim_55441 = tensor.dim %58020, %c1_55440 : tensor<4x?x4096xf16>
    %58021 = flow.tensor.transfer %58020 : tensor<4x?x4096xf16>{%dim_55441} to #hal.device.promise<@__device_0>
    %58022 = torch_c.from_builtin_tensor %58021 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55442 = torch.constant.int 1
    %58023 = torch.aten.add.Tensor %57952, %58004, %int1_55442 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55443 = torch.constant.int 1
    %58024 = torch.aten.add.Tensor %58023, %58007, %int1_55443 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55444 = torch.constant.int 1
    %58025 = torch.aten.add.Tensor %58024, %58010, %int1_55444 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55445 = torch.constant.int 1
    %58026 = torch.aten.add.Tensor %58025, %58013, %int1_55445 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55446 = torch.constant.int 1
    %58027 = torch.aten.add.Tensor %58026, %58016, %int1_55446 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55447 = torch.constant.int 1
    %58028 = torch.aten.add.Tensor %58027, %58019, %int1_55447 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55448 = torch.constant.int 1
    %58029 = torch.aten.add.Tensor %58028, %58022, %int1_55448 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
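    // %58029 is the fully reduced activation on @__device_0. The identical
    // transfer-and-accumulate round now runs for @__device_1, where the
    // shard-1 partial %57959 is already resident and needs no transfer.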
    %58030 = torch_c.to_builtin_tensor %57952 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55449 = arith.constant 1 : index
    %dim_55450 = tensor.dim %58030, %c1_55449 : tensor<4x?x4096xf16>
    %58031 = flow.tensor.transfer %58030 : tensor<4x?x4096xf16>{%dim_55450} to #hal.device.promise<@__device_1>
    %58032 = torch_c.from_builtin_tensor %58031 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58033 = torch_c.to_builtin_tensor %57966 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55451 = arith.constant 1 : index
    %dim_55452 = tensor.dim %58033, %c1_55451 : tensor<4x?x4096xf16>
    %58034 = flow.tensor.transfer %58033 : tensor<4x?x4096xf16>{%dim_55452} to #hal.device.promise<@__device_1>
    %58035 = torch_c.from_builtin_tensor %58034 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58035, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58036 = torch_c.to_builtin_tensor %57973 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55453 = arith.constant 1 : index
    %dim_55454 = tensor.dim %58036, %c1_55453 : tensor<4x?x4096xf16>
    %58037 = flow.tensor.transfer %58036 : tensor<4x?x4096xf16>{%dim_55454} to #hal.device.promise<@__device_1>
    %58038 = torch_c.from_builtin_tensor %58037 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58039 = torch_c.to_builtin_tensor %57980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55455 = arith.constant 1 : index
    %dim_55456 = tensor.dim %58039, %c1_55455 : tensor<4x?x4096xf16>
    %58040 = flow.tensor.transfer %58039 : tensor<4x?x4096xf16>{%dim_55456} to #hal.device.promise<@__device_1>
    %58041 = torch_c.from_builtin_tensor %58040 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58041, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58042 = torch_c.to_builtin_tensor %57987 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55457 = arith.constant 1 : index
    %dim_55458 = tensor.dim %58042, %c1_55457 : tensor<4x?x4096xf16>
    %58043 = flow.tensor.transfer %58042 : tensor<4x?x4096xf16>{%dim_55458} to #hal.device.promise<@__device_1>
    %58044 = torch_c.from_builtin_tensor %58043 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58045 = torch_c.to_builtin_tensor %57994 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55459 = arith.constant 1 : index
    %dim_55460 = tensor.dim %58045, %c1_55459 : tensor<4x?x4096xf16>
    %58046 = flow.tensor.transfer %58045 : tensor<4x?x4096xf16>{%dim_55460} to #hal.device.promise<@__device_1>
    %58047 = torch_c.from_builtin_tensor %58046 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58048 = torch_c.to_builtin_tensor %58001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55461 = arith.constant 1 : index
    %dim_55462 = tensor.dim %58048, %c1_55461 : tensor<4x?x4096xf16>
    %58049 = flow.tensor.transfer %58048 : tensor<4x?x4096xf16>{%dim_55462} to #hal.device.promise<@__device_1>
    %58050 = torch_c.from_builtin_tensor %58049 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55463 = torch.constant.int 1
    %58051 = torch.aten.add.Tensor %58032, %57959, %int1_55463 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55464 = torch.constant.int 1
    %58052 = torch.aten.add.Tensor %58051, %58035, %int1_55464 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55465 = torch.constant.int 1
    %58053 = torch.aten.add.Tensor %58052, %58038, %int1_55465 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55466 = torch.constant.int 1
    %58054 = torch.aten.add.Tensor %58053, %58041, %int1_55466 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55467 = torch.constant.int 1
    %58055 = torch.aten.add.Tensor %58054, %58044, %int1_55467 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55468 = torch.constant.int 1
    %58056 = torch.aten.add.Tensor %58055, %58047, %int1_55468 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55469 = torch.constant.int 1
    %58057 = torch.aten.add.Tensor %58056, %58050, %int1_55469 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
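    // Reduced result for @__device_1 complete (%58057). The next round targets
    // @__device_2, with %57966 as the locally resident partial.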
    %58058 = torch_c.to_builtin_tensor %57952 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55470 = arith.constant 1 : index
    %dim_55471 = tensor.dim %58058, %c1_55470 : tensor<4x?x4096xf16>
    %58059 = flow.tensor.transfer %58058 : tensor<4x?x4096xf16>{%dim_55471} to #hal.device.promise<@__device_2>
    %58060 = torch_c.from_builtin_tensor %58059 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58061 = torch_c.to_builtin_tensor %57959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55472 = arith.constant 1 : index
    %dim_55473 = tensor.dim %58061, %c1_55472 : tensor<4x?x4096xf16>
    %58062 = flow.tensor.transfer %58061 : tensor<4x?x4096xf16>{%dim_55473} to #hal.device.promise<@__device_2>
    %58063 = torch_c.from_builtin_tensor %58062 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58064 = torch_c.to_builtin_tensor %57973 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55474 = arith.constant 1 : index
    %dim_55475 = tensor.dim %58064, %c1_55474 : tensor<4x?x4096xf16>
    %58065 = flow.tensor.transfer %58064 : tensor<4x?x4096xf16>{%dim_55475} to #hal.device.promise<@__device_2>
    %58066 = torch_c.from_builtin_tensor %58065 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58067 = torch_c.to_builtin_tensor %57980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55476 = arith.constant 1 : index
    %dim_55477 = tensor.dim %58067, %c1_55476 : tensor<4x?x4096xf16>
    %58068 = flow.tensor.transfer %58067 : tensor<4x?x4096xf16>{%dim_55477} to #hal.device.promise<@__device_2>
    %58069 = torch_c.from_builtin_tensor %58068 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58070 = torch_c.to_builtin_tensor %57987 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55478 = arith.constant 1 : index
    %dim_55479 = tensor.dim %58070, %c1_55478 : tensor<4x?x4096xf16>
    %58071 = flow.tensor.transfer %58070 : tensor<4x?x4096xf16>{%dim_55479} to #hal.device.promise<@__device_2>
    %58072 = torch_c.from_builtin_tensor %58071 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58072, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58073 = torch_c.to_builtin_tensor %57994 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55480 = arith.constant 1 : index
    %dim_55481 = tensor.dim %58073, %c1_55480 : tensor<4x?x4096xf16>
    %58074 = flow.tensor.transfer %58073 : tensor<4x?x4096xf16>{%dim_55481} to #hal.device.promise<@__device_2>
    %58075 = torch_c.from_builtin_tensor %58074 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58076 = torch_c.to_builtin_tensor %58001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55482 = arith.constant 1 : index
    %dim_55483 = tensor.dim %58076, %c1_55482 : tensor<4x?x4096xf16>
    %58077 = flow.tensor.transfer %58076 : tensor<4x?x4096xf16>{%dim_55483} to #hal.device.promise<@__device_2>
    %58078 = torch_c.from_builtin_tensor %58077 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58078, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55484 = torch.constant.int 1
    %58079 = torch.aten.add.Tensor %58060, %58063, %int1_55484 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55485 = torch.constant.int 1
    %58080 = torch.aten.add.Tensor %58079, %57966, %int1_55485 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55486 = torch.constant.int 1
    %58081 = torch.aten.add.Tensor %58080, %58066, %int1_55486 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55487 = torch.constant.int 1
    %58082 = torch.aten.add.Tensor %58081, %58069, %int1_55487 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55488 = torch.constant.int 1
    %58083 = torch.aten.add.Tensor %58082, %58072, %int1_55488 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55489 = torch.constant.int 1
    %58084 = torch.aten.add.Tensor %58083, %58075, %int1_55489 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55490 = torch.constant.int 1
    %58085 = torch.aten.add.Tensor %58084, %58078, %int1_55490 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
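    // @__device_2 done (%58085); the @__device_3 round follows, keeping
    // %57973 local.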
    %58086 = torch_c.to_builtin_tensor %57952 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55491 = arith.constant 1 : index
    %dim_55492 = tensor.dim %58086, %c1_55491 : tensor<4x?x4096xf16>
    %58087 = flow.tensor.transfer %58086 : tensor<4x?x4096xf16>{%dim_55492} to #hal.device.promise<@__device_3>
    %58088 = torch_c.from_builtin_tensor %58087 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58089 = torch_c.to_builtin_tensor %57959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55493 = arith.constant 1 : index
    %dim_55494 = tensor.dim %58089, %c1_55493 : tensor<4x?x4096xf16>
    %58090 = flow.tensor.transfer %58089 : tensor<4x?x4096xf16>{%dim_55494} to #hal.device.promise<@__device_3>
    %58091 = torch_c.from_builtin_tensor %58090 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58092 = torch_c.to_builtin_tensor %57966 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55495 = arith.constant 1 : index
    %dim_55496 = tensor.dim %58092, %c1_55495 : tensor<4x?x4096xf16>
    %58093 = flow.tensor.transfer %58092 : tensor<4x?x4096xf16>{%dim_55496} to #hal.device.promise<@__device_3>
    %58094 = torch_c.from_builtin_tensor %58093 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58095 = torch_c.to_builtin_tensor %57980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55497 = arith.constant 1 : index
    %dim_55498 = tensor.dim %58095, %c1_55497 : tensor<4x?x4096xf16>
    %58096 = flow.tensor.transfer %58095 : tensor<4x?x4096xf16>{%dim_55498} to #hal.device.promise<@__device_3>
    %58097 = torch_c.from_builtin_tensor %58096 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58098 = torch_c.to_builtin_tensor %57987 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55499 = arith.constant 1 : index
    %dim_55500 = tensor.dim %58098, %c1_55499 : tensor<4x?x4096xf16>
    %58099 = flow.tensor.transfer %58098 : tensor<4x?x4096xf16>{%dim_55500} to #hal.device.promise<@__device_3>
    %58100 = torch_c.from_builtin_tensor %58099 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58101 = torch_c.to_builtin_tensor %57994 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55501 = arith.constant 1 : index
    %dim_55502 = tensor.dim %58101, %c1_55501 : tensor<4x?x4096xf16>
    %58102 = flow.tensor.transfer %58101 : tensor<4x?x4096xf16>{%dim_55502} to #hal.device.promise<@__device_3>
    %58103 = torch_c.from_builtin_tensor %58102 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58104 = torch_c.to_builtin_tensor %58001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55503 = arith.constant 1 : index
    %dim_55504 = tensor.dim %58104, %c1_55503 : tensor<4x?x4096xf16>
    %58105 = flow.tensor.transfer %58104 : tensor<4x?x4096xf16>{%dim_55504} to #hal.device.promise<@__device_3>
    %58106 = torch_c.from_builtin_tensor %58105 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55505 = torch.constant.int 1
    %58107 = torch.aten.add.Tensor %58088, %58091, %int1_55505 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55506 = torch.constant.int 1
    %58108 = torch.aten.add.Tensor %58107, %58094, %int1_55506 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55507 = torch.constant.int 1
    %58109 = torch.aten.add.Tensor %58108, %57973, %int1_55507 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55508 = torch.constant.int 1
    %58110 = torch.aten.add.Tensor %58109, %58097, %int1_55508 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55509 = torch.constant.int 1
    %58111 = torch.aten.add.Tensor %58110, %58100, %int1_55509 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58111, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55510 = torch.constant.int 1
    %58112 = torch.aten.add.Tensor %58111, %58103, %int1_55510 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55511 = torch.constant.int 1
    %58113 = torch.aten.add.Tensor %58112, %58106, %int1_55511 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58113, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
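    // @__device_3 done (%58113); the @__device_4 round follows, keeping
    // %57980 local.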
    %58114 = torch_c.to_builtin_tensor %57952 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55512 = arith.constant 1 : index
    %dim_55513 = tensor.dim %58114, %c1_55512 : tensor<4x?x4096xf16>
    %58115 = flow.tensor.transfer %58114 : tensor<4x?x4096xf16>{%dim_55513} to #hal.device.promise<@__device_4>
    %58116 = torch_c.from_builtin_tensor %58115 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58117 = torch_c.to_builtin_tensor %57959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55514 = arith.constant 1 : index
    %dim_55515 = tensor.dim %58117, %c1_55514 : tensor<4x?x4096xf16>
    %58118 = flow.tensor.transfer %58117 : tensor<4x?x4096xf16>{%dim_55515} to #hal.device.promise<@__device_4>
    %58119 = torch_c.from_builtin_tensor %58118 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58120 = torch_c.to_builtin_tensor %57966 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55516 = arith.constant 1 : index
    %dim_55517 = tensor.dim %58120, %c1_55516 : tensor<4x?x4096xf16>
    %58121 = flow.tensor.transfer %58120 : tensor<4x?x4096xf16>{%dim_55517} to #hal.device.promise<@__device_4>
    %58122 = torch_c.from_builtin_tensor %58121 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58123 = torch_c.to_builtin_tensor %57973 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55518 = arith.constant 1 : index
    %dim_55519 = tensor.dim %58123, %c1_55518 : tensor<4x?x4096xf16>
    %58124 = flow.tensor.transfer %58123 : tensor<4x?x4096xf16>{%dim_55519} to #hal.device.promise<@__device_4>
    %58125 = torch_c.from_builtin_tensor %58124 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58126 = torch_c.to_builtin_tensor %57987 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55520 = arith.constant 1 : index
    %dim_55521 = tensor.dim %58126, %c1_55520 : tensor<4x?x4096xf16>
    %58127 = flow.tensor.transfer %58126 : tensor<4x?x4096xf16>{%dim_55521} to #hal.device.promise<@__device_4>
    %58128 = torch_c.from_builtin_tensor %58127 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58129 = torch_c.to_builtin_tensor %57994 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55522 = arith.constant 1 : index
    %dim_55523 = tensor.dim %58129, %c1_55522 : tensor<4x?x4096xf16>
    %58130 = flow.tensor.transfer %58129 : tensor<4x?x4096xf16>{%dim_55523} to #hal.device.promise<@__device_4>
    %58131 = torch_c.from_builtin_tensor %58130 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58132 = torch_c.to_builtin_tensor %58001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55524 = arith.constant 1 : index
    %dim_55525 = tensor.dim %58132, %c1_55524 : tensor<4x?x4096xf16>
    %58133 = flow.tensor.transfer %58132 : tensor<4x?x4096xf16>{%dim_55525} to #hal.device.promise<@__device_4>
    %58134 = torch_c.from_builtin_tensor %58133 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55526 = torch.constant.int 1
    %58135 = torch.aten.add.Tensor %58116, %58119, %int1_55526 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55527 = torch.constant.int 1
    %58136 = torch.aten.add.Tensor %58135, %58122, %int1_55527 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55528 = torch.constant.int 1
    %58137 = torch.aten.add.Tensor %58136, %58125, %int1_55528 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55529 = torch.constant.int 1
    %58138 = torch.aten.add.Tensor %58137, %57980, %int1_55529 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55530 = torch.constant.int 1
    %58139 = torch.aten.add.Tensor %58138, %58128, %int1_55530 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55531 = torch.constant.int 1
    %58140 = torch.aten.add.Tensor %58139, %58131, %int1_55531 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55532 = torch.constant.int 1
    %58141 = torch.aten.add.Tensor %58140, %58134, %int1_55532 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
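    // @__device_4 done (%58141); the @__device_5 round follows, keeping
    // %57987 local.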
    %58142 = torch_c.to_builtin_tensor %57952 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55533 = arith.constant 1 : index
    %dim_55534 = tensor.dim %58142, %c1_55533 : tensor<4x?x4096xf16>
    %58143 = flow.tensor.transfer %58142 : tensor<4x?x4096xf16>{%dim_55534} to #hal.device.promise<@__device_5>
    %58144 = torch_c.from_builtin_tensor %58143 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58145 = torch_c.to_builtin_tensor %57959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55535 = arith.constant 1 : index
    %dim_55536 = tensor.dim %58145, %c1_55535 : tensor<4x?x4096xf16>
    %58146 = flow.tensor.transfer %58145 : tensor<4x?x4096xf16>{%dim_55536} to #hal.device.promise<@__device_5>
    %58147 = torch_c.from_builtin_tensor %58146 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58148 = torch_c.to_builtin_tensor %57966 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55537 = arith.constant 1 : index
    %dim_55538 = tensor.dim %58148, %c1_55537 : tensor<4x?x4096xf16>
    %58149 = flow.tensor.transfer %58148 : tensor<4x?x4096xf16>{%dim_55538} to #hal.device.promise<@__device_5>
    %58150 = torch_c.from_builtin_tensor %58149 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58151 = torch_c.to_builtin_tensor %57973 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55539 = arith.constant 1 : index
    %dim_55540 = tensor.dim %58151, %c1_55539 : tensor<4x?x4096xf16>
    %58152 = flow.tensor.transfer %58151 : tensor<4x?x4096xf16>{%dim_55540} to #hal.device.promise<@__device_5>
    %58153 = torch_c.from_builtin_tensor %58152 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58154 = torch_c.to_builtin_tensor %57980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55541 = arith.constant 1 : index
    %dim_55542 = tensor.dim %58154, %c1_55541 : tensor<4x?x4096xf16>
    %58155 = flow.tensor.transfer %58154 : tensor<4x?x4096xf16>{%dim_55542} to #hal.device.promise<@__device_5>
    %58156 = torch_c.from_builtin_tensor %58155 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58157 = torch_c.to_builtin_tensor %57994 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55543 = arith.constant 1 : index
    %dim_55544 = tensor.dim %58157, %c1_55543 : tensor<4x?x4096xf16>
    %58158 = flow.tensor.transfer %58157 : tensor<4x?x4096xf16>{%dim_55544} to #hal.device.promise<@__device_5>
    %58159 = torch_c.from_builtin_tensor %58158 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58160 = torch_c.to_builtin_tensor %58001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55545 = arith.constant 1 : index
    %dim_55546 = tensor.dim %58160, %c1_55545 : tensor<4x?x4096xf16>
    %58161 = flow.tensor.transfer %58160 : tensor<4x?x4096xf16>{%dim_55546} to #hal.device.promise<@__device_5>
    %58162 = torch_c.from_builtin_tensor %58161 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55547 = torch.constant.int 1
    %58163 = torch.aten.add.Tensor %58144, %58147, %int1_55547 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55548 = torch.constant.int 1
    %58164 = torch.aten.add.Tensor %58163, %58150, %int1_55548 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55549 = torch.constant.int 1
    %58165 = torch.aten.add.Tensor %58164, %58153, %int1_55549 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55550 = torch.constant.int 1
    %58166 = torch.aten.add.Tensor %58165, %58156, %int1_55550 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55551 = torch.constant.int 1
    %58167 = torch.aten.add.Tensor %58166, %57987, %int1_55551 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55552 = torch.constant.int 1
    %58168 = torch.aten.add.Tensor %58167, %58159, %int1_55552 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58168, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55553 = torch.constant.int 1
    %58169 = torch.aten.add.Tensor %58168, %58162, %int1_55553 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
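    // @__device_5 done (%58169); the @__device_6 round follows, keeping
    // %57994 local.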
    %58170 = torch_c.to_builtin_tensor %57952 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55554 = arith.constant 1 : index
    %dim_55555 = tensor.dim %58170, %c1_55554 : tensor<4x?x4096xf16>
    %58171 = flow.tensor.transfer %58170 : tensor<4x?x4096xf16>{%dim_55555} to #hal.device.promise<@__device_6>
    %58172 = torch_c.from_builtin_tensor %58171 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58172, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58173 = torch_c.to_builtin_tensor %57959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55556 = arith.constant 1 : index
    %dim_55557 = tensor.dim %58173, %c1_55556 : tensor<4x?x4096xf16>
    %58174 = flow.tensor.transfer %58173 : tensor<4x?x4096xf16>{%dim_55557} to #hal.device.promise<@__device_6>
    %58175 = torch_c.from_builtin_tensor %58174 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58176 = torch_c.to_builtin_tensor %57966 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55558 = arith.constant 1 : index
    %dim_55559 = tensor.dim %58176, %c1_55558 : tensor<4x?x4096xf16>
    %58177 = flow.tensor.transfer %58176 : tensor<4x?x4096xf16>{%dim_55559} to #hal.device.promise<@__device_6>
    %58178 = torch_c.from_builtin_tensor %58177 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58178, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58179 = torch_c.to_builtin_tensor %57973 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55560 = arith.constant 1 : index
    %dim_55561 = tensor.dim %58179, %c1_55560 : tensor<4x?x4096xf16>
    %58180 = flow.tensor.transfer %58179 : tensor<4x?x4096xf16>{%dim_55561} to #hal.device.promise<@__device_6>
    %58181 = torch_c.from_builtin_tensor %58180 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58182 = torch_c.to_builtin_tensor %57980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55562 = arith.constant 1 : index
    %dim_55563 = tensor.dim %58182, %c1_55562 : tensor<4x?x4096xf16>
    %58183 = flow.tensor.transfer %58182 : tensor<4x?x4096xf16>{%dim_55563} to #hal.device.promise<@__device_6>
    %58184 = torch_c.from_builtin_tensor %58183 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58185 = torch_c.to_builtin_tensor %57987 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55564 = arith.constant 1 : index
    %dim_55565 = tensor.dim %58185, %c1_55564 : tensor<4x?x4096xf16>
    %58186 = flow.tensor.transfer %58185 : tensor<4x?x4096xf16>{%dim_55565} to #hal.device.promise<@__device_6>
    %58187 = torch_c.from_builtin_tensor %58186 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58188 = torch_c.to_builtin_tensor %58001 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55566 = arith.constant 1 : index
    %dim_55567 = tensor.dim %58188, %c1_55566 : tensor<4x?x4096xf16>
    %58189 = flow.tensor.transfer %58188 : tensor<4x?x4096xf16>{%dim_55567} to #hal.device.promise<@__device_6>
    %58190 = torch_c.from_builtin_tensor %58189 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58190, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55568 = torch.constant.int 1
    %58191 = torch.aten.add.Tensor %58172, %58175, %int1_55568 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55569 = torch.constant.int 1
    %58192 = torch.aten.add.Tensor %58191, %58178, %int1_55569 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55570 = torch.constant.int 1
    %58193 = torch.aten.add.Tensor %58192, %58181, %int1_55570 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58193, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55571 = torch.constant.int 1
    %58194 = torch.aten.add.Tensor %58193, %58184, %int1_55571 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55572 = torch.constant.int 1
    %58195 = torch.aten.add.Tensor %58194, %58187, %int1_55572 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58195, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55573 = torch.constant.int 1
    %58196 = torch.aten.add.Tensor %58195, %57994, %int1_55573 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55574 = torch.constant.int 1
    %58197 = torch.aten.add.Tensor %58196, %58190, %int1_55574 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
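    // @__device_6 done (%58197); the final round targets @__device_7, where
    // %58001 is the locally resident partial.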
    %58198 = torch_c.to_builtin_tensor %57952 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55575 = arith.constant 1 : index
    %dim_55576 = tensor.dim %58198, %c1_55575 : tensor<4x?x4096xf16>
    %58199 = flow.tensor.transfer %58198 : tensor<4x?x4096xf16>{%dim_55576} to #hal.device.promise<@__device_7>
    %58200 = torch_c.from_builtin_tensor %58199 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58201 = torch_c.to_builtin_tensor %57959 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55577 = arith.constant 1 : index
    %dim_55578 = tensor.dim %58201, %c1_55577 : tensor<4x?x4096xf16>
    %58202 = flow.tensor.transfer %58201 : tensor<4x?x4096xf16>{%dim_55578} to #hal.device.promise<@__device_7>
    %58203 = torch_c.from_builtin_tensor %58202 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58204 = torch_c.to_builtin_tensor %57966 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55579 = arith.constant 1 : index
    %dim_55580 = tensor.dim %58204, %c1_55579 : tensor<4x?x4096xf16>
    %58205 = flow.tensor.transfer %58204 : tensor<4x?x4096xf16>{%dim_55580} to #hal.device.promise<@__device_7>
    %58206 = torch_c.from_builtin_tensor %58205 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58207 = torch_c.to_builtin_tensor %57973 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55581 = arith.constant 1 : index
    %dim_55582 = tensor.dim %58207, %c1_55581 : tensor<4x?x4096xf16>
    %58208 = flow.tensor.transfer %58207 : tensor<4x?x4096xf16>{%dim_55582} to #hal.device.promise<@__device_7>
    %58209 = torch_c.from_builtin_tensor %58208 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58210 = torch_c.to_builtin_tensor %57980 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55583 = arith.constant 1 : index
    %dim_55584 = tensor.dim %58210, %c1_55583 : tensor<4x?x4096xf16>
    %58211 = flow.tensor.transfer %58210 : tensor<4x?x4096xf16>{%dim_55584} to #hal.device.promise<@__device_7>
    %58212 = torch_c.from_builtin_tensor %58211 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58213 = torch_c.to_builtin_tensor %57987 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55585 = arith.constant 1 : index
    %dim_55586 = tensor.dim %58213, %c1_55585 : tensor<4x?x4096xf16>
    %58214 = flow.tensor.transfer %58213 : tensor<4x?x4096xf16>{%dim_55586} to #hal.device.promise<@__device_7>
    %58215 = torch_c.from_builtin_tensor %58214 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %58216 = torch_c.to_builtin_tensor %57994 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_55587 = arith.constant 1 : index
    %dim_55588 = tensor.dim %58216, %c1_55587 : tensor<4x?x4096xf16>
    %58217 = flow.tensor.transfer %58216 : tensor<4x?x4096xf16>{%dim_55588} to #hal.device.promise<@__device_7>
    %58218 = torch_c.from_builtin_tensor %58217 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
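    // Annotation: chain-add the seven transferred partials (%58200..%58218)
    // with @__device_7's local partial (%58001) into the fully reduced
    // activation %58225.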
    %int1_55589 = torch.constant.int 1
    %58219 = torch.aten.add.Tensor %58200, %58203, %int1_55589 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58219, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55590 = torch.constant.int 1
    %58220 = torch.aten.add.Tensor %58219, %58206, %int1_55590 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55591 = torch.constant.int 1
    %58221 = torch.aten.add.Tensor %58220, %58209, %int1_55591 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55592 = torch.constant.int 1
    %58222 = torch.aten.add.Tensor %58221, %58212, %int1_55592 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55593 = torch.constant.int 1
    %58223 = torch.aten.add.Tensor %58222, %58215, %int1_55593 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58223, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55594 = torch.constant.int 1
    %58224 = torch.aten.add.Tensor %58223, %58218, %int1_55594 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55595 = torch.constant.int 1
    %58225 = torch.aten.add.Tensor %58224, %58001, %int1_55595 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
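    // Annotation: per-device residual add — each device's resident [4,?,4096]
    // activation (%57706..%57713) plus its reduced sum (%58029..%58225),
    // yielding %58226..%58233, one replica per device.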
    %int1_55596 = torch.constant.int 1
    %58226 = torch.aten.add.Tensor %57706, %58029, %int1_55596 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55597 = torch.constant.int 1
    %58227 = torch.aten.add.Tensor %57707, %58057, %int1_55597 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55598 = torch.constant.int 1
    %58228 = torch.aten.add.Tensor %57708, %58085, %int1_55598 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55599 = torch.constant.int 1
    %58229 = torch.aten.add.Tensor %57709, %58113, %int1_55599 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55600 = torch.constant.int 1
    %58230 = torch.aten.add.Tensor %57710, %58141, %int1_55600 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55601 = torch.constant.int 1
    %58231 = torch.aten.add.Tensor %57711, %58169, %int1_55601 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55602 = torch.constant.int 1
    %58232 = torch.aten.add.Tensor %57712, %58197, %int1_55602 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58232, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_55603 = torch.constant.int 1
    %58233 = torch.aten.add.Tensor %57713, %58225, %int1_55603 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58233, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
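    // Annotation: RMSNorm over the hidden dimension, replicated on all eight
    // devices — upcast to f32, square, mean, add epsilon, rsqrt, scale, apply
    // the per-device norm weights, cast back to f16. First step below:
    // convert_element_type to torch dtype 6 (f32) for stable statistics.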
    %int6_55604 = torch.constant.int 6
    %58234 = torch.prims.convert_element_type %58226, %int6_55604 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55605 = torch.constant.int 6
    %58235 = torch.prims.convert_element_type %58227, %int6_55605 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58235, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55606 = torch.constant.int 6
    %58236 = torch.prims.convert_element_type %58228, %int6_55606 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58236, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55607 = torch.constant.int 6
    %58237 = torch.prims.convert_element_type %58229, %int6_55607 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55608 = torch.constant.int 6
    %58238 = torch.prims.convert_element_type %58230, %int6_55608 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58238, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55609 = torch.constant.int 6
    %58239 = torch.prims.convert_element_type %58231, %int6_55609 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58239, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55610 = torch.constant.int 6
    %58240 = torch.prims.convert_element_type %58232, %int6_55610 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_55611 = torch.constant.int 6
    %58241 = torch.prims.convert_element_type %58233, %int6_55611 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58241, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
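    // Annotation: x^2 — elementwise square of each f32 replica.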
    %int2_55612 = torch.constant.int 2
    %58242 = torch.aten.pow.Tensor_Scalar %58234, %int2_55612 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58242, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55613 = torch.constant.int 2
    %58243 = torch.aten.pow.Tensor_Scalar %58235, %int2_55613 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55614 = torch.constant.int 2
    %58244 = torch.aten.pow.Tensor_Scalar %58236, %int2_55614 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58244, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55615 = torch.constant.int 2
    %58245 = torch.aten.pow.Tensor_Scalar %58237, %int2_55615 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58245, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55616 = torch.constant.int 2
    %58246 = torch.aten.pow.Tensor_Scalar %58238, %int2_55616 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55617 = torch.constant.int 2
    %58247 = torch.aten.pow.Tensor_Scalar %58239, %int2_55617 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58247, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55618 = torch.constant.int 2
    %58248 = torch.aten.pow.Tensor_Scalar %58240, %int2_55618 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58248, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_55619 = torch.constant.int 2
    %58249 = torch.aten.pow.Tensor_Scalar %58241, %int2_55619 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
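    // Annotation: mean(x^2) over the last (hidden) dimension with
    // keepdim=true, reducing [4,?,4096] to [4,?,1].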
    %int-1_55620 = torch.constant.int -1
    %58250 = torch.prim.ListConstruct %int-1_55620 : (!torch.int) -> !torch.list<int>
    %true_55621 = torch.constant.bool true
    %none_55622 = torch.constant.none
    %58251 = torch.aten.mean.dim %58242, %58250, %true_55621, %none_55622 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58251, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55623 = torch.constant.int -1
    %58252 = torch.prim.ListConstruct %int-1_55623 : (!torch.int) -> !torch.list<int>
    %true_55624 = torch.constant.bool true
    %none_55625 = torch.constant.none
    %58253 = torch.aten.mean.dim %58243, %58252, %true_55624, %none_55625 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55626 = torch.constant.int -1
    %58254 = torch.prim.ListConstruct %int-1_55626 : (!torch.int) -> !torch.list<int>
    %true_55627 = torch.constant.bool true
    %none_55628 = torch.constant.none
    %58255 = torch.aten.mean.dim %58244, %58254, %true_55627, %none_55628 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55629 = torch.constant.int -1
    %58256 = torch.prim.ListConstruct %int-1_55629 : (!torch.int) -> !torch.list<int>
    %true_55630 = torch.constant.bool true
    %none_55631 = torch.constant.none
    %58257 = torch.aten.mean.dim %58245, %58256, %true_55630, %none_55631 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55632 = torch.constant.int -1
    %58258 = torch.prim.ListConstruct %int-1_55632 : (!torch.int) -> !torch.list<int>
    %true_55633 = torch.constant.bool true
    %none_55634 = torch.constant.none
    %58259 = torch.aten.mean.dim %58246, %58258, %true_55633, %none_55634 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55635 = torch.constant.int -1
    %58260 = torch.prim.ListConstruct %int-1_55635 : (!torch.int) -> !torch.list<int>
    %true_55636 = torch.constant.bool true
    %none_55637 = torch.constant.none
    %58261 = torch.aten.mean.dim %58247, %58260, %true_55636, %none_55637 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55638 = torch.constant.int -1
    %58262 = torch.prim.ListConstruct %int-1_55638 : (!torch.int) -> !torch.list<int>
    %true_55639 = torch.constant.bool true
    %none_55640 = torch.constant.none
    %58263 = torch.aten.mean.dim %58248, %58262, %true_55639, %none_55640 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58263, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_55641 = torch.constant.int -1
    %58264 = torch.prim.ListConstruct %int-1_55641 : (!torch.int) -> !torch.list<int>
    %true_55642 = torch.constant.bool true
    %none_55643 = torch.constant.none
    %58265 = torch.aten.mean.dim %58249, %58264, %true_55642, %none_55643 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
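    // Annotation: add epsilon (9.9999997e-06, the f32 rounding of 1e-5) to
    // the mean square.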
    %float9.999990e-06_55644 = torch.constant.float 9.9999997473787516E-6
    %int1_55645 = torch.constant.int 1
    %58266 = torch.aten.add.Scalar %58251, %float9.999990e-06_55644, %int1_55645 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55646 = torch.constant.float 9.9999997473787516E-6
    %int1_55647 = torch.constant.int 1
    %58267 = torch.aten.add.Scalar %58253, %float9.999990e-06_55646, %int1_55647 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58267, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55648 = torch.constant.float 9.9999997473787516E-6
    %int1_55649 = torch.constant.int 1
    %58268 = torch.aten.add.Scalar %58255, %float9.999990e-06_55648, %int1_55649 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55650 = torch.constant.float 9.9999997473787516E-6
    %int1_55651 = torch.constant.int 1
    %58269 = torch.aten.add.Scalar %58257, %float9.999990e-06_55650, %int1_55651 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58269, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55652 = torch.constant.float 9.9999997473787516E-6
    %int1_55653 = torch.constant.int 1
    %58270 = torch.aten.add.Scalar %58259, %float9.999990e-06_55652, %int1_55653 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55654 = torch.constant.float 9.9999997473787516E-6
    %int1_55655 = torch.constant.int 1
    %58271 = torch.aten.add.Scalar %58261, %float9.999990e-06_55654, %int1_55655 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55656 = torch.constant.float 9.9999997473787516E-6
    %int1_55657 = torch.constant.int 1
    %58272 = torch.aten.add.Scalar %58263, %float9.999990e-06_55656, %int1_55657 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_55658 = torch.constant.float 9.9999997473787516E-6
    %int1_55659 = torch.constant.int 1
    %58273 = torch.aten.add.Scalar %58265, %float9.999990e-06_55658, %int1_55659 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58273, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
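    // Annotation: rsqrt of (mean(x^2) + eps) — the reciprocal RMS per token.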
    %58274 = torch.aten.rsqrt %58266 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %58275 = torch.aten.rsqrt %58267 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58275, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %58276 = torch.aten.rsqrt %58268 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %58277 = torch.aten.rsqrt %58269 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %58278 = torch.aten.rsqrt %58270 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %58279 = torch.aten.rsqrt %58271 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58279, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %58280 = torch.aten.rsqrt %58272 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %58281 = torch.aten.rsqrt %58273 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %58281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
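    // Annotation: normalize — multiply each f32 activation by its
    // broadcasted [4,?,1] reciprocal RMS.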
    %58282 = torch.aten.mul.Tensor %58234, %58274 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58283 = torch.aten.mul.Tensor %58235, %58275 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58284 = torch.aten.mul.Tensor %58236, %58276 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58285 = torch.aten.mul.Tensor %58237, %58277 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58286 = torch.aten.mul.Tensor %58238, %58278 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58287 = torch.aten.mul.Tensor %58239, %58279 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58288 = torch.aten.mul.Tensor %58240, %58280 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58289 = torch.aten.mul.Tensor %58241, %58281 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58289, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
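    // Annotation: apply the learned [4096]-element f32 norm weights
    // (%2168..%2175, one replica per device) to the normalized activations.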
    %58290 = torch.aten.mul.Tensor %2168, %58282 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58291 = torch.aten.mul.Tensor %2169, %58283 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58291, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58292 = torch.aten.mul.Tensor %2170, %58284 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58292, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58293 = torch.aten.mul.Tensor %2171, %58285 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58294 = torch.aten.mul.Tensor %2172, %58286 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58295 = torch.aten.mul.Tensor %2173, %58287 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58295, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58296 = torch.aten.mul.Tensor %2174, %58288 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %58297 = torch.aten.mul.Tensor %2175, %58289 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %58297, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
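    // Annotation: cast the normalized result back to torch dtype 5 (f16).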
    %int5_55660 = torch.constant.int 5
    %58298 = torch.prims.convert_element_type %58290, %int5_55660 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58298, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55661 = torch.constant.int 5
    %58299 = torch.prims.convert_element_type %58291, %int5_55661 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55662 = torch.constant.int 5
    %58300 = torch.prims.convert_element_type %58292, %int5_55662 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55663 = torch.constant.int 5
    %58301 = torch.prims.convert_element_type %58293, %int5_55663 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58301, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55664 = torch.constant.int 5
    %58302 = torch.prims.convert_element_type %58294, %int5_55664 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55665 = torch.constant.int 5
    %58303 = torch.prims.convert_element_type %58295, %int5_55665 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58303, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55666 = torch.constant.int 5
    %58304 = torch.prims.convert_element_type %58296, %int5_55666 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58304, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_55667 = torch.constant.int 5
    %58305 = torch.prims.convert_element_type %58297, %int5_55667 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %58305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
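    // Annotation: transpose the eight [512,4096] weight shards
    // (%2176..%2183) to [4096,512] for the matmuls below; 8 x 512 = 4096
    // output columns, so this is likely the column-sharded Q projection.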
    %int1_55668 = torch.constant.int 1
    %int0_55669 = torch.constant.int 0
    %58306 = torch.prim.ListConstruct %int1_55668, %int0_55669 : (!torch.int, !torch.int) -> !torch.list<int>
    %58307 = torch.aten.permute %2176, %58306 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_55670 = torch.constant.int 1
    %int0_55671 = torch.constant.int 0
    %58308 = torch.prim.ListConstruct %int1_55670, %int0_55671 : (!torch.int, !torch.int) -> !torch.list<int>
    %58309 = torch.aten.permute %2177, %58308 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_55672 = torch.constant.int 1
    %int0_55673 = torch.constant.int 0
    %58310 = torch.prim.ListConstruct %int1_55672, %int0_55673 : (!torch.int, !torch.int) -> !torch.list<int>
    %58311 = torch.aten.permute %2178, %58310 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_55674 = torch.constant.int 1
    %int0_55675 = torch.constant.int 0
    %58312 = torch.prim.ListConstruct %int1_55674, %int0_55675 : (!torch.int, !torch.int) -> !torch.list<int>
    %58313 = torch.aten.permute %2179, %58312 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_55676 = torch.constant.int 1
    %int0_55677 = torch.constant.int 0
    %58314 = torch.prim.ListConstruct %int1_55676, %int0_55677 : (!torch.int, !torch.int) -> !torch.list<int>
    %58315 = torch.aten.permute %2180, %58314 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_55678 = torch.constant.int 1
    %int0_55679 = torch.constant.int 0
    %58316 = torch.prim.ListConstruct %int1_55678, %int0_55679 : (!torch.int, !torch.int) -> !torch.list<int>
    %58317 = torch.aten.permute %2181, %58316 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_55680 = torch.constant.int 1
    %int0_55681 = torch.constant.int 0
    %58318 = torch.prim.ListConstruct %int1_55680, %int0_55681 : (!torch.int, !torch.int) -> !torch.list<int>
    %58319 = torch.aten.permute %2182, %58318 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_55682 = torch.constant.int 1
    %int0_55683 = torch.constant.int 0
    %58320 = torch.prim.ListConstruct %int1_55682, %int0_55683 : (!torch.int, !torch.int) -> !torch.list<int>
    %58321 = torch.aten.permute %2183, %58320 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
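    // Annotation: per shard — flatten [4,?,4096] to [4*?,4096], matmul with
    // the transposed [4096,512] shard, then reshape back to [4,?,512].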
    %int4_55684 = torch.constant.int 4
    %58322 = torch.aten.mul.int %int4_55684, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55685 = torch.constant.int 4096
    %58323 = torch.prim.ListConstruct %58322, %int4096_55685 : (!torch.int, !torch.int) -> !torch.list<int>
    %58324 = torch.aten.view %58298, %58323 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58324, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58325 = torch.aten.mm %58324, %58307 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %58325, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_55686 = torch.constant.int 4
    %int512_55687 = torch.constant.int 512
    %58326 = torch.prim.ListConstruct %int4_55686, %2482, %int512_55687 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58327 = torch.aten.view %58325, %58326 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %58327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_55688 = torch.constant.int 4
    %58328 = torch.aten.mul.int %int4_55688, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55689 = torch.constant.int 4096
    %58329 = torch.prim.ListConstruct %58328, %int4096_55689 : (!torch.int, !torch.int) -> !torch.list<int>
    %58330 = torch.aten.view %58299, %58329 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58330, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58331 = torch.aten.mm %58330, %58309 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %58331, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_55690 = torch.constant.int 4
    %int512_55691 = torch.constant.int 512
    %58332 = torch.prim.ListConstruct %int4_55690, %2482, %int512_55691 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58333 = torch.aten.view %58331, %58332 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %58333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_55692 = torch.constant.int 4
    %58334 = torch.aten.mul.int %int4_55692, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55693 = torch.constant.int 4096
    %58335 = torch.prim.ListConstruct %58334, %int4096_55693 : (!torch.int, !torch.int) -> !torch.list<int>
    %58336 = torch.aten.view %58300, %58335 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58336, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58337 = torch.aten.mm %58336, %58311 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %58337, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_55694 = torch.constant.int 4
    %int512_55695 = torch.constant.int 512
    %58338 = torch.prim.ListConstruct %int4_55694, %2482, %int512_55695 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58339 = torch.aten.view %58337, %58338 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %58339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_55696 = torch.constant.int 4
    %58340 = torch.aten.mul.int %int4_55696, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55697 = torch.constant.int 4096
    %58341 = torch.prim.ListConstruct %58340, %int4096_55697 : (!torch.int, !torch.int) -> !torch.list<int>
    %58342 = torch.aten.view %58301, %58341 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58342, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58343 = torch.aten.mm %58342, %58313 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %58343, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_55698 = torch.constant.int 4
    %int512_55699 = torch.constant.int 512
    %58344 = torch.prim.ListConstruct %int4_55698, %2482, %int512_55699 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58345 = torch.aten.view %58343, %58344 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %58345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_55700 = torch.constant.int 4
    %58346 = torch.aten.mul.int %int4_55700, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55701 = torch.constant.int 4096
    %58347 = torch.prim.ListConstruct %58346, %int4096_55701 : (!torch.int, !torch.int) -> !torch.list<int>
    %58348 = torch.aten.view %58302, %58347 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58348, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58349 = torch.aten.mm %58348, %58315 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %58349, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_55702 = torch.constant.int 4
    %int512_55703 = torch.constant.int 512
    %58350 = torch.prim.ListConstruct %int4_55702, %2482, %int512_55703 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58351 = torch.aten.view %58349, %58350 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %58351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_55704 = torch.constant.int 4
    %58352 = torch.aten.mul.int %int4_55704, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55705 = torch.constant.int 4096
    %58353 = torch.prim.ListConstruct %58352, %int4096_55705 : (!torch.int, !torch.int) -> !torch.list<int>
    %58354 = torch.aten.view %58303, %58353 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58354, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58355 = torch.aten.mm %58354, %58317 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %58355, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_55706 = torch.constant.int 4
    %int512_55707 = torch.constant.int 512
    %58356 = torch.prim.ListConstruct %int4_55706, %2482, %int512_55707 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58357 = torch.aten.view %58355, %58356 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %58357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_55708 = torch.constant.int 4
    %58358 = torch.aten.mul.int %int4_55708, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55709 = torch.constant.int 4096
    %58359 = torch.prim.ListConstruct %58358, %int4096_55709 : (!torch.int, !torch.int) -> !torch.list<int>
    %58360 = torch.aten.view %58304, %58359 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58360, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58361 = torch.aten.mm %58360, %58319 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %58361, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_55710 = torch.constant.int 4
    %int512_55711 = torch.constant.int 512
    %58362 = torch.prim.ListConstruct %int4_55710, %2482, %int512_55711 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58363 = torch.aten.view %58361, %58362 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %58363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_55712 = torch.constant.int 4
    %58364 = torch.aten.mul.int %int4_55712, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55713 = torch.constant.int 4096
    %58365 = torch.prim.ListConstruct %58364, %int4096_55713 : (!torch.int, !torch.int) -> !torch.list<int>
    %58366 = torch.aten.view %58305, %58365 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58366, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58367 = torch.aten.mm %58366, %58321 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %58367, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_55714 = torch.constant.int 4
    %int512_55715 = torch.constant.int 512
    %58368 = torch.prim.ListConstruct %int4_55714, %2482, %int512_55715 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58369 = torch.aten.view %58367, %58368 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %58369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
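    // Annotation: transpose the eight [128,4096] weight shards (%2184..%2191)
    // to [4096,128]; the narrower 128-wide output per shard suggests sharded
    // K (or V) heads under grouped-query attention.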
    %int1_55716 = torch.constant.int 1
    %int0_55717 = torch.constant.int 0
    %58370 = torch.prim.ListConstruct %int1_55716, %int0_55717 : (!torch.int, !torch.int) -> !torch.list<int>
    %58371 = torch.aten.permute %2184, %58370 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55718 = torch.constant.int 1
    %int0_55719 = torch.constant.int 0
    %58372 = torch.prim.ListConstruct %int1_55718, %int0_55719 : (!torch.int, !torch.int) -> !torch.list<int>
    %58373 = torch.aten.permute %2185, %58372 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55720 = torch.constant.int 1
    %int0_55721 = torch.constant.int 0
    %58374 = torch.prim.ListConstruct %int1_55720, %int0_55721 : (!torch.int, !torch.int) -> !torch.list<int>
    %58375 = torch.aten.permute %2186, %58374 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55722 = torch.constant.int 1
    %int0_55723 = torch.constant.int 0
    %58376 = torch.prim.ListConstruct %int1_55722, %int0_55723 : (!torch.int, !torch.int) -> !torch.list<int>
    %58377 = torch.aten.permute %2187, %58376 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55724 = torch.constant.int 1
    %int0_55725 = torch.constant.int 0
    %58378 = torch.prim.ListConstruct %int1_55724, %int0_55725 : (!torch.int, !torch.int) -> !torch.list<int>
    %58379 = torch.aten.permute %2188, %58378 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55726 = torch.constant.int 1
    %int0_55727 = torch.constant.int 0
    %58380 = torch.prim.ListConstruct %int1_55726, %int0_55727 : (!torch.int, !torch.int) -> !torch.list<int>
    %58381 = torch.aten.permute %2189, %58380 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55728 = torch.constant.int 1
    %int0_55729 = torch.constant.int 0
    %58382 = torch.prim.ListConstruct %int1_55728, %int0_55729 : (!torch.int, !torch.int) -> !torch.list<int>
    %58383 = torch.aten.permute %2190, %58382 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55730 = torch.constant.int 1
    %int0_55731 = torch.constant.int 0
    %58384 = torch.prim.ListConstruct %int1_55730, %int0_55731 : (!torch.int, !torch.int) -> !torch.list<int>
    %58385 = torch.aten.permute %2191, %58384 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
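    // Annotation: same per-shard pattern at width 128 — flatten, matmul with
    // each [4096,128] shard, reshape to [4,?,128].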
    %int4_55732 = torch.constant.int 4
    %58386 = torch.aten.mul.int %int4_55732, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55733 = torch.constant.int 4096
    %58387 = torch.prim.ListConstruct %58386, %int4096_55733 : (!torch.int, !torch.int) -> !torch.list<int>
    %58388 = torch.aten.view %58298, %58387 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58388, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58389 = torch.aten.mm %58388, %58371 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58389, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55734 = torch.constant.int 4
    %int128_55735 = torch.constant.int 128
    %58390 = torch.prim.ListConstruct %int4_55734, %2482, %int128_55735 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58391 = torch.aten.view %58389, %58390 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55736 = torch.constant.int 4
    %58392 = torch.aten.mul.int %int4_55736, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55737 = torch.constant.int 4096
    %58393 = torch.prim.ListConstruct %58392, %int4096_55737 : (!torch.int, !torch.int) -> !torch.list<int>
    %58394 = torch.aten.view %58299, %58393 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58394, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58395 = torch.aten.mm %58394, %58373 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58395, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55738 = torch.constant.int 4
    %int128_55739 = torch.constant.int 128
    %58396 = torch.prim.ListConstruct %int4_55738, %2482, %int128_55739 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58397 = torch.aten.view %58395, %58396 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55740 = torch.constant.int 4
    %58398 = torch.aten.mul.int %int4_55740, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55741 = torch.constant.int 4096
    %58399 = torch.prim.ListConstruct %58398, %int4096_55741 : (!torch.int, !torch.int) -> !torch.list<int>
    %58400 = torch.aten.view %58300, %58399 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58400, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58401 = torch.aten.mm %58400, %58375 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58401, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55742 = torch.constant.int 4
    %int128_55743 = torch.constant.int 128
    %58402 = torch.prim.ListConstruct %int4_55742, %2482, %int128_55743 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58403 = torch.aten.view %58401, %58402 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58403, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55744 = torch.constant.int 4
    %58404 = torch.aten.mul.int %int4_55744, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55745 = torch.constant.int 4096
    %58405 = torch.prim.ListConstruct %58404, %int4096_55745 : (!torch.int, !torch.int) -> !torch.list<int>
    %58406 = torch.aten.view %58301, %58405 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58406, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58407 = torch.aten.mm %58406, %58377 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58407, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55746 = torch.constant.int 4
    %int128_55747 = torch.constant.int 128
    %58408 = torch.prim.ListConstruct %int4_55746, %2482, %int128_55747 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58409 = torch.aten.view %58407, %58408 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58409, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55748 = torch.constant.int 4
    %58410 = torch.aten.mul.int %int4_55748, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55749 = torch.constant.int 4096
    %58411 = torch.prim.ListConstruct %58410, %int4096_55749 : (!torch.int, !torch.int) -> !torch.list<int>
    %58412 = torch.aten.view %58302, %58411 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58412, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58413 = torch.aten.mm %58412, %58379 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58413, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55750 = torch.constant.int 4
    %int128_55751 = torch.constant.int 128
    %58414 = torch.prim.ListConstruct %int4_55750, %2482, %int128_55751 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58415 = torch.aten.view %58413, %58414 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58415, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55752 = torch.constant.int 4
    %58416 = torch.aten.mul.int %int4_55752, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55753 = torch.constant.int 4096
    %58417 = torch.prim.ListConstruct %58416, %int4096_55753 : (!torch.int, !torch.int) -> !torch.list<int>
    %58418 = torch.aten.view %58303, %58417 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58418, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58419 = torch.aten.mm %58418, %58381 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58419, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55754 = torch.constant.int 4
    %int128_55755 = torch.constant.int 128
    %58420 = torch.prim.ListConstruct %int4_55754, %2482, %int128_55755 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58421 = torch.aten.view %58419, %58420 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55756 = torch.constant.int 4
    %58422 = torch.aten.mul.int %int4_55756, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55757 = torch.constant.int 4096
    %58423 = torch.prim.ListConstruct %58422, %int4096_55757 : (!torch.int, !torch.int) -> !torch.list<int>
    %58424 = torch.aten.view %58304, %58423 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58424, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58425 = torch.aten.mm %58424, %58383 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58425, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55758 = torch.constant.int 4
    %int128_55759 = torch.constant.int 128
    %58426 = torch.prim.ListConstruct %int4_55758, %2482, %int128_55759 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58427 = torch.aten.view %58425, %58426 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55760 = torch.constant.int 4
    %58428 = torch.aten.mul.int %int4_55760, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55761 = torch.constant.int 4096
    %58429 = torch.prim.ListConstruct %58428, %int4096_55761 : (!torch.int, !torch.int) -> !torch.list<int>
    %58430 = torch.aten.view %58305, %58429 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58430, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58431 = torch.aten.mm %58430, %58385 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58431, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55762 = torch.constant.int 4
    %int128_55763 = torch.constant.int 128
    %58432 = torch.prim.ListConstruct %int4_55762, %2482, %int128_55763 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58433 = torch.aten.view %58431, %58432 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
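    // Annotation: a second set of [128,4096] shards (%2192..%2199),
    // transposed the same way; by position this is likely the remaining
    // K/V projection.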
    %int1_55764 = torch.constant.int 1
    %int0_55765 = torch.constant.int 0
    %58434 = torch.prim.ListConstruct %int1_55764, %int0_55765 : (!torch.int, !torch.int) -> !torch.list<int>
    %58435 = torch.aten.permute %2192, %58434 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55766 = torch.constant.int 1
    %int0_55767 = torch.constant.int 0
    %58436 = torch.prim.ListConstruct %int1_55766, %int0_55767 : (!torch.int, !torch.int) -> !torch.list<int>
    %58437 = torch.aten.permute %2193, %58436 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55768 = torch.constant.int 1
    %int0_55769 = torch.constant.int 0
    %58438 = torch.prim.ListConstruct %int1_55768, %int0_55769 : (!torch.int, !torch.int) -> !torch.list<int>
    %58439 = torch.aten.permute %2194, %58438 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55770 = torch.constant.int 1
    %int0_55771 = torch.constant.int 0
    %58440 = torch.prim.ListConstruct %int1_55770, %int0_55771 : (!torch.int, !torch.int) -> !torch.list<int>
    %58441 = torch.aten.permute %2195, %58440 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55772 = torch.constant.int 1
    %int0_55773 = torch.constant.int 0
    %58442 = torch.prim.ListConstruct %int1_55772, %int0_55773 : (!torch.int, !torch.int) -> !torch.list<int>
    %58443 = torch.aten.permute %2196, %58442 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55774 = torch.constant.int 1
    %int0_55775 = torch.constant.int 0
    %58444 = torch.prim.ListConstruct %int1_55774, %int0_55775 : (!torch.int, !torch.int) -> !torch.list<int>
    %58445 = torch.aten.permute %2197, %58444 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55776 = torch.constant.int 1
    %int0_55777 = torch.constant.int 0
    %58446 = torch.prim.ListConstruct %int1_55776, %int0_55777 : (!torch.int, !torch.int) -> !torch.list<int>
    %58447 = torch.aten.permute %2198, %58446 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_55778 = torch.constant.int 1
    %int0_55779 = torch.constant.int 0
    %58448 = torch.prim.ListConstruct %int1_55778, %int0_55779 : (!torch.int, !torch.int) -> !torch.list<int>
    %58449 = torch.aten.permute %2199, %58448 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
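    // Annotation: the matching per-shard [4*?,4096] x [4096,128] projections,
    // one per device, reshaped to [4,?,128].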
    %int4_55780 = torch.constant.int 4
    %58450 = torch.aten.mul.int %int4_55780, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55781 = torch.constant.int 4096
    %58451 = torch.prim.ListConstruct %58450, %int4096_55781 : (!torch.int, !torch.int) -> !torch.list<int>
    %58452 = torch.aten.view %58298, %58451 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58452, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58453 = torch.aten.mm %58452, %58435 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58453, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55782 = torch.constant.int 4
    %int128_55783 = torch.constant.int 128
    %58454 = torch.prim.ListConstruct %int4_55782, %2482, %int128_55783 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58455 = torch.aten.view %58453, %58454 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55784 = torch.constant.int 4
    %58456 = torch.aten.mul.int %int4_55784, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55785 = torch.constant.int 4096
    %58457 = torch.prim.ListConstruct %58456, %int4096_55785 : (!torch.int, !torch.int) -> !torch.list<int>
    %58458 = torch.aten.view %58299, %58457 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58458, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58459 = torch.aten.mm %58458, %58437 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58459, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55786 = torch.constant.int 4
    %int128_55787 = torch.constant.int 128
    %58460 = torch.prim.ListConstruct %int4_55786, %2482, %int128_55787 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58461 = torch.aten.view %58459, %58460 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55788 = torch.constant.int 4
    %58462 = torch.aten.mul.int %int4_55788, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55789 = torch.constant.int 4096
    %58463 = torch.prim.ListConstruct %58462, %int4096_55789 : (!torch.int, !torch.int) -> !torch.list<int>
    %58464 = torch.aten.view %58300, %58463 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58464, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58465 = torch.aten.mm %58464, %58439 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58465, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55790 = torch.constant.int 4
    %int128_55791 = torch.constant.int 128
    %58466 = torch.prim.ListConstruct %int4_55790, %2482, %int128_55791 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58467 = torch.aten.view %58465, %58466 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55792 = torch.constant.int 4
    %58468 = torch.aten.mul.int %int4_55792, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55793 = torch.constant.int 4096
    %58469 = torch.prim.ListConstruct %58468, %int4096_55793 : (!torch.int, !torch.int) -> !torch.list<int>
    %58470 = torch.aten.view %58301, %58469 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58470, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58471 = torch.aten.mm %58470, %58441 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58471, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55794 = torch.constant.int 4
    %int128_55795 = torch.constant.int 128
    %58472 = torch.prim.ListConstruct %int4_55794, %2482, %int128_55795 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58473 = torch.aten.view %58471, %58472 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55796 = torch.constant.int 4
    %58474 = torch.aten.mul.int %int4_55796, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55797 = torch.constant.int 4096
    %58475 = torch.prim.ListConstruct %58474, %int4096_55797 : (!torch.int, !torch.int) -> !torch.list<int>
    %58476 = torch.aten.view %58302, %58475 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58476, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58477 = torch.aten.mm %58476, %58443 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58477, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55798 = torch.constant.int 4
    %int128_55799 = torch.constant.int 128
    %58478 = torch.prim.ListConstruct %int4_55798, %2482, %int128_55799 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58479 = torch.aten.view %58477, %58478 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55800 = torch.constant.int 4
    %58480 = torch.aten.mul.int %int4_55800, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55801 = torch.constant.int 4096
    %58481 = torch.prim.ListConstruct %58480, %int4096_55801 : (!torch.int, !torch.int) -> !torch.list<int>
    %58482 = torch.aten.view %58303, %58481 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58482, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58483 = torch.aten.mm %58482, %58445 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58483, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55802 = torch.constant.int 4
    %int128_55803 = torch.constant.int 128
    %58484 = torch.prim.ListConstruct %int4_55802, %2482, %int128_55803 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58485 = torch.aten.view %58483, %58484 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55804 = torch.constant.int 4
    %58486 = torch.aten.mul.int %int4_55804, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55805 = torch.constant.int 4096
    %58487 = torch.prim.ListConstruct %58486, %int4096_55805 : (!torch.int, !torch.int) -> !torch.list<int>
    %58488 = torch.aten.view %58304, %58487 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58488, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58489 = torch.aten.mm %58488, %58447 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58489, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55806 = torch.constant.int 4
    %int128_55807 = torch.constant.int 128
    %58490 = torch.prim.ListConstruct %int4_55806, %2482, %int128_55807 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58491 = torch.aten.view %58489, %58490 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_55808 = torch.constant.int 4
    %58492 = torch.aten.mul.int %int4_55808, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_55809 = torch.constant.int 4096
    %58493 = torch.prim.ListConstruct %58492, %int4096_55809 : (!torch.int, !torch.int) -> !torch.list<int>
    %58494 = torch.aten.view %58305, %58493 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %58494, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %58495 = torch.aten.mm %58494, %58449 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %58495, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_55810 = torch.constant.int 4
    %int128_55811 = torch.constant.int 128
    %58496 = torch.prim.ListConstruct %int4_55810, %2482, %int128_55811 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58497 = torch.aten.view %58495, %58496 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %58497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
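    // Reshape eight [4,?,512] tensors to [4,?,4,128]: four 128-dim heads per
    // device, which appears to be the sharded query tensor ahead of the rotary
    // embedding applied further below.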
    %int4_55812 = torch.constant.int 4
    %int4_55813 = torch.constant.int 4
    %int128_55814 = torch.constant.int 128
    %58498 = torch.prim.ListConstruct %int4_55812, %2482, %int4_55813, %int128_55814 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58499 = torch.aten.view %58327, %58498 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_55815 = torch.constant.int 4
    %int4_55816 = torch.constant.int 4
    %int128_55817 = torch.constant.int 128
    %58500 = torch.prim.ListConstruct %int4_55815, %2482, %int4_55816, %int128_55817 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58501 = torch.aten.view %58333, %58500 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_55818 = torch.constant.int 4
    %int4_55819 = torch.constant.int 4
    %int128_55820 = torch.constant.int 128
    %58502 = torch.prim.ListConstruct %int4_55818, %2482, %int4_55819, %int128_55820 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58503 = torch.aten.view %58339, %58502 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_55821 = torch.constant.int 4
    %int4_55822 = torch.constant.int 4
    %int128_55823 = torch.constant.int 128
    %58504 = torch.prim.ListConstruct %int4_55821, %2482, %int4_55822, %int128_55823 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58505 = torch.aten.view %58345, %58504 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_55824 = torch.constant.int 4
    %int4_55825 = torch.constant.int 4
    %int128_55826 = torch.constant.int 128
    %58506 = torch.prim.ListConstruct %int4_55824, %2482, %int4_55825, %int128_55826 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58507 = torch.aten.view %58351, %58506 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_55827 = torch.constant.int 4
    %int4_55828 = torch.constant.int 4
    %int128_55829 = torch.constant.int 128
    %58508 = torch.prim.ListConstruct %int4_55827, %2482, %int4_55828, %int128_55829 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58509 = torch.aten.view %58357, %58508 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_55830 = torch.constant.int 4
    %int4_55831 = torch.constant.int 4
    %int128_55832 = torch.constant.int 128
    %58510 = torch.prim.ListConstruct %int4_55830, %2482, %int4_55831, %int128_55832 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58511 = torch.aten.view %58363, %58510 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58511, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_55833 = torch.constant.int 4
    %int4_55834 = torch.constant.int 4
    %int128_55835 = torch.constant.int 128
    %58512 = torch.prim.ListConstruct %int4_55833, %2482, %int4_55834, %int128_55835 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58513 = torch.aten.view %58369, %58512 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
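    // Reshape the earlier group of eight [4,?,128] projections (%58391..%58433)
    // to [4,?,1,128], one KV head per device; by position these appear to be the
    // key shards.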
    %int4_55836 = torch.constant.int 4
    %int1_55837 = torch.constant.int 1
    %int128_55838 = torch.constant.int 128
    %58514 = torch.prim.ListConstruct %int4_55836, %2482, %int1_55837, %int128_55838 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58515 = torch.aten.view %58391, %58514 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55839 = torch.constant.int 4
    %int1_55840 = torch.constant.int 1
    %int128_55841 = torch.constant.int 128
    %58516 = torch.prim.ListConstruct %int4_55839, %2482, %int1_55840, %int128_55841 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58517 = torch.aten.view %58397, %58516 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58517, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55842 = torch.constant.int 4
    %int1_55843 = torch.constant.int 1
    %int128_55844 = torch.constant.int 128
    %58518 = torch.prim.ListConstruct %int4_55842, %2482, %int1_55843, %int128_55844 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58519 = torch.aten.view %58403, %58518 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55845 = torch.constant.int 4
    %int1_55846 = torch.constant.int 1
    %int128_55847 = torch.constant.int 128
    %58520 = torch.prim.ListConstruct %int4_55845, %2482, %int1_55846, %int128_55847 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58521 = torch.aten.view %58409, %58520 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58521, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55848 = torch.constant.int 4
    %int1_55849 = torch.constant.int 1
    %int128_55850 = torch.constant.int 128
    %58522 = torch.prim.ListConstruct %int4_55848, %2482, %int1_55849, %int128_55850 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58523 = torch.aten.view %58415, %58522 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58523, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55851 = torch.constant.int 4
    %int1_55852 = torch.constant.int 1
    %int128_55853 = torch.constant.int 128
    %58524 = torch.prim.ListConstruct %int4_55851, %2482, %int1_55852, %int128_55853 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58525 = torch.aten.view %58421, %58524 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55854 = torch.constant.int 4
    %int1_55855 = torch.constant.int 1
    %int128_55856 = torch.constant.int 128
    %58526 = torch.prim.ListConstruct %int4_55854, %2482, %int1_55855, %int128_55856 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58527 = torch.aten.view %58427, %58526 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58527, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55857 = torch.constant.int 4
    %int1_55858 = torch.constant.int 1
    %int128_55859 = torch.constant.int 128
    %58528 = torch.prim.ListConstruct %int4_55857, %2482, %int1_55858, %int128_55859 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58529 = torch.aten.view %58433, %58528 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
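    // Reshape the projections computed just above (%58455..%58497) to
    // [4,?,1,128]; these appear to be the value shards.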
    %int4_55860 = torch.constant.int 4
    %int1_55861 = torch.constant.int 1
    %int128_55862 = torch.constant.int 128
    %58530 = torch.prim.ListConstruct %int4_55860, %2482, %int1_55861, %int128_55862 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58531 = torch.aten.view %58455, %58530 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55863 = torch.constant.int 4
    %int1_55864 = torch.constant.int 1
    %int128_55865 = torch.constant.int 128
    %58532 = torch.prim.ListConstruct %int4_55863, %2482, %int1_55864, %int128_55865 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58533 = torch.aten.view %58461, %58532 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55866 = torch.constant.int 4
    %int1_55867 = torch.constant.int 1
    %int128_55868 = torch.constant.int 128
    %58534 = torch.prim.ListConstruct %int4_55866, %2482, %int1_55867, %int128_55868 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58535 = torch.aten.view %58467, %58534 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55869 = torch.constant.int 4
    %int1_55870 = torch.constant.int 1
    %int128_55871 = torch.constant.int 128
    %58536 = torch.prim.ListConstruct %int4_55869, %2482, %int1_55870, %int128_55871 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58537 = torch.aten.view %58473, %58536 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55872 = torch.constant.int 4
    %int1_55873 = torch.constant.int 1
    %int128_55874 = torch.constant.int 128
    %58538 = torch.prim.ListConstruct %int4_55872, %2482, %int1_55873, %int128_55874 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58539 = torch.aten.view %58479, %58538 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58539, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55875 = torch.constant.int 4
    %int1_55876 = torch.constant.int 1
    %int128_55877 = torch.constant.int 128
    %58540 = torch.prim.ListConstruct %int4_55875, %2482, %int1_55876, %int128_55877 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58541 = torch.aten.view %58485, %58540 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55878 = torch.constant.int 4
    %int1_55879 = torch.constant.int 1
    %int128_55880 = torch.constant.int 128
    %58542 = torch.prim.ListConstruct %int4_55878, %2482, %int1_55879, %int128_55880 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58543 = torch.aten.view %58491, %58542 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58543, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_55881 = torch.constant.int 4
    %int1_55882 = torch.constant.int 1
    %int128_55883 = torch.constant.int 128
    %58544 = torch.prim.ListConstruct %int4_55881, %2482, %int1_55882, %int128_55883 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58545 = torch.aten.view %58497, %58544 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
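    // Build the rotary-embedding table on the CPU: positions 0..131071 are
    // multiplied by 64 inverse frequencies 500000^(-2i/128) (arange 0..126 step 2,
    // divided by 128), and the resulting cos/sin pairs are packed into a
    // [131072,64] complex<f32> tensor.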
    %int131072_55884 = torch.constant.int 131072
    %none_55885 = torch.constant.none
    %none_55886 = torch.constant.none
    %cpu_55887 = torch.constant.device "cpu"
    %false_55888 = torch.constant.bool false
    %58546 = torch.aten.arange %int131072_55884, %none_55885, %none_55886, %cpu_55887, %false_55888 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_55889 = torch.constant.int 0
    %int128_55890 = torch.constant.int 128
    %int2_55891 = torch.constant.int 2
    %none_55892 = torch.constant.none
    %none_55893 = torch.constant.none
    %cpu_55894 = torch.constant.device "cpu"
    %false_55895 = torch.constant.bool false
    %58547 = torch.aten.arange.start_step %int0_55889, %int128_55890, %int2_55891, %none_55892, %none_55893, %cpu_55894, %false_55895 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_55896 = torch.constant.int 0
    %int0_55897 = torch.constant.int 0
    %int64_55898 = torch.constant.int 64
    %int1_55899 = torch.constant.int 1
    %58548 = torch.aten.slice.Tensor %58547, %int0_55896, %int0_55897, %int64_55898, %int1_55899 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_55900 = torch.constant.int 6
    %58549 = torch.prims.convert_element_type %58548, %int6_55900 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_55901 = torch.constant.int 128
    %58550 = torch.aten.div.Scalar %58549, %int128_55901 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_55902 = torch.constant.float 5.000000e+05
    %58551 = torch.aten.pow.Scalar %float5.000000e05_55902, %58550 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %58552 = torch.aten.reciprocal %58551 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_55903 = torch.constant.float 1.000000e+00
    %58553 = torch.aten.mul.Scalar %58552, %float1.000000e00_55903 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_55904 = torch.constant.int 131072
    %int1_55905 = torch.constant.int 1
    %58554 = torch.prim.ListConstruct %int131072_55904, %int1_55905 : (!torch.int, !torch.int) -> !torch.list<int>
    %58555 = torch.aten.view %58546, %58554 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %58556 = torch.aten.mul.Tensor %58555, %58553 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %58557 = torch.aten.cos %58556 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %58558 = torch.aten.sin %58556 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %58559 = torch.aten.complex %58557, %58558 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
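    // Replicate the complex rotary table onto all eight devices
    // (@__device_0 .. @__device_7).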
    %58560 = torch_c.to_builtin_tensor %58559 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58561 = flow.tensor.transfer %58560 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %58562 = torch_c.from_builtin_tensor %58561 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58563 = torch_c.to_builtin_tensor %58559 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58564 = flow.tensor.transfer %58563 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %58565 = torch_c.from_builtin_tensor %58564 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58566 = torch_c.to_builtin_tensor %58559 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58567 = flow.tensor.transfer %58566 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %58568 = torch_c.from_builtin_tensor %58567 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58569 = torch_c.to_builtin_tensor %58559 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58570 = flow.tensor.transfer %58569 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %58571 = torch_c.from_builtin_tensor %58570 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58572 = torch_c.to_builtin_tensor %58559 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58573 = flow.tensor.transfer %58572 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %58574 = torch_c.from_builtin_tensor %58573 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58575 = torch_c.to_builtin_tensor %58559 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58576 = flow.tensor.transfer %58575 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %58577 = torch_c.from_builtin_tensor %58576 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58578 = torch_c.to_builtin_tensor %58559 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58579 = flow.tensor.transfer %58578 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %58580 = torch_c.from_builtin_tensor %58579 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58581 = torch_c.to_builtin_tensor %58559 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58582 = flow.tensor.transfer %58581 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %58583 = torch_c.from_builtin_tensor %58582 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
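    // Apply rotary embedding to the first shard: slice the device-0 table to the
    // current sequence length, shape it to [1,?,1,64], bitcast the f16 [4,?,4,128]
    // activation to complex<f16> [4,?,4,64], multiply elementwise by the table,
    // bitcast back to f32 [4,?,4,128], and truncate to f16.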
    %int1_55906 = torch.constant.int 1
    %58584 = torch.aten.size.int %58327, %int1_55906 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_55907 = torch.constant.int 0
    %58585 = torch.aten.add.int %int0_55907, %58584 : !torch.int, !torch.int -> !torch.int
    %int0_55908 = torch.constant.int 0
    %int0_55909 = torch.constant.int 0
    %int1_55910 = torch.constant.int 1
    %58586 = torch.aten.slice.Tensor %58562, %int0_55908, %int0_55909, %58585, %int1_55910 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58586, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_55911 = torch.constant.int 1
    %int0_55912 = torch.constant.int 0
    %int9223372036854775807_55913 = torch.constant.int 9223372036854775807
    %int1_55914 = torch.constant.int 1
    %58587 = torch.aten.slice.Tensor %58586, %int1_55911, %int0_55912, %int9223372036854775807_55913, %int1_55914 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58587, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_55915 = torch.constant.int 0
    %58588 = torch.aten.unsqueeze %58587, %int0_55915 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58588, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_55916 = torch.constant.int 2
    %58589 = torch.aten.unsqueeze %58588, %int2_55916 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58589, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_55917 = torch.constant.int 3
    %int0_55918 = torch.constant.int 0
    %int9223372036854775807_55919 = torch.constant.int 9223372036854775807
    %int1_55920 = torch.constant.int 1
    %58590 = torch.aten.slice.Tensor %58589, %int3_55917, %int0_55918, %int9223372036854775807_55919, %int1_55920 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58590, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58591 = torch_c.to_builtin_tensor %58499 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_55921 = arith.constant 1 : index
    %dim_55922 = tensor.dim %58591, %c1_55921 : tensor<4x?x4x128xf16>
    %58592 = flow.tensor.bitcast %58591 : tensor<4x?x4x128xf16>{%dim_55922} -> tensor<4x?x4x64xcomplex<f16>>{%dim_55922}
    %58593 = torch_c.from_builtin_tensor %58592 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %58593, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %58594 = torch.aten.mul.Tensor %58593, %58590 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %58594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %58595 = torch_c.to_builtin_tensor %58594 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_55923 = arith.constant 1 : index
    %dim_55924 = tensor.dim %58595, %c1_55923 : tensor<4x?x4x64xcomplex<f32>>
    %58596 = flow.tensor.bitcast %58595 : tensor<4x?x4x64xcomplex<f32>>{%dim_55924} -> tensor<4x?x4x128xf32>{%dim_55924}
    %58597 = torch_c.from_builtin_tensor %58596 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %58597, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_55925 = torch.constant.int 5
    %58598 = torch.prims.convert_element_type %58597, %int5_55925 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
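    // Same rotary application, repeated for the device-1 shard (%58501).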
    %int1_55926 = torch.constant.int 1
    %58599 = torch.aten.size.int %58333, %int1_55926 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_55927 = torch.constant.int 0
    %58600 = torch.aten.add.int %int0_55927, %58599 : !torch.int, !torch.int -> !torch.int
    %int0_55928 = torch.constant.int 0
    %int0_55929 = torch.constant.int 0
    %int1_55930 = torch.constant.int 1
    %58601 = torch.aten.slice.Tensor %58565, %int0_55928, %int0_55929, %58600, %int1_55930 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58601, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_55931 = torch.constant.int 1
    %int0_55932 = torch.constant.int 0
    %int9223372036854775807_55933 = torch.constant.int 9223372036854775807
    %int1_55934 = torch.constant.int 1
    %58602 = torch.aten.slice.Tensor %58601, %int1_55931, %int0_55932, %int9223372036854775807_55933, %int1_55934 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58602, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_55935 = torch.constant.int 0
    %58603 = torch.aten.unsqueeze %58602, %int0_55935 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58603, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_55936 = torch.constant.int 2
    %58604 = torch.aten.unsqueeze %58603, %int2_55936 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58604, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_55937 = torch.constant.int 3
    %int0_55938 = torch.constant.int 0
    %int9223372036854775807_55939 = torch.constant.int 9223372036854775807
    %int1_55940 = torch.constant.int 1
    %58605 = torch.aten.slice.Tensor %58604, %int3_55937, %int0_55938, %int9223372036854775807_55939, %int1_55940 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58605, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58606 = torch_c.to_builtin_tensor %58501 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_55941 = arith.constant 1 : index
    %dim_55942 = tensor.dim %58606, %c1_55941 : tensor<4x?x4x128xf16>
    %58607 = flow.tensor.bitcast %58606 : tensor<4x?x4x128xf16>{%dim_55942} -> tensor<4x?x4x64xcomplex<f16>>{%dim_55942}
    %58608 = torch_c.from_builtin_tensor %58607 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %58608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %58609 = torch.aten.mul.Tensor %58608, %58605 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %58609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %58610 = torch_c.to_builtin_tensor %58609 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_55943 = arith.constant 1 : index
    %dim_55944 = tensor.dim %58610, %c1_55943 : tensor<4x?x4x64xcomplex<f32>>
    %58611 = flow.tensor.bitcast %58610 : tensor<4x?x4x64xcomplex<f32>>{%dim_55944} -> tensor<4x?x4x128xf32>{%dim_55944}
    %58612 = torch_c.from_builtin_tensor %58611 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %58612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_55945 = torch.constant.int 5
    %58613 = torch.prims.convert_element_type %58612, %int5_55945 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
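    // Same rotary application, repeated for the device-2 shard (%58503).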
    %int1_55946 = torch.constant.int 1
    %58614 = torch.aten.size.int %58339, %int1_55946 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_55947 = torch.constant.int 0
    %58615 = torch.aten.add.int %int0_55947, %58614 : !torch.int, !torch.int -> !torch.int
    %int0_55948 = torch.constant.int 0
    %int0_55949 = torch.constant.int 0
    %int1_55950 = torch.constant.int 1
    %58616 = torch.aten.slice.Tensor %58568, %int0_55948, %int0_55949, %58615, %int1_55950 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58616, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_55951 = torch.constant.int 1
    %int0_55952 = torch.constant.int 0
    %int9223372036854775807_55953 = torch.constant.int 9223372036854775807
    %int1_55954 = torch.constant.int 1
    %58617 = torch.aten.slice.Tensor %58616, %int1_55951, %int0_55952, %int9223372036854775807_55953, %int1_55954 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58617, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_55955 = torch.constant.int 0
    %58618 = torch.aten.unsqueeze %58617, %int0_55955 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58618, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_55956 = torch.constant.int 2
    %58619 = torch.aten.unsqueeze %58618, %int2_55956 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58619, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_55957 = torch.constant.int 3
    %int0_55958 = torch.constant.int 0
    %int9223372036854775807_55959 = torch.constant.int 9223372036854775807
    %int1_55960 = torch.constant.int 1
    %58620 = torch.aten.slice.Tensor %58619, %int3_55957, %int0_55958, %int9223372036854775807_55959, %int1_55960 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58620, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58621 = torch_c.to_builtin_tensor %58503 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_55961 = arith.constant 1 : index
    %dim_55962 = tensor.dim %58621, %c1_55961 : tensor<4x?x4x128xf16>
    %58622 = flow.tensor.bitcast %58621 : tensor<4x?x4x128xf16>{%dim_55962} -> tensor<4x?x4x64xcomplex<f16>>{%dim_55962}
    %58623 = torch_c.from_builtin_tensor %58622 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %58623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %58624 = torch.aten.mul.Tensor %58623, %58620 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %58624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %58625 = torch_c.to_builtin_tensor %58624 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_55963 = arith.constant 1 : index
    %dim_55964 = tensor.dim %58625, %c1_55963 : tensor<4x?x4x64xcomplex<f32>>
    %58626 = flow.tensor.bitcast %58625 : tensor<4x?x4x64xcomplex<f32>>{%dim_55964} -> tensor<4x?x4x128xf32>{%dim_55964}
    %58627 = torch_c.from_builtin_tensor %58626 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %58627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_55965 = torch.constant.int 5
    %58628 = torch.prims.convert_element_type %58627, %int5_55965 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
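    // Same rotary application, repeated for the device-3 shard (%58505).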
    %int1_55966 = torch.constant.int 1
    %58629 = torch.aten.size.int %58345, %int1_55966 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_55967 = torch.constant.int 0
    %58630 = torch.aten.add.int %int0_55967, %58629 : !torch.int, !torch.int -> !torch.int
    %int0_55968 = torch.constant.int 0
    %int0_55969 = torch.constant.int 0
    %int1_55970 = torch.constant.int 1
    %58631 = torch.aten.slice.Tensor %58571, %int0_55968, %int0_55969, %58630, %int1_55970 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58631, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_55971 = torch.constant.int 1
    %int0_55972 = torch.constant.int 0
    %int9223372036854775807_55973 = torch.constant.int 9223372036854775807
    %int1_55974 = torch.constant.int 1
    %58632 = torch.aten.slice.Tensor %58631, %int1_55971, %int0_55972, %int9223372036854775807_55973, %int1_55974 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58632, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_55975 = torch.constant.int 0
    %58633 = torch.aten.unsqueeze %58632, %int0_55975 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58633, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_55976 = torch.constant.int 2
    %58634 = torch.aten.unsqueeze %58633, %int2_55976 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58634, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_55977 = torch.constant.int 3
    %int0_55978 = torch.constant.int 0
    %int9223372036854775807_55979 = torch.constant.int 9223372036854775807
    %int1_55980 = torch.constant.int 1
    %58635 = torch.aten.slice.Tensor %58634, %int3_55977, %int0_55978, %int9223372036854775807_55979, %int1_55980 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58635, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58636 = torch_c.to_builtin_tensor %58505 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_55981 = arith.constant 1 : index
    %dim_55982 = tensor.dim %58636, %c1_55981 : tensor<4x?x4x128xf16>
    %58637 = flow.tensor.bitcast %58636 : tensor<4x?x4x128xf16>{%dim_55982} -> tensor<4x?x4x64xcomplex<f16>>{%dim_55982}
    %58638 = torch_c.from_builtin_tensor %58637 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %58638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %58639 = torch.aten.mul.Tensor %58638, %58635 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %58639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %58640 = torch_c.to_builtin_tensor %58639 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_55983 = arith.constant 1 : index
    %dim_55984 = tensor.dim %58640, %c1_55983 : tensor<4x?x4x64xcomplex<f32>>
    %58641 = flow.tensor.bitcast %58640 : tensor<4x?x4x64xcomplex<f32>>{%dim_55984} -> tensor<4x?x4x128xf32>{%dim_55984}
    %58642 = torch_c.from_builtin_tensor %58641 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %58642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_55985 = torch.constant.int 5
    %58643 = torch.prims.convert_element_type %58642, %int5_55985 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
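    // Same rotary application, repeated for the device-4 shard (%58507).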
    %int1_55986 = torch.constant.int 1
    %58644 = torch.aten.size.int %58351, %int1_55986 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_55987 = torch.constant.int 0
    %58645 = torch.aten.add.int %int0_55987, %58644 : !torch.int, !torch.int -> !torch.int
    %int0_55988 = torch.constant.int 0
    %int0_55989 = torch.constant.int 0
    %int1_55990 = torch.constant.int 1
    %58646 = torch.aten.slice.Tensor %58574, %int0_55988, %int0_55989, %58645, %int1_55990 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58646, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_55991 = torch.constant.int 1
    %int0_55992 = torch.constant.int 0
    %int9223372036854775807_55993 = torch.constant.int 9223372036854775807
    %int1_55994 = torch.constant.int 1
    %58647 = torch.aten.slice.Tensor %58646, %int1_55991, %int0_55992, %int9223372036854775807_55993, %int1_55994 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58647, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_55995 = torch.constant.int 0
    %58648 = torch.aten.unsqueeze %58647, %int0_55995 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58648, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_55996 = torch.constant.int 2
    %58649 = torch.aten.unsqueeze %58648, %int2_55996 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58649, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_55997 = torch.constant.int 3
    %int0_55998 = torch.constant.int 0
    %int9223372036854775807_55999 = torch.constant.int 9223372036854775807
    %int1_56000 = torch.constant.int 1
    %58650 = torch.aten.slice.Tensor %58649, %int3_55997, %int0_55998, %int9223372036854775807_55999, %int1_56000 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58650, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58651 = torch_c.to_builtin_tensor %58507 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_56001 = arith.constant 1 : index
    %dim_56002 = tensor.dim %58651, %c1_56001 : tensor<4x?x4x128xf16>
    %58652 = flow.tensor.bitcast %58651 : tensor<4x?x4x128xf16>{%dim_56002} -> tensor<4x?x4x64xcomplex<f16>>{%dim_56002}
    %58653 = torch_c.from_builtin_tensor %58652 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %58653, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %58654 = torch.aten.mul.Tensor %58653, %58650 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %58654, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %58655 = torch_c.to_builtin_tensor %58654 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_56003 = arith.constant 1 : index
    %dim_56004 = tensor.dim %58655, %c1_56003 : tensor<4x?x4x64xcomplex<f32>>
    %58656 = flow.tensor.bitcast %58655 : tensor<4x?x4x64xcomplex<f32>>{%dim_56004} -> tensor<4x?x4x128xf32>{%dim_56004}
    %58657 = torch_c.from_builtin_tensor %58656 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %58657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_56005 = torch.constant.int 5
    %58658 = torch.prims.convert_element_type %58657, %int5_56005 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
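    // Same rotary application, repeated for the device-5 shard (%58509).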
    %int1_56006 = torch.constant.int 1
    %58659 = torch.aten.size.int %58357, %int1_56006 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_56007 = torch.constant.int 0
    %58660 = torch.aten.add.int %int0_56007, %58659 : !torch.int, !torch.int -> !torch.int
    %int0_56008 = torch.constant.int 0
    %int0_56009 = torch.constant.int 0
    %int1_56010 = torch.constant.int 1
    %58661 = torch.aten.slice.Tensor %58577, %int0_56008, %int0_56009, %58660, %int1_56010 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58661, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_56011 = torch.constant.int 1
    %int0_56012 = torch.constant.int 0
    %int9223372036854775807_56013 = torch.constant.int 9223372036854775807
    %int1_56014 = torch.constant.int 1
    %58662 = torch.aten.slice.Tensor %58661, %int1_56011, %int0_56012, %int9223372036854775807_56013, %int1_56014 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58662, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_56015 = torch.constant.int 0
    %58663 = torch.aten.unsqueeze %58662, %int0_56015 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58663, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_56016 = torch.constant.int 2
    %58664 = torch.aten.unsqueeze %58663, %int2_56016 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58664, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_56017 = torch.constant.int 3
    %int0_56018 = torch.constant.int 0
    %int9223372036854775807_56019 = torch.constant.int 9223372036854775807
    %int1_56020 = torch.constant.int 1
    %58665 = torch.aten.slice.Tensor %58664, %int3_56017, %int0_56018, %int9223372036854775807_56019, %int1_56020 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58665, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58666 = torch_c.to_builtin_tensor %58509 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_56021 = arith.constant 1 : index
    %dim_56022 = tensor.dim %58666, %c1_56021 : tensor<4x?x4x128xf16>
    %58667 = flow.tensor.bitcast %58666 : tensor<4x?x4x128xf16>{%dim_56022} -> tensor<4x?x4x64xcomplex<f16>>{%dim_56022}
    %58668 = torch_c.from_builtin_tensor %58667 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %58668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %58669 = torch.aten.mul.Tensor %58668, %58665 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %58669, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %58670 = torch_c.to_builtin_tensor %58669 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_56023 = arith.constant 1 : index
    %dim_56024 = tensor.dim %58670, %c1_56023 : tensor<4x?x4x64xcomplex<f32>>
    %58671 = flow.tensor.bitcast %58670 : tensor<4x?x4x64xcomplex<f32>>{%dim_56024} -> tensor<4x?x4x128xf32>{%dim_56024}
    %58672 = torch_c.from_builtin_tensor %58671 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %58672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_56025 = torch.constant.int 5
    %58673 = torch.prims.convert_element_type %58672, %int5_56025 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
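    // Same rotary application, repeated for the device-6 shard (%58511).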
    %int1_56026 = torch.constant.int 1
    %58674 = torch.aten.size.int %58363, %int1_56026 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_56027 = torch.constant.int 0
    %58675 = torch.aten.add.int %int0_56027, %58674 : !torch.int, !torch.int -> !torch.int
    %int0_56028 = torch.constant.int 0
    %int0_56029 = torch.constant.int 0
    %int1_56030 = torch.constant.int 1
    %58676 = torch.aten.slice.Tensor %58580, %int0_56028, %int0_56029, %58675, %int1_56030 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58676, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_56031 = torch.constant.int 1
    %int0_56032 = torch.constant.int 0
    %int9223372036854775807_56033 = torch.constant.int 9223372036854775807
    %int1_56034 = torch.constant.int 1
    %58677 = torch.aten.slice.Tensor %58676, %int1_56031, %int0_56032, %int9223372036854775807_56033, %int1_56034 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58677, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_56035 = torch.constant.int 0
    %58678 = torch.aten.unsqueeze %58677, %int0_56035 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58678, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_56036 = torch.constant.int 2
    %58679 = torch.aten.unsqueeze %58678, %int2_56036 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58679, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_56037 = torch.constant.int 3
    %int0_56038 = torch.constant.int 0
    %int9223372036854775807_56039 = torch.constant.int 9223372036854775807
    %int1_56040 = torch.constant.int 1
    %58680 = torch.aten.slice.Tensor %58679, %int3_56037, %int0_56038, %int9223372036854775807_56039, %int1_56040 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58680, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58681 = torch_c.to_builtin_tensor %58511 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_56041 = arith.constant 1 : index
    %dim_56042 = tensor.dim %58681, %c1_56041 : tensor<4x?x4x128xf16>
    %58682 = flow.tensor.bitcast %58681 : tensor<4x?x4x128xf16>{%dim_56042} -> tensor<4x?x4x64xcomplex<f16>>{%dim_56042}
    %58683 = torch_c.from_builtin_tensor %58682 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %58683, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %58684 = torch.aten.mul.Tensor %58683, %58680 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %58684, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %58685 = torch_c.to_builtin_tensor %58684 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_56043 = arith.constant 1 : index
    %dim_56044 = tensor.dim %58685, %c1_56043 : tensor<4x?x4x64xcomplex<f32>>
    %58686 = flow.tensor.bitcast %58685 : tensor<4x?x4x64xcomplex<f32>>{%dim_56044} -> tensor<4x?x4x128xf32>{%dim_56044}
    %58687 = torch_c.from_builtin_tensor %58686 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %58687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_56045 = torch.constant.int 5
    %58688 = torch.prims.convert_element_type %58687, %int5_56045 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
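    // Same bitcast / complex-multiply / bitcast sequence for the next [4,?,4,128]
    // shard (%58513), using rotation factors sliced from %58583.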
    %int1_56046 = torch.constant.int 1
    %58689 = torch.aten.size.int %58369, %int1_56046 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_56047 = torch.constant.int 0
    %58690 = torch.aten.add.int %int0_56047, %58689 : !torch.int, !torch.int -> !torch.int
    %int0_56048 = torch.constant.int 0
    %int0_56049 = torch.constant.int 0
    %int1_56050 = torch.constant.int 1
    %58691 = torch.aten.slice.Tensor %58583, %int0_56048, %int0_56049, %58690, %int1_56050 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58691, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_56051 = torch.constant.int 1
    %int0_56052 = torch.constant.int 0
    %int9223372036854775807_56053 = torch.constant.int 9223372036854775807
    %int1_56054 = torch.constant.int 1
    %58692 = torch.aten.slice.Tensor %58691, %int1_56051, %int0_56052, %int9223372036854775807_56053, %int1_56054 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58692, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_56055 = torch.constant.int 0
    %58693 = torch.aten.unsqueeze %58692, %int0_56055 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58693, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_56056 = torch.constant.int 2
    %58694 = torch.aten.unsqueeze %58693, %int2_56056 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58694, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_56057 = torch.constant.int 3
    %int0_56058 = torch.constant.int 0
    %int9223372036854775807_56059 = torch.constant.int 9223372036854775807
    %int1_56060 = torch.constant.int 1
    %58695 = torch.aten.slice.Tensor %58694, %int3_56057, %int0_56058, %int9223372036854775807_56059, %int1_56060 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58695, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58696 = torch_c.to_builtin_tensor %58513 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_56061 = arith.constant 1 : index
    %dim_56062 = tensor.dim %58696, %c1_56061 : tensor<4x?x4x128xf16>
    %58697 = flow.tensor.bitcast %58696 : tensor<4x?x4x128xf16>{%dim_56062} -> tensor<4x?x4x64xcomplex<f16>>{%dim_56062}
    %58698 = torch_c.from_builtin_tensor %58697 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %58698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %58699 = torch.aten.mul.Tensor %58698, %58695 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %58699, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %58700 = torch_c.to_builtin_tensor %58699 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_56063 = arith.constant 1 : index
    %dim_56064 = tensor.dim %58700, %c1_56063 : tensor<4x?x4x64xcomplex<f32>>
    %58701 = flow.tensor.bitcast %58700 : tensor<4x?x4x64xcomplex<f32>>{%dim_56064} -> tensor<4x?x4x128xf32>{%dim_56064}
    %58702 = torch_c.from_builtin_tensor %58701 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %58702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_56065 = torch.constant.int 5
    %58703 = torch.prims.convert_element_type %58702, %int5_56065 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %58703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
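    // The RoPE frequency table is rebuilt from scratch below; reading the ops:
    //   p = arange(131072); i = arange(0, 128, 2)
    //   inv_freq = 1 / 500000^(i/128)               (rope theta 5.0e+05)
    //   angles[p, i/2] = p * inv_freq[i/2]
    //   table = complex(cos(angles), sin(angles))   : [131072, 64] complex<f32>
    // i.e. e^(j*p*inv_freq) for a 131072-position context and head dim 128 (64 complex pairs).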
    %int131072_56066 = torch.constant.int 131072
    %none_56067 = torch.constant.none
    %none_56068 = torch.constant.none
    %cpu_56069 = torch.constant.device "cpu"
    %false_56070 = torch.constant.bool false
    %58704 = torch.aten.arange %int131072_56066, %none_56067, %none_56068, %cpu_56069, %false_56070 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_56071 = torch.constant.int 0
    %int128_56072 = torch.constant.int 128
    %int2_56073 = torch.constant.int 2
    %none_56074 = torch.constant.none
    %none_56075 = torch.constant.none
    %cpu_56076 = torch.constant.device "cpu"
    %false_56077 = torch.constant.bool false
    %58705 = torch.aten.arange.start_step %int0_56071, %int128_56072, %int2_56073, %none_56074, %none_56075, %cpu_56076, %false_56077 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_56078 = torch.constant.int 0
    %int0_56079 = torch.constant.int 0
    %int64_56080 = torch.constant.int 64
    %int1_56081 = torch.constant.int 1
    %58706 = torch.aten.slice.Tensor %58705, %int0_56078, %int0_56079, %int64_56080, %int1_56081 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_56082 = torch.constant.int 6
    %58707 = torch.prims.convert_element_type %58706, %int6_56082 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_56083 = torch.constant.int 128
    %58708 = torch.aten.div.Scalar %58707, %int128_56083 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_56084 = torch.constant.float 5.000000e+05
    %58709 = torch.aten.pow.Scalar %float5.000000e05_56084, %58708 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %58710 = torch.aten.reciprocal %58709 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_56085 = torch.constant.float 1.000000e+00
    %58711 = torch.aten.mul.Scalar %58710, %float1.000000e00_56085 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_56086 = torch.constant.int 131072
    %int1_56087 = torch.constant.int 1
    %58712 = torch.prim.ListConstruct %int131072_56086, %int1_56087 : (!torch.int, !torch.int) -> !torch.list<int>
    %58713 = torch.aten.view %58704, %58712 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %58714 = torch.aten.mul.Tensor %58713, %58711 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %58715 = torch.aten.cos %58714 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %58716 = torch.aten.sin %58714 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %58717 = torch.aten.complex %58715, %58716 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
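    // The table is then copied to every device affinity: eight to_builtin /
    // flow.tensor.transfer / from_builtin round trips targeting @__device_0
    // through @__device_7 (%58720 ... %58741).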
    %58718 = torch_c.to_builtin_tensor %58717 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58719 = flow.tensor.transfer %58718 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %58720 = torch_c.from_builtin_tensor %58719 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58721 = torch_c.to_builtin_tensor %58717 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58722 = flow.tensor.transfer %58721 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %58723 = torch_c.from_builtin_tensor %58722 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58724 = torch_c.to_builtin_tensor %58717 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58725 = flow.tensor.transfer %58724 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %58726 = torch_c.from_builtin_tensor %58725 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58727 = torch_c.to_builtin_tensor %58717 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58728 = flow.tensor.transfer %58727 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %58729 = torch_c.from_builtin_tensor %58728 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58730 = torch_c.to_builtin_tensor %58717 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58731 = flow.tensor.transfer %58730 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %58732 = torch_c.from_builtin_tensor %58731 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58733 = torch_c.to_builtin_tensor %58717 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58734 = flow.tensor.transfer %58733 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %58735 = torch_c.from_builtin_tensor %58734 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58736 = torch_c.to_builtin_tensor %58717 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58737 = flow.tensor.transfer %58736 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %58738 = torch_c.from_builtin_tensor %58737 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %58739 = torch_c.to_builtin_tensor %58717 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %58740 = flow.tensor.transfer %58739 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %58741 = torch_c.from_builtin_tensor %58740 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
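    // Per-device RoPE for the single-head values (presumably K, given the one head per
    // shard): read the dynamic sequence length from that device's [4,?,128] tensor, slice
    // the first seq_len rows of its table copy, unsqueeze to a broadcastable [1,?,1,64],
    // and apply the same complex-multiply pattern to the device's [4,?,1,128] tensor.
    // Device 0 first; devices 1-7 repeat the pattern verbatim below.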
    %int1_56088 = torch.constant.int 1
    %58742 = torch.aten.size.int %58391, %int1_56088 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_56089 = torch.constant.int 0
    %58743 = torch.aten.add.int %int0_56089, %58742 : !torch.int, !torch.int -> !torch.int
    %int0_56090 = torch.constant.int 0
    %int0_56091 = torch.constant.int 0
    %int1_56092 = torch.constant.int 1
    %58744 = torch.aten.slice.Tensor %58720, %int0_56090, %int0_56091, %58743, %int1_56092 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58744, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_56093 = torch.constant.int 1
    %int0_56094 = torch.constant.int 0
    %int9223372036854775807_56095 = torch.constant.int 9223372036854775807
    %int1_56096 = torch.constant.int 1
    %58745 = torch.aten.slice.Tensor %58744, %int1_56093, %int0_56094, %int9223372036854775807_56095, %int1_56096 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58745, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_56097 = torch.constant.int 0
    %58746 = torch.aten.unsqueeze %58745, %int0_56097 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58746, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_56098 = torch.constant.int 2
    %58747 = torch.aten.unsqueeze %58746, %int2_56098 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58747, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_56099 = torch.constant.int 3
    %int0_56100 = torch.constant.int 0
    %int9223372036854775807_56101 = torch.constant.int 9223372036854775807
    %int1_56102 = torch.constant.int 1
    %58748 = torch.aten.slice.Tensor %58747, %int3_56099, %int0_56100, %int9223372036854775807_56101, %int1_56102 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58748, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58749 = torch_c.to_builtin_tensor %58515 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_56103 = arith.constant 1 : index
    %dim_56104 = tensor.dim %58749, %c1_56103 : tensor<4x?x1x128xf16>
    %58750 = flow.tensor.bitcast %58749 : tensor<4x?x1x128xf16>{%dim_56104} -> tensor<4x?x1x64xcomplex<f16>>{%dim_56104}
    %58751 = torch_c.from_builtin_tensor %58750 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %58751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %58752 = torch.aten.mul.Tensor %58751, %58748 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %58753 = torch_c.to_builtin_tensor %58752 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_56105 = arith.constant 1 : index
    %dim_56106 = tensor.dim %58753, %c1_56105 : tensor<4x?x1x64xcomplex<f32>>
    %58754 = flow.tensor.bitcast %58753 : tensor<4x?x1x64xcomplex<f32>>{%dim_56106} -> tensor<4x?x1x128xf32>{%dim_56106}
    %58755 = torch_c.from_builtin_tensor %58754 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %58755, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_56107 = torch.constant.int 5
    %58756 = torch.prims.convert_element_type %58755, %int5_56107 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58756, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
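    // __device_1: same slice-table + RoPE application for this shard.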
    %int1_56108 = torch.constant.int 1
    %58757 = torch.aten.size.int %58397, %int1_56108 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_56109 = torch.constant.int 0
    %58758 = torch.aten.add.int %int0_56109, %58757 : !torch.int, !torch.int -> !torch.int
    %int0_56110 = torch.constant.int 0
    %int0_56111 = torch.constant.int 0
    %int1_56112 = torch.constant.int 1
    %58759 = torch.aten.slice.Tensor %58723, %int0_56110, %int0_56111, %58758, %int1_56112 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58759, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_56113 = torch.constant.int 1
    %int0_56114 = torch.constant.int 0
    %int9223372036854775807_56115 = torch.constant.int 9223372036854775807
    %int1_56116 = torch.constant.int 1
    %58760 = torch.aten.slice.Tensor %58759, %int1_56113, %int0_56114, %int9223372036854775807_56115, %int1_56116 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58760, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_56117 = torch.constant.int 0
    %58761 = torch.aten.unsqueeze %58760, %int0_56117 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58761, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_56118 = torch.constant.int 2
    %58762 = torch.aten.unsqueeze %58761, %int2_56118 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58762, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_56119 = torch.constant.int 3
    %int0_56120 = torch.constant.int 0
    %int9223372036854775807_56121 = torch.constant.int 9223372036854775807
    %int1_56122 = torch.constant.int 1
    %58763 = torch.aten.slice.Tensor %58762, %int3_56119, %int0_56120, %int9223372036854775807_56121, %int1_56122 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58763, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58764 = torch_c.to_builtin_tensor %58517 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_56123 = arith.constant 1 : index
    %dim_56124 = tensor.dim %58764, %c1_56123 : tensor<4x?x1x128xf16>
    %58765 = flow.tensor.bitcast %58764 : tensor<4x?x1x128xf16>{%dim_56124} -> tensor<4x?x1x64xcomplex<f16>>{%dim_56124}
    %58766 = torch_c.from_builtin_tensor %58765 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %58766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %58767 = torch.aten.mul.Tensor %58766, %58763 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58767, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %58768 = torch_c.to_builtin_tensor %58767 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_56125 = arith.constant 1 : index
    %dim_56126 = tensor.dim %58768, %c1_56125 : tensor<4x?x1x64xcomplex<f32>>
    %58769 = flow.tensor.bitcast %58768 : tensor<4x?x1x64xcomplex<f32>>{%dim_56126} -> tensor<4x?x1x128xf32>{%dim_56126}
    %58770 = torch_c.from_builtin_tensor %58769 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %58770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_56127 = torch.constant.int 5
    %58771 = torch.prims.convert_element_type %58770, %int5_56127 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58771, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
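    // __device_2: same slice-table + RoPE application for this shard.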
    %int1_56128 = torch.constant.int 1
    %58772 = torch.aten.size.int %58403, %int1_56128 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_56129 = torch.constant.int 0
    %58773 = torch.aten.add.int %int0_56129, %58772 : !torch.int, !torch.int -> !torch.int
    %int0_56130 = torch.constant.int 0
    %int0_56131 = torch.constant.int 0
    %int1_56132 = torch.constant.int 1
    %58774 = torch.aten.slice.Tensor %58726, %int0_56130, %int0_56131, %58773, %int1_56132 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58774, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_56133 = torch.constant.int 1
    %int0_56134 = torch.constant.int 0
    %int9223372036854775807_56135 = torch.constant.int 9223372036854775807
    %int1_56136 = torch.constant.int 1
    %58775 = torch.aten.slice.Tensor %58774, %int1_56133, %int0_56134, %int9223372036854775807_56135, %int1_56136 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58775, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_56137 = torch.constant.int 0
    %58776 = torch.aten.unsqueeze %58775, %int0_56137 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58776, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_56138 = torch.constant.int 2
    %58777 = torch.aten.unsqueeze %58776, %int2_56138 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58777, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_56139 = torch.constant.int 3
    %int0_56140 = torch.constant.int 0
    %int9223372036854775807_56141 = torch.constant.int 9223372036854775807
    %int1_56142 = torch.constant.int 1
    %58778 = torch.aten.slice.Tensor %58777, %int3_56139, %int0_56140, %int9223372036854775807_56141, %int1_56142 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58778, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58779 = torch_c.to_builtin_tensor %58519 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_56143 = arith.constant 1 : index
    %dim_56144 = tensor.dim %58779, %c1_56143 : tensor<4x?x1x128xf16>
    %58780 = flow.tensor.bitcast %58779 : tensor<4x?x1x128xf16>{%dim_56144} -> tensor<4x?x1x64xcomplex<f16>>{%dim_56144}
    %58781 = torch_c.from_builtin_tensor %58780 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %58781, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %58782 = torch.aten.mul.Tensor %58781, %58778 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %58783 = torch_c.to_builtin_tensor %58782 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_56145 = arith.constant 1 : index
    %dim_56146 = tensor.dim %58783, %c1_56145 : tensor<4x?x1x64xcomplex<f32>>
    %58784 = flow.tensor.bitcast %58783 : tensor<4x?x1x64xcomplex<f32>>{%dim_56146} -> tensor<4x?x1x128xf32>{%dim_56146}
    %58785 = torch_c.from_builtin_tensor %58784 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %58785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_56147 = torch.constant.int 5
    %58786 = torch.prims.convert_element_type %58785, %int5_56147 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
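    // __device_3: same slice-table + RoPE application for this shard.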
    %int1_56148 = torch.constant.int 1
    %58787 = torch.aten.size.int %58409, %int1_56148 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_56149 = torch.constant.int 0
    %58788 = torch.aten.add.int %int0_56149, %58787 : !torch.int, !torch.int -> !torch.int
    %int0_56150 = torch.constant.int 0
    %int0_56151 = torch.constant.int 0
    %int1_56152 = torch.constant.int 1
    %58789 = torch.aten.slice.Tensor %58729, %int0_56150, %int0_56151, %58788, %int1_56152 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58789, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_56153 = torch.constant.int 1
    %int0_56154 = torch.constant.int 0
    %int9223372036854775807_56155 = torch.constant.int 9223372036854775807
    %int1_56156 = torch.constant.int 1
    %58790 = torch.aten.slice.Tensor %58789, %int1_56153, %int0_56154, %int9223372036854775807_56155, %int1_56156 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58790, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_56157 = torch.constant.int 0
    %58791 = torch.aten.unsqueeze %58790, %int0_56157 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58791, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_56158 = torch.constant.int 2
    %58792 = torch.aten.unsqueeze %58791, %int2_56158 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58792, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_56159 = torch.constant.int 3
    %int0_56160 = torch.constant.int 0
    %int9223372036854775807_56161 = torch.constant.int 9223372036854775807
    %int1_56162 = torch.constant.int 1
    %58793 = torch.aten.slice.Tensor %58792, %int3_56159, %int0_56160, %int9223372036854775807_56161, %int1_56162 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58793, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58794 = torch_c.to_builtin_tensor %58521 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_56163 = arith.constant 1 : index
    %dim_56164 = tensor.dim %58794, %c1_56163 : tensor<4x?x1x128xf16>
    %58795 = flow.tensor.bitcast %58794 : tensor<4x?x1x128xf16>{%dim_56164} -> tensor<4x?x1x64xcomplex<f16>>{%dim_56164}
    %58796 = torch_c.from_builtin_tensor %58795 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %58796, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %58797 = torch.aten.mul.Tensor %58796, %58793 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %58798 = torch_c.to_builtin_tensor %58797 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_56165 = arith.constant 1 : index
    %dim_56166 = tensor.dim %58798, %c1_56165 : tensor<4x?x1x64xcomplex<f32>>
    %58799 = flow.tensor.bitcast %58798 : tensor<4x?x1x64xcomplex<f32>>{%dim_56166} -> tensor<4x?x1x128xf32>{%dim_56166}
    %58800 = torch_c.from_builtin_tensor %58799 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %58800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_56167 = torch.constant.int 5
    %58801 = torch.prims.convert_element_type %58800, %int5_56167 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
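    // __device_4: same slice-table + RoPE application for this shard.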
    %int1_56168 = torch.constant.int 1
    %58802 = torch.aten.size.int %58415, %int1_56168 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_56169 = torch.constant.int 0
    %58803 = torch.aten.add.int %int0_56169, %58802 : !torch.int, !torch.int -> !torch.int
    %int0_56170 = torch.constant.int 0
    %int0_56171 = torch.constant.int 0
    %int1_56172 = torch.constant.int 1
    %58804 = torch.aten.slice.Tensor %58732, %int0_56170, %int0_56171, %58803, %int1_56172 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58804, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_56173 = torch.constant.int 1
    %int0_56174 = torch.constant.int 0
    %int9223372036854775807_56175 = torch.constant.int 9223372036854775807
    %int1_56176 = torch.constant.int 1
    %58805 = torch.aten.slice.Tensor %58804, %int1_56173, %int0_56174, %int9223372036854775807_56175, %int1_56176 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58805, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_56177 = torch.constant.int 0
    %58806 = torch.aten.unsqueeze %58805, %int0_56177 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58806, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_56178 = torch.constant.int 2
    %58807 = torch.aten.unsqueeze %58806, %int2_56178 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58807, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_56179 = torch.constant.int 3
    %int0_56180 = torch.constant.int 0
    %int9223372036854775807_56181 = torch.constant.int 9223372036854775807
    %int1_56182 = torch.constant.int 1
    %58808 = torch.aten.slice.Tensor %58807, %int3_56179, %int0_56180, %int9223372036854775807_56181, %int1_56182 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58808, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58809 = torch_c.to_builtin_tensor %58523 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_56183 = arith.constant 1 : index
    %dim_56184 = tensor.dim %58809, %c1_56183 : tensor<4x?x1x128xf16>
    %58810 = flow.tensor.bitcast %58809 : tensor<4x?x1x128xf16>{%dim_56184} -> tensor<4x?x1x64xcomplex<f16>>{%dim_56184}
    %58811 = torch_c.from_builtin_tensor %58810 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %58811, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %58812 = torch.aten.mul.Tensor %58811, %58808 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58812, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %58813 = torch_c.to_builtin_tensor %58812 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_56185 = arith.constant 1 : index
    %dim_56186 = tensor.dim %58813, %c1_56185 : tensor<4x?x1x64xcomplex<f32>>
    %58814 = flow.tensor.bitcast %58813 : tensor<4x?x1x64xcomplex<f32>>{%dim_56186} -> tensor<4x?x1x128xf32>{%dim_56186}
    %58815 = torch_c.from_builtin_tensor %58814 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %58815, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_56187 = torch.constant.int 5
    %58816 = torch.prims.convert_element_type %58815, %int5_56187 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
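    // __device_5: same slice-table + RoPE application for this shard.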
    %int1_56188 = torch.constant.int 1
    %58817 = torch.aten.size.int %58421, %int1_56188 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_56189 = torch.constant.int 0
    %58818 = torch.aten.add.int %int0_56189, %58817 : !torch.int, !torch.int -> !torch.int
    %int0_56190 = torch.constant.int 0
    %int0_56191 = torch.constant.int 0
    %int1_56192 = torch.constant.int 1
    %58819 = torch.aten.slice.Tensor %58735, %int0_56190, %int0_56191, %58818, %int1_56192 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58819, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_56193 = torch.constant.int 1
    %int0_56194 = torch.constant.int 0
    %int9223372036854775807_56195 = torch.constant.int 9223372036854775807
    %int1_56196 = torch.constant.int 1
    %58820 = torch.aten.slice.Tensor %58819, %int1_56193, %int0_56194, %int9223372036854775807_56195, %int1_56196 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58820, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_56197 = torch.constant.int 0
    %58821 = torch.aten.unsqueeze %58820, %int0_56197 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58821, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_56198 = torch.constant.int 2
    %58822 = torch.aten.unsqueeze %58821, %int2_56198 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58822, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_56199 = torch.constant.int 3
    %int0_56200 = torch.constant.int 0
    %int9223372036854775807_56201 = torch.constant.int 9223372036854775807
    %int1_56202 = torch.constant.int 1
    %58823 = torch.aten.slice.Tensor %58822, %int3_56199, %int0_56200, %int9223372036854775807_56201, %int1_56202 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58823, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58824 = torch_c.to_builtin_tensor %58525 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_56203 = arith.constant 1 : index
    %dim_56204 = tensor.dim %58824, %c1_56203 : tensor<4x?x1x128xf16>
    %58825 = flow.tensor.bitcast %58824 : tensor<4x?x1x128xf16>{%dim_56204} -> tensor<4x?x1x64xcomplex<f16>>{%dim_56204}
    %58826 = torch_c.from_builtin_tensor %58825 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %58826, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %58827 = torch.aten.mul.Tensor %58826, %58823 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %58828 = torch_c.to_builtin_tensor %58827 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_56205 = arith.constant 1 : index
    %dim_56206 = tensor.dim %58828, %c1_56205 : tensor<4x?x1x64xcomplex<f32>>
    %58829 = flow.tensor.bitcast %58828 : tensor<4x?x1x64xcomplex<f32>>{%dim_56206} -> tensor<4x?x1x128xf32>{%dim_56206}
    %58830 = torch_c.from_builtin_tensor %58829 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %58830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_56207 = torch.constant.int 5
    %58831 = torch.prims.convert_element_type %58830, %int5_56207 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
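    // __device_6: same slice-table + RoPE application for this shard.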
    %int1_56208 = torch.constant.int 1
    %58832 = torch.aten.size.int %58427, %int1_56208 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_56209 = torch.constant.int 0
    %58833 = torch.aten.add.int %int0_56209, %58832 : !torch.int, !torch.int -> !torch.int
    %int0_56210 = torch.constant.int 0
    %int0_56211 = torch.constant.int 0
    %int1_56212 = torch.constant.int 1
    %58834 = torch.aten.slice.Tensor %58738, %int0_56210, %int0_56211, %58833, %int1_56212 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58834, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_56213 = torch.constant.int 1
    %int0_56214 = torch.constant.int 0
    %int9223372036854775807_56215 = torch.constant.int 9223372036854775807
    %int1_56216 = torch.constant.int 1
    %58835 = torch.aten.slice.Tensor %58834, %int1_56213, %int0_56214, %int9223372036854775807_56215, %int1_56216 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58835, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_56217 = torch.constant.int 0
    %58836 = torch.aten.unsqueeze %58835, %int0_56217 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58836, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_56218 = torch.constant.int 2
    %58837 = torch.aten.unsqueeze %58836, %int2_56218 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58837, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_56219 = torch.constant.int 3
    %int0_56220 = torch.constant.int 0
    %int9223372036854775807_56221 = torch.constant.int 9223372036854775807
    %int1_56222 = torch.constant.int 1
    %58838 = torch.aten.slice.Tensor %58837, %int3_56219, %int0_56220, %int9223372036854775807_56221, %int1_56222 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58838, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58839 = torch_c.to_builtin_tensor %58527 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_56223 = arith.constant 1 : index
    %dim_56224 = tensor.dim %58839, %c1_56223 : tensor<4x?x1x128xf16>
    %58840 = flow.tensor.bitcast %58839 : tensor<4x?x1x128xf16>{%dim_56224} -> tensor<4x?x1x64xcomplex<f16>>{%dim_56224}
    %58841 = torch_c.from_builtin_tensor %58840 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %58841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %58842 = torch.aten.mul.Tensor %58841, %58838 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58842, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %58843 = torch_c.to_builtin_tensor %58842 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_56225 = arith.constant 1 : index
    %dim_56226 = tensor.dim %58843, %c1_56225 : tensor<4x?x1x64xcomplex<f32>>
    %58844 = flow.tensor.bitcast %58843 : tensor<4x?x1x64xcomplex<f32>>{%dim_56226} -> tensor<4x?x1x128xf32>{%dim_56226}
    %58845 = torch_c.from_builtin_tensor %58844 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %58845, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_56227 = torch.constant.int 5
    %58846 = torch.prims.convert_element_type %58845, %int5_56227 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58846, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
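    // __device_7: same slice-table + RoPE application for this shard.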
    %int1_56228 = torch.constant.int 1
    %58847 = torch.aten.size.int %58433, %int1_56228 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_56229 = torch.constant.int 0
    %58848 = torch.aten.add.int %int0_56229, %58847 : !torch.int, !torch.int -> !torch.int
    %int0_56230 = torch.constant.int 0
    %int0_56231 = torch.constant.int 0
    %int1_56232 = torch.constant.int 1
    %58849 = torch.aten.slice.Tensor %58741, %int0_56230, %int0_56231, %58848, %int1_56232 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58849, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_56233 = torch.constant.int 1
    %int0_56234 = torch.constant.int 0
    %int9223372036854775807_56235 = torch.constant.int 9223372036854775807
    %int1_56236 = torch.constant.int 1
    %58850 = torch.aten.slice.Tensor %58849, %int1_56233, %int0_56234, %int9223372036854775807_56235, %int1_56236 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %58850, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_56237 = torch.constant.int 0
    %58851 = torch.aten.unsqueeze %58850, %int0_56237 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %58851, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_56238 = torch.constant.int 2
    %58852 = torch.aten.unsqueeze %58851, %int2_56238 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58852, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_56239 = torch.constant.int 3
    %int0_56240 = torch.constant.int 0
    %int9223372036854775807_56241 = torch.constant.int 9223372036854775807
    %int1_56242 = torch.constant.int 1
    %58853 = torch.aten.slice.Tensor %58852, %int3_56239, %int0_56240, %int9223372036854775807_56241, %int1_56242 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58853, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %58854 = torch_c.to_builtin_tensor %58529 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_56243 = arith.constant 1 : index
    %dim_56244 = tensor.dim %58854, %c1_56243 : tensor<4x?x1x128xf16>
    %58855 = flow.tensor.bitcast %58854 : tensor<4x?x1x128xf16>{%dim_56244} -> tensor<4x?x1x64xcomplex<f16>>{%dim_56244}
    %58856 = torch_c.from_builtin_tensor %58855 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %58856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %58857 = torch.aten.mul.Tensor %58856, %58853 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %58857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %58858 = torch_c.to_builtin_tensor %58857 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_56245 = arith.constant 1 : index
    %dim_56246 = tensor.dim %58858, %c1_56245 : tensor<4x?x1x64xcomplex<f32>>
    %58859 = flow.tensor.bitcast %58858 : tensor<4x?x1x64xcomplex<f32>>{%dim_56246} -> tensor<4x?x1x128xf32>{%dim_56246}
    %58860 = torch_c.from_builtin_tensor %58859 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %58860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_56247 = torch.constant.int 5
    %58861 = torch.prims.convert_element_type %58860, %int5_56247 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %58861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
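    // Cache-slot index computation (interpretation; only the *64 and +60 arithmetic is in
    // the IR): each per-device [4,?] si64 page-id tensor (%2364, %2367, ...) is scaled by
    // 64 -- presumably the number of cache sub-blocks per page -- then offset by the
    // constant 60, which would select a fixed sub-block within every page (e.g. this
    // layer's K slot). The exact paged-cache layout is an assumption.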
    %int64_56248 = torch.constant.int 64
    %58862 = torch.aten.mul.Scalar %2364, %int64_56248 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58862, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_56249 = torch.constant.int 64
    %58863 = torch.aten.mul.Scalar %2367, %int64_56249 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58863, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_56250 = torch.constant.int 64
    %58864 = torch.aten.mul.Scalar %2370, %int64_56250 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58864, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_56251 = torch.constant.int 64
    %58865 = torch.aten.mul.Scalar %2373, %int64_56251 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58865, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_56252 = torch.constant.int 64
    %58866 = torch.aten.mul.Scalar %2376, %int64_56252 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58866, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_56253 = torch.constant.int 64
    %58867 = torch.aten.mul.Scalar %2379, %int64_56253 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58867, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_56254 = torch.constant.int 64
    %58868 = torch.aten.mul.Scalar %2382, %int64_56254 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58868, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_56255 = torch.constant.int 64
    %58869 = torch.aten.mul.Scalar %2385, %int64_56255 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58869, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int60 = torch.constant.int 60
    %int1_56256 = torch.constant.int 1
    %58870 = torch.aten.add.Scalar %58862, %int60, %int1_56256 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58870, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int60_56257 = torch.constant.int 60
    %int1_56258 = torch.constant.int 1
    %58871 = torch.aten.add.Scalar %58863, %int60_56257, %int1_56258 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58871, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int60_56259 = torch.constant.int 60
    %int1_56260 = torch.constant.int 1
    %58872 = torch.aten.add.Scalar %58864, %int60_56259, %int1_56260 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58872, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int60_56261 = torch.constant.int 60
    %int1_56262 = torch.constant.int 1
    %58873 = torch.aten.add.Scalar %58865, %int60_56261, %int1_56262 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58873, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int60_56263 = torch.constant.int 60
    %int1_56264 = torch.constant.int 1
    %58874 = torch.aten.add.Scalar %58866, %int60_56263, %int1_56264 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58874, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int60_56265 = torch.constant.int 60
    %int1_56266 = torch.constant.int 1
    %58875 = torch.aten.add.Scalar %58867, %int60_56265, %int1_56266 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58875, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int60_56267 = torch.constant.int 60
    %int1_56268 = torch.constant.int 1
    %58876 = torch.aten.add.Scalar %58868, %int60_56267, %int1_56268 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58876, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int60_56269 = torch.constant.int 60
    %int1_56270 = torch.constant.int 1
    %58877 = torch.aten.add.Scalar %58869, %int60_56269, %int1_56270 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58877, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
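    // Each roped [4,?,1,128] f16 value is reinterpreted as [4, %3095, 16, 1, 128]:
    // batch 4, a dynamic page count (%3095), 16 positions per page (presumably the
    // block stride), 1 head per shard, head dim 128 -- i.e. the paged layout the
    // slot indices above address.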
    %int4_56271 = torch.constant.int 4
    %int16_56272 = torch.constant.int 16
    %int1_56273 = torch.constant.int 1
    %int128_56274 = torch.constant.int 128
    %58878 = torch.prim.ListConstruct %int4_56271, %3095, %int16_56272, %int1_56273, %int128_56274 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58879 = torch.aten.view %58756, %58878 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58879, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56275 = torch.constant.int 4
    %int16_56276 = torch.constant.int 16
    %int1_56277 = torch.constant.int 1
    %int128_56278 = torch.constant.int 128
    %58880 = torch.prim.ListConstruct %int4_56275, %3095, %int16_56276, %int1_56277, %int128_56278 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58881 = torch.aten.view %58771, %58880 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58881, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56279 = torch.constant.int 4
    %int16_56280 = torch.constant.int 16
    %int1_56281 = torch.constant.int 1
    %int128_56282 = torch.constant.int 128
    %58882 = torch.prim.ListConstruct %int4_56279, %3095, %int16_56280, %int1_56281, %int128_56282 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58883 = torch.aten.view %58786, %58882 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58883, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56283 = torch.constant.int 4
    %int16_56284 = torch.constant.int 16
    %int1_56285 = torch.constant.int 1
    %int128_56286 = torch.constant.int 128
    %58884 = torch.prim.ListConstruct %int4_56283, %3095, %int16_56284, %int1_56285, %int128_56286 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58885 = torch.aten.view %58801, %58884 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58885, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56287 = torch.constant.int 4
    %int16_56288 = torch.constant.int 16
    %int1_56289 = torch.constant.int 1
    %int128_56290 = torch.constant.int 128
    %58886 = torch.prim.ListConstruct %int4_56287, %3095, %int16_56288, %int1_56289, %int128_56290 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58887 = torch.aten.view %58816, %58886 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58887, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56291 = torch.constant.int 4
    %int16_56292 = torch.constant.int 16
    %int1_56293 = torch.constant.int 1
    %int128_56294 = torch.constant.int 128
    %58888 = torch.prim.ListConstruct %int4_56291, %3095, %int16_56292, %int1_56293, %int128_56294 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58889 = torch.aten.view %58831, %58888 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58889, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56295 = torch.constant.int 4
    %int16_56296 = torch.constant.int 16
    %int1_56297 = torch.constant.int 1
    %int128_56298 = torch.constant.int 128
    %58890 = torch.prim.ListConstruct %int4_56295, %3095, %int16_56296, %int1_56297, %int128_56298 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58891 = torch.aten.view %58846, %58890 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58891, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56299 = torch.constant.int 4
    %int16_56300 = torch.constant.int 16
    %int1_56301 = torch.constant.int 1
    %int128_56302 = torch.constant.int 128
    %58892 = torch.prim.ListConstruct %int4_56299, %3095, %int16_56300, %int1_56301, %int128_56302 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58893 = torch.aten.view %58861, %58892 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58893, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
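    // The batch and page dims are then folded (4 * %3095) so each shard becomes a
    // flat [?,16,1,128] sequence of pages.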
    %int4_56303 = torch.constant.int 4
    %58894 = torch.aten.mul.int %int4_56303, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56304 = torch.constant.int 16
    %int1_56305 = torch.constant.int 1
    %int128_56306 = torch.constant.int 128
    %58895 = torch.prim.ListConstruct %58894, %int16_56304, %int1_56305, %int128_56306 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58896 = torch.aten.view %58879, %58895 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58896, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56307 = torch.constant.int 4
    %58897 = torch.aten.mul.int %int4_56307, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56308 = torch.constant.int 16
    %int1_56309 = torch.constant.int 1
    %int128_56310 = torch.constant.int 128
    %58898 = torch.prim.ListConstruct %58897, %int16_56308, %int1_56309, %int128_56310 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58899 = torch.aten.view %58881, %58898 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58899, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56311 = torch.constant.int 4
    %58900 = torch.aten.mul.int %int4_56311, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56312 = torch.constant.int 16
    %int1_56313 = torch.constant.int 1
    %int128_56314 = torch.constant.int 128
    %58901 = torch.prim.ListConstruct %58900, %int16_56312, %int1_56313, %int128_56314 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58902 = torch.aten.view %58883, %58901 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58902, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56315 = torch.constant.int 4
    %58903 = torch.aten.mul.int %int4_56315, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56316 = torch.constant.int 16
    %int1_56317 = torch.constant.int 1
    %int128_56318 = torch.constant.int 128
    %58904 = torch.prim.ListConstruct %58903, %int16_56316, %int1_56317, %int128_56318 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58905 = torch.aten.view %58885, %58904 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58905, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56319 = torch.constant.int 4
    %58906 = torch.aten.mul.int %int4_56319, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56320 = torch.constant.int 16
    %int1_56321 = torch.constant.int 1
    %int128_56322 = torch.constant.int 128
    %58907 = torch.prim.ListConstruct %58906, %int16_56320, %int1_56321, %int128_56322 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58908 = torch.aten.view %58887, %58907 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58908, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56323 = torch.constant.int 4
    %58909 = torch.aten.mul.int %int4_56323, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56324 = torch.constant.int 16
    %int1_56325 = torch.constant.int 1
    %int128_56326 = torch.constant.int 128
    %58910 = torch.prim.ListConstruct %58909, %int16_56324, %int1_56325, %int128_56326 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58911 = torch.aten.view %58889, %58910 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58911, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56327 = torch.constant.int 4
    %58912 = torch.aten.mul.int %int4_56327, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56328 = torch.constant.int 16
    %int1_56329 = torch.constant.int 1
    %int128_56330 = torch.constant.int 128
    %58913 = torch.prim.ListConstruct %58912, %int16_56328, %int1_56329, %int128_56330 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58914 = torch.aten.view %58891, %58913 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58914, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56331 = torch.constant.int 4
    %58915 = torch.aten.mul.int %int4_56331, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56332 = torch.constant.int 16
    %int1_56333 = torch.constant.int 1
    %int128_56334 = torch.constant.int 128
    %58916 = torch.prim.ListConstruct %58915, %int16_56332, %int1_56333, %int128_56334 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58917 = torch.aten.view %58893, %58916 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58917, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
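    // Annotation: flatten the [4,?] si64 page-index tensors to [?] (s0 * 4 entries)
    // so each can serve directly as the index list for the cache index_put below.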
    %int4_56335 = torch.constant.int 4
    %58918 = torch.aten.mul.int %int4_56335, %3095 : !torch.int, !torch.int -> !torch.int
    %58919 = torch.prim.ListConstruct %58918 : (!torch.int) -> !torch.list<int>
    %58920 = torch.aten.view %58870, %58919 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %58920, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56336 = torch.constant.int 4
    %58921 = torch.aten.mul.int %int4_56336, %3095 : !torch.int, !torch.int -> !torch.int
    %58922 = torch.prim.ListConstruct %58921 : (!torch.int) -> !torch.list<int>
    %58923 = torch.aten.view %58871, %58922 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %58923, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56337 = torch.constant.int 4
    %58924 = torch.aten.mul.int %int4_56337, %3095 : !torch.int, !torch.int -> !torch.int
    %58925 = torch.prim.ListConstruct %58924 : (!torch.int) -> !torch.list<int>
    %58926 = torch.aten.view %58872, %58925 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %58926, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56338 = torch.constant.int 4
    %58927 = torch.aten.mul.int %int4_56338, %3095 : !torch.int, !torch.int -> !torch.int
    %58928 = torch.prim.ListConstruct %58927 : (!torch.int) -> !torch.list<int>
    %58929 = torch.aten.view %58873, %58928 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %58929, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56339 = torch.constant.int 4
    %58930 = torch.aten.mul.int %int4_56339, %3095 : !torch.int, !torch.int -> !torch.int
    %58931 = torch.prim.ListConstruct %58930 : (!torch.int) -> !torch.list<int>
    %58932 = torch.aten.view %58874, %58931 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %58932, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56340 = torch.constant.int 4
    %58933 = torch.aten.mul.int %int4_56340, %3095 : !torch.int, !torch.int -> !torch.int
    %58934 = torch.prim.ListConstruct %58933 : (!torch.int) -> !torch.list<int>
    %58935 = torch.aten.view %58875, %58934 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %58935, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56341 = torch.constant.int 4
    %58936 = torch.aten.mul.int %int4_56341, %3095 : !torch.int, !torch.int -> !torch.int
    %58937 = torch.prim.ListConstruct %58936 : (!torch.int) -> !torch.list<int>
    %58938 = torch.aten.view %58876, %58937 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %58938, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56342 = torch.constant.int 4
    %58939 = torch.aten.mul.int %int4_56342, %3095 : !torch.int, !torch.int -> !torch.int
    %58940 = torch.prim.ListConstruct %58939 : (!torch.int) -> !torch.list<int>
    %58941 = torch.aten.view %58877, %58940 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %58941, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
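    // Annotation: the same 5-D view, now applied to the second operand set
    // (%58531..%58545, presumably the companion K-or-V halves of the same block;
    // the dump itself does not name them): [4,?,1,128] viewed as [4,?,16,1,128].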
    %int4_56343 = torch.constant.int 4
    %int16_56344 = torch.constant.int 16
    %int1_56345 = torch.constant.int 1
    %int128_56346 = torch.constant.int 128
    %58942 = torch.prim.ListConstruct %int4_56343, %3095, %int16_56344, %int1_56345, %int128_56346 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58943 = torch.aten.view %58531, %58942 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58943, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56347 = torch.constant.int 4
    %int16_56348 = torch.constant.int 16
    %int1_56349 = torch.constant.int 1
    %int128_56350 = torch.constant.int 128
    %58944 = torch.prim.ListConstruct %int4_56347, %3095, %int16_56348, %int1_56349, %int128_56350 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58945 = torch.aten.view %58533, %58944 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58945, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56351 = torch.constant.int 4
    %int16_56352 = torch.constant.int 16
    %int1_56353 = torch.constant.int 1
    %int128_56354 = torch.constant.int 128
    %58946 = torch.prim.ListConstruct %int4_56351, %3095, %int16_56352, %int1_56353, %int128_56354 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58947 = torch.aten.view %58535, %58946 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58947, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56355 = torch.constant.int 4
    %int16_56356 = torch.constant.int 16
    %int1_56357 = torch.constant.int 1
    %int128_56358 = torch.constant.int 128
    %58948 = torch.prim.ListConstruct %int4_56355, %3095, %int16_56356, %int1_56357, %int128_56358 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58949 = torch.aten.view %58537, %58948 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58949, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56359 = torch.constant.int 4
    %int16_56360 = torch.constant.int 16
    %int1_56361 = torch.constant.int 1
    %int128_56362 = torch.constant.int 128
    %58950 = torch.prim.ListConstruct %int4_56359, %3095, %int16_56360, %int1_56361, %int128_56362 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58951 = torch.aten.view %58539, %58950 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58951, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56363 = torch.constant.int 4
    %int16_56364 = torch.constant.int 16
    %int1_56365 = torch.constant.int 1
    %int128_56366 = torch.constant.int 128
    %58952 = torch.prim.ListConstruct %int4_56363, %3095, %int16_56364, %int1_56365, %int128_56366 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58953 = torch.aten.view %58541, %58952 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58953, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56367 = torch.constant.int 4
    %int16_56368 = torch.constant.int 16
    %int1_56369 = torch.constant.int 1
    %int128_56370 = torch.constant.int 128
    %58954 = torch.prim.ListConstruct %int4_56367, %3095, %int16_56368, %int1_56369, %int128_56370 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58955 = torch.aten.view %58543, %58954 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58955, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_56371 = torch.constant.int 4
    %int16_56372 = torch.constant.int 16
    %int1_56373 = torch.constant.int 1
    %int128_56374 = torch.constant.int 128
    %58956 = torch.prim.ListConstruct %int4_56371, %3095, %int16_56372, %int1_56373, %int128_56374 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58957 = torch.aten.view %58545, %58956 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %58957, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
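    // Annotation: flatten those views to [?,16,1,128] as well, mirroring the
    // first operand set above.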
    %int4_56375 = torch.constant.int 4
    %58958 = torch.aten.mul.int %int4_56375, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56376 = torch.constant.int 16
    %int1_56377 = torch.constant.int 1
    %int128_56378 = torch.constant.int 128
    %58959 = torch.prim.ListConstruct %58958, %int16_56376, %int1_56377, %int128_56378 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58960 = torch.aten.view %58943, %58959 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58960, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56379 = torch.constant.int 4
    %58961 = torch.aten.mul.int %int4_56379, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56380 = torch.constant.int 16
    %int1_56381 = torch.constant.int 1
    %int128_56382 = torch.constant.int 128
    %58962 = torch.prim.ListConstruct %58961, %int16_56380, %int1_56381, %int128_56382 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58963 = torch.aten.view %58945, %58962 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58963, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56383 = torch.constant.int 4
    %58964 = torch.aten.mul.int %int4_56383, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56384 = torch.constant.int 16
    %int1_56385 = torch.constant.int 1
    %int128_56386 = torch.constant.int 128
    %58965 = torch.prim.ListConstruct %58964, %int16_56384, %int1_56385, %int128_56386 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58966 = torch.aten.view %58947, %58965 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58966, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56387 = torch.constant.int 4
    %58967 = torch.aten.mul.int %int4_56387, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56388 = torch.constant.int 16
    %int1_56389 = torch.constant.int 1
    %int128_56390 = torch.constant.int 128
    %58968 = torch.prim.ListConstruct %58967, %int16_56388, %int1_56389, %int128_56390 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58969 = torch.aten.view %58949, %58968 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58969, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56391 = torch.constant.int 4
    %58970 = torch.aten.mul.int %int4_56391, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56392 = torch.constant.int 16
    %int1_56393 = torch.constant.int 1
    %int128_56394 = torch.constant.int 128
    %58971 = torch.prim.ListConstruct %58970, %int16_56392, %int1_56393, %int128_56394 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58972 = torch.aten.view %58951, %58971 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58972, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56395 = torch.constant.int 4
    %58973 = torch.aten.mul.int %int4_56395, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56396 = torch.constant.int 16
    %int1_56397 = torch.constant.int 1
    %int128_56398 = torch.constant.int 128
    %58974 = torch.prim.ListConstruct %58973, %int16_56396, %int1_56397, %int128_56398 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58975 = torch.aten.view %58953, %58974 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58975, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56399 = torch.constant.int 4
    %58976 = torch.aten.mul.int %int4_56399, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56400 = torch.constant.int 16
    %int1_56401 = torch.constant.int 1
    %int128_56402 = torch.constant.int 128
    %58977 = torch.prim.ListConstruct %58976, %int16_56400, %int1_56401, %int128_56402 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58978 = torch.aten.view %58955, %58977 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58978, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_56403 = torch.constant.int 4
    %58979 = torch.aten.mul.int %int4_56403, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_56404 = torch.constant.int 16
    %int1_56405 = torch.constant.int 1
    %int128_56406 = torch.constant.int 128
    %58980 = torch.prim.ListConstruct %58979, %int16_56404, %int1_56405, %int128_56406 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %58981 = torch.aten.view %58957, %58980 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %58981, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
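    // Annotation: shift every page index by +1. The cache buffer below is viewed
    // as [?,32,2,16,1,128]; the size-2 axis interleaves two sub-slots per block
    // (by all appearances one each for K and V), so the second operand set lands
    // in the neighbouring slot of the same block.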
    %int1_56407 = torch.constant.int 1
    %int1_56408 = torch.constant.int 1
    %58982 = torch.aten.add.Scalar %58870, %int1_56407, %int1_56408 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58982, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_56409 = torch.constant.int 1
    %int1_56410 = torch.constant.int 1
    %58983 = torch.aten.add.Scalar %58871, %int1_56409, %int1_56410 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58983, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_56411 = torch.constant.int 1
    %int1_56412 = torch.constant.int 1
    %58984 = torch.aten.add.Scalar %58872, %int1_56411, %int1_56412 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58984, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_56413 = torch.constant.int 1
    %int1_56414 = torch.constant.int 1
    %58985 = torch.aten.add.Scalar %58873, %int1_56413, %int1_56414 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58985, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_56415 = torch.constant.int 1
    %int1_56416 = torch.constant.int 1
    %58986 = torch.aten.add.Scalar %58874, %int1_56415, %int1_56416 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58986, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_56417 = torch.constant.int 1
    %int1_56418 = torch.constant.int 1
    %58987 = torch.aten.add.Scalar %58875, %int1_56417, %int1_56418 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58987, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_56419 = torch.constant.int 1
    %int1_56420 = torch.constant.int 1
    %58988 = torch.aten.add.Scalar %58876, %int1_56419, %int1_56420 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58988, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_56421 = torch.constant.int 1
    %int1_56422 = torch.constant.int 1
    %58989 = torch.aten.add.Scalar %58877, %int1_56421, %int1_56422 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %58989, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
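    // Annotation: flatten the shifted [4,?] indices to [?], as with the base set.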
    %int4_56423 = torch.constant.int 4
    %58990 = torch.aten.mul.int %int4_56423, %3095 : !torch.int, !torch.int -> !torch.int
    %58991 = torch.prim.ListConstruct %58990 : (!torch.int) -> !torch.list<int>
    %58992 = torch.aten.view %58982, %58991 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %58992, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56424 = torch.constant.int 4
    %58993 = torch.aten.mul.int %int4_56424, %3095 : !torch.int, !torch.int -> !torch.int
    %58994 = torch.prim.ListConstruct %58993 : (!torch.int) -> !torch.list<int>
    %58995 = torch.aten.view %58983, %58994 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %58995, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56425 = torch.constant.int 4
    %58996 = torch.aten.mul.int %int4_56425, %3095 : !torch.int, !torch.int -> !torch.int
    %58997 = torch.prim.ListConstruct %58996 : (!torch.int) -> !torch.list<int>
    %58998 = torch.aten.view %58984, %58997 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %58998, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56426 = torch.constant.int 4
    %58999 = torch.aten.mul.int %int4_56426, %3095 : !torch.int, !torch.int -> !torch.int
    %59000 = torch.prim.ListConstruct %58999 : (!torch.int) -> !torch.list<int>
    %59001 = torch.aten.view %58985, %59000 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59001, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56427 = torch.constant.int 4
    %59002 = torch.aten.mul.int %int4_56427, %3095 : !torch.int, !torch.int -> !torch.int
    %59003 = torch.prim.ListConstruct %59002 : (!torch.int) -> !torch.list<int>
    %59004 = torch.aten.view %58986, %59003 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59004, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56428 = torch.constant.int 4
    %59005 = torch.aten.mul.int %int4_56428, %3095 : !torch.int, !torch.int -> !torch.int
    %59006 = torch.prim.ListConstruct %59005 : (!torch.int) -> !torch.list<int>
    %59007 = torch.aten.view %58987, %59006 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59007, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56429 = torch.constant.int 4
    %59008 = torch.aten.mul.int %int4_56429, %3095 : !torch.int, !torch.int -> !torch.int
    %59009 = torch.prim.ListConstruct %59008 : (!torch.int) -> !torch.list<int>
    %59010 = torch.aten.view %58988, %59009 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59010, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_56430 = torch.constant.int 4
    %59011 = torch.aten.mul.int %int4_56430, %3095 : !torch.int, !torch.int -> !torch.int
    %59012 = torch.prim.ListConstruct %59011 : (!torch.int) -> !torch.list<int>
    %59013 = torch.aten.view %58989, %59012 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59013, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
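    // Annotation: per shard, concatenate base and shifted index vectors into a
    // single [?] tensor of s0 * 8 entries so both sub-slots scatter in one pass.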
    %59014 = torch.prim.ListConstruct %58920, %58992 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_56431 = torch.constant.int 0
    %59015 = torch.aten.cat %59014, %int0_56431 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59015, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %59016 = torch.prim.ListConstruct %58923, %58995 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_56432 = torch.constant.int 0
    %59017 = torch.aten.cat %59016, %int0_56432 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59017, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %59018 = torch.prim.ListConstruct %58926, %58998 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_56433 = torch.constant.int 0
    %59019 = torch.aten.cat %59018, %int0_56433 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59019, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %59020 = torch.prim.ListConstruct %58929, %59001 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_56434 = torch.constant.int 0
    %59021 = torch.aten.cat %59020, %int0_56434 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59021, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %59022 = torch.prim.ListConstruct %58932, %59004 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_56435 = torch.constant.int 0
    %59023 = torch.aten.cat %59022, %int0_56435 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59023, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %59024 = torch.prim.ListConstruct %58935, %59007 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_56436 = torch.constant.int 0
    %59025 = torch.aten.cat %59024, %int0_56436 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59025, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %59026 = torch.prim.ListConstruct %58938, %59010 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_56437 = torch.constant.int 0
    %59027 = torch.aten.cat %59026, %int0_56437 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59027, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %59028 = torch.prim.ListConstruct %58941, %59013 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_56438 = torch.constant.int 0
    %59029 = torch.aten.cat %59028, %int0_56438 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %59029, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
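    // Annotation: concatenate the matching value rows the same way, giving
    // s0 * 8 rows of [16,1,128] per shard, aligned one-to-one with the indices.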
    %59030 = torch.prim.ListConstruct %58896, %58960 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_56439 = torch.constant.int 0
    %59031 = torch.aten.cat %59030, %int0_56439 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59031, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59032 = torch.prim.ListConstruct %58899, %58963 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_56440 = torch.constant.int 0
    %59033 = torch.aten.cat %59032, %int0_56440 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59033, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59034 = torch.prim.ListConstruct %58902, %58966 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_56441 = torch.constant.int 0
    %59035 = torch.aten.cat %59034, %int0_56441 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59035, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59036 = torch.prim.ListConstruct %58905, %58969 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_56442 = torch.constant.int 0
    %59037 = torch.aten.cat %59036, %int0_56442 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59037, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59038 = torch.prim.ListConstruct %58908, %58972 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_56443 = torch.constant.int 0
    %59039 = torch.aten.cat %59038, %int0_56443 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59039, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59040 = torch.prim.ListConstruct %58911, %58975 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_56444 = torch.constant.int 0
    %59041 = torch.aten.cat %59040, %int0_56444 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59041, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59042 = torch.prim.ListConstruct %58914, %58978 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_56445 = torch.constant.int 0
    %59043 = torch.aten.cat %59042, %int0_56445 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59043, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59044 = torch.prim.ListConstruct %58917, %58981 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_56446 = torch.constant.int 0
    %59045 = torch.aten.cat %59044, %int0_56446 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59045, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
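    // Annotation: paged-cache update for shard 0: view the flat [?,131072] page
    // buffer as [?,32,2,16,1,128] (pages x 32 x 2 sub-slots x 16 tokens x 1 head
    // x 128 dims; the factor names are inferred), flatten to [?,16,1,128] rows,
    // scatter the new rows with torch.aten.index_put (accumulate = false), then
    // view back to [?,131072]. The identical sequence repeats below for the other
    // seven shards (%57208, %57220, %57232, %57244, %57256, %57268, %57280).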
    %int32_56447 = torch.constant.int 32
    %int2_56448 = torch.constant.int 2
    %int16_56449 = torch.constant.int 16
    %int1_56450 = torch.constant.int 1
    %int128_56451 = torch.constant.int 128
    %59046 = torch.prim.ListConstruct %3023, %int32_56447, %int2_56448, %int16_56449, %int1_56450, %int128_56451 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59047 = torch.aten.view %57196, %59046 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59047, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_56452 = torch.constant.int 32
    %59048 = torch.aten.mul.int %3023, %int32_56452 : !torch.int, !torch.int -> !torch.int
    %int2_56453 = torch.constant.int 2
    %59049 = torch.aten.mul.int %59048, %int2_56453 : !torch.int, !torch.int -> !torch.int
    %int16_56454 = torch.constant.int 16
    %int1_56455 = torch.constant.int 1
    %int128_56456 = torch.constant.int 128
    %59050 = torch.prim.ListConstruct %59049, %int16_56454, %int1_56455, %int128_56456 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59051 = torch.aten.view %59047, %59050 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59051, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59052 = torch.prim.ListConstruct %59015 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_56457 = torch.constant.bool false
    %59053 = torch.aten.index_put %59051, %59052, %59031, %false_56457 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59053, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_56458 = torch.constant.int 32
    %int2_56459 = torch.constant.int 2
    %int16_56460 = torch.constant.int 16
    %int1_56461 = torch.constant.int 1
    %int128_56462 = torch.constant.int 128
    %59054 = torch.prim.ListConstruct %3023, %int32_56458, %int2_56459, %int16_56460, %int1_56461, %int128_56462 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59055 = torch.aten.view %59053, %59054 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59055, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_56463 = torch.constant.int 131072
    %59056 = torch.prim.ListConstruct %3023, %int131072_56463 : (!torch.int, !torch.int) -> !torch.list<int>
    %59057 = torch.aten.view %59055, %59056 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %59057, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_56464 = torch.constant.int 32
    %int2_56465 = torch.constant.int 2
    %int16_56466 = torch.constant.int 16
    %int1_56467 = torch.constant.int 1
    %int128_56468 = torch.constant.int 128
    %59058 = torch.prim.ListConstruct %3026, %int32_56464, %int2_56465, %int16_56466, %int1_56467, %int128_56468 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59059 = torch.aten.view %57208, %59058 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59059, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_56469 = torch.constant.int 32
    %59060 = torch.aten.mul.int %3026, %int32_56469 : !torch.int, !torch.int -> !torch.int
    %int2_56470 = torch.constant.int 2
    %59061 = torch.aten.mul.int %59060, %int2_56470 : !torch.int, !torch.int -> !torch.int
    %int16_56471 = torch.constant.int 16
    %int1_56472 = torch.constant.int 1
    %int128_56473 = torch.constant.int 128
    %59062 = torch.prim.ListConstruct %59061, %int16_56471, %int1_56472, %int128_56473 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59063 = torch.aten.view %59059, %59062 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59063, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59064 = torch.prim.ListConstruct %59017 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_56474 = torch.constant.bool false
    %59065 = torch.aten.index_put %59063, %59064, %59033, %false_56474 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59065, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_56475 = torch.constant.int 32
    %int2_56476 = torch.constant.int 2
    %int16_56477 = torch.constant.int 16
    %int1_56478 = torch.constant.int 1
    %int128_56479 = torch.constant.int 128
    %59066 = torch.prim.ListConstruct %3026, %int32_56475, %int2_56476, %int16_56477, %int1_56478, %int128_56479 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59067 = torch.aten.view %59065, %59066 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59067, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_56480 = torch.constant.int 131072
    %59068 = torch.prim.ListConstruct %3026, %int131072_56480 : (!torch.int, !torch.int) -> !torch.list<int>
    %59069 = torch.aten.view %59067, %59068 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %59069, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_56481 = torch.constant.int 32
    %int2_56482 = torch.constant.int 2
    %int16_56483 = torch.constant.int 16
    %int1_56484 = torch.constant.int 1
    %int128_56485 = torch.constant.int 128
    %59070 = torch.prim.ListConstruct %3029, %int32_56481, %int2_56482, %int16_56483, %int1_56484, %int128_56485 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59071 = torch.aten.view %57220, %59070 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59071, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_56486 = torch.constant.int 32
    %59072 = torch.aten.mul.int %3029, %int32_56486 : !torch.int, !torch.int -> !torch.int
    %int2_56487 = torch.constant.int 2
    %59073 = torch.aten.mul.int %59072, %int2_56487 : !torch.int, !torch.int -> !torch.int
    %int16_56488 = torch.constant.int 16
    %int1_56489 = torch.constant.int 1
    %int128_56490 = torch.constant.int 128
    %59074 = torch.prim.ListConstruct %59073, %int16_56488, %int1_56489, %int128_56490 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59075 = torch.aten.view %59071, %59074 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59075, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59076 = torch.prim.ListConstruct %59019 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_56491 = torch.constant.bool false
    %59077 = torch.aten.index_put %59075, %59076, %59035, %false_56491 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59077, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_56492 = torch.constant.int 32
    %int2_56493 = torch.constant.int 2
    %int16_56494 = torch.constant.int 16
    %int1_56495 = torch.constant.int 1
    %int128_56496 = torch.constant.int 128
    %59078 = torch.prim.ListConstruct %3029, %int32_56492, %int2_56493, %int16_56494, %int1_56495, %int128_56496 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59079 = torch.aten.view %59077, %59078 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59079, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_56497 = torch.constant.int 131072
    %59080 = torch.prim.ListConstruct %3029, %int131072_56497 : (!torch.int, !torch.int) -> !torch.list<int>
    %59081 = torch.aten.view %59079, %59080 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %59081, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_56498 = torch.constant.int 32
    %int2_56499 = torch.constant.int 2
    %int16_56500 = torch.constant.int 16
    %int1_56501 = torch.constant.int 1
    %int128_56502 = torch.constant.int 128
    %59082 = torch.prim.ListConstruct %3032, %int32_56498, %int2_56499, %int16_56500, %int1_56501, %int128_56502 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59083 = torch.aten.view %57232, %59082 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59083, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_56503 = torch.constant.int 32
    %59084 = torch.aten.mul.int %3032, %int32_56503 : !torch.int, !torch.int -> !torch.int
    %int2_56504 = torch.constant.int 2
    %59085 = torch.aten.mul.int %59084, %int2_56504 : !torch.int, !torch.int -> !torch.int
    %int16_56505 = torch.constant.int 16
    %int1_56506 = torch.constant.int 1
    %int128_56507 = torch.constant.int 128
    %59086 = torch.prim.ListConstruct %59085, %int16_56505, %int1_56506, %int128_56507 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59087 = torch.aten.view %59083, %59086 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59087, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59088 = torch.prim.ListConstruct %59021 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_56508 = torch.constant.bool false
    %59089 = torch.aten.index_put %59087, %59088, %59037, %false_56508 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59089, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_56509 = torch.constant.int 32
    %int2_56510 = torch.constant.int 2
    %int16_56511 = torch.constant.int 16
    %int1_56512 = torch.constant.int 1
    %int128_56513 = torch.constant.int 128
    %59090 = torch.prim.ListConstruct %3032, %int32_56509, %int2_56510, %int16_56511, %int1_56512, %int128_56513 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59091 = torch.aten.view %59089, %59090 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59091, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_56514 = torch.constant.int 131072
    %59092 = torch.prim.ListConstruct %3032, %int131072_56514 : (!torch.int, !torch.int) -> !torch.list<int>
    %59093 = torch.aten.view %59091, %59092 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %59093, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_56515 = torch.constant.int 32
    %int2_56516 = torch.constant.int 2
    %int16_56517 = torch.constant.int 16
    %int1_56518 = torch.constant.int 1
    %int128_56519 = torch.constant.int 128
    %59094 = torch.prim.ListConstruct %3035, %int32_56515, %int2_56516, %int16_56517, %int1_56518, %int128_56519 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59095 = torch.aten.view %57244, %59094 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59095, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_56520 = torch.constant.int 32
    %59096 = torch.aten.mul.int %3035, %int32_56520 : !torch.int, !torch.int -> !torch.int
    %int2_56521 = torch.constant.int 2
    %59097 = torch.aten.mul.int %59096, %int2_56521 : !torch.int, !torch.int -> !torch.int
    %int16_56522 = torch.constant.int 16
    %int1_56523 = torch.constant.int 1
    %int128_56524 = torch.constant.int 128
    %59098 = torch.prim.ListConstruct %59097, %int16_56522, %int1_56523, %int128_56524 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59099 = torch.aten.view %59095, %59098 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59099, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59100 = torch.prim.ListConstruct %59023 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_56525 = torch.constant.bool false
    %59101 = torch.aten.index_put %59099, %59100, %59039, %false_56525 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59101, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_56526 = torch.constant.int 32
    %int2_56527 = torch.constant.int 2
    %int16_56528 = torch.constant.int 16
    %int1_56529 = torch.constant.int 1
    %int128_56530 = torch.constant.int 128
    %59102 = torch.prim.ListConstruct %3035, %int32_56526, %int2_56527, %int16_56528, %int1_56529, %int128_56530 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59103 = torch.aten.view %59101, %59102 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59103, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_56531 = torch.constant.int 131072
    %59104 = torch.prim.ListConstruct %3035, %int131072_56531 : (!torch.int, !torch.int) -> !torch.list<int>
    %59105 = torch.aten.view %59103, %59104 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %59105, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_56532 = torch.constant.int 32
    %int2_56533 = torch.constant.int 2
    %int16_56534 = torch.constant.int 16
    %int1_56535 = torch.constant.int 1
    %int128_56536 = torch.constant.int 128
    %59106 = torch.prim.ListConstruct %3038, %int32_56532, %int2_56533, %int16_56534, %int1_56535, %int128_56536 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59107 = torch.aten.view %57256, %59106 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59107, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_56537 = torch.constant.int 32
    %59108 = torch.aten.mul.int %3038, %int32_56537 : !torch.int, !torch.int -> !torch.int
    %int2_56538 = torch.constant.int 2
    %59109 = torch.aten.mul.int %59108, %int2_56538 : !torch.int, !torch.int -> !torch.int
    %int16_56539 = torch.constant.int 16
    %int1_56540 = torch.constant.int 1
    %int128_56541 = torch.constant.int 128
    %59110 = torch.prim.ListConstruct %59109, %int16_56539, %int1_56540, %int128_56541 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59111 = torch.aten.view %59107, %59110 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59111, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59112 = torch.prim.ListConstruct %59025 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_56542 = torch.constant.bool false
    %59113 = torch.aten.index_put %59111, %59112, %59041, %false_56542 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59113, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_56543 = torch.constant.int 32
    %int2_56544 = torch.constant.int 2
    %int16_56545 = torch.constant.int 16
    %int1_56546 = torch.constant.int 1
    %int128_56547 = torch.constant.int 128
    %59114 = torch.prim.ListConstruct %3038, %int32_56543, %int2_56544, %int16_56545, %int1_56546, %int128_56547 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59115 = torch.aten.view %59113, %59114 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59115, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_56548 = torch.constant.int 131072
    %59116 = torch.prim.ListConstruct %3038, %int131072_56548 : (!torch.int, !torch.int) -> !torch.list<int>
    %59117 = torch.aten.view %59115, %59116 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %59117, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_56549 = torch.constant.int 32
    %int2_56550 = torch.constant.int 2
    %int16_56551 = torch.constant.int 16
    %int1_56552 = torch.constant.int 1
    %int128_56553 = torch.constant.int 128
    %59118 = torch.prim.ListConstruct %3041, %int32_56549, %int2_56550, %int16_56551, %int1_56552, %int128_56553 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59119 = torch.aten.view %57268, %59118 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59119, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_56554 = torch.constant.int 32
    %59120 = torch.aten.mul.int %3041, %int32_56554 : !torch.int, !torch.int -> !torch.int
    %int2_56555 = torch.constant.int 2
    %59121 = torch.aten.mul.int %59120, %int2_56555 : !torch.int, !torch.int -> !torch.int
    %int16_56556 = torch.constant.int 16
    %int1_56557 = torch.constant.int 1
    %int128_56558 = torch.constant.int 128
    %59122 = torch.prim.ListConstruct %59121, %int16_56556, %int1_56557, %int128_56558 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59123 = torch.aten.view %59119, %59122 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59123, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59124 = torch.prim.ListConstruct %59027 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_56559 = torch.constant.bool false
    %59125 = torch.aten.index_put %59123, %59124, %59043, %false_56559 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59125, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_56560 = torch.constant.int 32
    %int2_56561 = torch.constant.int 2
    %int16_56562 = torch.constant.int 16
    %int1_56563 = torch.constant.int 1
    %int128_56564 = torch.constant.int 128
    %59126 = torch.prim.ListConstruct %3041, %int32_56560, %int2_56561, %int16_56562, %int1_56563, %int128_56564 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59127 = torch.aten.view %59125, %59126 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59127, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_56565 = torch.constant.int 131072
    %59128 = torch.prim.ListConstruct %3041, %int131072_56565 : (!torch.int, !torch.int) -> !torch.list<int>
    %59129 = torch.aten.view %59127, %59128 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %59129, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_56566 = torch.constant.int 32
    %int2_56567 = torch.constant.int 2
    %int16_56568 = torch.constant.int 16
    %int1_56569 = torch.constant.int 1
    %int128_56570 = torch.constant.int 128
    %59130 = torch.prim.ListConstruct %3044, %int32_56566, %int2_56567, %int16_56568, %int1_56569, %int128_56570 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59131 = torch.aten.view %57280, %59130 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59131, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_56571 = torch.constant.int 32
    %59132 = torch.aten.mul.int %3044, %int32_56571 : !torch.int, !torch.int -> !torch.int
    %int2_56572 = torch.constant.int 2
    %59133 = torch.aten.mul.int %59132, %int2_56572 : !torch.int, !torch.int -> !torch.int
    %int16_56573 = torch.constant.int 16
    %int1_56574 = torch.constant.int 1
    %int128_56575 = torch.constant.int 128
    %59134 = torch.prim.ListConstruct %59133, %int16_56573, %int1_56574, %int128_56575 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59135 = torch.aten.view %59131, %59134 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59135, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %59136 = torch.prim.ListConstruct %59029 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_56576 = torch.constant.bool false
    %59137 = torch.aten.index_put %59135, %59136, %59045, %false_56576 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %59137, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_56577 = torch.constant.int 32
    %int2_56578 = torch.constant.int 2
    %int16_56579 = torch.constant.int 16
    %int1_56580 = torch.constant.int 1
    %int128_56581 = torch.constant.int 128
    %59138 = torch.prim.ListConstruct %3044, %int32_56577, %int2_56578, %int16_56579, %int1_56580, %int128_56581 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59139 = torch.aten.view %59137, %59138 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %59139, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_56582 = torch.constant.int 131072
    %59140 = torch.prim.ListConstruct %3044, %int131072_56582 : (!torch.int, !torch.int) -> !torch.list<int>
    %59141 = torch.aten.view %59139, %59140 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.bind_symbolic_shape %59141, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
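    // Annotation: cache writes complete; the dump now prepares attention inputs.
    // Each per-shard [4,?,1,128] tensor is unsqueezed at dim -2 to [4,?,1,1,128]
    // so its single KV head can be broadcast in the expands that follow.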
    %int-2_56583 = torch.constant.int -2
    %59142 = torch.aten.unsqueeze %58756, %int-2_56583 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56584 = torch.constant.int -2
    %59143 = torch.aten.unsqueeze %58771, %int-2_56584 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56585 = torch.constant.int -2
    %59144 = torch.aten.unsqueeze %58786, %int-2_56585 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56586 = torch.constant.int -2
    %59145 = torch.aten.unsqueeze %58801, %int-2_56586 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56587 = torch.constant.int -2
    %59146 = torch.aten.unsqueeze %58816, %int-2_56587 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56588 = torch.constant.int -2
    %59147 = torch.aten.unsqueeze %58831, %int-2_56588 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56589 = torch.constant.int -2
    %59148 = torch.aten.unsqueeze %58846, %int-2_56589 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56590 = torch.constant.int -2
    %59149 = torch.aten.unsqueeze %58861, %int-2_56590 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
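    // Annotation: expand each [4,?,1,1,128] tensor to [4,?,1,4,128] (implicit =
    // false, so a broadcast view rather than a copy): the one KV head per shard
    // is replicated across the 4 query heads it serves, which reads as the usual
    // grouped-query-attention replication, though the dump does not label it.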
    %int4_56591 = torch.constant.int 4
    %int1_56592 = torch.constant.int 1
    %int4_56593 = torch.constant.int 4
    %int128_56594 = torch.constant.int 128
    %59150 = torch.prim.ListConstruct %int4_56591, %58742, %int1_56592, %int4_56593, %int128_56594 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56595 = torch.constant.bool false
    %59151 = torch.aten.expand %59142, %59150, %false_56595 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56596 = torch.constant.int 4
    %int1_56597 = torch.constant.int 1
    %int4_56598 = torch.constant.int 4
    %int128_56599 = torch.constant.int 128
    %59152 = torch.prim.ListConstruct %int4_56596, %58742, %int1_56597, %int4_56598, %int128_56599 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56600 = torch.constant.bool false
    %59153 = torch.aten.expand %59143, %59152, %false_56600 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56601 = torch.constant.int 4
    %int1_56602 = torch.constant.int 1
    %int4_56603 = torch.constant.int 4
    %int128_56604 = torch.constant.int 128
    %59154 = torch.prim.ListConstruct %int4_56601, %58742, %int1_56602, %int4_56603, %int128_56604 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56605 = torch.constant.bool false
    %59155 = torch.aten.expand %59144, %59154, %false_56605 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56606 = torch.constant.int 4
    %int1_56607 = torch.constant.int 1
    %int4_56608 = torch.constant.int 4
    %int128_56609 = torch.constant.int 128
    %59156 = torch.prim.ListConstruct %int4_56606, %58742, %int1_56607, %int4_56608, %int128_56609 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56610 = torch.constant.bool false
    %59157 = torch.aten.expand %59145, %59156, %false_56610 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56611 = torch.constant.int 4
    %int1_56612 = torch.constant.int 1
    %int4_56613 = torch.constant.int 4
    %int128_56614 = torch.constant.int 128
    %59158 = torch.prim.ListConstruct %int4_56611, %58742, %int1_56612, %int4_56613, %int128_56614 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56615 = torch.constant.bool false
    %59159 = torch.aten.expand %59146, %59158, %false_56615 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56616 = torch.constant.int 4
    %int1_56617 = torch.constant.int 1
    %int4_56618 = torch.constant.int 4
    %int128_56619 = torch.constant.int 128
    %59160 = torch.prim.ListConstruct %int4_56616, %58742, %int1_56617, %int4_56618, %int128_56619 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56620 = torch.constant.bool false
    %59161 = torch.aten.expand %59147, %59160, %false_56620 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56621 = torch.constant.int 4
    %int1_56622 = torch.constant.int 1
    %int4_56623 = torch.constant.int 4
    %int128_56624 = torch.constant.int 128
    %59162 = torch.prim.ListConstruct %int4_56621, %58742, %int1_56622, %int4_56623, %int128_56624 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56625 = torch.constant.bool false
    %59163 = torch.aten.expand %59148, %59162, %false_56625 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56626 = torch.constant.int 4
    %int1_56627 = torch.constant.int 1
    %int4_56628 = torch.constant.int 4
    %int128_56629 = torch.constant.int 128
    %59164 = torch.prim.ListConstruct %int4_56626, %58742, %int1_56627, %int4_56628, %int128_56629 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56630 = torch.constant.bool false
    %59165 = torch.aten.expand %59149, %59164, %false_56630 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
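    // Collapse the repeat axis: view [4,?,1,4,128] -> [4,?,4,128], yielding 4
    // effective K heads per device (4 per device x 8 devices).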
    %int4_56631 = torch.constant.int 4
    %int4_56632 = torch.constant.int 4
    %int128_56633 = torch.constant.int 128
    %59166 = torch.prim.ListConstruct %int4_56631, %58742, %int4_56632, %int128_56633 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59167 = torch.aten.view %59151, %59166 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56634 = torch.constant.int 4
    %int4_56635 = torch.constant.int 4
    %int128_56636 = torch.constant.int 128
    %59168 = torch.prim.ListConstruct %int4_56634, %58742, %int4_56635, %int128_56636 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59169 = torch.aten.view %59153, %59168 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59169, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56637 = torch.constant.int 4
    %int4_56638 = torch.constant.int 4
    %int128_56639 = torch.constant.int 128
    %59170 = torch.prim.ListConstruct %int4_56637, %58742, %int4_56638, %int128_56639 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59171 = torch.aten.view %59155, %59170 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59171, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56640 = torch.constant.int 4
    %int4_56641 = torch.constant.int 4
    %int128_56642 = torch.constant.int 128
    %59172 = torch.prim.ListConstruct %int4_56640, %58742, %int4_56641, %int128_56642 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59173 = torch.aten.view %59157, %59172 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56643 = torch.constant.int 4
    %int4_56644 = torch.constant.int 4
    %int128_56645 = torch.constant.int 128
    %59174 = torch.prim.ListConstruct %int4_56643, %58742, %int4_56644, %int128_56645 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59175 = torch.aten.view %59159, %59174 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59175, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56646 = torch.constant.int 4
    %int4_56647 = torch.constant.int 4
    %int128_56648 = torch.constant.int 128
    %59176 = torch.prim.ListConstruct %int4_56646, %58742, %int4_56647, %int128_56648 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59177 = torch.aten.view %59161, %59176 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59177, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56649 = torch.constant.int 4
    %int4_56650 = torch.constant.int 4
    %int128_56651 = torch.constant.int 128
    %59178 = torch.prim.ListConstruct %int4_56649, %58742, %int4_56650, %int128_56651 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59179 = torch.aten.view %59163, %59178 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56652 = torch.constant.int 4
    %int4_56653 = torch.constant.int 4
    %int128_56654 = torch.constant.int 128
    %59180 = torch.prim.ListConstruct %int4_56652, %58742, %int4_56653, %int128_56654 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59181 = torch.aten.view %59165, %59180 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59181, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
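    // Same unsqueeze step for the 8 per-device V shards.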
    %int-2_56655 = torch.constant.int -2
    %59182 = torch.aten.unsqueeze %58531, %int-2_56655 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59182, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56656 = torch.constant.int -2
    %59183 = torch.aten.unsqueeze %58533, %int-2_56656 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59183, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56657 = torch.constant.int -2
    %59184 = torch.aten.unsqueeze %58535, %int-2_56657 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59184, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56658 = torch.constant.int -2
    %59185 = torch.aten.unsqueeze %58537, %int-2_56658 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56659 = torch.constant.int -2
    %59186 = torch.aten.unsqueeze %58539, %int-2_56659 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59186, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56660 = torch.constant.int -2
    %59187 = torch.aten.unsqueeze %58541, %int-2_56660 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59187, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56661 = torch.constant.int -2
    %59188 = torch.aten.unsqueeze %58543, %int-2_56661 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_56662 = torch.constant.int -2
    %59189 = torch.aten.unsqueeze %58545, %int-2_56662 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %59189, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int1_56663 = torch.constant.int 1
    %59190 = torch.aten.size.int %58455, %int1_56663 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
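    // %59190 is the dynamic sequence length (dim 1 of %58455), reused in the
    // expand/view shape lists for the V shards below.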
    %int4_56664 = torch.constant.int 4
    %int1_56665 = torch.constant.int 1
    %int4_56666 = torch.constant.int 4
    %int128_56667 = torch.constant.int 128
    %59191 = torch.prim.ListConstruct %int4_56664, %59190, %int1_56665, %int4_56666, %int128_56667 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56668 = torch.constant.bool false
    %59192 = torch.aten.expand %59182, %59191, %false_56668 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59192, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56669 = torch.constant.int 4
    %int1_56670 = torch.constant.int 1
    %int4_56671 = torch.constant.int 4
    %int128_56672 = torch.constant.int 128
    %59193 = torch.prim.ListConstruct %int4_56669, %59190, %int1_56670, %int4_56671, %int128_56672 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56673 = torch.constant.bool false
    %59194 = torch.aten.expand %59183, %59193, %false_56673 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56674 = torch.constant.int 4
    %int1_56675 = torch.constant.int 1
    %int4_56676 = torch.constant.int 4
    %int128_56677 = torch.constant.int 128
    %59195 = torch.prim.ListConstruct %int4_56674, %59190, %int1_56675, %int4_56676, %int128_56677 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56678 = torch.constant.bool false
    %59196 = torch.aten.expand %59184, %59195, %false_56678 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59196, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56679 = torch.constant.int 4
    %int1_56680 = torch.constant.int 1
    %int4_56681 = torch.constant.int 4
    %int128_56682 = torch.constant.int 128
    %59197 = torch.prim.ListConstruct %int4_56679, %59190, %int1_56680, %int4_56681, %int128_56682 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56683 = torch.constant.bool false
    %59198 = torch.aten.expand %59185, %59197, %false_56683 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59198, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56684 = torch.constant.int 4
    %int1_56685 = torch.constant.int 1
    %int4_56686 = torch.constant.int 4
    %int128_56687 = torch.constant.int 128
    %59199 = torch.prim.ListConstruct %int4_56684, %59190, %int1_56685, %int4_56686, %int128_56687 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56688 = torch.constant.bool false
    %59200 = torch.aten.expand %59186, %59199, %false_56688 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56689 = torch.constant.int 4
    %int1_56690 = torch.constant.int 1
    %int4_56691 = torch.constant.int 4
    %int128_56692 = torch.constant.int 128
    %59201 = torch.prim.ListConstruct %int4_56689, %59190, %int1_56690, %int4_56691, %int128_56692 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56693 = torch.constant.bool false
    %59202 = torch.aten.expand %59187, %59201, %false_56693 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59202, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56694 = torch.constant.int 4
    %int1_56695 = torch.constant.int 1
    %int4_56696 = torch.constant.int 4
    %int128_56697 = torch.constant.int 128
    %59203 = torch.prim.ListConstruct %int4_56694, %59190, %int1_56695, %int4_56696, %int128_56697 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56698 = torch.constant.bool false
    %59204 = torch.aten.expand %59188, %59203, %false_56698 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59204, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_56699 = torch.constant.int 4
    %int1_56700 = torch.constant.int 1
    %int4_56701 = torch.constant.int 4
    %int128_56702 = torch.constant.int 128
    %59205 = torch.prim.ListConstruct %int4_56699, %59190, %int1_56700, %int4_56701, %int128_56702 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_56703 = torch.constant.bool false
    %59206 = torch.aten.expand %59189, %59205, %false_56703 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %59206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
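    // Collapse the V repeat axis as well: [4,?,1,4,128] -> [4,?,4,128].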
    %int4_56704 = torch.constant.int 4
    %int4_56705 = torch.constant.int 4
    %int128_56706 = torch.constant.int 128
    %59207 = torch.prim.ListConstruct %int4_56704, %59190, %int4_56705, %int128_56706 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59208 = torch.aten.view %59192, %59207 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59208, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56707 = torch.constant.int 4
    %int4_56708 = torch.constant.int 4
    %int128_56709 = torch.constant.int 128
    %59209 = torch.prim.ListConstruct %int4_56707, %59190, %int4_56708, %int128_56709 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59210 = torch.aten.view %59194, %59209 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59210, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56710 = torch.constant.int 4
    %int4_56711 = torch.constant.int 4
    %int128_56712 = torch.constant.int 128
    %59211 = torch.prim.ListConstruct %int4_56710, %59190, %int4_56711, %int128_56712 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59212 = torch.aten.view %59196, %59211 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56713 = torch.constant.int 4
    %int4_56714 = torch.constant.int 4
    %int128_56715 = torch.constant.int 128
    %59213 = torch.prim.ListConstruct %int4_56713, %59190, %int4_56714, %int128_56715 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59214 = torch.aten.view %59198, %59213 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59214, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56716 = torch.constant.int 4
    %int4_56717 = torch.constant.int 4
    %int128_56718 = torch.constant.int 128
    %59215 = torch.prim.ListConstruct %int4_56716, %59190, %int4_56717, %int128_56718 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59216 = torch.aten.view %59200, %59215 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59216, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56719 = torch.constant.int 4
    %int4_56720 = torch.constant.int 4
    %int128_56721 = torch.constant.int 128
    %59217 = torch.prim.ListConstruct %int4_56719, %59190, %int4_56720, %int128_56721 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59218 = torch.aten.view %59202, %59217 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56722 = torch.constant.int 4
    %int4_56723 = torch.constant.int 4
    %int128_56724 = torch.constant.int 128
    %59219 = torch.prim.ListConstruct %int4_56722, %59190, %int4_56723, %int128_56724 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59220 = torch.aten.view %59204, %59219 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59220, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_56725 = torch.constant.int 4
    %int4_56726 = torch.constant.int 4
    %int128_56727 = torch.constant.int 128
    %59221 = torch.prim.ListConstruct %int4_56725, %59190, %int4_56726, %int128_56727 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59222 = torch.aten.view %59206, %59221 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59222, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
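    // Transpose Q (%58598...%58703), the expanded K (%59167...%59181), and the
    // expanded V (%59208...%59222) from [batch, seq, heads, dim] to
    // [batch, heads, seq, dim], the layout the attention op expects.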
    %int1_56728 = torch.constant.int 1
    %int2_56729 = torch.constant.int 2
    %59223 = torch.aten.transpose.int %58598, %int1_56728, %int2_56729 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59223, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56730 = torch.constant.int 1
    %int2_56731 = torch.constant.int 2
    %59224 = torch.aten.transpose.int %58613, %int1_56730, %int2_56731 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59224, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56732 = torch.constant.int 1
    %int2_56733 = torch.constant.int 2
    %59225 = torch.aten.transpose.int %58628, %int1_56732, %int2_56733 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59225, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56734 = torch.constant.int 1
    %int2_56735 = torch.constant.int 2
    %59226 = torch.aten.transpose.int %58643, %int1_56734, %int2_56735 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59226, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56736 = torch.constant.int 1
    %int2_56737 = torch.constant.int 2
    %59227 = torch.aten.transpose.int %58658, %int1_56736, %int2_56737 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59227, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56738 = torch.constant.int 1
    %int2_56739 = torch.constant.int 2
    %59228 = torch.aten.transpose.int %58673, %int1_56738, %int2_56739 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59228, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56740 = torch.constant.int 1
    %int2_56741 = torch.constant.int 2
    %59229 = torch.aten.transpose.int %58688, %int1_56740, %int2_56741 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59229, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56742 = torch.constant.int 1
    %int2_56743 = torch.constant.int 2
    %59230 = torch.aten.transpose.int %58703, %int1_56742, %int2_56743 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59230, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56744 = torch.constant.int 1
    %int2_56745 = torch.constant.int 2
    %59231 = torch.aten.transpose.int %59167, %int1_56744, %int2_56745 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59231, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56746 = torch.constant.int 1
    %int2_56747 = torch.constant.int 2
    %59232 = torch.aten.transpose.int %59169, %int1_56746, %int2_56747 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59232, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56748 = torch.constant.int 1
    %int2_56749 = torch.constant.int 2
    %59233 = torch.aten.transpose.int %59171, %int1_56748, %int2_56749 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59233, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56750 = torch.constant.int 1
    %int2_56751 = torch.constant.int 2
    %59234 = torch.aten.transpose.int %59173, %int1_56750, %int2_56751 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59234, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56752 = torch.constant.int 1
    %int2_56753 = torch.constant.int 2
    %59235 = torch.aten.transpose.int %59175, %int1_56752, %int2_56753 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59235, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56754 = torch.constant.int 1
    %int2_56755 = torch.constant.int 2
    %59236 = torch.aten.transpose.int %59177, %int1_56754, %int2_56755 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59236, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56756 = torch.constant.int 1
    %int2_56757 = torch.constant.int 2
    %59237 = torch.aten.transpose.int %59179, %int1_56756, %int2_56757 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59237, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56758 = torch.constant.int 1
    %int2_56759 = torch.constant.int 2
    %59238 = torch.aten.transpose.int %59181, %int1_56758, %int2_56759 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59238, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56760 = torch.constant.int 1
    %int2_56761 = torch.constant.int 2
    %59239 = torch.aten.transpose.int %59208, %int1_56760, %int2_56761 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59239, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56762 = torch.constant.int 1
    %int2_56763 = torch.constant.int 2
    %59240 = torch.aten.transpose.int %59210, %int1_56762, %int2_56763 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59240, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56764 = torch.constant.int 1
    %int2_56765 = torch.constant.int 2
    %59241 = torch.aten.transpose.int %59212, %int1_56764, %int2_56765 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59241, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56766 = torch.constant.int 1
    %int2_56767 = torch.constant.int 2
    %59242 = torch.aten.transpose.int %59214, %int1_56766, %int2_56767 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59242, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56768 = torch.constant.int 1
    %int2_56769 = torch.constant.int 2
    %59243 = torch.aten.transpose.int %59216, %int1_56768, %int2_56769 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59243, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56770 = torch.constant.int 1
    %int2_56771 = torch.constant.int 2
    %59244 = torch.aten.transpose.int %59218, %int1_56770, %int2_56771 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59244, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56772 = torch.constant.int 1
    %int2_56773 = torch.constant.int 2
    %59245 = torch.aten.transpose.int %59220, %int1_56772, %int2_56773 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59245, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_56774 = torch.constant.int 1
    %int2_56775 = torch.constant.int 2
    %59246 = torch.aten.transpose.int %59222, %int1_56774, %int2_56775 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %59246, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
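    // One fused attention call per device shard: causal flash attention with
    // dropout_p = 0.0, is_causal = true, and no explicit mask or scale (so the
    // default 1/sqrt(128) scaling applies). The second result is the
    // logsumexp, which is unused here.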
    %float0.000000e00_56776 = torch.constant.float 0.000000e+00
    %true_56777 = torch.constant.bool true
    %none_56778 = torch.constant.none
    %none_56779 = torch.constant.none
    %59247:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%59223, %59231, %59239, %float0.000000e00_56776, %true_56777, %none_56778, %none_56779) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %59247#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_56780 = torch.constant.float 0.000000e+00
    %true_56781 = torch.constant.bool true
    %none_56782 = torch.constant.none
    %none_56783 = torch.constant.none
    %59248:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%59224, %59232, %59240, %float0.000000e00_56780, %true_56781, %none_56782, %none_56783) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %59248#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_56784 = torch.constant.float 0.000000e+00
    %true_56785 = torch.constant.bool true
    %none_56786 = torch.constant.none
    %none_56787 = torch.constant.none
    %59249:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%59225, %59233, %59241, %float0.000000e00_56784, %true_56785, %none_56786, %none_56787) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %59249#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_56788 = torch.constant.float 0.000000e+00
    %true_56789 = torch.constant.bool true
    %none_56790 = torch.constant.none
    %none_56791 = torch.constant.none
    %59250:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%59226, %59234, %59242, %float0.000000e00_56788, %true_56789, %none_56790, %none_56791) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %59250#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_56792 = torch.constant.float 0.000000e+00
    %true_56793 = torch.constant.bool true
    %none_56794 = torch.constant.none
    %none_56795 = torch.constant.none
    %59251:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%59227, %59235, %59243, %float0.000000e00_56792, %true_56793, %none_56794, %none_56795) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %59251#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_56796 = torch.constant.float 0.000000e+00
    %true_56797 = torch.constant.bool true
    %none_56798 = torch.constant.none
    %none_56799 = torch.constant.none
    %59252:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%59228, %59236, %59244, %float0.000000e00_56796, %true_56797, %none_56798, %none_56799) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %59252#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_56800 = torch.constant.float 0.000000e+00
    %true_56801 = torch.constant.bool true
    %none_56802 = torch.constant.none
    %none_56803 = torch.constant.none
    %59253:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%59229, %59237, %59245, %float0.000000e00_56800, %true_56801, %none_56802, %none_56803) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %59253#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_56804 = torch.constant.float 0.000000e+00
    %true_56805 = torch.constant.bool true
    %none_56806 = torch.constant.none
    %none_56807 = torch.constant.none
    %59254:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%59230, %59238, %59246, %float0.000000e00_56804, %true_56805, %none_56806, %none_56807) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %59254#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
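    // Transpose the attention outputs back to [batch, seq, heads, dim].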
    %int1_56808 = torch.constant.int 1
    %int2_56809 = torch.constant.int 2
    %59255 = torch.aten.transpose.int %59247#0, %int1_56808, %int2_56809 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_56810 = torch.constant.int 1
    %int2_56811 = torch.constant.int 2
    %59256 = torch.aten.transpose.int %59248#0, %int1_56810, %int2_56811 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_56812 = torch.constant.int 1
    %int2_56813 = torch.constant.int 2
    %59257 = torch.aten.transpose.int %59249#0, %int1_56812, %int2_56813 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_56814 = torch.constant.int 1
    %int2_56815 = torch.constant.int 2
    %59258 = torch.aten.transpose.int %59250#0, %int1_56814, %int2_56815 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_56816 = torch.constant.int 1
    %int2_56817 = torch.constant.int 2
    %59259 = torch.aten.transpose.int %59251#0, %int1_56816, %int2_56817 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_56818 = torch.constant.int 1
    %int2_56819 = torch.constant.int 2
    %59260 = torch.aten.transpose.int %59252#0, %int1_56818, %int2_56819 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59260, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_56820 = torch.constant.int 1
    %int2_56821 = torch.constant.int 2
    %59261 = torch.aten.transpose.int %59253#0, %int1_56820, %int2_56821 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59261, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_56822 = torch.constant.int 1
    %int2_56823 = torch.constant.int 2
    %59262 = torch.aten.transpose.int %59254#0, %int1_56822, %int2_56823 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %59262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
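    // Merge heads: view [4,?,4,128] -> [4,?,512] (4 heads x 128 = 512 local
    // channels per device).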
    %int4_56824 = torch.constant.int 4
    %int512_56825 = torch.constant.int 512
    %59263 = torch.prim.ListConstruct %int4_56824, %58584, %int512_56825 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59264 = torch.aten.view %59255, %59263 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %59264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_56826 = torch.constant.int 4
    %int512_56827 = torch.constant.int 512
    %59265 = torch.prim.ListConstruct %int4_56826, %58599, %int512_56827 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59266 = torch.aten.view %59256, %59265 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %59266, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_56828 = torch.constant.int 4
    %int512_56829 = torch.constant.int 512
    %59267 = torch.prim.ListConstruct %int4_56828, %58614, %int512_56829 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59268 = torch.aten.view %59257, %59267 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %59268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_56830 = torch.constant.int 4
    %int512_56831 = torch.constant.int 512
    %59269 = torch.prim.ListConstruct %int4_56830, %58629, %int512_56831 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59270 = torch.aten.view %59258, %59269 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %59270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_56832 = torch.constant.int 4
    %int512_56833 = torch.constant.int 512
    %59271 = torch.prim.ListConstruct %int4_56832, %58644, %int512_56833 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59272 = torch.aten.view %59259, %59271 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %59272, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_56834 = torch.constant.int 4
    %int512_56835 = torch.constant.int 512
    %59273 = torch.prim.ListConstruct %int4_56834, %58659, %int512_56835 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59274 = torch.aten.view %59260, %59273 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %59274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_56836 = torch.constant.int 4
    %int512_56837 = torch.constant.int 512
    %59275 = torch.prim.ListConstruct %int4_56836, %58674, %int512_56837 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59276 = torch.aten.view %59261, %59275 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %59276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_56838 = torch.constant.int 4
    %int512_56839 = torch.constant.int 512
    %59277 = torch.prim.ListConstruct %int4_56838, %58689, %int512_56839 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59278 = torch.aten.view %59262, %59277 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %59278, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
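    // Output projection, row-parallel across the 8 devices: each weight shard
    // %2200...%2207 is [4096,512] and is permuted to [512,4096] so it can be
    // right-multiplied by the [?,512] local activations.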
    %int1_56840 = torch.constant.int 1
    %int0_56841 = torch.constant.int 0
    %59279 = torch.prim.ListConstruct %int1_56840, %int0_56841 : (!torch.int, !torch.int) -> !torch.list<int>
    %59280 = torch.aten.permute %2200, %59279 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_56842 = torch.constant.int 1
    %int0_56843 = torch.constant.int 0
    %59281 = torch.prim.ListConstruct %int1_56842, %int0_56843 : (!torch.int, !torch.int) -> !torch.list<int>
    %59282 = torch.aten.permute %2201, %59281 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_56844 = torch.constant.int 1
    %int0_56845 = torch.constant.int 0
    %59283 = torch.prim.ListConstruct %int1_56844, %int0_56845 : (!torch.int, !torch.int) -> !torch.list<int>
    %59284 = torch.aten.permute %2202, %59283 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_56846 = torch.constant.int 1
    %int0_56847 = torch.constant.int 0
    %59285 = torch.prim.ListConstruct %int1_56846, %int0_56847 : (!torch.int, !torch.int) -> !torch.list<int>
    %59286 = torch.aten.permute %2203, %59285 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_56848 = torch.constant.int 1
    %int0_56849 = torch.constant.int 0
    %59287 = torch.prim.ListConstruct %int1_56848, %int0_56849 : (!torch.int, !torch.int) -> !torch.list<int>
    %59288 = torch.aten.permute %2204, %59287 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_56850 = torch.constant.int 1
    %int0_56851 = torch.constant.int 0
    %59289 = torch.prim.ListConstruct %int1_56850, %int0_56851 : (!torch.int, !torch.int) -> !torch.list<int>
    %59290 = torch.aten.permute %2205, %59289 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_56852 = torch.constant.int 1
    %int0_56853 = torch.constant.int 0
    %59291 = torch.prim.ListConstruct %int1_56852, %int0_56853 : (!torch.int, !torch.int) -> !torch.list<int>
    %59292 = torch.aten.permute %2206, %59291 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_56854 = torch.constant.int 1
    %int0_56855 = torch.constant.int 0
    %59293 = torch.prim.ListConstruct %int1_56854, %int0_56855 : (!torch.int, !torch.int) -> !torch.list<int>
    %59294 = torch.aten.permute %2207, %59293 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
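    // Per-shard matmul: flatten [4,?,512] -> [4*?,512], multiply by the
    // permuted [512,4096] shard, and view back to [4,?,4096]. Each device now
    // holds a partial sum of the full projection.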
    %int4_56856 = torch.constant.int 4
    %59295 = torch.aten.mul.int %int4_56856, %58584 : !torch.int, !torch.int -> !torch.int
    %int512_56857 = torch.constant.int 512
    %59296 = torch.prim.ListConstruct %59295, %int512_56857 : (!torch.int, !torch.int) -> !torch.list<int>
    %59297 = torch.aten.view %59264, %59296 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %59297, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %59298 = torch.aten.mm %59297, %59280 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59298, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_56858 = torch.constant.int 4
    %int4096_56859 = torch.constant.int 4096
    %59299 = torch.prim.ListConstruct %int4_56858, %58584, %int4096_56859 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59300 = torch.aten.view %59298, %59299 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59300, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_56860 = torch.constant.int 4
    %59301 = torch.aten.mul.int %int4_56860, %58599 : !torch.int, !torch.int -> !torch.int
    %int512_56861 = torch.constant.int 512
    %59302 = torch.prim.ListConstruct %59301, %int512_56861 : (!torch.int, !torch.int) -> !torch.list<int>
    %59303 = torch.aten.view %59266, %59302 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %59303, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %59304 = torch.aten.mm %59303, %59282 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59304, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_56862 = torch.constant.int 4
    %int4096_56863 = torch.constant.int 4096
    %59305 = torch.prim.ListConstruct %int4_56862, %58599, %int4096_56863 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59306 = torch.aten.view %59304, %59305 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59306, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_56864 = torch.constant.int 4
    %59307 = torch.aten.mul.int %int4_56864, %58614 : !torch.int, !torch.int -> !torch.int
    %int512_56865 = torch.constant.int 512
    %59308 = torch.prim.ListConstruct %59307, %int512_56865 : (!torch.int, !torch.int) -> !torch.list<int>
    %59309 = torch.aten.view %59268, %59308 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %59309, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %59310 = torch.aten.mm %59309, %59284 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59310, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_56866 = torch.constant.int 4
    %int4096_56867 = torch.constant.int 4096
    %59311 = torch.prim.ListConstruct %int4_56866, %58614, %int4096_56867 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59312 = torch.aten.view %59310, %59311 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_56868 = torch.constant.int 4
    %59313 = torch.aten.mul.int %int4_56868, %58629 : !torch.int, !torch.int -> !torch.int
    %int512_56869 = torch.constant.int 512
    %59314 = torch.prim.ListConstruct %59313, %int512_56869 : (!torch.int, !torch.int) -> !torch.list<int>
    %59315 = torch.aten.view %59270, %59314 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %59315, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %59316 = torch.aten.mm %59315, %59286 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59316, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_56870 = torch.constant.int 4
    %int4096_56871 = torch.constant.int 4096
    %59317 = torch.prim.ListConstruct %int4_56870, %58629, %int4096_56871 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59318 = torch.aten.view %59316, %59317 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_56872 = torch.constant.int 4
    %59319 = torch.aten.mul.int %int4_56872, %58644 : !torch.int, !torch.int -> !torch.int
    %int512_56873 = torch.constant.int 512
    %59320 = torch.prim.ListConstruct %59319, %int512_56873 : (!torch.int, !torch.int) -> !torch.list<int>
    %59321 = torch.aten.view %59272, %59320 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %59321, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %59322 = torch.aten.mm %59321, %59288 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59322, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_56874 = torch.constant.int 4
    %int4096_56875 = torch.constant.int 4096
    %59323 = torch.prim.ListConstruct %int4_56874, %58644, %int4096_56875 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59324 = torch.aten.view %59322, %59323 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_56876 = torch.constant.int 4
    %59325 = torch.aten.mul.int %int4_56876, %58659 : !torch.int, !torch.int -> !torch.int
    %int512_56877 = torch.constant.int 512
    %59326 = torch.prim.ListConstruct %59325, %int512_56877 : (!torch.int, !torch.int) -> !torch.list<int>
    %59327 = torch.aten.view %59274, %59326 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %59327, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %59328 = torch.aten.mm %59327, %59290 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59328, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_56878 = torch.constant.int 4
    %int4096_56879 = torch.constant.int 4096
    %59329 = torch.prim.ListConstruct %int4_56878, %58659, %int4096_56879 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59330 = torch.aten.view %59328, %59329 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_56880 = torch.constant.int 4
    %59331 = torch.aten.mul.int %int4_56880, %58674 : !torch.int, !torch.int -> !torch.int
    %int512_56881 = torch.constant.int 512
    %59332 = torch.prim.ListConstruct %59331, %int512_56881 : (!torch.int, !torch.int) -> !torch.list<int>
    %59333 = torch.aten.view %59276, %59332 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %59333, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %59334 = torch.aten.mm %59333, %59292 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59334, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_56882 = torch.constant.int 4
    %int4096_56883 = torch.constant.int 4096
    %59335 = torch.prim.ListConstruct %int4_56882, %58674, %int4096_56883 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59336 = torch.aten.view %59334, %59335 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_56884 = torch.constant.int 4
    %59337 = torch.aten.mul.int %int4_56884, %58689 : !torch.int, !torch.int -> !torch.int
    %int512_56885 = torch.constant.int 512
    %59338 = torch.prim.ListConstruct %59337, %int512_56885 : (!torch.int, !torch.int) -> !torch.list<int>
    %59339 = torch.aten.view %59278, %59338 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %59339, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %59340 = torch.aten.mm %59339, %59294 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59340, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_56886 = torch.constant.int 4
    %int4096_56887 = torch.constant.int 4096
    %59341 = torch.prim.ListConstruct %int4_56886, %58689, %int4096_56887 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59342 = torch.aten.view %59340, %59341 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
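    // Gather: move the partial projections computed on devices 1..7 to
    // @__device_0, round-tripping through builtin tensors for
    // flow.tensor.transfer with the dynamic dim carried explicitly.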
    %59343 = torch_c.to_builtin_tensor %59306 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56888 = arith.constant 1 : index
    %dim_56889 = tensor.dim %59343, %c1_56888 : tensor<4x?x4096xf16>
    %59344 = flow.tensor.transfer %59343 : tensor<4x?x4096xf16>{%dim_56889} to #hal.device.promise<@__device_0>
    %59345 = torch_c.from_builtin_tensor %59344 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59345, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59346 = torch_c.to_builtin_tensor %59312 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56890 = arith.constant 1 : index
    %dim_56891 = tensor.dim %59346, %c1_56890 : tensor<4x?x4096xf16>
    %59347 = flow.tensor.transfer %59346 : tensor<4x?x4096xf16>{%dim_56891} to #hal.device.promise<@__device_0>
    %59348 = torch_c.from_builtin_tensor %59347 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59348, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59349 = torch_c.to_builtin_tensor %59318 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56892 = arith.constant 1 : index
    %dim_56893 = tensor.dim %59349, %c1_56892 : tensor<4x?x4096xf16>
    %59350 = flow.tensor.transfer %59349 : tensor<4x?x4096xf16>{%dim_56893} to #hal.device.promise<@__device_0>
    %59351 = torch_c.from_builtin_tensor %59350 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59351, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59352 = torch_c.to_builtin_tensor %59324 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56894 = arith.constant 1 : index
    %dim_56895 = tensor.dim %59352, %c1_56894 : tensor<4x?x4096xf16>
    %59353 = flow.tensor.transfer %59352 : tensor<4x?x4096xf16>{%dim_56895} to #hal.device.promise<@__device_0>
    %59354 = torch_c.from_builtin_tensor %59353 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59354, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59355 = torch_c.to_builtin_tensor %59330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56896 = arith.constant 1 : index
    %dim_56897 = tensor.dim %59355, %c1_56896 : tensor<4x?x4096xf16>
    %59356 = flow.tensor.transfer %59355 : tensor<4x?x4096xf16>{%dim_56897} to #hal.device.promise<@__device_0>
    %59357 = torch_c.from_builtin_tensor %59356 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59357, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59358 = torch_c.to_builtin_tensor %59336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56898 = arith.constant 1 : index
    %dim_56899 = tensor.dim %59358, %c1_56898 : tensor<4x?x4096xf16>
    %59359 = flow.tensor.transfer %59358 : tensor<4x?x4096xf16>{%dim_56899} to #hal.device.promise<@__device_0>
    %59360 = torch_c.from_builtin_tensor %59359 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59361 = torch_c.to_builtin_tensor %59342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56900 = arith.constant 1 : index
    %dim_56901 = tensor.dim %59361, %c1_56900 : tensor<4x?x4096xf16>
    %59362 = flow.tensor.transfer %59361 : tensor<4x?x4096xf16>{%dim_56901} to #hal.device.promise<@__device_0>
    %59363 = torch_c.from_builtin_tensor %59362 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59363, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56902 = torch.constant.int 1
    %59364 = torch.aten.add.Tensor %59300, %59345, %int1_56902 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56903 = torch.constant.int 1
    %59365 = torch.aten.add.Tensor %59364, %59348, %int1_56903 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56904 = torch.constant.int 1
    %59366 = torch.aten.add.Tensor %59365, %59351, %int1_56904 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56905 = torch.constant.int 1
    %59367 = torch.aten.add.Tensor %59366, %59354, %int1_56905 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56906 = torch.constant.int 1
    %59368 = torch.aten.add.Tensor %59367, %59357, %int1_56906 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56907 = torch.constant.int 1
    %59369 = torch.aten.add.Tensor %59368, %59360, %int1_56907 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56908 = torch.constant.int 1
    %59370 = torch.aten.add.Tensor %59369, %59363, %int1_56908 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
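    // Same unrolled sum on @__device_1; its local partial %59306 is used in place
    // of a transfer (see the add producing %59392).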
    %59371 = torch_c.to_builtin_tensor %59300 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56909 = arith.constant 1 : index
    %dim_56910 = tensor.dim %59371, %c1_56909 : tensor<4x?x4096xf16>
    %59372 = flow.tensor.transfer %59371 : tensor<4x?x4096xf16>{%dim_56910} to #hal.device.promise<@__device_1>
    %59373 = torch_c.from_builtin_tensor %59372 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59373, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59374 = torch_c.to_builtin_tensor %59312 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56911 = arith.constant 1 : index
    %dim_56912 = tensor.dim %59374, %c1_56911 : tensor<4x?x4096xf16>
    %59375 = flow.tensor.transfer %59374 : tensor<4x?x4096xf16>{%dim_56912} to #hal.device.promise<@__device_1>
    %59376 = torch_c.from_builtin_tensor %59375 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59377 = torch_c.to_builtin_tensor %59318 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56913 = arith.constant 1 : index
    %dim_56914 = tensor.dim %59377, %c1_56913 : tensor<4x?x4096xf16>
    %59378 = flow.tensor.transfer %59377 : tensor<4x?x4096xf16>{%dim_56914} to #hal.device.promise<@__device_1>
    %59379 = torch_c.from_builtin_tensor %59378 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59379, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59380 = torch_c.to_builtin_tensor %59324 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56915 = arith.constant 1 : index
    %dim_56916 = tensor.dim %59380, %c1_56915 : tensor<4x?x4096xf16>
    %59381 = flow.tensor.transfer %59380 : tensor<4x?x4096xf16>{%dim_56916} to #hal.device.promise<@__device_1>
    %59382 = torch_c.from_builtin_tensor %59381 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59383 = torch_c.to_builtin_tensor %59330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56917 = arith.constant 1 : index
    %dim_56918 = tensor.dim %59383, %c1_56917 : tensor<4x?x4096xf16>
    %59384 = flow.tensor.transfer %59383 : tensor<4x?x4096xf16>{%dim_56918} to #hal.device.promise<@__device_1>
    %59385 = torch_c.from_builtin_tensor %59384 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59385, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59386 = torch_c.to_builtin_tensor %59336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56919 = arith.constant 1 : index
    %dim_56920 = tensor.dim %59386, %c1_56919 : tensor<4x?x4096xf16>
    %59387 = flow.tensor.transfer %59386 : tensor<4x?x4096xf16>{%dim_56920} to #hal.device.promise<@__device_1>
    %59388 = torch_c.from_builtin_tensor %59387 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59389 = torch_c.to_builtin_tensor %59342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56921 = arith.constant 1 : index
    %dim_56922 = tensor.dim %59389, %c1_56921 : tensor<4x?x4096xf16>
    %59390 = flow.tensor.transfer %59389 : tensor<4x?x4096xf16>{%dim_56922} to #hal.device.promise<@__device_1>
    %59391 = torch_c.from_builtin_tensor %59390 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59391, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56923 = torch.constant.int 1
    %59392 = torch.aten.add.Tensor %59373, %59306, %int1_56923 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56924 = torch.constant.int 1
    %59393 = torch.aten.add.Tensor %59392, %59376, %int1_56924 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56925 = torch.constant.int 1
    %59394 = torch.aten.add.Tensor %59393, %59379, %int1_56925 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56926 = torch.constant.int 1
    %59395 = torch.aten.add.Tensor %59394, %59382, %int1_56926 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56927 = torch.constant.int 1
    %59396 = torch.aten.add.Tensor %59395, %59385, %int1_56927 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56928 = torch.constant.int 1
    %59397 = torch.aten.add.Tensor %59396, %59388, %int1_56928 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56929 = torch.constant.int 1
    %59398 = torch.aten.add.Tensor %59397, %59391, %int1_56929 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
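    // @__device_2 replica of the reduction (local partial %59312).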
    %59399 = torch_c.to_builtin_tensor %59300 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56930 = arith.constant 1 : index
    %dim_56931 = tensor.dim %59399, %c1_56930 : tensor<4x?x4096xf16>
    %59400 = flow.tensor.transfer %59399 : tensor<4x?x4096xf16>{%dim_56931} to #hal.device.promise<@__device_2>
    %59401 = torch_c.from_builtin_tensor %59400 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59401, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59402 = torch_c.to_builtin_tensor %59306 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56932 = arith.constant 1 : index
    %dim_56933 = tensor.dim %59402, %c1_56932 : tensor<4x?x4096xf16>
    %59403 = flow.tensor.transfer %59402 : tensor<4x?x4096xf16>{%dim_56933} to #hal.device.promise<@__device_2>
    %59404 = torch_c.from_builtin_tensor %59403 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59405 = torch_c.to_builtin_tensor %59318 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56934 = arith.constant 1 : index
    %dim_56935 = tensor.dim %59405, %c1_56934 : tensor<4x?x4096xf16>
    %59406 = flow.tensor.transfer %59405 : tensor<4x?x4096xf16>{%dim_56935} to #hal.device.promise<@__device_2>
    %59407 = torch_c.from_builtin_tensor %59406 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59407, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59408 = torch_c.to_builtin_tensor %59324 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56936 = arith.constant 1 : index
    %dim_56937 = tensor.dim %59408, %c1_56936 : tensor<4x?x4096xf16>
    %59409 = flow.tensor.transfer %59408 : tensor<4x?x4096xf16>{%dim_56937} to #hal.device.promise<@__device_2>
    %59410 = torch_c.from_builtin_tensor %59409 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59410, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59411 = torch_c.to_builtin_tensor %59330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56938 = arith.constant 1 : index
    %dim_56939 = tensor.dim %59411, %c1_56938 : tensor<4x?x4096xf16>
    %59412 = flow.tensor.transfer %59411 : tensor<4x?x4096xf16>{%dim_56939} to #hal.device.promise<@__device_2>
    %59413 = torch_c.from_builtin_tensor %59412 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59413, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59414 = torch_c.to_builtin_tensor %59336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56940 = arith.constant 1 : index
    %dim_56941 = tensor.dim %59414, %c1_56940 : tensor<4x?x4096xf16>
    %59415 = flow.tensor.transfer %59414 : tensor<4x?x4096xf16>{%dim_56941} to #hal.device.promise<@__device_2>
    %59416 = torch_c.from_builtin_tensor %59415 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59416, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59417 = torch_c.to_builtin_tensor %59342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56942 = arith.constant 1 : index
    %dim_56943 = tensor.dim %59417, %c1_56942 : tensor<4x?x4096xf16>
    %59418 = flow.tensor.transfer %59417 : tensor<4x?x4096xf16>{%dim_56943} to #hal.device.promise<@__device_2>
    %59419 = torch_c.from_builtin_tensor %59418 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59419, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56944 = torch.constant.int 1
    %59420 = torch.aten.add.Tensor %59401, %59404, %int1_56944 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56945 = torch.constant.int 1
    %59421 = torch.aten.add.Tensor %59420, %59312, %int1_56945 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56946 = torch.constant.int 1
    %59422 = torch.aten.add.Tensor %59421, %59407, %int1_56946 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56947 = torch.constant.int 1
    %59423 = torch.aten.add.Tensor %59422, %59410, %int1_56947 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56948 = torch.constant.int 1
    %59424 = torch.aten.add.Tensor %59423, %59413, %int1_56948 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56949 = torch.constant.int 1
    %59425 = torch.aten.add.Tensor %59424, %59416, %int1_56949 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56950 = torch.constant.int 1
    %59426 = torch.aten.add.Tensor %59425, %59419, %int1_56950 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
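    // @__device_3 replica of the reduction (local partial %59318).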
    %59427 = torch_c.to_builtin_tensor %59300 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56951 = arith.constant 1 : index
    %dim_56952 = tensor.dim %59427, %c1_56951 : tensor<4x?x4096xf16>
    %59428 = flow.tensor.transfer %59427 : tensor<4x?x4096xf16>{%dim_56952} to #hal.device.promise<@__device_3>
    %59429 = torch_c.from_builtin_tensor %59428 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59430 = torch_c.to_builtin_tensor %59306 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56953 = arith.constant 1 : index
    %dim_56954 = tensor.dim %59430, %c1_56953 : tensor<4x?x4096xf16>
    %59431 = flow.tensor.transfer %59430 : tensor<4x?x4096xf16>{%dim_56954} to #hal.device.promise<@__device_3>
    %59432 = torch_c.from_builtin_tensor %59431 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59433 = torch_c.to_builtin_tensor %59312 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56955 = arith.constant 1 : index
    %dim_56956 = tensor.dim %59433, %c1_56955 : tensor<4x?x4096xf16>
    %59434 = flow.tensor.transfer %59433 : tensor<4x?x4096xf16>{%dim_56956} to #hal.device.promise<@__device_3>
    %59435 = torch_c.from_builtin_tensor %59434 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59436 = torch_c.to_builtin_tensor %59324 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56957 = arith.constant 1 : index
    %dim_56958 = tensor.dim %59436, %c1_56957 : tensor<4x?x4096xf16>
    %59437 = flow.tensor.transfer %59436 : tensor<4x?x4096xf16>{%dim_56958} to #hal.device.promise<@__device_3>
    %59438 = torch_c.from_builtin_tensor %59437 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59439 = torch_c.to_builtin_tensor %59330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56959 = arith.constant 1 : index
    %dim_56960 = tensor.dim %59439, %c1_56959 : tensor<4x?x4096xf16>
    %59440 = flow.tensor.transfer %59439 : tensor<4x?x4096xf16>{%dim_56960} to #hal.device.promise<@__device_3>
    %59441 = torch_c.from_builtin_tensor %59440 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59442 = torch_c.to_builtin_tensor %59336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56961 = arith.constant 1 : index
    %dim_56962 = tensor.dim %59442, %c1_56961 : tensor<4x?x4096xf16>
    %59443 = flow.tensor.transfer %59442 : tensor<4x?x4096xf16>{%dim_56962} to #hal.device.promise<@__device_3>
    %59444 = torch_c.from_builtin_tensor %59443 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59445 = torch_c.to_builtin_tensor %59342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56963 = arith.constant 1 : index
    %dim_56964 = tensor.dim %59445, %c1_56963 : tensor<4x?x4096xf16>
    %59446 = flow.tensor.transfer %59445 : tensor<4x?x4096xf16>{%dim_56964} to #hal.device.promise<@__device_3>
    %59447 = torch_c.from_builtin_tensor %59446 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56965 = torch.constant.int 1
    %59448 = torch.aten.add.Tensor %59429, %59432, %int1_56965 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56966 = torch.constant.int 1
    %59449 = torch.aten.add.Tensor %59448, %59435, %int1_56966 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56967 = torch.constant.int 1
    %59450 = torch.aten.add.Tensor %59449, %59318, %int1_56967 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56968 = torch.constant.int 1
    %59451 = torch.aten.add.Tensor %59450, %59438, %int1_56968 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56969 = torch.constant.int 1
    %59452 = torch.aten.add.Tensor %59451, %59441, %int1_56969 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59452, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56970 = torch.constant.int 1
    %59453 = torch.aten.add.Tensor %59452, %59444, %int1_56970 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56971 = torch.constant.int 1
    %59454 = torch.aten.add.Tensor %59453, %59447, %int1_56971 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
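    // @__device_4 replica of the reduction (local partial %59324).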
    %59455 = torch_c.to_builtin_tensor %59300 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56972 = arith.constant 1 : index
    %dim_56973 = tensor.dim %59455, %c1_56972 : tensor<4x?x4096xf16>
    %59456 = flow.tensor.transfer %59455 : tensor<4x?x4096xf16>{%dim_56973} to #hal.device.promise<@__device_4>
    %59457 = torch_c.from_builtin_tensor %59456 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59458 = torch_c.to_builtin_tensor %59306 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56974 = arith.constant 1 : index
    %dim_56975 = tensor.dim %59458, %c1_56974 : tensor<4x?x4096xf16>
    %59459 = flow.tensor.transfer %59458 : tensor<4x?x4096xf16>{%dim_56975} to #hal.device.promise<@__device_4>
    %59460 = torch_c.from_builtin_tensor %59459 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59460, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59461 = torch_c.to_builtin_tensor %59312 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56976 = arith.constant 1 : index
    %dim_56977 = tensor.dim %59461, %c1_56976 : tensor<4x?x4096xf16>
    %59462 = flow.tensor.transfer %59461 : tensor<4x?x4096xf16>{%dim_56977} to #hal.device.promise<@__device_4>
    %59463 = torch_c.from_builtin_tensor %59462 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59464 = torch_c.to_builtin_tensor %59318 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56978 = arith.constant 1 : index
    %dim_56979 = tensor.dim %59464, %c1_56978 : tensor<4x?x4096xf16>
    %59465 = flow.tensor.transfer %59464 : tensor<4x?x4096xf16>{%dim_56979} to #hal.device.promise<@__device_4>
    %59466 = torch_c.from_builtin_tensor %59465 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59466, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59467 = torch_c.to_builtin_tensor %59330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56980 = arith.constant 1 : index
    %dim_56981 = tensor.dim %59467, %c1_56980 : tensor<4x?x4096xf16>
    %59468 = flow.tensor.transfer %59467 : tensor<4x?x4096xf16>{%dim_56981} to #hal.device.promise<@__device_4>
    %59469 = torch_c.from_builtin_tensor %59468 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59470 = torch_c.to_builtin_tensor %59336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56982 = arith.constant 1 : index
    %dim_56983 = tensor.dim %59470, %c1_56982 : tensor<4x?x4096xf16>
    %59471 = flow.tensor.transfer %59470 : tensor<4x?x4096xf16>{%dim_56983} to #hal.device.promise<@__device_4>
    %59472 = torch_c.from_builtin_tensor %59471 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59473 = torch_c.to_builtin_tensor %59342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56984 = arith.constant 1 : index
    %dim_56985 = tensor.dim %59473, %c1_56984 : tensor<4x?x4096xf16>
    %59474 = flow.tensor.transfer %59473 : tensor<4x?x4096xf16>{%dim_56985} to #hal.device.promise<@__device_4>
    %59475 = torch_c.from_builtin_tensor %59474 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56986 = torch.constant.int 1
    %59476 = torch.aten.add.Tensor %59457, %59460, %int1_56986 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56987 = torch.constant.int 1
    %59477 = torch.aten.add.Tensor %59476, %59463, %int1_56987 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56988 = torch.constant.int 1
    %59478 = torch.aten.add.Tensor %59477, %59466, %int1_56988 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56989 = torch.constant.int 1
    %59479 = torch.aten.add.Tensor %59478, %59324, %int1_56989 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56990 = torch.constant.int 1
    %59480 = torch.aten.add.Tensor %59479, %59469, %int1_56990 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56991 = torch.constant.int 1
    %59481 = torch.aten.add.Tensor %59480, %59472, %int1_56991 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_56992 = torch.constant.int 1
    %59482 = torch.aten.add.Tensor %59481, %59475, %int1_56992 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
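    // @__device_5 replica of the reduction (local partial %59330).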
    %59483 = torch_c.to_builtin_tensor %59300 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56993 = arith.constant 1 : index
    %dim_56994 = tensor.dim %59483, %c1_56993 : tensor<4x?x4096xf16>
    %59484 = flow.tensor.transfer %59483 : tensor<4x?x4096xf16>{%dim_56994} to #hal.device.promise<@__device_5>
    %59485 = torch_c.from_builtin_tensor %59484 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59486 = torch_c.to_builtin_tensor %59306 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56995 = arith.constant 1 : index
    %dim_56996 = tensor.dim %59486, %c1_56995 : tensor<4x?x4096xf16>
    %59487 = flow.tensor.transfer %59486 : tensor<4x?x4096xf16>{%dim_56996} to #hal.device.promise<@__device_5>
    %59488 = torch_c.from_builtin_tensor %59487 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59489 = torch_c.to_builtin_tensor %59312 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56997 = arith.constant 1 : index
    %dim_56998 = tensor.dim %59489, %c1_56997 : tensor<4x?x4096xf16>
    %59490 = flow.tensor.transfer %59489 : tensor<4x?x4096xf16>{%dim_56998} to #hal.device.promise<@__device_5>
    %59491 = torch_c.from_builtin_tensor %59490 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59492 = torch_c.to_builtin_tensor %59318 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_56999 = arith.constant 1 : index
    %dim_57000 = tensor.dim %59492, %c1_56999 : tensor<4x?x4096xf16>
    %59493 = flow.tensor.transfer %59492 : tensor<4x?x4096xf16>{%dim_57000} to #hal.device.promise<@__device_5>
    %59494 = torch_c.from_builtin_tensor %59493 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59495 = torch_c.to_builtin_tensor %59324 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57001 = arith.constant 1 : index
    %dim_57002 = tensor.dim %59495, %c1_57001 : tensor<4x?x4096xf16>
    %59496 = flow.tensor.transfer %59495 : tensor<4x?x4096xf16>{%dim_57002} to #hal.device.promise<@__device_5>
    %59497 = torch_c.from_builtin_tensor %59496 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59498 = torch_c.to_builtin_tensor %59336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57003 = arith.constant 1 : index
    %dim_57004 = tensor.dim %59498, %c1_57003 : tensor<4x?x4096xf16>
    %59499 = flow.tensor.transfer %59498 : tensor<4x?x4096xf16>{%dim_57004} to #hal.device.promise<@__device_5>
    %59500 = torch_c.from_builtin_tensor %59499 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59501 = torch_c.to_builtin_tensor %59342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57005 = arith.constant 1 : index
    %dim_57006 = tensor.dim %59501, %c1_57005 : tensor<4x?x4096xf16>
    %59502 = flow.tensor.transfer %59501 : tensor<4x?x4096xf16>{%dim_57006} to #hal.device.promise<@__device_5>
    %59503 = torch_c.from_builtin_tensor %59502 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57007 = torch.constant.int 1
    %59504 = torch.aten.add.Tensor %59485, %59488, %int1_57007 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57008 = torch.constant.int 1
    %59505 = torch.aten.add.Tensor %59504, %59491, %int1_57008 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57009 = torch.constant.int 1
    %59506 = torch.aten.add.Tensor %59505, %59494, %int1_57009 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57010 = torch.constant.int 1
    %59507 = torch.aten.add.Tensor %59506, %59497, %int1_57010 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57011 = torch.constant.int 1
    %59508 = torch.aten.add.Tensor %59507, %59330, %int1_57011 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59508, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57012 = torch.constant.int 1
    %59509 = torch.aten.add.Tensor %59508, %59500, %int1_57012 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59509, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57013 = torch.constant.int 1
    %59510 = torch.aten.add.Tensor %59509, %59503, %int1_57013 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59510, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
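    // @__device_6 replica of the reduction (local partial %59336).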
    %59511 = torch_c.to_builtin_tensor %59300 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57014 = arith.constant 1 : index
    %dim_57015 = tensor.dim %59511, %c1_57014 : tensor<4x?x4096xf16>
    %59512 = flow.tensor.transfer %59511 : tensor<4x?x4096xf16>{%dim_57015} to #hal.device.promise<@__device_6>
    %59513 = torch_c.from_builtin_tensor %59512 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59513, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59514 = torch_c.to_builtin_tensor %59306 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57016 = arith.constant 1 : index
    %dim_57017 = tensor.dim %59514, %c1_57016 : tensor<4x?x4096xf16>
    %59515 = flow.tensor.transfer %59514 : tensor<4x?x4096xf16>{%dim_57017} to #hal.device.promise<@__device_6>
    %59516 = torch_c.from_builtin_tensor %59515 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59516, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59517 = torch_c.to_builtin_tensor %59312 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57018 = arith.constant 1 : index
    %dim_57019 = tensor.dim %59517, %c1_57018 : tensor<4x?x4096xf16>
    %59518 = flow.tensor.transfer %59517 : tensor<4x?x4096xf16>{%dim_57019} to #hal.device.promise<@__device_6>
    %59519 = torch_c.from_builtin_tensor %59518 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59520 = torch_c.to_builtin_tensor %59318 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57020 = arith.constant 1 : index
    %dim_57021 = tensor.dim %59520, %c1_57020 : tensor<4x?x4096xf16>
    %59521 = flow.tensor.transfer %59520 : tensor<4x?x4096xf16>{%dim_57021} to #hal.device.promise<@__device_6>
    %59522 = torch_c.from_builtin_tensor %59521 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59522, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59523 = torch_c.to_builtin_tensor %59324 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57022 = arith.constant 1 : index
    %dim_57023 = tensor.dim %59523, %c1_57022 : tensor<4x?x4096xf16>
    %59524 = flow.tensor.transfer %59523 : tensor<4x?x4096xf16>{%dim_57023} to #hal.device.promise<@__device_6>
    %59525 = torch_c.from_builtin_tensor %59524 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59525, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59526 = torch_c.to_builtin_tensor %59330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57024 = arith.constant 1 : index
    %dim_57025 = tensor.dim %59526, %c1_57024 : tensor<4x?x4096xf16>
    %59527 = flow.tensor.transfer %59526 : tensor<4x?x4096xf16>{%dim_57025} to #hal.device.promise<@__device_6>
    %59528 = torch_c.from_builtin_tensor %59527 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59528, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59529 = torch_c.to_builtin_tensor %59342 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57026 = arith.constant 1 : index
    %dim_57027 = tensor.dim %59529, %c1_57026 : tensor<4x?x4096xf16>
    %59530 = flow.tensor.transfer %59529 : tensor<4x?x4096xf16>{%dim_57027} to #hal.device.promise<@__device_6>
    %59531 = torch_c.from_builtin_tensor %59530 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59531, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57028 = torch.constant.int 1
    %59532 = torch.aten.add.Tensor %59513, %59516, %int1_57028 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59532, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57029 = torch.constant.int 1
    %59533 = torch.aten.add.Tensor %59532, %59519, %int1_57029 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57030 = torch.constant.int 1
    %59534 = torch.aten.add.Tensor %59533, %59522, %int1_57030 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57031 = torch.constant.int 1
    %59535 = torch.aten.add.Tensor %59534, %59525, %int1_57031 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57032 = torch.constant.int 1
    %59536 = torch.aten.add.Tensor %59535, %59528, %int1_57032 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59536, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57033 = torch.constant.int 1
    %59537 = torch.aten.add.Tensor %59536, %59336, %int1_57033 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59537, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57034 = torch.constant.int 1
    %59538 = torch.aten.add.Tensor %59537, %59531, %int1_57034 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59538, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
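    // @__device_7 replica of the reduction (local partial %59342). After this
    // point each device holds the identical full sum (%59370, %59398, %59426,
    // %59454, %59482, %59510, %59538, %59566).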
    %59539 = torch_c.to_builtin_tensor %59300 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57035 = arith.constant 1 : index
    %dim_57036 = tensor.dim %59539, %c1_57035 : tensor<4x?x4096xf16>
    %59540 = flow.tensor.transfer %59539 : tensor<4x?x4096xf16>{%dim_57036} to #hal.device.promise<@__device_7>
    %59541 = torch_c.from_builtin_tensor %59540 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59542 = torch_c.to_builtin_tensor %59306 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57037 = arith.constant 1 : index
    %dim_57038 = tensor.dim %59542, %c1_57037 : tensor<4x?x4096xf16>
    %59543 = flow.tensor.transfer %59542 : tensor<4x?x4096xf16>{%dim_57038} to #hal.device.promise<@__device_7>
    %59544 = torch_c.from_builtin_tensor %59543 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59545 = torch_c.to_builtin_tensor %59312 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57039 = arith.constant 1 : index
    %dim_57040 = tensor.dim %59545, %c1_57039 : tensor<4x?x4096xf16>
    %59546 = flow.tensor.transfer %59545 : tensor<4x?x4096xf16>{%dim_57040} to #hal.device.promise<@__device_7>
    %59547 = torch_c.from_builtin_tensor %59546 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59548 = torch_c.to_builtin_tensor %59318 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57041 = arith.constant 1 : index
    %dim_57042 = tensor.dim %59548, %c1_57041 : tensor<4x?x4096xf16>
    %59549 = flow.tensor.transfer %59548 : tensor<4x?x4096xf16>{%dim_57042} to #hal.device.promise<@__device_7>
    %59550 = torch_c.from_builtin_tensor %59549 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59550, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59551 = torch_c.to_builtin_tensor %59324 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57043 = arith.constant 1 : index
    %dim_57044 = tensor.dim %59551, %c1_57043 : tensor<4x?x4096xf16>
    %59552 = flow.tensor.transfer %59551 : tensor<4x?x4096xf16>{%dim_57044} to #hal.device.promise<@__device_7>
    %59553 = torch_c.from_builtin_tensor %59552 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59554 = torch_c.to_builtin_tensor %59330 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57045 = arith.constant 1 : index
    %dim_57046 = tensor.dim %59554, %c1_57045 : tensor<4x?x4096xf16>
    %59555 = flow.tensor.transfer %59554 : tensor<4x?x4096xf16>{%dim_57046} to #hal.device.promise<@__device_7>
    %59556 = torch_c.from_builtin_tensor %59555 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59556, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59557 = torch_c.to_builtin_tensor %59336 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57047 = arith.constant 1 : index
    %dim_57048 = tensor.dim %59557, %c1_57047 : tensor<4x?x4096xf16>
    %59558 = flow.tensor.transfer %59557 : tensor<4x?x4096xf16>{%dim_57048} to #hal.device.promise<@__device_7>
    %59559 = torch_c.from_builtin_tensor %59558 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57049 = torch.constant.int 1
    %59560 = torch.aten.add.Tensor %59541, %59544, %int1_57049 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57050 = torch.constant.int 1
    %59561 = torch.aten.add.Tensor %59560, %59547, %int1_57050 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59561, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57051 = torch.constant.int 1
    %59562 = torch.aten.add.Tensor %59561, %59550, %int1_57051 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59562, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57052 = torch.constant.int 1
    %59563 = torch.aten.add.Tensor %59562, %59553, %int1_57052 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57053 = torch.constant.int 1
    %59564 = torch.aten.add.Tensor %59563, %59556, %int1_57053 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57054 = torch.constant.int 1
    %59565 = torch.aten.add.Tensor %59564, %59559, %int1_57054 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57055 = torch.constant.int 1
    %59566 = torch.aten.add.Tensor %59565, %59342, %int1_57055 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59566, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
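    // Per-device adds of the reduced output onto the incoming hidden states
    // %58226..%58233 -- most likely the block's residual (skip) connection,
    // one copy per device.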
    %int1_57056 = torch.constant.int 1
    %59567 = torch.aten.add.Tensor %58226, %59370, %int1_57056 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59567, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57057 = torch.constant.int 1
    %59568 = torch.aten.add.Tensor %58227, %59398, %int1_57057 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59568, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57058 = torch.constant.int 1
    %59569 = torch.aten.add.Tensor %58228, %59426, %int1_57058 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59569, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57059 = torch.constant.int 1
    %59570 = torch.aten.add.Tensor %58229, %59454, %int1_57059 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59570, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57060 = torch.constant.int 1
    %59571 = torch.aten.add.Tensor %58230, %59482, %int1_57060 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57061 = torch.constant.int 1
    %59572 = torch.aten.add.Tensor %58231, %59510, %int1_57061 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57062 = torch.constant.int 1
    %59573 = torch.aten.add.Tensor %58232, %59538, %int1_57062 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57063 = torch.constant.int 1
    %59574 = torch.aten.add.Tensor %58233, %59566, %int1_57063 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
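    // Upcast the eight per-device activations to f32 (dtype code 6 ==
    // torch.float32). Together with the pow/mean/add-eps steps that follow,
    // this looks like the start of an RMSNorm computed in full precision.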
    %int6_57064 = torch.constant.int 6
    %59575 = torch.prims.convert_element_type %59567, %int6_57064 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57065 = torch.constant.int 6
    %59576 = torch.prims.convert_element_type %59568, %int6_57065 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57066 = torch.constant.int 6
    %59577 = torch.prims.convert_element_type %59569, %int6_57066 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57067 = torch.constant.int 6
    %59578 = torch.prims.convert_element_type %59570, %int6_57067 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57068 = torch.constant.int 6
    %59579 = torch.prims.convert_element_type %59571, %int6_57068 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57069 = torch.constant.int 6
    %59580 = torch.prims.convert_element_type %59572, %int6_57069 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59580, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57070 = torch.constant.int 6
    %59581 = torch.prims.convert_element_type %59573, %int6_57070 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59581, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57071 = torch.constant.int 6
    %59582 = torch.prims.convert_element_type %59574, %int6_57071 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59582, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
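    // Elementwise square, x^2, per device: the mean-of-squares term.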
    %int2_57072 = torch.constant.int 2
    %59583 = torch.aten.pow.Tensor_Scalar %59575, %int2_57072 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59583, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57073 = torch.constant.int 2
    %59584 = torch.aten.pow.Tensor_Scalar %59576, %int2_57073 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59584, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57074 = torch.constant.int 2
    %59585 = torch.aten.pow.Tensor_Scalar %59577, %int2_57074 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59585, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57075 = torch.constant.int 2
    %59586 = torch.aten.pow.Tensor_Scalar %59578, %int2_57075 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59586, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57076 = torch.constant.int 2
    %59587 = torch.aten.pow.Tensor_Scalar %59579, %int2_57076 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59587, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57077 = torch.constant.int 2
    %59588 = torch.aten.pow.Tensor_Scalar %59580, %int2_57077 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59588, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57078 = torch.constant.int 2
    %59589 = torch.aten.pow.Tensor_Scalar %59581, %int2_57078 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59589, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57079 = torch.constant.int 2
    %59590 = torch.aten.pow.Tensor_Scalar %59582, %int2_57079 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59590, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
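    // Mean over the last (hidden, size-4096) dimension with keepdim=true,
    // reducing [4,?,4096] to [4,?,1] on each device.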
    %int-1_57080 = torch.constant.int -1
    %59591 = torch.prim.ListConstruct %int-1_57080 : (!torch.int) -> !torch.list<int>
    %true_57081 = torch.constant.bool true
    %none_57082 = torch.constant.none
    %59592 = torch.aten.mean.dim %59583, %59591, %true_57081, %none_57082 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59592, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57083 = torch.constant.int -1
    %59593 = torch.prim.ListConstruct %int-1_57083 : (!torch.int) -> !torch.list<int>
    %true_57084 = torch.constant.bool true
    %none_57085 = torch.constant.none
    %59594 = torch.aten.mean.dim %59584, %59593, %true_57084, %none_57085 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59594, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57086 = torch.constant.int -1
    %59595 = torch.prim.ListConstruct %int-1_57086 : (!torch.int) -> !torch.list<int>
    %true_57087 = torch.constant.bool true
    %none_57088 = torch.constant.none
    %59596 = torch.aten.mean.dim %59585, %59595, %true_57087, %none_57088 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59596, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57089 = torch.constant.int -1
    %59597 = torch.prim.ListConstruct %int-1_57089 : (!torch.int) -> !torch.list<int>
    %true_57090 = torch.constant.bool true
    %none_57091 = torch.constant.none
    %59598 = torch.aten.mean.dim %59586, %59597, %true_57090, %none_57091 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59598, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57092 = torch.constant.int -1
    %59599 = torch.prim.ListConstruct %int-1_57092 : (!torch.int) -> !torch.list<int>
    %true_57093 = torch.constant.bool true
    %none_57094 = torch.constant.none
    %59600 = torch.aten.mean.dim %59587, %59599, %true_57093, %none_57094 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59600, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57095 = torch.constant.int -1
    %59601 = torch.prim.ListConstruct %int-1_57095 : (!torch.int) -> !torch.list<int>
    %true_57096 = torch.constant.bool true
    %none_57097 = torch.constant.none
    %59602 = torch.aten.mean.dim %59588, %59601, %true_57096, %none_57097 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59602, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57098 = torch.constant.int -1
    %59603 = torch.prim.ListConstruct %int-1_57098 : (!torch.int) -> !torch.list<int>
    %true_57099 = torch.constant.bool true
    %none_57100 = torch.constant.none
    %59604 = torch.aten.mean.dim %59589, %59603, %true_57099, %none_57100 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59604, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57101 = torch.constant.int -1
    %59605 = torch.prim.ListConstruct %int-1_57101 : (!torch.int) -> !torch.list<int>
    %true_57102 = torch.constant.bool true
    %none_57103 = torch.constant.none
    %59606 = torch.aten.mean.dim %59590, %59605, %true_57102, %none_57103 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59606, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
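    // Add the numerical-stability epsilon to each mean; 9.9999997473787516E-6 is 1e-5 rounded to f32 precision.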
    %float9.999990e-06_57104 = torch.constant.float 9.9999997473787516E-6
    %int1_57105 = torch.constant.int 1
    %59607 = torch.aten.add.Scalar %59592, %float9.999990e-06_57104, %int1_57105 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57106 = torch.constant.float 9.9999997473787516E-6
    %int1_57107 = torch.constant.int 1
    %59608 = torch.aten.add.Scalar %59594, %float9.999990e-06_57106, %int1_57107 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59608, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57108 = torch.constant.float 9.9999997473787516E-6
    %int1_57109 = torch.constant.int 1
    %59609 = torch.aten.add.Scalar %59596, %float9.999990e-06_57108, %int1_57109 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59609, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57110 = torch.constant.float 9.9999997473787516E-6
    %int1_57111 = torch.constant.int 1
    %59610 = torch.aten.add.Scalar %59598, %float9.999990e-06_57110, %int1_57111 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59610, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57112 = torch.constant.float 9.9999997473787516E-6
    %int1_57113 = torch.constant.int 1
    %59611 = torch.aten.add.Scalar %59600, %float9.999990e-06_57112, %int1_57113 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59611, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57114 = torch.constant.float 9.9999997473787516E-6
    %int1_57115 = torch.constant.int 1
    %59612 = torch.aten.add.Scalar %59602, %float9.999990e-06_57114, %int1_57115 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57116 = torch.constant.float 9.9999997473787516E-6
    %int1_57117 = torch.constant.int 1
    %59613 = torch.aten.add.Scalar %59604, %float9.999990e-06_57116, %int1_57117 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57118 = torch.constant.float 9.9999997473787516E-6
    %int1_57119 = torch.constant.int 1
    %59614 = torch.aten.add.Scalar %59606, %float9.999990e-06_57118, %int1_57119 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59614, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
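    // Reciprocal square root of (mean(x^2) + eps), per device.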
    %59615 = torch.aten.rsqrt %59607 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59615, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %59616 = torch.aten.rsqrt %59608 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %59617 = torch.aten.rsqrt %59609 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %59618 = torch.aten.rsqrt %59610 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59618, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %59619 = torch.aten.rsqrt %59611 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %59620 = torch.aten.rsqrt %59612 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59620, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %59621 = torch.aten.rsqrt %59613 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59621, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %59622 = torch.aten.rsqrt %59614 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %59622, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
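    // Normalize: multiply each activation copy by its statistic, i.e. x * rsqrt(mean(x^2) + eps).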
    %59623 = torch.aten.mul.Tensor %59575, %59615 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59623, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59624 = torch.aten.mul.Tensor %59576, %59616 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59624, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59625 = torch.aten.mul.Tensor %59577, %59617 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59626 = torch.aten.mul.Tensor %59578, %59618 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59626, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59627 = torch.aten.mul.Tensor %59579, %59619 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59628 = torch.aten.mul.Tensor %59580, %59620 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59629 = torch.aten.mul.Tensor %59581, %59621 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59629, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59630 = torch.aten.mul.Tensor %59582, %59622 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59630, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
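    // Scale by the learned norm weight; %2208-%2215 are eight [4096] f32 vectors, presumably the per-device copies of this block's ffn_norm weight global.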
    %59631 = torch.aten.mul.Tensor %2208, %59623 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59632 = torch.aten.mul.Tensor %2209, %59624 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59633 = torch.aten.mul.Tensor %2210, %59625 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59633, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59634 = torch.aten.mul.Tensor %2211, %59626 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59634, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59635 = torch.aten.mul.Tensor %2212, %59627 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59635, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59636 = torch.aten.mul.Tensor %2213, %59628 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59636, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59637 = torch.aten.mul.Tensor %2214, %59629 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %59638 = torch.aten.mul.Tensor %2215, %59630 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %59638, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
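    // Cast the normalized activations from f32 back to f16 (torch dtype code 5) for the matmuls below.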
    %int5_57120 = torch.constant.int 5
    %59639 = torch.prims.convert_element_type %59631, %int5_57120 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59639, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57121 = torch.constant.int 5
    %59640 = torch.prims.convert_element_type %59632, %int5_57121 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59640, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57122 = torch.constant.int 5
    %59641 = torch.prims.convert_element_type %59633, %int5_57122 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59641, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57123 = torch.constant.int 5
    %59642 = torch.prims.convert_element_type %59634, %int5_57123 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57124 = torch.constant.int 5
    %59643 = torch.prims.convert_element_type %59635, %int5_57124 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57125 = torch.constant.int 5
    %59644 = torch.prims.convert_element_type %59636, %int5_57125 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57126 = torch.constant.int 5
    %59645 = torch.prims.convert_element_type %59637, %int5_57126 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57127 = torch.constant.int 5
    %59646 = torch.prims.convert_element_type %59638, %int5_57127 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
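    // Transpose the eight [1792,4096] weight shards for the first FFN projection; 8 x 1792 = 14336, so the intermediate dimension appears to be split column-wise across the devices.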
    %int1_57128 = torch.constant.int 1
    %int0_57129 = torch.constant.int 0
    %59647 = torch.prim.ListConstruct %int1_57128, %int0_57129 : (!torch.int, !torch.int) -> !torch.list<int>
    %59648 = torch.aten.permute %2216, %59647 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57130 = torch.constant.int 1
    %int0_57131 = torch.constant.int 0
    %59649 = torch.prim.ListConstruct %int1_57130, %int0_57131 : (!torch.int, !torch.int) -> !torch.list<int>
    %59650 = torch.aten.permute %2217, %59649 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57132 = torch.constant.int 1
    %int0_57133 = torch.constant.int 0
    %59651 = torch.prim.ListConstruct %int1_57132, %int0_57133 : (!torch.int, !torch.int) -> !torch.list<int>
    %59652 = torch.aten.permute %2218, %59651 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57134 = torch.constant.int 1
    %int0_57135 = torch.constant.int 0
    %59653 = torch.prim.ListConstruct %int1_57134, %int0_57135 : (!torch.int, !torch.int) -> !torch.list<int>
    %59654 = torch.aten.permute %2219, %59653 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57136 = torch.constant.int 1
    %int0_57137 = torch.constant.int 0
    %59655 = torch.prim.ListConstruct %int1_57136, %int0_57137 : (!torch.int, !torch.int) -> !torch.list<int>
    %59656 = torch.aten.permute %2220, %59655 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57138 = torch.constant.int 1
    %int0_57139 = torch.constant.int 0
    %59657 = torch.prim.ListConstruct %int1_57138, %int0_57139 : (!torch.int, !torch.int) -> !torch.list<int>
    %59658 = torch.aten.permute %2221, %59657 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57140 = torch.constant.int 1
    %int0_57141 = torch.constant.int 0
    %59659 = torch.prim.ListConstruct %int1_57140, %int0_57141 : (!torch.int, !torch.int) -> !torch.list<int>
    %59660 = torch.aten.permute %2222, %59659 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57142 = torch.constant.int 1
    %int0_57143 = torch.constant.int 0
    %59661 = torch.prim.ListConstruct %int1_57142, %int0_57143 : (!torch.int, !torch.int) -> !torch.list<int>
    %59662 = torch.aten.permute %2223, %59661 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
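    // Per shard: flatten [4,?,4096] to [4*?,4096], matmul against the transposed weight, then reshape back to [4,?,1792].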
    %int4_57144 = torch.constant.int 4
    %59663 = torch.aten.mul.int %int4_57144, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57145 = torch.constant.int 4096
    %59664 = torch.prim.ListConstruct %59663, %int4096_57145 : (!torch.int, !torch.int) -> !torch.list<int>
    %59665 = torch.aten.view %59639, %59664 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59665, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59666 = torch.aten.mm %59665, %59648 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59666, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57146 = torch.constant.int 4
    %int1792_57147 = torch.constant.int 1792
    %59667 = torch.prim.ListConstruct %int4_57146, %2482, %int1792_57147 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59668 = torch.aten.view %59666, %59667 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59668, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57148 = torch.constant.int 4
    %59669 = torch.aten.mul.int %int4_57148, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57149 = torch.constant.int 4096
    %59670 = torch.prim.ListConstruct %59669, %int4096_57149 : (!torch.int, !torch.int) -> !torch.list<int>
    %59671 = torch.aten.view %59640, %59670 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59671, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59672 = torch.aten.mm %59671, %59650 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59672, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57150 = torch.constant.int 4
    %int1792_57151 = torch.constant.int 1792
    %59673 = torch.prim.ListConstruct %int4_57150, %2482, %int1792_57151 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59674 = torch.aten.view %59672, %59673 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57152 = torch.constant.int 4
    %59675 = torch.aten.mul.int %int4_57152, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57153 = torch.constant.int 4096
    %59676 = torch.prim.ListConstruct %59675, %int4096_57153 : (!torch.int, !torch.int) -> !torch.list<int>
    %59677 = torch.aten.view %59641, %59676 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59677, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59678 = torch.aten.mm %59677, %59652 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59678, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57154 = torch.constant.int 4
    %int1792_57155 = torch.constant.int 1792
    %59679 = torch.prim.ListConstruct %int4_57154, %2482, %int1792_57155 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59680 = torch.aten.view %59678, %59679 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59680, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57156 = torch.constant.int 4
    %59681 = torch.aten.mul.int %int4_57156, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57157 = torch.constant.int 4096
    %59682 = torch.prim.ListConstruct %59681, %int4096_57157 : (!torch.int, !torch.int) -> !torch.list<int>
    %59683 = torch.aten.view %59642, %59682 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59683, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59684 = torch.aten.mm %59683, %59654 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59684, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57158 = torch.constant.int 4
    %int1792_57159 = torch.constant.int 1792
    %59685 = torch.prim.ListConstruct %int4_57158, %2482, %int1792_57159 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59686 = torch.aten.view %59684, %59685 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59686, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57160 = torch.constant.int 4
    %59687 = torch.aten.mul.int %int4_57160, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57161 = torch.constant.int 4096
    %59688 = torch.prim.ListConstruct %59687, %int4096_57161 : (!torch.int, !torch.int) -> !torch.list<int>
    %59689 = torch.aten.view %59643, %59688 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59689, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59690 = torch.aten.mm %59689, %59656 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59690, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57162 = torch.constant.int 4
    %int1792_57163 = torch.constant.int 1792
    %59691 = torch.prim.ListConstruct %int4_57162, %2482, %int1792_57163 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59692 = torch.aten.view %59690, %59691 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57164 = torch.constant.int 4
    %59693 = torch.aten.mul.int %int4_57164, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57165 = torch.constant.int 4096
    %59694 = torch.prim.ListConstruct %59693, %int4096_57165 : (!torch.int, !torch.int) -> !torch.list<int>
    %59695 = torch.aten.view %59644, %59694 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59695, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59696 = torch.aten.mm %59695, %59658 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59696, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57166 = torch.constant.int 4
    %int1792_57167 = torch.constant.int 1792
    %59697 = torch.prim.ListConstruct %int4_57166, %2482, %int1792_57167 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59698 = torch.aten.view %59696, %59697 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59698, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57168 = torch.constant.int 4
    %59699 = torch.aten.mul.int %int4_57168, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57169 = torch.constant.int 4096
    %59700 = torch.prim.ListConstruct %59699, %int4096_57169 : (!torch.int, !torch.int) -> !torch.list<int>
    %59701 = torch.aten.view %59645, %59700 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59701, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59702 = torch.aten.mm %59701, %59660 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59702, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57170 = torch.constant.int 4
    %int1792_57171 = torch.constant.int 1792
    %59703 = torch.prim.ListConstruct %int4_57170, %2482, %int1792_57171 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59704 = torch.aten.view %59702, %59703 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59704, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57172 = torch.constant.int 4
    %59705 = torch.aten.mul.int %int4_57172, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57173 = torch.constant.int 4096
    %59706 = torch.prim.ListConstruct %59705, %int4096_57173 : (!torch.int, !torch.int) -> !torch.list<int>
    %59707 = torch.aten.view %59646, %59706 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59707, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59708 = torch.aten.mm %59707, %59662 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59708, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57174 = torch.constant.int 4
    %int1792_57175 = torch.constant.int 1792
    %59709 = torch.prim.ListConstruct %int4_57174, %2482, %int1792_57175 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59710 = torch.aten.view %59708, %59709 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59710, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
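    // SiLU activation on each projection output; feeding SiLU suggests these were the gate projections of a SwiGLU-style MLP.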
    %59711 = torch.aten.silu %59668 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59711, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59712 = torch.aten.silu %59674 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59712, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59713 = torch.aten.silu %59680 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59713, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59714 = torch.aten.silu %59686 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59714, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59715 = torch.aten.silu %59692 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59715, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59716 = torch.aten.silu %59698 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59717 = torch.aten.silu %59704 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59718 = torch.aten.silu %59710 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
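    // Transpose the second set of [1792,4096] shards, presumably the up projection that is multiplied against the SiLU output further down.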
    %int1_57176 = torch.constant.int 1
    %int0_57177 = torch.constant.int 0
    %59719 = torch.prim.ListConstruct %int1_57176, %int0_57177 : (!torch.int, !torch.int) -> !torch.list<int>
    %59720 = torch.aten.permute %2224, %59719 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57178 = torch.constant.int 1
    %int0_57179 = torch.constant.int 0
    %59721 = torch.prim.ListConstruct %int1_57178, %int0_57179 : (!torch.int, !torch.int) -> !torch.list<int>
    %59722 = torch.aten.permute %2225, %59721 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57180 = torch.constant.int 1
    %int0_57181 = torch.constant.int 0
    %59723 = torch.prim.ListConstruct %int1_57180, %int0_57181 : (!torch.int, !torch.int) -> !torch.list<int>
    %59724 = torch.aten.permute %2226, %59723 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57182 = torch.constant.int 1
    %int0_57183 = torch.constant.int 0
    %59725 = torch.prim.ListConstruct %int1_57182, %int0_57183 : (!torch.int, !torch.int) -> !torch.list<int>
    %59726 = torch.aten.permute %2227, %59725 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57184 = torch.constant.int 1
    %int0_57185 = torch.constant.int 0
    %59727 = torch.prim.ListConstruct %int1_57184, %int0_57185 : (!torch.int, !torch.int) -> !torch.list<int>
    %59728 = torch.aten.permute %2228, %59727 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57186 = torch.constant.int 1
    %int0_57187 = torch.constant.int 0
    %59729 = torch.prim.ListConstruct %int1_57186, %int0_57187 : (!torch.int, !torch.int) -> !torch.list<int>
    %59730 = torch.aten.permute %2229, %59729 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57188 = torch.constant.int 1
    %int0_57189 = torch.constant.int 0
    %59731 = torch.prim.ListConstruct %int1_57188, %int0_57189 : (!torch.int, !torch.int) -> !torch.list<int>
    %59732 = torch.aten.permute %2230, %59731 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_57190 = torch.constant.int 1
    %int0_57191 = torch.constant.int 0
    %59733 = torch.prim.ListConstruct %int1_57190, %int0_57191 : (!torch.int, !torch.int) -> !torch.list<int>
    %59734 = torch.aten.permute %2231, %59733 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
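    // Up projections: the same flatten/matmul/reshape pattern, applied to the same normalized inputs with the second weight set.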
    %int4_57192 = torch.constant.int 4
    %59735 = torch.aten.mul.int %int4_57192, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57193 = torch.constant.int 4096
    %59736 = torch.prim.ListConstruct %59735, %int4096_57193 : (!torch.int, !torch.int) -> !torch.list<int>
    %59737 = torch.aten.view %59639, %59736 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59737, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59738 = torch.aten.mm %59737, %59720 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59738, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57194 = torch.constant.int 4
    %int1792_57195 = torch.constant.int 1792
    %59739 = torch.prim.ListConstruct %int4_57194, %2482, %int1792_57195 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59740 = torch.aten.view %59738, %59739 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59740, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57196 = torch.constant.int 4
    %59741 = torch.aten.mul.int %int4_57196, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57197 = torch.constant.int 4096
    %59742 = torch.prim.ListConstruct %59741, %int4096_57197 : (!torch.int, !torch.int) -> !torch.list<int>
    %59743 = torch.aten.view %59640, %59742 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59743, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59744 = torch.aten.mm %59743, %59722 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59744, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57198 = torch.constant.int 4
    %int1792_57199 = torch.constant.int 1792
    %59745 = torch.prim.ListConstruct %int4_57198, %2482, %int1792_57199 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59746 = torch.aten.view %59744, %59745 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57200 = torch.constant.int 4
    %59747 = torch.aten.mul.int %int4_57200, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57201 = torch.constant.int 4096
    %59748 = torch.prim.ListConstruct %59747, %int4096_57201 : (!torch.int, !torch.int) -> !torch.list<int>
    %59749 = torch.aten.view %59641, %59748 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59749, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59750 = torch.aten.mm %59749, %59724 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59750, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57202 = torch.constant.int 4
    %int1792_57203 = torch.constant.int 1792
    %59751 = torch.prim.ListConstruct %int4_57202, %2482, %int1792_57203 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59752 = torch.aten.view %59750, %59751 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59752, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57204 = torch.constant.int 4
    %59753 = torch.aten.mul.int %int4_57204, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57205 = torch.constant.int 4096
    %59754 = torch.prim.ListConstruct %59753, %int4096_57205 : (!torch.int, !torch.int) -> !torch.list<int>
    %59755 = torch.aten.view %59642, %59754 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59755, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59756 = torch.aten.mm %59755, %59726 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59756, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57206 = torch.constant.int 4
    %int1792_57207 = torch.constant.int 1792
    %59757 = torch.prim.ListConstruct %int4_57206, %2482, %int1792_57207 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59758 = torch.aten.view %59756, %59757 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59758, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57208 = torch.constant.int 4
    %59759 = torch.aten.mul.int %int4_57208, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57209 = torch.constant.int 4096
    %59760 = torch.prim.ListConstruct %59759, %int4096_57209 : (!torch.int, !torch.int) -> !torch.list<int>
    %59761 = torch.aten.view %59643, %59760 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59761, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59762 = torch.aten.mm %59761, %59728 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59762, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57210 = torch.constant.int 4
    %int1792_57211 = torch.constant.int 1792
    %59763 = torch.prim.ListConstruct %int4_57210, %2482, %int1792_57211 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59764 = torch.aten.view %59762, %59763 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59764, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57212 = torch.constant.int 4
    %59765 = torch.aten.mul.int %int4_57212, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57213 = torch.constant.int 4096
    %59766 = torch.prim.ListConstruct %59765, %int4096_57213 : (!torch.int, !torch.int) -> !torch.list<int>
    %59767 = torch.aten.view %59644, %59766 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59767, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59768 = torch.aten.mm %59767, %59730 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59768, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57214 = torch.constant.int 4
    %int1792_57215 = torch.constant.int 1792
    %59769 = torch.prim.ListConstruct %int4_57214, %2482, %int1792_57215 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59770 = torch.aten.view %59768, %59769 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59770, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57216 = torch.constant.int 4
    %59771 = torch.aten.mul.int %int4_57216, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57217 = torch.constant.int 4096
    %59772 = torch.prim.ListConstruct %59771, %int4096_57217 : (!torch.int, !torch.int) -> !torch.list<int>
    %59773 = torch.aten.view %59645, %59772 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59773, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59774 = torch.aten.mm %59773, %59732 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59774, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57218 = torch.constant.int 4
    %int1792_57219 = torch.constant.int 1792
    %59775 = torch.prim.ListConstruct %int4_57218, %2482, %int1792_57219 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59776 = torch.aten.view %59774, %59775 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_57220 = torch.constant.int 4
    %59777 = torch.aten.mul.int %int4_57220, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57221 = torch.constant.int 4096
    %59778 = torch.prim.ListConstruct %59777, %int4096_57221 : (!torch.int, !torch.int) -> !torch.list<int>
    %59779 = torch.aten.view %59646, %59778 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59779, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %59780 = torch.aten.mm %59779, %59734 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59780, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_57222 = torch.constant.int 4
    %int1792_57223 = torch.constant.int 1792
    %59781 = torch.prim.ListConstruct %int4_57222, %2482, %int1792_57223 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59782 = torch.aten.view %59780, %59781 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
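    // Elementwise gating, silu(gate) * up on each device, consistent with a SwiGLU feed-forward block.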
    %59783 = torch.aten.mul.Tensor %59711, %59740 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59783, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59784 = torch.aten.mul.Tensor %59712, %59746 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59784, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59785 = torch.aten.mul.Tensor %59713, %59752 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59786 = torch.aten.mul.Tensor %59714, %59758 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59786, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59787 = torch.aten.mul.Tensor %59715, %59764 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59787, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59788 = torch.aten.mul.Tensor %59716, %59770 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59789 = torch.aten.mul.Tensor %59717, %59776 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59789, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %59790 = torch.aten.mul.Tensor %59718, %59782 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %59790, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
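    // Transpose the eight [4096,1792] down-projection weight shards to [1792,4096].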
    %int1_57224 = torch.constant.int 1
    %int0_57225 = torch.constant.int 0
    %59791 = torch.prim.ListConstruct %int1_57224, %int0_57225 : (!torch.int, !torch.int) -> !torch.list<int>
    %59792 = torch.aten.permute %2232, %59791 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_57226 = torch.constant.int 1
    %int0_57227 = torch.constant.int 0
    %59793 = torch.prim.ListConstruct %int1_57226, %int0_57227 : (!torch.int, !torch.int) -> !torch.list<int>
    %59794 = torch.aten.permute %2233, %59793 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_57228 = torch.constant.int 1
    %int0_57229 = torch.constant.int 0
    %59795 = torch.prim.ListConstruct %int1_57228, %int0_57229 : (!torch.int, !torch.int) -> !torch.list<int>
    %59796 = torch.aten.permute %2234, %59795 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_57230 = torch.constant.int 1
    %int0_57231 = torch.constant.int 0
    %59797 = torch.prim.ListConstruct %int1_57230, %int0_57231 : (!torch.int, !torch.int) -> !torch.list<int>
    %59798 = torch.aten.permute %2235, %59797 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_57232 = torch.constant.int 1
    %int0_57233 = torch.constant.int 0
    %59799 = torch.prim.ListConstruct %int1_57232, %int0_57233 : (!torch.int, !torch.int) -> !torch.list<int>
    %59800 = torch.aten.permute %2236, %59799 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_57234 = torch.constant.int 1
    %int0_57235 = torch.constant.int 0
    %59801 = torch.prim.ListConstruct %int1_57234, %int0_57235 : (!torch.int, !torch.int) -> !torch.list<int>
    %59802 = torch.aten.permute %2237, %59801 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_57236 = torch.constant.int 1
    %int0_57237 = torch.constant.int 0
    %59803 = torch.prim.ListConstruct %int1_57236, %int0_57237 : (!torch.int, !torch.int) -> !torch.list<int>
    %59804 = torch.aten.permute %2238, %59803 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_57238 = torch.constant.int 1
    %int0_57239 = torch.constant.int 0
    %59805 = torch.prim.ListConstruct %int1_57238, %int0_57239 : (!torch.int, !torch.int) -> !torch.list<int>
    %59806 = torch.aten.permute %2239, %59805 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
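    // Down projection per shard: [4*?,1792] x [1792,4096] back to the 4096 hidden dimension. With the intermediate dimension sharded, each device now holds only a partial sum of the full FFN output.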
    %int1_57240 = torch.constant.int 1
    %59807 = torch.aten.size.int %59668, %int1_57240 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_57241 = torch.constant.int 4
    %59808 = torch.aten.mul.int %int4_57241, %59807 : !torch.int, !torch.int -> !torch.int
    %int1792_57242 = torch.constant.int 1792
    %59809 = torch.prim.ListConstruct %59808, %int1792_57242 : (!torch.int, !torch.int) -> !torch.list<int>
    %59810 = torch.aten.view %59783, %59809 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59810, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %59811 = torch.aten.mm %59810, %59792 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59811, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_57243 = torch.constant.int 4
    %int4096_57244 = torch.constant.int 4096
    %59812 = torch.prim.ListConstruct %int4_57243, %59807, %int4096_57244 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59813 = torch.aten.view %59811, %59812 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57245 = torch.constant.int 1
    %59814 = torch.aten.size.int %59674, %int1_57245 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_57246 = torch.constant.int 4
    %59815 = torch.aten.mul.int %int4_57246, %59814 : !torch.int, !torch.int -> !torch.int
    %int1792_57247 = torch.constant.int 1792
    %59816 = torch.prim.ListConstruct %59815, %int1792_57247 : (!torch.int, !torch.int) -> !torch.list<int>
    %59817 = torch.aten.view %59784, %59816 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59817, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %59818 = torch.aten.mm %59817, %59794 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59818, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_57248 = torch.constant.int 4
    %int4096_57249 = torch.constant.int 4096
    %59819 = torch.prim.ListConstruct %int4_57248, %59814, %int4096_57249 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59820 = torch.aten.view %59818, %59819 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59820, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57250 = torch.constant.int 1
    %59821 = torch.aten.size.int %59680, %int1_57250 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_57251 = torch.constant.int 4
    %59822 = torch.aten.mul.int %int4_57251, %59821 : !torch.int, !torch.int -> !torch.int
    %int1792_57252 = torch.constant.int 1792
    %59823 = torch.prim.ListConstruct %59822, %int1792_57252 : (!torch.int, !torch.int) -> !torch.list<int>
    %59824 = torch.aten.view %59785, %59823 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59824, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %59825 = torch.aten.mm %59824, %59796 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59825, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_57253 = torch.constant.int 4
    %int4096_57254 = torch.constant.int 4096
    %59826 = torch.prim.ListConstruct %int4_57253, %59821, %int4096_57254 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59827 = torch.aten.view %59825, %59826 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59827, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57255 = torch.constant.int 1
    %59828 = torch.aten.size.int %59686, %int1_57255 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_57256 = torch.constant.int 4
    %59829 = torch.aten.mul.int %int4_57256, %59828 : !torch.int, !torch.int -> !torch.int
    %int1792_57257 = torch.constant.int 1792
    %59830 = torch.prim.ListConstruct %59829, %int1792_57257 : (!torch.int, !torch.int) -> !torch.list<int>
    %59831 = torch.aten.view %59786, %59830 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59831, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %59832 = torch.aten.mm %59831, %59798 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59832, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_57258 = torch.constant.int 4
    %int4096_57259 = torch.constant.int 4096
    %59833 = torch.prim.ListConstruct %int4_57258, %59828, %int4096_57259 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59834 = torch.aten.view %59832, %59833 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57260 = torch.constant.int 1
    %59835 = torch.aten.size.int %59692, %int1_57260 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_57261 = torch.constant.int 4
    %59836 = torch.aten.mul.int %int4_57261, %59835 : !torch.int, !torch.int -> !torch.int
    %int1792_57262 = torch.constant.int 1792
    %59837 = torch.prim.ListConstruct %59836, %int1792_57262 : (!torch.int, !torch.int) -> !torch.list<int>
    %59838 = torch.aten.view %59787, %59837 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59838, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %59839 = torch.aten.mm %59838, %59800 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59839, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_57263 = torch.constant.int 4
    %int4096_57264 = torch.constant.int 4096
    %59840 = torch.prim.ListConstruct %int4_57263, %59835, %int4096_57264 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59841 = torch.aten.view %59839, %59840 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57265 = torch.constant.int 1
    %59842 = torch.aten.size.int %59698, %int1_57265 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_57266 = torch.constant.int 4
    %59843 = torch.aten.mul.int %int4_57266, %59842 : !torch.int, !torch.int -> !torch.int
    %int1792_57267 = torch.constant.int 1792
    %59844 = torch.prim.ListConstruct %59843, %int1792_57267 : (!torch.int, !torch.int) -> !torch.list<int>
    %59845 = torch.aten.view %59788, %59844 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59845, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %59846 = torch.aten.mm %59845, %59802 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59846, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_57268 = torch.constant.int 4
    %int4096_57269 = torch.constant.int 4096
    %59847 = torch.prim.ListConstruct %int4_57268, %59842, %int4096_57269 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59848 = torch.aten.view %59846, %59847 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59848, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57270 = torch.constant.int 1
    %59849 = torch.aten.size.int %59704, %int1_57270 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_57271 = torch.constant.int 4
    %59850 = torch.aten.mul.int %int4_57271, %59849 : !torch.int, !torch.int -> !torch.int
    %int1792_57272 = torch.constant.int 1792
    %59851 = torch.prim.ListConstruct %59850, %int1792_57272 : (!torch.int, !torch.int) -> !torch.list<int>
    %59852 = torch.aten.view %59789, %59851 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59852, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %59853 = torch.aten.mm %59852, %59804 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59853, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_57273 = torch.constant.int 4
    %int4096_57274 = torch.constant.int 4096
    %59854 = torch.prim.ListConstruct %int4_57273, %59849, %int4096_57274 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59855 = torch.aten.view %59853, %59854 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59855, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57275 = torch.constant.int 1
    %59856 = torch.aten.size.int %59710, %int1_57275 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_57276 = torch.constant.int 4
    %59857 = torch.aten.mul.int %int4_57276, %59856 : !torch.int, !torch.int -> !torch.int
    %int1792_57277 = torch.constant.int 1792
    %59858 = torch.prim.ListConstruct %59857, %int1792_57277 : (!torch.int, !torch.int) -> !torch.list<int>
    %59859 = torch.aten.view %59790, %59858 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %59859, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %59860 = torch.aten.mm %59859, %59806 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %59860, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_57278 = torch.constant.int 4
    %int4096_57279 = torch.constant.int 4096
    %59861 = torch.prim.ListConstruct %int4_57278, %59856, %int4096_57279 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %59862 = torch.aten.view %59860, %59861 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
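    // Gather: transfer the seven partial results resident on devices 1-7 to @__device_0 (device 0's own partial, %59813, needs no transfer).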
    %59863 = torch_c.to_builtin_tensor %59820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57280 = arith.constant 1 : index
    %dim_57281 = tensor.dim %59863, %c1_57280 : tensor<4x?x4096xf16>
    %59864 = flow.tensor.transfer %59863 : tensor<4x?x4096xf16>{%dim_57281} to #hal.device.promise<@__device_0>
    %59865 = torch_c.from_builtin_tensor %59864 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59865, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59866 = torch_c.to_builtin_tensor %59827 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57282 = arith.constant 1 : index
    %dim_57283 = tensor.dim %59866, %c1_57282 : tensor<4x?x4096xf16>
    %59867 = flow.tensor.transfer %59866 : tensor<4x?x4096xf16>{%dim_57283} to #hal.device.promise<@__device_0>
    %59868 = torch_c.from_builtin_tensor %59867 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59868, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59869 = torch_c.to_builtin_tensor %59834 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57284 = arith.constant 1 : index
    %dim_57285 = tensor.dim %59869, %c1_57284 : tensor<4x?x4096xf16>
    %59870 = flow.tensor.transfer %59869 : tensor<4x?x4096xf16>{%dim_57285} to #hal.device.promise<@__device_0>
    %59871 = torch_c.from_builtin_tensor %59870 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59871, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59872 = torch_c.to_builtin_tensor %59841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57286 = arith.constant 1 : index
    %dim_57287 = tensor.dim %59872, %c1_57286 : tensor<4x?x4096xf16>
    %59873 = flow.tensor.transfer %59872 : tensor<4x?x4096xf16>{%dim_57287} to #hal.device.promise<@__device_0>
    %59874 = torch_c.from_builtin_tensor %59873 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59874, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59875 = torch_c.to_builtin_tensor %59848 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57288 = arith.constant 1 : index
    %dim_57289 = tensor.dim %59875, %c1_57288 : tensor<4x?x4096xf16>
    %59876 = flow.tensor.transfer %59875 : tensor<4x?x4096xf16>{%dim_57289} to #hal.device.promise<@__device_0>
    %59877 = torch_c.from_builtin_tensor %59876 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59877, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59878 = torch_c.to_builtin_tensor %59855 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57290 = arith.constant 1 : index
    %dim_57291 = tensor.dim %59878, %c1_57290 : tensor<4x?x4096xf16>
    %59879 = flow.tensor.transfer %59878 : tensor<4x?x4096xf16>{%dim_57291} to #hal.device.promise<@__device_0>
    %59880 = torch_c.from_builtin_tensor %59879 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59880, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59881 = torch_c.to_builtin_tensor %59862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57292 = arith.constant 1 : index
    %dim_57293 = tensor.dim %59881, %c1_57292 : tensor<4x?x4096xf16>
    %59882 = flow.tensor.transfer %59881 : tensor<4x?x4096xf16>{%dim_57293} to #hal.device.promise<@__device_0>
    %59883 = torch_c.from_builtin_tensor %59882 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59883, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57294 = torch.constant.int 1
    %59884 = torch.aten.add.Tensor %59813, %59865, %int1_57294 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57295 = torch.constant.int 1
    %59885 = torch.aten.add.Tensor %59884, %59868, %int1_57295 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57296 = torch.constant.int 1
    %59886 = torch.aten.add.Tensor %59885, %59871, %int1_57296 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57297 = torch.constant.int 1
    %59887 = torch.aten.add.Tensor %59886, %59874, %int1_57297 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57298 = torch.constant.int 1
    %59888 = torch.aten.add.Tensor %59887, %59877, %int1_57298 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57299 = torch.constant.int 1
    %59889 = torch.aten.add.Tensor %59888, %59880, %int1_57299 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57300 = torch.constant.int 1
    %59890 = torch.aten.add.Tensor %59889, %59883, %int1_57300 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
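    // %59890 is the fully reduced FFN output resident on @__device_0. The
    // same transfer-and-accumulate pattern repeats below for @__device_1,
    // producing %59918.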
    %59891 = torch_c.to_builtin_tensor %59813 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57301 = arith.constant 1 : index
    %dim_57302 = tensor.dim %59891, %c1_57301 : tensor<4x?x4096xf16>
    %59892 = flow.tensor.transfer %59891 : tensor<4x?x4096xf16>{%dim_57302} to #hal.device.promise<@__device_1>
    %59893 = torch_c.from_builtin_tensor %59892 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59893, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59894 = torch_c.to_builtin_tensor %59827 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57303 = arith.constant 1 : index
    %dim_57304 = tensor.dim %59894, %c1_57303 : tensor<4x?x4096xf16>
    %59895 = flow.tensor.transfer %59894 : tensor<4x?x4096xf16>{%dim_57304} to #hal.device.promise<@__device_1>
    %59896 = torch_c.from_builtin_tensor %59895 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59896, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59897 = torch_c.to_builtin_tensor %59834 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57305 = arith.constant 1 : index
    %dim_57306 = tensor.dim %59897, %c1_57305 : tensor<4x?x4096xf16>
    %59898 = flow.tensor.transfer %59897 : tensor<4x?x4096xf16>{%dim_57306} to #hal.device.promise<@__device_1>
    %59899 = torch_c.from_builtin_tensor %59898 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59899, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59900 = torch_c.to_builtin_tensor %59841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57307 = arith.constant 1 : index
    %dim_57308 = tensor.dim %59900, %c1_57307 : tensor<4x?x4096xf16>
    %59901 = flow.tensor.transfer %59900 : tensor<4x?x4096xf16>{%dim_57308} to #hal.device.promise<@__device_1>
    %59902 = torch_c.from_builtin_tensor %59901 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59902, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59903 = torch_c.to_builtin_tensor %59848 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57309 = arith.constant 1 : index
    %dim_57310 = tensor.dim %59903, %c1_57309 : tensor<4x?x4096xf16>
    %59904 = flow.tensor.transfer %59903 : tensor<4x?x4096xf16>{%dim_57310} to #hal.device.promise<@__device_1>
    %59905 = torch_c.from_builtin_tensor %59904 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59905, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59906 = torch_c.to_builtin_tensor %59855 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57311 = arith.constant 1 : index
    %dim_57312 = tensor.dim %59906, %c1_57311 : tensor<4x?x4096xf16>
    %59907 = flow.tensor.transfer %59906 : tensor<4x?x4096xf16>{%dim_57312} to #hal.device.promise<@__device_1>
    %59908 = torch_c.from_builtin_tensor %59907 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59908, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59909 = torch_c.to_builtin_tensor %59862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57313 = arith.constant 1 : index
    %dim_57314 = tensor.dim %59909, %c1_57313 : tensor<4x?x4096xf16>
    %59910 = flow.tensor.transfer %59909 : tensor<4x?x4096xf16>{%dim_57314} to #hal.device.promise<@__device_1>
    %59911 = torch_c.from_builtin_tensor %59910 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59911, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57315 = torch.constant.int 1
    %59912 = torch.aten.add.Tensor %59893, %59820, %int1_57315 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57316 = torch.constant.int 1
    %59913 = torch.aten.add.Tensor %59912, %59896, %int1_57316 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57317 = torch.constant.int 1
    %59914 = torch.aten.add.Tensor %59913, %59899, %int1_57317 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57318 = torch.constant.int 1
    %59915 = torch.aten.add.Tensor %59914, %59902, %int1_57318 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57319 = torch.constant.int 1
    %59916 = torch.aten.add.Tensor %59915, %59905, %int1_57319 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57320 = torch.constant.int 1
    %59917 = torch.aten.add.Tensor %59916, %59908, %int1_57320 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57321 = torch.constant.int 1
    %59918 = torch.aten.add.Tensor %59917, %59911, %int1_57321 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
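    // %59918: reduced FFN output on @__device_1; next target @__device_2 (%59946).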
    %59919 = torch_c.to_builtin_tensor %59813 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57322 = arith.constant 1 : index
    %dim_57323 = tensor.dim %59919, %c1_57322 : tensor<4x?x4096xf16>
    %59920 = flow.tensor.transfer %59919 : tensor<4x?x4096xf16>{%dim_57323} to #hal.device.promise<@__device_2>
    %59921 = torch_c.from_builtin_tensor %59920 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59921, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59922 = torch_c.to_builtin_tensor %59820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57324 = arith.constant 1 : index
    %dim_57325 = tensor.dim %59922, %c1_57324 : tensor<4x?x4096xf16>
    %59923 = flow.tensor.transfer %59922 : tensor<4x?x4096xf16>{%dim_57325} to #hal.device.promise<@__device_2>
    %59924 = torch_c.from_builtin_tensor %59923 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59924, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59925 = torch_c.to_builtin_tensor %59834 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57326 = arith.constant 1 : index
    %dim_57327 = tensor.dim %59925, %c1_57326 : tensor<4x?x4096xf16>
    %59926 = flow.tensor.transfer %59925 : tensor<4x?x4096xf16>{%dim_57327} to #hal.device.promise<@__device_2>
    %59927 = torch_c.from_builtin_tensor %59926 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59927, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59928 = torch_c.to_builtin_tensor %59841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57328 = arith.constant 1 : index
    %dim_57329 = tensor.dim %59928, %c1_57328 : tensor<4x?x4096xf16>
    %59929 = flow.tensor.transfer %59928 : tensor<4x?x4096xf16>{%dim_57329} to #hal.device.promise<@__device_2>
    %59930 = torch_c.from_builtin_tensor %59929 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59930, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59931 = torch_c.to_builtin_tensor %59848 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57330 = arith.constant 1 : index
    %dim_57331 = tensor.dim %59931, %c1_57330 : tensor<4x?x4096xf16>
    %59932 = flow.tensor.transfer %59931 : tensor<4x?x4096xf16>{%dim_57331} to #hal.device.promise<@__device_2>
    %59933 = torch_c.from_builtin_tensor %59932 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59933, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59934 = torch_c.to_builtin_tensor %59855 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57332 = arith.constant 1 : index
    %dim_57333 = tensor.dim %59934, %c1_57332 : tensor<4x?x4096xf16>
    %59935 = flow.tensor.transfer %59934 : tensor<4x?x4096xf16>{%dim_57333} to #hal.device.promise<@__device_2>
    %59936 = torch_c.from_builtin_tensor %59935 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59936, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59937 = torch_c.to_builtin_tensor %59862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57334 = arith.constant 1 : index
    %dim_57335 = tensor.dim %59937, %c1_57334 : tensor<4x?x4096xf16>
    %59938 = flow.tensor.transfer %59937 : tensor<4x?x4096xf16>{%dim_57335} to #hal.device.promise<@__device_2>
    %59939 = torch_c.from_builtin_tensor %59938 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59939, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57336 = torch.constant.int 1
    %59940 = torch.aten.add.Tensor %59921, %59924, %int1_57336 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57337 = torch.constant.int 1
    %59941 = torch.aten.add.Tensor %59940, %59827, %int1_57337 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57338 = torch.constant.int 1
    %59942 = torch.aten.add.Tensor %59941, %59927, %int1_57338 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57339 = torch.constant.int 1
    %59943 = torch.aten.add.Tensor %59942, %59930, %int1_57339 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57340 = torch.constant.int 1
    %59944 = torch.aten.add.Tensor %59943, %59933, %int1_57340 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57341 = torch.constant.int 1
    %59945 = torch.aten.add.Tensor %59944, %59936, %int1_57341 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57342 = torch.constant.int 1
    %59946 = torch.aten.add.Tensor %59945, %59939, %int1_57342 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
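    // %59946: reduced FFN output on @__device_2; next target @__device_3 (%59974).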
    %59947 = torch_c.to_builtin_tensor %59813 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57343 = arith.constant 1 : index
    %dim_57344 = tensor.dim %59947, %c1_57343 : tensor<4x?x4096xf16>
    %59948 = flow.tensor.transfer %59947 : tensor<4x?x4096xf16>{%dim_57344} to #hal.device.promise<@__device_3>
    %59949 = torch_c.from_builtin_tensor %59948 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59950 = torch_c.to_builtin_tensor %59820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57345 = arith.constant 1 : index
    %dim_57346 = tensor.dim %59950, %c1_57345 : tensor<4x?x4096xf16>
    %59951 = flow.tensor.transfer %59950 : tensor<4x?x4096xf16>{%dim_57346} to #hal.device.promise<@__device_3>
    %59952 = torch_c.from_builtin_tensor %59951 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59953 = torch_c.to_builtin_tensor %59827 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57347 = arith.constant 1 : index
    %dim_57348 = tensor.dim %59953, %c1_57347 : tensor<4x?x4096xf16>
    %59954 = flow.tensor.transfer %59953 : tensor<4x?x4096xf16>{%dim_57348} to #hal.device.promise<@__device_3>
    %59955 = torch_c.from_builtin_tensor %59954 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59956 = torch_c.to_builtin_tensor %59841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57349 = arith.constant 1 : index
    %dim_57350 = tensor.dim %59956, %c1_57349 : tensor<4x?x4096xf16>
    %59957 = flow.tensor.transfer %59956 : tensor<4x?x4096xf16>{%dim_57350} to #hal.device.promise<@__device_3>
    %59958 = torch_c.from_builtin_tensor %59957 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59959 = torch_c.to_builtin_tensor %59848 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57351 = arith.constant 1 : index
    %dim_57352 = tensor.dim %59959, %c1_57351 : tensor<4x?x4096xf16>
    %59960 = flow.tensor.transfer %59959 : tensor<4x?x4096xf16>{%dim_57352} to #hal.device.promise<@__device_3>
    %59961 = torch_c.from_builtin_tensor %59960 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59962 = torch_c.to_builtin_tensor %59855 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57353 = arith.constant 1 : index
    %dim_57354 = tensor.dim %59962, %c1_57353 : tensor<4x?x4096xf16>
    %59963 = flow.tensor.transfer %59962 : tensor<4x?x4096xf16>{%dim_57354} to #hal.device.promise<@__device_3>
    %59964 = torch_c.from_builtin_tensor %59963 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59965 = torch_c.to_builtin_tensor %59862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57355 = arith.constant 1 : index
    %dim_57356 = tensor.dim %59965, %c1_57355 : tensor<4x?x4096xf16>
    %59966 = flow.tensor.transfer %59965 : tensor<4x?x4096xf16>{%dim_57356} to #hal.device.promise<@__device_3>
    %59967 = torch_c.from_builtin_tensor %59966 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57357 = torch.constant.int 1
    %59968 = torch.aten.add.Tensor %59949, %59952, %int1_57357 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57358 = torch.constant.int 1
    %59969 = torch.aten.add.Tensor %59968, %59955, %int1_57358 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57359 = torch.constant.int 1
    %59970 = torch.aten.add.Tensor %59969, %59834, %int1_57359 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57360 = torch.constant.int 1
    %59971 = torch.aten.add.Tensor %59970, %59958, %int1_57360 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57361 = torch.constant.int 1
    %59972 = torch.aten.add.Tensor %59971, %59961, %int1_57361 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59972, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57362 = torch.constant.int 1
    %59973 = torch.aten.add.Tensor %59972, %59964, %int1_57362 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57363 = torch.constant.int 1
    %59974 = torch.aten.add.Tensor %59973, %59967, %int1_57363 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59974, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
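    // %59974: reduced FFN output on @__device_3; next target @__device_4 (%60002).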
    %59975 = torch_c.to_builtin_tensor %59813 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57364 = arith.constant 1 : index
    %dim_57365 = tensor.dim %59975, %c1_57364 : tensor<4x?x4096xf16>
    %59976 = flow.tensor.transfer %59975 : tensor<4x?x4096xf16>{%dim_57365} to #hal.device.promise<@__device_4>
    %59977 = torch_c.from_builtin_tensor %59976 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59978 = torch_c.to_builtin_tensor %59820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57366 = arith.constant 1 : index
    %dim_57367 = tensor.dim %59978, %c1_57366 : tensor<4x?x4096xf16>
    %59979 = flow.tensor.transfer %59978 : tensor<4x?x4096xf16>{%dim_57367} to #hal.device.promise<@__device_4>
    %59980 = torch_c.from_builtin_tensor %59979 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59980, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59981 = torch_c.to_builtin_tensor %59827 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57368 = arith.constant 1 : index
    %dim_57369 = tensor.dim %59981, %c1_57368 : tensor<4x?x4096xf16>
    %59982 = flow.tensor.transfer %59981 : tensor<4x?x4096xf16>{%dim_57369} to #hal.device.promise<@__device_4>
    %59983 = torch_c.from_builtin_tensor %59982 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59984 = torch_c.to_builtin_tensor %59834 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57370 = arith.constant 1 : index
    %dim_57371 = tensor.dim %59984, %c1_57370 : tensor<4x?x4096xf16>
    %59985 = flow.tensor.transfer %59984 : tensor<4x?x4096xf16>{%dim_57371} to #hal.device.promise<@__device_4>
    %59986 = torch_c.from_builtin_tensor %59985 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59986, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59987 = torch_c.to_builtin_tensor %59848 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57372 = arith.constant 1 : index
    %dim_57373 = tensor.dim %59987, %c1_57372 : tensor<4x?x4096xf16>
    %59988 = flow.tensor.transfer %59987 : tensor<4x?x4096xf16>{%dim_57373} to #hal.device.promise<@__device_4>
    %59989 = torch_c.from_builtin_tensor %59988 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59990 = torch_c.to_builtin_tensor %59855 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57374 = arith.constant 1 : index
    %dim_57375 = tensor.dim %59990, %c1_57374 : tensor<4x?x4096xf16>
    %59991 = flow.tensor.transfer %59990 : tensor<4x?x4096xf16>{%dim_57375} to #hal.device.promise<@__device_4>
    %59992 = torch_c.from_builtin_tensor %59991 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %59993 = torch_c.to_builtin_tensor %59862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57376 = arith.constant 1 : index
    %dim_57377 = tensor.dim %59993, %c1_57376 : tensor<4x?x4096xf16>
    %59994 = flow.tensor.transfer %59993 : tensor<4x?x4096xf16>{%dim_57377} to #hal.device.promise<@__device_4>
    %59995 = torch_c.from_builtin_tensor %59994 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57378 = torch.constant.int 1
    %59996 = torch.aten.add.Tensor %59977, %59980, %int1_57378 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57379 = torch.constant.int 1
    %59997 = torch.aten.add.Tensor %59996, %59983, %int1_57379 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57380 = torch.constant.int 1
    %59998 = torch.aten.add.Tensor %59997, %59986, %int1_57380 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57381 = torch.constant.int 1
    %59999 = torch.aten.add.Tensor %59998, %59841, %int1_57381 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %59999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57382 = torch.constant.int 1
    %60000 = torch.aten.add.Tensor %59999, %59989, %int1_57382 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57383 = torch.constant.int 1
    %60001 = torch.aten.add.Tensor %60000, %59992, %int1_57383 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57384 = torch.constant.int 1
    %60002 = torch.aten.add.Tensor %60001, %59995, %int1_57384 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
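    // %60002: reduced FFN output on @__device_4; next target @__device_5 (%60030).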
    %60003 = torch_c.to_builtin_tensor %59813 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57385 = arith.constant 1 : index
    %dim_57386 = tensor.dim %60003, %c1_57385 : tensor<4x?x4096xf16>
    %60004 = flow.tensor.transfer %60003 : tensor<4x?x4096xf16>{%dim_57386} to #hal.device.promise<@__device_5>
    %60005 = torch_c.from_builtin_tensor %60004 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60006 = torch_c.to_builtin_tensor %59820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57387 = arith.constant 1 : index
    %dim_57388 = tensor.dim %60006, %c1_57387 : tensor<4x?x4096xf16>
    %60007 = flow.tensor.transfer %60006 : tensor<4x?x4096xf16>{%dim_57388} to #hal.device.promise<@__device_5>
    %60008 = torch_c.from_builtin_tensor %60007 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60009 = torch_c.to_builtin_tensor %59827 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57389 = arith.constant 1 : index
    %dim_57390 = tensor.dim %60009, %c1_57389 : tensor<4x?x4096xf16>
    %60010 = flow.tensor.transfer %60009 : tensor<4x?x4096xf16>{%dim_57390} to #hal.device.promise<@__device_5>
    %60011 = torch_c.from_builtin_tensor %60010 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60012 = torch_c.to_builtin_tensor %59834 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57391 = arith.constant 1 : index
    %dim_57392 = tensor.dim %60012, %c1_57391 : tensor<4x?x4096xf16>
    %60013 = flow.tensor.transfer %60012 : tensor<4x?x4096xf16>{%dim_57392} to #hal.device.promise<@__device_5>
    %60014 = torch_c.from_builtin_tensor %60013 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60015 = torch_c.to_builtin_tensor %59841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57393 = arith.constant 1 : index
    %dim_57394 = tensor.dim %60015, %c1_57393 : tensor<4x?x4096xf16>
    %60016 = flow.tensor.transfer %60015 : tensor<4x?x4096xf16>{%dim_57394} to #hal.device.promise<@__device_5>
    %60017 = torch_c.from_builtin_tensor %60016 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60018 = torch_c.to_builtin_tensor %59855 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57395 = arith.constant 1 : index
    %dim_57396 = tensor.dim %60018, %c1_57395 : tensor<4x?x4096xf16>
    %60019 = flow.tensor.transfer %60018 : tensor<4x?x4096xf16>{%dim_57396} to #hal.device.promise<@__device_5>
    %60020 = torch_c.from_builtin_tensor %60019 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60021 = torch_c.to_builtin_tensor %59862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57397 = arith.constant 1 : index
    %dim_57398 = tensor.dim %60021, %c1_57397 : tensor<4x?x4096xf16>
    %60022 = flow.tensor.transfer %60021 : tensor<4x?x4096xf16>{%dim_57398} to #hal.device.promise<@__device_5>
    %60023 = torch_c.from_builtin_tensor %60022 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57399 = torch.constant.int 1
    %60024 = torch.aten.add.Tensor %60005, %60008, %int1_57399 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57400 = torch.constant.int 1
    %60025 = torch.aten.add.Tensor %60024, %60011, %int1_57400 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57401 = torch.constant.int 1
    %60026 = torch.aten.add.Tensor %60025, %60014, %int1_57401 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57402 = torch.constant.int 1
    %60027 = torch.aten.add.Tensor %60026, %60017, %int1_57402 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57403 = torch.constant.int 1
    %60028 = torch.aten.add.Tensor %60027, %59848, %int1_57403 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57404 = torch.constant.int 1
    %60029 = torch.aten.add.Tensor %60028, %60020, %int1_57404 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60029, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57405 = torch.constant.int 1
    %60030 = torch.aten.add.Tensor %60029, %60023, %int1_57405 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
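    // %60030: reduced FFN output on @__device_5; next target @__device_6 (%60058).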
    %60031 = torch_c.to_builtin_tensor %59813 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57406 = arith.constant 1 : index
    %dim_57407 = tensor.dim %60031, %c1_57406 : tensor<4x?x4096xf16>
    %60032 = flow.tensor.transfer %60031 : tensor<4x?x4096xf16>{%dim_57407} to #hal.device.promise<@__device_6>
    %60033 = torch_c.from_builtin_tensor %60032 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60033, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60034 = torch_c.to_builtin_tensor %59820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57408 = arith.constant 1 : index
    %dim_57409 = tensor.dim %60034, %c1_57408 : tensor<4x?x4096xf16>
    %60035 = flow.tensor.transfer %60034 : tensor<4x?x4096xf16>{%dim_57409} to #hal.device.promise<@__device_6>
    %60036 = torch_c.from_builtin_tensor %60035 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60037 = torch_c.to_builtin_tensor %59827 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57410 = arith.constant 1 : index
    %dim_57411 = tensor.dim %60037, %c1_57410 : tensor<4x?x4096xf16>
    %60038 = flow.tensor.transfer %60037 : tensor<4x?x4096xf16>{%dim_57411} to #hal.device.promise<@__device_6>
    %60039 = torch_c.from_builtin_tensor %60038 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60039, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60040 = torch_c.to_builtin_tensor %59834 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57412 = arith.constant 1 : index
    %dim_57413 = tensor.dim %60040, %c1_57412 : tensor<4x?x4096xf16>
    %60041 = flow.tensor.transfer %60040 : tensor<4x?x4096xf16>{%dim_57413} to #hal.device.promise<@__device_6>
    %60042 = torch_c.from_builtin_tensor %60041 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60043 = torch_c.to_builtin_tensor %59841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57414 = arith.constant 1 : index
    %dim_57415 = tensor.dim %60043, %c1_57414 : tensor<4x?x4096xf16>
    %60044 = flow.tensor.transfer %60043 : tensor<4x?x4096xf16>{%dim_57415} to #hal.device.promise<@__device_6>
    %60045 = torch_c.from_builtin_tensor %60044 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60046 = torch_c.to_builtin_tensor %59848 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57416 = arith.constant 1 : index
    %dim_57417 = tensor.dim %60046, %c1_57416 : tensor<4x?x4096xf16>
    %60047 = flow.tensor.transfer %60046 : tensor<4x?x4096xf16>{%dim_57417} to #hal.device.promise<@__device_6>
    %60048 = torch_c.from_builtin_tensor %60047 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60049 = torch_c.to_builtin_tensor %59862 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57418 = arith.constant 1 : index
    %dim_57419 = tensor.dim %60049, %c1_57418 : tensor<4x?x4096xf16>
    %60050 = flow.tensor.transfer %60049 : tensor<4x?x4096xf16>{%dim_57419} to #hal.device.promise<@__device_6>
    %60051 = torch_c.from_builtin_tensor %60050 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57420 = torch.constant.int 1
    %60052 = torch.aten.add.Tensor %60033, %60036, %int1_57420 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57421 = torch.constant.int 1
    %60053 = torch.aten.add.Tensor %60052, %60039, %int1_57421 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57422 = torch.constant.int 1
    %60054 = torch.aten.add.Tensor %60053, %60042, %int1_57422 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57423 = torch.constant.int 1
    %60055 = torch.aten.add.Tensor %60054, %60045, %int1_57423 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57424 = torch.constant.int 1
    %60056 = torch.aten.add.Tensor %60055, %60048, %int1_57424 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57425 = torch.constant.int 1
    %60057 = torch.aten.add.Tensor %60056, %59855, %int1_57425 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57426 = torch.constant.int 1
    %60058 = torch.aten.add.Tensor %60057, %60051, %int1_57426 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
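    // %60058: reduced FFN output on @__device_6; final target @__device_7 (%60086).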
    %60059 = torch_c.to_builtin_tensor %59813 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57427 = arith.constant 1 : index
    %dim_57428 = tensor.dim %60059, %c1_57427 : tensor<4x?x4096xf16>
    %60060 = flow.tensor.transfer %60059 : tensor<4x?x4096xf16>{%dim_57428} to #hal.device.promise<@__device_7>
    %60061 = torch_c.from_builtin_tensor %60060 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60062 = torch_c.to_builtin_tensor %59820 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57429 = arith.constant 1 : index
    %dim_57430 = tensor.dim %60062, %c1_57429 : tensor<4x?x4096xf16>
    %60063 = flow.tensor.transfer %60062 : tensor<4x?x4096xf16>{%dim_57430} to #hal.device.promise<@__device_7>
    %60064 = torch_c.from_builtin_tensor %60063 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60065 = torch_c.to_builtin_tensor %59827 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57431 = arith.constant 1 : index
    %dim_57432 = tensor.dim %60065, %c1_57431 : tensor<4x?x4096xf16>
    %60066 = flow.tensor.transfer %60065 : tensor<4x?x4096xf16>{%dim_57432} to #hal.device.promise<@__device_7>
    %60067 = torch_c.from_builtin_tensor %60066 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60068 = torch_c.to_builtin_tensor %59834 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57433 = arith.constant 1 : index
    %dim_57434 = tensor.dim %60068, %c1_57433 : tensor<4x?x4096xf16>
    %60069 = flow.tensor.transfer %60068 : tensor<4x?x4096xf16>{%dim_57434} to #hal.device.promise<@__device_7>
    %60070 = torch_c.from_builtin_tensor %60069 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60070, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60071 = torch_c.to_builtin_tensor %59841 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57435 = arith.constant 1 : index
    %dim_57436 = tensor.dim %60071, %c1_57435 : tensor<4x?x4096xf16>
    %60072 = flow.tensor.transfer %60071 : tensor<4x?x4096xf16>{%dim_57436} to #hal.device.promise<@__device_7>
    %60073 = torch_c.from_builtin_tensor %60072 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60074 = torch_c.to_builtin_tensor %59848 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57437 = arith.constant 1 : index
    %dim_57438 = tensor.dim %60074, %c1_57437 : tensor<4x?x4096xf16>
    %60075 = flow.tensor.transfer %60074 : tensor<4x?x4096xf16>{%dim_57438} to #hal.device.promise<@__device_7>
    %60076 = torch_c.from_builtin_tensor %60075 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60076, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %60077 = torch_c.to_builtin_tensor %59855 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_57439 = arith.constant 1 : index
    %dim_57440 = tensor.dim %60077, %c1_57439 : tensor<4x?x4096xf16>
    %60078 = flow.tensor.transfer %60077 : tensor<4x?x4096xf16>{%dim_57440} to #hal.device.promise<@__device_7>
    %60079 = torch_c.from_builtin_tensor %60078 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57441 = torch.constant.int 1
    %60080 = torch.aten.add.Tensor %60061, %60064, %int1_57441 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60080, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57442 = torch.constant.int 1
    %60081 = torch.aten.add.Tensor %60080, %60067, %int1_57442 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57443 = torch.constant.int 1
    %60082 = torch.aten.add.Tensor %60081, %60070, %int1_57443 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60082, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57444 = torch.constant.int 1
    %60083 = torch.aten.add.Tensor %60082, %60073, %int1_57444 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57445 = torch.constant.int 1
    %60084 = torch.aten.add.Tensor %60083, %60076, %int1_57445 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60084, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57446 = torch.constant.int 1
    %60085 = torch.aten.add.Tensor %60084, %60079, %int1_57446 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60085, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57447 = torch.constant.int 1
    %60086 = torch.aten.add.Tensor %60085, %59862, %int1_57447 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60086, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
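    // Every device now holds the complete summed FFN output (%59890, %59918,
    // %59946, %59974, %60002, %60030, %60058, %60086). The adds below fold
    // these into the per-device residual stream inputs %59567 through %59574,
    // yielding %60087 through %60094.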
    %int1_57448 = torch.constant.int 1
    %60087 = torch.aten.add.Tensor %59567, %59890, %int1_57448 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60087, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57449 = torch.constant.int 1
    %60088 = torch.aten.add.Tensor %59568, %59918, %int1_57449 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57450 = torch.constant.int 1
    %60089 = torch.aten.add.Tensor %59569, %59946, %int1_57450 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60089, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57451 = torch.constant.int 1
    %60090 = torch.aten.add.Tensor %59570, %59974, %int1_57451 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60090, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57452 = torch.constant.int 1
    %60091 = torch.aten.add.Tensor %59571, %60002, %int1_57452 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60091, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57453 = torch.constant.int 1
    %60092 = torch.aten.add.Tensor %59572, %60030, %int1_57453 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60092, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57454 = torch.constant.int 1
    %60093 = torch.aten.add.Tensor %59573, %60058, %int1_57454 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60093, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_57455 = torch.constant.int 1
    %60094 = torch.aten.add.Tensor %59574, %60086, %int1_57455 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60094, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
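    // Residual connections applied; the following ops begin the next RMSNorm,
    // computed independently on each device: upcast to f32 (torch dtype code
    // 6), square elementwise, mean over the hidden dimension (dim -1,
    // keepdim), then the epsilon constant defined below (~1e-5).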
    %int6_57456 = torch.constant.int 6
    %60095 = torch.prims.convert_element_type %60087, %int6_57456 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57457 = torch.constant.int 6
    %60096 = torch.prims.convert_element_type %60088, %int6_57457 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60096, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57458 = torch.constant.int 6
    %60097 = torch.prims.convert_element_type %60089, %int6_57458 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60097, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57459 = torch.constant.int 6
    %60098 = torch.prims.convert_element_type %60090, %int6_57459 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60098, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57460 = torch.constant.int 6
    %60099 = torch.prims.convert_element_type %60091, %int6_57460 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60099, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57461 = torch.constant.int 6
    %60100 = torch.prims.convert_element_type %60092, %int6_57461 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60100, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57462 = torch.constant.int 6
    %60101 = torch.prims.convert_element_type %60093, %int6_57462 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60101, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_57463 = torch.constant.int 6
    %60102 = torch.prims.convert_element_type %60094, %int6_57463 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57464 = torch.constant.int 2
    %60103 = torch.aten.pow.Tensor_Scalar %60095, %int2_57464 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60103, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57465 = torch.constant.int 2
    %60104 = torch.aten.pow.Tensor_Scalar %60096, %int2_57465 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60104, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57466 = torch.constant.int 2
    %60105 = torch.aten.pow.Tensor_Scalar %60097, %int2_57466 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60105, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57467 = torch.constant.int 2
    %60106 = torch.aten.pow.Tensor_Scalar %60098, %int2_57467 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60106, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57468 = torch.constant.int 2
    %60107 = torch.aten.pow.Tensor_Scalar %60099, %int2_57468 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60107, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57469 = torch.constant.int 2
    %60108 = torch.aten.pow.Tensor_Scalar %60100, %int2_57469 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60108, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57470 = torch.constant.int 2
    %60109 = torch.aten.pow.Tensor_Scalar %60101, %int2_57470 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_57471 = torch.constant.int 2
    %60110 = torch.aten.pow.Tensor_Scalar %60102, %int2_57471 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60110, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int-1_57472 = torch.constant.int -1
    %60111 = torch.prim.ListConstruct %int-1_57472 : (!torch.int) -> !torch.list<int>
    %true_57473 = torch.constant.bool true
    %none_57474 = torch.constant.none
    %60112 = torch.aten.mean.dim %60103, %60111, %true_57473, %none_57474 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60112, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57475 = torch.constant.int -1
    %60113 = torch.prim.ListConstruct %int-1_57475 : (!torch.int) -> !torch.list<int>
    %true_57476 = torch.constant.bool true
    %none_57477 = torch.constant.none
    %60114 = torch.aten.mean.dim %60104, %60113, %true_57476, %none_57477 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60114, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57478 = torch.constant.int -1
    %60115 = torch.prim.ListConstruct %int-1_57478 : (!torch.int) -> !torch.list<int>
    %true_57479 = torch.constant.bool true
    %none_57480 = torch.constant.none
    %60116 = torch.aten.mean.dim %60105, %60115, %true_57479, %none_57480 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57481 = torch.constant.int -1
    %60117 = torch.prim.ListConstruct %int-1_57481 : (!torch.int) -> !torch.list<int>
    %true_57482 = torch.constant.bool true
    %none_57483 = torch.constant.none
    %60118 = torch.aten.mean.dim %60106, %60117, %true_57482, %none_57483 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57484 = torch.constant.int -1
    %60119 = torch.prim.ListConstruct %int-1_57484 : (!torch.int) -> !torch.list<int>
    %true_57485 = torch.constant.bool true
    %none_57486 = torch.constant.none
    %60120 = torch.aten.mean.dim %60107, %60119, %true_57485, %none_57486 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57487 = torch.constant.int -1
    %60121 = torch.prim.ListConstruct %int-1_57487 : (!torch.int) -> !torch.list<int>
    %true_57488 = torch.constant.bool true
    %none_57489 = torch.constant.none
    %60122 = torch.aten.mean.dim %60108, %60121, %true_57488, %none_57489 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57490 = torch.constant.int -1
    %60123 = torch.prim.ListConstruct %int-1_57490 : (!torch.int) -> !torch.list<int>
    %true_57491 = torch.constant.bool true
    %none_57492 = torch.constant.none
    %60124 = torch.aten.mean.dim %60109, %60123, %true_57491, %none_57492 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60124, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_57493 = torch.constant.int -1
    %60125 = torch.prim.ListConstruct %int-1_57493 : (!torch.int) -> !torch.list<int>
    %true_57494 = torch.constant.bool true
    %none_57495 = torch.constant.none
    %60126 = torch.aten.mean.dim %60110, %60125, %true_57494, %none_57495 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
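    // Add the epsilon constant (1.0e-5 stored as its nearest f32 value,
    // 9.9999997473787516e-6) to the mean of squares for numerical stability.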
    %float9.999990e-06_57496 = torch.constant.float 9.9999997473787516E-6
    %int1_57497 = torch.constant.int 1
    %60127 = torch.aten.add.Scalar %60112, %float9.999990e-06_57496, %int1_57497 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57498 = torch.constant.float 9.9999997473787516E-6
    %int1_57499 = torch.constant.int 1
    %60128 = torch.aten.add.Scalar %60114, %float9.999990e-06_57498, %int1_57499 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60128, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57500 = torch.constant.float 9.9999997473787516E-6
    %int1_57501 = torch.constant.int 1
    %60129 = torch.aten.add.Scalar %60116, %float9.999990e-06_57500, %int1_57501 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57502 = torch.constant.float 9.9999997473787516E-6
    %int1_57503 = torch.constant.int 1
    %60130 = torch.aten.add.Scalar %60118, %float9.999990e-06_57502, %int1_57503 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60130, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57504 = torch.constant.float 9.9999997473787516E-6
    %int1_57505 = torch.constant.int 1
    %60131 = torch.aten.add.Scalar %60120, %float9.999990e-06_57504, %int1_57505 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57506 = torch.constant.float 9.9999997473787516E-6
    %int1_57507 = torch.constant.int 1
    %60132 = torch.aten.add.Scalar %60122, %float9.999990e-06_57506, %int1_57507 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57508 = torch.constant.float 9.9999997473787516E-6
    %int1_57509 = torch.constant.int 1
    %60133 = torch.aten.add.Scalar %60124, %float9.999990e-06_57508, %int1_57509 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_57510 = torch.constant.float 9.9999997473787516E-6
    %int1_57511 = torch.constant.int 1
    %60134 = torch.aten.add.Scalar %60126, %float9.999990e-06_57510, %int1_57511 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60134, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
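    // rsqrt yields the per-token normalization factor 1/sqrt(mean(x^2) + eps).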
    %60135 = torch.aten.rsqrt %60127 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %60136 = torch.aten.rsqrt %60128 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60136, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %60137 = torch.aten.rsqrt %60129 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %60138 = torch.aten.rsqrt %60130 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %60139 = torch.aten.rsqrt %60131 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %60140 = torch.aten.rsqrt %60132 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60140, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %60141 = torch.aten.rsqrt %60133 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %60142 = torch.aten.rsqrt %60134 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %60142, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
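    // Normalize: multiply each f32 activation tensor by its broadcast [4,?,1] factor.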
    %60143 = torch.aten.mul.Tensor %60095, %60135 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60143, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60144 = torch.aten.mul.Tensor %60096, %60136 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60145 = torch.aten.mul.Tensor %60097, %60137 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60146 = torch.aten.mul.Tensor %60098, %60138 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60147 = torch.aten.mul.Tensor %60099, %60139 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60148 = torch.aten.mul.Tensor %60100, %60140 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60149 = torch.aten.mul.Tensor %60101, %60141 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60150 = torch.aten.mul.Tensor %60102, %60142 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
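    // Apply the learned elementwise gain (%2240..%2247, one [4096] f32 weight per
    // device); these are presumably the per-device attn_norm weights for this block.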
    %60151 = torch.aten.mul.Tensor %2240, %60143 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60152 = torch.aten.mul.Tensor %2241, %60144 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60152, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60153 = torch.aten.mul.Tensor %2242, %60145 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60153, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60154 = torch.aten.mul.Tensor %2243, %60146 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60154, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60155 = torch.aten.mul.Tensor %2244, %60147 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60155, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60156 = torch.aten.mul.Tensor %2245, %60148 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60156, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60157 = torch.aten.mul.Tensor %2246, %60149 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60157, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %60158 = torch.aten.mul.Tensor %2247, %60150 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %60158, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
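    // Downcast the normalized activations back to f16 (dtype constant 5) for the
    // projection matmuls that follow.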
    %int5_57512 = torch.constant.int 5
    %60159 = torch.prims.convert_element_type %60151, %int5_57512 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60159, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57513 = torch.constant.int 5
    %60160 = torch.prims.convert_element_type %60152, %int5_57513 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60160, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57514 = torch.constant.int 5
    %60161 = torch.prims.convert_element_type %60153, %int5_57514 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57515 = torch.constant.int 5
    %60162 = torch.prims.convert_element_type %60154, %int5_57515 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60162, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57516 = torch.constant.int 5
    %60163 = torch.prims.convert_element_type %60155, %int5_57516 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60163, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57517 = torch.constant.int 5
    %60164 = torch.prims.convert_element_type %60156, %int5_57517 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60164, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57518 = torch.constant.int 5
    %60165 = torch.prims.convert_element_type %60157, %int5_57518 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60165, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_57519 = torch.constant.int 5
    %60166 = torch.prims.convert_element_type %60158, %int5_57519 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %60166, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
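    // Q projection weights: permute each [512,4096] shard to [4096,512] so the
    // activations can be applied with a plain torch.aten.mm. The [512,4096] shape
    // matches the per-device attn_q weight shards (512 = 4 heads x 128).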
    %int1_57520 = torch.constant.int 1
    %int0_57521 = torch.constant.int 0
    %60167 = torch.prim.ListConstruct %int1_57520, %int0_57521 : (!torch.int, !torch.int) -> !torch.list<int>
    %60168 = torch.aten.permute %2248, %60167 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_57522 = torch.constant.int 1
    %int0_57523 = torch.constant.int 0
    %60169 = torch.prim.ListConstruct %int1_57522, %int0_57523 : (!torch.int, !torch.int) -> !torch.list<int>
    %60170 = torch.aten.permute %2249, %60169 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_57524 = torch.constant.int 1
    %int0_57525 = torch.constant.int 0
    %60171 = torch.prim.ListConstruct %int1_57524, %int0_57525 : (!torch.int, !torch.int) -> !torch.list<int>
    %60172 = torch.aten.permute %2250, %60171 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_57526 = torch.constant.int 1
    %int0_57527 = torch.constant.int 0
    %60173 = torch.prim.ListConstruct %int1_57526, %int0_57527 : (!torch.int, !torch.int) -> !torch.list<int>
    %60174 = torch.aten.permute %2251, %60173 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_57528 = torch.constant.int 1
    %int0_57529 = torch.constant.int 0
    %60175 = torch.prim.ListConstruct %int1_57528, %int0_57529 : (!torch.int, !torch.int) -> !torch.list<int>
    %60176 = torch.aten.permute %2252, %60175 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_57530 = torch.constant.int 1
    %int0_57531 = torch.constant.int 0
    %60177 = torch.prim.ListConstruct %int1_57530, %int0_57531 : (!torch.int, !torch.int) -> !torch.list<int>
    %60178 = torch.aten.permute %2253, %60177 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_57532 = torch.constant.int 1
    %int0_57533 = torch.constant.int 0
    %60179 = torch.prim.ListConstruct %int1_57532, %int0_57533 : (!torch.int, !torch.int) -> !torch.list<int>
    %60180 = torch.aten.permute %2254, %60179 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
    %int1_57534 = torch.constant.int 1
    %int0_57535 = torch.constant.int 0
    %60181 = torch.prim.ListConstruct %int1_57534, %int0_57535 : (!torch.int, !torch.int) -> !torch.list<int>
    %60182 = torch.aten.permute %2255, %60181 : !torch.vtensor<[512,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,512],f16>
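    // For each device: flatten [4,?,4096] to [4*?,4096] (4 * (s0*16) = s0*64 rows),
    // matmul against the transposed Q shard, then view the result back to [4,?,512].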
    %int4_57536 = torch.constant.int 4
    %60183 = torch.aten.mul.int %int4_57536, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57537 = torch.constant.int 4096
    %60184 = torch.prim.ListConstruct %60183, %int4096_57537 : (!torch.int, !torch.int) -> !torch.list<int>
    %60185 = torch.aten.view %60159, %60184 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60185, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60186 = torch.aten.mm %60185, %60168 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %60186, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_57538 = torch.constant.int 4
    %int512_57539 = torch.constant.int 512
    %60187 = torch.prim.ListConstruct %int4_57538, %2482, %int512_57539 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60188 = torch.aten.view %60186, %60187 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %60188, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_57540 = torch.constant.int 4
    %60189 = torch.aten.mul.int %int4_57540, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57541 = torch.constant.int 4096
    %60190 = torch.prim.ListConstruct %60189, %int4096_57541 : (!torch.int, !torch.int) -> !torch.list<int>
    %60191 = torch.aten.view %60160, %60190 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60191, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60192 = torch.aten.mm %60191, %60170 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %60192, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_57542 = torch.constant.int 4
    %int512_57543 = torch.constant.int 512
    %60193 = torch.prim.ListConstruct %int4_57542, %2482, %int512_57543 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60194 = torch.aten.view %60192, %60193 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %60194, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_57544 = torch.constant.int 4
    %60195 = torch.aten.mul.int %int4_57544, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57545 = torch.constant.int 4096
    %60196 = torch.prim.ListConstruct %60195, %int4096_57545 : (!torch.int, !torch.int) -> !torch.list<int>
    %60197 = torch.aten.view %60161, %60196 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60197, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60198 = torch.aten.mm %60197, %60172 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %60198, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_57546 = torch.constant.int 4
    %int512_57547 = torch.constant.int 512
    %60199 = torch.prim.ListConstruct %int4_57546, %2482, %int512_57547 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60200 = torch.aten.view %60198, %60199 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %60200, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_57548 = torch.constant.int 4
    %60201 = torch.aten.mul.int %int4_57548, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57549 = torch.constant.int 4096
    %60202 = torch.prim.ListConstruct %60201, %int4096_57549 : (!torch.int, !torch.int) -> !torch.list<int>
    %60203 = torch.aten.view %60162, %60202 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60203, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60204 = torch.aten.mm %60203, %60174 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %60204, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_57550 = torch.constant.int 4
    %int512_57551 = torch.constant.int 512
    %60205 = torch.prim.ListConstruct %int4_57550, %2482, %int512_57551 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60206 = torch.aten.view %60204, %60205 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %60206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_57552 = torch.constant.int 4
    %60207 = torch.aten.mul.int %int4_57552, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57553 = torch.constant.int 4096
    %60208 = torch.prim.ListConstruct %60207, %int4096_57553 : (!torch.int, !torch.int) -> !torch.list<int>
    %60209 = torch.aten.view %60163, %60208 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60209, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60210 = torch.aten.mm %60209, %60176 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %60210, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_57554 = torch.constant.int 4
    %int512_57555 = torch.constant.int 512
    %60211 = torch.prim.ListConstruct %int4_57554, %2482, %int512_57555 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60212 = torch.aten.view %60210, %60211 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %60212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_57556 = torch.constant.int 4
    %60213 = torch.aten.mul.int %int4_57556, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57557 = torch.constant.int 4096
    %60214 = torch.prim.ListConstruct %60213, %int4096_57557 : (!torch.int, !torch.int) -> !torch.list<int>
    %60215 = torch.aten.view %60164, %60214 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60215, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60216 = torch.aten.mm %60215, %60178 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %60216, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_57558 = torch.constant.int 4
    %int512_57559 = torch.constant.int 512
    %60217 = torch.prim.ListConstruct %int4_57558, %2482, %int512_57559 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60218 = torch.aten.view %60216, %60217 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %60218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_57560 = torch.constant.int 4
    %60219 = torch.aten.mul.int %int4_57560, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57561 = torch.constant.int 4096
    %60220 = torch.prim.ListConstruct %60219, %int4096_57561 : (!torch.int, !torch.int) -> !torch.list<int>
    %60221 = torch.aten.view %60165, %60220 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60221, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60222 = torch.aten.mm %60221, %60180 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %60222, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_57562 = torch.constant.int 4
    %int512_57563 = torch.constant.int 512
    %60223 = torch.prim.ListConstruct %int4_57562, %2482, %int512_57563 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60224 = torch.aten.view %60222, %60223 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %60224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_57564 = torch.constant.int 4
    %60225 = torch.aten.mul.int %int4_57564, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57565 = torch.constant.int 4096
    %60226 = torch.prim.ListConstruct %60225, %int4096_57565 : (!torch.int, !torch.int) -> !torch.list<int>
    %60227 = torch.aten.view %60166, %60226 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60227, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60228 = torch.aten.mm %60227, %60182 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,512],f16> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %60228, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %int4_57566 = torch.constant.int 4
    %int512_57567 = torch.constant.int 512
    %60229 = torch.prim.ListConstruct %int4_57566, %2482, %int512_57567 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60230 = torch.aten.view %60228, %60229 : !torch.vtensor<[?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %60230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
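    // First group of [128,4096] weights, transposed to [4096,128]. Given the single
    // 128-wide head per shard produced below, these are most likely the attn_k
    // shards (grouped-query attention with one KV head per device).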
    %int1_57568 = torch.constant.int 1
    %int0_57569 = torch.constant.int 0
    %60231 = torch.prim.ListConstruct %int1_57568, %int0_57569 : (!torch.int, !torch.int) -> !torch.list<int>
    %60232 = torch.aten.permute %2256, %60231 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57570 = torch.constant.int 1
    %int0_57571 = torch.constant.int 0
    %60233 = torch.prim.ListConstruct %int1_57570, %int0_57571 : (!torch.int, !torch.int) -> !torch.list<int>
    %60234 = torch.aten.permute %2257, %60233 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57572 = torch.constant.int 1
    %int0_57573 = torch.constant.int 0
    %60235 = torch.prim.ListConstruct %int1_57572, %int0_57573 : (!torch.int, !torch.int) -> !torch.list<int>
    %60236 = torch.aten.permute %2258, %60235 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57574 = torch.constant.int 1
    %int0_57575 = torch.constant.int 0
    %60237 = torch.prim.ListConstruct %int1_57574, %int0_57575 : (!torch.int, !torch.int) -> !torch.list<int>
    %60238 = torch.aten.permute %2259, %60237 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57576 = torch.constant.int 1
    %int0_57577 = torch.constant.int 0
    %60239 = torch.prim.ListConstruct %int1_57576, %int0_57577 : (!torch.int, !torch.int) -> !torch.list<int>
    %60240 = torch.aten.permute %2260, %60239 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57578 = torch.constant.int 1
    %int0_57579 = torch.constant.int 0
    %60241 = torch.prim.ListConstruct %int1_57578, %int0_57579 : (!torch.int, !torch.int) -> !torch.list<int>
    %60242 = torch.aten.permute %2261, %60241 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57580 = torch.constant.int 1
    %int0_57581 = torch.constant.int 0
    %60243 = torch.prim.ListConstruct %int1_57580, %int0_57581 : (!torch.int, !torch.int) -> !torch.list<int>
    %60244 = torch.aten.permute %2262, %60243 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57582 = torch.constant.int 1
    %int0_57583 = torch.constant.int 0
    %60245 = torch.prim.ListConstruct %int1_57582, %int0_57583 : (!torch.int, !torch.int) -> !torch.list<int>
    %60246 = torch.aten.permute %2263, %60245 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
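    // K projection per device: the same flatten / mm / reshape pattern as Q,
    // but with a 128-column output.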
    %int4_57584 = torch.constant.int 4
    %60247 = torch.aten.mul.int %int4_57584, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57585 = torch.constant.int 4096
    %60248 = torch.prim.ListConstruct %60247, %int4096_57585 : (!torch.int, !torch.int) -> !torch.list<int>
    %60249 = torch.aten.view %60159, %60248 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60249, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60250 = torch.aten.mm %60249, %60232 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60250, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57586 = torch.constant.int 4
    %int128_57587 = torch.constant.int 128
    %60251 = torch.prim.ListConstruct %int4_57586, %2482, %int128_57587 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60252 = torch.aten.view %60250, %60251 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57588 = torch.constant.int 4
    %60253 = torch.aten.mul.int %int4_57588, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57589 = torch.constant.int 4096
    %60254 = torch.prim.ListConstruct %60253, %int4096_57589 : (!torch.int, !torch.int) -> !torch.list<int>
    %60255 = torch.aten.view %60160, %60254 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60255, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60256 = torch.aten.mm %60255, %60234 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60256, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57590 = torch.constant.int 4
    %int128_57591 = torch.constant.int 128
    %60257 = torch.prim.ListConstruct %int4_57590, %2482, %int128_57591 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60258 = torch.aten.view %60256, %60257 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57592 = torch.constant.int 4
    %60259 = torch.aten.mul.int %int4_57592, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57593 = torch.constant.int 4096
    %60260 = torch.prim.ListConstruct %60259, %int4096_57593 : (!torch.int, !torch.int) -> !torch.list<int>
    %60261 = torch.aten.view %60161, %60260 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60261, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60262 = torch.aten.mm %60261, %60236 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60262, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57594 = torch.constant.int 4
    %int128_57595 = torch.constant.int 128
    %60263 = torch.prim.ListConstruct %int4_57594, %2482, %int128_57595 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60264 = torch.aten.view %60262, %60263 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60264, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57596 = torch.constant.int 4
    %60265 = torch.aten.mul.int %int4_57596, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57597 = torch.constant.int 4096
    %60266 = torch.prim.ListConstruct %60265, %int4096_57597 : (!torch.int, !torch.int) -> !torch.list<int>
    %60267 = torch.aten.view %60162, %60266 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60267, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60268 = torch.aten.mm %60267, %60238 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60268, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57598 = torch.constant.int 4
    %int128_57599 = torch.constant.int 128
    %60269 = torch.prim.ListConstruct %int4_57598, %2482, %int128_57599 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60270 = torch.aten.view %60268, %60269 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60270, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57600 = torch.constant.int 4
    %60271 = torch.aten.mul.int %int4_57600, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57601 = torch.constant.int 4096
    %60272 = torch.prim.ListConstruct %60271, %int4096_57601 : (!torch.int, !torch.int) -> !torch.list<int>
    %60273 = torch.aten.view %60163, %60272 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60273, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60274 = torch.aten.mm %60273, %60240 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60274, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57602 = torch.constant.int 4
    %int128_57603 = torch.constant.int 128
    %60275 = torch.prim.ListConstruct %int4_57602, %2482, %int128_57603 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60276 = torch.aten.view %60274, %60275 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60276, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57604 = torch.constant.int 4
    %60277 = torch.aten.mul.int %int4_57604, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57605 = torch.constant.int 4096
    %60278 = torch.prim.ListConstruct %60277, %int4096_57605 : (!torch.int, !torch.int) -> !torch.list<int>
    %60279 = torch.aten.view %60164, %60278 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60279, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60280 = torch.aten.mm %60279, %60242 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60280, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57606 = torch.constant.int 4
    %int128_57607 = torch.constant.int 128
    %60281 = torch.prim.ListConstruct %int4_57606, %2482, %int128_57607 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60282 = torch.aten.view %60280, %60281 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57608 = torch.constant.int 4
    %60283 = torch.aten.mul.int %int4_57608, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57609 = torch.constant.int 4096
    %60284 = torch.prim.ListConstruct %60283, %int4096_57609 : (!torch.int, !torch.int) -> !torch.list<int>
    %60285 = torch.aten.view %60165, %60284 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60285, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60286 = torch.aten.mm %60285, %60244 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60286, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57610 = torch.constant.int 4
    %int128_57611 = torch.constant.int 128
    %60287 = torch.prim.ListConstruct %int4_57610, %2482, %int128_57611 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60288 = torch.aten.view %60286, %60287 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60288, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57612 = torch.constant.int 4
    %60289 = torch.aten.mul.int %int4_57612, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57613 = torch.constant.int 4096
    %60290 = torch.prim.ListConstruct %60289, %int4096_57613 : (!torch.int, !torch.int) -> !torch.list<int>
    %60291 = torch.aten.view %60166, %60290 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60291, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60292 = torch.aten.mm %60291, %60246 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60292, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57614 = torch.constant.int 4
    %int128_57615 = torch.constant.int 128
    %60293 = torch.prim.ListConstruct %int4_57614, %2482, %int128_57615 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60294 = torch.aten.view %60292, %60293 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60294, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
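    // Second group of [128,4096] weights, transposed the same way; by elimination
    // these are most likely the attn_v shards.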
    %int1_57616 = torch.constant.int 1
    %int0_57617 = torch.constant.int 0
    %60295 = torch.prim.ListConstruct %int1_57616, %int0_57617 : (!torch.int, !torch.int) -> !torch.list<int>
    %60296 = torch.aten.permute %2264, %60295 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57618 = torch.constant.int 1
    %int0_57619 = torch.constant.int 0
    %60297 = torch.prim.ListConstruct %int1_57618, %int0_57619 : (!torch.int, !torch.int) -> !torch.list<int>
    %60298 = torch.aten.permute %2265, %60297 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57620 = torch.constant.int 1
    %int0_57621 = torch.constant.int 0
    %60299 = torch.prim.ListConstruct %int1_57620, %int0_57621 : (!torch.int, !torch.int) -> !torch.list<int>
    %60300 = torch.aten.permute %2266, %60299 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57622 = torch.constant.int 1
    %int0_57623 = torch.constant.int 0
    %60301 = torch.prim.ListConstruct %int1_57622, %int0_57623 : (!torch.int, !torch.int) -> !torch.list<int>
    %60302 = torch.aten.permute %2267, %60301 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57624 = torch.constant.int 1
    %int0_57625 = torch.constant.int 0
    %60303 = torch.prim.ListConstruct %int1_57624, %int0_57625 : (!torch.int, !torch.int) -> !torch.list<int>
    %60304 = torch.aten.permute %2268, %60303 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57626 = torch.constant.int 1
    %int0_57627 = torch.constant.int 0
    %60305 = torch.prim.ListConstruct %int1_57626, %int0_57627 : (!torch.int, !torch.int) -> !torch.list<int>
    %60306 = torch.aten.permute %2269, %60305 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57628 = torch.constant.int 1
    %int0_57629 = torch.constant.int 0
    %60307 = torch.prim.ListConstruct %int1_57628, %int0_57629 : (!torch.int, !torch.int) -> !torch.list<int>
    %60308 = torch.aten.permute %2270, %60307 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
    %int1_57630 = torch.constant.int 1
    %int0_57631 = torch.constant.int 0
    %60309 = torch.prim.ListConstruct %int1_57630, %int0_57631 : (!torch.int, !torch.int) -> !torch.list<int>
    %60310 = torch.aten.permute %2271, %60309 : !torch.vtensor<[128,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,128],f16>
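    // V projection per device: flatten to [?,4096], mm against [4096,128],
    // reshape to [4,?,128].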
    %int4_57632 = torch.constant.int 4
    %60311 = torch.aten.mul.int %int4_57632, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57633 = torch.constant.int 4096
    %60312 = torch.prim.ListConstruct %60311, %int4096_57633 : (!torch.int, !torch.int) -> !torch.list<int>
    %60313 = torch.aten.view %60159, %60312 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60313, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60314 = torch.aten.mm %60313, %60296 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60314, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57634 = torch.constant.int 4
    %int128_57635 = torch.constant.int 128
    %60315 = torch.prim.ListConstruct %int4_57634, %2482, %int128_57635 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60316 = torch.aten.view %60314, %60315 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60316, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57636 = torch.constant.int 4
    %60317 = torch.aten.mul.int %int4_57636, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57637 = torch.constant.int 4096
    %60318 = torch.prim.ListConstruct %60317, %int4096_57637 : (!torch.int, !torch.int) -> !torch.list<int>
    %60319 = torch.aten.view %60160, %60318 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60319, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60320 = torch.aten.mm %60319, %60298 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60320, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57638 = torch.constant.int 4
    %int128_57639 = torch.constant.int 128
    %60321 = torch.prim.ListConstruct %int4_57638, %2482, %int128_57639 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60322 = torch.aten.view %60320, %60321 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60322, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57640 = torch.constant.int 4
    %60323 = torch.aten.mul.int %int4_57640, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57641 = torch.constant.int 4096
    %60324 = torch.prim.ListConstruct %60323, %int4096_57641 : (!torch.int, !torch.int) -> !torch.list<int>
    %60325 = torch.aten.view %60161, %60324 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60325, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60326 = torch.aten.mm %60325, %60300 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60326, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57642 = torch.constant.int 4
    %int128_57643 = torch.constant.int 128
    %60327 = torch.prim.ListConstruct %int4_57642, %2482, %int128_57643 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60328 = torch.aten.view %60326, %60327 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60328, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57644 = torch.constant.int 4
    %60329 = torch.aten.mul.int %int4_57644, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57645 = torch.constant.int 4096
    %60330 = torch.prim.ListConstruct %60329, %int4096_57645 : (!torch.int, !torch.int) -> !torch.list<int>
    %60331 = torch.aten.view %60162, %60330 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60331, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60332 = torch.aten.mm %60331, %60302 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60332, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57646 = torch.constant.int 4
    %int128_57647 = torch.constant.int 128
    %60333 = torch.prim.ListConstruct %int4_57646, %2482, %int128_57647 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60334 = torch.aten.view %60332, %60333 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60334, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57648 = torch.constant.int 4
    %60335 = torch.aten.mul.int %int4_57648, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57649 = torch.constant.int 4096
    %60336 = torch.prim.ListConstruct %60335, %int4096_57649 : (!torch.int, !torch.int) -> !torch.list<int>
    %60337 = torch.aten.view %60163, %60336 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60337, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60338 = torch.aten.mm %60337, %60304 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60338, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57650 = torch.constant.int 4
    %int128_57651 = torch.constant.int 128
    %60339 = torch.prim.ListConstruct %int4_57650, %2482, %int128_57651 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60340 = torch.aten.view %60338, %60339 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57652 = torch.constant.int 4
    %60341 = torch.aten.mul.int %int4_57652, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57653 = torch.constant.int 4096
    %60342 = torch.prim.ListConstruct %60341, %int4096_57653 : (!torch.int, !torch.int) -> !torch.list<int>
    %60343 = torch.aten.view %60164, %60342 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60343, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60344 = torch.aten.mm %60343, %60306 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60344, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57654 = torch.constant.int 4
    %int128_57655 = torch.constant.int 128
    %60345 = torch.prim.ListConstruct %int4_57654, %2482, %int128_57655 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60346 = torch.aten.view %60344, %60345 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57656 = torch.constant.int 4
    %60347 = torch.aten.mul.int %int4_57656, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57657 = torch.constant.int 4096
    %60348 = torch.prim.ListConstruct %60347, %int4096_57657 : (!torch.int, !torch.int) -> !torch.list<int>
    %60349 = torch.aten.view %60165, %60348 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60349, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60350 = torch.aten.mm %60349, %60308 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60350, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57658 = torch.constant.int 4
    %int128_57659 = torch.constant.int 128
    %60351 = torch.prim.ListConstruct %int4_57658, %2482, %int128_57659 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60352 = torch.aten.view %60350, %60351 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
    %int4_57660 = torch.constant.int 4
    %60353 = torch.aten.mul.int %int4_57660, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_57661 = torch.constant.int 4096
    %60354 = torch.prim.ListConstruct %60353, %int4096_57661 : (!torch.int, !torch.int) -> !torch.list<int>
    %60355 = torch.aten.view %60166, %60354 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %60355, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %60356 = torch.aten.mm %60355, %60310 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,128],f16> -> !torch.vtensor<[?,128],f16>
    torch.bind_symbolic_shape %60356, [%2336], affine_map<()[s0] -> (s0 * 64, 128)> : !torch.vtensor<[?,128],f16>
    %int4_57662 = torch.constant.int 4
    %int128_57663 = torch.constant.int 128
    %60357 = torch.prim.ListConstruct %int4_57662, %2482, %int128_57663 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60358 = torch.aten.view %60356, %60357 : !torch.vtensor<[?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128],f16>
    torch.bind_symbolic_shape %60358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128)> : !torch.vtensor<[4,?,128],f16>
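    // Split the Q projections into heads: [4,?,512] -> [4,?,4,128]
    // (4 query heads of dimension 128 per device).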
    %int4_57664 = torch.constant.int 4
    %int4_57665 = torch.constant.int 4
    %int128_57666 = torch.constant.int 128
    %60359 = torch.prim.ListConstruct %int4_57664, %2482, %int4_57665, %int128_57666 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60360 = torch.aten.view %60188, %60359 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60360, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_57667 = torch.constant.int 4
    %int4_57668 = torch.constant.int 4
    %int128_57669 = torch.constant.int 128
    %60361 = torch.prim.ListConstruct %int4_57667, %2482, %int4_57668, %int128_57669 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60362 = torch.aten.view %60194, %60361 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60362, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_57670 = torch.constant.int 4
    %int4_57671 = torch.constant.int 4
    %int128_57672 = torch.constant.int 128
    %60363 = torch.prim.ListConstruct %int4_57670, %2482, %int4_57671, %int128_57672 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60364 = torch.aten.view %60200, %60363 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_57673 = torch.constant.int 4
    %int4_57674 = torch.constant.int 4
    %int128_57675 = torch.constant.int 128
    %60365 = torch.prim.ListConstruct %int4_57673, %2482, %int4_57674, %int128_57675 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60366 = torch.aten.view %60206, %60365 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_57676 = torch.constant.int 4
    %int4_57677 = torch.constant.int 4
    %int128_57678 = torch.constant.int 128
    %60367 = torch.prim.ListConstruct %int4_57676, %2482, %int4_57677, %int128_57678 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60368 = torch.aten.view %60212, %60367 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_57679 = torch.constant.int 4
    %int4_57680 = torch.constant.int 4
    %int128_57681 = torch.constant.int 128
    %60369 = torch.prim.ListConstruct %int4_57679, %2482, %int4_57680, %int128_57681 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60370 = torch.aten.view %60218, %60369 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_57682 = torch.constant.int 4
    %int4_57683 = torch.constant.int 4
    %int128_57684 = torch.constant.int 128
    %60371 = torch.prim.ListConstruct %int4_57682, %2482, %int4_57683, %int128_57684 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60372 = torch.aten.view %60224, %60371 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60372, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_57685 = torch.constant.int 4
    %int4_57686 = torch.constant.int 4
    %int128_57687 = torch.constant.int 128
    %60373 = torch.prim.ListConstruct %int4_57685, %2482, %int4_57686, %int128_57687 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60374 = torch.aten.view %60230, %60373 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
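    // Add an explicit single-head axis to the K projections: [4,?,128] -> [4,?,1,128],
    // consistent with one KV head per device.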
    %int4_57688 = torch.constant.int 4
    %int1_57689 = torch.constant.int 1
    %int128_57690 = torch.constant.int 128
    %60375 = torch.prim.ListConstruct %int4_57688, %2482, %int1_57689, %int128_57690 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60376 = torch.aten.view %60252, %60375 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60376, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57691 = torch.constant.int 4
    %int1_57692 = torch.constant.int 1
    %int128_57693 = torch.constant.int 128
    %60377 = torch.prim.ListConstruct %int4_57691, %2482, %int1_57692, %int128_57693 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60378 = torch.aten.view %60258, %60377 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60378, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57694 = torch.constant.int 4
    %int1_57695 = torch.constant.int 1
    %int128_57696 = torch.constant.int 128
    %60379 = torch.prim.ListConstruct %int4_57694, %2482, %int1_57695, %int128_57696 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60380 = torch.aten.view %60264, %60379 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57697 = torch.constant.int 4
    %int1_57698 = torch.constant.int 1
    %int128_57699 = torch.constant.int 128
    %60381 = torch.prim.ListConstruct %int4_57697, %2482, %int1_57698, %int128_57699 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60382 = torch.aten.view %60270, %60381 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60382, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57700 = torch.constant.int 4
    %int1_57701 = torch.constant.int 1
    %int128_57702 = torch.constant.int 128
    %60383 = torch.prim.ListConstruct %int4_57700, %2482, %int1_57701, %int128_57702 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60384 = torch.aten.view %60276, %60383 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60384, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57703 = torch.constant.int 4
    %int1_57704 = torch.constant.int 1
    %int128_57705 = torch.constant.int 128
    %60385 = torch.prim.ListConstruct %int4_57703, %2482, %int1_57704, %int128_57705 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60386 = torch.aten.view %60282, %60385 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57706 = torch.constant.int 4
    %int1_57707 = torch.constant.int 1
    %int128_57708 = torch.constant.int 128
    %60387 = torch.prim.ListConstruct %int4_57706, %2482, %int1_57707, %int128_57708 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60388 = torch.aten.view %60288, %60387 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60388, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57709 = torch.constant.int 4
    %int1_57710 = torch.constant.int 1
    %int128_57711 = torch.constant.int 128
    %60389 = torch.prim.ListConstruct %int4_57709, %2482, %int1_57710, %int128_57711 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60390 = torch.aten.view %60294, %60389 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60390, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57712 = torch.constant.int 4
    %int1_57713 = torch.constant.int 1
    %int128_57714 = torch.constant.int 128
    %60391 = torch.prim.ListConstruct %int4_57712, %2482, %int1_57713, %int128_57714 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60392 = torch.aten.view %60316, %60391 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57715 = torch.constant.int 4
    %int1_57716 = torch.constant.int 1
    %int128_57717 = torch.constant.int 128
    %60393 = torch.prim.ListConstruct %int4_57715, %2482, %int1_57716, %int128_57717 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60394 = torch.aten.view %60322, %60393 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57718 = torch.constant.int 4
    %int1_57719 = torch.constant.int 1
    %int128_57720 = torch.constant.int 128
    %60395 = torch.prim.ListConstruct %int4_57718, %2482, %int1_57719, %int128_57720 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60396 = torch.aten.view %60328, %60395 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57721 = torch.constant.int 4
    %int1_57722 = torch.constant.int 1
    %int128_57723 = torch.constant.int 128
    %60397 = torch.prim.ListConstruct %int4_57721, %2482, %int1_57722, %int128_57723 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60398 = torch.aten.view %60334, %60397 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57724 = torch.constant.int 4
    %int1_57725 = torch.constant.int 1
    %int128_57726 = torch.constant.int 128
    %60399 = torch.prim.ListConstruct %int4_57724, %2482, %int1_57725, %int128_57726 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60400 = torch.aten.view %60340, %60399 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60400, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57727 = torch.constant.int 4
    %int1_57728 = torch.constant.int 1
    %int128_57729 = torch.constant.int 128
    %60401 = torch.prim.ListConstruct %int4_57727, %2482, %int1_57728, %int128_57729 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60402 = torch.aten.view %60346, %60401 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57730 = torch.constant.int 4
    %int1_57731 = torch.constant.int 1
    %int128_57732 = torch.constant.int 128
    %60403 = torch.prim.ListConstruct %int4_57730, %2482, %int1_57731, %int128_57732 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60404 = torch.aten.view %60352, %60403 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60404, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int4_57733 = torch.constant.int 4
    %int1_57734 = torch.constant.int 1
    %int128_57735 = torch.constant.int 128
    %60405 = torch.prim.ListConstruct %int4_57733, %2482, %int1_57734, %int128_57735 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60406 = torch.aten.view %60358, %60405 : !torch.vtensor<[4,?,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60406, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
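    // Rotary-embedding (RoPE) table construction, computed on the host ("cpu")
    // and broadcast to the devices below. For positions p in [0, 131072) and
    // frequency index i in [0, 64) this evaluates
    //   inv_freq[i] = 1 / 500000^(2i/128)
    //   table[p, i] = cos(p * inv_freq[i]) + j*sin(p * inv_freq[i])
    // i.e. a [131072,64] complex<f32> table; 131072 and 5.0e+05 are consistent
    // with a Llama-3-style max context length and rope_theta.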
    %int131072_57736 = torch.constant.int 131072
    %none_57737 = torch.constant.none
    %none_57738 = torch.constant.none
    %cpu_57739 = torch.constant.device "cpu"
    %false_57740 = torch.constant.bool false
    %60407 = torch.aten.arange %int131072_57736, %none_57737, %none_57738, %cpu_57739, %false_57740 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_57741 = torch.constant.int 0
    %int128_57742 = torch.constant.int 128
    %int2_57743 = torch.constant.int 2
    %none_57744 = torch.constant.none
    %none_57745 = torch.constant.none
    %cpu_57746 = torch.constant.device "cpu"
    %false_57747 = torch.constant.bool false
    %60408 = torch.aten.arange.start_step %int0_57741, %int128_57742, %int2_57743, %none_57744, %none_57745, %cpu_57746, %false_57747 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_57748 = torch.constant.int 0
    %int0_57749 = torch.constant.int 0
    %int64_57750 = torch.constant.int 64
    %int1_57751 = torch.constant.int 1
    %60409 = torch.aten.slice.Tensor %60408, %int0_57748, %int0_57749, %int64_57750, %int1_57751 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_57752 = torch.constant.int 6
    %60410 = torch.prims.convert_element_type %60409, %int6_57752 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_57753 = torch.constant.int 128
    %60411 = torch.aten.div.Scalar %60410, %int128_57753 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_57754 = torch.constant.float 5.000000e+05
    %60412 = torch.aten.pow.Scalar %float5.000000e05_57754, %60411 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %60413 = torch.aten.reciprocal %60412 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_57755 = torch.constant.float 1.000000e+00
    %60414 = torch.aten.mul.Scalar %60413, %float1.000000e00_57755 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_57756 = torch.constant.int 131072
    %int1_57757 = torch.constant.int 1
    %60415 = torch.prim.ListConstruct %int131072_57756, %int1_57757 : (!torch.int, !torch.int) -> !torch.list<int>
    %60416 = torch.aten.view %60407, %60415 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %60417 = torch.aten.mul.Tensor %60416, %60414 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %60418 = torch.aten.cos %60417 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %60419 = torch.aten.sin %60417 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %60420 = torch.aten.complex %60418, %60419 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
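    // Replicate the host-side table to all eight devices: each
    // to_builtin_tensor / flow.tensor.transfer / from_builtin_tensor triple
    // materializes one copy on one #hal.device.promise target.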
    %60421 = torch_c.to_builtin_tensor %60420 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60422 = flow.tensor.transfer %60421 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %60423 = torch_c.from_builtin_tensor %60422 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60424 = torch_c.to_builtin_tensor %60420 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60425 = flow.tensor.transfer %60424 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %60426 = torch_c.from_builtin_tensor %60425 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60427 = torch_c.to_builtin_tensor %60420 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60428 = flow.tensor.transfer %60427 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %60429 = torch_c.from_builtin_tensor %60428 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60430 = torch_c.to_builtin_tensor %60420 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60431 = flow.tensor.transfer %60430 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %60432 = torch_c.from_builtin_tensor %60431 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60433 = torch_c.to_builtin_tensor %60420 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60434 = flow.tensor.transfer %60433 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %60435 = torch_c.from_builtin_tensor %60434 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60436 = torch_c.to_builtin_tensor %60420 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60437 = flow.tensor.transfer %60436 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %60438 = torch_c.from_builtin_tensor %60437 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60439 = torch_c.to_builtin_tensor %60420 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60440 = flow.tensor.transfer %60439 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %60441 = torch_c.from_builtin_tensor %60440 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60442 = torch_c.to_builtin_tensor %60420 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60443 = flow.tensor.transfer %60442 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %60444 = torch_c.from_builtin_tensor %60443 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
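    // Rotary application for the @__device_0 query shard; the same pattern
    // repeats once per device below. Steps: read the dynamic sequence length,
    // slice the first seq_len rows of that device's table (INT64_MAX acts as
    // "slice to end"), unsqueeze to [1,?,1,64] so it broadcasts, bitcast the
    // f16 head dims to complex<f16>, multiply by the complex table, bitcast
    // back to f32 pairs, and truncate to f16 (dtype code 5).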
    %int1_57758 = torch.constant.int 1
    %60445 = torch.aten.size.int %60188, %int1_57758 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_57759 = torch.constant.int 0
    %60446 = torch.aten.add.int %int0_57759, %60445 : !torch.int, !torch.int -> !torch.int
    %int0_57760 = torch.constant.int 0
    %int0_57761 = torch.constant.int 0
    %int1_57762 = torch.constant.int 1
    %60447 = torch.aten.slice.Tensor %60423, %int0_57760, %int0_57761, %60446, %int1_57762 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60447, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_57763 = torch.constant.int 1
    %int0_57764 = torch.constant.int 0
    %int9223372036854775807_57765 = torch.constant.int 9223372036854775807
    %int1_57766 = torch.constant.int 1
    %60448 = torch.aten.slice.Tensor %60447, %int1_57763, %int0_57764, %int9223372036854775807_57765, %int1_57766 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60448, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_57767 = torch.constant.int 0
    %60449 = torch.aten.unsqueeze %60448, %int0_57767 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60449, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_57768 = torch.constant.int 2
    %60450 = torch.aten.unsqueeze %60449, %int2_57768 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60450, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_57769 = torch.constant.int 3
    %int0_57770 = torch.constant.int 0
    %int9223372036854775807_57771 = torch.constant.int 9223372036854775807
    %int1_57772 = torch.constant.int 1
    %60451 = torch.aten.slice.Tensor %60450, %int3_57769, %int0_57770, %int9223372036854775807_57771, %int1_57772 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60451, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60452 = torch_c.to_builtin_tensor %60360 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_57773 = arith.constant 1 : index
    %dim_57774 = tensor.dim %60452, %c1_57773 : tensor<4x?x4x128xf16>
    %60453 = flow.tensor.bitcast %60452 : tensor<4x?x4x128xf16>{%dim_57774} -> tensor<4x?x4x64xcomplex<f16>>{%dim_57774}
    %60454 = torch_c.from_builtin_tensor %60453 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %60454, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %60455 = torch.aten.mul.Tensor %60454, %60451 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %60455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %60456 = torch_c.to_builtin_tensor %60455 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_57775 = arith.constant 1 : index
    %dim_57776 = tensor.dim %60456, %c1_57775 : tensor<4x?x4x64xcomplex<f32>>
    %60457 = flow.tensor.bitcast %60456 : tensor<4x?x4x64xcomplex<f32>>{%dim_57776} -> tensor<4x?x4x128xf32>{%dim_57776}
    %60458 = torch_c.from_builtin_tensor %60457 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %60458, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_57777 = torch.constant.int 5
    %60459 = torch.prims.convert_element_type %60458, %int5_57777 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
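    // Same rotary application for the query shard on @__device_1.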
    %int1_57778 = torch.constant.int 1
    %60460 = torch.aten.size.int %60194, %int1_57778 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_57779 = torch.constant.int 0
    %60461 = torch.aten.add.int %int0_57779, %60460 : !torch.int, !torch.int -> !torch.int
    %int0_57780 = torch.constant.int 0
    %int0_57781 = torch.constant.int 0
    %int1_57782 = torch.constant.int 1
    %60462 = torch.aten.slice.Tensor %60426, %int0_57780, %int0_57781, %60461, %int1_57782 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60462, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_57783 = torch.constant.int 1
    %int0_57784 = torch.constant.int 0
    %int9223372036854775807_57785 = torch.constant.int 9223372036854775807
    %int1_57786 = torch.constant.int 1
    %60463 = torch.aten.slice.Tensor %60462, %int1_57783, %int0_57784, %int9223372036854775807_57785, %int1_57786 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60463, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_57787 = torch.constant.int 0
    %60464 = torch.aten.unsqueeze %60463, %int0_57787 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60464, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_57788 = torch.constant.int 2
    %60465 = torch.aten.unsqueeze %60464, %int2_57788 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60465, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_57789 = torch.constant.int 3
    %int0_57790 = torch.constant.int 0
    %int9223372036854775807_57791 = torch.constant.int 9223372036854775807
    %int1_57792 = torch.constant.int 1
    %60466 = torch.aten.slice.Tensor %60465, %int3_57789, %int0_57790, %int9223372036854775807_57791, %int1_57792 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60466, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60467 = torch_c.to_builtin_tensor %60362 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_57793 = arith.constant 1 : index
    %dim_57794 = tensor.dim %60467, %c1_57793 : tensor<4x?x4x128xf16>
    %60468 = flow.tensor.bitcast %60467 : tensor<4x?x4x128xf16>{%dim_57794} -> tensor<4x?x4x64xcomplex<f16>>{%dim_57794}
    %60469 = torch_c.from_builtin_tensor %60468 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %60469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %60470 = torch.aten.mul.Tensor %60469, %60466 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %60470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %60471 = torch_c.to_builtin_tensor %60470 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_57795 = arith.constant 1 : index
    %dim_57796 = tensor.dim %60471, %c1_57795 : tensor<4x?x4x64xcomplex<f32>>
    %60472 = flow.tensor.bitcast %60471 : tensor<4x?x4x64xcomplex<f32>>{%dim_57796} -> tensor<4x?x4x128xf32>{%dim_57796}
    %60473 = torch_c.from_builtin_tensor %60472 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %60473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_57797 = torch.constant.int 5
    %60474 = torch.prims.convert_element_type %60473, %int5_57797 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
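    // Same rotary application for the query shard on @__device_2.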
    %int1_57798 = torch.constant.int 1
    %60475 = torch.aten.size.int %60200, %int1_57798 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_57799 = torch.constant.int 0
    %60476 = torch.aten.add.int %int0_57799, %60475 : !torch.int, !torch.int -> !torch.int
    %int0_57800 = torch.constant.int 0
    %int0_57801 = torch.constant.int 0
    %int1_57802 = torch.constant.int 1
    %60477 = torch.aten.slice.Tensor %60429, %int0_57800, %int0_57801, %60476, %int1_57802 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60477, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_57803 = torch.constant.int 1
    %int0_57804 = torch.constant.int 0
    %int9223372036854775807_57805 = torch.constant.int 9223372036854775807
    %int1_57806 = torch.constant.int 1
    %60478 = torch.aten.slice.Tensor %60477, %int1_57803, %int0_57804, %int9223372036854775807_57805, %int1_57806 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60478, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_57807 = torch.constant.int 0
    %60479 = torch.aten.unsqueeze %60478, %int0_57807 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60479, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_57808 = torch.constant.int 2
    %60480 = torch.aten.unsqueeze %60479, %int2_57808 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60480, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_57809 = torch.constant.int 3
    %int0_57810 = torch.constant.int 0
    %int9223372036854775807_57811 = torch.constant.int 9223372036854775807
    %int1_57812 = torch.constant.int 1
    %60481 = torch.aten.slice.Tensor %60480, %int3_57809, %int0_57810, %int9223372036854775807_57811, %int1_57812 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60481, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60482 = torch_c.to_builtin_tensor %60364 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_57813 = arith.constant 1 : index
    %dim_57814 = tensor.dim %60482, %c1_57813 : tensor<4x?x4x128xf16>
    %60483 = flow.tensor.bitcast %60482 : tensor<4x?x4x128xf16>{%dim_57814} -> tensor<4x?x4x64xcomplex<f16>>{%dim_57814}
    %60484 = torch_c.from_builtin_tensor %60483 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %60484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %60485 = torch.aten.mul.Tensor %60484, %60481 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %60485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %60486 = torch_c.to_builtin_tensor %60485 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_57815 = arith.constant 1 : index
    %dim_57816 = tensor.dim %60486, %c1_57815 : tensor<4x?x4x64xcomplex<f32>>
    %60487 = flow.tensor.bitcast %60486 : tensor<4x?x4x64xcomplex<f32>>{%dim_57816} -> tensor<4x?x4x128xf32>{%dim_57816}
    %60488 = torch_c.from_builtin_tensor %60487 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %60488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_57817 = torch.constant.int 5
    %60489 = torch.prims.convert_element_type %60488, %int5_57817 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
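    // Same rotary application for the query shard on @__device_3.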
    %int1_57818 = torch.constant.int 1
    %60490 = torch.aten.size.int %60206, %int1_57818 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_57819 = torch.constant.int 0
    %60491 = torch.aten.add.int %int0_57819, %60490 : !torch.int, !torch.int -> !torch.int
    %int0_57820 = torch.constant.int 0
    %int0_57821 = torch.constant.int 0
    %int1_57822 = torch.constant.int 1
    %60492 = torch.aten.slice.Tensor %60432, %int0_57820, %int0_57821, %60491, %int1_57822 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60492, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_57823 = torch.constant.int 1
    %int0_57824 = torch.constant.int 0
    %int9223372036854775807_57825 = torch.constant.int 9223372036854775807
    %int1_57826 = torch.constant.int 1
    %60493 = torch.aten.slice.Tensor %60492, %int1_57823, %int0_57824, %int9223372036854775807_57825, %int1_57826 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60493, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_57827 = torch.constant.int 0
    %60494 = torch.aten.unsqueeze %60493, %int0_57827 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60494, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_57828 = torch.constant.int 2
    %60495 = torch.aten.unsqueeze %60494, %int2_57828 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60495, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_57829 = torch.constant.int 3
    %int0_57830 = torch.constant.int 0
    %int9223372036854775807_57831 = torch.constant.int 9223372036854775807
    %int1_57832 = torch.constant.int 1
    %60496 = torch.aten.slice.Tensor %60495, %int3_57829, %int0_57830, %int9223372036854775807_57831, %int1_57832 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60496, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60497 = torch_c.to_builtin_tensor %60366 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_57833 = arith.constant 1 : index
    %dim_57834 = tensor.dim %60497, %c1_57833 : tensor<4x?x4x128xf16>
    %60498 = flow.tensor.bitcast %60497 : tensor<4x?x4x128xf16>{%dim_57834} -> tensor<4x?x4x64xcomplex<f16>>{%dim_57834}
    %60499 = torch_c.from_builtin_tensor %60498 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %60499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %60500 = torch.aten.mul.Tensor %60499, %60496 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %60500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %60501 = torch_c.to_builtin_tensor %60500 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_57835 = arith.constant 1 : index
    %dim_57836 = tensor.dim %60501, %c1_57835 : tensor<4x?x4x64xcomplex<f32>>
    %60502 = flow.tensor.bitcast %60501 : tensor<4x?x4x64xcomplex<f32>>{%dim_57836} -> tensor<4x?x4x128xf32>{%dim_57836}
    %60503 = torch_c.from_builtin_tensor %60502 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %60503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_57837 = torch.constant.int 5
    %60504 = torch.prims.convert_element_type %60503, %int5_57837 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
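    // Same rotary application for the query shard on @__device_4.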
    %int1_57838 = torch.constant.int 1
    %60505 = torch.aten.size.int %60212, %int1_57838 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_57839 = torch.constant.int 0
    %60506 = torch.aten.add.int %int0_57839, %60505 : !torch.int, !torch.int -> !torch.int
    %int0_57840 = torch.constant.int 0
    %int0_57841 = torch.constant.int 0
    %int1_57842 = torch.constant.int 1
    %60507 = torch.aten.slice.Tensor %60435, %int0_57840, %int0_57841, %60506, %int1_57842 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60507, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_57843 = torch.constant.int 1
    %int0_57844 = torch.constant.int 0
    %int9223372036854775807_57845 = torch.constant.int 9223372036854775807
    %int1_57846 = torch.constant.int 1
    %60508 = torch.aten.slice.Tensor %60507, %int1_57843, %int0_57844, %int9223372036854775807_57845, %int1_57846 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60508, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_57847 = torch.constant.int 0
    %60509 = torch.aten.unsqueeze %60508, %int0_57847 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60509, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_57848 = torch.constant.int 2
    %60510 = torch.aten.unsqueeze %60509, %int2_57848 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60510, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_57849 = torch.constant.int 3
    %int0_57850 = torch.constant.int 0
    %int9223372036854775807_57851 = torch.constant.int 9223372036854775807
    %int1_57852 = torch.constant.int 1
    %60511 = torch.aten.slice.Tensor %60510, %int3_57849, %int0_57850, %int9223372036854775807_57851, %int1_57852 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60511, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60512 = torch_c.to_builtin_tensor %60368 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_57853 = arith.constant 1 : index
    %dim_57854 = tensor.dim %60512, %c1_57853 : tensor<4x?x4x128xf16>
    %60513 = flow.tensor.bitcast %60512 : tensor<4x?x4x128xf16>{%dim_57854} -> tensor<4x?x4x64xcomplex<f16>>{%dim_57854}
    %60514 = torch_c.from_builtin_tensor %60513 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %60514, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %60515 = torch.aten.mul.Tensor %60514, %60511 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %60515, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %60516 = torch_c.to_builtin_tensor %60515 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_57855 = arith.constant 1 : index
    %dim_57856 = tensor.dim %60516, %c1_57855 : tensor<4x?x4x64xcomplex<f32>>
    %60517 = flow.tensor.bitcast %60516 : tensor<4x?x4x64xcomplex<f32>>{%dim_57856} -> tensor<4x?x4x128xf32>{%dim_57856}
    %60518 = torch_c.from_builtin_tensor %60517 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %60518, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_57857 = torch.constant.int 5
    %60519 = torch.prims.convert_element_type %60518, %int5_57857 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60519, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
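    // Same rotary application for the query shard on @__device_5.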
    %int1_57858 = torch.constant.int 1
    %60520 = torch.aten.size.int %60218, %int1_57858 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_57859 = torch.constant.int 0
    %60521 = torch.aten.add.int %int0_57859, %60520 : !torch.int, !torch.int -> !torch.int
    %int0_57860 = torch.constant.int 0
    %int0_57861 = torch.constant.int 0
    %int1_57862 = torch.constant.int 1
    %60522 = torch.aten.slice.Tensor %60438, %int0_57860, %int0_57861, %60521, %int1_57862 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60522, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_57863 = torch.constant.int 1
    %int0_57864 = torch.constant.int 0
    %int9223372036854775807_57865 = torch.constant.int 9223372036854775807
    %int1_57866 = torch.constant.int 1
    %60523 = torch.aten.slice.Tensor %60522, %int1_57863, %int0_57864, %int9223372036854775807_57865, %int1_57866 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60523, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_57867 = torch.constant.int 0
    %60524 = torch.aten.unsqueeze %60523, %int0_57867 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60524, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_57868 = torch.constant.int 2
    %60525 = torch.aten.unsqueeze %60524, %int2_57868 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60525, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_57869 = torch.constant.int 3
    %int0_57870 = torch.constant.int 0
    %int9223372036854775807_57871 = torch.constant.int 9223372036854775807
    %int1_57872 = torch.constant.int 1
    %60526 = torch.aten.slice.Tensor %60525, %int3_57869, %int0_57870, %int9223372036854775807_57871, %int1_57872 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60526, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60527 = torch_c.to_builtin_tensor %60370 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_57873 = arith.constant 1 : index
    %dim_57874 = tensor.dim %60527, %c1_57873 : tensor<4x?x4x128xf16>
    %60528 = flow.tensor.bitcast %60527 : tensor<4x?x4x128xf16>{%dim_57874} -> tensor<4x?x4x64xcomplex<f16>>{%dim_57874}
    %60529 = torch_c.from_builtin_tensor %60528 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %60529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %60530 = torch.aten.mul.Tensor %60529, %60526 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %60530, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %60531 = torch_c.to_builtin_tensor %60530 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_57875 = arith.constant 1 : index
    %dim_57876 = tensor.dim %60531, %c1_57875 : tensor<4x?x4x64xcomplex<f32>>
    %60532 = flow.tensor.bitcast %60531 : tensor<4x?x4x64xcomplex<f32>>{%dim_57876} -> tensor<4x?x4x128xf32>{%dim_57876}
    %60533 = torch_c.from_builtin_tensor %60532 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %60533, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_57877 = torch.constant.int 5
    %60534 = torch.prims.convert_element_type %60533, %int5_57877 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60534, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
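    // Same rotary application for the query shard on @__device_6.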
    %int1_57878 = torch.constant.int 1
    %60535 = torch.aten.size.int %60224, %int1_57878 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_57879 = torch.constant.int 0
    %60536 = torch.aten.add.int %int0_57879, %60535 : !torch.int, !torch.int -> !torch.int
    %int0_57880 = torch.constant.int 0
    %int0_57881 = torch.constant.int 0
    %int1_57882 = torch.constant.int 1
    %60537 = torch.aten.slice.Tensor %60441, %int0_57880, %int0_57881, %60536, %int1_57882 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60537, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_57883 = torch.constant.int 1
    %int0_57884 = torch.constant.int 0
    %int9223372036854775807_57885 = torch.constant.int 9223372036854775807
    %int1_57886 = torch.constant.int 1
    %60538 = torch.aten.slice.Tensor %60537, %int1_57883, %int0_57884, %int9223372036854775807_57885, %int1_57886 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60538, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_57887 = torch.constant.int 0
    %60539 = torch.aten.unsqueeze %60538, %int0_57887 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60539, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_57888 = torch.constant.int 2
    %60540 = torch.aten.unsqueeze %60539, %int2_57888 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60540, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_57889 = torch.constant.int 3
    %int0_57890 = torch.constant.int 0
    %int9223372036854775807_57891 = torch.constant.int 9223372036854775807
    %int1_57892 = torch.constant.int 1
    %60541 = torch.aten.slice.Tensor %60540, %int3_57889, %int0_57890, %int9223372036854775807_57891, %int1_57892 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60541, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60542 = torch_c.to_builtin_tensor %60372 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_57893 = arith.constant 1 : index
    %dim_57894 = tensor.dim %60542, %c1_57893 : tensor<4x?x4x128xf16>
    %60543 = flow.tensor.bitcast %60542 : tensor<4x?x4x128xf16>{%dim_57894} -> tensor<4x?x4x64xcomplex<f16>>{%dim_57894}
    %60544 = torch_c.from_builtin_tensor %60543 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %60544, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %60545 = torch.aten.mul.Tensor %60544, %60541 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %60545, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %60546 = torch_c.to_builtin_tensor %60545 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_57895 = arith.constant 1 : index
    %dim_57896 = tensor.dim %60546, %c1_57895 : tensor<4x?x4x64xcomplex<f32>>
    %60547 = flow.tensor.bitcast %60546 : tensor<4x?x4x64xcomplex<f32>>{%dim_57896} -> tensor<4x?x4x128xf32>{%dim_57896}
    %60548 = torch_c.from_builtin_tensor %60547 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %60548, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_57897 = torch.constant.int 5
    %60549 = torch.prims.convert_element_type %60548, %int5_57897 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60549, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
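    // Same rotary application for the query shard on @__device_7.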
    %int1_57898 = torch.constant.int 1
    %60550 = torch.aten.size.int %60230, %int1_57898 : !torch.vtensor<[4,?,512],f16>, !torch.int -> !torch.int
    %int0_57899 = torch.constant.int 0
    %60551 = torch.aten.add.int %int0_57899, %60550 : !torch.int, !torch.int -> !torch.int
    %int0_57900 = torch.constant.int 0
    %int0_57901 = torch.constant.int 0
    %int1_57902 = torch.constant.int 1
    %60552 = torch.aten.slice.Tensor %60444, %int0_57900, %int0_57901, %60551, %int1_57902 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60552, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_57903 = torch.constant.int 1
    %int0_57904 = torch.constant.int 0
    %int9223372036854775807_57905 = torch.constant.int 9223372036854775807
    %int1_57906 = torch.constant.int 1
    %60553 = torch.aten.slice.Tensor %60552, %int1_57903, %int0_57904, %int9223372036854775807_57905, %int1_57906 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60553, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_57907 = torch.constant.int 0
    %60554 = torch.aten.unsqueeze %60553, %int0_57907 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60554, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_57908 = torch.constant.int 2
    %60555 = torch.aten.unsqueeze %60554, %int2_57908 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60555, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_57909 = torch.constant.int 3
    %int0_57910 = torch.constant.int 0
    %int9223372036854775807_57911 = torch.constant.int 9223372036854775807
    %int1_57912 = torch.constant.int 1
    %60556 = torch.aten.slice.Tensor %60555, %int3_57909, %int0_57910, %int9223372036854775807_57911, %int1_57912 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60556, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60557 = torch_c.to_builtin_tensor %60374 : !torch.vtensor<[4,?,4,128],f16> -> tensor<4x?x4x128xf16>
    %c1_57913 = arith.constant 1 : index
    %dim_57914 = tensor.dim %60557, %c1_57913 : tensor<4x?x4x128xf16>
    %60558 = flow.tensor.bitcast %60557 : tensor<4x?x4x128xf16>{%dim_57914} -> tensor<4x?x4x64xcomplex<f16>>{%dim_57914}
    %60559 = torch_c.from_builtin_tensor %60558 : tensor<4x?x4x64xcomplex<f16>> -> !torch.vtensor<[4,?,4,64],complex<f16>>
    torch.bind_symbolic_shape %60559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f16>>
    %60560 = torch.aten.mul.Tensor %60559, %60556 : !torch.vtensor<[4,?,4,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,4,64],complex<f32>>
    torch.bind_symbolic_shape %60560, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 64)> : !torch.vtensor<[4,?,4,64],complex<f32>>
    %60561 = torch_c.to_builtin_tensor %60560 : !torch.vtensor<[4,?,4,64],complex<f32>> -> tensor<4x?x4x64xcomplex<f32>>
    %c1_57915 = arith.constant 1 : index
    %dim_57916 = tensor.dim %60561, %c1_57915 : tensor<4x?x4x64xcomplex<f32>>
    %60562 = flow.tensor.bitcast %60561 : tensor<4x?x4x64xcomplex<f32>>{%dim_57916} -> tensor<4x?x4x128xf32>{%dim_57916}
    %60563 = torch_c.from_builtin_tensor %60562 : tensor<4x?x4x128xf32> -> !torch.vtensor<[4,?,4,128],f32>
    torch.bind_symbolic_shape %60563, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f32>
    %int5_57917 = torch.constant.int 5
    %60564 = torch.prims.convert_element_type %60563, %int5_57917 : !torch.vtensor<[4,?,4,128],f32>, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %60564, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
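    // A second, byte-identical RoPE table is built for the single-head (K)
    // path; the duplicate has not been CSE'd away at this point in the
    // pipeline.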
    %int131072_57918 = torch.constant.int 131072
    %none_57919 = torch.constant.none
    %none_57920 = torch.constant.none
    %cpu_57921 = torch.constant.device "cpu"
    %false_57922 = torch.constant.bool false
    %60565 = torch.aten.arange %int131072_57918, %none_57919, %none_57920, %cpu_57921, %false_57922 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64>
    %int0_57923 = torch.constant.int 0
    %int128_57924 = torch.constant.int 128
    %int2_57925 = torch.constant.int 2
    %none_57926 = torch.constant.none
    %none_57927 = torch.constant.none
    %cpu_57928 = torch.constant.device "cpu"
    %false_57929 = torch.constant.bool false
    %60566 = torch.aten.arange.start_step %int0_57923, %int128_57924, %int2_57925, %none_57926, %none_57927, %cpu_57928, %false_57929 : !torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64>
    %int0_57930 = torch.constant.int 0
    %int0_57931 = torch.constant.int 0
    %int64_57932 = torch.constant.int 64
    %int1_57933 = torch.constant.int 1
    %60567 = torch.aten.slice.Tensor %60566, %int0_57930, %int0_57931, %int64_57932, %int1_57933 : !torch.vtensor<[64],si64>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[64],si64>
    %int6_57934 = torch.constant.int 6
    %60568 = torch.prims.convert_element_type %60567, %int6_57934 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32>
    %int128_57935 = torch.constant.int 128
    %60569 = torch.aten.div.Scalar %60568, %int128_57935 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32>
    %float5.000000e05_57936 = torch.constant.float 5.000000e+05
    %60570 = torch.aten.pow.Scalar %float5.000000e05_57936, %60569 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %60571 = torch.aten.reciprocal %60570 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32>
    %float1.000000e00_57937 = torch.constant.float 1.000000e+00
    %60572 = torch.aten.mul.Scalar %60571, %float1.000000e00_57937 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32>
    %int131072_57938 = torch.constant.int 131072
    %int1_57939 = torch.constant.int 1
    %60573 = torch.prim.ListConstruct %int131072_57938, %int1_57939 : (!torch.int, !torch.int) -> !torch.list<int>
    %60574 = torch.aten.view %60565, %60573 : !torch.vtensor<[131072],si64>, !torch.list<int> -> !torch.vtensor<[131072,1],si64>
    %60575 = torch.aten.mul.Tensor %60574, %60572 : !torch.vtensor<[131072,1],si64>, !torch.vtensor<[64],f32> -> !torch.vtensor<[131072,64],f32>
    %60576 = torch.aten.cos %60575 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %60577 = torch.aten.sin %60575 : !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],f32>
    %60578 = torch.aten.complex %60576, %60577 : !torch.vtensor<[131072,64],f32>, !torch.vtensor<[131072,64],f32> -> !torch.vtensor<[131072,64],complex<f32>>
    %60579 = torch_c.to_builtin_tensor %60578 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60580 = flow.tensor.transfer %60579 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_0>
    %60581 = torch_c.from_builtin_tensor %60580 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60582 = torch_c.to_builtin_tensor %60578 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60583 = flow.tensor.transfer %60582 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_1>
    %60584 = torch_c.from_builtin_tensor %60583 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60585 = torch_c.to_builtin_tensor %60578 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60586 = flow.tensor.transfer %60585 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_2>
    %60587 = torch_c.from_builtin_tensor %60586 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60588 = torch_c.to_builtin_tensor %60578 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60589 = flow.tensor.transfer %60588 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_3>
    %60590 = torch_c.from_builtin_tensor %60589 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60591 = torch_c.to_builtin_tensor %60578 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60592 = flow.tensor.transfer %60591 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_4>
    %60593 = torch_c.from_builtin_tensor %60592 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60594 = torch_c.to_builtin_tensor %60578 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60595 = flow.tensor.transfer %60594 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_5>
    %60596 = torch_c.from_builtin_tensor %60595 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60597 = torch_c.to_builtin_tensor %60578 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60598 = flow.tensor.transfer %60597 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_6>
    %60599 = torch_c.from_builtin_tensor %60598 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
    %60600 = torch_c.to_builtin_tensor %60578 : !torch.vtensor<[131072,64],complex<f32>> -> tensor<131072x64xcomplex<f32>>
    %60601 = flow.tensor.transfer %60600 : tensor<131072x64xcomplex<f32>> to #hal.device.promise<@__device_7>
    %60602 = torch_c.from_builtin_tensor %60601 : tensor<131072x64xcomplex<f32>> -> !torch.vtensor<[131072,64],complex<f32>>
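    // Rotary application for the single-head shards: the same slice /
    // unsqueeze / complex-multiply pattern as above, but on [4,?,1,128]
    // tensors ([4,?,1,64] as complex), starting with @__device_0.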
    %int1_57940 = torch.constant.int 1
    %60603 = torch.aten.size.int %60252, %int1_57940 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_57941 = torch.constant.int 0
    %60604 = torch.aten.add.int %int0_57941, %60603 : !torch.int, !torch.int -> !torch.int
    %int0_57942 = torch.constant.int 0
    %int0_57943 = torch.constant.int 0
    %int1_57944 = torch.constant.int 1
    %60605 = torch.aten.slice.Tensor %60581, %int0_57942, %int0_57943, %60604, %int1_57944 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60605, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_57945 = torch.constant.int 1
    %int0_57946 = torch.constant.int 0
    %int9223372036854775807_57947 = torch.constant.int 9223372036854775807
    %int1_57948 = torch.constant.int 1
    %60606 = torch.aten.slice.Tensor %60605, %int1_57945, %int0_57946, %int9223372036854775807_57947, %int1_57948 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60606, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_57949 = torch.constant.int 0
    %60607 = torch.aten.unsqueeze %60606, %int0_57949 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60607, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_57950 = torch.constant.int 2
    %60608 = torch.aten.unsqueeze %60607, %int2_57950 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60608, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_57951 = torch.constant.int 3
    %int0_57952 = torch.constant.int 0
    %int9223372036854775807_57953 = torch.constant.int 9223372036854775807
    %int1_57954 = torch.constant.int 1
    %60609 = torch.aten.slice.Tensor %60608, %int3_57951, %int0_57952, %int9223372036854775807_57953, %int1_57954 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60609, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60610 = torch_c.to_builtin_tensor %60376 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_57955 = arith.constant 1 : index
    %dim_57956 = tensor.dim %60610, %c1_57955 : tensor<4x?x1x128xf16>
    %60611 = flow.tensor.bitcast %60610 : tensor<4x?x1x128xf16>{%dim_57956} -> tensor<4x?x1x64xcomplex<f16>>{%dim_57956}
    %60612 = torch_c.from_builtin_tensor %60611 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %60612, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %60613 = torch.aten.mul.Tensor %60612, %60609 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %60614 = torch_c.to_builtin_tensor %60613 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_57957 = arith.constant 1 : index
    %dim_57958 = tensor.dim %60614, %c1_57957 : tensor<4x?x1x64xcomplex<f32>>
    %60615 = flow.tensor.bitcast %60614 : tensor<4x?x1x64xcomplex<f32>>{%dim_57958} -> tensor<4x?x1x128xf32>{%dim_57958}
    %60616 = torch_c.from_builtin_tensor %60615 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %60616, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_57959 = torch.constant.int 5
    %60617 = torch.prims.convert_element_type %60616, %int5_57959 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60617, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
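    // Rotary application for shard 0 ends here: %60617 is the rotated K in
    // f16. The recipe -- slice the table rows [0, seq_len), unsqueeze to
    // [1,?,1,64], bitcast K from [4,?,1,128]xf16 to [4,?,1,64]xcomplex<f16>,
    // complex-multiply against the table, bitcast back to [4,?,1,128]xf32,
    // and convert to f16 (dtype code 5) -- repeats verbatim for
    // @__device_1 .. @__device_7, producing %60632 .. %60722 below.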
    %int1_57960 = torch.constant.int 1
    %60618 = torch.aten.size.int %60258, %int1_57960 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_57961 = torch.constant.int 0
    %60619 = torch.aten.add.int %int0_57961, %60618 : !torch.int, !torch.int -> !torch.int
    %int0_57962 = torch.constant.int 0
    %int0_57963 = torch.constant.int 0
    %int1_57964 = torch.constant.int 1
    %60620 = torch.aten.slice.Tensor %60584, %int0_57962, %int0_57963, %60619, %int1_57964 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60620, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_57965 = torch.constant.int 1
    %int0_57966 = torch.constant.int 0
    %int9223372036854775807_57967 = torch.constant.int 9223372036854775807
    %int1_57968 = torch.constant.int 1
    %60621 = torch.aten.slice.Tensor %60620, %int1_57965, %int0_57966, %int9223372036854775807_57967, %int1_57968 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60621, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_57969 = torch.constant.int 0
    %60622 = torch.aten.unsqueeze %60621, %int0_57969 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60622, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_57970 = torch.constant.int 2
    %60623 = torch.aten.unsqueeze %60622, %int2_57970 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60623, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_57971 = torch.constant.int 3
    %int0_57972 = torch.constant.int 0
    %int9223372036854775807_57973 = torch.constant.int 9223372036854775807
    %int1_57974 = torch.constant.int 1
    %60624 = torch.aten.slice.Tensor %60623, %int3_57971, %int0_57972, %int9223372036854775807_57973, %int1_57974 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60624, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60625 = torch_c.to_builtin_tensor %60378 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_57975 = arith.constant 1 : index
    %dim_57976 = tensor.dim %60625, %c1_57975 : tensor<4x?x1x128xf16>
    %60626 = flow.tensor.bitcast %60625 : tensor<4x?x1x128xf16>{%dim_57976} -> tensor<4x?x1x64xcomplex<f16>>{%dim_57976}
    %60627 = torch_c.from_builtin_tensor %60626 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %60627, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %60628 = torch.aten.mul.Tensor %60627, %60624 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60628, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %60629 = torch_c.to_builtin_tensor %60628 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_57977 = arith.constant 1 : index
    %dim_57978 = tensor.dim %60629, %c1_57977 : tensor<4x?x1x64xcomplex<f32>>
    %60630 = flow.tensor.bitcast %60629 : tensor<4x?x1x64xcomplex<f32>>{%dim_57978} -> tensor<4x?x1x128xf32>{%dim_57978}
    %60631 = torch_c.from_builtin_tensor %60630 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %60631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_57979 = torch.constant.int 5
    %60632 = torch.prims.convert_element_type %60631, %int5_57979 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60632, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_57980 = torch.constant.int 1
    %60633 = torch.aten.size.int %60264, %int1_57980 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_57981 = torch.constant.int 0
    %60634 = torch.aten.add.int %int0_57981, %60633 : !torch.int, !torch.int -> !torch.int
    %int0_57982 = torch.constant.int 0
    %int0_57983 = torch.constant.int 0
    %int1_57984 = torch.constant.int 1
    %60635 = torch.aten.slice.Tensor %60587, %int0_57982, %int0_57983, %60634, %int1_57984 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60635, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_57985 = torch.constant.int 1
    %int0_57986 = torch.constant.int 0
    %int9223372036854775807_57987 = torch.constant.int 9223372036854775807
    %int1_57988 = torch.constant.int 1
    %60636 = torch.aten.slice.Tensor %60635, %int1_57985, %int0_57986, %int9223372036854775807_57987, %int1_57988 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60636, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_57989 = torch.constant.int 0
    %60637 = torch.aten.unsqueeze %60636, %int0_57989 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60637, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_57990 = torch.constant.int 2
    %60638 = torch.aten.unsqueeze %60637, %int2_57990 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60638, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_57991 = torch.constant.int 3
    %int0_57992 = torch.constant.int 0
    %int9223372036854775807_57993 = torch.constant.int 9223372036854775807
    %int1_57994 = torch.constant.int 1
    %60639 = torch.aten.slice.Tensor %60638, %int3_57991, %int0_57992, %int9223372036854775807_57993, %int1_57994 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60639, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60640 = torch_c.to_builtin_tensor %60380 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_57995 = arith.constant 1 : index
    %dim_57996 = tensor.dim %60640, %c1_57995 : tensor<4x?x1x128xf16>
    %60641 = flow.tensor.bitcast %60640 : tensor<4x?x1x128xf16>{%dim_57996} -> tensor<4x?x1x64xcomplex<f16>>{%dim_57996}
    %60642 = torch_c.from_builtin_tensor %60641 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %60642, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %60643 = torch.aten.mul.Tensor %60642, %60639 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %60644 = torch_c.to_builtin_tensor %60643 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_57997 = arith.constant 1 : index
    %dim_57998 = tensor.dim %60644, %c1_57997 : tensor<4x?x1x64xcomplex<f32>>
    %60645 = flow.tensor.bitcast %60644 : tensor<4x?x1x64xcomplex<f32>>{%dim_57998} -> tensor<4x?x1x128xf32>{%dim_57998}
    %60646 = torch_c.from_builtin_tensor %60645 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %60646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_57999 = torch.constant.int 5
    %60647 = torch.prims.convert_element_type %60646, %int5_57999 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_58000 = torch.constant.int 1
    %60648 = torch.aten.size.int %60270, %int1_58000 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_58001 = torch.constant.int 0
    %60649 = torch.aten.add.int %int0_58001, %60648 : !torch.int, !torch.int -> !torch.int
    %int0_58002 = torch.constant.int 0
    %int0_58003 = torch.constant.int 0
    %int1_58004 = torch.constant.int 1
    %60650 = torch.aten.slice.Tensor %60590, %int0_58002, %int0_58003, %60649, %int1_58004 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60650, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_58005 = torch.constant.int 1
    %int0_58006 = torch.constant.int 0
    %int9223372036854775807_58007 = torch.constant.int 9223372036854775807
    %int1_58008 = torch.constant.int 1
    %60651 = torch.aten.slice.Tensor %60650, %int1_58005, %int0_58006, %int9223372036854775807_58007, %int1_58008 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60651, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_58009 = torch.constant.int 0
    %60652 = torch.aten.unsqueeze %60651, %int0_58009 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60652, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_58010 = torch.constant.int 2
    %60653 = torch.aten.unsqueeze %60652, %int2_58010 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60653, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_58011 = torch.constant.int 3
    %int0_58012 = torch.constant.int 0
    %int9223372036854775807_58013 = torch.constant.int 9223372036854775807
    %int1_58014 = torch.constant.int 1
    %60654 = torch.aten.slice.Tensor %60653, %int3_58011, %int0_58012, %int9223372036854775807_58013, %int1_58014 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60654, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60655 = torch_c.to_builtin_tensor %60382 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_58015 = arith.constant 1 : index
    %dim_58016 = tensor.dim %60655, %c1_58015 : tensor<4x?x1x128xf16>
    %60656 = flow.tensor.bitcast %60655 : tensor<4x?x1x128xf16>{%dim_58016} -> tensor<4x?x1x64xcomplex<f16>>{%dim_58016}
    %60657 = torch_c.from_builtin_tensor %60656 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %60657, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %60658 = torch.aten.mul.Tensor %60657, %60654 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60658, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %60659 = torch_c.to_builtin_tensor %60658 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_58017 = arith.constant 1 : index
    %dim_58018 = tensor.dim %60659, %c1_58017 : tensor<4x?x1x64xcomplex<f32>>
    %60660 = flow.tensor.bitcast %60659 : tensor<4x?x1x64xcomplex<f32>>{%dim_58018} -> tensor<4x?x1x128xf32>{%dim_58018}
    %60661 = torch_c.from_builtin_tensor %60660 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %60661, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_58019 = torch.constant.int 5
    %60662 = torch.prims.convert_element_type %60661, %int5_58019 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60662, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_58020 = torch.constant.int 1
    %60663 = torch.aten.size.int %60276, %int1_58020 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_58021 = torch.constant.int 0
    %60664 = torch.aten.add.int %int0_58021, %60663 : !torch.int, !torch.int -> !torch.int
    %int0_58022 = torch.constant.int 0
    %int0_58023 = torch.constant.int 0
    %int1_58024 = torch.constant.int 1
    %60665 = torch.aten.slice.Tensor %60593, %int0_58022, %int0_58023, %60664, %int1_58024 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60665, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_58025 = torch.constant.int 1
    %int0_58026 = torch.constant.int 0
    %int9223372036854775807_58027 = torch.constant.int 9223372036854775807
    %int1_58028 = torch.constant.int 1
    %60666 = torch.aten.slice.Tensor %60665, %int1_58025, %int0_58026, %int9223372036854775807_58027, %int1_58028 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60666, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_58029 = torch.constant.int 0
    %60667 = torch.aten.unsqueeze %60666, %int0_58029 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60667, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_58030 = torch.constant.int 2
    %60668 = torch.aten.unsqueeze %60667, %int2_58030 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60668, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_58031 = torch.constant.int 3
    %int0_58032 = torch.constant.int 0
    %int9223372036854775807_58033 = torch.constant.int 9223372036854775807
    %int1_58034 = torch.constant.int 1
    %60669 = torch.aten.slice.Tensor %60668, %int3_58031, %int0_58032, %int9223372036854775807_58033, %int1_58034 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60669, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60670 = torch_c.to_builtin_tensor %60384 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_58035 = arith.constant 1 : index
    %dim_58036 = tensor.dim %60670, %c1_58035 : tensor<4x?x1x128xf16>
    %60671 = flow.tensor.bitcast %60670 : tensor<4x?x1x128xf16>{%dim_58036} -> tensor<4x?x1x64xcomplex<f16>>{%dim_58036}
    %60672 = torch_c.from_builtin_tensor %60671 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %60672, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %60673 = torch.aten.mul.Tensor %60672, %60669 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60673, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %60674 = torch_c.to_builtin_tensor %60673 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_58037 = arith.constant 1 : index
    %dim_58038 = tensor.dim %60674, %c1_58037 : tensor<4x?x1x64xcomplex<f32>>
    %60675 = flow.tensor.bitcast %60674 : tensor<4x?x1x64xcomplex<f32>>{%dim_58038} -> tensor<4x?x1x128xf32>{%dim_58038}
    %60676 = torch_c.from_builtin_tensor %60675 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %60676, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_58039 = torch.constant.int 5
    %60677 = torch.prims.convert_element_type %60676, %int5_58039 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60677, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_58040 = torch.constant.int 1
    %60678 = torch.aten.size.int %60282, %int1_58040 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_58041 = torch.constant.int 0
    %60679 = torch.aten.add.int %int0_58041, %60678 : !torch.int, !torch.int -> !torch.int
    %int0_58042 = torch.constant.int 0
    %int0_58043 = torch.constant.int 0
    %int1_58044 = torch.constant.int 1
    %60680 = torch.aten.slice.Tensor %60596, %int0_58042, %int0_58043, %60679, %int1_58044 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60680, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_58045 = torch.constant.int 1
    %int0_58046 = torch.constant.int 0
    %int9223372036854775807_58047 = torch.constant.int 9223372036854775807
    %int1_58048 = torch.constant.int 1
    %60681 = torch.aten.slice.Tensor %60680, %int1_58045, %int0_58046, %int9223372036854775807_58047, %int1_58048 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60681, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_58049 = torch.constant.int 0
    %60682 = torch.aten.unsqueeze %60681, %int0_58049 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60682, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_58050 = torch.constant.int 2
    %60683 = torch.aten.unsqueeze %60682, %int2_58050 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60683, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_58051 = torch.constant.int 3
    %int0_58052 = torch.constant.int 0
    %int9223372036854775807_58053 = torch.constant.int 9223372036854775807
    %int1_58054 = torch.constant.int 1
    %60684 = torch.aten.slice.Tensor %60683, %int3_58051, %int0_58052, %int9223372036854775807_58053, %int1_58054 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60684, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60685 = torch_c.to_builtin_tensor %60386 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_58055 = arith.constant 1 : index
    %dim_58056 = tensor.dim %60685, %c1_58055 : tensor<4x?x1x128xf16>
    %60686 = flow.tensor.bitcast %60685 : tensor<4x?x1x128xf16>{%dim_58056} -> tensor<4x?x1x64xcomplex<f16>>{%dim_58056}
    %60687 = torch_c.from_builtin_tensor %60686 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %60687, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %60688 = torch.aten.mul.Tensor %60687, %60684 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %60689 = torch_c.to_builtin_tensor %60688 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_58057 = arith.constant 1 : index
    %dim_58058 = tensor.dim %60689, %c1_58057 : tensor<4x?x1x64xcomplex<f32>>
    %60690 = flow.tensor.bitcast %60689 : tensor<4x?x1x64xcomplex<f32>>{%dim_58058} -> tensor<4x?x1x128xf32>{%dim_58058}
    %60691 = torch_c.from_builtin_tensor %60690 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %60691, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_58059 = torch.constant.int 5
    %60692 = torch.prims.convert_element_type %60691, %int5_58059 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60692, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_58060 = torch.constant.int 1
    %60693 = torch.aten.size.int %60288, %int1_58060 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_58061 = torch.constant.int 0
    %60694 = torch.aten.add.int %int0_58061, %60693 : !torch.int, !torch.int -> !torch.int
    %int0_58062 = torch.constant.int 0
    %int0_58063 = torch.constant.int 0
    %int1_58064 = torch.constant.int 1
    %60695 = torch.aten.slice.Tensor %60599, %int0_58062, %int0_58063, %60694, %int1_58064 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60695, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_58065 = torch.constant.int 1
    %int0_58066 = torch.constant.int 0
    %int9223372036854775807_58067 = torch.constant.int 9223372036854775807
    %int1_58068 = torch.constant.int 1
    %60696 = torch.aten.slice.Tensor %60695, %int1_58065, %int0_58066, %int9223372036854775807_58067, %int1_58068 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60696, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_58069 = torch.constant.int 0
    %60697 = torch.aten.unsqueeze %60696, %int0_58069 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60697, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_58070 = torch.constant.int 2
    %60698 = torch.aten.unsqueeze %60697, %int2_58070 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60698, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_58071 = torch.constant.int 3
    %int0_58072 = torch.constant.int 0
    %int9223372036854775807_58073 = torch.constant.int 9223372036854775807
    %int1_58074 = torch.constant.int 1
    %60699 = torch.aten.slice.Tensor %60698, %int3_58071, %int0_58072, %int9223372036854775807_58073, %int1_58074 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60699, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60700 = torch_c.to_builtin_tensor %60388 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_58075 = arith.constant 1 : index
    %dim_58076 = tensor.dim %60700, %c1_58075 : tensor<4x?x1x128xf16>
    %60701 = flow.tensor.bitcast %60700 : tensor<4x?x1x128xf16>{%dim_58076} -> tensor<4x?x1x64xcomplex<f16>>{%dim_58076}
    %60702 = torch_c.from_builtin_tensor %60701 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %60702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %60703 = torch.aten.mul.Tensor %60702, %60699 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60703, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %60704 = torch_c.to_builtin_tensor %60703 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_58077 = arith.constant 1 : index
    %dim_58078 = tensor.dim %60704, %c1_58077 : tensor<4x?x1x64xcomplex<f32>>
    %60705 = flow.tensor.bitcast %60704 : tensor<4x?x1x64xcomplex<f32>>{%dim_58078} -> tensor<4x?x1x128xf32>{%dim_58078}
    %60706 = torch_c.from_builtin_tensor %60705 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %60706, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_58079 = torch.constant.int 5
    %60707 = torch.prims.convert_element_type %60706, %int5_58079 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60707, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
    %int1_58080 = torch.constant.int 1
    %60708 = torch.aten.size.int %60294, %int1_58080 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int0_58081 = torch.constant.int 0
    %60709 = torch.aten.add.int %int0_58081, %60708 : !torch.int, !torch.int -> !torch.int
    %int0_58082 = torch.constant.int 0
    %int0_58083 = torch.constant.int 0
    %int1_58084 = torch.constant.int 1
    %60710 = torch.aten.slice.Tensor %60602, %int0_58082, %int0_58083, %60709, %int1_58084 : !torch.vtensor<[131072,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60710, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int1_58085 = torch.constant.int 1
    %int0_58086 = torch.constant.int 0
    %int9223372036854775807_58087 = torch.constant.int 9223372036854775807
    %int1_58088 = torch.constant.int 1
    %60711 = torch.aten.slice.Tensor %60710, %int1_58085, %int0_58086, %int9223372036854775807_58087, %int1_58088 : !torch.vtensor<[?,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,64],complex<f32>>
    torch.bind_symbolic_shape %60711, [%2336], affine_map<()[s0] -> (s0 * 16, 64)> : !torch.vtensor<[?,64],complex<f32>>
    %int0_58089 = torch.constant.int 0
    %60712 = torch.aten.unsqueeze %60711, %int0_58089 : !torch.vtensor<[?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,64],complex<f32>>
    torch.bind_symbolic_shape %60712, [%2336], affine_map<()[s0] -> (1, s0 * 16, 64)> : !torch.vtensor<[1,?,64],complex<f32>>
    %int2_58090 = torch.constant.int 2
    %60713 = torch.aten.unsqueeze %60712, %int2_58090 : !torch.vtensor<[1,?,64],complex<f32>>, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60713, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %int3_58091 = torch.constant.int 3
    %int0_58092 = torch.constant.int 0
    %int9223372036854775807_58093 = torch.constant.int 9223372036854775807
    %int1_58094 = torch.constant.int 1
    %60714 = torch.aten.slice.Tensor %60713, %int3_58091, %int0_58092, %int9223372036854775807_58093, %int1_58094 : !torch.vtensor<[1,?,1,64],complex<f32>>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60714, [%2336], affine_map<()[s0] -> (1, s0 * 16, 1, 64)> : !torch.vtensor<[1,?,1,64],complex<f32>>
    %60715 = torch_c.to_builtin_tensor %60390 : !torch.vtensor<[4,?,1,128],f16> -> tensor<4x?x1x128xf16>
    %c1_58095 = arith.constant 1 : index
    %dim_58096 = tensor.dim %60715, %c1_58095 : tensor<4x?x1x128xf16>
    %60716 = flow.tensor.bitcast %60715 : tensor<4x?x1x128xf16>{%dim_58096} -> tensor<4x?x1x64xcomplex<f16>>{%dim_58096}
    %60717 = torch_c.from_builtin_tensor %60716 : tensor<4x?x1x64xcomplex<f16>> -> !torch.vtensor<[4,?,1,64],complex<f16>>
    torch.bind_symbolic_shape %60717, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f16>>
    %60718 = torch.aten.mul.Tensor %60717, %60714 : !torch.vtensor<[4,?,1,64],complex<f16>>, !torch.vtensor<[1,?,1,64],complex<f32>> -> !torch.vtensor<[4,?,1,64],complex<f32>>
    torch.bind_symbolic_shape %60718, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 64)> : !torch.vtensor<[4,?,1,64],complex<f32>>
    %60719 = torch_c.to_builtin_tensor %60718 : !torch.vtensor<[4,?,1,64],complex<f32>> -> tensor<4x?x1x64xcomplex<f32>>
    %c1_58097 = arith.constant 1 : index
    %dim_58098 = tensor.dim %60719, %c1_58097 : tensor<4x?x1x64xcomplex<f32>>
    %60720 = flow.tensor.bitcast %60719 : tensor<4x?x1x64xcomplex<f32>>{%dim_58098} -> tensor<4x?x1x128xf32>{%dim_58098}
    %60721 = torch_c.from_builtin_tensor %60720 : tensor<4x?x1x128xf32> -> !torch.vtensor<[4,?,1,128],f32>
    torch.bind_symbolic_shape %60721, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f32>
    %int5_58099 = torch.constant.int 5
    %60722 = torch.prims.convert_element_type %60721, %int5_58099 : !torch.vtensor<[4,?,1,128],f32>, !torch.int -> !torch.vtensor<[4,?,1,128],f16>
    torch.bind_symbolic_shape %60722, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 128)> : !torch.vtensor<[4,?,1,128],f16>
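    // All eight K shards are now rotated (%60617, %60632, ..., %60722). What
    // follows is the KV-cache write-index arithmetic: the per-device page-id
    // tensors (%2364, %2367, ..., %2385) are scaled by 64 and offset by 62.
    // Given the +1 applied near the end for the companion slots, 64 is
    // evidently the slot count per cache page and 62/63 the K/V slot pair
    // assigned to this transformer block.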
    %int64_58100 = torch.constant.int 64
    %60723 = torch.aten.mul.Scalar %2364, %int64_58100 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60723, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_58101 = torch.constant.int 64
    %60724 = torch.aten.mul.Scalar %2367, %int64_58101 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60724, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_58102 = torch.constant.int 64
    %60725 = torch.aten.mul.Scalar %2370, %int64_58102 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60725, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_58103 = torch.constant.int 64
    %60726 = torch.aten.mul.Scalar %2373, %int64_58103 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60726, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_58104 = torch.constant.int 64
    %60727 = torch.aten.mul.Scalar %2376, %int64_58104 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60727, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_58105 = torch.constant.int 64
    %60728 = torch.aten.mul.Scalar %2379, %int64_58105 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60728, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_58106 = torch.constant.int 64
    %60729 = torch.aten.mul.Scalar %2382, %int64_58106 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60729, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int64_58107 = torch.constant.int 64
    %60730 = torch.aten.mul.Scalar %2385, %int64_58107 : !torch.vtensor<[4,?],si64>, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60730, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int62 = torch.constant.int 62
    %int1_58108 = torch.constant.int 1
    %60731 = torch.aten.add.Scalar %60723, %int62, %int1_58108 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60731, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int62_58109 = torch.constant.int 62
    %int1_58110 = torch.constant.int 1
    %60732 = torch.aten.add.Scalar %60724, %int62_58109, %int1_58110 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60732, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int62_58111 = torch.constant.int 62
    %int1_58112 = torch.constant.int 1
    %60733 = torch.aten.add.Scalar %60725, %int62_58111, %int1_58112 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60733, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int62_58113 = torch.constant.int 62
    %int1_58114 = torch.constant.int 1
    %60734 = torch.aten.add.Scalar %60726, %int62_58113, %int1_58114 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60734, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int62_58115 = torch.constant.int 62
    %int1_58116 = torch.constant.int 1
    %60735 = torch.aten.add.Scalar %60727, %int62_58115, %int1_58116 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60735, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int62_58117 = torch.constant.int 62
    %int1_58118 = torch.constant.int 1
    %60736 = torch.aten.add.Scalar %60728, %int62_58117, %int1_58118 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60736, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int62_58119 = torch.constant.int 62
    %int1_58120 = torch.constant.int 1
    %60737 = torch.aten.add.Scalar %60729, %int62_58119, %int1_58120 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60737, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int62_58121 = torch.constant.int 62
    %int1_58122 = torch.constant.int 1
    %60738 = torch.aten.add.Scalar %60730, %int62_58121, %int1_58122 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60738, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
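    // Reshape each rotated K shard from [4,?,1,128] to the paged layout
    // [4, pages, 16, 1, 128]: batch 4, 16 tokens per page (sequence length is
    // bound as s0 * 16 everywhere above, while the page dim binds to s0),
    // one KV head per shard, head dim 128.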
    %int4_58123 = torch.constant.int 4
    %int16_58124 = torch.constant.int 16
    %int1_58125 = torch.constant.int 1
    %int128_58126 = torch.constant.int 128
    %60739 = torch.prim.ListConstruct %int4_58123, %3095, %int16_58124, %int1_58125, %int128_58126 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60740 = torch.aten.view %60617, %60739 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60740, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58127 = torch.constant.int 4
    %int16_58128 = torch.constant.int 16
    %int1_58129 = torch.constant.int 1
    %int128_58130 = torch.constant.int 128
    %60741 = torch.prim.ListConstruct %int4_58127, %3095, %int16_58128, %int1_58129, %int128_58130 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60742 = torch.aten.view %60632, %60741 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60742, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58131 = torch.constant.int 4
    %int16_58132 = torch.constant.int 16
    %int1_58133 = torch.constant.int 1
    %int128_58134 = torch.constant.int 128
    %60743 = torch.prim.ListConstruct %int4_58131, %3095, %int16_58132, %int1_58133, %int128_58134 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60744 = torch.aten.view %60647, %60743 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60744, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58135 = torch.constant.int 4
    %int16_58136 = torch.constant.int 16
    %int1_58137 = torch.constant.int 1
    %int128_58138 = torch.constant.int 128
    %60745 = torch.prim.ListConstruct %int4_58135, %3095, %int16_58136, %int1_58137, %int128_58138 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60746 = torch.aten.view %60662, %60745 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60746, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58139 = torch.constant.int 4
    %int16_58140 = torch.constant.int 16
    %int1_58141 = torch.constant.int 1
    %int128_58142 = torch.constant.int 128
    %60747 = torch.prim.ListConstruct %int4_58139, %3095, %int16_58140, %int1_58141, %int128_58142 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60748 = torch.aten.view %60677, %60747 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60748, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58143 = torch.constant.int 4
    %int16_58144 = torch.constant.int 16
    %int1_58145 = torch.constant.int 1
    %int128_58146 = torch.constant.int 128
    %60749 = torch.prim.ListConstruct %int4_58143, %3095, %int16_58144, %int1_58145, %int128_58146 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60750 = torch.aten.view %60692, %60749 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60750, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58147 = torch.constant.int 4
    %int16_58148 = torch.constant.int 16
    %int1_58149 = torch.constant.int 1
    %int128_58150 = torch.constant.int 128
    %60751 = torch.prim.ListConstruct %int4_58147, %3095, %int16_58148, %int1_58149, %int128_58150 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60752 = torch.aten.view %60707, %60751 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60752, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58151 = torch.constant.int 4
    %int16_58152 = torch.constant.int 16
    %int1_58153 = torch.constant.int 1
    %int128_58154 = torch.constant.int 128
    %60753 = torch.prim.ListConstruct %int4_58151, %3095, %int16_58152, %int1_58153, %int128_58154 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60754 = torch.aten.view %60722, %60753 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60754, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
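    // Collapse batch and pages: [4,?,16,1,128] -> [4*?,16,1,128], presumably
    // so each shard's pages can be written into its cache with a single flat
    // index per page.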
    %int4_58155 = torch.constant.int 4
    %60755 = torch.aten.mul.int %int4_58155, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58156 = torch.constant.int 16
    %int1_58157 = torch.constant.int 1
    %int128_58158 = torch.constant.int 128
    %60756 = torch.prim.ListConstruct %60755, %int16_58156, %int1_58157, %int128_58158 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60757 = torch.aten.view %60740, %60756 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60757, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58159 = torch.constant.int 4
    %60758 = torch.aten.mul.int %int4_58159, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58160 = torch.constant.int 16
    %int1_58161 = torch.constant.int 1
    %int128_58162 = torch.constant.int 128
    %60759 = torch.prim.ListConstruct %60758, %int16_58160, %int1_58161, %int128_58162 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60760 = torch.aten.view %60742, %60759 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60760, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58163 = torch.constant.int 4
    %60761 = torch.aten.mul.int %int4_58163, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58164 = torch.constant.int 16
    %int1_58165 = torch.constant.int 1
    %int128_58166 = torch.constant.int 128
    %60762 = torch.prim.ListConstruct %60761, %int16_58164, %int1_58165, %int128_58166 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60763 = torch.aten.view %60744, %60762 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60763, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58167 = torch.constant.int 4
    %60764 = torch.aten.mul.int %int4_58167, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58168 = torch.constant.int 16
    %int1_58169 = torch.constant.int 1
    %int128_58170 = torch.constant.int 128
    %60765 = torch.prim.ListConstruct %60764, %int16_58168, %int1_58169, %int128_58170 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60766 = torch.aten.view %60746, %60765 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60766, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58171 = torch.constant.int 4
    %60767 = torch.aten.mul.int %int4_58171, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58172 = torch.constant.int 16
    %int1_58173 = torch.constant.int 1
    %int128_58174 = torch.constant.int 128
    %60768 = torch.prim.ListConstruct %60767, %int16_58172, %int1_58173, %int128_58174 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60769 = torch.aten.view %60748, %60768 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60769, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58175 = torch.constant.int 4
    %60770 = torch.aten.mul.int %int4_58175, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58176 = torch.constant.int 16
    %int1_58177 = torch.constant.int 1
    %int128_58178 = torch.constant.int 128
    %60771 = torch.prim.ListConstruct %60770, %int16_58176, %int1_58177, %int128_58178 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60772 = torch.aten.view %60750, %60771 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60772, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58179 = torch.constant.int 4
    %60773 = torch.aten.mul.int %int4_58179, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58180 = torch.constant.int 16
    %int1_58181 = torch.constant.int 1
    %int128_58182 = torch.constant.int 128
    %60774 = torch.prim.ListConstruct %60773, %int16_58180, %int1_58181, %int128_58182 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60775 = torch.aten.view %60752, %60774 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60775, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58183 = torch.constant.int 4
    %60776 = torch.aten.mul.int %int4_58183, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58184 = torch.constant.int 16
    %int1_58185 = torch.constant.int 1
    %int128_58186 = torch.constant.int 128
    %60777 = torch.prim.ListConstruct %60776, %int16_58184, %int1_58185, %int128_58186 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60778 = torch.aten.view %60754, %60777 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60778, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
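    // Flatten the K slot indices to match: [4,?]si64 -> [4*?]si64
    // (%60781 .. %60802, one per device).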
    %int4_58187 = torch.constant.int 4
    %60779 = torch.aten.mul.int %int4_58187, %3095 : !torch.int, !torch.int -> !torch.int
    %60780 = torch.prim.ListConstruct %60779 : (!torch.int) -> !torch.list<int>
    %60781 = torch.aten.view %60731, %60780 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60781, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58188 = torch.constant.int 4
    %60782 = torch.aten.mul.int %int4_58188, %3095 : !torch.int, !torch.int -> !torch.int
    %60783 = torch.prim.ListConstruct %60782 : (!torch.int) -> !torch.list<int>
    %60784 = torch.aten.view %60732, %60783 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60784, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58189 = torch.constant.int 4
    %60785 = torch.aten.mul.int %int4_58189, %3095 : !torch.int, !torch.int -> !torch.int
    %60786 = torch.prim.ListConstruct %60785 : (!torch.int) -> !torch.list<int>
    %60787 = torch.aten.view %60733, %60786 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60787, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58190 = torch.constant.int 4
    %60788 = torch.aten.mul.int %int4_58190, %3095 : !torch.int, !torch.int -> !torch.int
    %60789 = torch.prim.ListConstruct %60788 : (!torch.int) -> !torch.list<int>
    %60790 = torch.aten.view %60734, %60789 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60790, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58191 = torch.constant.int 4
    %60791 = torch.aten.mul.int %int4_58191, %3095 : !torch.int, !torch.int -> !torch.int
    %60792 = torch.prim.ListConstruct %60791 : (!torch.int) -> !torch.list<int>
    %60793 = torch.aten.view %60735, %60792 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60793, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58192 = torch.constant.int 4
    %60794 = torch.aten.mul.int %int4_58192, %3095 : !torch.int, !torch.int -> !torch.int
    %60795 = torch.prim.ListConstruct %60794 : (!torch.int) -> !torch.list<int>
    %60796 = torch.aten.view %60736, %60795 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60796, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58193 = torch.constant.int 4
    %60797 = torch.aten.mul.int %int4_58193, %3095 : !torch.int, !torch.int -> !torch.int
    %60798 = torch.prim.ListConstruct %60797 : (!torch.int) -> !torch.list<int>
    %60799 = torch.aten.view %60737, %60798 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60799, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58194 = torch.constant.int 4
    %60800 = torch.aten.mul.int %int4_58194, %3095 : !torch.int, !torch.int -> !torch.int
    %60801 = torch.prim.ListConstruct %60800 : (!torch.int) -> !torch.list<int>
    %60802 = torch.aten.view %60738, %60801 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60802, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
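    // The same paged reshape, now for %60392 .. %60406 -- evidently the V
    // shards, since they arrive here without the rotary treatment applied to
    // K above.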
    %int4_58195 = torch.constant.int 4
    %int16_58196 = torch.constant.int 16
    %int1_58197 = torch.constant.int 1
    %int128_58198 = torch.constant.int 128
    %60803 = torch.prim.ListConstruct %int4_58195, %3095, %int16_58196, %int1_58197, %int128_58198 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60804 = torch.aten.view %60392, %60803 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60804, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58199 = torch.constant.int 4
    %int16_58200 = torch.constant.int 16
    %int1_58201 = torch.constant.int 1
    %int128_58202 = torch.constant.int 128
    %60805 = torch.prim.ListConstruct %int4_58199, %3095, %int16_58200, %int1_58201, %int128_58202 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60806 = torch.aten.view %60394, %60805 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60806, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58203 = torch.constant.int 4
    %int16_58204 = torch.constant.int 16
    %int1_58205 = torch.constant.int 1
    %int128_58206 = torch.constant.int 128
    %60807 = torch.prim.ListConstruct %int4_58203, %3095, %int16_58204, %int1_58205, %int128_58206 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60808 = torch.aten.view %60396, %60807 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60808, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58207 = torch.constant.int 4
    %int16_58208 = torch.constant.int 16
    %int1_58209 = torch.constant.int 1
    %int128_58210 = torch.constant.int 128
    %60809 = torch.prim.ListConstruct %int4_58207, %3095, %int16_58208, %int1_58209, %int128_58210 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60810 = torch.aten.view %60398, %60809 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60810, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58211 = torch.constant.int 4
    %int16_58212 = torch.constant.int 16
    %int1_58213 = torch.constant.int 1
    %int128_58214 = torch.constant.int 128
    %60811 = torch.prim.ListConstruct %int4_58211, %3095, %int16_58212, %int1_58213, %int128_58214 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60812 = torch.aten.view %60400, %60811 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60812, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58215 = torch.constant.int 4
    %int16_58216 = torch.constant.int 16
    %int1_58217 = torch.constant.int 1
    %int128_58218 = torch.constant.int 128
    %60813 = torch.prim.ListConstruct %int4_58215, %3095, %int16_58216, %int1_58217, %int128_58218 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60814 = torch.aten.view %60402, %60813 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60814, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58219 = torch.constant.int 4
    %int16_58220 = torch.constant.int 16
    %int1_58221 = torch.constant.int 1
    %int128_58222 = torch.constant.int 128
    %60815 = torch.prim.ListConstruct %int4_58219, %3095, %int16_58220, %int1_58221, %int128_58222 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60816 = torch.aten.view %60404, %60815 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60816, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
    %int4_58223 = torch.constant.int 4
    %int16_58224 = torch.constant.int 16
    %int1_58225 = torch.constant.int 1
    %int128_58226 = torch.constant.int 128
    %60817 = torch.prim.ListConstruct %int4_58223, %3095, %int16_58224, %int1_58225, %int128_58226 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60818 = torch.aten.view %60406, %60817 : !torch.vtensor<[4,?,1,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,16,1,128],f16>
    torch.bind_symbolic_shape %60818, [%2336], affine_map<()[s0] -> (4, s0, 16, 1, 128)> : !torch.vtensor<[4,?,16,1,128],f16>
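    // And the matching flatten for the V pages: [4,?,16,1,128] ->
    // [4*?,16,1,128] (%60821 .. %60842).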
    %int4_58227 = torch.constant.int 4
    %60819 = torch.aten.mul.int %int4_58227, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58228 = torch.constant.int 16
    %int1_58229 = torch.constant.int 1
    %int128_58230 = torch.constant.int 128
    %60820 = torch.prim.ListConstruct %60819, %int16_58228, %int1_58229, %int128_58230 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60821 = torch.aten.view %60804, %60820 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60821, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58231 = torch.constant.int 4
    %60822 = torch.aten.mul.int %int4_58231, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58232 = torch.constant.int 16
    %int1_58233 = torch.constant.int 1
    %int128_58234 = torch.constant.int 128
    %60823 = torch.prim.ListConstruct %60822, %int16_58232, %int1_58233, %int128_58234 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60824 = torch.aten.view %60806, %60823 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60824, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58235 = torch.constant.int 4
    %60825 = torch.aten.mul.int %int4_58235, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58236 = torch.constant.int 16
    %int1_58237 = torch.constant.int 1
    %int128_58238 = torch.constant.int 128
    %60826 = torch.prim.ListConstruct %60825, %int16_58236, %int1_58237, %int128_58238 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60827 = torch.aten.view %60808, %60826 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60827, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58239 = torch.constant.int 4
    %60828 = torch.aten.mul.int %int4_58239, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58240 = torch.constant.int 16
    %int1_58241 = torch.constant.int 1
    %int128_58242 = torch.constant.int 128
    %60829 = torch.prim.ListConstruct %60828, %int16_58240, %int1_58241, %int128_58242 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60830 = torch.aten.view %60810, %60829 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60830, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58243 = torch.constant.int 4
    %60831 = torch.aten.mul.int %int4_58243, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58244 = torch.constant.int 16
    %int1_58245 = torch.constant.int 1
    %int128_58246 = torch.constant.int 128
    %60832 = torch.prim.ListConstruct %60831, %int16_58244, %int1_58245, %int128_58246 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60833 = torch.aten.view %60812, %60832 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60833, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58247 = torch.constant.int 4
    %60834 = torch.aten.mul.int %int4_58247, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58248 = torch.constant.int 16
    %int1_58249 = torch.constant.int 1
    %int128_58250 = torch.constant.int 128
    %60835 = torch.prim.ListConstruct %60834, %int16_58248, %int1_58249, %int128_58250 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60836 = torch.aten.view %60814, %60835 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60836, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58251 = torch.constant.int 4
    %60837 = torch.aten.mul.int %int4_58251, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58252 = torch.constant.int 16
    %int1_58253 = torch.constant.int 1
    %int128_58254 = torch.constant.int 128
    %60838 = torch.prim.ListConstruct %60837, %int16_58252, %int1_58253, %int128_58254 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60839 = torch.aten.view %60816, %60838 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60839, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int4_58255 = torch.constant.int 4
    %60840 = torch.aten.mul.int %int4_58255, %3095 : !torch.int, !torch.int -> !torch.int
    %int16_58256 = torch.constant.int 16
    %int1_58257 = torch.constant.int 1
    %int128_58258 = torch.constant.int 128
    %60841 = torch.prim.ListConstruct %60840, %int16_58256, %int1_58257, %int128_58258 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60842 = torch.aten.view %60818, %60841 : !torch.vtensor<[4,?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60842, [%2336], affine_map<()[s0] -> (s0 * 4, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
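    // Offset each shard's [4, s0] page-slot index tensor by 1. Given the cache layout
    // viewed later as [pages, 32, 2, 16, 1, 128], this likely advances the row index from
    // the K sub-slot to the adjacent V sub-slot of the same page (K/V interleaved on dim 2).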
    %int1_58259 = torch.constant.int 1
    %int1_58260 = torch.constant.int 1
    %60843 = torch.aten.add.Scalar %60731, %int1_58259, %int1_58260 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60843, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_58261 = torch.constant.int 1
    %int1_58262 = torch.constant.int 1
    %60844 = torch.aten.add.Scalar %60732, %int1_58261, %int1_58262 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60844, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_58263 = torch.constant.int 1
    %int1_58264 = torch.constant.int 1
    %60845 = torch.aten.add.Scalar %60733, %int1_58263, %int1_58264 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60845, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_58265 = torch.constant.int 1
    %int1_58266 = torch.constant.int 1
    %60846 = torch.aten.add.Scalar %60734, %int1_58265, %int1_58266 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60846, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_58267 = torch.constant.int 1
    %int1_58268 = torch.constant.int 1
    %60847 = torch.aten.add.Scalar %60735, %int1_58267, %int1_58268 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60847, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_58269 = torch.constant.int 1
    %int1_58270 = torch.constant.int 1
    %60848 = torch.aten.add.Scalar %60736, %int1_58269, %int1_58270 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60848, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_58271 = torch.constant.int 1
    %int1_58272 = torch.constant.int 1
    %60849 = torch.aten.add.Scalar %60737, %int1_58271, %int1_58272 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60849, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
    %int1_58273 = torch.constant.int 1
    %int1_58274 = torch.constant.int 1
    %60850 = torch.aten.add.Scalar %60738, %int1_58273, %int1_58274 : !torch.vtensor<[4,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[4,?],si64>
    torch.bind_symbolic_shape %60850, [%2336], affine_map<()[s0] -> (4, s0)> : !torch.vtensor<[4,?],si64>
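    // Flatten each [4, s0] index tensor to a 1-D vector of 4*s0 row indices, matching the
    // flattened payload rows above.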
    %int4_58275 = torch.constant.int 4
    %60851 = torch.aten.mul.int %int4_58275, %3095 : !torch.int, !torch.int -> !torch.int
    %60852 = torch.prim.ListConstruct %60851 : (!torch.int) -> !torch.list<int>
    %60853 = torch.aten.view %60843, %60852 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60853, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58276 = torch.constant.int 4
    %60854 = torch.aten.mul.int %int4_58276, %3095 : !torch.int, !torch.int -> !torch.int
    %60855 = torch.prim.ListConstruct %60854 : (!torch.int) -> !torch.list<int>
    %60856 = torch.aten.view %60844, %60855 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60856, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58277 = torch.constant.int 4
    %60857 = torch.aten.mul.int %int4_58277, %3095 : !torch.int, !torch.int -> !torch.int
    %60858 = torch.prim.ListConstruct %60857 : (!torch.int) -> !torch.list<int>
    %60859 = torch.aten.view %60845, %60858 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60859, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58278 = torch.constant.int 4
    %60860 = torch.aten.mul.int %int4_58278, %3095 : !torch.int, !torch.int -> !torch.int
    %60861 = torch.prim.ListConstruct %60860 : (!torch.int) -> !torch.list<int>
    %60862 = torch.aten.view %60846, %60861 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60862, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58279 = torch.constant.int 4
    %60863 = torch.aten.mul.int %int4_58279, %3095 : !torch.int, !torch.int -> !torch.int
    %60864 = torch.prim.ListConstruct %60863 : (!torch.int) -> !torch.list<int>
    %60865 = torch.aten.view %60847, %60864 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60865, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58280 = torch.constant.int 4
    %60866 = torch.aten.mul.int %int4_58280, %3095 : !torch.int, !torch.int -> !torch.int
    %60867 = torch.prim.ListConstruct %60866 : (!torch.int) -> !torch.list<int>
    %60868 = torch.aten.view %60848, %60867 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60868, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58281 = torch.constant.int 4
    %60869 = torch.aten.mul.int %int4_58281, %3095 : !torch.int, !torch.int -> !torch.int
    %60870 = torch.prim.ListConstruct %60869 : (!torch.int) -> !torch.list<int>
    %60871 = torch.aten.view %60849, %60870 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60871, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
    %int4_58282 = torch.constant.int 4
    %60872 = torch.aten.mul.int %int4_58282, %3095 : !torch.int, !torch.int -> !torch.int
    %60873 = torch.prim.ListConstruct %60872 : (!torch.int) -> !torch.list<int>
    %60874 = torch.aten.view %60850, %60873 : !torch.vtensor<[4,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60874, [%2336], affine_map<()[s0] -> (s0 * 4)> : !torch.vtensor<[?],si64>
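    // Concatenate the previously computed row indices with the +1-offset ones, giving
    // 8*s0 write positions per shard -- presumably the K rows followed by their paired V rows.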
    %60875 = torch.prim.ListConstruct %60781, %60853 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_58283 = torch.constant.int 0
    %60876 = torch.aten.cat %60875, %int0_58283 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60876, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %60877 = torch.prim.ListConstruct %60784, %60856 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_58284 = torch.constant.int 0
    %60878 = torch.aten.cat %60877, %int0_58284 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60878, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %60879 = torch.prim.ListConstruct %60787, %60859 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_58285 = torch.constant.int 0
    %60880 = torch.aten.cat %60879, %int0_58285 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60880, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %60881 = torch.prim.ListConstruct %60790, %60862 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_58286 = torch.constant.int 0
    %60882 = torch.aten.cat %60881, %int0_58286 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60882, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %60883 = torch.prim.ListConstruct %60793, %60865 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_58287 = torch.constant.int 0
    %60884 = torch.aten.cat %60883, %int0_58287 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60884, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %60885 = torch.prim.ListConstruct %60796, %60868 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_58288 = torch.constant.int 0
    %60886 = torch.aten.cat %60885, %int0_58288 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60886, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %60887 = torch.prim.ListConstruct %60799, %60871 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_58289 = torch.constant.int 0
    %60888 = torch.aten.cat %60887, %int0_58289 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60888, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
    %60889 = torch.prim.ListConstruct %60802, %60874 : (!torch.vtensor<[?],si64>, !torch.vtensor<[?],si64>) -> !torch.list<vtensor>
    %int0_58290 = torch.constant.int 0
    %60890 = torch.aten.cat %60889, %int0_58290 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?],si64>
    torch.bind_symbolic_shape %60890, [%2336], affine_map<()[s0] -> (s0 * 8)> : !torch.vtensor<[?],si64>
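    // Concatenate the matching payload rows ([4*s0,16,1,128] each) so the [8*s0,16,1,128]
    // values line up one-to-one with the 8*s0 write positions built above.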
    %60891 = torch.prim.ListConstruct %60757, %60821 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_58291 = torch.constant.int 0
    %60892 = torch.aten.cat %60891, %int0_58291 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60892, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60893 = torch.prim.ListConstruct %60760, %60824 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_58292 = torch.constant.int 0
    %60894 = torch.aten.cat %60893, %int0_58292 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60894, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60895 = torch.prim.ListConstruct %60763, %60827 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_58293 = torch.constant.int 0
    %60896 = torch.aten.cat %60895, %int0_58293 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60896, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60897 = torch.prim.ListConstruct %60766, %60830 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_58294 = torch.constant.int 0
    %60898 = torch.aten.cat %60897, %int0_58294 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60898, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60899 = torch.prim.ListConstruct %60769, %60833 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_58295 = torch.constant.int 0
    %60900 = torch.aten.cat %60899, %int0_58295 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60900, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60901 = torch.prim.ListConstruct %60772, %60836 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_58296 = torch.constant.int 0
    %60902 = torch.aten.cat %60901, %int0_58296 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60902, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60903 = torch.prim.ListConstruct %60775, %60839 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_58297 = torch.constant.int 0
    %60904 = torch.aten.cat %60903, %int0_58297 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60904, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60905 = torch.prim.ListConstruct %60778, %60842 : (!torch.vtensor<[?,16,1,128],f16>, !torch.vtensor<[?,16,1,128],f16>) -> !torch.list<vtensor>
    %int0_58298 = torch.constant.int 0
    %60906 = torch.aten.cat %60905, %int0_58298 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60906, [%2336], affine_map<()[s0] -> (s0 * 8, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
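    // Per-device cache update. Each flat [?,131072] cache shard is viewed as
    // [pages, 32, 2, 16, 1, 128] (131072 = 32 * 2 * 16 * 1 * 128; the 32 is likely the
    // transformer layer count and the 2 the K/V pair), flattened to [pages*64, 16, 1, 128]
    // rows, updated with index_put at the combined row indices, viewed back to the flat
    // layout, and written over the corresponding cache argument in place.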
    %int32_58299 = torch.constant.int 32
    %int2_58300 = torch.constant.int 2
    %int16_58301 = torch.constant.int 16
    %int1_58302 = torch.constant.int 1
    %int128_58303 = torch.constant.int 128
    %60907 = torch.prim.ListConstruct %3023, %int32_58299, %int2_58300, %int16_58301, %int1_58302, %int128_58303 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60908 = torch.aten.view %59057, %60907 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60908, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_58304 = torch.constant.int 32
    %60909 = torch.aten.mul.int %3023, %int32_58304 : !torch.int, !torch.int -> !torch.int
    %int2_58305 = torch.constant.int 2
    %60910 = torch.aten.mul.int %60909, %int2_58305 : !torch.int, !torch.int -> !torch.int
    %int16_58306 = torch.constant.int 16
    %int1_58307 = torch.constant.int 1
    %int128_58308 = torch.constant.int 128
    %60911 = torch.prim.ListConstruct %60910, %int16_58306, %int1_58307, %int128_58308 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60912 = torch.aten.view %60908, %60911 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60912, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60913 = torch.prim.ListConstruct %60876 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_58309 = torch.constant.bool false
    %60914 = torch.aten.index_put %60912, %60913, %60892, %false_58309 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60914, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_58310 = torch.constant.int 32
    %int2_58311 = torch.constant.int 2
    %int16_58312 = torch.constant.int 16
    %int1_58313 = torch.constant.int 1
    %int128_58314 = torch.constant.int 128
    %60915 = torch.prim.ListConstruct %3023, %int32_58310, %int2_58311, %int16_58312, %int1_58313, %int128_58314 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60916 = torch.aten.view %60914, %60915 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60916, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_58315 = torch.constant.int 131072
    %60917 = torch.prim.ListConstruct %3023, %int131072_58315 : (!torch.int, !torch.int) -> !torch.list<int>
    %60918 = torch.aten.view %60916, %60917 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.overwrite.tensor.contents %60918 overwrites %arg3 : !torch.vtensor<[?,131072],f16>, !torch.tensor<[?,131072],f16>
    torch.bind_symbolic_shape %60918, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
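    // The identical view / index_put / view / overwrite sequence is repeated for the
    // remaining seven shards, updating %arg4 through %arg10.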
    %int32_58316 = torch.constant.int 32
    %int2_58317 = torch.constant.int 2
    %int16_58318 = torch.constant.int 16
    %int1_58319 = torch.constant.int 1
    %int128_58320 = torch.constant.int 128
    %60919 = torch.prim.ListConstruct %3026, %int32_58316, %int2_58317, %int16_58318, %int1_58319, %int128_58320 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60920 = torch.aten.view %59069, %60919 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60920, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_58321 = torch.constant.int 32
    %60921 = torch.aten.mul.int %3026, %int32_58321 : !torch.int, !torch.int -> !torch.int
    %int2_58322 = torch.constant.int 2
    %60922 = torch.aten.mul.int %60921, %int2_58322 : !torch.int, !torch.int -> !torch.int
    %int16_58323 = torch.constant.int 16
    %int1_58324 = torch.constant.int 1
    %int128_58325 = torch.constant.int 128
    %60923 = torch.prim.ListConstruct %60922, %int16_58323, %int1_58324, %int128_58325 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60924 = torch.aten.view %60920, %60923 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60924, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60925 = torch.prim.ListConstruct %60878 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_58326 = torch.constant.bool false
    %60926 = torch.aten.index_put %60924, %60925, %60894, %false_58326 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60926, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_58327 = torch.constant.int 32
    %int2_58328 = torch.constant.int 2
    %int16_58329 = torch.constant.int 16
    %int1_58330 = torch.constant.int 1
    %int128_58331 = torch.constant.int 128
    %60927 = torch.prim.ListConstruct %3026, %int32_58327, %int2_58328, %int16_58329, %int1_58330, %int128_58331 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60928 = torch.aten.view %60926, %60927 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60928, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_58332 = torch.constant.int 131072
    %60929 = torch.prim.ListConstruct %3026, %int131072_58332 : (!torch.int, !torch.int) -> !torch.list<int>
    %60930 = torch.aten.view %60928, %60929 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.overwrite.tensor.contents %60930 overwrites %arg4 : !torch.vtensor<[?,131072],f16>, !torch.tensor<[?,131072],f16>
    torch.bind_symbolic_shape %60930, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_58333 = torch.constant.int 32
    %int2_58334 = torch.constant.int 2
    %int16_58335 = torch.constant.int 16
    %int1_58336 = torch.constant.int 1
    %int128_58337 = torch.constant.int 128
    %60931 = torch.prim.ListConstruct %3029, %int32_58333, %int2_58334, %int16_58335, %int1_58336, %int128_58337 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60932 = torch.aten.view %59081, %60931 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60932, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_58338 = torch.constant.int 32
    %60933 = torch.aten.mul.int %3029, %int32_58338 : !torch.int, !torch.int -> !torch.int
    %int2_58339 = torch.constant.int 2
    %60934 = torch.aten.mul.int %60933, %int2_58339 : !torch.int, !torch.int -> !torch.int
    %int16_58340 = torch.constant.int 16
    %int1_58341 = torch.constant.int 1
    %int128_58342 = torch.constant.int 128
    %60935 = torch.prim.ListConstruct %60934, %int16_58340, %int1_58341, %int128_58342 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60936 = torch.aten.view %60932, %60935 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60936, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60937 = torch.prim.ListConstruct %60880 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_58343 = torch.constant.bool false
    %60938 = torch.aten.index_put %60936, %60937, %60896, %false_58343 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60938, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_58344 = torch.constant.int 32
    %int2_58345 = torch.constant.int 2
    %int16_58346 = torch.constant.int 16
    %int1_58347 = torch.constant.int 1
    %int128_58348 = torch.constant.int 128
    %60939 = torch.prim.ListConstruct %3029, %int32_58344, %int2_58345, %int16_58346, %int1_58347, %int128_58348 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60940 = torch.aten.view %60938, %60939 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60940, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_58349 = torch.constant.int 131072
    %60941 = torch.prim.ListConstruct %3029, %int131072_58349 : (!torch.int, !torch.int) -> !torch.list<int>
    %60942 = torch.aten.view %60940, %60941 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.overwrite.tensor.contents %60942 overwrites %arg5 : !torch.vtensor<[?,131072],f16>, !torch.tensor<[?,131072],f16>
    torch.bind_symbolic_shape %60942, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_58350 = torch.constant.int 32
    %int2_58351 = torch.constant.int 2
    %int16_58352 = torch.constant.int 16
    %int1_58353 = torch.constant.int 1
    %int128_58354 = torch.constant.int 128
    %60943 = torch.prim.ListConstruct %3032, %int32_58350, %int2_58351, %int16_58352, %int1_58353, %int128_58354 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60944 = torch.aten.view %59093, %60943 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60944, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_58355 = torch.constant.int 32
    %60945 = torch.aten.mul.int %3032, %int32_58355 : !torch.int, !torch.int -> !torch.int
    %int2_58356 = torch.constant.int 2
    %60946 = torch.aten.mul.int %60945, %int2_58356 : !torch.int, !torch.int -> !torch.int
    %int16_58357 = torch.constant.int 16
    %int1_58358 = torch.constant.int 1
    %int128_58359 = torch.constant.int 128
    %60947 = torch.prim.ListConstruct %60946, %int16_58357, %int1_58358, %int128_58359 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60948 = torch.aten.view %60944, %60947 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60948, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60949 = torch.prim.ListConstruct %60882 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_58360 = torch.constant.bool false
    %60950 = torch.aten.index_put %60948, %60949, %60898, %false_58360 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60950, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_58361 = torch.constant.int 32
    %int2_58362 = torch.constant.int 2
    %int16_58363 = torch.constant.int 16
    %int1_58364 = torch.constant.int 1
    %int128_58365 = torch.constant.int 128
    %60951 = torch.prim.ListConstruct %3032, %int32_58361, %int2_58362, %int16_58363, %int1_58364, %int128_58365 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60952 = torch.aten.view %60950, %60951 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60952, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_58366 = torch.constant.int 131072
    %60953 = torch.prim.ListConstruct %3032, %int131072_58366 : (!torch.int, !torch.int) -> !torch.list<int>
    %60954 = torch.aten.view %60952, %60953 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.overwrite.tensor.contents %60954 overwrites %arg6 : !torch.vtensor<[?,131072],f16>, !torch.tensor<[?,131072],f16>
    torch.bind_symbolic_shape %60954, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_58367 = torch.constant.int 32
    %int2_58368 = torch.constant.int 2
    %int16_58369 = torch.constant.int 16
    %int1_58370 = torch.constant.int 1
    %int128_58371 = torch.constant.int 128
    %60955 = torch.prim.ListConstruct %3035, %int32_58367, %int2_58368, %int16_58369, %int1_58370, %int128_58371 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60956 = torch.aten.view %59105, %60955 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60956, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_58372 = torch.constant.int 32
    %60957 = torch.aten.mul.int %3035, %int32_58372 : !torch.int, !torch.int -> !torch.int
    %int2_58373 = torch.constant.int 2
    %60958 = torch.aten.mul.int %60957, %int2_58373 : !torch.int, !torch.int -> !torch.int
    %int16_58374 = torch.constant.int 16
    %int1_58375 = torch.constant.int 1
    %int128_58376 = torch.constant.int 128
    %60959 = torch.prim.ListConstruct %60958, %int16_58374, %int1_58375, %int128_58376 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60960 = torch.aten.view %60956, %60959 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60960, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60961 = torch.prim.ListConstruct %60884 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_58377 = torch.constant.bool false
    %60962 = torch.aten.index_put %60960, %60961, %60900, %false_58377 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60962, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_58378 = torch.constant.int 32
    %int2_58379 = torch.constant.int 2
    %int16_58380 = torch.constant.int 16
    %int1_58381 = torch.constant.int 1
    %int128_58382 = torch.constant.int 128
    %60963 = torch.prim.ListConstruct %3035, %int32_58378, %int2_58379, %int16_58380, %int1_58381, %int128_58382 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60964 = torch.aten.view %60962, %60963 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60964, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_58383 = torch.constant.int 131072
    %60965 = torch.prim.ListConstruct %3035, %int131072_58383 : (!torch.int, !torch.int) -> !torch.list<int>
    %60966 = torch.aten.view %60964, %60965 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.overwrite.tensor.contents %60966 overwrites %arg7 : !torch.vtensor<[?,131072],f16>, !torch.tensor<[?,131072],f16>
    torch.bind_symbolic_shape %60966, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_58384 = torch.constant.int 32
    %int2_58385 = torch.constant.int 2
    %int16_58386 = torch.constant.int 16
    %int1_58387 = torch.constant.int 1
    %int128_58388 = torch.constant.int 128
    %60967 = torch.prim.ListConstruct %3038, %int32_58384, %int2_58385, %int16_58386, %int1_58387, %int128_58388 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60968 = torch.aten.view %59117, %60967 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60968, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_58389 = torch.constant.int 32
    %60969 = torch.aten.mul.int %3038, %int32_58389 : !torch.int, !torch.int -> !torch.int
    %int2_58390 = torch.constant.int 2
    %60970 = torch.aten.mul.int %60969, %int2_58390 : !torch.int, !torch.int -> !torch.int
    %int16_58391 = torch.constant.int 16
    %int1_58392 = torch.constant.int 1
    %int128_58393 = torch.constant.int 128
    %60971 = torch.prim.ListConstruct %60970, %int16_58391, %int1_58392, %int128_58393 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60972 = torch.aten.view %60968, %60971 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60972, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60973 = torch.prim.ListConstruct %60886 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_58394 = torch.constant.bool false
    %60974 = torch.aten.index_put %60972, %60973, %60902, %false_58394 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60974, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_58395 = torch.constant.int 32
    %int2_58396 = torch.constant.int 2
    %int16_58397 = torch.constant.int 16
    %int1_58398 = torch.constant.int 1
    %int128_58399 = torch.constant.int 128
    %60975 = torch.prim.ListConstruct %3038, %int32_58395, %int2_58396, %int16_58397, %int1_58398, %int128_58399 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60976 = torch.aten.view %60974, %60975 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60976, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_58400 = torch.constant.int 131072
    %60977 = torch.prim.ListConstruct %3038, %int131072_58400 : (!torch.int, !torch.int) -> !torch.list<int>
    %60978 = torch.aten.view %60976, %60977 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.overwrite.tensor.contents %60978 overwrites %arg8 : !torch.vtensor<[?,131072],f16>, !torch.tensor<[?,131072],f16>
    torch.bind_symbolic_shape %60978, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_58401 = torch.constant.int 32
    %int2_58402 = torch.constant.int 2
    %int16_58403 = torch.constant.int 16
    %int1_58404 = torch.constant.int 1
    %int128_58405 = torch.constant.int 128
    %60979 = torch.prim.ListConstruct %3041, %int32_58401, %int2_58402, %int16_58403, %int1_58404, %int128_58405 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60980 = torch.aten.view %59129, %60979 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60980, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_58406 = torch.constant.int 32
    %60981 = torch.aten.mul.int %3041, %int32_58406 : !torch.int, !torch.int -> !torch.int
    %int2_58407 = torch.constant.int 2
    %60982 = torch.aten.mul.int %60981, %int2_58407 : !torch.int, !torch.int -> !torch.int
    %int16_58408 = torch.constant.int 16
    %int1_58409 = torch.constant.int 1
    %int128_58410 = torch.constant.int 128
    %60983 = torch.prim.ListConstruct %60982, %int16_58408, %int1_58409, %int128_58410 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60984 = torch.aten.view %60980, %60983 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60984, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60985 = torch.prim.ListConstruct %60888 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_58411 = torch.constant.bool false
    %60986 = torch.aten.index_put %60984, %60985, %60904, %false_58411 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60986, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_58412 = torch.constant.int 32
    %int2_58413 = torch.constant.int 2
    %int16_58414 = torch.constant.int 16
    %int1_58415 = torch.constant.int 1
    %int128_58416 = torch.constant.int 128
    %60987 = torch.prim.ListConstruct %3041, %int32_58412, %int2_58413, %int16_58414, %int1_58415, %int128_58416 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60988 = torch.aten.view %60986, %60987 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60988, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_58417 = torch.constant.int 131072
    %60989 = torch.prim.ListConstruct %3041, %int131072_58417 : (!torch.int, !torch.int) -> !torch.list<int>
    %60990 = torch.aten.view %60988, %60989 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.overwrite.tensor.contents %60990 overwrites %arg9 : !torch.vtensor<[?,131072],f16>, !torch.tensor<[?,131072],f16>
    torch.bind_symbolic_shape %60990, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
    %int32_58418 = torch.constant.int 32
    %int2_58419 = torch.constant.int 2
    %int16_58420 = torch.constant.int 16
    %int1_58421 = torch.constant.int 1
    %int128_58422 = torch.constant.int 128
    %60991 = torch.prim.ListConstruct %3044, %int32_58418, %int2_58419, %int16_58420, %int1_58421, %int128_58422 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60992 = torch.aten.view %59141, %60991 : !torch.vtensor<[?,131072],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %60992, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int32_58423 = torch.constant.int 32
    %60993 = torch.aten.mul.int %3044, %int32_58423 : !torch.int, !torch.int -> !torch.int
    %int2_58424 = torch.constant.int 2
    %60994 = torch.aten.mul.int %60993, %int2_58424 : !torch.int, !torch.int -> !torch.int
    %int16_58425 = torch.constant.int 16
    %int1_58426 = torch.constant.int 1
    %int128_58427 = torch.constant.int 128
    %60995 = torch.prim.ListConstruct %60994, %int16_58425, %int1_58426, %int128_58427 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %60996 = torch.aten.view %60992, %60995 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60996, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %60997 = torch.prim.ListConstruct %60890 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
    %false_58428 = torch.constant.bool false
    %60998 = torch.aten.index_put %60996, %60997, %60906, %false_58428 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,16,1,128],f16>, !torch.bool -> !torch.vtensor<[?,16,1,128],f16>
    torch.bind_symbolic_shape %60998, [%2337], affine_map<()[s0] -> (s0 * 64, 16, 1, 128)> : !torch.vtensor<[?,16,1,128],f16>
    %int32_58429 = torch.constant.int 32
    %int2_58430 = torch.constant.int 2
    %int16_58431 = torch.constant.int 16
    %int1_58432 = torch.constant.int 1
    %int128_58433 = torch.constant.int 128
    %60999 = torch.prim.ListConstruct %3044, %int32_58429, %int2_58430, %int16_58431, %int1_58432, %int128_58433 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61000 = torch.aten.view %60998, %60999 : !torch.vtensor<[?,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,32,2,16,1,128],f16>
    torch.bind_symbolic_shape %61000, [%2337], affine_map<()[s0] -> (s0, 32, 2, 16, 1, 128)> : !torch.vtensor<[?,32,2,16,1,128],f16>
    %int131072_58434 = torch.constant.int 131072
    %61001 = torch.prim.ListConstruct %3044, %int131072_58434 : (!torch.int, !torch.int) -> !torch.list<int>
    %61002 = torch.aten.view %61000, %61001 : !torch.vtensor<[?,32,2,16,1,128],f16>, !torch.list<int> -> !torch.vtensor<[?,131072],f16>
    torch.overwrite.tensor.contents %61002 overwrites %arg10 : !torch.vtensor<[?,131072],f16>, !torch.tensor<[?,131072],f16>
    torch.bind_symbolic_shape %61002, [%2337], affine_map<()[s0] -> (s0, 131072)> : !torch.vtensor<[?,131072],f16>
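    // Attention side: insert a singleton group dim on each shard's current K (or V)
    // tensor, [4, s0*16, 1, 128] -> [4, s0*16, 1, 1, 128], in preparation for the
    // grouped-query broadcast below.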
    %int-2_58435 = torch.constant.int -2
    %61003 = torch.aten.unsqueeze %60617, %int-2_58435 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58436 = torch.constant.int -2
    %61004 = torch.aten.unsqueeze %60632, %int-2_58436 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58437 = torch.constant.int -2
    %61005 = torch.aten.unsqueeze %60647, %int-2_58437 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58438 = torch.constant.int -2
    %61006 = torch.aten.unsqueeze %60662, %int-2_58438 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58439 = torch.constant.int -2
    %61007 = torch.aten.unsqueeze %60677, %int-2_58439 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58440 = torch.constant.int -2
    %61008 = torch.aten.unsqueeze %60692, %int-2_58440 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58441 = torch.constant.int -2
    %61009 = torch.aten.unsqueeze %60707, %int-2_58441 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58442 = torch.constant.int -2
    %61010 = torch.aten.unsqueeze %60722, %int-2_58442 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
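    // Expand the singleton dim to 4, replicating each shard's single kv-head across its
    // 4 query heads -- consistent with grouped-query attention at a 4:1 q-to-kv head ratio.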
    %int4_58443 = torch.constant.int 4
    %int1_58444 = torch.constant.int 1
    %int4_58445 = torch.constant.int 4
    %int128_58446 = torch.constant.int 128
    %61011 = torch.prim.ListConstruct %int4_58443, %60603, %int1_58444, %int4_58445, %int128_58446 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58447 = torch.constant.bool false
    %61012 = torch.aten.expand %61003, %61011, %false_58447 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58448 = torch.constant.int 4
    %int1_58449 = torch.constant.int 1
    %int4_58450 = torch.constant.int 4
    %int128_58451 = torch.constant.int 128
    %61013 = torch.prim.ListConstruct %int4_58448, %60603, %int1_58449, %int4_58450, %int128_58451 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58452 = torch.constant.bool false
    %61014 = torch.aten.expand %61004, %61013, %false_58452 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58453 = torch.constant.int 4
    %int1_58454 = torch.constant.int 1
    %int4_58455 = torch.constant.int 4
    %int128_58456 = torch.constant.int 128
    %61015 = torch.prim.ListConstruct %int4_58453, %60603, %int1_58454, %int4_58455, %int128_58456 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58457 = torch.constant.bool false
    %61016 = torch.aten.expand %61005, %61015, %false_58457 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58458 = torch.constant.int 4
    %int1_58459 = torch.constant.int 1
    %int4_58460 = torch.constant.int 4
    %int128_58461 = torch.constant.int 128
    %61017 = torch.prim.ListConstruct %int4_58458, %60603, %int1_58459, %int4_58460, %int128_58461 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58462 = torch.constant.bool false
    %61018 = torch.aten.expand %61006, %61017, %false_58462 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58463 = torch.constant.int 4
    %int1_58464 = torch.constant.int 1
    %int4_58465 = torch.constant.int 4
    %int128_58466 = torch.constant.int 128
    %61019 = torch.prim.ListConstruct %int4_58463, %60603, %int1_58464, %int4_58465, %int128_58466 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58467 = torch.constant.bool false
    %61020 = torch.aten.expand %61007, %61019, %false_58467 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58468 = torch.constant.int 4
    %int1_58469 = torch.constant.int 1
    %int4_58470 = torch.constant.int 4
    %int128_58471 = torch.constant.int 128
    %61021 = torch.prim.ListConstruct %int4_58468, %60603, %int1_58469, %int4_58470, %int128_58471 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58472 = torch.constant.bool false
    %61022 = torch.aten.expand %61008, %61021, %false_58472 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58473 = torch.constant.int 4
    %int1_58474 = torch.constant.int 1
    %int4_58475 = torch.constant.int 4
    %int128_58476 = torch.constant.int 128
    %61023 = torch.prim.ListConstruct %int4_58473, %60603, %int1_58474, %int4_58475, %int128_58476 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58477 = torch.constant.bool false
    %61024 = torch.aten.expand %61009, %61023, %false_58477 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58478 = torch.constant.int 4
    %int1_58479 = torch.constant.int 1
    %int4_58480 = torch.constant.int 4
    %int128_58481 = torch.constant.int 128
    %61025 = torch.prim.ListConstruct %int4_58478, %60603, %int1_58479, %int4_58480, %int128_58481 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58482 = torch.constant.bool false
    %61026 = torch.aten.expand %61010, %61025, %false_58482 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
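    // Collapse the kv-head and group dims of the expanded tensors:
    // [4, s0*16, 1, 4, 128] -> [4, s0*16, 4, 128].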
    %int4_58483 = torch.constant.int 4
    %int4_58484 = torch.constant.int 4
    %int128_58485 = torch.constant.int 128
    %61027 = torch.prim.ListConstruct %int4_58483, %60603, %int4_58484, %int128_58485 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61028 = torch.aten.view %61012, %61027 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61028, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58486 = torch.constant.int 4
    %int4_58487 = torch.constant.int 4
    %int128_58488 = torch.constant.int 128
    %61029 = torch.prim.ListConstruct %int4_58486, %60603, %int4_58487, %int128_58488 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61030 = torch.aten.view %61014, %61029 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61030, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58489 = torch.constant.int 4
    %int4_58490 = torch.constant.int 4
    %int128_58491 = torch.constant.int 128
    %61031 = torch.prim.ListConstruct %int4_58489, %60603, %int4_58490, %int128_58491 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61032 = torch.aten.view %61016, %61031 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61032, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58492 = torch.constant.int 4
    %int4_58493 = torch.constant.int 4
    %int128_58494 = torch.constant.int 128
    %61033 = torch.prim.ListConstruct %int4_58492, %60603, %int4_58493, %int128_58494 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61034 = torch.aten.view %61018, %61033 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61034, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58495 = torch.constant.int 4
    %int4_58496 = torch.constant.int 4
    %int128_58497 = torch.constant.int 128
    %61035 = torch.prim.ListConstruct %int4_58495, %60603, %int4_58496, %int128_58497 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61036 = torch.aten.view %61020, %61035 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61036, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58498 = torch.constant.int 4
    %int4_58499 = torch.constant.int 4
    %int128_58500 = torch.constant.int 128
    %61037 = torch.prim.ListConstruct %int4_58498, %60603, %int4_58499, %int128_58500 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61038 = torch.aten.view %61022, %61037 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61038, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58501 = torch.constant.int 4
    %int4_58502 = torch.constant.int 4
    %int128_58503 = torch.constant.int 128
    %61039 = torch.prim.ListConstruct %int4_58501, %60603, %int4_58502, %int128_58503 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61040 = torch.aten.view %61024, %61039 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61040, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58504 = torch.constant.int 4
    %int4_58505 = torch.constant.int 4
    %int128_58506 = torch.constant.int 128
    %61041 = torch.prim.ListConstruct %int4_58504, %60603, %int4_58505, %int128_58506 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61042 = torch.aten.view %61026, %61041 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61042, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
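    // Repeat the unsqueeze/expand for the other half of the K/V pair
    // (%60392 ... %60406), one tensor per shard.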
    %int-2_58507 = torch.constant.int -2
    %61043 = torch.aten.unsqueeze %60392, %int-2_58507 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61043, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58508 = torch.constant.int -2
    %61044 = torch.aten.unsqueeze %60394, %int-2_58508 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58509 = torch.constant.int -2
    %61045 = torch.aten.unsqueeze %60396, %int-2_58509 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58510 = torch.constant.int -2
    %61046 = torch.aten.unsqueeze %60398, %int-2_58510 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58511 = torch.constant.int -2
    %61047 = torch.aten.unsqueeze %60400, %int-2_58511 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58512 = torch.constant.int -2
    %61048 = torch.aten.unsqueeze %60402, %int-2_58512 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58513 = torch.constant.int -2
    %61049 = torch.aten.unsqueeze %60404, %int-2_58513 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
    %int-2_58514 = torch.constant.int -2
    %61050 = torch.aten.unsqueeze %60406, %int-2_58514 : !torch.vtensor<[4,?,1,128],f16>, !torch.int -> !torch.vtensor<[4,?,1,1,128],f16>
    torch.bind_symbolic_shape %61050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 1, 128)> : !torch.vtensor<[4,?,1,1,128],f16>
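    // Same expansion as above, except the dynamic extent is re-queried from dim 1 of
    // %60316 via aten.size.int rather than reusing the earlier size value.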
    %int1_58515 = torch.constant.int 1
    %61051 = torch.aten.size.int %60316, %int1_58515 : !torch.vtensor<[4,?,128],f16>, !torch.int -> !torch.int
    %int4_58516 = torch.constant.int 4
    %int1_58517 = torch.constant.int 1
    %int4_58518 = torch.constant.int 4
    %int128_58519 = torch.constant.int 128
    %61052 = torch.prim.ListConstruct %int4_58516, %61051, %int1_58517, %int4_58518, %int128_58519 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58520 = torch.constant.bool false
    %61053 = torch.aten.expand %61043, %61052, %false_58520 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58521 = torch.constant.int 4
    %int1_58522 = torch.constant.int 1
    %int4_58523 = torch.constant.int 4
    %int128_58524 = torch.constant.int 128
    %61054 = torch.prim.ListConstruct %int4_58521, %61051, %int1_58522, %int4_58523, %int128_58524 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58525 = torch.constant.bool false
    %61055 = torch.aten.expand %61044, %61054, %false_58525 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58526 = torch.constant.int 4
    %int1_58527 = torch.constant.int 1
    %int4_58528 = torch.constant.int 4
    %int128_58529 = torch.constant.int 128
    %61056 = torch.prim.ListConstruct %int4_58526, %61051, %int1_58527, %int4_58528, %int128_58529 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58530 = torch.constant.bool false
    %61057 = torch.aten.expand %61045, %61056, %false_58530 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58531 = torch.constant.int 4
    %int1_58532 = torch.constant.int 1
    %int4_58533 = torch.constant.int 4
    %int128_58534 = torch.constant.int 128
    %61058 = torch.prim.ListConstruct %int4_58531, %61051, %int1_58532, %int4_58533, %int128_58534 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58535 = torch.constant.bool false
    %61059 = torch.aten.expand %61046, %61058, %false_58535 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58536 = torch.constant.int 4
    %int1_58537 = torch.constant.int 1
    %int4_58538 = torch.constant.int 4
    %int128_58539 = torch.constant.int 128
    %61060 = torch.prim.ListConstruct %int4_58536, %61051, %int1_58537, %int4_58538, %int128_58539 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58540 = torch.constant.bool false
    %61061 = torch.aten.expand %61047, %61060, %false_58540 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58541 = torch.constant.int 4
    %int1_58542 = torch.constant.int 1
    %int4_58543 = torch.constant.int 4
    %int128_58544 = torch.constant.int 128
    %61062 = torch.prim.ListConstruct %int4_58541, %61051, %int1_58542, %int4_58543, %int128_58544 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58545 = torch.constant.bool false
    %61063 = torch.aten.expand %61048, %61062, %false_58545 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58546 = torch.constant.int 4
    %int1_58547 = torch.constant.int 1
    %int4_58548 = torch.constant.int 4
    %int128_58549 = torch.constant.int 128
    %61064 = torch.prim.ListConstruct %int4_58546, %61051, %int1_58547, %int4_58548, %int128_58549 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58550 = torch.constant.bool false
    %61065 = torch.aten.expand %61049, %61064, %false_58550 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
    %int4_58551 = torch.constant.int 4
    %int1_58552 = torch.constant.int 1
    %int4_58553 = torch.constant.int 4
    %int128_58554 = torch.constant.int 128
    %61066 = torch.prim.ListConstruct %int4_58551, %61051, %int1_58552, %int4_58553, %int128_58554 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %false_58555 = torch.constant.bool false
    %61067 = torch.aten.expand %61050, %61066, %false_58555 : !torch.vtensor<[4,?,1,1,128],f16>, !torch.list<int>, !torch.bool -> !torch.vtensor<[4,?,1,4,128],f16>
    torch.bind_symbolic_shape %61067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1, 4, 128)> : !torch.vtensor<[4,?,1,4,128],f16>
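    // Annotation: the broadcast tensors are now collapsed from
    // [4, seq, 1, 4, 128] to [4, seq, 4, 128], folding the KV-head axis into
    // the per-device head axis. A rough PyTorch sketch of the whole
    // unsqueeze/expand/view sequence (hypothetical names, v being one
    // [4, seq, 1, 128] shard):
    //   v = v.unsqueeze(-2)               # [4, seq, 1, 1, 128]
    //   v = v.expand(4, seq, 1, 4, 128)   # broadcast KV head to 4 Q heads
    //   v = v.reshape(4, seq, 4, 128)     # [4, seq, 4, 128]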
    %int4_58556 = torch.constant.int 4
    %int4_58557 = torch.constant.int 4
    %int128_58558 = torch.constant.int 128
    %61068 = torch.prim.ListConstruct %int4_58556, %61051, %int4_58557, %int128_58558 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61069 = torch.aten.view %61053, %61068 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61069, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58559 = torch.constant.int 4
    %int4_58560 = torch.constant.int 4
    %int128_58561 = torch.constant.int 128
    %61070 = torch.prim.ListConstruct %int4_58559, %61051, %int4_58560, %int128_58561 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61071 = torch.aten.view %61055, %61070 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61071, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58562 = torch.constant.int 4
    %int4_58563 = torch.constant.int 4
    %int128_58564 = torch.constant.int 128
    %61072 = torch.prim.ListConstruct %int4_58562, %61051, %int4_58563, %int128_58564 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61073 = torch.aten.view %61057, %61072 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61073, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58565 = torch.constant.int 4
    %int4_58566 = torch.constant.int 4
    %int128_58567 = torch.constant.int 128
    %61074 = torch.prim.ListConstruct %int4_58565, %61051, %int4_58566, %int128_58567 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61075 = torch.aten.view %61059, %61074 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61075, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58568 = torch.constant.int 4
    %int4_58569 = torch.constant.int 4
    %int128_58570 = torch.constant.int 128
    %61076 = torch.prim.ListConstruct %int4_58568, %61051, %int4_58569, %int128_58570 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61077 = torch.aten.view %61061, %61076 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61077, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58571 = torch.constant.int 4
    %int4_58572 = torch.constant.int 4
    %int128_58573 = torch.constant.int 128
    %61078 = torch.prim.ListConstruct %int4_58571, %61051, %int4_58572, %int128_58573 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61079 = torch.aten.view %61063, %61078 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61079, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58574 = torch.constant.int 4
    %int4_58575 = torch.constant.int 4
    %int128_58576 = torch.constant.int 128
    %61080 = torch.prim.ListConstruct %int4_58574, %61051, %int4_58575, %int128_58576 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61081 = torch.aten.view %61065, %61080 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int4_58577 = torch.constant.int 4
    %int4_58578 = torch.constant.int 4
    %int128_58579 = torch.constant.int 128
    %61082 = torch.prim.ListConstruct %int4_58577, %61051, %int4_58578, %int128_58579 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61083 = torch.aten.view %61067, %61082 : !torch.vtensor<[4,?,1,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61083, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
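    // Annotation: Q (%60459..%60564), K (%61028..%61042), and V
    // (%61069..%61083) are transposed from [batch, seq, heads, head_dim] to
    // [batch, heads, seq, head_dim], the layout the attention op below
    // expects.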
    %int1_58580 = torch.constant.int 1
    %int2_58581 = torch.constant.int 2
    %61084 = torch.aten.transpose.int %60459, %int1_58580, %int2_58581 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61084, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58582 = torch.constant.int 1
    %int2_58583 = torch.constant.int 2
    %61085 = torch.aten.transpose.int %60474, %int1_58582, %int2_58583 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61085, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58584 = torch.constant.int 1
    %int2_58585 = torch.constant.int 2
    %61086 = torch.aten.transpose.int %60489, %int1_58584, %int2_58585 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61086, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58586 = torch.constant.int 1
    %int2_58587 = torch.constant.int 2
    %61087 = torch.aten.transpose.int %60504, %int1_58586, %int2_58587 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61087, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58588 = torch.constant.int 1
    %int2_58589 = torch.constant.int 2
    %61088 = torch.aten.transpose.int %60519, %int1_58588, %int2_58589 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61088, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58590 = torch.constant.int 1
    %int2_58591 = torch.constant.int 2
    %61089 = torch.aten.transpose.int %60534, %int1_58590, %int2_58591 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61089, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58592 = torch.constant.int 1
    %int2_58593 = torch.constant.int 2
    %61090 = torch.aten.transpose.int %60549, %int1_58592, %int2_58593 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61090, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58594 = torch.constant.int 1
    %int2_58595 = torch.constant.int 2
    %61091 = torch.aten.transpose.int %60564, %int1_58594, %int2_58595 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61091, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58596 = torch.constant.int 1
    %int2_58597 = torch.constant.int 2
    %61092 = torch.aten.transpose.int %61028, %int1_58596, %int2_58597 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61092, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58598 = torch.constant.int 1
    %int2_58599 = torch.constant.int 2
    %61093 = torch.aten.transpose.int %61030, %int1_58598, %int2_58599 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61093, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58600 = torch.constant.int 1
    %int2_58601 = torch.constant.int 2
    %61094 = torch.aten.transpose.int %61032, %int1_58600, %int2_58601 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61094, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58602 = torch.constant.int 1
    %int2_58603 = torch.constant.int 2
    %61095 = torch.aten.transpose.int %61034, %int1_58602, %int2_58603 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61095, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58604 = torch.constant.int 1
    %int2_58605 = torch.constant.int 2
    %61096 = torch.aten.transpose.int %61036, %int1_58604, %int2_58605 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61096, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58606 = torch.constant.int 1
    %int2_58607 = torch.constant.int 2
    %61097 = torch.aten.transpose.int %61038, %int1_58606, %int2_58607 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61097, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58608 = torch.constant.int 1
    %int2_58609 = torch.constant.int 2
    %61098 = torch.aten.transpose.int %61040, %int1_58608, %int2_58609 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61098, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58610 = torch.constant.int 1
    %int2_58611 = torch.constant.int 2
    %61099 = torch.aten.transpose.int %61042, %int1_58610, %int2_58611 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61099, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58612 = torch.constant.int 1
    %int2_58613 = torch.constant.int 2
    %61100 = torch.aten.transpose.int %61069, %int1_58612, %int2_58613 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61100, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58614 = torch.constant.int 1
    %int2_58615 = torch.constant.int 2
    %61101 = torch.aten.transpose.int %61071, %int1_58614, %int2_58615 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61101, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58616 = torch.constant.int 1
    %int2_58617 = torch.constant.int 2
    %61102 = torch.aten.transpose.int %61073, %int1_58616, %int2_58617 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61102, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58618 = torch.constant.int 1
    %int2_58619 = torch.constant.int 2
    %61103 = torch.aten.transpose.int %61075, %int1_58618, %int2_58619 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61103, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58620 = torch.constant.int 1
    %int2_58621 = torch.constant.int 2
    %61104 = torch.aten.transpose.int %61077, %int1_58620, %int2_58621 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61104, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58622 = torch.constant.int 1
    %int2_58623 = torch.constant.int 2
    %61105 = torch.aten.transpose.int %61079, %int1_58622, %int2_58623 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61105, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58624 = torch.constant.int 1
    %int2_58625 = torch.constant.int 2
    %61106 = torch.aten.transpose.int %61081, %int1_58624, %int2_58625 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61106, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %int1_58626 = torch.constant.int 1
    %int2_58627 = torch.constant.int 2
    %61107 = torch.aten.transpose.int %61083, %int1_58626, %int2_58627 : !torch.vtensor<[4,?,4,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,4,?,128],f16>
    torch.bind_symbolic_shape %61107, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
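    // Annotation: one causal flash-attention call per device shard, with
    // dropout_p = 0.0, is_causal = true, and no explicit mask or scale (the
    // two none operands). Each call yields the attention output
    // ([4, 4, seq, 128], f16) and a logsumexp ([4, 4, seq], f32); only
    // result #0 is consumed below.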
    %float0.000000e00_58628 = torch.constant.float 0.000000e+00
    %true_58629 = torch.constant.bool true
    %none_58630 = torch.constant.none
    %none_58631 = torch.constant.none
    %61108:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%61084, %61092, %61100, %float0.000000e00_58628, %true_58629, %none_58630, %none_58631) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %61108#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_58632 = torch.constant.float 0.000000e+00
    %true_58633 = torch.constant.bool true
    %none_58634 = torch.constant.none
    %none_58635 = torch.constant.none
    %61109:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%61085, %61093, %61101, %float0.000000e00_58632, %true_58633, %none_58634, %none_58635) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %61109#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_58636 = torch.constant.float 0.000000e+00
    %true_58637 = torch.constant.bool true
    %none_58638 = torch.constant.none
    %none_58639 = torch.constant.none
    %61110:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%61086, %61094, %61102, %float0.000000e00_58636, %true_58637, %none_58638, %none_58639) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %61110#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_58640 = torch.constant.float 0.000000e+00
    %true_58641 = torch.constant.bool true
    %none_58642 = torch.constant.none
    %none_58643 = torch.constant.none
    %61111:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%61087, %61095, %61103, %float0.000000e00_58640, %true_58641, %none_58642, %none_58643) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %61111#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_58644 = torch.constant.float 0.000000e+00
    %true_58645 = torch.constant.bool true
    %none_58646 = torch.constant.none
    %none_58647 = torch.constant.none
    %61112:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%61088, %61096, %61104, %float0.000000e00_58644, %true_58645, %none_58646, %none_58647) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %61112#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_58648 = torch.constant.float 0.000000e+00
    %true_58649 = torch.constant.bool true
    %none_58650 = torch.constant.none
    %none_58651 = torch.constant.none
    %61113:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%61089, %61097, %61105, %float0.000000e00_58648, %true_58649, %none_58650, %none_58651) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %61113#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_58652 = torch.constant.float 0.000000e+00
    %true_58653 = torch.constant.bool true
    %none_58654 = torch.constant.none
    %none_58655 = torch.constant.none
    %61114:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%61090, %61098, %61106, %float0.000000e00_58652, %true_58653, %none_58654, %none_58655) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %61114#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
    %float0.000000e00_58656 = torch.constant.float 0.000000e+00
    %true_58657 = torch.constant.bool true
    %none_58658 = torch.constant.none
    %none_58659 = torch.constant.none
    %61115:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%61091, %61099, %61107, %float0.000000e00_58656, %true_58657, %none_58658, %none_58659) : (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?,128],f16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[4,4,?,128],f16>, !torch.vtensor<[4,4,?],f32>) 
    torch.bind_symbolic_shape %61115#0, [%2336], affine_map<()[s0] -> (4, 4, s0 * 16, 128)> : !torch.vtensor<[4,4,?,128],f16>
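    // Annotation: the attention outputs are transposed back to
    // [batch, seq, heads, head_dim].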
    %int1_58660 = torch.constant.int 1
    %int2_58661 = torch.constant.int 2
    %61116 = torch.aten.transpose.int %61108#0, %int1_58660, %int2_58661 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_58662 = torch.constant.int 1
    %int2_58663 = torch.constant.int 2
    %61117 = torch.aten.transpose.int %61109#0, %int1_58662, %int2_58663 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61117, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_58664 = torch.constant.int 1
    %int2_58665 = torch.constant.int 2
    %61118 = torch.aten.transpose.int %61110#0, %int1_58664, %int2_58665 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61118, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_58666 = torch.constant.int 1
    %int2_58667 = torch.constant.int 2
    %61119 = torch.aten.transpose.int %61111#0, %int1_58666, %int2_58667 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61119, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_58668 = torch.constant.int 1
    %int2_58669 = torch.constant.int 2
    %61120 = torch.aten.transpose.int %61112#0, %int1_58668, %int2_58669 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61120, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_58670 = torch.constant.int 1
    %int2_58671 = torch.constant.int 2
    %61121 = torch.aten.transpose.int %61113#0, %int1_58670, %int2_58671 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61121, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_58672 = torch.constant.int 1
    %int2_58673 = torch.constant.int 2
    %61122 = torch.aten.transpose.int %61114#0, %int1_58672, %int2_58673 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61122, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
    %int1_58674 = torch.constant.int 1
    %int2_58675 = torch.constant.int 2
    %61123 = torch.aten.transpose.int %61115#0, %int1_58674, %int2_58675 : !torch.vtensor<[4,4,?,128],f16>, !torch.int, !torch.int -> !torch.vtensor<[4,?,4,128],f16>
    torch.bind_symbolic_shape %61123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4, 128)> : !torch.vtensor<[4,?,4,128],f16>
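    // Annotation: the four heads are flattened into the feature dimension,
    // [4, seq, 4, 128] -> [4, seq, 512] (4 * 128 = 512), giving each device
    // its slice of the attention output ahead of the output projection.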
    %int4_58676 = torch.constant.int 4
    %int512_58677 = torch.constant.int 512
    %61124 = torch.prim.ListConstruct %int4_58676, %60445, %int512_58677 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61125 = torch.aten.view %61116, %61124 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %61125, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_58678 = torch.constant.int 4
    %int512_58679 = torch.constant.int 512
    %61126 = torch.prim.ListConstruct %int4_58678, %60460, %int512_58679 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61127 = torch.aten.view %61117, %61126 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %61127, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_58680 = torch.constant.int 4
    %int512_58681 = torch.constant.int 512
    %61128 = torch.prim.ListConstruct %int4_58680, %60475, %int512_58681 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61129 = torch.aten.view %61118, %61128 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %61129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_58682 = torch.constant.int 4
    %int512_58683 = torch.constant.int 512
    %61130 = torch.prim.ListConstruct %int4_58682, %60490, %int512_58683 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61131 = torch.aten.view %61119, %61130 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %61131, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_58684 = torch.constant.int 4
    %int512_58685 = torch.constant.int 512
    %61132 = torch.prim.ListConstruct %int4_58684, %60505, %int512_58685 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61133 = torch.aten.view %61120, %61132 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %61133, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_58686 = torch.constant.int 4
    %int512_58687 = torch.constant.int 512
    %61134 = torch.prim.ListConstruct %int4_58686, %60520, %int512_58687 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61135 = torch.aten.view %61121, %61134 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %61135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_58688 = torch.constant.int 4
    %int512_58689 = torch.constant.int 512
    %61136 = torch.prim.ListConstruct %int4_58688, %60535, %int512_58689 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61137 = torch.aten.view %61122, %61136 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %61137, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int4_58690 = torch.constant.int 4
    %int512_58691 = torch.constant.int 512
    %61138 = torch.prim.ListConstruct %int4_58690, %60550, %int512_58691 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61139 = torch.aten.view %61123, %61138 : !torch.vtensor<[4,?,4,128],f16>, !torch.list<int> -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %61139, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
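    // Annotation: %2272..%2279 appear to be the eight row-sharded output
    // projection weight shards ([4096, 512] each); they are permuted to
    // [512, 4096] so the matmuls below run as [tokens, 512] x [512, 4096].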
    %int1_58692 = torch.constant.int 1
    %int0_58693 = torch.constant.int 0
    %61140 = torch.prim.ListConstruct %int1_58692, %int0_58693 : (!torch.int, !torch.int) -> !torch.list<int>
    %61141 = torch.aten.permute %2272, %61140 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_58694 = torch.constant.int 1
    %int0_58695 = torch.constant.int 0
    %61142 = torch.prim.ListConstruct %int1_58694, %int0_58695 : (!torch.int, !torch.int) -> !torch.list<int>
    %61143 = torch.aten.permute %2273, %61142 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_58696 = torch.constant.int 1
    %int0_58697 = torch.constant.int 0
    %61144 = torch.prim.ListConstruct %int1_58696, %int0_58697 : (!torch.int, !torch.int) -> !torch.list<int>
    %61145 = torch.aten.permute %2274, %61144 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_58698 = torch.constant.int 1
    %int0_58699 = torch.constant.int 0
    %61146 = torch.prim.ListConstruct %int1_58698, %int0_58699 : (!torch.int, !torch.int) -> !torch.list<int>
    %61147 = torch.aten.permute %2275, %61146 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_58700 = torch.constant.int 1
    %int0_58701 = torch.constant.int 0
    %61148 = torch.prim.ListConstruct %int1_58700, %int0_58701 : (!torch.int, !torch.int) -> !torch.list<int>
    %61149 = torch.aten.permute %2276, %61148 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_58702 = torch.constant.int 1
    %int0_58703 = torch.constant.int 0
    %61150 = torch.prim.ListConstruct %int1_58702, %int0_58703 : (!torch.int, !torch.int) -> !torch.list<int>
    %61151 = torch.aten.permute %2277, %61150 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_58704 = torch.constant.int 1
    %int0_58705 = torch.constant.int 0
    %61152 = torch.prim.ListConstruct %int1_58704, %int0_58705 : (!torch.int, !torch.int) -> !torch.list<int>
    %61153 = torch.aten.permute %2278, %61152 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
    %int1_58706 = torch.constant.int 1
    %int0_58707 = torch.constant.int 0
    %61154 = torch.prim.ListConstruct %int1_58706, %int0_58707 : (!torch.int, !torch.int) -> !torch.list<int>
    %61155 = torch.aten.permute %2279, %61154 : !torch.vtensor<[4096,512],f16>, !torch.list<int> -> !torch.vtensor<[512,4096],f16>
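    // Annotation: per-device output projection. Each [4, seq, 512] slice is
    // flattened to [4*seq, 512], multiplied by its permuted [512, 4096]
    // weight shard, and reshaped back to [4, seq, 4096]. Roughly, per shard i
    // (hypothetical sketch):
    //   y_i = (o_i.reshape(-1, 512) @ w_i_t).reshape(4, seq, 4096)
    // The eight y_i are partial sums that still have to be reduced across
    // devices.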
    %int4_58708 = torch.constant.int 4
    %61156 = torch.aten.mul.int %int4_58708, %60445 : !torch.int, !torch.int -> !torch.int
    %int512_58709 = torch.constant.int 512
    %61157 = torch.prim.ListConstruct %61156, %int512_58709 : (!torch.int, !torch.int) -> !torch.list<int>
    %61158 = torch.aten.view %61125, %61157 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %61158, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %61159 = torch.aten.mm %61158, %61141 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61159, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_58710 = torch.constant.int 4
    %int4096_58711 = torch.constant.int 4096
    %61160 = torch.prim.ListConstruct %int4_58710, %60445, %int4096_58711 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61161 = torch.aten.view %61159, %61160 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61161, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_58712 = torch.constant.int 4
    %61162 = torch.aten.mul.int %int4_58712, %60460 : !torch.int, !torch.int -> !torch.int
    %int512_58713 = torch.constant.int 512
    %61163 = torch.prim.ListConstruct %61162, %int512_58713 : (!torch.int, !torch.int) -> !torch.list<int>
    %61164 = torch.aten.view %61127, %61163 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %61164, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %61165 = torch.aten.mm %61164, %61143 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61165, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_58714 = torch.constant.int 4
    %int4096_58715 = torch.constant.int 4096
    %61166 = torch.prim.ListConstruct %int4_58714, %60460, %int4096_58715 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61167 = torch.aten.view %61165, %61166 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61167, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_58716 = torch.constant.int 4
    %61168 = torch.aten.mul.int %int4_58716, %60475 : !torch.int, !torch.int -> !torch.int
    %int512_58717 = torch.constant.int 512
    %61169 = torch.prim.ListConstruct %61168, %int512_58717 : (!torch.int, !torch.int) -> !torch.list<int>
    %61170 = torch.aten.view %61129, %61169 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %61170, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %61171 = torch.aten.mm %61170, %61145 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61171, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_58718 = torch.constant.int 4
    %int4096_58719 = torch.constant.int 4096
    %61172 = torch.prim.ListConstruct %int4_58718, %60475, %int4096_58719 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61173 = torch.aten.view %61171, %61172 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61173, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_58720 = torch.constant.int 4
    %61174 = torch.aten.mul.int %int4_58720, %60490 : !torch.int, !torch.int -> !torch.int
    %int512_58721 = torch.constant.int 512
    %61175 = torch.prim.ListConstruct %61174, %int512_58721 : (!torch.int, !torch.int) -> !torch.list<int>
    %61176 = torch.aten.view %61131, %61175 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %61176, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %61177 = torch.aten.mm %61176, %61147 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61177, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_58722 = torch.constant.int 4
    %int4096_58723 = torch.constant.int 4096
    %61178 = torch.prim.ListConstruct %int4_58722, %60490, %int4096_58723 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61179 = torch.aten.view %61177, %61178 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61179, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_58724 = torch.constant.int 4
    %61180 = torch.aten.mul.int %int4_58724, %60505 : !torch.int, !torch.int -> !torch.int
    %int512_58725 = torch.constant.int 512
    %61181 = torch.prim.ListConstruct %61180, %int512_58725 : (!torch.int, !torch.int) -> !torch.list<int>
    %61182 = torch.aten.view %61133, %61181 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %61182, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %61183 = torch.aten.mm %61182, %61149 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61183, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_58726 = torch.constant.int 4
    %int4096_58727 = torch.constant.int 4096
    %61184 = torch.prim.ListConstruct %int4_58726, %60505, %int4096_58727 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61185 = torch.aten.view %61183, %61184 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61185, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_58728 = torch.constant.int 4
    %61186 = torch.aten.mul.int %int4_58728, %60520 : !torch.int, !torch.int -> !torch.int
    %int512_58729 = torch.constant.int 512
    %61187 = torch.prim.ListConstruct %61186, %int512_58729 : (!torch.int, !torch.int) -> !torch.list<int>
    %61188 = torch.aten.view %61135, %61187 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %61188, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %61189 = torch.aten.mm %61188, %61151 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61189, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_58730 = torch.constant.int 4
    %int4096_58731 = torch.constant.int 4096
    %61190 = torch.prim.ListConstruct %int4_58730, %60520, %int4096_58731 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61191 = torch.aten.view %61189, %61190 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61191, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_58732 = torch.constant.int 4
    %61192 = torch.aten.mul.int %int4_58732, %60535 : !torch.int, !torch.int -> !torch.int
    %int512_58733 = torch.constant.int 512
    %61193 = torch.prim.ListConstruct %61192, %int512_58733 : (!torch.int, !torch.int) -> !torch.list<int>
    %61194 = torch.aten.view %61137, %61193 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %61194, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %61195 = torch.aten.mm %61194, %61153 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61195, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_58734 = torch.constant.int 4
    %int4096_58735 = torch.constant.int 4096
    %61196 = torch.prim.ListConstruct %int4_58734, %60535, %int4096_58735 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61197 = torch.aten.view %61195, %61196 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61197, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int4_58736 = torch.constant.int 4
    %61198 = torch.aten.mul.int %int4_58736, %60550 : !torch.int, !torch.int -> !torch.int
    %int512_58737 = torch.constant.int 512
    %61199 = torch.prim.ListConstruct %61198, %int512_58737 : (!torch.int, !torch.int) -> !torch.list<int>
    %61200 = torch.aten.view %61139, %61199 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %61200, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %61201 = torch.aten.mm %61200, %61155 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61201, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_58738 = torch.constant.int 4
    %int4096_58739 = torch.constant.int 4096
    %61202 = torch.prim.ListConstruct %int4_58738, %60550, %int4096_58739 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61203 = torch.aten.view %61201, %61202 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61203, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
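    // Annotation: start of what is effectively an all-reduce of the eight
    // partial projections, written out as explicit transfers plus adds. First
    // the seven remote partials (%61167..%61203) are copied to @__device_0;
    // %61161 is device 0's own partial.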
    %61204 = torch_c.to_builtin_tensor %61167 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58740 = arith.constant 1 : index
    %dim_58741 = tensor.dim %61204, %c1_58740 : tensor<4x?x4096xf16>
    %61205 = flow.tensor.transfer %61204 : tensor<4x?x4096xf16>{%dim_58741} to #hal.device.promise<@__device_0>
    %61206 = torch_c.from_builtin_tensor %61205 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61206, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61207 = torch_c.to_builtin_tensor %61173 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58742 = arith.constant 1 : index
    %dim_58743 = tensor.dim %61207, %c1_58742 : tensor<4x?x4096xf16>
    %61208 = flow.tensor.transfer %61207 : tensor<4x?x4096xf16>{%dim_58743} to #hal.device.promise<@__device_0>
    %61209 = torch_c.from_builtin_tensor %61208 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61209, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61210 = torch_c.to_builtin_tensor %61179 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58744 = arith.constant 1 : index
    %dim_58745 = tensor.dim %61210, %c1_58744 : tensor<4x?x4096xf16>
    %61211 = flow.tensor.transfer %61210 : tensor<4x?x4096xf16>{%dim_58745} to #hal.device.promise<@__device_0>
    %61212 = torch_c.from_builtin_tensor %61211 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61212, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61213 = torch_c.to_builtin_tensor %61185 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58746 = arith.constant 1 : index
    %dim_58747 = tensor.dim %61213, %c1_58746 : tensor<4x?x4096xf16>
    %61214 = flow.tensor.transfer %61213 : tensor<4x?x4096xf16>{%dim_58747} to #hal.device.promise<@__device_0>
    %61215 = torch_c.from_builtin_tensor %61214 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61215, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61216 = torch_c.to_builtin_tensor %61191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58748 = arith.constant 1 : index
    %dim_58749 = tensor.dim %61216, %c1_58748 : tensor<4x?x4096xf16>
    %61217 = flow.tensor.transfer %61216 : tensor<4x?x4096xf16>{%dim_58749} to #hal.device.promise<@__device_0>
    %61218 = torch_c.from_builtin_tensor %61217 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61218, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61219 = torch_c.to_builtin_tensor %61197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58750 = arith.constant 1 : index
    %dim_58751 = tensor.dim %61219, %c1_58750 : tensor<4x?x4096xf16>
    %61220 = flow.tensor.transfer %61219 : tensor<4x?x4096xf16>{%dim_58751} to #hal.device.promise<@__device_0>
    %61221 = torch_c.from_builtin_tensor %61220 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61221, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61222 = torch_c.to_builtin_tensor %61203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58752 = arith.constant 1 : index
    %dim_58753 = tensor.dim %61222, %c1_58752 : tensor<4x?x4096xf16>
    %61223 = flow.tensor.transfer %61222 : tensor<4x?x4096xf16>{%dim_58753} to #hal.device.promise<@__device_0>
    %61224 = torch_c.from_builtin_tensor %61223 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61224, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
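    // Annotation: device 0 accumulates its local partial %61161 with the
    // seven transferred copies, producing the fully reduced [4, seq, 4096]
    // activation %61231.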
    %int1_58754 = torch.constant.int 1
    %61225 = torch.aten.add.Tensor %61161, %61206, %int1_58754 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61225, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58755 = torch.constant.int 1
    %61226 = torch.aten.add.Tensor %61225, %61209, %int1_58755 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61226, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58756 = torch.constant.int 1
    %61227 = torch.aten.add.Tensor %61226, %61212, %int1_58756 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61227, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58757 = torch.constant.int 1
    %61228 = torch.aten.add.Tensor %61227, %61215, %int1_58757 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61228, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58758 = torch.constant.int 1
    %61229 = torch.aten.add.Tensor %61228, %61218, %int1_58758 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61229, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58759 = torch.constant.int 1
    %61230 = torch.aten.add.Tensor %61229, %61221, %int1_58759 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61230, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58760 = torch.constant.int 1
    %61231 = torch.aten.add.Tensor %61230, %61224, %int1_58760 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61231, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
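    // Annotation: the same transfer-and-accumulate pattern now repeats for
    // @__device_1, @__device_2, and presumably each remaining device, so that
    // every device ends up with an identical copy of the reduced activation
    // (a replicated all-reduce).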
    %61232 = torch_c.to_builtin_tensor %61161 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58761 = arith.constant 1 : index
    %dim_58762 = tensor.dim %61232, %c1_58761 : tensor<4x?x4096xf16>
    %61233 = flow.tensor.transfer %61232 : tensor<4x?x4096xf16>{%dim_58762} to #hal.device.promise<@__device_1>
    %61234 = torch_c.from_builtin_tensor %61233 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61234, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61235 = torch_c.to_builtin_tensor %61173 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58763 = arith.constant 1 : index
    %dim_58764 = tensor.dim %61235, %c1_58763 : tensor<4x?x4096xf16>
    %61236 = flow.tensor.transfer %61235 : tensor<4x?x4096xf16>{%dim_58764} to #hal.device.promise<@__device_1>
    %61237 = torch_c.from_builtin_tensor %61236 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61237, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61238 = torch_c.to_builtin_tensor %61179 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58765 = arith.constant 1 : index
    %dim_58766 = tensor.dim %61238, %c1_58765 : tensor<4x?x4096xf16>
    %61239 = flow.tensor.transfer %61238 : tensor<4x?x4096xf16>{%dim_58766} to #hal.device.promise<@__device_1>
    %61240 = torch_c.from_builtin_tensor %61239 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61240, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61241 = torch_c.to_builtin_tensor %61185 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58767 = arith.constant 1 : index
    %dim_58768 = tensor.dim %61241, %c1_58767 : tensor<4x?x4096xf16>
    %61242 = flow.tensor.transfer %61241 : tensor<4x?x4096xf16>{%dim_58768} to #hal.device.promise<@__device_1>
    %61243 = torch_c.from_builtin_tensor %61242 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61243, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61244 = torch_c.to_builtin_tensor %61191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58769 = arith.constant 1 : index
    %dim_58770 = tensor.dim %61244, %c1_58769 : tensor<4x?x4096xf16>
    %61245 = flow.tensor.transfer %61244 : tensor<4x?x4096xf16>{%dim_58770} to #hal.device.promise<@__device_1>
    %61246 = torch_c.from_builtin_tensor %61245 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61246, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61247 = torch_c.to_builtin_tensor %61197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58771 = arith.constant 1 : index
    %dim_58772 = tensor.dim %61247, %c1_58771 : tensor<4x?x4096xf16>
    %61248 = flow.tensor.transfer %61247 : tensor<4x?x4096xf16>{%dim_58772} to #hal.device.promise<@__device_1>
    %61249 = torch_c.from_builtin_tensor %61248 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61249, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61250 = torch_c.to_builtin_tensor %61203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58773 = arith.constant 1 : index
    %dim_58774 = tensor.dim %61250, %c1_58773 : tensor<4x?x4096xf16>
    %61251 = flow.tensor.transfer %61250 : tensor<4x?x4096xf16>{%dim_58774} to #hal.device.promise<@__device_1>
    %61252 = torch_c.from_builtin_tensor %61251 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61252, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58775 = torch.constant.int 1
    %61253 = torch.aten.add.Tensor %61234, %61167, %int1_58775 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61253, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58776 = torch.constant.int 1
    %61254 = torch.aten.add.Tensor %61253, %61237, %int1_58776 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61254, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58777 = torch.constant.int 1
    %61255 = torch.aten.add.Tensor %61254, %61240, %int1_58777 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61255, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58778 = torch.constant.int 1
    %61256 = torch.aten.add.Tensor %61255, %61243, %int1_58778 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61256, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58779 = torch.constant.int 1
    %61257 = torch.aten.add.Tensor %61256, %61246, %int1_58779 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61257, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58780 = torch.constant.int 1
    %61258 = torch.aten.add.Tensor %61257, %61249, %int1_58780 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61258, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58781 = torch.constant.int 1
    %61259 = torch.aten.add.Tensor %61258, %61252, %int1_58781 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61259, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61260 = torch_c.to_builtin_tensor %61161 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58782 = arith.constant 1 : index
    %dim_58783 = tensor.dim %61260, %c1_58782 : tensor<4x?x4096xf16>
    %61261 = flow.tensor.transfer %61260 : tensor<4x?x4096xf16>{%dim_58783} to #hal.device.promise<@__device_2>
    %61262 = torch_c.from_builtin_tensor %61261 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61262, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61263 = torch_c.to_builtin_tensor %61167 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58784 = arith.constant 1 : index
    %dim_58785 = tensor.dim %61263, %c1_58784 : tensor<4x?x4096xf16>
    %61264 = flow.tensor.transfer %61263 : tensor<4x?x4096xf16>{%dim_58785} to #hal.device.promise<@__device_2>
    %61265 = torch_c.from_builtin_tensor %61264 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61265, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61266 = torch_c.to_builtin_tensor %61179 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58786 = arith.constant 1 : index
    %dim_58787 = tensor.dim %61266, %c1_58786 : tensor<4x?x4096xf16>
    %61267 = flow.tensor.transfer %61266 : tensor<4x?x4096xf16>{%dim_58787} to #hal.device.promise<@__device_2>
    %61268 = torch_c.from_builtin_tensor %61267 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61268, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61269 = torch_c.to_builtin_tensor %61185 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58788 = arith.constant 1 : index
    %dim_58789 = tensor.dim %61269, %c1_58788 : tensor<4x?x4096xf16>
    %61270 = flow.tensor.transfer %61269 : tensor<4x?x4096xf16>{%dim_58789} to #hal.device.promise<@__device_2>
    %61271 = torch_c.from_builtin_tensor %61270 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61271, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61272 = torch_c.to_builtin_tensor %61191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58790 = arith.constant 1 : index
    %dim_58791 = tensor.dim %61272, %c1_58790 : tensor<4x?x4096xf16>
    %61273 = flow.tensor.transfer %61272 : tensor<4x?x4096xf16>{%dim_58791} to #hal.device.promise<@__device_2>
    %61274 = torch_c.from_builtin_tensor %61273 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61274, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61275 = torch_c.to_builtin_tensor %61197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58792 = arith.constant 1 : index
    %dim_58793 = tensor.dim %61275, %c1_58792 : tensor<4x?x4096xf16>
    %61276 = flow.tensor.transfer %61275 : tensor<4x?x4096xf16>{%dim_58793} to #hal.device.promise<@__device_2>
    %61277 = torch_c.from_builtin_tensor %61276 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61277, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61278 = torch_c.to_builtin_tensor %61203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58794 = arith.constant 1 : index
    %dim_58795 = tensor.dim %61278, %c1_58794 : tensor<4x?x4096xf16>
    %61279 = flow.tensor.transfer %61278 : tensor<4x?x4096xf16>{%dim_58795} to #hal.device.promise<@__device_2>
    %61280 = torch_c.from_builtin_tensor %61279 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61280, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
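    // Editorial annotation: sequential add-reduction of the replicated partials on
    // @__device_2, producing %61287. Note %61173 is folded in directly at the third step
    // with no preceding transfer, consistent with it already residing on @__device_2; the
    // other seven operands use the transferred copies above.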
    %int1_58796 = torch.constant.int 1
    %61281 = torch.aten.add.Tensor %61262, %61265, %int1_58796 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61281, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58797 = torch.constant.int 1
    %61282 = torch.aten.add.Tensor %61281, %61173, %int1_58797 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61282, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58798 = torch.constant.int 1
    %61283 = torch.aten.add.Tensor %61282, %61268, %int1_58798 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61283, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58799 = torch.constant.int 1
    %61284 = torch.aten.add.Tensor %61283, %61271, %int1_58799 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61284, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58800 = torch.constant.int 1
    %61285 = torch.aten.add.Tensor %61284, %61274, %int1_58800 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61285, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58801 = torch.constant.int 1
    %61286 = torch.aten.add.Tensor %61285, %61277, %int1_58801 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61286, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58802 = torch.constant.int 1
    %61287 = torch.aten.add.Tensor %61286, %61280, %int1_58802 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61287, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
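    // Editorial annotation: same replicate-and-reduce pattern, now targeting @__device_3;
    // here %61179 is the operand added without a transfer (result %61315).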
    %61288 = torch_c.to_builtin_tensor %61161 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58803 = arith.constant 1 : index
    %dim_58804 = tensor.dim %61288, %c1_58803 : tensor<4x?x4096xf16>
    %61289 = flow.tensor.transfer %61288 : tensor<4x?x4096xf16>{%dim_58804} to #hal.device.promise<@__device_3>
    %61290 = torch_c.from_builtin_tensor %61289 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61290, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61291 = torch_c.to_builtin_tensor %61167 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58805 = arith.constant 1 : index
    %dim_58806 = tensor.dim %61291, %c1_58805 : tensor<4x?x4096xf16>
    %61292 = flow.tensor.transfer %61291 : tensor<4x?x4096xf16>{%dim_58806} to #hal.device.promise<@__device_3>
    %61293 = torch_c.from_builtin_tensor %61292 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61293, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61294 = torch_c.to_builtin_tensor %61173 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58807 = arith.constant 1 : index
    %dim_58808 = tensor.dim %61294, %c1_58807 : tensor<4x?x4096xf16>
    %61295 = flow.tensor.transfer %61294 : tensor<4x?x4096xf16>{%dim_58808} to #hal.device.promise<@__device_3>
    %61296 = torch_c.from_builtin_tensor %61295 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61296, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61297 = torch_c.to_builtin_tensor %61185 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58809 = arith.constant 1 : index
    %dim_58810 = tensor.dim %61297, %c1_58809 : tensor<4x?x4096xf16>
    %61298 = flow.tensor.transfer %61297 : tensor<4x?x4096xf16>{%dim_58810} to #hal.device.promise<@__device_3>
    %61299 = torch_c.from_builtin_tensor %61298 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61299, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61300 = torch_c.to_builtin_tensor %61191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58811 = arith.constant 1 : index
    %dim_58812 = tensor.dim %61300, %c1_58811 : tensor<4x?x4096xf16>
    %61301 = flow.tensor.transfer %61300 : tensor<4x?x4096xf16>{%dim_58812} to #hal.device.promise<@__device_3>
    %61302 = torch_c.from_builtin_tensor %61301 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61302, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61303 = torch_c.to_builtin_tensor %61197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58813 = arith.constant 1 : index
    %dim_58814 = tensor.dim %61303, %c1_58813 : tensor<4x?x4096xf16>
    %61304 = flow.tensor.transfer %61303 : tensor<4x?x4096xf16>{%dim_58814} to #hal.device.promise<@__device_3>
    %61305 = torch_c.from_builtin_tensor %61304 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61305, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61306 = torch_c.to_builtin_tensor %61203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58815 = arith.constant 1 : index
    %dim_58816 = tensor.dim %61306, %c1_58815 : tensor<4x?x4096xf16>
    %61307 = flow.tensor.transfer %61306 : tensor<4x?x4096xf16>{%dim_58816} to #hal.device.promise<@__device_3>
    %61308 = torch_c.from_builtin_tensor %61307 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61308, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58817 = torch.constant.int 1
    %61309 = torch.aten.add.Tensor %61290, %61293, %int1_58817 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61309, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58818 = torch.constant.int 1
    %61310 = torch.aten.add.Tensor %61309, %61296, %int1_58818 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61310, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58819 = torch.constant.int 1
    %61311 = torch.aten.add.Tensor %61310, %61179, %int1_58819 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61311, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58820 = torch.constant.int 1
    %61312 = torch.aten.add.Tensor %61311, %61299, %int1_58820 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61312, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58821 = torch.constant.int 1
    %61313 = torch.aten.add.Tensor %61312, %61302, %int1_58821 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61313, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58822 = torch.constant.int 1
    %61314 = torch.aten.add.Tensor %61313, %61305, %int1_58822 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61314, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58823 = torch.constant.int 1
    %61315 = torch.aten.add.Tensor %61314, %61308, %int1_58823 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61315, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
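    // Editorial annotation: replicate-and-reduce onto @__device_4; %61185 is the locally
    // resident operand (result %61343).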
    %61316 = torch_c.to_builtin_tensor %61161 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58824 = arith.constant 1 : index
    %dim_58825 = tensor.dim %61316, %c1_58824 : tensor<4x?x4096xf16>
    %61317 = flow.tensor.transfer %61316 : tensor<4x?x4096xf16>{%dim_58825} to #hal.device.promise<@__device_4>
    %61318 = torch_c.from_builtin_tensor %61317 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61318, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61319 = torch_c.to_builtin_tensor %61167 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58826 = arith.constant 1 : index
    %dim_58827 = tensor.dim %61319, %c1_58826 : tensor<4x?x4096xf16>
    %61320 = flow.tensor.transfer %61319 : tensor<4x?x4096xf16>{%dim_58827} to #hal.device.promise<@__device_4>
    %61321 = torch_c.from_builtin_tensor %61320 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61321, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61322 = torch_c.to_builtin_tensor %61173 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58828 = arith.constant 1 : index
    %dim_58829 = tensor.dim %61322, %c1_58828 : tensor<4x?x4096xf16>
    %61323 = flow.tensor.transfer %61322 : tensor<4x?x4096xf16>{%dim_58829} to #hal.device.promise<@__device_4>
    %61324 = torch_c.from_builtin_tensor %61323 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61324, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61325 = torch_c.to_builtin_tensor %61179 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58830 = arith.constant 1 : index
    %dim_58831 = tensor.dim %61325, %c1_58830 : tensor<4x?x4096xf16>
    %61326 = flow.tensor.transfer %61325 : tensor<4x?x4096xf16>{%dim_58831} to #hal.device.promise<@__device_4>
    %61327 = torch_c.from_builtin_tensor %61326 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61327, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61328 = torch_c.to_builtin_tensor %61191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58832 = arith.constant 1 : index
    %dim_58833 = tensor.dim %61328, %c1_58832 : tensor<4x?x4096xf16>
    %61329 = flow.tensor.transfer %61328 : tensor<4x?x4096xf16>{%dim_58833} to #hal.device.promise<@__device_4>
    %61330 = torch_c.from_builtin_tensor %61329 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61330, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61331 = torch_c.to_builtin_tensor %61197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58834 = arith.constant 1 : index
    %dim_58835 = tensor.dim %61331, %c1_58834 : tensor<4x?x4096xf16>
    %61332 = flow.tensor.transfer %61331 : tensor<4x?x4096xf16>{%dim_58835} to #hal.device.promise<@__device_4>
    %61333 = torch_c.from_builtin_tensor %61332 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61333, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61334 = torch_c.to_builtin_tensor %61203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58836 = arith.constant 1 : index
    %dim_58837 = tensor.dim %61334, %c1_58836 : tensor<4x?x4096xf16>
    %61335 = flow.tensor.transfer %61334 : tensor<4x?x4096xf16>{%dim_58837} to #hal.device.promise<@__device_4>
    %61336 = torch_c.from_builtin_tensor %61335 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61336, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58838 = torch.constant.int 1
    %61337 = torch.aten.add.Tensor %61318, %61321, %int1_58838 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61337, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58839 = torch.constant.int 1
    %61338 = torch.aten.add.Tensor %61337, %61324, %int1_58839 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61338, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58840 = torch.constant.int 1
    %61339 = torch.aten.add.Tensor %61338, %61327, %int1_58840 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61339, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58841 = torch.constant.int 1
    %61340 = torch.aten.add.Tensor %61339, %61185, %int1_58841 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61340, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58842 = torch.constant.int 1
    %61341 = torch.aten.add.Tensor %61340, %61330, %int1_58842 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61341, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58843 = torch.constant.int 1
    %61342 = torch.aten.add.Tensor %61341, %61333, %int1_58843 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61342, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58844 = torch.constant.int 1
    %61343 = torch.aten.add.Tensor %61342, %61336, %int1_58844 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61343, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
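    // Editorial annotation: replicate-and-reduce onto @__device_5; %61191 is the locally
    // resident operand (result %61371).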
    %61344 = torch_c.to_builtin_tensor %61161 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58845 = arith.constant 1 : index
    %dim_58846 = tensor.dim %61344, %c1_58845 : tensor<4x?x4096xf16>
    %61345 = flow.tensor.transfer %61344 : tensor<4x?x4096xf16>{%dim_58846} to #hal.device.promise<@__device_5>
    %61346 = torch_c.from_builtin_tensor %61345 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61346, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61347 = torch_c.to_builtin_tensor %61167 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58847 = arith.constant 1 : index
    %dim_58848 = tensor.dim %61347, %c1_58847 : tensor<4x?x4096xf16>
    %61348 = flow.tensor.transfer %61347 : tensor<4x?x4096xf16>{%dim_58848} to #hal.device.promise<@__device_5>
    %61349 = torch_c.from_builtin_tensor %61348 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61349, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61350 = torch_c.to_builtin_tensor %61173 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58849 = arith.constant 1 : index
    %dim_58850 = tensor.dim %61350, %c1_58849 : tensor<4x?x4096xf16>
    %61351 = flow.tensor.transfer %61350 : tensor<4x?x4096xf16>{%dim_58850} to #hal.device.promise<@__device_5>
    %61352 = torch_c.from_builtin_tensor %61351 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61352, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61353 = torch_c.to_builtin_tensor %61179 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58851 = arith.constant 1 : index
    %dim_58852 = tensor.dim %61353, %c1_58851 : tensor<4x?x4096xf16>
    %61354 = flow.tensor.transfer %61353 : tensor<4x?x4096xf16>{%dim_58852} to #hal.device.promise<@__device_5>
    %61355 = torch_c.from_builtin_tensor %61354 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61355, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61356 = torch_c.to_builtin_tensor %61185 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58853 = arith.constant 1 : index
    %dim_58854 = tensor.dim %61356, %c1_58853 : tensor<4x?x4096xf16>
    %61357 = flow.tensor.transfer %61356 : tensor<4x?x4096xf16>{%dim_58854} to #hal.device.promise<@__device_5>
    %61358 = torch_c.from_builtin_tensor %61357 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61358, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61359 = torch_c.to_builtin_tensor %61197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58855 = arith.constant 1 : index
    %dim_58856 = tensor.dim %61359, %c1_58855 : tensor<4x?x4096xf16>
    %61360 = flow.tensor.transfer %61359 : tensor<4x?x4096xf16>{%dim_58856} to #hal.device.promise<@__device_5>
    %61361 = torch_c.from_builtin_tensor %61360 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61361, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61362 = torch_c.to_builtin_tensor %61203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58857 = arith.constant 1 : index
    %dim_58858 = tensor.dim %61362, %c1_58857 : tensor<4x?x4096xf16>
    %61363 = flow.tensor.transfer %61362 : tensor<4x?x4096xf16>{%dim_58858} to #hal.device.promise<@__device_5>
    %61364 = torch_c.from_builtin_tensor %61363 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61364, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58859 = torch.constant.int 1
    %61365 = torch.aten.add.Tensor %61346, %61349, %int1_58859 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61365, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58860 = torch.constant.int 1
    %61366 = torch.aten.add.Tensor %61365, %61352, %int1_58860 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61366, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58861 = torch.constant.int 1
    %61367 = torch.aten.add.Tensor %61366, %61355, %int1_58861 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61367, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58862 = torch.constant.int 1
    %61368 = torch.aten.add.Tensor %61367, %61358, %int1_58862 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61368, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58863 = torch.constant.int 1
    %61369 = torch.aten.add.Tensor %61368, %61191, %int1_58863 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61369, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58864 = torch.constant.int 1
    %61370 = torch.aten.add.Tensor %61369, %61361, %int1_58864 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61370, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58865 = torch.constant.int 1
    %61371 = torch.aten.add.Tensor %61370, %61364, %int1_58865 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61371, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
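    // Editorial annotation: replicate-and-reduce onto @__device_6; %61197 is the locally
    // resident operand (result %61399).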
    %61372 = torch_c.to_builtin_tensor %61161 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58866 = arith.constant 1 : index
    %dim_58867 = tensor.dim %61372, %c1_58866 : tensor<4x?x4096xf16>
    %61373 = flow.tensor.transfer %61372 : tensor<4x?x4096xf16>{%dim_58867} to #hal.device.promise<@__device_6>
    %61374 = torch_c.from_builtin_tensor %61373 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61374, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61375 = torch_c.to_builtin_tensor %61167 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58868 = arith.constant 1 : index
    %dim_58869 = tensor.dim %61375, %c1_58868 : tensor<4x?x4096xf16>
    %61376 = flow.tensor.transfer %61375 : tensor<4x?x4096xf16>{%dim_58869} to #hal.device.promise<@__device_6>
    %61377 = torch_c.from_builtin_tensor %61376 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61377, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61378 = torch_c.to_builtin_tensor %61173 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58870 = arith.constant 1 : index
    %dim_58871 = tensor.dim %61378, %c1_58870 : tensor<4x?x4096xf16>
    %61379 = flow.tensor.transfer %61378 : tensor<4x?x4096xf16>{%dim_58871} to #hal.device.promise<@__device_6>
    %61380 = torch_c.from_builtin_tensor %61379 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61380, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61381 = torch_c.to_builtin_tensor %61179 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58872 = arith.constant 1 : index
    %dim_58873 = tensor.dim %61381, %c1_58872 : tensor<4x?x4096xf16>
    %61382 = flow.tensor.transfer %61381 : tensor<4x?x4096xf16>{%dim_58873} to #hal.device.promise<@__device_6>
    %61383 = torch_c.from_builtin_tensor %61382 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61383, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61384 = torch_c.to_builtin_tensor %61185 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58874 = arith.constant 1 : index
    %dim_58875 = tensor.dim %61384, %c1_58874 : tensor<4x?x4096xf16>
    %61385 = flow.tensor.transfer %61384 : tensor<4x?x4096xf16>{%dim_58875} to #hal.device.promise<@__device_6>
    %61386 = torch_c.from_builtin_tensor %61385 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61386, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61387 = torch_c.to_builtin_tensor %61191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58876 = arith.constant 1 : index
    %dim_58877 = tensor.dim %61387, %c1_58876 : tensor<4x?x4096xf16>
    %61388 = flow.tensor.transfer %61387 : tensor<4x?x4096xf16>{%dim_58877} to #hal.device.promise<@__device_6>
    %61389 = torch_c.from_builtin_tensor %61388 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61389, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61390 = torch_c.to_builtin_tensor %61203 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58878 = arith.constant 1 : index
    %dim_58879 = tensor.dim %61390, %c1_58878 : tensor<4x?x4096xf16>
    %61391 = flow.tensor.transfer %61390 : tensor<4x?x4096xf16>{%dim_58879} to #hal.device.promise<@__device_6>
    %61392 = torch_c.from_builtin_tensor %61391 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61392, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58880 = torch.constant.int 1
    %61393 = torch.aten.add.Tensor %61374, %61377, %int1_58880 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61393, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58881 = torch.constant.int 1
    %61394 = torch.aten.add.Tensor %61393, %61380, %int1_58881 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61394, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58882 = torch.constant.int 1
    %61395 = torch.aten.add.Tensor %61394, %61383, %int1_58882 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61395, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58883 = torch.constant.int 1
    %61396 = torch.aten.add.Tensor %61395, %61386, %int1_58883 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61396, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58884 = torch.constant.int 1
    %61397 = torch.aten.add.Tensor %61396, %61389, %int1_58884 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61397, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58885 = torch.constant.int 1
    %61398 = torch.aten.add.Tensor %61397, %61197, %int1_58885 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61398, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58886 = torch.constant.int 1
    %61399 = torch.aten.add.Tensor %61398, %61392, %int1_58886 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61399, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
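    // Editorial annotation: final replicate-and-reduce, onto @__device_7 (%61203 resident,
    // result %61427). Taken together, the seven blocks above look like an unrolled
    // all-reduce: after this point every device appears to hold the full eight-way sum.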
    %61400 = torch_c.to_builtin_tensor %61161 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58887 = arith.constant 1 : index
    %dim_58888 = tensor.dim %61400, %c1_58887 : tensor<4x?x4096xf16>
    %61401 = flow.tensor.transfer %61400 : tensor<4x?x4096xf16>{%dim_58888} to #hal.device.promise<@__device_7>
    %61402 = torch_c.from_builtin_tensor %61401 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61402, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61403 = torch_c.to_builtin_tensor %61167 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58889 = arith.constant 1 : index
    %dim_58890 = tensor.dim %61403, %c1_58889 : tensor<4x?x4096xf16>
    %61404 = flow.tensor.transfer %61403 : tensor<4x?x4096xf16>{%dim_58890} to #hal.device.promise<@__device_7>
    %61405 = torch_c.from_builtin_tensor %61404 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61405, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61406 = torch_c.to_builtin_tensor %61173 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58891 = arith.constant 1 : index
    %dim_58892 = tensor.dim %61406, %c1_58891 : tensor<4x?x4096xf16>
    %61407 = flow.tensor.transfer %61406 : tensor<4x?x4096xf16>{%dim_58892} to #hal.device.promise<@__device_7>
    %61408 = torch_c.from_builtin_tensor %61407 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61408, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61409 = torch_c.to_builtin_tensor %61179 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58893 = arith.constant 1 : index
    %dim_58894 = tensor.dim %61409, %c1_58893 : tensor<4x?x4096xf16>
    %61410 = flow.tensor.transfer %61409 : tensor<4x?x4096xf16>{%dim_58894} to #hal.device.promise<@__device_7>
    %61411 = torch_c.from_builtin_tensor %61410 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61411, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61412 = torch_c.to_builtin_tensor %61185 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58895 = arith.constant 1 : index
    %dim_58896 = tensor.dim %61412, %c1_58895 : tensor<4x?x4096xf16>
    %61413 = flow.tensor.transfer %61412 : tensor<4x?x4096xf16>{%dim_58896} to #hal.device.promise<@__device_7>
    %61414 = torch_c.from_builtin_tensor %61413 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61414, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61415 = torch_c.to_builtin_tensor %61191 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58897 = arith.constant 1 : index
    %dim_58898 = tensor.dim %61415, %c1_58897 : tensor<4x?x4096xf16>
    %61416 = flow.tensor.transfer %61415 : tensor<4x?x4096xf16>{%dim_58898} to #hal.device.promise<@__device_7>
    %61417 = torch_c.from_builtin_tensor %61416 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61417, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61418 = torch_c.to_builtin_tensor %61197 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_58899 = arith.constant 1 : index
    %dim_58900 = tensor.dim %61418, %c1_58899 : tensor<4x?x4096xf16>
    %61419 = flow.tensor.transfer %61418 : tensor<4x?x4096xf16>{%dim_58900} to #hal.device.promise<@__device_7>
    %61420 = torch_c.from_builtin_tensor %61419 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61420, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58901 = torch.constant.int 1
    %61421 = torch.aten.add.Tensor %61402, %61405, %int1_58901 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61421, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58902 = torch.constant.int 1
    %61422 = torch.aten.add.Tensor %61421, %61408, %int1_58902 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61422, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58903 = torch.constant.int 1
    %61423 = torch.aten.add.Tensor %61422, %61411, %int1_58903 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61423, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58904 = torch.constant.int 1
    %61424 = torch.aten.add.Tensor %61423, %61414, %int1_58904 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61424, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58905 = torch.constant.int 1
    %61425 = torch.aten.add.Tensor %61424, %61417, %int1_58905 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61425, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58906 = torch.constant.int 1
    %61426 = torch.aten.add.Tensor %61425, %61420, %int1_58906 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61426, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58907 = torch.constant.int 1
    %61427 = torch.aten.add.Tensor %61426, %61203, %int1_58907 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61427, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
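    // Editorial annotation: per-device residual connections. The reduced sums (%61231,
    // %61259, %61287, %61315, %61343, %61371, %61399, %61427) are added to the
    // corresponding block inputs %60087..%60094, yielding %61428..%61435.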
    %int1_58908 = torch.constant.int 1
    %61428 = torch.aten.add.Tensor %60087, %61231, %int1_58908 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61428, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58909 = torch.constant.int 1
    %61429 = torch.aten.add.Tensor %60088, %61259, %int1_58909 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61429, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58910 = torch.constant.int 1
    %61430 = torch.aten.add.Tensor %60089, %61287, %int1_58910 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61430, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58911 = torch.constant.int 1
    %61431 = torch.aten.add.Tensor %60090, %61315, %int1_58911 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61431, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58912 = torch.constant.int 1
    %61432 = torch.aten.add.Tensor %60091, %61343, %int1_58912 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61432, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58913 = torch.constant.int 1
    %61433 = torch.aten.add.Tensor %60092, %61371, %int1_58913 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61433, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58914 = torch.constant.int 1
    %61434 = torch.aten.add.Tensor %60093, %61399, %int1_58914 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61434, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_58915 = torch.constant.int 1
    %61435 = torch.aten.add.Tensor %60094, %61427, %int1_58915 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61435, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
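    // Editorial annotation: each residual sum is upcast to f32 (torch dtype code 6).
    // This begins an eight-way unrolled RMSNorm over the 4096-wide hidden dimension.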
    %int6_58916 = torch.constant.int 6
    %61436 = torch.prims.convert_element_type %61428, %int6_58916 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61436, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_58917 = torch.constant.int 6
    %61437 = torch.prims.convert_element_type %61429, %int6_58917 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61437, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_58918 = torch.constant.int 6
    %61438 = torch.prims.convert_element_type %61430, %int6_58918 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61438, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_58919 = torch.constant.int 6
    %61439 = torch.prims.convert_element_type %61431, %int6_58919 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61439, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_58920 = torch.constant.int 6
    %61440 = torch.prims.convert_element_type %61432, %int6_58920 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61440, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_58921 = torch.constant.int 6
    %61441 = torch.prims.convert_element_type %61433, %int6_58921 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61441, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_58922 = torch.constant.int 6
    %61442 = torch.prims.convert_element_type %61434, %int6_58922 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61442, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_58923 = torch.constant.int 6
    %61443 = torch.prims.convert_element_type %61435, %int6_58923 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61443, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
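    // Editorial annotation: elementwise square (pow 2) of each f32 copy, the first step of
    // the mean-of-squares.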
    %int2_58924 = torch.constant.int 2
    %61444 = torch.aten.pow.Tensor_Scalar %61436, %int2_58924 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61444, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_58925 = torch.constant.int 2
    %61445 = torch.aten.pow.Tensor_Scalar %61437, %int2_58925 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61445, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_58926 = torch.constant.int 2
    %61446 = torch.aten.pow.Tensor_Scalar %61438, %int2_58926 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61446, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_58927 = torch.constant.int 2
    %61447 = torch.aten.pow.Tensor_Scalar %61439, %int2_58927 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61447, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_58928 = torch.constant.int 2
    %61448 = torch.aten.pow.Tensor_Scalar %61440, %int2_58928 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61448, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_58929 = torch.constant.int 2
    %61449 = torch.aten.pow.Tensor_Scalar %61441, %int2_58929 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61449, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_58930 = torch.constant.int 2
    %61450 = torch.aten.pow.Tensor_Scalar %61442, %int2_58930 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61450, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_58931 = torch.constant.int 2
    %61451 = torch.aten.pow.Tensor_Scalar %61443, %int2_58931 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61451, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
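    // Editorial annotation: per-row mean over the last dimension (dim -1, keepdim = true),
    // yielding [4,?,1] mean-of-squares tensors.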
    %int-1_58932 = torch.constant.int -1
    %61452 = torch.prim.ListConstruct %int-1_58932 : (!torch.int) -> !torch.list<int>
    %true_58933 = torch.constant.bool true
    %none_58934 = torch.constant.none
    %61453 = torch.aten.mean.dim %61444, %61452, %true_58933, %none_58934 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61453, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_58935 = torch.constant.int -1
    %61454 = torch.prim.ListConstruct %int-1_58935 : (!torch.int) -> !torch.list<int>
    %true_58936 = torch.constant.bool true
    %none_58937 = torch.constant.none
    %61455 = torch.aten.mean.dim %61445, %61454, %true_58936, %none_58937 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61455, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_58938 = torch.constant.int -1
    %61456 = torch.prim.ListConstruct %int-1_58938 : (!torch.int) -> !torch.list<int>
    %true_58939 = torch.constant.bool true
    %none_58940 = torch.constant.none
    %61457 = torch.aten.mean.dim %61446, %61456, %true_58939, %none_58940 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61457, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_58941 = torch.constant.int -1
    %61458 = torch.prim.ListConstruct %int-1_58941 : (!torch.int) -> !torch.list<int>
    %true_58942 = torch.constant.bool true
    %none_58943 = torch.constant.none
    %61459 = torch.aten.mean.dim %61447, %61458, %true_58942, %none_58943 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61459, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_58944 = torch.constant.int -1
    %61460 = torch.prim.ListConstruct %int-1_58944 : (!torch.int) -> !torch.list<int>
    %true_58945 = torch.constant.bool true
    %none_58946 = torch.constant.none
    %61461 = torch.aten.mean.dim %61448, %61460, %true_58945, %none_58946 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61461, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_58947 = torch.constant.int -1
    %61462 = torch.prim.ListConstruct %int-1_58947 : (!torch.int) -> !torch.list<int>
    %true_58948 = torch.constant.bool true
    %none_58949 = torch.constant.none
    %61463 = torch.aten.mean.dim %61449, %61462, %true_58948, %none_58949 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61463, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_58950 = torch.constant.int -1
    %61464 = torch.prim.ListConstruct %int-1_58950 : (!torch.int) -> !torch.list<int>
    %true_58951 = torch.constant.bool true
    %none_58952 = torch.constant.none
    %61465 = torch.aten.mean.dim %61450, %61464, %true_58951, %none_58952 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61465, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_58953 = torch.constant.int -1
    %61466 = torch.prim.ListConstruct %int-1_58953 : (!torch.int) -> !torch.list<int>
    %true_58954 = torch.constant.bool true
    %none_58955 = torch.constant.none
    %61467 = torch.aten.mean.dim %61451, %61466, %true_58954, %none_58955 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61467, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
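    // Editorial annotation: add the RMSNorm epsilon (9.9999997473787516E-6, i.e. 1e-5 as
    // represented in f32) to each mean-of-squares estimate.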
    %float9.999990e-06_58956 = torch.constant.float 9.9999997473787516E-6
    %int1_58957 = torch.constant.int 1
    %61468 = torch.aten.add.Scalar %61453, %float9.999990e-06_58956, %int1_58957 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61468, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_58958 = torch.constant.float 9.9999997473787516E-6
    %int1_58959 = torch.constant.int 1
    %61469 = torch.aten.add.Scalar %61455, %float9.999990e-06_58958, %int1_58959 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61469, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_58960 = torch.constant.float 9.9999997473787516E-6
    %int1_58961 = torch.constant.int 1
    %61470 = torch.aten.add.Scalar %61457, %float9.999990e-06_58960, %int1_58961 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61470, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_58962 = torch.constant.float 9.9999997473787516E-6
    %int1_58963 = torch.constant.int 1
    %61471 = torch.aten.add.Scalar %61459, %float9.999990e-06_58962, %int1_58963 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61471, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_58964 = torch.constant.float 9.9999997473787516E-6
    %int1_58965 = torch.constant.int 1
    %61472 = torch.aten.add.Scalar %61461, %float9.999990e-06_58964, %int1_58965 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61472, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_58966 = torch.constant.float 9.9999997473787516E-6
    %int1_58967 = torch.constant.int 1
    %61473 = torch.aten.add.Scalar %61463, %float9.999990e-06_58966, %int1_58967 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61473, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_58968 = torch.constant.float 9.9999997473787516E-6
    %int1_58969 = torch.constant.int 1
    %61474 = torch.aten.add.Scalar %61465, %float9.999990e-06_58968, %int1_58969 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61474, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_58970 = torch.constant.float 9.9999997473787516E-6
    %int1_58971 = torch.constant.int 1
    %61475 = torch.aten.add.Scalar %61467, %float9.999990e-06_58970, %int1_58971 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61475, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
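    // Editorial annotation: reciprocal square root of (mean-of-squares + eps), one per
    // device.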
    %61476 = torch.aten.rsqrt %61468 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61476, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %61477 = torch.aten.rsqrt %61469 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61477, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %61478 = torch.aten.rsqrt %61470 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61478, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %61479 = torch.aten.rsqrt %61471 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61479, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %61480 = torch.aten.rsqrt %61472 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61480, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %61481 = torch.aten.rsqrt %61473 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61481, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %61482 = torch.aten.rsqrt %61474 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61482, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %61483 = torch.aten.rsqrt %61475 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61483, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
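    // Editorial annotation: normalize — multiply each f32 activation tensor by its
    // broadcast [4,?,1] rsqrt factor.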
    %61484 = torch.aten.mul.Tensor %61436, %61476 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61484, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61485 = torch.aten.mul.Tensor %61437, %61477 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61485, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61486 = torch.aten.mul.Tensor %61438, %61478 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61486, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61487 = torch.aten.mul.Tensor %61439, %61479 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61487, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61488 = torch.aten.mul.Tensor %61440, %61480 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61488, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61489 = torch.aten.mul.Tensor %61441, %61481 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61489, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61490 = torch.aten.mul.Tensor %61442, %61482 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61490, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61491 = torch.aten.mul.Tensor %61443, %61483 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61491, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
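    // Editorial annotation: scale by the learned norm weights %2280..%2287 (one [4096] f32
    // replica per device). Given the [1792,4096] projection shards used just below, these
    // are plausibly the per-device ffn_norm weight replicas.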
    %61492 = torch.aten.mul.Tensor %2280, %61484 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61492, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61493 = torch.aten.mul.Tensor %2281, %61485 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61493, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61494 = torch.aten.mul.Tensor %2282, %61486 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61494, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61495 = torch.aten.mul.Tensor %2283, %61487 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61495, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61496 = torch.aten.mul.Tensor %2284, %61488 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61496, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61497 = torch.aten.mul.Tensor %2285, %61489 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61497, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61498 = torch.aten.mul.Tensor %2286, %61490 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61498, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %61499 = torch.aten.mul.Tensor %2287, %61491 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61499, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
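    // Editorial annotation: downcast the normalized activations back to f16 (torch dtype
    // code 5), giving %61500..%61507.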
    %int5_58972 = torch.constant.int 5
    %61500 = torch.prims.convert_element_type %61492, %int5_58972 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61500, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_58973 = torch.constant.int 5
    %61501 = torch.prims.convert_element_type %61493, %int5_58973 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61501, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_58974 = torch.constant.int 5
    %61502 = torch.prims.convert_element_type %61494, %int5_58974 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61502, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_58975 = torch.constant.int 5
    %61503 = torch.prims.convert_element_type %61495, %int5_58975 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61503, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_58976 = torch.constant.int 5
    %61504 = torch.prims.convert_element_type %61496, %int5_58976 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61504, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_58977 = torch.constant.int 5
    %61505 = torch.prims.convert_element_type %61497, %int5_58977 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61505, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_58978 = torch.constant.int 5
    %61506 = torch.prims.convert_element_type %61498, %int5_58978 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61506, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_58979 = torch.constant.int 5
    %61507 = torch.prims.convert_element_type %61499, %int5_58979 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61507, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
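    // Transpose the eight gate-projection weight shards from [1792,4096] to
    // [4096,1792] so the activations can be right-multiplied. 1792 per shard is
    // consistent with a 14336-wide FFN split 8 ways (an inference from the
    // shapes, not stated in this excerpt).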
    %int1_58980 = torch.constant.int 1
    %int0_58981 = torch.constant.int 0
    %61508 = torch.prim.ListConstruct %int1_58980, %int0_58981 : (!torch.int, !torch.int) -> !torch.list<int>
    %61509 = torch.aten.permute %2288, %61508 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_58982 = torch.constant.int 1
    %int0_58983 = torch.constant.int 0
    %61510 = torch.prim.ListConstruct %int1_58982, %int0_58983 : (!torch.int, !torch.int) -> !torch.list<int>
    %61511 = torch.aten.permute %2289, %61510 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_58984 = torch.constant.int 1
    %int0_58985 = torch.constant.int 0
    %61512 = torch.prim.ListConstruct %int1_58984, %int0_58985 : (!torch.int, !torch.int) -> !torch.list<int>
    %61513 = torch.aten.permute %2290, %61512 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_58986 = torch.constant.int 1
    %int0_58987 = torch.constant.int 0
    %61514 = torch.prim.ListConstruct %int1_58986, %int0_58987 : (!torch.int, !torch.int) -> !torch.list<int>
    %61515 = torch.aten.permute %2291, %61514 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_58988 = torch.constant.int 1
    %int0_58989 = torch.constant.int 0
    %61516 = torch.prim.ListConstruct %int1_58988, %int0_58989 : (!torch.int, !torch.int) -> !torch.list<int>
    %61517 = torch.aten.permute %2292, %61516 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_58990 = torch.constant.int 1
    %int0_58991 = torch.constant.int 0
    %61518 = torch.prim.ListConstruct %int1_58990, %int0_58991 : (!torch.int, !torch.int) -> !torch.list<int>
    %61519 = torch.aten.permute %2293, %61518 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_58992 = torch.constant.int 1
    %int0_58993 = torch.constant.int 0
    %61520 = torch.prim.ListConstruct %int1_58992, %int0_58993 : (!torch.int, !torch.int) -> !torch.list<int>
    %61521 = torch.aten.permute %2294, %61520 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_58994 = torch.constant.int 1
    %int0_58995 = torch.constant.int 0
    %61522 = torch.prim.ListConstruct %int1_58994, %int0_58995 : (!torch.int, !torch.int) -> !torch.list<int>
    %61523 = torch.aten.permute %2295, %61522 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
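    // Gate projection, once per device: collapse [4,?,4096] to [?,4096]
    // (batch 4 times the dynamic sequence length %2482), matmul against the
    // transposed shard, then restore the [4,?,1792] shape.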
    %int4_58996 = torch.constant.int 4
    %61524 = torch.aten.mul.int %int4_58996, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_58997 = torch.constant.int 4096
    %61525 = torch.prim.ListConstruct %61524, %int4096_58997 : (!torch.int, !torch.int) -> !torch.list<int>
    %61526 = torch.aten.view %61500, %61525 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61526, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61527 = torch.aten.mm %61526, %61509 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61527, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_58998 = torch.constant.int 4
    %int1792_58999 = torch.constant.int 1792
    %61528 = torch.prim.ListConstruct %int4_58998, %2482, %int1792_58999 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61529 = torch.aten.view %61527, %61528 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61529, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59000 = torch.constant.int 4
    %61530 = torch.aten.mul.int %int4_59000, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59001 = torch.constant.int 4096
    %61531 = torch.prim.ListConstruct %61530, %int4096_59001 : (!torch.int, !torch.int) -> !torch.list<int>
    %61532 = torch.aten.view %61501, %61531 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61532, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61533 = torch.aten.mm %61532, %61511 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61533, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59002 = torch.constant.int 4
    %int1792_59003 = torch.constant.int 1792
    %61534 = torch.prim.ListConstruct %int4_59002, %2482, %int1792_59003 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61535 = torch.aten.view %61533, %61534 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61535, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59004 = torch.constant.int 4
    %61536 = torch.aten.mul.int %int4_59004, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59005 = torch.constant.int 4096
    %61537 = torch.prim.ListConstruct %61536, %int4096_59005 : (!torch.int, !torch.int) -> !torch.list<int>
    %61538 = torch.aten.view %61502, %61537 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61538, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61539 = torch.aten.mm %61538, %61513 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61539, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59006 = torch.constant.int 4
    %int1792_59007 = torch.constant.int 1792
    %61540 = torch.prim.ListConstruct %int4_59006, %2482, %int1792_59007 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61541 = torch.aten.view %61539, %61540 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61541, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59008 = torch.constant.int 4
    %61542 = torch.aten.mul.int %int4_59008, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59009 = torch.constant.int 4096
    %61543 = torch.prim.ListConstruct %61542, %int4096_59009 : (!torch.int, !torch.int) -> !torch.list<int>
    %61544 = torch.aten.view %61503, %61543 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61544, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61545 = torch.aten.mm %61544, %61515 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61545, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59010 = torch.constant.int 4
    %int1792_59011 = torch.constant.int 1792
    %61546 = torch.prim.ListConstruct %int4_59010, %2482, %int1792_59011 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61547 = torch.aten.view %61545, %61546 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61547, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59012 = torch.constant.int 4
    %61548 = torch.aten.mul.int %int4_59012, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59013 = torch.constant.int 4096
    %61549 = torch.prim.ListConstruct %61548, %int4096_59013 : (!torch.int, !torch.int) -> !torch.list<int>
    %61550 = torch.aten.view %61504, %61549 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61550, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61551 = torch.aten.mm %61550, %61517 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61551, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59014 = torch.constant.int 4
    %int1792_59015 = torch.constant.int 1792
    %61552 = torch.prim.ListConstruct %int4_59014, %2482, %int1792_59015 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61553 = torch.aten.view %61551, %61552 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61553, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59016 = torch.constant.int 4
    %61554 = torch.aten.mul.int %int4_59016, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59017 = torch.constant.int 4096
    %61555 = torch.prim.ListConstruct %61554, %int4096_59017 : (!torch.int, !torch.int) -> !torch.list<int>
    %61556 = torch.aten.view %61505, %61555 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61556, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61557 = torch.aten.mm %61556, %61519 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61557, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59018 = torch.constant.int 4
    %int1792_59019 = torch.constant.int 1792
    %61558 = torch.prim.ListConstruct %int4_59018, %2482, %int1792_59019 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61559 = torch.aten.view %61557, %61558 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61559, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59020 = torch.constant.int 4
    %61560 = torch.aten.mul.int %int4_59020, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59021 = torch.constant.int 4096
    %61561 = torch.prim.ListConstruct %61560, %int4096_59021 : (!torch.int, !torch.int) -> !torch.list<int>
    %61562 = torch.aten.view %61506, %61561 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61562, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61563 = torch.aten.mm %61562, %61521 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61563, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59022 = torch.constant.int 4
    %int1792_59023 = torch.constant.int 1792
    %61564 = torch.prim.ListConstruct %int4_59022, %2482, %int1792_59023 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61565 = torch.aten.view %61563, %61564 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61565, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59024 = torch.constant.int 4
    %61566 = torch.aten.mul.int %int4_59024, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59025 = torch.constant.int 4096
    %61567 = torch.prim.ListConstruct %61566, %int4096_59025 : (!torch.int, !torch.int) -> !torch.list<int>
    %61568 = torch.aten.view %61507, %61567 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61568, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61569 = torch.aten.mm %61568, %61523 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61569, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59026 = torch.constant.int 4
    %int1792_59027 = torch.constant.int 1792
    %61570 = torch.prim.ListConstruct %int4_59026, %2482, %int1792_59027 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61571 = torch.aten.view %61569, %61570 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61571, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
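    // SiLU over each device's gate output -- the activation half of a
    // SwiGLU-style feed-forward (an interpretation; the ops below multiply it
    // with a second projection of the same input).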
    %61572 = torch.aten.silu %61529 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61572, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61573 = torch.aten.silu %61535 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61573, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61574 = torch.aten.silu %61541 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61574, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61575 = torch.aten.silu %61547 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61575, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61576 = torch.aten.silu %61553 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61576, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61577 = torch.aten.silu %61559 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61577, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61578 = torch.aten.silu %61565 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61578, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61579 = torch.aten.silu %61571 : !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61579, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
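    // Transpose a second set of eight [1792,4096] weight shards (presumably the
    // up projection) to [4096,1792], mirroring the gate weights above.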
    %int1_59028 = torch.constant.int 1
    %int0_59029 = torch.constant.int 0
    %61580 = torch.prim.ListConstruct %int1_59028, %int0_59029 : (!torch.int, !torch.int) -> !torch.list<int>
    %61581 = torch.aten.permute %2296, %61580 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_59030 = torch.constant.int 1
    %int0_59031 = torch.constant.int 0
    %61582 = torch.prim.ListConstruct %int1_59030, %int0_59031 : (!torch.int, !torch.int) -> !torch.list<int>
    %61583 = torch.aten.permute %2297, %61582 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_59032 = torch.constant.int 1
    %int0_59033 = torch.constant.int 0
    %61584 = torch.prim.ListConstruct %int1_59032, %int0_59033 : (!torch.int, !torch.int) -> !torch.list<int>
    %61585 = torch.aten.permute %2298, %61584 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_59034 = torch.constant.int 1
    %int0_59035 = torch.constant.int 0
    %61586 = torch.prim.ListConstruct %int1_59034, %int0_59035 : (!torch.int, !torch.int) -> !torch.list<int>
    %61587 = torch.aten.permute %2299, %61586 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_59036 = torch.constant.int 1
    %int0_59037 = torch.constant.int 0
    %61588 = torch.prim.ListConstruct %int1_59036, %int0_59037 : (!torch.int, !torch.int) -> !torch.list<int>
    %61589 = torch.aten.permute %2300, %61588 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_59038 = torch.constant.int 1
    %int0_59039 = torch.constant.int 0
    %61590 = torch.prim.ListConstruct %int1_59038, %int0_59039 : (!torch.int, !torch.int) -> !torch.list<int>
    %61591 = torch.aten.permute %2301, %61590 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_59040 = torch.constant.int 1
    %int0_59041 = torch.constant.int 0
    %61592 = torch.prim.ListConstruct %int1_59040, %int0_59041 : (!torch.int, !torch.int) -> !torch.list<int>
    %61593 = torch.aten.permute %2302, %61592 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
    %int1_59042 = torch.constant.int 1
    %int0_59043 = torch.constant.int 0
    %61594 = torch.prim.ListConstruct %int1_59042, %int0_59043 : (!torch.int, !torch.int) -> !torch.list<int>
    %61595 = torch.aten.permute %2303, %61594 : !torch.vtensor<[1792,4096],f16>, !torch.list<int> -> !torch.vtensor<[4096,1792],f16>
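    // Up projection per device: the same flatten / mm / reshape pattern as the
    // gate projection, producing another [4,?,1792] tensor per device.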
    %int4_59044 = torch.constant.int 4
    %61596 = torch.aten.mul.int %int4_59044, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59045 = torch.constant.int 4096
    %61597 = torch.prim.ListConstruct %61596, %int4096_59045 : (!torch.int, !torch.int) -> !torch.list<int>
    %61598 = torch.aten.view %61500, %61597 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61598, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61599 = torch.aten.mm %61598, %61581 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61599, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59046 = torch.constant.int 4
    %int1792_59047 = torch.constant.int 1792
    %61600 = torch.prim.ListConstruct %int4_59046, %2482, %int1792_59047 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61601 = torch.aten.view %61599, %61600 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61601, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59048 = torch.constant.int 4
    %61602 = torch.aten.mul.int %int4_59048, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59049 = torch.constant.int 4096
    %61603 = torch.prim.ListConstruct %61602, %int4096_59049 : (!torch.int, !torch.int) -> !torch.list<int>
    %61604 = torch.aten.view %61501, %61603 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61604, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61605 = torch.aten.mm %61604, %61583 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61605, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59050 = torch.constant.int 4
    %int1792_59051 = torch.constant.int 1792
    %61606 = torch.prim.ListConstruct %int4_59050, %2482, %int1792_59051 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61607 = torch.aten.view %61605, %61606 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61607, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59052 = torch.constant.int 4
    %61608 = torch.aten.mul.int %int4_59052, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59053 = torch.constant.int 4096
    %61609 = torch.prim.ListConstruct %61608, %int4096_59053 : (!torch.int, !torch.int) -> !torch.list<int>
    %61610 = torch.aten.view %61502, %61609 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61610, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61611 = torch.aten.mm %61610, %61585 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61611, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59054 = torch.constant.int 4
    %int1792_59055 = torch.constant.int 1792
    %61612 = torch.prim.ListConstruct %int4_59054, %2482, %int1792_59055 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61613 = torch.aten.view %61611, %61612 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61613, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59056 = torch.constant.int 4
    %61614 = torch.aten.mul.int %int4_59056, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59057 = torch.constant.int 4096
    %61615 = torch.prim.ListConstruct %61614, %int4096_59057 : (!torch.int, !torch.int) -> !torch.list<int>
    %61616 = torch.aten.view %61503, %61615 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61616, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61617 = torch.aten.mm %61616, %61587 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61617, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59058 = torch.constant.int 4
    %int1792_59059 = torch.constant.int 1792
    %61618 = torch.prim.ListConstruct %int4_59058, %2482, %int1792_59059 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61619 = torch.aten.view %61617, %61618 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61619, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59060 = torch.constant.int 4
    %61620 = torch.aten.mul.int %int4_59060, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59061 = torch.constant.int 4096
    %61621 = torch.prim.ListConstruct %61620, %int4096_59061 : (!torch.int, !torch.int) -> !torch.list<int>
    %61622 = torch.aten.view %61504, %61621 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61622, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61623 = torch.aten.mm %61622, %61589 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61623, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59062 = torch.constant.int 4
    %int1792_59063 = torch.constant.int 1792
    %61624 = torch.prim.ListConstruct %int4_59062, %2482, %int1792_59063 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61625 = torch.aten.view %61623, %61624 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61625, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59064 = torch.constant.int 4
    %61626 = torch.aten.mul.int %int4_59064, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59065 = torch.constant.int 4096
    %61627 = torch.prim.ListConstruct %61626, %int4096_59065 : (!torch.int, !torch.int) -> !torch.list<int>
    %61628 = torch.aten.view %61505, %61627 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61628, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61629 = torch.aten.mm %61628, %61591 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61629, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59066 = torch.constant.int 4
    %int1792_59067 = torch.constant.int 1792
    %61630 = torch.prim.ListConstruct %int4_59066, %2482, %int1792_59067 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61631 = torch.aten.view %61629, %61630 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61631, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59068 = torch.constant.int 4
    %61632 = torch.aten.mul.int %int4_59068, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59069 = torch.constant.int 4096
    %61633 = torch.prim.ListConstruct %61632, %int4096_59069 : (!torch.int, !torch.int) -> !torch.list<int>
    %61634 = torch.aten.view %61506, %61633 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61634, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61635 = torch.aten.mm %61634, %61593 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61635, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59070 = torch.constant.int 4
    %int1792_59071 = torch.constant.int 1792
    %61636 = torch.prim.ListConstruct %int4_59070, %2482, %int1792_59071 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61637 = torch.aten.view %61635, %61636 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61637, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %int4_59072 = torch.constant.int 4
    %61638 = torch.aten.mul.int %int4_59072, %2482 : !torch.int, !torch.int -> !torch.int
    %int4096_59073 = torch.constant.int 4096
    %61639 = torch.prim.ListConstruct %61638, %int4096_59073 : (!torch.int, !torch.int) -> !torch.list<int>
    %61640 = torch.aten.view %61507, %61639 : !torch.vtensor<[4,?,4096],f16>, !torch.list<int> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61640, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %61641 = torch.aten.mm %61640, %61595 : !torch.vtensor<[?,4096],f16>, !torch.vtensor<[4096,1792],f16> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61641, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %int4_59074 = torch.constant.int 4
    %int1792_59075 = torch.constant.int 1792
    %61642 = torch.prim.ListConstruct %int4_59074, %2482, %int1792_59075 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61643 = torch.aten.view %61641, %61642 : !torch.vtensor<[?,1792],f16>, !torch.list<int> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61643, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
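    // Elementwise product silu(gate) * up on each device, i.e. the SwiGLU
    // combination h = silu(x @ Wg^T) * (x @ Wu^T) under the naming assumed above.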
    %61644 = torch.aten.mul.Tensor %61572, %61601 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61644, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61645 = torch.aten.mul.Tensor %61573, %61607 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61645, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61646 = torch.aten.mul.Tensor %61574, %61613 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61646, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61647 = torch.aten.mul.Tensor %61575, %61619 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61647, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61648 = torch.aten.mul.Tensor %61576, %61625 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61648, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61649 = torch.aten.mul.Tensor %61577, %61631 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61649, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61650 = torch.aten.mul.Tensor %61578, %61637 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61650, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
    %61651 = torch.aten.mul.Tensor %61579, %61643 : !torch.vtensor<[4,?,1792],f16>, !torch.vtensor<[4,?,1792],f16> -> !torch.vtensor<[4,?,1792],f16>
    torch.bind_symbolic_shape %61651, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1792)> : !torch.vtensor<[4,?,1792],f16>
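    // Transpose the eight [4096,1792] shards (the down projection, by shape)
    // to [1792,4096].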
    %int1_59076 = torch.constant.int 1
    %int0_59077 = torch.constant.int 0
    %61652 = torch.prim.ListConstruct %int1_59076, %int0_59077 : (!torch.int, !torch.int) -> !torch.list<int>
    %61653 = torch.aten.permute %2304, %61652 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_59078 = torch.constant.int 1
    %int0_59079 = torch.constant.int 0
    %61654 = torch.prim.ListConstruct %int1_59078, %int0_59079 : (!torch.int, !torch.int) -> !torch.list<int>
    %61655 = torch.aten.permute %2305, %61654 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_59080 = torch.constant.int 1
    %int0_59081 = torch.constant.int 0
    %61656 = torch.prim.ListConstruct %int1_59080, %int0_59081 : (!torch.int, !torch.int) -> !torch.list<int>
    %61657 = torch.aten.permute %2306, %61656 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_59082 = torch.constant.int 1
    %int0_59083 = torch.constant.int 0
    %61658 = torch.prim.ListConstruct %int1_59082, %int0_59083 : (!torch.int, !torch.int) -> !torch.list<int>
    %61659 = torch.aten.permute %2307, %61658 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_59084 = torch.constant.int 1
    %int0_59085 = torch.constant.int 0
    %61660 = torch.prim.ListConstruct %int1_59084, %int0_59085 : (!torch.int, !torch.int) -> !torch.list<int>
    %61661 = torch.aten.permute %2308, %61660 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_59086 = torch.constant.int 1
    %int0_59087 = torch.constant.int 0
    %61662 = torch.prim.ListConstruct %int1_59086, %int0_59087 : (!torch.int, !torch.int) -> !torch.list<int>
    %61663 = torch.aten.permute %2309, %61662 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_59088 = torch.constant.int 1
    %int0_59089 = torch.constant.int 0
    %61664 = torch.prim.ListConstruct %int1_59088, %int0_59089 : (!torch.int, !torch.int) -> !torch.list<int>
    %61665 = torch.aten.permute %2310, %61664 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
    %int1_59090 = torch.constant.int 1
    %int0_59091 = torch.constant.int 0
    %61666 = torch.prim.ListConstruct %int1_59090, %int0_59091 : (!torch.int, !torch.int) -> !torch.list<int>
    %61667 = torch.aten.permute %2311, %61666 : !torch.vtensor<[4096,1792],f16>, !torch.list<int> -> !torch.vtensor<[1792,4096],f16>
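    // Down projection per device: the sequence length is re-read from the gate
    // output (size.int on dim 1), the [4,?,1792] product is flattened to
    // [?,1792], matmul'd against [1792,4096], and reshaped back; each device
    // now holds a partial [4,?,4096] contribution to the full FFN output.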
    %int1_59092 = torch.constant.int 1
    %61668 = torch.aten.size.int %61529, %int1_59092 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_59093 = torch.constant.int 4
    %61669 = torch.aten.mul.int %int4_59093, %61668 : !torch.int, !torch.int -> !torch.int
    %int1792_59094 = torch.constant.int 1792
    %61670 = torch.prim.ListConstruct %61669, %int1792_59094 : (!torch.int, !torch.int) -> !torch.list<int>
    %61671 = torch.aten.view %61644, %61670 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61671, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %61672 = torch.aten.mm %61671, %61653 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61672, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_59095 = torch.constant.int 4
    %int4096_59096 = torch.constant.int 4096
    %61673 = torch.prim.ListConstruct %int4_59095, %61668, %int4096_59096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61674 = torch.aten.view %61672, %61673 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61674, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59097 = torch.constant.int 1
    %61675 = torch.aten.size.int %61535, %int1_59097 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_59098 = torch.constant.int 4
    %61676 = torch.aten.mul.int %int4_59098, %61675 : !torch.int, !torch.int -> !torch.int
    %int1792_59099 = torch.constant.int 1792
    %61677 = torch.prim.ListConstruct %61676, %int1792_59099 : (!torch.int, !torch.int) -> !torch.list<int>
    %61678 = torch.aten.view %61645, %61677 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61678, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %61679 = torch.aten.mm %61678, %61655 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61679, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_59100 = torch.constant.int 4
    %int4096_59101 = torch.constant.int 4096
    %61680 = torch.prim.ListConstruct %int4_59100, %61675, %int4096_59101 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61681 = torch.aten.view %61679, %61680 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61681, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59102 = torch.constant.int 1
    %61682 = torch.aten.size.int %61541, %int1_59102 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_59103 = torch.constant.int 4
    %61683 = torch.aten.mul.int %int4_59103, %61682 : !torch.int, !torch.int -> !torch.int
    %int1792_59104 = torch.constant.int 1792
    %61684 = torch.prim.ListConstruct %61683, %int1792_59104 : (!torch.int, !torch.int) -> !torch.list<int>
    %61685 = torch.aten.view %61646, %61684 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61685, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %61686 = torch.aten.mm %61685, %61657 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61686, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_59105 = torch.constant.int 4
    %int4096_59106 = torch.constant.int 4096
    %61687 = torch.prim.ListConstruct %int4_59105, %61682, %int4096_59106 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61688 = torch.aten.view %61686, %61687 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61688, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59107 = torch.constant.int 1
    %61689 = torch.aten.size.int %61547, %int1_59107 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_59108 = torch.constant.int 4
    %61690 = torch.aten.mul.int %int4_59108, %61689 : !torch.int, !torch.int -> !torch.int
    %int1792_59109 = torch.constant.int 1792
    %61691 = torch.prim.ListConstruct %61690, %int1792_59109 : (!torch.int, !torch.int) -> !torch.list<int>
    %61692 = torch.aten.view %61647, %61691 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61692, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %61693 = torch.aten.mm %61692, %61659 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61693, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_59110 = torch.constant.int 4
    %int4096_59111 = torch.constant.int 4096
    %61694 = torch.prim.ListConstruct %int4_59110, %61689, %int4096_59111 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61695 = torch.aten.view %61693, %61694 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61695, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59112 = torch.constant.int 1
    %61696 = torch.aten.size.int %61553, %int1_59112 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_59113 = torch.constant.int 4
    %61697 = torch.aten.mul.int %int4_59113, %61696 : !torch.int, !torch.int -> !torch.int
    %int1792_59114 = torch.constant.int 1792
    %61698 = torch.prim.ListConstruct %61697, %int1792_59114 : (!torch.int, !torch.int) -> !torch.list<int>
    %61699 = torch.aten.view %61648, %61698 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61699, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %61700 = torch.aten.mm %61699, %61661 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61700, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_59115 = torch.constant.int 4
    %int4096_59116 = torch.constant.int 4096
    %61701 = torch.prim.ListConstruct %int4_59115, %61696, %int4096_59116 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61702 = torch.aten.view %61700, %61701 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61702, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59117 = torch.constant.int 1
    %61703 = torch.aten.size.int %61559, %int1_59117 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_59118 = torch.constant.int 4
    %61704 = torch.aten.mul.int %int4_59118, %61703 : !torch.int, !torch.int -> !torch.int
    %int1792_59119 = torch.constant.int 1792
    %61705 = torch.prim.ListConstruct %61704, %int1792_59119 : (!torch.int, !torch.int) -> !torch.list<int>
    %61706 = torch.aten.view %61649, %61705 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61706, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %61707 = torch.aten.mm %61706, %61663 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61707, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_59120 = torch.constant.int 4
    %int4096_59121 = torch.constant.int 4096
    %61708 = torch.prim.ListConstruct %int4_59120, %61703, %int4096_59121 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61709 = torch.aten.view %61707, %61708 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61709, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59122 = torch.constant.int 1
    %61710 = torch.aten.size.int %61565, %int1_59122 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_59123 = torch.constant.int 4
    %61711 = torch.aten.mul.int %int4_59123, %61710 : !torch.int, !torch.int -> !torch.int
    %int1792_59124 = torch.constant.int 1792
    %61712 = torch.prim.ListConstruct %61711, %int1792_59124 : (!torch.int, !torch.int) -> !torch.list<int>
    %61713 = torch.aten.view %61650, %61712 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61713, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %61714 = torch.aten.mm %61713, %61665 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61714, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_59125 = torch.constant.int 4
    %int4096_59126 = torch.constant.int 4096
    %61715 = torch.prim.ListConstruct %int4_59125, %61710, %int4096_59126 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61716 = torch.aten.view %61714, %61715 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61716, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59127 = torch.constant.int 1
    %61717 = torch.aten.size.int %61571, %int1_59127 : !torch.vtensor<[4,?,1792],f16>, !torch.int -> !torch.int
    %int4_59128 = torch.constant.int 4
    %61718 = torch.aten.mul.int %int4_59128, %61717 : !torch.int, !torch.int -> !torch.int
    %int1792_59129 = torch.constant.int 1792
    %61719 = torch.prim.ListConstruct %61718, %int1792_59129 : (!torch.int, !torch.int) -> !torch.list<int>
    %61720 = torch.aten.view %61651, %61719 : !torch.vtensor<[4,?,1792],f16>, !torch.list<int> -> !torch.vtensor<[?,1792],f16>
    torch.bind_symbolic_shape %61720, [%2336], affine_map<()[s0] -> (s0 * 64, 1792)> : !torch.vtensor<[?,1792],f16>
    %61721 = torch.aten.mm %61720, %61667 : !torch.vtensor<[?,1792],f16>, !torch.vtensor<[1792,4096],f16> -> !torch.vtensor<[?,4096],f16>
    torch.bind_symbolic_shape %61721, [%2336], affine_map<()[s0] -> (s0 * 64, 4096)> : !torch.vtensor<[?,4096],f16>
    %int4_59130 = torch.constant.int 4
    %int4096_59131 = torch.constant.int 4096
    %61722 = torch.prim.ListConstruct %int4_59130, %61717, %int4096_59131 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %61723 = torch.aten.view %61721, %61722 : !torch.vtensor<[?,4096],f16>, !torch.list<int> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61723, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
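    // What follows is an unrolled all-reduce of the eight partial results
    // (%61674 ... %61723, one per device). First, move the seven remote
    // partials onto @__device_0 via flow.tensor.transfer.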
    %61724 = torch_c.to_builtin_tensor %61681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59132 = arith.constant 1 : index
    %dim_59133 = tensor.dim %61724, %c1_59132 : tensor<4x?x4096xf16>
    %61725 = flow.tensor.transfer %61724 : tensor<4x?x4096xf16>{%dim_59133} to #hal.device.promise<@__device_0>
    %61726 = torch_c.from_builtin_tensor %61725 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61726, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61727 = torch_c.to_builtin_tensor %61688 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59134 = arith.constant 1 : index
    %dim_59135 = tensor.dim %61727, %c1_59134 : tensor<4x?x4096xf16>
    %61728 = flow.tensor.transfer %61727 : tensor<4x?x4096xf16>{%dim_59135} to #hal.device.promise<@__device_0>
    %61729 = torch_c.from_builtin_tensor %61728 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61729, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61730 = torch_c.to_builtin_tensor %61695 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59136 = arith.constant 1 : index
    %dim_59137 = tensor.dim %61730, %c1_59136 : tensor<4x?x4096xf16>
    %61731 = flow.tensor.transfer %61730 : tensor<4x?x4096xf16>{%dim_59137} to #hal.device.promise<@__device_0>
    %61732 = torch_c.from_builtin_tensor %61731 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61732, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61733 = torch_c.to_builtin_tensor %61702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59138 = arith.constant 1 : index
    %dim_59139 = tensor.dim %61733, %c1_59138 : tensor<4x?x4096xf16>
    %61734 = flow.tensor.transfer %61733 : tensor<4x?x4096xf16>{%dim_59139} to #hal.device.promise<@__device_0>
    %61735 = torch_c.from_builtin_tensor %61734 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61735, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61736 = torch_c.to_builtin_tensor %61709 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59140 = arith.constant 1 : index
    %dim_59141 = tensor.dim %61736, %c1_59140 : tensor<4x?x4096xf16>
    %61737 = flow.tensor.transfer %61736 : tensor<4x?x4096xf16>{%dim_59141} to #hal.device.promise<@__device_0>
    %61738 = torch_c.from_builtin_tensor %61737 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61738, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61739 = torch_c.to_builtin_tensor %61716 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59142 = arith.constant 1 : index
    %dim_59143 = tensor.dim %61739, %c1_59142 : tensor<4x?x4096xf16>
    %61740 = flow.tensor.transfer %61739 : tensor<4x?x4096xf16>{%dim_59143} to #hal.device.promise<@__device_0>
    %61741 = torch_c.from_builtin_tensor %61740 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61741, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61742 = torch_c.to_builtin_tensor %61723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59144 = arith.constant 1 : index
    %dim_59145 = tensor.dim %61742, %c1_59144 : tensor<4x?x4096xf16>
    %61743 = flow.tensor.transfer %61742 : tensor<4x?x4096xf16>{%dim_59145} to #hal.device.promise<@__device_0>
    %61744 = torch_c.from_builtin_tensor %61743 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61744, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
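    // @__device_0 reduction: accumulate the local partial %61674 with the seven
    // transferred copies in a chain of adds.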
    %int1_59146 = torch.constant.int 1
    %61745 = torch.aten.add.Tensor %61674, %61726, %int1_59146 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61745, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59147 = torch.constant.int 1
    %61746 = torch.aten.add.Tensor %61745, %61729, %int1_59147 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61746, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59148 = torch.constant.int 1
    %61747 = torch.aten.add.Tensor %61746, %61732, %int1_59148 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61747, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59149 = torch.constant.int 1
    %61748 = torch.aten.add.Tensor %61747, %61735, %int1_59149 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61748, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59150 = torch.constant.int 1
    %61749 = torch.aten.add.Tensor %61748, %61738, %int1_59150 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61749, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59151 = torch.constant.int 1
    %61750 = torch.aten.add.Tensor %61749, %61741, %int1_59151 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61750, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59152 = torch.constant.int 1
    %61751 = torch.aten.add.Tensor %61750, %61744, %int1_59152 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61751, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
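    // The same transfers again, this time targeting @__device_1 (its own
    // partial %61681 stays in place).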
    %61752 = torch_c.to_builtin_tensor %61674 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59153 = arith.constant 1 : index
    %dim_59154 = tensor.dim %61752, %c1_59153 : tensor<4x?x4096xf16>
    %61753 = flow.tensor.transfer %61752 : tensor<4x?x4096xf16>{%dim_59154} to #hal.device.promise<@__device_1>
    %61754 = torch_c.from_builtin_tensor %61753 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61754, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61755 = torch_c.to_builtin_tensor %61688 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59155 = arith.constant 1 : index
    %dim_59156 = tensor.dim %61755, %c1_59155 : tensor<4x?x4096xf16>
    %61756 = flow.tensor.transfer %61755 : tensor<4x?x4096xf16>{%dim_59156} to #hal.device.promise<@__device_1>
    %61757 = torch_c.from_builtin_tensor %61756 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61757, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61758 = torch_c.to_builtin_tensor %61695 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59157 = arith.constant 1 : index
    %dim_59158 = tensor.dim %61758, %c1_59157 : tensor<4x?x4096xf16>
    %61759 = flow.tensor.transfer %61758 : tensor<4x?x4096xf16>{%dim_59158} to #hal.device.promise<@__device_1>
    %61760 = torch_c.from_builtin_tensor %61759 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61760, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61761 = torch_c.to_builtin_tensor %61702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59159 = arith.constant 1 : index
    %dim_59160 = tensor.dim %61761, %c1_59159 : tensor<4x?x4096xf16>
    %61762 = flow.tensor.transfer %61761 : tensor<4x?x4096xf16>{%dim_59160} to #hal.device.promise<@__device_1>
    %61763 = torch_c.from_builtin_tensor %61762 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61763, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61764 = torch_c.to_builtin_tensor %61709 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59161 = arith.constant 1 : index
    %dim_59162 = tensor.dim %61764, %c1_59161 : tensor<4x?x4096xf16>
    %61765 = flow.tensor.transfer %61764 : tensor<4x?x4096xf16>{%dim_59162} to #hal.device.promise<@__device_1>
    %61766 = torch_c.from_builtin_tensor %61765 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61766, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61767 = torch_c.to_builtin_tensor %61716 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59163 = arith.constant 1 : index
    %dim_59164 = tensor.dim %61767, %c1_59163 : tensor<4x?x4096xf16>
    %61768 = flow.tensor.transfer %61767 : tensor<4x?x4096xf16>{%dim_59164} to #hal.device.promise<@__device_1>
    %61769 = torch_c.from_builtin_tensor %61768 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61769, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61770 = torch_c.to_builtin_tensor %61723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59165 = arith.constant 1 : index
    %dim_59166 = tensor.dim %61770, %c1_59165 : tensor<4x?x4096xf16>
    %61771 = flow.tensor.transfer %61770 : tensor<4x?x4096xf16>{%dim_59166} to #hal.device.promise<@__device_1>
    %61772 = torch_c.from_builtin_tensor %61771 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61772, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
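    // @__device_1 reduction: local partial %61681 plus the seven transfers.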
    %int1_59167 = torch.constant.int 1
    %61773 = torch.aten.add.Tensor %61754, %61681, %int1_59167 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61773, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59168 = torch.constant.int 1
    %61774 = torch.aten.add.Tensor %61773, %61757, %int1_59168 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61774, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59169 = torch.constant.int 1
    %61775 = torch.aten.add.Tensor %61774, %61760, %int1_59169 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61775, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59170 = torch.constant.int 1
    %61776 = torch.aten.add.Tensor %61775, %61763, %int1_59170 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61776, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59171 = torch.constant.int 1
    %61777 = torch.aten.add.Tensor %61776, %61766, %int1_59171 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61777, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59172 = torch.constant.int 1
    %61778 = torch.aten.add.Tensor %61777, %61769, %int1_59172 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61778, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59173 = torch.constant.int 1
    %61779 = torch.aten.add.Tensor %61778, %61772, %int1_59173 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61779, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
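    // Transfers toward @__device_2 (skipping its local partial %61688); the
    // pattern presumably repeats for the remaining devices beyond this excerpt.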
    %61780 = torch_c.to_builtin_tensor %61674 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59174 = arith.constant 1 : index
    %dim_59175 = tensor.dim %61780, %c1_59174 : tensor<4x?x4096xf16>
    %61781 = flow.tensor.transfer %61780 : tensor<4x?x4096xf16>{%dim_59175} to #hal.device.promise<@__device_2>
    %61782 = torch_c.from_builtin_tensor %61781 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61782, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61783 = torch_c.to_builtin_tensor %61681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59176 = arith.constant 1 : index
    %dim_59177 = tensor.dim %61783, %c1_59176 : tensor<4x?x4096xf16>
    %61784 = flow.tensor.transfer %61783 : tensor<4x?x4096xf16>{%dim_59177} to #hal.device.promise<@__device_2>
    %61785 = torch_c.from_builtin_tensor %61784 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61785, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61786 = torch_c.to_builtin_tensor %61695 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59178 = arith.constant 1 : index
    %dim_59179 = tensor.dim %61786, %c1_59178 : tensor<4x?x4096xf16>
    %61787 = flow.tensor.transfer %61786 : tensor<4x?x4096xf16>{%dim_59179} to #hal.device.promise<@__device_2>
    %61788 = torch_c.from_builtin_tensor %61787 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61788, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61789 = torch_c.to_builtin_tensor %61702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59180 = arith.constant 1 : index
    %dim_59181 = tensor.dim %61789, %c1_59180 : tensor<4x?x4096xf16>
    %61790 = flow.tensor.transfer %61789 : tensor<4x?x4096xf16>{%dim_59181} to #hal.device.promise<@__device_2>
    %61791 = torch_c.from_builtin_tensor %61790 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61791, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61792 = torch_c.to_builtin_tensor %61709 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59182 = arith.constant 1 : index
    %dim_59183 = tensor.dim %61792, %c1_59182 : tensor<4x?x4096xf16>
    %61793 = flow.tensor.transfer %61792 : tensor<4x?x4096xf16>{%dim_59183} to #hal.device.promise<@__device_2>
    %61794 = torch_c.from_builtin_tensor %61793 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61794, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61795 = torch_c.to_builtin_tensor %61716 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59184 = arith.constant 1 : index
    %dim_59185 = tensor.dim %61795, %c1_59184 : tensor<4x?x4096xf16>
    %61796 = flow.tensor.transfer %61795 : tensor<4x?x4096xf16>{%dim_59185} to #hal.device.promise<@__device_2>
    %61797 = torch_c.from_builtin_tensor %61796 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61797, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61798 = torch_c.to_builtin_tensor %61723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59186 = arith.constant 1 : index
    %dim_59187 = tensor.dim %61798, %c1_59186 : tensor<4x?x4096xf16>
    %61799 = flow.tensor.transfer %61798 : tensor<4x?x4096xf16>{%dim_59187} to #hal.device.promise<@__device_2>
    %61800 = torch_c.from_builtin_tensor %61799 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61800, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59188 = torch.constant.int 1
    %61801 = torch.aten.add.Tensor %61782, %61785, %int1_59188 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61801, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59189 = torch.constant.int 1
    %61802 = torch.aten.add.Tensor %61801, %61688, %int1_59189 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61802, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59190 = torch.constant.int 1
    %61803 = torch.aten.add.Tensor %61802, %61788, %int1_59190 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61803, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59191 = torch.constant.int 1
    %61804 = torch.aten.add.Tensor %61803, %61791, %int1_59191 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61804, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59192 = torch.constant.int 1
    %61805 = torch.aten.add.Tensor %61804, %61794, %int1_59192 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61805, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59193 = torch.constant.int 1
    %61806 = torch.aten.add.Tensor %61805, %61797, %int1_59193 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61806, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59194 = torch.constant.int 1
    %61807 = torch.aten.add.Tensor %61806, %61800, %int1_59194 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61807, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
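    // Repeat for @__device_3 (local partial: %61695).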
    %61808 = torch_c.to_builtin_tensor %61674 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59195 = arith.constant 1 : index
    %dim_59196 = tensor.dim %61808, %c1_59195 : tensor<4x?x4096xf16>
    %61809 = flow.tensor.transfer %61808 : tensor<4x?x4096xf16>{%dim_59196} to #hal.device.promise<@__device_3>
    %61810 = torch_c.from_builtin_tensor %61809 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61810, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61811 = torch_c.to_builtin_tensor %61681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59197 = arith.constant 1 : index
    %dim_59198 = tensor.dim %61811, %c1_59197 : tensor<4x?x4096xf16>
    %61812 = flow.tensor.transfer %61811 : tensor<4x?x4096xf16>{%dim_59198} to #hal.device.promise<@__device_3>
    %61813 = torch_c.from_builtin_tensor %61812 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61813, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61814 = torch_c.to_builtin_tensor %61688 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59199 = arith.constant 1 : index
    %dim_59200 = tensor.dim %61814, %c1_59199 : tensor<4x?x4096xf16>
    %61815 = flow.tensor.transfer %61814 : tensor<4x?x4096xf16>{%dim_59200} to #hal.device.promise<@__device_3>
    %61816 = torch_c.from_builtin_tensor %61815 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61816, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61817 = torch_c.to_builtin_tensor %61702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59201 = arith.constant 1 : index
    %dim_59202 = tensor.dim %61817, %c1_59201 : tensor<4x?x4096xf16>
    %61818 = flow.tensor.transfer %61817 : tensor<4x?x4096xf16>{%dim_59202} to #hal.device.promise<@__device_3>
    %61819 = torch_c.from_builtin_tensor %61818 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61819, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61820 = torch_c.to_builtin_tensor %61709 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59203 = arith.constant 1 : index
    %dim_59204 = tensor.dim %61820, %c1_59203 : tensor<4x?x4096xf16>
    %61821 = flow.tensor.transfer %61820 : tensor<4x?x4096xf16>{%dim_59204} to #hal.device.promise<@__device_3>
    %61822 = torch_c.from_builtin_tensor %61821 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61822, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61823 = torch_c.to_builtin_tensor %61716 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59205 = arith.constant 1 : index
    %dim_59206 = tensor.dim %61823, %c1_59205 : tensor<4x?x4096xf16>
    %61824 = flow.tensor.transfer %61823 : tensor<4x?x4096xf16>{%dim_59206} to #hal.device.promise<@__device_3>
    %61825 = torch_c.from_builtin_tensor %61824 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61825, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61826 = torch_c.to_builtin_tensor %61723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59207 = arith.constant 1 : index
    %dim_59208 = tensor.dim %61826, %c1_59207 : tensor<4x?x4096xf16>
    %61827 = flow.tensor.transfer %61826 : tensor<4x?x4096xf16>{%dim_59208} to #hal.device.promise<@__device_3>
    %61828 = torch_c.from_builtin_tensor %61827 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61828, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59209 = torch.constant.int 1
    %61829 = torch.aten.add.Tensor %61810, %61813, %int1_59209 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61829, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59210 = torch.constant.int 1
    %61830 = torch.aten.add.Tensor %61829, %61816, %int1_59210 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61830, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59211 = torch.constant.int 1
    %61831 = torch.aten.add.Tensor %61830, %61695, %int1_59211 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61831, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59212 = torch.constant.int 1
    %61832 = torch.aten.add.Tensor %61831, %61819, %int1_59212 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61832, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59213 = torch.constant.int 1
    %61833 = torch.aten.add.Tensor %61832, %61822, %int1_59213 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61833, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59214 = torch.constant.int 1
    %61834 = torch.aten.add.Tensor %61833, %61825, %int1_59214 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61834, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59215 = torch.constant.int 1
    %61835 = torch.aten.add.Tensor %61834, %61828, %int1_59215 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61835, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
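    // Repeat for @__device_4 (local partial: %61702).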
    %61836 = torch_c.to_builtin_tensor %61674 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59216 = arith.constant 1 : index
    %dim_59217 = tensor.dim %61836, %c1_59216 : tensor<4x?x4096xf16>
    %61837 = flow.tensor.transfer %61836 : tensor<4x?x4096xf16>{%dim_59217} to #hal.device.promise<@__device_4>
    %61838 = torch_c.from_builtin_tensor %61837 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61838, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61839 = torch_c.to_builtin_tensor %61681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59218 = arith.constant 1 : index
    %dim_59219 = tensor.dim %61839, %c1_59218 : tensor<4x?x4096xf16>
    %61840 = flow.tensor.transfer %61839 : tensor<4x?x4096xf16>{%dim_59219} to #hal.device.promise<@__device_4>
    %61841 = torch_c.from_builtin_tensor %61840 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61841, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61842 = torch_c.to_builtin_tensor %61688 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59220 = arith.constant 1 : index
    %dim_59221 = tensor.dim %61842, %c1_59220 : tensor<4x?x4096xf16>
    %61843 = flow.tensor.transfer %61842 : tensor<4x?x4096xf16>{%dim_59221} to #hal.device.promise<@__device_4>
    %61844 = torch_c.from_builtin_tensor %61843 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61844, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61845 = torch_c.to_builtin_tensor %61695 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59222 = arith.constant 1 : index
    %dim_59223 = tensor.dim %61845, %c1_59222 : tensor<4x?x4096xf16>
    %61846 = flow.tensor.transfer %61845 : tensor<4x?x4096xf16>{%dim_59223} to #hal.device.promise<@__device_4>
    %61847 = torch_c.from_builtin_tensor %61846 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61847, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61848 = torch_c.to_builtin_tensor %61709 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59224 = arith.constant 1 : index
    %dim_59225 = tensor.dim %61848, %c1_59224 : tensor<4x?x4096xf16>
    %61849 = flow.tensor.transfer %61848 : tensor<4x?x4096xf16>{%dim_59225} to #hal.device.promise<@__device_4>
    %61850 = torch_c.from_builtin_tensor %61849 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61850, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61851 = torch_c.to_builtin_tensor %61716 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59226 = arith.constant 1 : index
    %dim_59227 = tensor.dim %61851, %c1_59226 : tensor<4x?x4096xf16>
    %61852 = flow.tensor.transfer %61851 : tensor<4x?x4096xf16>{%dim_59227} to #hal.device.promise<@__device_4>
    %61853 = torch_c.from_builtin_tensor %61852 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61853, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61854 = torch_c.to_builtin_tensor %61723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59228 = arith.constant 1 : index
    %dim_59229 = tensor.dim %61854, %c1_59228 : tensor<4x?x4096xf16>
    %61855 = flow.tensor.transfer %61854 : tensor<4x?x4096xf16>{%dim_59229} to #hal.device.promise<@__device_4>
    %61856 = torch_c.from_builtin_tensor %61855 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61856, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59230 = torch.constant.int 1
    %61857 = torch.aten.add.Tensor %61838, %61841, %int1_59230 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61857, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59231 = torch.constant.int 1
    %61858 = torch.aten.add.Tensor %61857, %61844, %int1_59231 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61858, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59232 = torch.constant.int 1
    %61859 = torch.aten.add.Tensor %61858, %61847, %int1_59232 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61859, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59233 = torch.constant.int 1
    %61860 = torch.aten.add.Tensor %61859, %61702, %int1_59233 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61860, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59234 = torch.constant.int 1
    %61861 = torch.aten.add.Tensor %61860, %61850, %int1_59234 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61861, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59235 = torch.constant.int 1
    %61862 = torch.aten.add.Tensor %61861, %61853, %int1_59235 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61862, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59236 = torch.constant.int 1
    %61863 = torch.aten.add.Tensor %61862, %61856, %int1_59236 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61863, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
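    // Repeat for @__device_5 (local partial: %61709).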
    %61864 = torch_c.to_builtin_tensor %61674 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59237 = arith.constant 1 : index
    %dim_59238 = tensor.dim %61864, %c1_59237 : tensor<4x?x4096xf16>
    %61865 = flow.tensor.transfer %61864 : tensor<4x?x4096xf16>{%dim_59238} to #hal.device.promise<@__device_5>
    %61866 = torch_c.from_builtin_tensor %61865 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61866, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61867 = torch_c.to_builtin_tensor %61681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59239 = arith.constant 1 : index
    %dim_59240 = tensor.dim %61867, %c1_59239 : tensor<4x?x4096xf16>
    %61868 = flow.tensor.transfer %61867 : tensor<4x?x4096xf16>{%dim_59240} to #hal.device.promise<@__device_5>
    %61869 = torch_c.from_builtin_tensor %61868 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61869, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61870 = torch_c.to_builtin_tensor %61688 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59241 = arith.constant 1 : index
    %dim_59242 = tensor.dim %61870, %c1_59241 : tensor<4x?x4096xf16>
    %61871 = flow.tensor.transfer %61870 : tensor<4x?x4096xf16>{%dim_59242} to #hal.device.promise<@__device_5>
    %61872 = torch_c.from_builtin_tensor %61871 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61872, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61873 = torch_c.to_builtin_tensor %61695 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59243 = arith.constant 1 : index
    %dim_59244 = tensor.dim %61873, %c1_59243 : tensor<4x?x4096xf16>
    %61874 = flow.tensor.transfer %61873 : tensor<4x?x4096xf16>{%dim_59244} to #hal.device.promise<@__device_5>
    %61875 = torch_c.from_builtin_tensor %61874 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61875, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61876 = torch_c.to_builtin_tensor %61702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59245 = arith.constant 1 : index
    %dim_59246 = tensor.dim %61876, %c1_59245 : tensor<4x?x4096xf16>
    %61877 = flow.tensor.transfer %61876 : tensor<4x?x4096xf16>{%dim_59246} to #hal.device.promise<@__device_5>
    %61878 = torch_c.from_builtin_tensor %61877 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61878, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61879 = torch_c.to_builtin_tensor %61716 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59247 = arith.constant 1 : index
    %dim_59248 = tensor.dim %61879, %c1_59247 : tensor<4x?x4096xf16>
    %61880 = flow.tensor.transfer %61879 : tensor<4x?x4096xf16>{%dim_59248} to #hal.device.promise<@__device_5>
    %61881 = torch_c.from_builtin_tensor %61880 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61881, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61882 = torch_c.to_builtin_tensor %61723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59249 = arith.constant 1 : index
    %dim_59250 = tensor.dim %61882, %c1_59249 : tensor<4x?x4096xf16>
    %61883 = flow.tensor.transfer %61882 : tensor<4x?x4096xf16>{%dim_59250} to #hal.device.promise<@__device_5>
    %61884 = torch_c.from_builtin_tensor %61883 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61884, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59251 = torch.constant.int 1
    %61885 = torch.aten.add.Tensor %61866, %61869, %int1_59251 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61885, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59252 = torch.constant.int 1
    %61886 = torch.aten.add.Tensor %61885, %61872, %int1_59252 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61886, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59253 = torch.constant.int 1
    %61887 = torch.aten.add.Tensor %61886, %61875, %int1_59253 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61887, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59254 = torch.constant.int 1
    %61888 = torch.aten.add.Tensor %61887, %61878, %int1_59254 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61888, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59255 = torch.constant.int 1
    %61889 = torch.aten.add.Tensor %61888, %61709, %int1_59255 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61889, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59256 = torch.constant.int 1
    %61890 = torch.aten.add.Tensor %61889, %61881, %int1_59256 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61890, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59257 = torch.constant.int 1
    %61891 = torch.aten.add.Tensor %61890, %61884, %int1_59257 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61891, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
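    // Repeat for @__device_6 (local partial: %61716).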
    %61892 = torch_c.to_builtin_tensor %61674 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59258 = arith.constant 1 : index
    %dim_59259 = tensor.dim %61892, %c1_59258 : tensor<4x?x4096xf16>
    %61893 = flow.tensor.transfer %61892 : tensor<4x?x4096xf16>{%dim_59259} to #hal.device.promise<@__device_6>
    %61894 = torch_c.from_builtin_tensor %61893 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61894, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61895 = torch_c.to_builtin_tensor %61681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59260 = arith.constant 1 : index
    %dim_59261 = tensor.dim %61895, %c1_59260 : tensor<4x?x4096xf16>
    %61896 = flow.tensor.transfer %61895 : tensor<4x?x4096xf16>{%dim_59261} to #hal.device.promise<@__device_6>
    %61897 = torch_c.from_builtin_tensor %61896 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61897, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61898 = torch_c.to_builtin_tensor %61688 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59262 = arith.constant 1 : index
    %dim_59263 = tensor.dim %61898, %c1_59262 : tensor<4x?x4096xf16>
    %61899 = flow.tensor.transfer %61898 : tensor<4x?x4096xf16>{%dim_59263} to #hal.device.promise<@__device_6>
    %61900 = torch_c.from_builtin_tensor %61899 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61900, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61901 = torch_c.to_builtin_tensor %61695 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59264 = arith.constant 1 : index
    %dim_59265 = tensor.dim %61901, %c1_59264 : tensor<4x?x4096xf16>
    %61902 = flow.tensor.transfer %61901 : tensor<4x?x4096xf16>{%dim_59265} to #hal.device.promise<@__device_6>
    %61903 = torch_c.from_builtin_tensor %61902 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61903, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61904 = torch_c.to_builtin_tensor %61702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59266 = arith.constant 1 : index
    %dim_59267 = tensor.dim %61904, %c1_59266 : tensor<4x?x4096xf16>
    %61905 = flow.tensor.transfer %61904 : tensor<4x?x4096xf16>{%dim_59267} to #hal.device.promise<@__device_6>
    %61906 = torch_c.from_builtin_tensor %61905 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61906, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61907 = torch_c.to_builtin_tensor %61709 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59268 = arith.constant 1 : index
    %dim_59269 = tensor.dim %61907, %c1_59268 : tensor<4x?x4096xf16>
    %61908 = flow.tensor.transfer %61907 : tensor<4x?x4096xf16>{%dim_59269} to #hal.device.promise<@__device_6>
    %61909 = torch_c.from_builtin_tensor %61908 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61909, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61910 = torch_c.to_builtin_tensor %61723 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59270 = arith.constant 1 : index
    %dim_59271 = tensor.dim %61910, %c1_59270 : tensor<4x?x4096xf16>
    %61911 = flow.tensor.transfer %61910 : tensor<4x?x4096xf16>{%dim_59271} to #hal.device.promise<@__device_6>
    %61912 = torch_c.from_builtin_tensor %61911 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61912, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59272 = torch.constant.int 1
    %61913 = torch.aten.add.Tensor %61894, %61897, %int1_59272 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61913, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59273 = torch.constant.int 1
    %61914 = torch.aten.add.Tensor %61913, %61900, %int1_59273 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61914, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59274 = torch.constant.int 1
    %61915 = torch.aten.add.Tensor %61914, %61903, %int1_59274 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61915, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59275 = torch.constant.int 1
    %61916 = torch.aten.add.Tensor %61915, %61906, %int1_59275 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61916, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59276 = torch.constant.int 1
    %61917 = torch.aten.add.Tensor %61916, %61909, %int1_59276 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61917, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59277 = torch.constant.int 1
    %61918 = torch.aten.add.Tensor %61917, %61716, %int1_59277 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61918, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59278 = torch.constant.int 1
    %61919 = torch.aten.add.Tensor %61918, %61912, %int1_59278 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61919, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
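    // Repeat for @__device_7 (local partial: %61723).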
    %61920 = torch_c.to_builtin_tensor %61674 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59279 = arith.constant 1 : index
    %dim_59280 = tensor.dim %61920, %c1_59279 : tensor<4x?x4096xf16>
    %61921 = flow.tensor.transfer %61920 : tensor<4x?x4096xf16>{%dim_59280} to #hal.device.promise<@__device_7>
    %61922 = torch_c.from_builtin_tensor %61921 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61922, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61923 = torch_c.to_builtin_tensor %61681 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59281 = arith.constant 1 : index
    %dim_59282 = tensor.dim %61923, %c1_59281 : tensor<4x?x4096xf16>
    %61924 = flow.tensor.transfer %61923 : tensor<4x?x4096xf16>{%dim_59282} to #hal.device.promise<@__device_7>
    %61925 = torch_c.from_builtin_tensor %61924 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61925, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61926 = torch_c.to_builtin_tensor %61688 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59283 = arith.constant 1 : index
    %dim_59284 = tensor.dim %61926, %c1_59283 : tensor<4x?x4096xf16>
    %61927 = flow.tensor.transfer %61926 : tensor<4x?x4096xf16>{%dim_59284} to #hal.device.promise<@__device_7>
    %61928 = torch_c.from_builtin_tensor %61927 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61928, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61929 = torch_c.to_builtin_tensor %61695 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59285 = arith.constant 1 : index
    %dim_59286 = tensor.dim %61929, %c1_59285 : tensor<4x?x4096xf16>
    %61930 = flow.tensor.transfer %61929 : tensor<4x?x4096xf16>{%dim_59286} to #hal.device.promise<@__device_7>
    %61931 = torch_c.from_builtin_tensor %61930 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61931, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61932 = torch_c.to_builtin_tensor %61702 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59287 = arith.constant 1 : index
    %dim_59288 = tensor.dim %61932, %c1_59287 : tensor<4x?x4096xf16>
    %61933 = flow.tensor.transfer %61932 : tensor<4x?x4096xf16>{%dim_59288} to #hal.device.promise<@__device_7>
    %61934 = torch_c.from_builtin_tensor %61933 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61934, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61935 = torch_c.to_builtin_tensor %61709 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59289 = arith.constant 1 : index
    %dim_59290 = tensor.dim %61935, %c1_59289 : tensor<4x?x4096xf16>
    %61936 = flow.tensor.transfer %61935 : tensor<4x?x4096xf16>{%dim_59290} to #hal.device.promise<@__device_7>
    %61937 = torch_c.from_builtin_tensor %61936 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61937, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %61938 = torch_c.to_builtin_tensor %61716 : !torch.vtensor<[4,?,4096],f16> -> tensor<4x?x4096xf16>
    %c1_59291 = arith.constant 1 : index
    %dim_59292 = tensor.dim %61938, %c1_59291 : tensor<4x?x4096xf16>
    %61939 = flow.tensor.transfer %61938 : tensor<4x?x4096xf16>{%dim_59292} to #hal.device.promise<@__device_7>
    %61940 = torch_c.from_builtin_tensor %61939 : tensor<4x?x4096xf16> -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61940, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59293 = torch.constant.int 1
    %61941 = torch.aten.add.Tensor %61922, %61925, %int1_59293 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61941, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59294 = torch.constant.int 1
    %61942 = torch.aten.add.Tensor %61941, %61928, %int1_59294 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61942, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59295 = torch.constant.int 1
    %61943 = torch.aten.add.Tensor %61942, %61931, %int1_59295 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61943, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59296 = torch.constant.int 1
    %61944 = torch.aten.add.Tensor %61943, %61934, %int1_59296 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61944, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59297 = torch.constant.int 1
    %61945 = torch.aten.add.Tensor %61944, %61937, %int1_59297 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61945, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59298 = torch.constant.int 1
    %61946 = torch.aten.add.Tensor %61945, %61940, %int1_59298 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61946, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59299 = torch.constant.int 1
    %61947 = torch.aten.add.Tensor %61946, %61723, %int1_59299 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61947, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
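    // %61948..%61955: on each device, add the all-reduced result to that
    // device's copy of the residual stream (%61428..%61435), consistent with
    // the transformer block's residual connection, replicated per device.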
    %int1_59300 = torch.constant.int 1
    %61948 = torch.aten.add.Tensor %61428, %61751, %int1_59300 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61948, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59301 = torch.constant.int 1
    %61949 = torch.aten.add.Tensor %61429, %61779, %int1_59301 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61949, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59302 = torch.constant.int 1
    %61950 = torch.aten.add.Tensor %61430, %61807, %int1_59302 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61950, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59303 = torch.constant.int 1
    %61951 = torch.aten.add.Tensor %61431, %61835, %int1_59303 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61951, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59304 = torch.constant.int 1
    %61952 = torch.aten.add.Tensor %61432, %61863, %int1_59304 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61952, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59305 = torch.constant.int 1
    %61953 = torch.aten.add.Tensor %61433, %61891, %int1_59305 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61953, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59306 = torch.constant.int 1
    %61954 = torch.aten.add.Tensor %61434, %61919, %int1_59306 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61954, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59307 = torch.constant.int 1
    %61955 = torch.aten.add.Tensor %61435, %61947, %int1_59307 : !torch.vtensor<[4,?,4096],f16>, !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %61955, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
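    // The sequence below (upcast to f32, mean of squares over the hidden dim,
    // add eps, rsqrt, scale, weight multiply, downcast to f16) is RMSNorm,
    // computed redundantly on all eight devices. First step: upcast to f32
    // (torch dtype code 6 = float32).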
    %int6_59308 = torch.constant.int 6
    %61956 = torch.prims.convert_element_type %61948, %int6_59308 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61956, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_59309 = torch.constant.int 6
    %61957 = torch.prims.convert_element_type %61949, %int6_59309 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61957, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_59310 = torch.constant.int 6
    %61958 = torch.prims.convert_element_type %61950, %int6_59310 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61958, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_59311 = torch.constant.int 6
    %61959 = torch.prims.convert_element_type %61951, %int6_59311 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61959, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_59312 = torch.constant.int 6
    %61960 = torch.prims.convert_element_type %61952, %int6_59312 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61960, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_59313 = torch.constant.int 6
    %61961 = torch.prims.convert_element_type %61953, %int6_59313 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61961, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_59314 = torch.constant.int 6
    %61962 = torch.prims.convert_element_type %61954, %int6_59314 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61962, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int6_59315 = torch.constant.int 6
    %61963 = torch.prims.convert_element_type %61955, %int6_59315 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61963, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
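    // Square each element ahead of the mean-of-squares reduction.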
    %int2_59316 = torch.constant.int 2
    %61964 = torch.aten.pow.Tensor_Scalar %61956, %int2_59316 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61964, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_59317 = torch.constant.int 2
    %61965 = torch.aten.pow.Tensor_Scalar %61957, %int2_59317 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61965, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_59318 = torch.constant.int 2
    %61966 = torch.aten.pow.Tensor_Scalar %61958, %int2_59318 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61966, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_59319 = torch.constant.int 2
    %61967 = torch.aten.pow.Tensor_Scalar %61959, %int2_59319 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61967, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_59320 = torch.constant.int 2
    %61968 = torch.aten.pow.Tensor_Scalar %61960, %int2_59320 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61968, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_59321 = torch.constant.int 2
    %61969 = torch.aten.pow.Tensor_Scalar %61961, %int2_59321 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61969, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_59322 = torch.constant.int 2
    %61970 = torch.aten.pow.Tensor_Scalar %61962, %int2_59322 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61970, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %int2_59323 = torch.constant.int 2
    %61971 = torch.aten.pow.Tensor_Scalar %61963, %int2_59323 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %61971, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
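    // Mean over the last (hidden) dimension with keepdim=true, giving a
    // [4,?,1] statistic per device.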
    %int-1_59324 = torch.constant.int -1
    %61972 = torch.prim.ListConstruct %int-1_59324 : (!torch.int) -> !torch.list<int>
    %true_59325 = torch.constant.bool true
    %none_59326 = torch.constant.none
    %61973 = torch.aten.mean.dim %61964, %61972, %true_59325, %none_59326 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61973, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_59327 = torch.constant.int -1
    %61974 = torch.prim.ListConstruct %int-1_59327 : (!torch.int) -> !torch.list<int>
    %true_59328 = torch.constant.bool true
    %none_59329 = torch.constant.none
    %61975 = torch.aten.mean.dim %61965, %61974, %true_59328, %none_59329 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61975, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_59330 = torch.constant.int -1
    %61976 = torch.prim.ListConstruct %int-1_59330 : (!torch.int) -> !torch.list<int>
    %true_59331 = torch.constant.bool true
    %none_59332 = torch.constant.none
    %61977 = torch.aten.mean.dim %61966, %61976, %true_59331, %none_59332 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61977, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_59333 = torch.constant.int -1
    %61978 = torch.prim.ListConstruct %int-1_59333 : (!torch.int) -> !torch.list<int>
    %true_59334 = torch.constant.bool true
    %none_59335 = torch.constant.none
    %61979 = torch.aten.mean.dim %61967, %61978, %true_59334, %none_59335 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61979, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_59336 = torch.constant.int -1
    %61980 = torch.prim.ListConstruct %int-1_59336 : (!torch.int) -> !torch.list<int>
    %true_59337 = torch.constant.bool true
    %none_59338 = torch.constant.none
    %61981 = torch.aten.mean.dim %61968, %61980, %true_59337, %none_59338 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61981, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_59339 = torch.constant.int -1
    %61982 = torch.prim.ListConstruct %int-1_59339 : (!torch.int) -> !torch.list<int>
    %true_59340 = torch.constant.bool true
    %none_59341 = torch.constant.none
    %61983 = torch.aten.mean.dim %61969, %61982, %true_59340, %none_59341 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61983, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_59342 = torch.constant.int -1
    %61984 = torch.prim.ListConstruct %int-1_59342 : (!torch.int) -> !torch.list<int>
    %true_59343 = torch.constant.bool true
    %none_59344 = torch.constant.none
    %61985 = torch.aten.mean.dim %61970, %61984, %true_59343, %none_59344 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61985, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %int-1_59345 = torch.constant.int -1
    %61986 = torch.prim.ListConstruct %int-1_59345 : (!torch.int) -> !torch.list<int>
    %true_59346 = torch.constant.bool true
    %none_59347 = torch.constant.none
    %61987 = torch.aten.mean.dim %61971, %61986, %true_59346, %none_59347 : !torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61987, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
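    // Add the epsilon; 9.9999997473787516E-6 is the f32 value nearest 1.0e-05.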
    %float9.999990e-06_59348 = torch.constant.float 9.9999997473787516E-6
    %int1_59349 = torch.constant.int 1
    %61988 = torch.aten.add.Scalar %61973, %float9.999990e-06_59348, %int1_59349 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61988, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_59350 = torch.constant.float 9.9999997473787516E-6
    %int1_59351 = torch.constant.int 1
    %61989 = torch.aten.add.Scalar %61975, %float9.999990e-06_59350, %int1_59351 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61989, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_59352 = torch.constant.float 9.9999997473787516E-6
    %int1_59353 = torch.constant.int 1
    %61990 = torch.aten.add.Scalar %61977, %float9.999990e-06_59352, %int1_59353 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61990, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_59354 = torch.constant.float 9.9999997473787516E-6
    %int1_59355 = torch.constant.int 1
    %61991 = torch.aten.add.Scalar %61979, %float9.999990e-06_59354, %int1_59355 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61991, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_59356 = torch.constant.float 9.9999997473787516E-6
    %int1_59357 = torch.constant.int 1
    %61992 = torch.aten.add.Scalar %61981, %float9.999990e-06_59356, %int1_59357 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61992, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_59358 = torch.constant.float 9.9999997473787516E-6
    %int1_59359 = torch.constant.int 1
    %61993 = torch.aten.add.Scalar %61983, %float9.999990e-06_59358, %int1_59359 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61993, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_59360 = torch.constant.float 9.9999997473787516E-6
    %int1_59361 = torch.constant.int 1
    %61994 = torch.aten.add.Scalar %61985, %float9.999990e-06_59360, %int1_59361 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61994, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %float9.999990e-06_59362 = torch.constant.float 9.9999997473787516E-6
    %int1_59363 = torch.constant.int 1
    %61995 = torch.aten.add.Scalar %61987, %float9.999990e-06_59362, %int1_59363 : !torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61995, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
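    // rsqrt yields 1/sqrt(mean(x^2) + eps), the RMSNorm normalization factor.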
    %61996 = torch.aten.rsqrt %61988 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61996, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %61997 = torch.aten.rsqrt %61989 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61997, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %61998 = torch.aten.rsqrt %61990 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61998, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %61999 = torch.aten.rsqrt %61991 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %61999, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %62000 = torch.aten.rsqrt %61992 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %62000, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %62001 = torch.aten.rsqrt %61993 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %62001, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %62002 = torch.aten.rsqrt %61994 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %62002, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
    %62003 = torch.aten.rsqrt %61995 : !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,1],f32>
    torch.bind_symbolic_shape %62003, [%2336], affine_map<()[s0] -> (4, s0 * 16, 1)> : !torch.vtensor<[4,?,1],f32>
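    // Normalize: multiply each f32 activation tensor by its broadcast
    // [4,?,1] factor.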
    %62004 = torch.aten.mul.Tensor %61956, %61996 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62004, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62005 = torch.aten.mul.Tensor %61957, %61997 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62005, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62006 = torch.aten.mul.Tensor %61958, %61998 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62006, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62007 = torch.aten.mul.Tensor %61959, %61999 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62007, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62008 = torch.aten.mul.Tensor %61960, %62000 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62008, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62009 = torch.aten.mul.Tensor %61961, %62001 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62009, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62010 = torch.aten.mul.Tensor %61962, %62002 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62010, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62011 = torch.aten.mul.Tensor %61963, %62003 : !torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62011, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
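    // Scale by %2312..%2319, which look like per-device copies of the final
    // norm weight (each tensor<4096xf32>).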
    %62012 = torch.aten.mul.Tensor %2312, %62004 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62012, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62013 = torch.aten.mul.Tensor %2313, %62005 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62013, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62014 = torch.aten.mul.Tensor %2314, %62006 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62014, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62015 = torch.aten.mul.Tensor %2315, %62007 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62015, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62016 = torch.aten.mul.Tensor %2316, %62008 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62016, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62017 = torch.aten.mul.Tensor %2317, %62009 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62017, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62018 = torch.aten.mul.Tensor %2318, %62010 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62018, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
    %62019 = torch.aten.mul.Tensor %2319, %62011 : !torch.vtensor<[4096],f32>, !torch.vtensor<[4,?,4096],f32> -> !torch.vtensor<[4,?,4096],f32>
    torch.bind_symbolic_shape %62019, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f32>
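    // Cast the eight f32 results back to f16 for the output projection; torch
    // dtype code 5 is f16, matching the f16 result types below.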
    %int5_59364 = torch.constant.int 5
    %62020 = torch.prims.convert_element_type %62012, %int5_59364 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62020, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_59365 = torch.constant.int 5
    %62021 = torch.prims.convert_element_type %62013, %int5_59365 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62021, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_59366 = torch.constant.int 5
    %62022 = torch.prims.convert_element_type %62014, %int5_59366 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62022, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_59367 = torch.constant.int 5
    %62023 = torch.prims.convert_element_type %62015, %int5_59367 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62023, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_59368 = torch.constant.int 5
    %62024 = torch.prims.convert_element_type %62016, %int5_59368 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62024, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_59369 = torch.constant.int 5
    %62025 = torch.prims.convert_element_type %62017, %int5_59369 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62025, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_59370 = torch.constant.int 5
    %62026 = torch.prims.convert_element_type %62018, %int5_59370 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62026, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int5_59371 = torch.constant.int 5
    %62027 = torch.prims.convert_element_type %62019, %int5_59371 : !torch.vtensor<[4,?,4096],f32>, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62027, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
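    // Transpose each [128256,512] weight shard to [512,128256] so it can serve
    // as the right-hand side of a plain matmul. %2320..%2327 are likely the
    // eight shards of the LM-head (output embedding) weight, split along the
    // 4096-wide hidden dimension: 8 x 512 channels.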
    %int1_59372 = torch.constant.int 1
    %int0_59373 = torch.constant.int 0
    %62028 = torch.prim.ListConstruct %int1_59372, %int0_59373 : (!torch.int, !torch.int) -> !torch.list<int>
    %62029 = torch.aten.permute %2320, %62028 : !torch.vtensor<[128256,512],f16>, !torch.list<int> -> !torch.vtensor<[512,128256],f16>
    %int1_59374 = torch.constant.int 1
    %int0_59375 = torch.constant.int 0
    %62030 = torch.prim.ListConstruct %int1_59374, %int0_59375 : (!torch.int, !torch.int) -> !torch.list<int>
    %62031 = torch.aten.permute %2321, %62030 : !torch.vtensor<[128256,512],f16>, !torch.list<int> -> !torch.vtensor<[512,128256],f16>
    %int1_59376 = torch.constant.int 1
    %int0_59377 = torch.constant.int 0
    %62032 = torch.prim.ListConstruct %int1_59376, %int0_59377 : (!torch.int, !torch.int) -> !torch.list<int>
    %62033 = torch.aten.permute %2322, %62032 : !torch.vtensor<[128256,512],f16>, !torch.list<int> -> !torch.vtensor<[512,128256],f16>
    %int1_59378 = torch.constant.int 1
    %int0_59379 = torch.constant.int 0
    %62034 = torch.prim.ListConstruct %int1_59378, %int0_59379 : (!torch.int, !torch.int) -> !torch.list<int>
    %62035 = torch.aten.permute %2323, %62034 : !torch.vtensor<[128256,512],f16>, !torch.list<int> -> !torch.vtensor<[512,128256],f16>
    %int1_59380 = torch.constant.int 1
    %int0_59381 = torch.constant.int 0
    %62036 = torch.prim.ListConstruct %int1_59380, %int0_59381 : (!torch.int, !torch.int) -> !torch.list<int>
    %62037 = torch.aten.permute %2324, %62036 : !torch.vtensor<[128256,512],f16>, !torch.list<int> -> !torch.vtensor<[512,128256],f16>
    %int1_59382 = torch.constant.int 1
    %int0_59383 = torch.constant.int 0
    %62038 = torch.prim.ListConstruct %int1_59382, %int0_59383 : (!torch.int, !torch.int) -> !torch.list<int>
    %62039 = torch.aten.permute %2325, %62038 : !torch.vtensor<[128256,512],f16>, !torch.list<int> -> !torch.vtensor<[512,128256],f16>
    %int1_59384 = torch.constant.int 1
    %int0_59385 = torch.constant.int 0
    %62040 = torch.prim.ListConstruct %int1_59384, %int0_59385 : (!torch.int, !torch.int) -> !torch.list<int>
    %62041 = torch.aten.permute %2326, %62040 : !torch.vtensor<[128256,512],f16>, !torch.list<int> -> !torch.vtensor<[512,128256],f16>
    %int1_59386 = torch.constant.int 1
    %int0_59387 = torch.constant.int 0
    %62042 = torch.prim.ListConstruct %int1_59386, %int0_59387 : (!torch.int, !torch.int) -> !torch.list<int>
    %62043 = torch.aten.permute %2327, %62042 : !torch.vtensor<[128256,512],f16>, !torch.list<int> -> !torch.vtensor<[512,128256],f16>
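    // Slice each device's [4,?,4096] hidden state down to its 512-channel
    // chunk of the hidden dimension: device i keeps channels
    // [512*i, 512*(i+1)). The slices with end 9223372036854775807 (INT64_MAX)
    // over dims 0 and 1 are no-op "take everything" slices.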
    %int0_59388 = torch.constant.int 0
    %int0_59389 = torch.constant.int 0
    %int9223372036854775807_59390 = torch.constant.int 9223372036854775807
    %int1_59391 = torch.constant.int 1
    %62044 = torch.aten.slice.Tensor %62020, %int0_59388, %int0_59389, %int9223372036854775807_59390, %int1_59391 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62044, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59392 = torch.constant.int 1
    %int0_59393 = torch.constant.int 0
    %int9223372036854775807_59394 = torch.constant.int 9223372036854775807
    %int1_59395 = torch.constant.int 1
    %62045 = torch.aten.slice.Tensor %62044, %int1_59392, %int0_59393, %int9223372036854775807_59394, %int1_59395 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62045, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int2_59396 = torch.constant.int 2
    %int0_59397 = torch.constant.int 0
    %int512_59398 = torch.constant.int 512
    %int1_59399 = torch.constant.int 1
    %62046 = torch.aten.slice.Tensor %62045, %int2_59396, %int0_59397, %int512_59398, %int1_59399 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %62046, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int0_59400 = torch.constant.int 0
    %int0_59401 = torch.constant.int 0
    %int9223372036854775807_59402 = torch.constant.int 9223372036854775807
    %int1_59403 = torch.constant.int 1
    %62047 = torch.aten.slice.Tensor %62021, %int0_59400, %int0_59401, %int9223372036854775807_59402, %int1_59403 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62047, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59404 = torch.constant.int 1
    %int0_59405 = torch.constant.int 0
    %int9223372036854775807_59406 = torch.constant.int 9223372036854775807
    %int1_59407 = torch.constant.int 1
    %62048 = torch.aten.slice.Tensor %62047, %int1_59404, %int0_59405, %int9223372036854775807_59406, %int1_59407 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62048, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int2_59408 = torch.constant.int 2
    %int512_59409 = torch.constant.int 512
    %int1024 = torch.constant.int 1024
    %int1_59410 = torch.constant.int 1
    %62049 = torch.aten.slice.Tensor %62048, %int2_59408, %int512_59409, %int1024, %int1_59410 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %62049, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int0_59411 = torch.constant.int 0
    %int0_59412 = torch.constant.int 0
    %int9223372036854775807_59413 = torch.constant.int 9223372036854775807
    %int1_59414 = torch.constant.int 1
    %62050 = torch.aten.slice.Tensor %62022, %int0_59411, %int0_59412, %int9223372036854775807_59413, %int1_59414 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62050, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59415 = torch.constant.int 1
    %int0_59416 = torch.constant.int 0
    %int9223372036854775807_59417 = torch.constant.int 9223372036854775807
    %int1_59418 = torch.constant.int 1
    %62051 = torch.aten.slice.Tensor %62050, %int1_59415, %int0_59416, %int9223372036854775807_59417, %int1_59418 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62051, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int2_59419 = torch.constant.int 2
    %int1024_59420 = torch.constant.int 1024
    %int1536 = torch.constant.int 1536
    %int1_59421 = torch.constant.int 1
    %62052 = torch.aten.slice.Tensor %62051, %int2_59419, %int1024_59420, %int1536, %int1_59421 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %62052, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int0_59422 = torch.constant.int 0
    %int0_59423 = torch.constant.int 0
    %int9223372036854775807_59424 = torch.constant.int 9223372036854775807
    %int1_59425 = torch.constant.int 1
    %62053 = torch.aten.slice.Tensor %62023, %int0_59422, %int0_59423, %int9223372036854775807_59424, %int1_59425 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62053, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59426 = torch.constant.int 1
    %int0_59427 = torch.constant.int 0
    %int9223372036854775807_59428 = torch.constant.int 9223372036854775807
    %int1_59429 = torch.constant.int 1
    %62054 = torch.aten.slice.Tensor %62053, %int1_59426, %int0_59427, %int9223372036854775807_59428, %int1_59429 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62054, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int2_59430 = torch.constant.int 2
    %int1536_59431 = torch.constant.int 1536
    %int2048 = torch.constant.int 2048
    %int1_59432 = torch.constant.int 1
    %62055 = torch.aten.slice.Tensor %62054, %int2_59430, %int1536_59431, %int2048, %int1_59432 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %62055, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int0_59433 = torch.constant.int 0
    %int0_59434 = torch.constant.int 0
    %int9223372036854775807_59435 = torch.constant.int 9223372036854775807
    %int1_59436 = torch.constant.int 1
    %62056 = torch.aten.slice.Tensor %62024, %int0_59433, %int0_59434, %int9223372036854775807_59435, %int1_59436 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62056, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59437 = torch.constant.int 1
    %int0_59438 = torch.constant.int 0
    %int9223372036854775807_59439 = torch.constant.int 9223372036854775807
    %int1_59440 = torch.constant.int 1
    %62057 = torch.aten.slice.Tensor %62056, %int1_59437, %int0_59438, %int9223372036854775807_59439, %int1_59440 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62057, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int2_59441 = torch.constant.int 2
    %int2048_59442 = torch.constant.int 2048
    %int2560 = torch.constant.int 2560
    %int1_59443 = torch.constant.int 1
    %62058 = torch.aten.slice.Tensor %62057, %int2_59441, %int2048_59442, %int2560, %int1_59443 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %62058, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int0_59444 = torch.constant.int 0
    %int0_59445 = torch.constant.int 0
    %int9223372036854775807_59446 = torch.constant.int 9223372036854775807
    %int1_59447 = torch.constant.int 1
    %62059 = torch.aten.slice.Tensor %62025, %int0_59444, %int0_59445, %int9223372036854775807_59446, %int1_59447 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62059, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59448 = torch.constant.int 1
    %int0_59449 = torch.constant.int 0
    %int9223372036854775807_59450 = torch.constant.int 9223372036854775807
    %int1_59451 = torch.constant.int 1
    %62060 = torch.aten.slice.Tensor %62059, %int1_59448, %int0_59449, %int9223372036854775807_59450, %int1_59451 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62060, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int2_59452 = torch.constant.int 2
    %int2560_59453 = torch.constant.int 2560
    %int3072 = torch.constant.int 3072
    %int1_59454 = torch.constant.int 1
    %62061 = torch.aten.slice.Tensor %62060, %int2_59452, %int2560_59453, %int3072, %int1_59454 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %62061, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int0_59455 = torch.constant.int 0
    %int0_59456 = torch.constant.int 0
    %int9223372036854775807_59457 = torch.constant.int 9223372036854775807
    %int1_59458 = torch.constant.int 1
    %62062 = torch.aten.slice.Tensor %62026, %int0_59455, %int0_59456, %int9223372036854775807_59457, %int1_59458 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62062, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59459 = torch.constant.int 1
    %int0_59460 = torch.constant.int 0
    %int9223372036854775807_59461 = torch.constant.int 9223372036854775807
    %int1_59462 = torch.constant.int 1
    %62063 = torch.aten.slice.Tensor %62062, %int1_59459, %int0_59460, %int9223372036854775807_59461, %int1_59462 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62063, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int2_59463 = torch.constant.int 2
    %int3072_59464 = torch.constant.int 3072
    %int3584 = torch.constant.int 3584
    %int1_59465 = torch.constant.int 1
    %62064 = torch.aten.slice.Tensor %62063, %int2_59463, %int3072_59464, %int3584, %int1_59465 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %62064, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
    %int0_59466 = torch.constant.int 0
    %int0_59467 = torch.constant.int 0
    %int9223372036854775807_59468 = torch.constant.int 9223372036854775807
    %int1_59469 = torch.constant.int 1
    %62065 = torch.aten.slice.Tensor %62027, %int0_59466, %int0_59467, %int9223372036854775807_59468, %int1_59469 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62065, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int1_59470 = torch.constant.int 1
    %int0_59471 = torch.constant.int 0
    %int9223372036854775807_59472 = torch.constant.int 9223372036854775807
    %int1_59473 = torch.constant.int 1
    %62066 = torch.aten.slice.Tensor %62065, %int1_59470, %int0_59471, %int9223372036854775807_59472, %int1_59473 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,4096],f16>
    torch.bind_symbolic_shape %62066, [%2336], affine_map<()[s0] -> (4, s0 * 16, 4096)> : !torch.vtensor<[4,?,4096],f16>
    %int2_59474 = torch.constant.int 2
    %int3584_59475 = torch.constant.int 3584
    %int4096_59476 = torch.constant.int 4096
    %int1_59477 = torch.constant.int 1
    %62067 = torch.aten.slice.Tensor %62066, %int2_59474, %int3584_59475, %int4096_59476, %int1_59477 : !torch.vtensor<[4,?,4096],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[4,?,512],f16>
    torch.bind_symbolic_shape %62067, [%2336], affine_map<()[s0] -> (4, s0 * 16, 512)> : !torch.vtensor<[4,?,512],f16>
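    // Per-shard partial logits: flatten [4,?,512] to [4*?,512], matmul with
    // the transposed [512,128256] weight shard, and restore the [4,?,128256]
    // shape. Each shard sums over only its 512 hidden channels, so the eight
    // results are partial sums that must be added, not concatenated; roughly
    // logits = sum_i(x[:, :, 512*i : 512*(i+1)] @ W_i.T) in framework terms.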
    %int1_59478 = torch.constant.int 1
    %62068 = torch.aten.size.int %62045, %int1_59478 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int
    %int4_59479 = torch.constant.int 4
    %62069 = torch.aten.mul.int %int4_59479, %62068 : !torch.int, !torch.int -> !torch.int
    %int512_59480 = torch.constant.int 512
    %62070 = torch.prim.ListConstruct %62069, %int512_59480 : (!torch.int, !torch.int) -> !torch.list<int>
    %62071 = torch.aten.view %62046, %62070 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %62071, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %62072 = torch.aten.mm %62071, %62029 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,128256],f16> -> !torch.vtensor<[?,128256],f16>
    torch.bind_symbolic_shape %62072, [%2336], affine_map<()[s0] -> (s0 * 64, 128256)> : !torch.vtensor<[?,128256],f16>
    %int4_59481 = torch.constant.int 4
    %int128256 = torch.constant.int 128256
    %62073 = torch.prim.ListConstruct %int4_59481, %62068, %int128256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %62074 = torch.aten.view %62072, %62073 : !torch.vtensor<[?,128256],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62074, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59482 = torch.constant.int 1
    %62075 = torch.aten.size.int %62048, %int1_59482 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int
    %int4_59483 = torch.constant.int 4
    %62076 = torch.aten.mul.int %int4_59483, %62075 : !torch.int, !torch.int -> !torch.int
    %int512_59484 = torch.constant.int 512
    %62077 = torch.prim.ListConstruct %62076, %int512_59484 : (!torch.int, !torch.int) -> !torch.list<int>
    %62078 = torch.aten.view %62049, %62077 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %62078, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %62079 = torch.aten.mm %62078, %62031 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,128256],f16> -> !torch.vtensor<[?,128256],f16>
    torch.bind_symbolic_shape %62079, [%2336], affine_map<()[s0] -> (s0 * 64, 128256)> : !torch.vtensor<[?,128256],f16>
    %int4_59485 = torch.constant.int 4
    %int128256_59486 = torch.constant.int 128256
    %62080 = torch.prim.ListConstruct %int4_59485, %62075, %int128256_59486 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %62081 = torch.aten.view %62079, %62080 : !torch.vtensor<[?,128256],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62081, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59487 = torch.constant.int 1
    %62082 = torch.aten.size.int %62051, %int1_59487 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int
    %int4_59488 = torch.constant.int 4
    %62083 = torch.aten.mul.int %int4_59488, %62082 : !torch.int, !torch.int -> !torch.int
    %int512_59489 = torch.constant.int 512
    %62084 = torch.prim.ListConstruct %62083, %int512_59489 : (!torch.int, !torch.int) -> !torch.list<int>
    %62085 = torch.aten.view %62052, %62084 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %62085, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %62086 = torch.aten.mm %62085, %62033 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,128256],f16> -> !torch.vtensor<[?,128256],f16>
    torch.bind_symbolic_shape %62086, [%2336], affine_map<()[s0] -> (s0 * 64, 128256)> : !torch.vtensor<[?,128256],f16>
    %int4_59490 = torch.constant.int 4
    %int128256_59491 = torch.constant.int 128256
    %62087 = torch.prim.ListConstruct %int4_59490, %62082, %int128256_59491 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %62088 = torch.aten.view %62086, %62087 : !torch.vtensor<[?,128256],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62088, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59492 = torch.constant.int 1
    %62089 = torch.aten.size.int %62054, %int1_59492 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int
    %int4_59493 = torch.constant.int 4
    %62090 = torch.aten.mul.int %int4_59493, %62089 : !torch.int, !torch.int -> !torch.int
    %int512_59494 = torch.constant.int 512
    %62091 = torch.prim.ListConstruct %62090, %int512_59494 : (!torch.int, !torch.int) -> !torch.list<int>
    %62092 = torch.aten.view %62055, %62091 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %62092, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %62093 = torch.aten.mm %62092, %62035 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,128256],f16> -> !torch.vtensor<[?,128256],f16>
    torch.bind_symbolic_shape %62093, [%2336], affine_map<()[s0] -> (s0 * 64, 128256)> : !torch.vtensor<[?,128256],f16>
    %int4_59495 = torch.constant.int 4
    %int128256_59496 = torch.constant.int 128256
    %62094 = torch.prim.ListConstruct %int4_59495, %62089, %int128256_59496 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %62095 = torch.aten.view %62093, %62094 : !torch.vtensor<[?,128256],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62095, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59497 = torch.constant.int 1
    %62096 = torch.aten.size.int %62057, %int1_59497 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int
    %int4_59498 = torch.constant.int 4
    %62097 = torch.aten.mul.int %int4_59498, %62096 : !torch.int, !torch.int -> !torch.int
    %int512_59499 = torch.constant.int 512
    %62098 = torch.prim.ListConstruct %62097, %int512_59499 : (!torch.int, !torch.int) -> !torch.list<int>
    %62099 = torch.aten.view %62058, %62098 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %62099, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %62100 = torch.aten.mm %62099, %62037 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,128256],f16> -> !torch.vtensor<[?,128256],f16>
    torch.bind_symbolic_shape %62100, [%2336], affine_map<()[s0] -> (s0 * 64, 128256)> : !torch.vtensor<[?,128256],f16>
    %int4_59500 = torch.constant.int 4
    %int128256_59501 = torch.constant.int 128256
    %62101 = torch.prim.ListConstruct %int4_59500, %62096, %int128256_59501 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %62102 = torch.aten.view %62100, %62101 : !torch.vtensor<[?,128256],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62102, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59502 = torch.constant.int 1
    %62103 = torch.aten.size.int %62060, %int1_59502 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int
    %int4_59503 = torch.constant.int 4
    %62104 = torch.aten.mul.int %int4_59503, %62103 : !torch.int, !torch.int -> !torch.int
    %int512_59504 = torch.constant.int 512
    %62105 = torch.prim.ListConstruct %62104, %int512_59504 : (!torch.int, !torch.int) -> !torch.list<int>
    %62106 = torch.aten.view %62061, %62105 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %62106, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %62107 = torch.aten.mm %62106, %62039 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,128256],f16> -> !torch.vtensor<[?,128256],f16>
    torch.bind_symbolic_shape %62107, [%2336], affine_map<()[s0] -> (s0 * 64, 128256)> : !torch.vtensor<[?,128256],f16>
    %int4_59505 = torch.constant.int 4
    %int128256_59506 = torch.constant.int 128256
    %62108 = torch.prim.ListConstruct %int4_59505, %62103, %int128256_59506 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %62109 = torch.aten.view %62107, %62108 : !torch.vtensor<[?,128256],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62109, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59507 = torch.constant.int 1
    %62110 = torch.aten.size.int %62063, %int1_59507 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int
    %int4_59508 = torch.constant.int 4
    %62111 = torch.aten.mul.int %int4_59508, %62110 : !torch.int, !torch.int -> !torch.int
    %int512_59509 = torch.constant.int 512
    %62112 = torch.prim.ListConstruct %62111, %int512_59509 : (!torch.int, !torch.int) -> !torch.list<int>
    %62113 = torch.aten.view %62064, %62112 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %62113, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %62114 = torch.aten.mm %62113, %62041 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,128256],f16> -> !torch.vtensor<[?,128256],f16>
    torch.bind_symbolic_shape %62114, [%2336], affine_map<()[s0] -> (s0 * 64, 128256)> : !torch.vtensor<[?,128256],f16>
    %int4_59510 = torch.constant.int 4
    %int128256_59511 = torch.constant.int 128256
    %62115 = torch.prim.ListConstruct %int4_59510, %62110, %int128256_59511 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %62116 = torch.aten.view %62114, %62115 : !torch.vtensor<[?,128256],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62116, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59512 = torch.constant.int 1
    %62117 = torch.aten.size.int %62066, %int1_59512 : !torch.vtensor<[4,?,4096],f16>, !torch.int -> !torch.int
    %int4_59513 = torch.constant.int 4
    %62118 = torch.aten.mul.int %int4_59513, %62117 : !torch.int, !torch.int -> !torch.int
    %int512_59514 = torch.constant.int 512
    %62119 = torch.prim.ListConstruct %62118, %int512_59514 : (!torch.int, !torch.int) -> !torch.list<int>
    %62120 = torch.aten.view %62067, %62119 : !torch.vtensor<[4,?,512],f16>, !torch.list<int> -> !torch.vtensor<[?,512],f16>
    torch.bind_symbolic_shape %62120, [%2336], affine_map<()[s0] -> (s0 * 64, 512)> : !torch.vtensor<[?,512],f16>
    %62121 = torch.aten.mm %62120, %62043 : !torch.vtensor<[?,512],f16>, !torch.vtensor<[512,128256],f16> -> !torch.vtensor<[?,128256],f16>
    torch.bind_symbolic_shape %62121, [%2336], affine_map<()[s0] -> (s0 * 64, 128256)> : !torch.vtensor<[?,128256],f16>
    %int4_59515 = torch.constant.int 4
    %int128256_59516 = torch.constant.int 128256
    %62122 = torch.prim.ListConstruct %int4_59515, %62117, %int128256_59516 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
    %62123 = torch.aten.view %62121, %62122 : !torch.vtensor<[?,128256],f16>, !torch.list<int> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62123, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
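    // Gather: move the partial logits from devices 1..7 onto @__device_0.
    // Each torch tensor round-trips through torch_c.to_builtin_tensor /
    // torch_c.from_builtin_tensor around the flow.tensor.transfer, which
    // carries the dynamic sequence dimension explicitly.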
    %62124 = torch_c.to_builtin_tensor %62081 : !torch.vtensor<[4,?,128256],f16> -> tensor<4x?x128256xf16>
    %c1_59517 = arith.constant 1 : index
    %dim_59518 = tensor.dim %62124, %c1_59517 : tensor<4x?x128256xf16>
    %62125 = flow.tensor.transfer %62124 : tensor<4x?x128256xf16>{%dim_59518} to #hal.device.promise<@__device_0>
    %62126 = torch_c.from_builtin_tensor %62125 : tensor<4x?x128256xf16> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62126, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %62127 = torch_c.to_builtin_tensor %62088 : !torch.vtensor<[4,?,128256],f16> -> tensor<4x?x128256xf16>
    %c1_59519 = arith.constant 1 : index
    %dim_59520 = tensor.dim %62127, %c1_59519 : tensor<4x?x128256xf16>
    %62128 = flow.tensor.transfer %62127 : tensor<4x?x128256xf16>{%dim_59520} to #hal.device.promise<@__device_0>
    %62129 = torch_c.from_builtin_tensor %62128 : tensor<4x?x128256xf16> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62129, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %62130 = torch_c.to_builtin_tensor %62095 : !torch.vtensor<[4,?,128256],f16> -> tensor<4x?x128256xf16>
    %c1_59521 = arith.constant 1 : index
    %dim_59522 = tensor.dim %62130, %c1_59521 : tensor<4x?x128256xf16>
    %62131 = flow.tensor.transfer %62130 : tensor<4x?x128256xf16>{%dim_59522} to #hal.device.promise<@__device_0>
    %62132 = torch_c.from_builtin_tensor %62131 : tensor<4x?x128256xf16> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62132, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %62133 = torch_c.to_builtin_tensor %62102 : !torch.vtensor<[4,?,128256],f16> -> tensor<4x?x128256xf16>
    %c1_59523 = arith.constant 1 : index
    %dim_59524 = tensor.dim %62133, %c1_59523 : tensor<4x?x128256xf16>
    %62134 = flow.tensor.transfer %62133 : tensor<4x?x128256xf16>{%dim_59524} to #hal.device.promise<@__device_0>
    %62135 = torch_c.from_builtin_tensor %62134 : tensor<4x?x128256xf16> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62135, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %62136 = torch_c.to_builtin_tensor %62109 : !torch.vtensor<[4,?,128256],f16> -> tensor<4x?x128256xf16>
    %c1_59525 = arith.constant 1 : index
    %dim_59526 = tensor.dim %62136, %c1_59525 : tensor<4x?x128256xf16>
    %62137 = flow.tensor.transfer %62136 : tensor<4x?x128256xf16>{%dim_59526} to #hal.device.promise<@__device_0>
    %62138 = torch_c.from_builtin_tensor %62137 : tensor<4x?x128256xf16> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62138, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %62139 = torch_c.to_builtin_tensor %62116 : !torch.vtensor<[4,?,128256],f16> -> tensor<4x?x128256xf16>
    %c1_59527 = arith.constant 1 : index
    %dim_59528 = tensor.dim %62139, %c1_59527 : tensor<4x?x128256xf16>
    %62140 = flow.tensor.transfer %62139 : tensor<4x?x128256xf16>{%dim_59528} to #hal.device.promise<@__device_0>
    %62141 = torch_c.from_builtin_tensor %62140 : tensor<4x?x128256xf16> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62141, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %62142 = torch_c.to_builtin_tensor %62123 : !torch.vtensor<[4,?,128256],f16> -> tensor<4x?x128256xf16>
    %c1_59529 = arith.constant 1 : index
    %dim_59530 = tensor.dim %62142, %c1_59529 : tensor<4x?x128256xf16>
    %62143 = flow.tensor.transfer %62142 : tensor<4x?x128256xf16>{%dim_59530} to #hal.device.promise<@__device_0>
    %62144 = torch_c.from_builtin_tensor %62143 : tensor<4x?x128256xf16> -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62144, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
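    // Reduce: accumulate the eight [4,?,128256] partial logits on device 0
    // with a chain of adds, i.e. the all-reduce step of this input-sharded
    // (row-parallel) output projection.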
    %int1_59531 = torch.constant.int 1
    %62145 = torch.aten.add.Tensor %62074, %62126, %int1_59531 : !torch.vtensor<[4,?,128256],f16>, !torch.vtensor<[4,?,128256],f16>, !torch.int -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62145, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59532 = torch.constant.int 1
    %62146 = torch.aten.add.Tensor %62145, %62129, %int1_59532 : !torch.vtensor<[4,?,128256],f16>, !torch.vtensor<[4,?,128256],f16>, !torch.int -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62146, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59533 = torch.constant.int 1
    %62147 = torch.aten.add.Tensor %62146, %62132, %int1_59533 : !torch.vtensor<[4,?,128256],f16>, !torch.vtensor<[4,?,128256],f16>, !torch.int -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62147, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59534 = torch.constant.int 1
    %62148 = torch.aten.add.Tensor %62147, %62135, %int1_59534 : !torch.vtensor<[4,?,128256],f16>, !torch.vtensor<[4,?,128256],f16>, !torch.int -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62148, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59535 = torch.constant.int 1
    %62149 = torch.aten.add.Tensor %62148, %62138, %int1_59535 : !torch.vtensor<[4,?,128256],f16>, !torch.vtensor<[4,?,128256],f16>, !torch.int -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62149, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59536 = torch.constant.int 1
    %62150 = torch.aten.add.Tensor %62149, %62141, %int1_59536 : !torch.vtensor<[4,?,128256],f16>, !torch.vtensor<[4,?,128256],f16>, !torch.int -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62150, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
    %int1_59537 = torch.constant.int 1
    %62151 = torch.aten.add.Tensor %62150, %62144, %int1_59537 : !torch.vtensor<[4,?,128256],f16>, !torch.vtensor<[4,?,128256],f16>, !torch.int -> !torch.vtensor<[4,?,128256],f16>
    torch.bind_symbolic_shape %62151, [%2336], affine_map<()[s0] -> (4, s0 * 16, 128256)> : !torch.vtensor<[4,?,128256],f16>
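    // Final logits over the 128256-token vocabulary, shape [4, seq, 128256];
    // the bound symbolic shape constrains seq to a multiple of 16 (s0 * 16).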
    return %62151 : !torch.vtensor<[4,?,128256],f16>
  }
}